lib/cmdlib.py @ 37972df0

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0613,W0201
25

    
26
import os
27
import os.path
28
import time
29
import re
30
import platform
31
import logging
32
import copy
33

    
34
from ganeti import ssh
35
from ganeti import utils
36
from ganeti import errors
37
from ganeti import hypervisor
38
from ganeti import locking
39
from ganeti import constants
40
from ganeti import objects
41
from ganeti import serializer
42
from ganeti import ssconf
43

    
44

    
45
class LogicalUnit(object):
46
  """Logical Unit base class.
47

48
  Subclasses must follow these rules:
49
    - implement ExpandNames
50
    - implement CheckPrereq (except when tasklets are used)
51
    - implement Exec (except when tasklets are used)
52
    - implement BuildHooksEnv
53
    - redefine HPATH and HTYPE
54
    - optionally redefine their run requirements:
55
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
56

57
  Note that all commands require root permissions.
58

59
  @ivar dry_run_result: the value (if any) that will be returned to the caller
60
      in dry-run mode (signalled by opcode dry_run parameter)
61

62
  """
63
  HPATH = None
64
  HTYPE = None
65
  _OP_REQP = []
66
  REQ_BGL = True
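  # Illustrative sketch (not part of the original module): a minimal,
  # hypothetical LU following the rules above.  The class name and the
  # 'node_name' opcode parameter are invented for illustration only.
  #
  #   class LUExampleNoop(NoHooksLU):
  #     _OP_REQP = ["node_name"]
  #     REQ_BGL = False
  #
  #     def ExpandNames(self):
  #       self.needed_locks = {}      # concurrent LU, no locks required
  #
  #     def CheckPrereq(self):
  #       pass                        # no cluster-side prerequisites
  #
  #     def Exec(self, feedback_fn):
  #       feedback_fn("Would act on node %s" % self.op.node_name)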
67

    
68
  def __init__(self, processor, op, context, rpc):
69
    """Constructor for LogicalUnit.
70

71
    This needs to be overridden in derived classes in order to check op
72
    validity.
73

74
    """
75
    self.proc = processor
76
    self.op = op
77
    self.cfg = context.cfg
78
    self.context = context
79
    self.rpc = rpc
80
    # Dicts used to declare locking needs to mcpu
81
    self.needed_locks = None
82
    self.acquired_locks = {}
83
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
84
    self.add_locks = {}
85
    self.remove_locks = {}
86
    # Used to force good behavior when calling helper functions
87
    self.recalculate_locks = {}
88
    self.__ssh = None
89
    # logging
90
    self.LogWarning = processor.LogWarning
91
    self.LogInfo = processor.LogInfo
92
    self.LogStep = processor.LogStep
93
    # support for dry-run
94
    self.dry_run_result = None
95

    
96
    # Tasklets
97
    self.tasklets = None
98

    
99
    for attr_name in self._OP_REQP:
100
      attr_val = getattr(op, attr_name, None)
101
      if attr_val is None:
102
        raise errors.OpPrereqError("Required parameter '%s' missing" %
103
                                   attr_name)
104

    
105
    self.CheckArguments()
106

    
107
  def __GetSSH(self):
108
    """Returns the SshRunner object
109

110
    """
111
    if not self.__ssh:
112
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
113
    return self.__ssh
114

    
115
  ssh = property(fget=__GetSSH)
116

    
117
  def CheckArguments(self):
118
    """Check syntactic validity for the opcode arguments.
119

120
    This method is for doing a simple syntactic check and ensuring
121
    validity of opcode parameters, without any cluster-related
122
    checks. While the same can be accomplished in ExpandNames and/or
123
    CheckPrereq, doing these separately is better because:
124

125
      - ExpandNames is left as purely a lock-related function
126
      - CheckPrereq is run after we have acquired locks (and possibly
127
        waited for them)
128

129
    The function is allowed to change the self.op attribute so that
130
    later methods no longer need to worry about missing parameters.
131

132
    """
133
    pass
134

    
135
  def ExpandNames(self):
136
    """Expand names for this LU.
137

138
    This method is called before starting to execute the opcode, and it should
139
    update all the parameters of the opcode to their canonical form (e.g. a
140
    short node name must be fully expanded after this method has successfully
141
    completed). This way locking, hooks, logging, etc. can work correctly.
142

143
    LUs which implement this method must also populate the self.needed_locks
144
    member, as a dict with lock levels as keys, and a list of needed lock names
145
    as values. Rules:
146

147
      - use an empty dict if you don't need any lock
148
      - if you don't need any lock at a particular level omit that level
149
      - don't put anything for the BGL level
150
      - if you want all locks at a level use locking.ALL_SET as a value
151

152
    If you need to share locks (rather than acquire them exclusively) at one
153
    level you can modify self.share_locks, setting a true value (usually 1) for
154
    that level. By default locks are not shared.
155

156
    This function can also define a list of tasklets, which then will be
157
    executed in order instead of the usual LU-level CheckPrereq and Exec
158
    functions, if those are not defined by the LU.
159

160
    Examples::
161

162
      # Acquire all nodes and one instance
163
      self.needed_locks = {
164
        locking.LEVEL_NODE: locking.ALL_SET,
165
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
166
      }
167
      # Acquire just two nodes
168
      self.needed_locks = {
169
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
170
      }
171
      # Acquire no locks
172
      self.needed_locks = {} # No, you can't leave it to the default value None
173

174
    """
175
    # The implementation of this method is mandatory only if the new LU is
176
    # concurrent, so that old LUs don't need to be changed all at the same
177
    # time.
178
    if self.REQ_BGL:
179
      self.needed_locks = {} # Exclusive LUs don't need locks.
180
    else:
181
      raise NotImplementedError
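    # Illustrative sketch (not part of the original module): an ExpandNames
    # that shares its node locks and hands the actual work to tasklets; the
    # tasklet class and the opcode's node_name parameter are hypothetical.
    #
    #   def ExpandNames(self):
    #     self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    #     self.share_locks[locking.LEVEL_NODE] = 1
    #     self.tasklets = [_ExampleTasklet(self, self.op.node_name)]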
182

    
183
  def DeclareLocks(self, level):
184
    """Declare LU locking needs for a level
185

186
    While most LUs can just declare their locking needs at ExpandNames time,
187
    sometimes there's the need to calculate some locks after having acquired
188
    the ones before. This function is called just before acquiring locks at a
189
    particular level, but after acquiring the ones at lower levels, and permits
190
    such calculations. It can be used to modify self.needed_locks, and by
191
    default it does nothing.
192

193
    This function is only called if you have something already set in
194
    self.needed_locks for the level.
195

196
    @param level: Locking level which is going to be locked
197
    @type level: member of ganeti.locking.LEVELS
198

199
    """
200

    
201
  def CheckPrereq(self):
202
    """Check prerequisites for this LU.
203

204
    This method should check that the prerequisites for the execution
205
    of this LU are fulfilled. It can do internode communication, but
206
    it should be idempotent - no cluster or system changes are
207
    allowed.
208

209
    The method should raise errors.OpPrereqError in case something is
210
    not fulfilled. Its return value is ignored.
211

212
    This method should also update all the parameters of the opcode to
213
    their canonical form if it hasn't been done by ExpandNames before.
214

215
    """
216
    if self.tasklets is not None:
217
      for (idx, tl) in enumerate(self.tasklets):
218
        logging.debug("Checking prerequisites for tasklet %s/%s",
219
                      idx + 1, len(self.tasklets))
220
        tl.CheckPrereq()
221
    else:
222
      raise NotImplementedError
223

    
224
  def Exec(self, feedback_fn):
225
    """Execute the LU.
226

227
    This method should implement the actual work. It should raise
228
    errors.OpExecError for failures that are somewhat dealt with in
229
    code, or expected.
230

231
    """
232
    if self.tasklets is not None:
233
      for (idx, tl) in enumerate(self.tasklets):
234
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
235
        tl.Exec(feedback_fn)
236
    else:
237
      raise NotImplementedError
238

    
239
  def BuildHooksEnv(self):
240
    """Build hooks environment for this LU.
241

242
    This method should return a three-element tuple consisting of: a dict
243
    containing the environment that will be used for running the
244
    specific hook for this LU, a list of node names on which the hook
245
    should run before the execution, and a list of node names on which
246
    the hook should run after the execution.
247

248
    The keys of the dict must not be prefixed with 'GANETI_', as this will
249
    be handled in the hooks runner. Also note additional keys will be
250
    added by the hooks runner. If the LU doesn't define any
251
    environment, an empty dict (and not None) should be returned.
252

253
    If there are no nodes to return, use an empty list (and not None).
254

255
    Note that if the HPATH for a LU class is None, this function will
256
    not be called.
257

258
    """
259
    raise NotImplementedError
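    # Illustrative sketch (not part of the original module): a hypothetical
    # implementation returning the environment plus the pre- and post-hook
    # node lists (here the master node before, master and target node after).
    #
    #   def BuildHooksEnv(self):
    #     env = {"OP_TARGET": self.op.node_name}
    #     mn = self.cfg.GetMasterNode()
    #     return env, [mn], [mn, self.op.node_name]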
260

    
261
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
262
    """Notify the LU about the results of its hooks.
263

264
    This method is called every time a hooks phase is executed, and notifies
265
    the Logical Unit about the hooks' result. The LU can then use it to alter
266
    its result based on the hooks.  By default the method does nothing and the
267
    previous result is passed back unchanged, but any LU can define it if it
268
    wants to use the local cluster hook-scripts somehow.
269

270
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
271
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
272
    @param hook_results: the results of the multi-node hooks rpc call
273
    @param feedback_fn: function used to send feedback back to the caller
274
    @param lu_result: the previous Exec result this LU had, or None
275
        in the PRE phase
276
    @return: the new Exec result, based on the previous result
277
        and hook results
278

279
    """
280
    return lu_result
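    # Illustrative sketch (not part of the original module): an override that
    # degrades the LU result when post-phase hooks could not be collected
    # (compare the fuller version in LUVerifyCluster.HooksCallBack below).
    #
    #   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    #     if phase == constants.HOOKS_PHASE_POST and not hook_results:
    #       feedback_fn("Post hooks returned no results")
    #       return 1
    #     return lu_result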
281

    
282
  def _ExpandAndLockInstance(self):
283
    """Helper function to expand and lock an instance.
284

285
    Many LUs that work on an instance take its name in self.op.instance_name
286
    and need to expand it and then declare the expanded name for locking. This
287
    function does it, and then updates self.op.instance_name to the expanded
288
    name. It also initializes needed_locks as a dict, if this hasn't been done
289
    before.
290

291
    """
292
    if self.needed_locks is None:
293
      self.needed_locks = {}
294
    else:
295
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
296
        "_ExpandAndLockInstance called with instance-level locks set"
297
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
298
    if expanded_name is None:
299
      raise errors.OpPrereqError("Instance '%s' not known" %
300
                                  self.op.instance_name)
301
    self.needed_locks[locking.LEVEL_INSTANCE] = expanded_name
302
    self.op.instance_name = expanded_name
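    # Illustrative sketch (not part of the original module): typical use from
    # a hypothetical instance-level LU.
    #
    #   def ExpandNames(self):
    #     self._ExpandAndLockInstance()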
303

    
304
  def _LockInstancesNodes(self, primary_only=False):
305
    """Helper function to declare instances' nodes for locking.
306

307
    This function should be called after locking one or more instances to lock
308
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
309
    with all primary or secondary nodes for instances already locked and
310
    present in self.needed_locks[locking.LEVEL_INSTANCE].
311

312
    It should be called from DeclareLocks, and for safety only works if
313
    self.recalculate_locks[locking.LEVEL_NODE] is set.
314

315
    In the future it may grow parameters to just lock some instance's nodes, or
316
    to just lock primaries or secondary nodes, if needed.
317

318
    It should be called in DeclareLocks in a way similar to::
319

320
      if level == locking.LEVEL_NODE:
321
        self._LockInstancesNodes()
322

323
    @type primary_only: boolean
324
    @param primary_only: only lock primary nodes of locked instances
325

326
    """
327
    assert locking.LEVEL_NODE in self.recalculate_locks, \
328
      "_LockInstancesNodes helper function called with no nodes to recalculate"
329

    
330
    # TODO: check if we really have been called with the instance locks held
331

    
332
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
333
    # future we might want to have different behaviors depending on the value
334
    # of self.recalculate_locks[locking.LEVEL_NODE]
335
    wanted_nodes = []
336
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
337
      instance = self.context.cfg.GetInstanceInfo(instance_name)
338
      wanted_nodes.append(instance.primary_node)
339
      if not primary_only:
340
        wanted_nodes.extend(instance.secondary_nodes)
341

    
342
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
343
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
344
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
345
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
346

    
347
    del self.recalculate_locks[locking.LEVEL_NODE]
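    # Illustrative sketch (not part of the original module): the ExpandNames
    # counterpart that makes this helper usable, declaring an (initially
    # empty) node lock list and requesting its recalculation.
    #
    #   self.needed_locks[locking.LEVEL_NODE] = []
    #   self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE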
348

    
349

    
350
class NoHooksLU(LogicalUnit):
351
  """Simple LU which runs no hooks.
352

353
  This LU is intended as a parent for other LogicalUnits which will
354
  run no hooks, in order to reduce duplicate code.
355

356
  """
357
  HPATH = None
358
  HTYPE = None
359

    
360

    
361
class Tasklet:
362
  """Tasklet base class.
363

364
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
365
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
366
  tasklets know nothing about locks.
367

368
  Subclasses must follow these rules:
369
    - Implement CheckPrereq
370
    - Implement Exec
371

372
  """
373
  def __init__(self, lu):
374
    self.lu = lu
375

    
376
    # Shortcuts
377
    self.cfg = lu.cfg
378
    self.rpc = lu.rpc
379

    
380
  def CheckPrereq(self):
381
    """Check prerequisites for this tasklets.
382

383
    This method should check whether the prerequisites for the execution of
384
    this tasklet are fulfilled. It can do internode communication, but it
385
    should be idempotent - no cluster or system changes are allowed.
386

387
    The method should raise errors.OpPrereqError in case something is not
388
    fulfilled. Its return value is ignored.
389

390
    This method should also update all parameters to their canonical form if it
391
    hasn't been done before.
392

393
    """
394
    raise NotImplementedError
395

    
396
  def Exec(self, feedback_fn):
397
    """Execute the tasklet.
398

399
    This method should implement the actual work. It should raise
400
    errors.OpExecError for failures that are somewhat dealt with in code, or
401
    expected.
402

403
    """
404
    raise NotImplementedError
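  # Illustrative sketch (not part of the original module): a minimal,
  # hypothetical tasklet and how an owning LU would wire it up.
  #
  #   class _ExampleTasklet(Tasklet):
  #     def __init__(self, lu, instance_name):
  #       Tasklet.__init__(self, lu)
  #       self.instance_name = instance_name
  #
  #     def CheckPrereq(self):
  #       if self.cfg.GetInstanceInfo(self.instance_name) is None:
  #         raise errors.OpPrereqError("Instance '%s' not known" %
  #                                    self.instance_name)
  #
  #     def Exec(self, feedback_fn):
  #       feedback_fn("Would process instance %s" % self.instance_name)
  #
  #   # ... and in the owning LU's ExpandNames:
  #   #   self.tasklets = [_ExampleTasklet(self, self.op.instance_name)]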
405

    
406

    
407
def _GetWantedNodes(lu, nodes):
408
  """Returns list of checked and expanded node names.
409

410
  @type lu: L{LogicalUnit}
411
  @param lu: the logical unit on whose behalf we execute
412
  @type nodes: list
413
  @param nodes: list of node names or None for all nodes
414
  @rtype: list
415
  @return: the list of nodes, sorted
416
  @raise errors.OpPrereqError: if the nodes parameter is wrong type
417

418
  """
419
  if not isinstance(nodes, list):
420
    raise errors.OpPrereqError("Invalid argument type 'nodes'")
421

    
422
  if not nodes:
423
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
424
      " non-empty list of nodes whose name is to be expanded.")
425

    
426
  wanted = []
427
  for name in nodes:
428
    node = lu.cfg.ExpandNodeName(name)
429
    if node is None:
430
      raise errors.OpPrereqError("No such node name '%s'" % name)
431
    wanted.append(node)
432

    
433
  return utils.NiceSort(wanted)
434

    
435

    
436
def _GetWantedInstances(lu, instances):
437
  """Returns list of checked and expanded instance names.
438

439
  @type lu: L{LogicalUnit}
440
  @param lu: the logical unit on whose behalf we execute
441
  @type instances: list
442
  @param instances: list of instance names or None for all instances
443
  @rtype: list
444
  @return: the list of instances, sorted
445
  @raise errors.OpPrereqError: if the instances parameter is wrong type
446
  @raise errors.OpPrereqError: if any of the passed instances is not found
447

448
  """
449
  if not isinstance(instances, list):
450
    raise errors.OpPrereqError("Invalid argument type 'instances'")
451

    
452
  if instances:
453
    wanted = []
454

    
455
    for name in instances:
456
      instance = lu.cfg.ExpandInstanceName(name)
457
      if instance is None:
458
        raise errors.OpPrereqError("No such instance name '%s'" % name)
459
      wanted.append(instance)
460

    
461
  else:
462
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
463
  return wanted
464

    
465

    
466
def _CheckOutputFields(static, dynamic, selected):
467
  """Checks whether all selected fields are valid.
468

469
  @type static: L{utils.FieldSet}
470
  @param static: static fields set
471
  @type dynamic: L{utils.FieldSet}
472
  @param dynamic: dynamic fields set
473

474
  """
475
  f = utils.FieldSet()
476
  f.Extend(static)
477
  f.Extend(dynamic)
478

    
479
  delta = f.NonMatching(selected)
480
  if delta:
481
    raise errors.OpPrereqError("Unknown output fields selected: %s"
482
                               % ",".join(delta))
483

    
484

    
485
def _CheckBooleanOpField(op, name):
486
  """Validates boolean opcode parameters.
487

488
  This will ensure that an opcode parameter is either a boolean value,
489
  or None (but that it always exists).
490

491
  """
492
  val = getattr(op, name, None)
493
  if not (val is None or isinstance(val, bool)):
494
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
495
                               (name, str(val)))
496
  setattr(op, name, val)
497

    
498

    
499
def _CheckNodeOnline(lu, node):
500
  """Ensure that a given node is online.
501

502
  @param lu: the LU on behalf of which we make the check
503
  @param node: the node to check
504
  @raise errors.OpPrereqError: if the node is offline
505

506
  """
507
  if lu.cfg.GetNodeInfo(node).offline:
508
    raise errors.OpPrereqError("Can't use offline node %s" % node)
509

    
510

    
511
def _CheckNodeNotDrained(lu, node):
512
  """Ensure that a given node is not drained.
513

514
  @param lu: the LU on behalf of which we make the check
515
  @param node: the node to check
516
  @raise errors.OpPrereqError: if the node is drained
517

518
  """
519
  if lu.cfg.GetNodeInfo(node).drained:
520
    raise errors.OpPrereqError("Can't use drained node %s" % node)
521

    
522

    
523
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
524
                          memory, vcpus, nics, disk_template, disks,
525
                          bep, hvp, hypervisor_name):
526
  """Builds instance related env variables for hooks
527

528
  This builds the hook environment from individual variables.
529

530
  @type name: string
531
  @param name: the name of the instance
532
  @type primary_node: string
533
  @param primary_node: the name of the instance's primary node
534
  @type secondary_nodes: list
535
  @param secondary_nodes: list of secondary nodes as strings
536
  @type os_type: string
537
  @param os_type: the name of the instance's OS
538
  @type status: boolean
539
  @param status: the should_run status of the instance
540
  @type memory: string
541
  @param memory: the memory size of the instance
542
  @type vcpus: string
543
  @param vcpus: the count of VCPUs the instance has
544
  @type nics: list
545
  @param nics: list of tuples (ip, mac, mode, link) representing
546
      the NICs the instance has
547
  @type disk_template: string
548
  @param disk_template: the disk template of the instance
549
  @type disks: list
550
  @param disks: the list of (size, mode) pairs
551
  @type bep: dict
552
  @param bep: the backend parameters for the instance
553
  @type hvp: dict
554
  @param hvp: the hypervisor parameters for the instance
555
  @type hypervisor_name: string
556
  @param hypervisor_name: the hypervisor for the instance
557
  @rtype: dict
558
  @return: the hook environment for this instance
559

560
  """
561
  if status:
562
    str_status = "up"
563
  else:
564
    str_status = "down"
565
  env = {
566
    "OP_TARGET": name,
567
    "INSTANCE_NAME": name,
568
    "INSTANCE_PRIMARY": primary_node,
569
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
570
    "INSTANCE_OS_TYPE": os_type,
571
    "INSTANCE_STATUS": str_status,
572
    "INSTANCE_MEMORY": memory,
573
    "INSTANCE_VCPUS": vcpus,
574
    "INSTANCE_DISK_TEMPLATE": disk_template,
575
    "INSTANCE_HYPERVISOR": hypervisor_name,
576
  }
577

    
578
  if nics:
579
    nic_count = len(nics)
580
    for idx, (ip, mac, mode, link) in enumerate(nics):
581
      if ip is None:
582
        ip = ""
583
      env["INSTANCE_NIC%d_IP" % idx] = ip
584
      env["INSTANCE_NIC%d_MAC" % idx] = mac
585
      env["INSTANCE_NIC%d_MODE" % idx] = mode
586
      env["INSTANCE_NIC%d_LINK" % idx] = link
587
      if mode == constants.NIC_MODE_BRIDGED:
588
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
589
  else:
590
    nic_count = 0
591

    
592
  env["INSTANCE_NIC_COUNT"] = nic_count
593

    
594
  if disks:
595
    disk_count = len(disks)
596
    for idx, (size, mode) in enumerate(disks):
597
      env["INSTANCE_DISK%d_SIZE" % idx] = size
598
      env["INSTANCE_DISK%d_MODE" % idx] = mode
599
  else:
600
    disk_count = 0
601

    
602
  env["INSTANCE_DISK_COUNT"] = disk_count
603

    
604
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
605
    for key, value in source.items():
606
      env["INSTANCE_%s_%s" % (kind, key)] = value
607

    
608
  return env
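# Illustrative sketch (not part of the original module): for a hypothetical
# running instance with one bridged NIC and one disk, the returned dict
# contains entries along these lines (values are examples only):
#
#   INSTANCE_NAME=inst1.example.com    INSTANCE_PRIMARY=node1.example.com
#   INSTANCE_STATUS=up                 INSTANCE_DISK_TEMPLATE=drbd
#   INSTANCE_NIC_COUNT=1               INSTANCE_NIC0_MODE=bridged
#   INSTANCE_NIC0_LINK=xen-br0         INSTANCE_NIC0_BRIDGE=xen-br0
#   INSTANCE_DISK_COUNT=1              INSTANCE_DISK0_SIZE=10240
#   INSTANCE_DISK0_MODE=rw             INSTANCE_BE_memory=512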
609

    
610

    
611
def _NICListToTuple(lu, nics):
612
  """Build a list of nic information tuples.
613

614
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
615
  value in LUQueryInstanceData.
616

617
  @type lu:  L{LogicalUnit}
618
  @param lu: the logical unit on whose behalf we execute
619
  @type nics: list of L{objects.NIC}
620
  @param nics: list of nics to convert to hooks tuples
621

622
  """
623
  hooks_nics = []
624
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
625
  for nic in nics:
626
    ip = nic.ip
627
    mac = nic.mac
628
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
629
    mode = filled_params[constants.NIC_MODE]
630
    link = filled_params[constants.NIC_LINK]
631
    hooks_nics.append((ip, mac, mode, link))
632
  return hooks_nics
633

    
634

    
635
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
636
  """Builds instance related env variables for hooks from an object.
637

638
  @type lu: L{LogicalUnit}
639
  @param lu: the logical unit on whose behalf we execute
640
  @type instance: L{objects.Instance}
641
  @param instance: the instance for which we should build the
642
      environment
643
  @type override: dict
644
  @param override: dictionary with key/values that will override
645
      our values
646
  @rtype: dict
647
  @return: the hook environment dictionary
648

649
  """
650
  cluster = lu.cfg.GetClusterInfo()
651
  bep = cluster.FillBE(instance)
652
  hvp = cluster.FillHV(instance)
653
  args = {
654
    'name': instance.name,
655
    'primary_node': instance.primary_node,
656
    'secondary_nodes': instance.secondary_nodes,
657
    'os_type': instance.os,
658
    'status': instance.admin_up,
659
    'memory': bep[constants.BE_MEMORY],
660
    'vcpus': bep[constants.BE_VCPUS],
661
    'nics': _NICListToTuple(lu, instance.nics),
662
    'disk_template': instance.disk_template,
663
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
664
    'bep': bep,
665
    'hvp': hvp,
666
    'hypervisor_name': instance.hypervisor,
667
  }
668
  if override:
669
    args.update(override)
670
  return _BuildInstanceHookEnv(**args)
671

    
672

    
673
def _AdjustCandidatePool(lu):
674
  """Adjust the candidate pool after node operations.
675

676
  """
677
  mod_list = lu.cfg.MaintainCandidatePool()
678
  if mod_list:
679
    lu.LogInfo("Promoted nodes to master candidate role: %s",
680
               ", ".join(node.name for node in mod_list))
681
    for name in mod_list:
682
      lu.context.ReaddNode(name)
683
  mc_now, mc_max = lu.cfg.GetMasterCandidateStats()
684
  if mc_now > mc_max:
685
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
686
               (mc_now, mc_max))
687

    
688

    
689
def _CheckNicsBridgesExist(lu, target_nics, target_node,
690
                               profile=constants.PP_DEFAULT):
691
  """Check that the brigdes needed by a list of nics exist.
692

693
  """
694
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
695
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
696
                for nic in target_nics]
697
  brlist = [params[constants.NIC_LINK] for params in paramslist
698
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
699
  if brlist:
700
    result = lu.rpc.call_bridges_exist(target_node, brlist)
701
    result.Raise("Error checking bridges on destination node '%s'" %
702
                 target_node, prereq=True)
703

    
704

    
705
def _CheckInstanceBridgesExist(lu, instance, node=None):
706
  """Check that the brigdes needed by an instance exist.
707

708
  """
709
  if node is None:
710
    node = instance.primary_node
711
  _CheckNicsBridgesExist(lu, instance.nics, node)
712

    
713

    
714
def _GetNodeInstancesInner(cfg, fn):
715
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
716

    
717

    
718
def _GetNodeInstances(cfg, node_name):
719
  """Returns a list of all primary and secondary instances on a node.
720

721
  """
722

    
723
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
724

    
725

    
726
def _GetNodePrimaryInstances(cfg, node_name):
727
  """Returns primary instances on a node.
728

729
  """
730
  return _GetNodeInstancesInner(cfg,
731
                                lambda inst: node_name == inst.primary_node)
732

    
733

    
734
def _GetNodeSecondaryInstances(cfg, node_name):
735
  """Returns secondary instances on a node.
736

737
  """
738
  return _GetNodeInstancesInner(cfg,
739
                                lambda inst: node_name in inst.secondary_nodes)
740

    
741

    
742
def _GetStorageTypeArgs(cfg, storage_type):
743
  """Returns the arguments for a storage type.
744

745
  """
746
  # Special case for file storage
747
  if storage_type == constants.ST_FILE:
748
    # storage.FileStorage wants a list of storage directories
749
    return [[cfg.GetFileStorageDir()]]
750

    
751
  return []
752

    
753

    
754
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
755
  faulty = []
756

    
757
  for dev in instance.disks:
758
    cfg.SetDiskID(dev, node_name)
759

    
760
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
761
  result.Raise("Failed to get disk status from node %s" % node_name,
762
               prereq=prereq)
763

    
764
  for idx, bdev_status in enumerate(result.payload):
765
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
766
      faulty.append(idx)
767

    
768
  return faulty
769

    
770

    
771
class LUPostInitCluster(LogicalUnit):
772
  """Logical unit for running hooks after cluster initialization.
773

774
  """
775
  HPATH = "cluster-init"
776
  HTYPE = constants.HTYPE_CLUSTER
777
  _OP_REQP = []
778

    
779
  def BuildHooksEnv(self):
780
    """Build hooks env.
781

782
    """
783
    env = {"OP_TARGET": self.cfg.GetClusterName()}
784
    mn = self.cfg.GetMasterNode()
785
    return env, [], [mn]
786

    
787
  def CheckPrereq(self):
788
    """No prerequisites to check.
789

790
    """
791
    return True
792

    
793
  def Exec(self, feedback_fn):
794
    """Nothing to do.
795

796
    """
797
    return True
798

    
799

    
800
class LUDestroyCluster(LogicalUnit):
801
  """Logical unit for destroying the cluster.
802

803
  """
804
  HPATH = "cluster-destroy"
805
  HTYPE = constants.HTYPE_CLUSTER
806
  _OP_REQP = []
807

    
808
  def BuildHooksEnv(self):
809
    """Build hooks env.
810

811
    """
812
    env = {"OP_TARGET": self.cfg.GetClusterName()}
813
    return env, [], []
814

    
815
  def CheckPrereq(self):
816
    """Check prerequisites.
817

818
    This checks whether the cluster is empty.
819

820
    Any errors are signaled by raising errors.OpPrereqError.
821

822
    """
823
    master = self.cfg.GetMasterNode()
824

    
825
    nodelist = self.cfg.GetNodeList()
826
    if len(nodelist) != 1 or nodelist[0] != master:
827
      raise errors.OpPrereqError("There are still %d node(s) in"
828
                                 " this cluster." % (len(nodelist) - 1))
829
    instancelist = self.cfg.GetInstanceList()
830
    if instancelist:
831
      raise errors.OpPrereqError("There are still %d instance(s) in"
832
                                 " this cluster." % len(instancelist))
833

    
834
  def Exec(self, feedback_fn):
835
    """Destroys the cluster.
836

837
    """
838
    master = self.cfg.GetMasterNode()
839

    
840
    # Run post hooks on master node before it's removed
841
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
842
    try:
843
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
844
    except:
845
      self.LogWarning("Errors occurred running hooks on %s" % master)
846

    
847
    result = self.rpc.call_node_stop_master(master, False)
848
    result.Raise("Could not disable the master role")
849
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
850
    utils.CreateBackup(priv_key)
851
    utils.CreateBackup(pub_key)
852
    return master
853

    
854

    
855
class LUVerifyCluster(LogicalUnit):
856
  """Verifies the cluster status.
857

858
  """
859
  HPATH = "cluster-verify"
860
  HTYPE = constants.HTYPE_CLUSTER
861
  _OP_REQP = ["skip_checks"]
862
  REQ_BGL = False
863

    
864
  def ExpandNames(self):
865
    self.needed_locks = {
866
      locking.LEVEL_NODE: locking.ALL_SET,
867
      locking.LEVEL_INSTANCE: locking.ALL_SET,
868
    }
869
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
870

    
871
  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
872
                  node_result, feedback_fn, master_files,
873
                  drbd_map, vg_name):
874
    """Run multiple tests against a node.
875

876
    Test list:
877

878
      - compares ganeti version
879
      - checks vg existence and size > 20G
880
      - checks config file checksum
881
      - checks ssh to other nodes
882

883
    @type nodeinfo: L{objects.Node}
884
    @param nodeinfo: the node to check
885
    @param file_list: required list of files
886
    @param local_cksum: dictionary of local files and their checksums
887
    @param node_result: the results from the node
888
    @param feedback_fn: function used to accumulate results
889
    @param master_files: list of files that only masters should have
890
    @param drbd_map: the used drbd minors for this node, in
891
        form of minor: (instance, must_exist) which correspond to instances
892
        and their running status
893
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())
894

895
    """
896
    node = nodeinfo.name
897

    
898
    # main result, node_result should be a non-empty dict
899
    if not node_result or not isinstance(node_result, dict):
900
      feedback_fn("  - ERROR: unable to verify node %s." % (node,))
901
      return True
902

    
903
    # compares ganeti version
904
    local_version = constants.PROTOCOL_VERSION
905
    remote_version = node_result.get('version', None)
906
    if not (remote_version and isinstance(remote_version, (list, tuple)) and
907
            len(remote_version) == 2):
908
      feedback_fn("  - ERROR: connection to %s failed" % (node))
909
      return True
910

    
911
    if local_version != remote_version[0]:
912
      feedback_fn("  - ERROR: incompatible protocol versions: master %s,"
913
                  " node %s %s" % (local_version, node, remote_version[0]))
914
      return True
915

    
916
    # node seems compatible, we can actually try to look into its results
917

    
918
    bad = False
919

    
920
    # full package version
921
    if constants.RELEASE_VERSION != remote_version[1]:
922
      feedback_fn("  - WARNING: software version mismatch: master %s,"
923
                  " node %s %s" %
924
                  (constants.RELEASE_VERSION, node, remote_version[1]))
925

    
926
    # checks vg existence and size > 20G
927
    if vg_name is not None:
928
      vglist = node_result.get(constants.NV_VGLIST, None)
929
      if not vglist:
930
        feedback_fn("  - ERROR: unable to check volume groups on node %s." %
931
                        (node,))
932
        bad = True
933
      else:
934
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
935
                                              constants.MIN_VG_SIZE)
936
        if vgstatus:
937
          feedback_fn("  - ERROR: %s on node %s" % (vgstatus, node))
938
          bad = True
939

    
940
    # checks config file checksum
941

    
942
    remote_cksum = node_result.get(constants.NV_FILELIST, None)
943
    if not isinstance(remote_cksum, dict):
944
      bad = True
945
      feedback_fn("  - ERROR: node hasn't returned file checksum data")
946
    else:
947
      for file_name in file_list:
948
        node_is_mc = nodeinfo.master_candidate
949
        must_have_file = file_name not in master_files
950
        if file_name not in remote_cksum:
951
          if node_is_mc or must_have_file:
952
            bad = True
953
            feedback_fn("  - ERROR: file '%s' missing" % file_name)
954
        elif remote_cksum[file_name] != local_cksum[file_name]:
955
          if node_is_mc or must_have_file:
956
            bad = True
957
            feedback_fn("  - ERROR: file '%s' has wrong checksum" % file_name)
958
          else:
959
            # not candidate and this is not a must-have file
960
            bad = True
961
            feedback_fn("  - ERROR: file '%s' should not exist on non master"
962
                        " candidates (and the file is outdated)" % file_name)
963
        else:
964
          # all good, except non-master/non-must have combination
965
          if not node_is_mc and not must_have_file:
966
            feedback_fn("  - ERROR: file '%s' should not exist on non master"
967
                        " candidates" % file_name)
968

    
969
    # checks ssh to any
970

    
971
    if constants.NV_NODELIST not in node_result:
972
      bad = True
973
      feedback_fn("  - ERROR: node hasn't returned node ssh connectivity data")
974
    else:
975
      if node_result[constants.NV_NODELIST]:
976
        bad = True
977
        for node in node_result[constants.NV_NODELIST]:
978
          feedback_fn("  - ERROR: ssh communication with node '%s': %s" %
979
                          (node, node_result[constants.NV_NODELIST][node]))
980

    
981
    if constants.NV_NODENETTEST not in node_result:
982
      bad = True
983
      feedback_fn("  - ERROR: node hasn't returned node tcp connectivity data")
984
    else:
985
      if node_result[constants.NV_NODENETTEST]:
986
        bad = True
987
        nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
988
        for node in nlist:
989
          feedback_fn("  - ERROR: tcp communication with node '%s': %s" %
990
                          (node, node_result[constants.NV_NODENETTEST][node]))
991

    
992
    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
993
    if isinstance(hyp_result, dict):
994
      for hv_name, hv_result in hyp_result.iteritems():
995
        if hv_result is not None:
996
          feedback_fn("  - ERROR: hypervisor %s verify failure: '%s'" %
997
                      (hv_name, hv_result))
998

    
999
    # check used drbd list
1000
    if vg_name is not None:
1001
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
1002
      if not isinstance(used_minors, (tuple, list)):
1003
        feedback_fn("  - ERROR: cannot parse drbd status file: %s" %
1004
                    str(used_minors))
1005
      else:
1006
        for minor, (iname, must_exist) in drbd_map.items():
1007
          if minor not in used_minors and must_exist:
1008
            feedback_fn("  - ERROR: drbd minor %d of instance %s is"
1009
                        " not active" % (minor, iname))
1010
            bad = True
1011
        for minor in used_minors:
1012
          if minor not in drbd_map:
1013
            feedback_fn("  - ERROR: unallocated drbd minor %d is in use" %
1014
                        minor)
1015
            bad = True
1016

    
1017
    return bad
1018

    
1019
  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
1020
                      node_instance, feedback_fn, n_offline):
1021
    """Verify an instance.
1022

1023
    This function checks to see if the required block devices are
1024
    available on the instance's node.
1025

1026
    """
1027
    bad = False
1028

    
1029
    node_current = instanceconfig.primary_node
1030

    
1031
    node_vol_should = {}
1032
    instanceconfig.MapLVsByNode(node_vol_should)
1033

    
1034
    for node in node_vol_should:
1035
      if node in n_offline:
1036
        # ignore missing volumes on offline nodes
1037
        continue
1038
      for volume in node_vol_should[node]:
1039
        if node not in node_vol_is or volume not in node_vol_is[node]:
1040
          feedback_fn("  - ERROR: volume %s missing on node %s" %
1041
                          (volume, node))
1042
          bad = True
1043

    
1044
    if instanceconfig.admin_up:
1045
      if ((node_current not in node_instance or
1046
          not instance in node_instance[node_current]) and
1047
          node_current not in n_offline):
1048
        feedback_fn("  - ERROR: instance %s not running on node %s" %
1049
                        (instance, node_current))
1050
        bad = True
1051

    
1052
    for node in node_instance:
1053
      if (not node == node_current):
1054
        if instance in node_instance[node]:
1055
          feedback_fn("  - ERROR: instance %s should not run on node %s" %
1056
                          (instance, node))
1057
          bad = True
1058

    
1059
    return bad
1060

    
1061
  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is, feedback_fn):
1062
    """Verify if there are any unknown volumes in the cluster.
1063

1064
    The .os, .swap and backup volumes are ignored. All other volumes are
1065
    reported as unknown.
1066

1067
    """
1068
    bad = False
1069

    
1070
    for node in node_vol_is:
1071
      for volume in node_vol_is[node]:
1072
        if node not in node_vol_should or volume not in node_vol_should[node]:
1073
          feedback_fn("  - ERROR: volume %s on node %s should not exist" %
1074
                      (volume, node))
1075
          bad = True
1076
    return bad
1077

    
1078
  def _VerifyOrphanInstances(self, instancelist, node_instance, feedback_fn):
1079
    """Verify the list of running instances.
1080

1081
    This checks what instances are running but unknown to the cluster.
1082

1083
    """
1084
    bad = False
1085
    for node in node_instance:
1086
      for runninginstance in node_instance[node]:
1087
        if runninginstance not in instancelist:
1088
          feedback_fn("  - ERROR: instance %s on node %s should not exist" %
1089
                          (runninginstance, node))
1090
          bad = True
1091
    return bad
1092

    
1093
  def _VerifyNPlusOneMemory(self, node_info, instance_cfg, feedback_fn):
1094
    """Verify N+1 Memory Resilience.
1095

1096
    Check that if one single node dies we can still start all the instances it
1097
    was primary for.
1098

1099
    """
1100
    bad = False
1101

    
1102
    for node, nodeinfo in node_info.iteritems():
1103
      # This code checks that every node which is now listed as secondary has
1104
      # enough memory to host all instances it is supposed to, should a single
1105
      # other node in the cluster fail.
1106
      # FIXME: not ready for failover to an arbitrary node
1107
      # FIXME: does not support file-backed instances
1108
      # WARNING: we currently take into account down instances as well as up
1109
      # ones, considering that even if they're down someone might want to start
1110
      # them even in the event of a node failure.
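      # Worked example (hypothetical numbers): if this node is secondary for
      # inst1 (1024 MB) and inst2 (512 MB), both having nodeA as primary and
      # auto_balance enabled, a failure of nodeA would need 1536 MB here;
      # with only, say, 1024 MB free the loop below flags an N+1 error for
      # this node/nodeA pair.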
1111
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
1112
        needed_mem = 0
1113
        for instance in instances:
1114
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1115
          if bep[constants.BE_AUTO_BALANCE]:
1116
            needed_mem += bep[constants.BE_MEMORY]
1117
        if nodeinfo['mfree'] < needed_mem:
1118
          feedback_fn("  - ERROR: not enough memory on node %s to accommodate"
1119
                      " failovers should node %s fail" % (node, prinode))
1120
          bad = True
1121
    return bad
1122

    
1123
  def CheckPrereq(self):
1124
    """Check prerequisites.
1125

1126
    Transform the list of checks we're going to skip into a set and check that
1127
    all its members are valid.
1128

1129
    """
1130
    self.skip_set = frozenset(self.op.skip_checks)
1131
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1132
      raise errors.OpPrereqError("Invalid checks to be skipped specified")
1133

    
1134
  def BuildHooksEnv(self):
1135
    """Build hooks env.
1136

1137
    Cluster-Verify hooks are run only in the post phase; their failure makes
1138
    their output appear in the verify output and the verification fail.
1139

1140
    """
1141
    all_nodes = self.cfg.GetNodeList()
1142
    env = {
1143
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1144
      }
1145
    for node in self.cfg.GetAllNodesInfo().values():
1146
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1147

    
1148
    return env, [], all_nodes
1149

    
1150
  def Exec(self, feedback_fn):
1151
    """Verify integrity of cluster, performing various test on nodes.
1152

1153
    """
1154
    bad = False
1155
    feedback_fn("* Verifying global settings")
1156
    for msg in self.cfg.VerifyConfig():
1157
      feedback_fn("  - ERROR: %s" % msg)
1158

    
1159
    vg_name = self.cfg.GetVGName()
1160
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1161
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
1162
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1163
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1164
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1165
                        for iname in instancelist)
1166
    i_non_redundant = [] # Non redundant instances
1167
    i_non_a_balanced = [] # Non auto-balanced instances
1168
    n_offline = [] # List of offline nodes
1169
    n_drained = [] # List of nodes being drained
1170
    node_volume = {}
1171
    node_instance = {}
1172
    node_info = {}
1173
    instance_cfg = {}
1174

    
1175
    # FIXME: verify OS list
1176
    # do local checksums
1177
    master_files = [constants.CLUSTER_CONF_FILE]
1178

    
1179
    file_names = ssconf.SimpleStore().GetFileList()
1180
    file_names.append(constants.SSL_CERT_FILE)
1181
    file_names.append(constants.RAPI_CERT_FILE)
1182
    file_names.extend(master_files)
1183

    
1184
    local_checksums = utils.FingerprintFiles(file_names)
1185

    
1186
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1187
    node_verify_param = {
1188
      constants.NV_FILELIST: file_names,
1189
      constants.NV_NODELIST: [node.name for node in nodeinfo
1190
                              if not node.offline],
1191
      constants.NV_HYPERVISOR: hypervisors,
1192
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1193
                                  node.secondary_ip) for node in nodeinfo
1194
                                 if not node.offline],
1195
      constants.NV_INSTANCELIST: hypervisors,
1196
      constants.NV_VERSION: None,
1197
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1198
      }
1199
    if vg_name is not None:
1200
      node_verify_param[constants.NV_VGLIST] = None
1201
      node_verify_param[constants.NV_LVLIST] = vg_name
1202
      node_verify_param[constants.NV_DRBDLIST] = None
1203
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1204
                                           self.cfg.GetClusterName())
1205

    
1206
    cluster = self.cfg.GetClusterInfo()
1207
    master_node = self.cfg.GetMasterNode()
1208
    all_drbd_map = self.cfg.ComputeDRBDMap()
1209

    
1210
    for node_i in nodeinfo:
1211
      node = node_i.name
1212

    
1213
      if node_i.offline:
1214
        feedback_fn("* Skipping offline node %s" % (node,))
1215
        n_offline.append(node)
1216
        continue
1217

    
1218
      if node == master_node:
1219
        ntype = "master"
1220
      elif node_i.master_candidate:
1221
        ntype = "master candidate"
1222
      elif node_i.drained:
1223
        ntype = "drained"
1224
        n_drained.append(node)
1225
      else:
1226
        ntype = "regular"
1227
      feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1228

    
1229
      msg = all_nvinfo[node].fail_msg
1230
      if msg:
1231
        feedback_fn("  - ERROR: while contacting node %s: %s" % (node, msg))
1232
        bad = True
1233
        continue
1234

    
1235
      nresult = all_nvinfo[node].payload
1236
      node_drbd = {}
1237
      for minor, instance in all_drbd_map[node].items():
1238
        if instance not in instanceinfo:
1239
          feedback_fn("  - ERROR: ghost instance '%s' in temporary DRBD map" %
1240
                      instance)
1241
          # ghost instance should not be running, but otherwise we
1242
          # don't give double warnings (both ghost instance and
1243
          # unallocated minor in use)
1244
          node_drbd[minor] = (instance, False)
1245
        else:
1246
          instance = instanceinfo[instance]
1247
          node_drbd[minor] = (instance.name, instance.admin_up)
1248
      result = self._VerifyNode(node_i, file_names, local_checksums,
1249
                                nresult, feedback_fn, master_files,
1250
                                node_drbd, vg_name)
1251
      bad = bad or result
1252

    
1253
      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1254
      if vg_name is None:
1255
        node_volume[node] = {}
1256
      elif isinstance(lvdata, basestring):
1257
        feedback_fn("  - ERROR: LVM problem on node %s: %s" %
1258
                    (node, utils.SafeEncode(lvdata)))
1259
        bad = True
1260
        node_volume[node] = {}
1261
      elif not isinstance(lvdata, dict):
1262
        feedback_fn("  - ERROR: connection to %s failed (lvlist)" % (node,))
1263
        bad = True
1264
        continue
1265
      else:
1266
        node_volume[node] = lvdata
1267

    
1268
      # node_instance
1269
      idata = nresult.get(constants.NV_INSTANCELIST, None)
1270
      if not isinstance(idata, list):
1271
        feedback_fn("  - ERROR: connection to %s failed (instancelist)" %
1272
                    (node,))
1273
        bad = True
1274
        continue
1275

    
1276
      node_instance[node] = idata
1277

    
1278
      # node_info
1279
      nodeinfo = nresult.get(constants.NV_HVINFO, None)
1280
      if not isinstance(nodeinfo, dict):
1281
        feedback_fn("  - ERROR: connection to %s failed (hvinfo)" % (node,))
1282
        bad = True
1283
        continue
1284

    
1285
      try:
1286
        node_info[node] = {
1287
          "mfree": int(nodeinfo['memory_free']),
1288
          "pinst": [],
1289
          "sinst": [],
1290
          # dictionary holding all instances this node is secondary for,
1291
          # grouped by their primary node. Each key is a cluster node, and each
1292
          # value is a list of instances which have the key as primary and the
1293
          # current node as secondary.  this is handy to calculate N+1 memory
1294
          # availability if you can only failover from a primary to its
1295
          # secondary.
1296
          "sinst-by-pnode": {},
1297
        }
1298
        # FIXME: devise a free space model for file based instances as well
1299
        if vg_name is not None:
1300
          if (constants.NV_VGLIST not in nresult or
1301
              vg_name not in nresult[constants.NV_VGLIST]):
1302
            feedback_fn("  - ERROR: node %s didn't return data for the"
1303
                        " volume group '%s' - it is either missing or broken" %
1304
                        (node, vg_name))
1305
            bad = True
1306
            continue
1307
          node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
1308
      except (ValueError, KeyError):
1309
        feedback_fn("  - ERROR: invalid nodeinfo value returned"
1310
                    " from node %s" % (node,))
1311
        bad = True
1312
        continue
1313

    
1314
    node_vol_should = {}
1315

    
1316
    for instance in instancelist:
1317
      feedback_fn("* Verifying instance %s" % instance)
1318
      inst_config = instanceinfo[instance]
1319
      result =  self._VerifyInstance(instance, inst_config, node_volume,
1320
                                     node_instance, feedback_fn, n_offline)
1321
      bad = bad or result
1322
      inst_nodes_offline = []
1323

    
1324
      inst_config.MapLVsByNode(node_vol_should)
1325

    
1326
      instance_cfg[instance] = inst_config
1327

    
1328
      pnode = inst_config.primary_node
1329
      if pnode in node_info:
1330
        node_info[pnode]['pinst'].append(instance)
1331
      elif pnode not in n_offline:
1332
        feedback_fn("  - ERROR: instance %s, connection to primary node"
1333
                    " %s failed" % (instance, pnode))
1334
        bad = True
1335

    
1336
      if pnode in n_offline:
1337
        inst_nodes_offline.append(pnode)
1338

    
1339
      # If the instance is non-redundant we cannot survive losing its primary
1340
      # node, so we are not N+1 compliant. On the other hand we have no disk
1341
      # templates with more than one secondary so that situation is not well
1342
      # supported either.
1343
      # FIXME: does not support file-backed instances
1344
      if len(inst_config.secondary_nodes) == 0:
1345
        i_non_redundant.append(instance)
1346
      elif len(inst_config.secondary_nodes) > 1:
1347
        feedback_fn("  - WARNING: multiple secondaries for instance %s"
1348
                    % instance)
1349

    
1350
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1351
        i_non_a_balanced.append(instance)
1352

    
1353
      for snode in inst_config.secondary_nodes:
1354
        if snode in node_info:
1355
          node_info[snode]['sinst'].append(instance)
1356
          if pnode not in node_info[snode]['sinst-by-pnode']:
1357
            node_info[snode]['sinst-by-pnode'][pnode] = []
1358
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)
1359
        elif snode not in n_offline:
1360
          feedback_fn("  - ERROR: instance %s, connection to secondary node"
1361
                      " %s failed" % (instance, snode))
1362
          bad = True
1363
        if snode in n_offline:
1364
          inst_nodes_offline.append(snode)
1365

    
1366
      if inst_nodes_offline:
1367
        # warn that the instance lives on offline nodes, and set bad=True
1368
        feedback_fn("  - ERROR: instance lives on offline node(s) %s" %
1369
                    ", ".join(inst_nodes_offline))
1370
        bad = True
1371

    
1372
    feedback_fn("* Verifying orphan volumes")
1373
    result = self._VerifyOrphanVolumes(node_vol_should, node_volume,
1374
                                       feedback_fn)
1375
    bad = bad or result
1376

    
1377
    feedback_fn("* Verifying remaining instances")
1378
    result = self._VerifyOrphanInstances(instancelist, node_instance,
1379
                                         feedback_fn)
1380
    bad = bad or result
1381

    
1382
    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1383
      feedback_fn("* Verifying N+1 Memory redundancy")
1384
      result = self._VerifyNPlusOneMemory(node_info, instance_cfg, feedback_fn)
1385
      bad = bad or result
1386

    
1387
    feedback_fn("* Other Notes")
1388
    if i_non_redundant:
1389
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
1390
                  % len(i_non_redundant))
1391

    
1392
    if i_non_a_balanced:
1393
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
1394
                  % len(i_non_a_balanced))
1395

    
1396
    if n_offline:
1397
      feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))
1398

    
1399
    if n_drained:
1400
      feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))
1401

    
1402
    return not bad
1403

    
1404
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1405
    """Analyze the post-hooks' result
1406

1407
    This method analyses the hook result, handles it, and sends some
1408
    nicely-formatted feedback back to the user.
1409

1410
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
1411
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1412
    @param hooks_results: the results of the multi-node hooks rpc call
1413
    @param feedback_fn: function used to send feedback back to the caller
1414
    @param lu_result: previous Exec result
1415
    @return: the new Exec result, based on the previous result
1416
        and hook results
1417

1418
    """
1419
    # We only really run POST phase hooks, and are only interested in
1420
    # their results
1421
    if phase == constants.HOOKS_PHASE_POST:
1422
      # Used to change hooks' output to proper indentation
1423
      indent_re = re.compile('^', re.M)
1424
      feedback_fn("* Hooks Results")
1425
      if not hooks_results:
1426
        feedback_fn("  - ERROR: general communication failure")
1427
        lu_result = 1
1428
      else:
1429
        for node_name in hooks_results:
1430
          show_node_header = True
1431
          res = hooks_results[node_name]
1432
          msg = res.fail_msg
1433
          if msg:
1434
            if res.offline:
1435
              # no need to warn or set fail return value
1436
              continue
1437
            feedback_fn("    Communication failure in hooks execution: %s" %
1438
                        msg)
1439
            lu_result = 1
1440
            continue
1441
          for script, hkr, output in res.payload:
1442
            if hkr == constants.HKR_FAIL:
1443
              # The node header is only shown once, if there are
1444
              # failing hooks on that node
1445
              if show_node_header:
1446
                feedback_fn("  Node %s:" % node_name)
1447
                show_node_header = False
1448
              feedback_fn("    ERROR: Script %s failed, output:" % script)
1449
              output = indent_re.sub('      ', output)
1450
              feedback_fn("%s" % output)
1451
              lu_result = 1
1452

    
1453
      return lu_result
1454

    
1455

    
1456
class LUVerifyDisks(NoHooksLU):
1457
  """Verifies the cluster disks status.
1458

1459
  """
1460
  _OP_REQP = []
1461
  REQ_BGL = False
1462

    
1463
  def ExpandNames(self):
1464
    self.needed_locks = {
1465
      locking.LEVEL_NODE: locking.ALL_SET,
1466
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1467
    }
1468
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
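    # the value 1 above requests shared acquisition of every lock level,
    # which is sufficient here since this LU only reads cluster state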
1469

    
1470
  def CheckPrereq(self):
1471
    """Check prerequisites.
1472

1473
    This has no prerequisites.
1474

1475
    """
1476
    pass
1477

    
1478
  def Exec(self, feedback_fn):
1479
    """Verify integrity of cluster disks.
1480

1481
    @rtype: tuple of three items
1482
    @return: a tuple of (dict of node-to-node_error, list of instances
1483
        which need activate-disks, dict of instance: (node, volume) for
1484
        missing volumes)
1485

1486
    """
1487
    result = res_nodes, res_instances, res_missing = {}, [], {}
1488

    
1489
    vg_name = self.cfg.GetVGName()
1490
    nodes = utils.NiceSort(self.cfg.GetNodeList())
1491
    instances = [self.cfg.GetInstanceInfo(name)
1492
                 for name in self.cfg.GetInstanceList()]
1493

    
1494
    nv_dict = {}
1495
    for inst in instances:
1496
      inst_lvs = {}
1497
      if (not inst.admin_up or
1498
          inst.disk_template not in constants.DTS_NET_MIRROR):
1499
        continue
1500
      inst.MapLVsByNode(inst_lvs)
1501
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
1502
      for node, vol_list in inst_lvs.iteritems():
1503
        for vol in vol_list:
1504
          nv_dict[(node, vol)] = inst
1505

    
1506
    if not nv_dict:
1507
      return result
1508

    
1509
    node_lvs = self.rpc.call_lv_list(nodes, vg_name)
1510

    
1511
    for node in nodes:
1512
      # node_volume
1513
      node_res = node_lvs[node]
1514
      if node_res.offline:
1515
        continue
1516
      msg = node_res.fail_msg
1517
      if msg:
1518
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
1519
        res_nodes[node] = msg
1520
        continue
1521

    
1522
      lvs = node_res.payload
1523
      for lv_name, (_, lv_inactive, lv_online) in lvs.items():
1524
        inst = nv_dict.pop((node, lv_name), None)
1525
        if (not lv_online and inst is not None
1526
            and inst.name not in res_instances):
1527
          res_instances.append(inst.name)
1528

    
1529
    # any leftover items in nv_dict are missing LVs, let's arrange the
1530
    # data better
1531
    for key, inst in nv_dict.iteritems():
1532
      if inst.name not in res_missing:
1533
        res_missing[inst.name] = []
1534
      res_missing[inst.name].append(key)
1535

    
1536
    return result
1537

    
1538

    
1539
class LURepairDiskSizes(NoHooksLU):
1540
  """Verifies the cluster disks sizes.
1541

1542
  """
1543
  _OP_REQP = ["instances"]
1544
  REQ_BGL = False
1545

    
1546
  def ExpandNames(self):
1547

    
1548
    if not isinstance(self.op.instances, list):
1549
      raise errors.OpPrereqError("Invalid argument type 'instances'")
1550

    
1551
    if self.op.instances:
1552
      self.wanted_names = []
1553
      for name in self.op.instances:
1554
        full_name = self.cfg.ExpandInstanceName(name)
1555
        if full_name is None:
1556
          raise errors.OpPrereqError("Instance '%s' not known" % name)
1557
        self.wanted_names.append(full_name)
1559
      self.needed_locks = {
1560
        locking.LEVEL_NODE: [],
1561
        locking.LEVEL_INSTANCE: self.wanted_names,
1562
        }
1563
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
1564
    else:
1565
      self.wanted_names = None
1566
      self.needed_locks = {
1567
        locking.LEVEL_NODE: locking.ALL_SET,
1568
        locking.LEVEL_INSTANCE: locking.ALL_SET,
1569
        }
1570
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
1571

    
1572
  def DeclareLocks(self, level):
1573
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
1574
      self._LockInstancesNodes(primary_only=True)
1575

    
1576
  def CheckPrereq(self):
1577
    """Check prerequisites.
1578

1579
    This only checks the optional instance list against the existing names.
1580

1581
    """
1582
    if self.wanted_names is None:
1583
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
1584

    
1585
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
1586
                             in self.wanted_names]
1587

    
1588
  def Exec(self, feedback_fn):
1589
    """Verify the size of cluster disks.
1590

1591
    """
1592
    # TODO: check child disks too
1593
    # TODO: check differences in size between primary/secondary nodes
1594
    per_node_disks = {}
1595
    for instance in self.wanted_instances:
1596
      pnode = instance.primary_node
1597
      if pnode not in per_node_disks:
1598
        per_node_disks[pnode] = []
1599
      for idx, disk in enumerate(instance.disks):
1600
        per_node_disks[pnode].append((instance, idx, disk))
1601

    
1602
    changed = []
1603
    for node, dskl in per_node_disks.items():
1604
      result = self.rpc.call_blockdev_getsizes(node, [v[2] for v in dskl])
1605
      if result.failed:
1606
        self.LogWarning("Failure in blockdev_getsizes call to node"
1607
                        " %s, ignoring", node)
1608
        continue
1609
      if len(result.data) != len(dskl):
1610
        self.LogWarning("Invalid result from node %s, ignoring node results",
1611
                        node)
1612
        continue
1613
      for ((instance, idx, disk), size) in zip(dskl, result.data):
1614
        if size is None:
1615
          self.LogWarning("Disk %d of instance %s did not return size"
1616
                          " information, ignoring", idx, instance.name)
1617
          continue
1618
        if not isinstance(size, (int, long)):
1619
          self.LogWarning("Disk %d of instance %s did not return valid"
1620
                          " size information, ignoring", idx, instance.name)
1621
          continue
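        # the node returns the size in bytes; shift by 20 bits to obtain MiB,
        # the unit in which disk.size is recorded in the configuration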
1622
        size = size >> 20
1623
        if size != disk.size:
1624
          self.LogInfo("Disk %d of instance %s has mismatched size,"
1625
                       " correcting: recorded %d, actual %d", idx,
1626
                       instance.name, disk.size, size)
1627
          disk.size = size
1628
          self.cfg.Update(instance)
1629
          changed.append((instance.name, idx, size))
1630
    return changed
1631

    
1632

    
1633
class LURenameCluster(LogicalUnit):
1634
  """Rename the cluster.
1635

1636
  """
1637
  HPATH = "cluster-rename"
1638
  HTYPE = constants.HTYPE_CLUSTER
1639
  _OP_REQP = ["name"]
1640

    
1641
  def BuildHooksEnv(self):
1642
    """Build hooks env.
1643

1644
    """
1645
    env = {
1646
      "OP_TARGET": self.cfg.GetClusterName(),
1647
      "NEW_NAME": self.op.name,
1648
      }
1649
    mn = self.cfg.GetMasterNode()
1650
    return env, [mn], [mn]
1651

    
1652
  def CheckPrereq(self):
1653
    """Verify that the passed name is a valid one.
1654

1655
    """
1656
    hostname = utils.HostInfo(self.op.name)
1657

    
1658
    new_name = hostname.name
1659
    self.ip = new_ip = hostname.ip
1660
    old_name = self.cfg.GetClusterName()
1661
    old_ip = self.cfg.GetMasterIP()
1662
    if new_name == old_name and new_ip == old_ip:
1663
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
1664
                                 " cluster has changed")
1665
    if new_ip != old_ip:
1666
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
1667
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
1668
                                   " reachable on the network. Aborting." %
1669
                                   new_ip)
1670

    
1671
    self.op.name = new_name
1672

    
1673
  def Exec(self, feedback_fn):
1674
    """Rename the cluster.
1675

1676
    """
1677
    clustername = self.op.name
1678
    ip = self.ip
1679

    
1680
    # shutdown the master IP
1681
    master = self.cfg.GetMasterNode()
1682
    result = self.rpc.call_node_stop_master(master, False)
1683
    result.Raise("Could not disable the master role")
1684

    
1685
    try:
1686
      cluster = self.cfg.GetClusterInfo()
1687
      cluster.cluster_name = clustername
1688
      cluster.master_ip = ip
1689
      self.cfg.Update(cluster)
1690

    
1691
      # update the known hosts file
1692
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
1693
      node_list = self.cfg.GetNodeList()
1694
      try:
1695
        node_list.remove(master)
1696
      except ValueError:
1697
        pass
1698
      result = self.rpc.call_upload_file(node_list,
1699
                                         constants.SSH_KNOWN_HOSTS_FILE)
1700
      for to_node, to_result in result.iteritems():
1701
        msg = to_result.fail_msg
1702
        if msg:
1703
          msg = ("Copy of file %s to node %s failed: %s" %
1704
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
1705
          self.proc.LogWarning(msg)
1706

    
1707
    finally:
1708
      result = self.rpc.call_node_start_master(master, False, False)
1709
      msg = result.fail_msg
1710
      if msg:
1711
        self.LogWarning("Could not re-enable the master role on"
1712
                        " the master, please restart manually: %s", msg)
1713

    
1714

    
1715
def _RecursiveCheckIfLVMBased(disk):
1716
  """Check if the given disk or its children are lvm-based.
1717

1718
  @type disk: L{objects.Disk}
1719
  @param disk: the disk to check
1720
  @rtype: boolean
1721
  @return: boolean indicating whether a LD_LV dev_type was found or not
1722

1723
  """
1724
  if disk.children:
1725
    for chdisk in disk.children:
1726
      if _RecursiveCheckIfLVMBased(chdisk):
1727
        return True
1728
  return disk.dev_type == constants.LD_LV
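# Illustrative example (not executed): for a DRBD disk whose children are two
# LD_LV volumes the recursion over disk.children returns True, while a
# file-based disk with no LV anywhere in its tree returns False.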
1729

    
1730

    
1731
class LUSetClusterParams(LogicalUnit):
1732
  """Change the parameters of the cluster.
1733

1734
  """
1735
  HPATH = "cluster-modify"
1736
  HTYPE = constants.HTYPE_CLUSTER
1737
  _OP_REQP = []
1738
  REQ_BGL = False
1739

    
1740
  def CheckArguments(self):
1741
    """Check parameters
1742

1743
    """
1744
    if not hasattr(self.op, "candidate_pool_size"):
1745
      self.op.candidate_pool_size = None
1746
    if self.op.candidate_pool_size is not None:
1747
      try:
1748
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
1749
      except (ValueError, TypeError), err:
1750
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
1751
                                   str(err))
1752
      if self.op.candidate_pool_size < 1:
1753
        raise errors.OpPrereqError("At least one master candidate needed")
1754

    
1755
  def ExpandNames(self):
1756
    # FIXME: in the future maybe other cluster params won't require checking on
1757
    # all nodes to be modified.
1758
    self.needed_locks = {
1759
      locking.LEVEL_NODE: locking.ALL_SET,
1760
    }
1761
    self.share_locks[locking.LEVEL_NODE] = 1
1762

    
1763
  def BuildHooksEnv(self):
1764
    """Build hooks env.
1765

1766
    """
1767
    env = {
1768
      "OP_TARGET": self.cfg.GetClusterName(),
1769
      "NEW_VG_NAME": self.op.vg_name,
1770
      }
1771
    mn = self.cfg.GetMasterNode()
1772
    return env, [mn], [mn]
1773

    
1774
  def CheckPrereq(self):
1775
    """Check prerequisites.
1776

1777
    This checks whether the given params don't conflict and
1778
    if the given volume group is valid.
1779

1780
    """
1781
    if self.op.vg_name is not None and not self.op.vg_name:
1782
      instances = self.cfg.GetAllInstancesInfo().values()
1783
      for inst in instances:
1784
        for disk in inst.disks:
1785
          if _RecursiveCheckIfLVMBased(disk):
1786
            raise errors.OpPrereqError("Cannot disable lvm storage while"
1787
                                       " lvm-based instances exist")
1788

    
1789
    node_list = self.acquired_locks[locking.LEVEL_NODE]
1790

    
1791
    # if vg_name not None, checks given volume group on all nodes
1792
    if self.op.vg_name:
1793
      vglist = self.rpc.call_vg_list(node_list)
1794
      for node in node_list:
1795
        msg = vglist[node].fail_msg
1796
        if msg:
1797
          # ignoring down node
1798
          self.LogWarning("Error while gathering data on node %s"
1799
                          " (ignoring node): %s", node, msg)
1800
          continue
1801
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
1802
                                              self.op.vg_name,
1803
                                              constants.MIN_VG_SIZE)
1804
        if vgstatus:
1805
          raise errors.OpPrereqError("Error on node '%s': %s" %
1806
                                     (node, vgstatus))
1807

    
1808
    self.cluster = cluster = self.cfg.GetClusterInfo()
1809
    # validate params changes
1810
    if self.op.beparams:
1811
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
1812
      self.new_beparams = objects.FillDict(
1813
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
1814

    
1815
    if self.op.nicparams:
1816
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
1817
      self.new_nicparams = objects.FillDict(
1818
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
1819
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
1820

    
1821
    # hypervisor list/parameters
1822
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
1823
    if self.op.hvparams:
1824
      if not isinstance(self.op.hvparams, dict):
1825
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input")
1826
      for hv_name, hv_dict in self.op.hvparams.items():
1827
        if hv_name not in self.new_hvparams:
1828
          self.new_hvparams[hv_name] = hv_dict
1829
        else:
1830
          self.new_hvparams[hv_name].update(hv_dict)
1831

    
1832
    if self.op.enabled_hypervisors is not None:
1833
      self.hv_list = self.op.enabled_hypervisors
1834
      if not self.hv_list:
1835
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
1836
                                   " least one member")
1837
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
1838
      if invalid_hvs:
1839
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
1840
                                   " entries: %s" %
1841
                                   utils.CommaJoin(invalid_hvs))
1842
    else:
1843
      self.hv_list = cluster.enabled_hypervisors
1844

    
1845
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
1846
      # either the enabled list has changed, or the parameters have, validate
1847
      for hv_name, hv_params in self.new_hvparams.items():
1848
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
1849
            (self.op.enabled_hypervisors and
1850
             hv_name in self.op.enabled_hypervisors)):
1851
          # either this is a new hypervisor, or its parameters have changed
1852
          hv_class = hypervisor.GetHypervisor(hv_name)
1853
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1854
          hv_class.CheckParameterSyntax(hv_params)
1855
          _CheckHVParams(self, node_list, hv_name, hv_params)
1856

    
1857
  def Exec(self, feedback_fn):
1858
    """Change the parameters of the cluster.
1859

1860
    """
1861
    if self.op.vg_name is not None:
1862
      new_volume = self.op.vg_name
1863
      if not new_volume:
1864
        new_volume = None
1865
      if new_volume != self.cfg.GetVGName():
1866
        self.cfg.SetVGName(new_volume)
1867
      else:
1868
        feedback_fn("Cluster LVM configuration already in desired"
1869
                    " state, not changing")
1870
    if self.op.hvparams:
1871
      self.cluster.hvparams = self.new_hvparams
1872
    if self.op.enabled_hypervisors is not None:
1873
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
1874
    if self.op.beparams:
1875
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
1876
    if self.op.nicparams:
1877
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
1878

    
1879
    if self.op.candidate_pool_size is not None:
1880
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
1881
      # we need to update the pool size here, otherwise the save will fail
1882
      _AdjustCandidatePool(self)
1883

    
1884
    self.cfg.Update(self.cluster)
1885

    
1886

    
1887
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
1888
  """Distribute additional files which are part of the cluster configuration.
1889

1890
  ConfigWriter takes care of distributing the config and ssconf files, but
1891
  there are more files which should be distributed to all nodes. This function
1892
  makes sure those are copied.
1893

1894
  @param lu: calling logical unit
1895
  @param additional_nodes: list of nodes not in the config to distribute to
1896

1897
  """
1898
  # 1. Gather target nodes
1899
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
1900
  dist_nodes = lu.cfg.GetNodeList()
1901
  if additional_nodes is not None:
1902
    dist_nodes.extend(additional_nodes)
1903
  if myself.name in dist_nodes:
1904
    dist_nodes.remove(myself.name)
1905
  # 2. Gather files to distribute
1906
  dist_files = set([constants.ETC_HOSTS,
1907
                    constants.SSH_KNOWN_HOSTS_FILE,
1908
                    constants.RAPI_CERT_FILE,
1909
                    constants.RAPI_USERS_FILE,
1910
                    constants.HMAC_CLUSTER_KEY,
1911
                   ])
1912

    
1913
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
1914
  for hv_name in enabled_hypervisors:
1915
    hv_class = hypervisor.GetHypervisor(hv_name)
1916
    dist_files.update(hv_class.GetAncillaryFiles())
1917

    
1918
  # 3. Perform the files upload
1919
  for fname in dist_files:
1920
    if os.path.exists(fname):
1921
      result = lu.rpc.call_upload_file(dist_nodes, fname)
1922
      for to_node, to_result in result.items():
1923
        msg = to_result.fail_msg
1924
        if msg:
1925
          msg = ("Copy of file %s to node %s failed: %s" %
1926
                 (fname, to_node, msg))
1927
          lu.proc.LogWarning(msg)
1928

    
1929

    
1930
class LURedistributeConfig(NoHooksLU):
1931
  """Force the redistribution of cluster configuration.
1932

1933
  This is a very simple LU.
1934

1935
  """
1936
  _OP_REQP = []
1937
  REQ_BGL = False
1938

    
1939
  def ExpandNames(self):
1940
    self.needed_locks = {
1941
      locking.LEVEL_NODE: locking.ALL_SET,
1942
    }
1943
    self.share_locks[locking.LEVEL_NODE] = 1
1944

    
1945
  def CheckPrereq(self):
1946
    """Check prerequisites.
1947

1948
    """
1949

    
1950
  def Exec(self, feedback_fn):
1951
    """Redistribute the configuration.
1952

1953
    """
1954
    self.cfg.Update(self.cfg.GetClusterInfo())
1955
    _RedistributeAncillaryFiles(self)
1956

    
1957

    
1958
def _WaitForSync(lu, instance, oneshot=False, unlock=False):
1959
  """Sleep and poll for an instance's disk to sync.
1960

1961
  """
1962
  if not instance.disks:
1963
    return True
1964

    
1965
  if not oneshot:
1966
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
1967

    
1968
  node = instance.primary_node
1969

    
1970
  for dev in instance.disks:
1971
    lu.cfg.SetDiskID(dev, node)
1972

    
1973
  retries = 0
1974
  degr_retries = 10 # in seconds, as we sleep 1 second each time
1975
  while True:
1976
    max_time = 0
1977
    done = True
1978
    cumul_degraded = False
1979
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
1980
    msg = rstats.fail_msg
1981
    if msg:
1982
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
1983
      retries += 1
1984
      if retries >= 10:
1985
        raise errors.RemoteError("Can't contact node %s for mirror data,"
1986
                                 " aborting." % node)
1987
      time.sleep(6)
1988
      continue
1989
    rstats = rstats.payload
1990
    retries = 0
1991
    for i, mstat in enumerate(rstats):
1992
      if mstat is None:
1993
        lu.LogWarning("Can't compute data for node %s/%s",
1994
                           node, instance.disks[i].iv_name)
1995
        continue
1996

    
1997
      cumul_degraded = (cumul_degraded or
1998
                        (mstat.is_degraded and mstat.sync_percent is None))
1999
      if mstat.sync_percent is not None:
2000
        done = False
2001
        if mstat.estimated_time is not None:
2002
          rem_time = "%d estimated seconds remaining" % mstat.estimated_time
2003
          max_time = mstat.estimated_time
2004
        else:
2005
          rem_time = "no time estimate"
2006
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2007
                        (instance.disks[i].iv_name, mstat.sync_percent, rem_time))
2008

    
2009
    # if we're done but degraded, let's do a few small retries, to
2010
    # make sure we see a stable and not transient situation; therefore
2011
    # we force restart of the loop
2012
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
2013
      logging.info("Degraded disks found, %d retries left", degr_retries)
2014
      degr_retries -= 1
2015
      time.sleep(1)
2016
      continue
2017

    
2018
    if done or oneshot:
2019
      break
2020

    
2021
    time.sleep(min(60, max_time))
2022

    
2023
  if done:
2024
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2025
  return not cumul_degraded
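# Note on _WaitForSync: it returns True only when every disk finished syncing
# without remaining degraded; callers can treat a False result as "disks
# still degraded".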
2026

    
2027

    
2028
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2029
  """Check that mirrors are not degraded.
2030

2031
  The ldisk parameter, if True, will change the test from the
2032
  is_degraded attribute (which represents overall non-ok status for
2033
  the device(s)) to the ldisk (representing the local storage status).
2034

2035
  """
2036
  lu.cfg.SetDiskID(dev, node)
2037

    
2038
  result = True
2039

    
2040
  if on_primary or dev.AssembleOnSecondary():
2041
    rstats = lu.rpc.call_blockdev_find(node, dev)
2042
    msg = rstats.fail_msg
2043
    if msg:
2044
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2045
      result = False
2046
    elif not rstats.payload:
2047
      lu.LogWarning("Can't find disk on node %s", node)
2048
      result = False
2049
    else:
2050
      if ldisk:
2051
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2052
      else:
2053
        result = result and not rstats.payload.is_degraded
2054

    
2055
  if dev.children:
2056
    for child in dev.children:
2057
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2058

    
2059
  return result
2060

    
2061

    
2062
class LUDiagnoseOS(NoHooksLU):
2063
  """Logical unit for OS diagnose/query.
2064

2065
  """
2066
  _OP_REQP = ["output_fields", "names"]
2067
  REQ_BGL = False
2068
  _FIELDS_STATIC = utils.FieldSet()
2069
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status")
2070

    
2071
  def ExpandNames(self):
2072
    if self.op.names:
2073
      raise errors.OpPrereqError("Selective OS query not supported")
2074

    
2075
    _CheckOutputFields(static=self._FIELDS_STATIC,
2076
                       dynamic=self._FIELDS_DYNAMIC,
2077
                       selected=self.op.output_fields)
2078

    
2079
    # Lock all nodes, in shared mode
2080
    # Temporary removal of locks, should be reverted later
2081
    # TODO: reintroduce locks when they are lighter-weight
2082
    self.needed_locks = {}
2083
    #self.share_locks[locking.LEVEL_NODE] = 1
2084
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2085

    
2086
  def CheckPrereq(self):
2087
    """Check prerequisites.
2088

2089
    """
2090

    
2091
  @staticmethod
2092
  def _DiagnoseByOS(node_list, rlist):
2093
    """Remaps a per-node return list into an a per-os per-node dictionary
2094

2095
    @param node_list: a list with the names of all nodes
2096
    @param rlist: a map with node names as keys and OS objects as values
2097

2098
    @rtype: dict
2099
    @return: a dictionary with osnames as keys and as value another map, with
2100
        nodes as keys and tuples of (path, status, diagnose) as values, eg::
2101

2102
          {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
2103
                                     (/srv/..., False, "invalid api")],
2104
                           "node2": [(/srv/..., True, "")]}
2105
          }
2106

2107
    """
2108
    all_os = {}
2109
    # we build here the list of nodes that didn't fail the RPC (at RPC
2110
    # level), so that nodes with a non-responding node daemon don't
2111
    # make all OSes invalid
2112
    good_nodes = [node_name for node_name in rlist
2113
                  if not rlist[node_name].fail_msg]
2114
    for node_name, nr in rlist.items():
2115
      if nr.fail_msg or not nr.payload:
2116
        continue
2117
      for name, path, status, diagnose in nr.payload:
2118
        if name not in all_os:
2119
          # build a list of nodes for this os containing empty lists
2120
          # for each node in node_list
2121
          all_os[name] = {}
2122
          for nname in good_nodes:
2123
            all_os[name][nname] = []
2124
        all_os[name][node_name].append((path, status, diagnose))
2125
    return all_os
2126

    
2127
  def Exec(self, feedback_fn):
2128
    """Compute the list of OSes.
2129

2130
    """
2131
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2132
    node_data = self.rpc.call_os_diagnose(valid_nodes)
2133
    pol = self._DiagnoseByOS(valid_nodes, node_data)
2134
    output = []
2135
    for os_name, os_data in pol.items():
2136
      row = []
2137
      for field in self.op.output_fields:
2138
        if field == "name":
2139
          val = os_name
2140
        elif field == "valid":
2141
          val = utils.all([osl and osl[0][1] for osl in os_data.values()])
2142
        elif field == "node_status":
2143
          # this is just a copy of the dict
2144
          val = {}
2145
          for node_name, nos_list in os_data.items():
2146
            val[node_name] = nos_list
2147
        else:
2148
          raise errors.ParameterError(field)
2149
        row.append(val)
2150
      output.append(row)
2151

    
2152
    return output
2153

    
2154

    
2155
class LURemoveNode(LogicalUnit):
2156
  """Logical unit for removing a node.
2157

2158
  """
2159
  HPATH = "node-remove"
2160
  HTYPE = constants.HTYPE_NODE
2161
  _OP_REQP = ["node_name"]
2162

    
2163
  def BuildHooksEnv(self):
2164
    """Build hooks env.
2165

2166
    This doesn't run on the target node in the pre phase as a failed
2167
    node would then be impossible to remove.
2168

2169
    """
2170
    env = {
2171
      "OP_TARGET": self.op.node_name,
2172
      "NODE_NAME": self.op.node_name,
2173
      }
2174
    all_nodes = self.cfg.GetNodeList()
2175
    if self.op.node_name in all_nodes:
2176
      all_nodes.remove(self.op.node_name)
2177
    return env, all_nodes, all_nodes
2178

    
2179
  def CheckPrereq(self):
2180
    """Check prerequisites.
2181

2182
    This checks:
2183
     - the node exists in the configuration
2184
     - it does not have primary or secondary instances
2185
     - it's not the master
2186

2187
    Any errors are signaled by raising errors.OpPrereqError.
2188

2189
    """
2190
    node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
2191
    if node is None:
2192
      raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name)
2193

    
2194
    instance_list = self.cfg.GetInstanceList()
2195

    
2196
    masternode = self.cfg.GetMasterNode()
2197
    if node.name == masternode:
2198
      raise errors.OpPrereqError("Node is the master node,"
2199
                                 " you need to failover first.")
2200

    
2201
    for instance_name in instance_list:
2202
      instance = self.cfg.GetInstanceInfo(instance_name)
2203
      if node.name in instance.all_nodes:
2204
        raise errors.OpPrereqError("Instance %s is still running on the node,"
2205
                                   " please remove first." % instance_name)
2206
    self.op.node_name = node.name
2207
    self.node = node
2208

    
2209
  def Exec(self, feedback_fn):
2210
    """Removes the node from the cluster.
2211

2212
    """
2213
    node = self.node
2214
    logging.info("Stopping the node daemon and removing configs from node %s",
2215
                 node.name)
2216

    
2217
    self.context.RemoveNode(node.name)
2218

    
2219
    # Run post hooks on the node before it's removed
2220
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
2221
    try:
2222
      h_results = hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
2223
    except:
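      # any error while running the post hooks is only logged below, so that
      # a hook failure cannot abort the node removal itself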
2224
      self.LogWarning("Errors occurred running hooks on %s" % node.name)
2225

    
2226
    result = self.rpc.call_node_leave_cluster(node.name)
2227
    msg = result.fail_msg
2228
    if msg:
2229
      self.LogWarning("Errors encountered on the remote node while leaving"
2230
                      " the cluster: %s", msg)
2231

    
2232
    # Promote nodes to master candidate as needed
2233
    _AdjustCandidatePool(self)
2234

    
2235

    
2236
class LUQueryNodes(NoHooksLU):
2237
  """Logical unit for querying nodes.
2238

2239
  """
2240
  _OP_REQP = ["output_fields", "names", "use_locking"]
2241
  REQ_BGL = False
2242
  _FIELDS_DYNAMIC = utils.FieldSet(
2243
    "dtotal", "dfree",
2244
    "mtotal", "mnode", "mfree",
2245
    "bootid",
2246
    "ctotal", "cnodes", "csockets",
2247
    )
2248

    
2249
  _FIELDS_STATIC = utils.FieldSet(
2250
    "name", "pinst_cnt", "sinst_cnt",
2251
    "pinst_list", "sinst_list",
2252
    "pip", "sip", "tags",
2253
    "serial_no", "ctime", "mtime",
2254
    "master_candidate",
2255
    "master",
2256
    "offline",
2257
    "drained",
2258
    "role",
2259
    )
2260

    
2261
  def ExpandNames(self):
2262
    _CheckOutputFields(static=self._FIELDS_STATIC,
2263
                       dynamic=self._FIELDS_DYNAMIC,
2264
                       selected=self.op.output_fields)
2265

    
2266
    self.needed_locks = {}
2267
    self.share_locks[locking.LEVEL_NODE] = 1
2268

    
2269
    if self.op.names:
2270
      self.wanted = _GetWantedNodes(self, self.op.names)
2271
    else:
2272
      self.wanted = locking.ALL_SET
2273

    
2274
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2275
    self.do_locking = self.do_node_query and self.op.use_locking
2276
    if self.do_locking:
2277
      # if we don't request only static fields, we need to lock the nodes
2278
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
2279

    
2280

    
2281
  def CheckPrereq(self):
2282
    """Check prerequisites.
2283

2284
    """
2285
    # The validation of the node list is done in the _GetWantedNodes,
2286
    # if non empty, and if empty, there's no validation to do
2287
    pass
2288

    
2289
  def Exec(self, feedback_fn):
2290
    """Computes the list of nodes and their attributes.
2291

2292
    """
2293
    all_info = self.cfg.GetAllNodesInfo()
2294
    if self.do_locking:
2295
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
2296
    elif self.wanted != locking.ALL_SET:
2297
      nodenames = self.wanted
2298
      missing = set(nodenames).difference(all_info.keys())
2299
      if missing:
2300
        raise errors.OpExecError(
2301
          "Some nodes were removed before retrieving their data: %s" % missing)
2302
    else:
2303
      nodenames = all_info.keys()
2304

    
2305
    nodenames = utils.NiceSort(nodenames)
2306
    nodelist = [all_info[name] for name in nodenames]
2307

    
2308
    # begin data gathering
2309

    
2310
    if self.do_node_query:
2311
      live_data = {}
2312
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2313
                                          self.cfg.GetHypervisorType())
2314
      for name in nodenames:
2315
        nodeinfo = node_data[name]
2316
        if not nodeinfo.fail_msg and nodeinfo.payload:
2317
          nodeinfo = nodeinfo.payload
2318
          fn = utils.TryConvert
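          # utils.TryConvert returns the input unchanged if the conversion
          # raises, so None or malformed values are passed through as-is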
2319
          live_data[name] = {
2320
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2321
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2322
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
2323
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2324
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
2325
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2326
            "bootid": nodeinfo.get('bootid', None),
2327
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2328
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2329
            }
2330
        else:
2331
          live_data[name] = {}
2332
    else:
2333
      live_data = dict.fromkeys(nodenames, {})
2334

    
2335
    node_to_primary = dict([(name, set()) for name in nodenames])
2336
    node_to_secondary = dict([(name, set()) for name in nodenames])
2337

    
2338
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
2339
                             "sinst_cnt", "sinst_list"))
2340
    if inst_fields & frozenset(self.op.output_fields):
2341
      instancelist = self.cfg.GetInstanceList()
2342

    
2343
      for instance_name in instancelist:
2344
        inst = self.cfg.GetInstanceInfo(instance_name)
2345
        if inst.primary_node in node_to_primary:
2346
          node_to_primary[inst.primary_node].add(inst.name)
2347
        for secnode in inst.secondary_nodes:
2348
          if secnode in node_to_secondary:
2349
            node_to_secondary[secnode].add(inst.name)
2350

    
2351
    master_node = self.cfg.GetMasterNode()
2352

    
2353
    # end data gathering
2354

    
2355
    output = []
2356
    for node in nodelist:
2357
      node_output = []
2358
      for field in self.op.output_fields:
2359
        if field == "name":
2360
          val = node.name
2361
        elif field == "pinst_list":
2362
          val = list(node_to_primary[node.name])
2363
        elif field == "sinst_list":
2364
          val = list(node_to_secondary[node.name])
2365
        elif field == "pinst_cnt":
2366
          val = len(node_to_primary[node.name])
2367
        elif field == "sinst_cnt":
2368
          val = len(node_to_secondary[node.name])
2369
        elif field == "pip":
2370
          val = node.primary_ip
2371
        elif field == "sip":
2372
          val = node.secondary_ip
2373
        elif field == "tags":
2374
          val = list(node.GetTags())
2375
        elif field == "serial_no":
2376
          val = node.serial_no
2377
        elif field == "ctime":
2378
          val = node.ctime
2379
        elif field == "mtime":
2380
          val = node.mtime
2381
        elif field == "master_candidate":
2382
          val = node.master_candidate
2383
        elif field == "master":
2384
          val = node.name == master_node
2385
        elif field == "offline":
2386
          val = node.offline
2387
        elif field == "drained":
2388
          val = node.drained
2389
        elif self._FIELDS_DYNAMIC.Matches(field):
2390
          val = live_data[node.name].get(field, None)
2391
        elif field == "role":
2392
          if node.name == master_node:
2393
            val = "M"
2394
          elif node.master_candidate:
2395
            val = "C"
2396
          elif node.drained:
2397
            val = "D"
2398
          elif node.offline:
2399
            val = "O"
2400
          else:
2401
            val = "R"
2402
        else:
2403
          raise errors.ParameterError(field)
2404
        node_output.append(val)
2405
      output.append(node_output)
2406

    
2407
    return output
2408

    
2409

    
2410
class LUQueryNodeVolumes(NoHooksLU):
2411
  """Logical unit for getting volumes on node(s).
2412

2413
  """
2414
  _OP_REQP = ["nodes", "output_fields"]
2415
  REQ_BGL = False
2416
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
2417
  _FIELDS_STATIC = utils.FieldSet("node")
2418

    
2419
  def ExpandNames(self):
2420
    _CheckOutputFields(static=self._FIELDS_STATIC,
2421
                       dynamic=self._FIELDS_DYNAMIC,
2422
                       selected=self.op.output_fields)
2423

    
2424
    self.needed_locks = {}
2425
    self.share_locks[locking.LEVEL_NODE] = 1
2426
    if not self.op.nodes:
2427
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2428
    else:
2429
      self.needed_locks[locking.LEVEL_NODE] = \
2430
        _GetWantedNodes(self, self.op.nodes)
2431

    
2432
  def CheckPrereq(self):
2433
    """Check prerequisites.
2434

2435
    This checks that the fields required are valid output fields.
2436

2437
    """
2438
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2439

    
2440
  def Exec(self, feedback_fn):
2441
    """Computes the list of nodes and their attributes.
2442

2443
    """
2444
    nodenames = self.nodes
2445
    volumes = self.rpc.call_node_volumes(nodenames)
2446

    
2447
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
2448
             in self.cfg.GetInstanceList()]
2449

    
2450
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
2451

    
2452
    output = []
2453
    for node in nodenames:
2454
      nresult = volumes[node]
2455
      if nresult.offline:
2456
        continue
2457
      msg = nresult.fail_msg
2458
      if msg:
2459
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
2460
        continue
2461

    
2462
      node_vols = nresult.payload[:]
2463
      node_vols.sort(key=lambda vol: vol['dev'])
2464

    
2465
      for vol in node_vols:
2466
        node_output = []
2467
        for field in self.op.output_fields:
2468
          if field == "node":
2469
            val = node
2470
          elif field == "phys":
2471
            val = vol['dev']
2472
          elif field == "vg":
2473
            val = vol['vg']
2474
          elif field == "name":
2475
            val = vol['name']
2476
          elif field == "size":
2477
            val = int(float(vol['size']))
2478
          elif field == "instance":
2479
            for inst in ilist:
2480
              if node not in lv_by_node[inst]:
2481
                continue
2482
              if vol['name'] in lv_by_node[inst][node]:
2483
                val = inst.name
2484
                break
2485
            else:
2486
              val = '-'
2487
          else:
2488
            raise errors.ParameterError(field)
2489
          node_output.append(str(val))
2490

    
2491
        output.append(node_output)
2492

    
2493
    return output
2494

    
2495

    
2496
class LUQueryNodeStorage(NoHooksLU):
2497
  """Logical unit for getting information on storage units on node(s).
2498

2499
  """
2500
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
2501
  REQ_BGL = False
2502
  _FIELDS_STATIC = utils.FieldSet("node")
2503

    
2504
  def ExpandNames(self):
2505
    storage_type = self.op.storage_type
2506

    
2507
    if storage_type not in constants.VALID_STORAGE_FIELDS:
2508
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type)
2509

    
2510
    dynamic_fields = constants.VALID_STORAGE_FIELDS[storage_type]
2511

    
2512
    _CheckOutputFields(static=self._FIELDS_STATIC,
2513
                       dynamic=utils.FieldSet(*dynamic_fields),
2514
                       selected=self.op.output_fields)
2515

    
2516
    self.needed_locks = {}
2517
    self.share_locks[locking.LEVEL_NODE] = 1
2518

    
2519
    if self.op.nodes:
2520
      self.needed_locks[locking.LEVEL_NODE] = \
2521
        _GetWantedNodes(self, self.op.nodes)
2522
    else:
2523
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2524

    
2525
  def CheckPrereq(self):
2526
    """Check prerequisites.
2527

2528
    This checks that the fields required are valid output fields.
2529

2530
    """
2531
    self.op.name = getattr(self.op, "name", None)
2532

    
2533
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2534

    
2535
  def Exec(self, feedback_fn):
2536
    """Computes the list of nodes and their attributes.
2537

2538
    """
2539
    # Always get name to sort by
2540
    if constants.SF_NAME in self.op.output_fields:
2541
      fields = self.op.output_fields[:]
2542
    else:
2543
      fields = [constants.SF_NAME] + self.op.output_fields
2544

    
2545
    # Never ask for node as it's only known to the LU
2546
    while "node" in fields:
2547
      fields.remove("node")
2548

    
2549
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
2550
    name_idx = field_idx[constants.SF_NAME]
2551

    
2552
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
2553
    data = self.rpc.call_storage_list(self.nodes,
2554
                                      self.op.storage_type, st_args,
2555
                                      self.op.name, fields)
2556

    
2557
    result = []
2558

    
2559
    for node in utils.NiceSort(self.nodes):
2560
      nresult = data[node]
2561
      if nresult.offline:
2562
        continue
2563

    
2564
      msg = nresult.fail_msg
2565
      if msg:
2566
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
2567
        continue
2568

    
2569
      rows = dict([(row[name_idx], row) for row in nresult.payload])
2570

    
2571
      for name in utils.NiceSort(rows.keys()):
2572
        row = rows[name]
2573

    
2574
        out = []
2575

    
2576
        for field in self.op.output_fields:
2577
          if field == "node":
2578
            val = node
2579
          elif field in field_idx:
2580
            val = row[field_idx[field]]
2581
          else:
2582
            raise errors.ParameterError(field)
2583

    
2584
          out.append(val)
2585

    
2586
        result.append(out)
2587

    
2588
    return result
2589

    
2590

    
2591
class LUModifyNodeStorage(NoHooksLU):
2592
  """Logical unit for modifying a storage volume on a node.
2593

2594
  """
2595
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
2596
  REQ_BGL = False
2597

    
2598
  def CheckArguments(self):
2599
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2600
    if node_name is None:
2601
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2602

    
2603
    self.op.node_name = node_name
2604

    
2605
    storage_type = self.op.storage_type
2606
    if storage_type not in constants.VALID_STORAGE_FIELDS:
2607
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type)
2608

    
2609
  def ExpandNames(self):
2610
    self.needed_locks = {
2611
      locking.LEVEL_NODE: self.op.node_name,
2612
      }
2613

    
2614
  def CheckPrereq(self):
2615
    """Check prerequisites.
2616

2617
    """
2618
    storage_type = self.op.storage_type
2619

    
2620
    try:
2621
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
2622
    except KeyError:
2623
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
2624
                                 " modified" % storage_type)
2625

    
2626
    diff = set(self.op.changes.keys()) - modifiable
2627
    if diff:
2628
      raise errors.OpPrereqError("The following fields can not be modified for"
2629
                                 " storage units of type '%s': %r" %
2630
                                 (storage_type, list(diff)))
2631

    
2632
  def Exec(self, feedback_fn):
2633
    """Computes the list of nodes and their attributes.
2634

2635
    """
2636
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
2637
    result = self.rpc.call_storage_modify(self.op.node_name,
2638
                                          self.op.storage_type, st_args,
2639
                                          self.op.name, self.op.changes)
2640
    result.Raise("Failed to modify storage unit '%s' on %s" %
2641
                 (self.op.name, self.op.node_name))
2642

    
2643

    
2644
class LUAddNode(LogicalUnit):
2645
  """Logical unit for adding node to the cluster.
2646

2647
  """
2648
  HPATH = "node-add"
2649
  HTYPE = constants.HTYPE_NODE
2650
  _OP_REQP = ["node_name"]
2651

    
2652
  def BuildHooksEnv(self):
2653
    """Build hooks env.
2654

2655
    This will run on all nodes before, and on all nodes + the new node after.
2656

2657
    """
2658
    env = {
2659
      "OP_TARGET": self.op.node_name,
2660
      "NODE_NAME": self.op.node_name,
2661
      "NODE_PIP": self.op.primary_ip,
2662
      "NODE_SIP": self.op.secondary_ip,
2663
      }
2664
    nodes_0 = self.cfg.GetNodeList()
2665
    nodes_1 = nodes_0 + [self.op.node_name, ]
2666
    return env, nodes_0, nodes_1
2667

    
2668
  def CheckPrereq(self):
2669
    """Check prerequisites.
2670

2671
    This checks:
2672
     - the new node is not already in the config
2673
     - it is resolvable
2674
     - its parameters (single/dual homed) matches the cluster
2675

2676
    Any errors are signaled by raising errors.OpPrereqError.
2677

2678
    """
2679
    node_name = self.op.node_name
2680
    cfg = self.cfg
2681

    
2682
    dns_data = utils.HostInfo(node_name)
2683

    
2684
    node = dns_data.name
2685
    primary_ip = self.op.primary_ip = dns_data.ip
2686
    secondary_ip = getattr(self.op, "secondary_ip", None)
2687
    if secondary_ip is None:
2688
      secondary_ip = primary_ip
2689
    if not utils.IsValidIP(secondary_ip):
2690
      raise errors.OpPrereqError("Invalid secondary IP given")
2691
    self.op.secondary_ip = secondary_ip
2692

    
2693
    node_list = cfg.GetNodeList()
2694
    if not self.op.readd and node in node_list:
2695
      raise errors.OpPrereqError("Node %s is already in the configuration" %
2696
                                 node)
2697
    elif self.op.readd and node not in node_list:
2698
      raise errors.OpPrereqError("Node %s is not in the configuration" % node)
2699

    
2700
    for existing_node_name in node_list:
2701
      existing_node = cfg.GetNodeInfo(existing_node_name)
2702

    
2703
      if self.op.readd and node == existing_node_name:
2704
        if (existing_node.primary_ip != primary_ip or
2705
            existing_node.secondary_ip != secondary_ip):
2706
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
2707
                                     " address configuration as before")
2708
        continue
2709

    
2710
      if (existing_node.primary_ip == primary_ip or
2711
          existing_node.secondary_ip == primary_ip or
2712
          existing_node.primary_ip == secondary_ip or
2713
          existing_node.secondary_ip == secondary_ip):
2714
        raise errors.OpPrereqError("New node ip address(es) conflict with"
2715
                                   " existing node %s" % existing_node.name)
2716

    
2717
    # check that the type of the node (single versus dual homed) is the
2718
    # same as for the master
2719
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
2720
    master_singlehomed = myself.secondary_ip == myself.primary_ip
2721
    newbie_singlehomed = secondary_ip == primary_ip
2722
    if master_singlehomed != newbie_singlehomed:
2723
      if master_singlehomed:
2724
        raise errors.OpPrereqError("The master has no private ip but the"
2725
                                   " new node has one")
2726
      else:
2727
        raise errors.OpPrereqError("The master has a private ip but the"
2728
                                   " new node doesn't have one")
2729

    
2730
    # checks reachability
2731
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
2732
      raise errors.OpPrereqError("Node not reachable by ping")
2733

    
2734
    if not newbie_singlehomed:
2735
      # check reachability from my secondary ip to newbie's secondary ip
2736
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
2737
                           source=myself.secondary_ip):
2738
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
2739
                                   " based ping to noded port")
2740

    
2741
    cp_size = self.cfg.GetClusterInfo().candidate_pool_size
2742
    if self.op.readd:
2743
      exceptions = [node]
2744
    else:
2745
      exceptions = []
2746
    mc_now, mc_max = self.cfg.GetMasterCandidateStats(exceptions)
2747
    # the new node will increase mc_max with one, so:
2748
    mc_max = min(mc_max + 1, cp_size)
2749
    self.master_candidate = mc_now < mc_max
2750

    
2751
    if self.op.readd:
2752
      self.new_node = self.cfg.GetNodeInfo(node)
2753
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
2754
    else:
2755
      self.new_node = objects.Node(name=node,
2756
                                   primary_ip=primary_ip,
2757
                                   secondary_ip=secondary_ip,
2758
                                   master_candidate=self.master_candidate,
2759
                                   offline=False, drained=False)
2760

    
2761
  def Exec(self, feedback_fn):
2762
    """Adds the new node to the cluster.
2763

2764
    """
2765
    new_node = self.new_node
2766
    node = new_node.name
2767

    
2768
    # for re-adds, reset the offline/drained/master-candidate flags;
2769
    # we need to reset here, otherwise offline would prevent RPC calls
2770
    # later in the procedure; this also means that if the re-add
2771
    # fails, we are left with a non-offlined, broken node
2772
    if self.op.readd:
2773
      new_node.drained = new_node.offline = False
2774
      self.LogInfo("Readding a node, the offline/drained flags were reset")
2775
      # if we demote the node, we do cleanup later in the procedure
2776
      new_node.master_candidate = self.master_candidate
2777

    
2778
    # notify the user about any possible mc promotion
2779
    if new_node.master_candidate:
2780
      self.LogInfo("Node will be a master candidate")
2781

    
2782
    # check connectivity
2783
    result = self.rpc.call_version([node])[node]
2784
    result.Raise("Can't get version information from node %s" % node)
2785
    if constants.PROTOCOL_VERSION == result.payload:
2786
      logging.info("Communication to node %s fine, sw version %s match",
2787
                   node, result.payload)
2788
    else:
2789
      raise errors.OpExecError("Version mismatch master version %s,"
2790
                               " node version %s" %
2791
                               (constants.PROTOCOL_VERSION, result.payload))
2792

    
2793
    # setup ssh on node
2794
    logging.info("Copy ssh key to node %s", node)
2795
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
2796
    keyarray = []
2797
    keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
2798
                constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
2799
                priv_key, pub_key]
2800

    
2801
    for i in keyfiles:
2802
      f = open(i, 'r')
2803
      try:
2804
        keyarray.append(f.read())
2805
      finally:
2806
        f.close()
2807

    
2808
    result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
2809
                                    keyarray[2],
2810
                                    keyarray[3], keyarray[4], keyarray[5])
2811
    result.Raise("Cannot transfer ssh keys to the new node")
2812

    
2813
    # Add node to our /etc/hosts, and add key to known_hosts
2814
    if self.cfg.GetClusterInfo().modify_etc_hosts:
2815
      utils.AddHostToEtcHosts(new_node.name)
2816

    
2817
    if new_node.secondary_ip != new_node.primary_ip:
2818
      result = self.rpc.call_node_has_ip_address(new_node.name,
2819
                                                 new_node.secondary_ip)
2820
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
2821
                   prereq=True)
2822
      if not result.payload:
2823
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
2824
                                 " you gave (%s). Please fix and re-run this"
2825
                                 " command." % new_node.secondary_ip)
2826

    
2827
    node_verify_list = [self.cfg.GetMasterNode()]
2828
    node_verify_param = {
2829
      'nodelist': [node],
2830
      # TODO: do a node-net-test as well?
2831
    }
2832

    
2833
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
2834
                                       self.cfg.GetClusterName())
2835
    for verifier in node_verify_list:
2836
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
2837
      nl_payload = result[verifier].payload['nodelist']
2838
      if nl_payload:
2839
        for failed in nl_payload:
2840
          feedback_fn("ssh/hostname verification failed %s -> %s" %
2841
                      (verifier, nl_payload[failed]))
2842
        raise errors.OpExecError("ssh/hostname verification failed.")
2843

    
2844
    if self.op.readd:
2845
      _RedistributeAncillaryFiles(self)
2846
      self.context.ReaddNode(new_node)
2847
      # make sure we redistribute the config
2848
      self.cfg.Update(new_node)
2849
      # and make sure the new node will not have old files around
2850
      if not new_node.master_candidate:
2851
        result = self.rpc.call_node_demote_from_mc(new_node.name)
2852
        msg = result.fail_msg
2853
        if msg:
2854
          self.LogWarning("Node failed to demote itself from master"
2855
                          " candidate status: %s" % msg)
2856
    else:
2857
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
2858
      self.context.AddNode(new_node)
2859

    
2860

    
2861
class LUSetNodeParams(LogicalUnit):
2862
  """Modifies the parameters of a node.
2863

2864
  """
2865
  HPATH = "node-modify"
2866
  HTYPE = constants.HTYPE_NODE
2867
  _OP_REQP = ["node_name"]
2868
  REQ_BGL = False
2869

    
2870
  def CheckArguments(self):
2871
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2872
    if node_name is None:
2873
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2874
    self.op.node_name = node_name
2875
    _CheckBooleanOpField(self.op, 'master_candidate')
2876
    _CheckBooleanOpField(self.op, 'offline')
2877
    _CheckBooleanOpField(self.op, 'drained')
2878
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
2879
    if all_mods.count(None) == 3:
2880
      raise errors.OpPrereqError("Please pass at least one modification")
2881
    if all_mods.count(True) > 1:
2882
      raise errors.OpPrereqError("Can't set the node into more than one"
2883
                                 " state at the same time")
2884

    
2885
  def ExpandNames(self):
2886
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
2887

    
2888
  def BuildHooksEnv(self):
2889
    """Build hooks env.
2890

2891
    This runs on the master node.
2892

2893
    """
2894
    env = {
2895
      "OP_TARGET": self.op.node_name,
2896
      "MASTER_CANDIDATE": str(self.op.master_candidate),
2897
      "OFFLINE": str(self.op.offline),
2898
      "DRAINED": str(self.op.drained),
2899
      }
2900
    nl = [self.cfg.GetMasterNode(),
2901
          self.op.node_name]
2902
    return env, nl, nl
2903

    
2904
  def CheckPrereq(self):
2905
    """Check prerequisites.
2906

2907
    This checks the requested node state changes against the current state.
2908

2909
    """
2910
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
2911

    
2912
    if ((self.op.master_candidate == False or self.op.offline == True or
2913
         self.op.drained == True) and node.master_candidate):
2914
      # we will demote the node from master_candidate
2915
      if self.op.node_name == self.cfg.GetMasterNode():
2916
        raise errors.OpPrereqError("The master node has to be a"
2917
                                   " master candidate, online and not drained")
2918
      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
2919
      num_candidates, _ = self.cfg.GetMasterCandidateStats()
2920
      if num_candidates <= cp_size:
2921
        msg = ("Not enough master candidates (desired"
2922
               " %d, new value will be %d)" % (cp_size, num_candidates-1))
2923
        if self.op.force:
2924
          self.LogWarning(msg)
2925
        else:
2926
          raise errors.OpPrereqError(msg)
2927

    
2928
    if (self.op.master_candidate == True and
2929
        ((node.offline and not self.op.offline == False) or
2930
         (node.drained and not self.op.drained == False))):
2931
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
2932
                                 " to master_candidate" % node.name)
2933

    
2934
    return
2935

    
2936
  def Exec(self, feedback_fn):
2937
    """Modifies a node.
2938

2939
    """
2940
    node = self.node
2941

    
2942
    result = []
2943
    changed_mc = False
2944

    
2945
    if self.op.offline is not None:
2946
      node.offline = self.op.offline
2947
      result.append(("offline", str(self.op.offline)))
2948
      if self.op.offline == True:
2949
        if node.master_candidate:
2950
          node.master_candidate = False
2951
          changed_mc = True
2952
          result.append(("master_candidate", "auto-demotion due to offline"))
2953
        if node.drained:
2954
          node.drained = False
2955
          result.append(("drained", "clear drained status due to offline"))
2956

    
2957
    if self.op.master_candidate is not None:
2958
      node.master_candidate = self.op.master_candidate
2959
      changed_mc = True
2960
      result.append(("master_candidate", str(self.op.master_candidate)))
2961
      if self.op.master_candidate == False:
2962
        rrc = self.rpc.call_node_demote_from_mc(node.name)
2963
        msg = rrc.fail_msg
2964
        if msg:
2965
          self.LogWarning("Node failed to demote itself: %s" % msg)
2966

    
2967
    if self.op.drained is not None:
2968
      node.drained = self.op.drained
2969
      result.append(("drained", str(self.op.drained)))
2970
      if self.op.drained == True:
2971
        if node.master_candidate:
2972
          node.master_candidate = False
2973
          changed_mc = True
2974
          result.append(("master_candidate", "auto-demotion due to drain"))
2975
          rrc = self.rpc.call_node_demote_from_mc(node.name)
2976
          msg = rrc.RemoteFailMsg()
2977
          if msg:
2978
            self.LogWarning("Node failed to demote itself: %s" % msg)
2979
        if node.offline:
2980
          node.offline = False
2981
          result.append(("offline", "clear offline status due to drain"))
2982

    
2983
    # this will trigger configuration file update, if needed
2984
    self.cfg.Update(node)
2985
    # this will trigger job queue propagation or cleanup
2986
    if changed_mc:
2987
      self.context.ReaddNode(node)
2988

    
2989
    return result
2990

    
2991

    
2992
class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_REQP = ["node_name", "force"]
  REQ_BGL = False

  def CheckArguments(self):
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if node_name is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
    self.op.node_name = node_name
    if node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set")

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This LU has no prereqs.

    """
    pass

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """No prerequisites needed for this LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "beparams": cluster.beparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "file_storage_dir": cluster.file_storage_dir,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      }

    return result


class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_REQP = []
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag")

  def ExpandNames(self):
    self.needed_locks = {}

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def CheckPrereq(self):
    """No prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)
    if not hasattr(self.op, "ignore_size"):
      self.op.ignore_size = False

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: a pair of (disks_ok, device_info), where device_info is a
      list of (host, instance_visible_name, node_visible_name) tuples
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in instance.disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in instance.disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
    device_info.append((instance.primary_node, inst_disk.iv_name,
                        result.payload))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in instance.disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance):
  """Shutdown block devices of an instance.

  This function checks whether the instance is running before calling
  _ShutdownInstanceDisks, and refuses to shut down the disks if it is.

  """
  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s" % pnode)

  if instance.name in ins_l.payload:
    raise errors.OpExecError("Instance is running, can't shutdown"
                             " block devices.")

  _ShutdownInstanceDisks(lu, instance)


def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  Errors on the primary node are ignored only if ignore_primary is
  true; otherwise they cause the function to return False.

  """
  all_result = True
  for disk in instance.disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if not ignore_primary or node != instance.primary_node:
          all_result = False
  return all_result


def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node, prereq=True)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem))
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem))


class LUStartupInstance(LogicalUnit):
3369
  """Starts an instance.
3370

3371
  """
3372
  HPATH = "instance-start"
3373
  HTYPE = constants.HTYPE_INSTANCE
3374
  _OP_REQP = ["instance_name", "force"]
3375
  REQ_BGL = False
3376

    
3377
  def ExpandNames(self):
3378
    self._ExpandAndLockInstance()
3379

    
3380
  def BuildHooksEnv(self):
3381
    """Build hooks env.
3382

3383
    This runs on master, primary and secondary nodes of the instance.
3384

3385
    """
3386
    env = {
3387
      "FORCE": self.op.force,
3388
      }
3389
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3390
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3391
    return env, nl, nl
3392

    
3393
  def CheckPrereq(self):
3394
    """Check prerequisites.
3395

3396
    This checks that the instance is in the cluster.
3397

3398
    """
3399
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3400
    assert self.instance is not None, \
3401
      "Cannot retrieve locked instance %s" % self.op.instance_name
3402

    
3403
    # extra beparams
3404
    self.beparams = getattr(self.op, "beparams", {})
3405
    if self.beparams:
3406
      if not isinstance(self.beparams, dict):
3407
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
3408
                                   " dict" % (type(self.beparams), ))
3409
      # fill the beparams dict
3410
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
3411
      self.op.beparams = self.beparams
3412

    
3413
    # extra hvparams
3414
    self.hvparams = getattr(self.op, "hvparams", {})
3415
    if self.hvparams:
3416
      if not isinstance(self.hvparams, dict):
3417
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
3418
                                   " dict" % (type(self.hvparams), ))
3419

    
3420
      # check hypervisor parameter syntax (locally)
3421
      cluster = self.cfg.GetClusterInfo()
3422
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
3423
      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
3424
                                    instance.hvparams)
3425
      filled_hvp.update(self.hvparams)
3426
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
3427
      hv_type.CheckParameterSyntax(filled_hvp)
3428
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
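      # note: filled_hvp is only used for validation here; Exec passes
      # the extra parameters themselves (self.hvparams) to the node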
3429
      self.op.hvparams = self.hvparams
3430

    
3431
    _CheckNodeOnline(self, instance.primary_node)
3432

    
3433
    bep = self.cfg.GetClusterInfo().FillBE(instance)
3434
    # check bridges existence
3435
    _CheckInstanceBridgesExist(self, instance)
3436

    
3437
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3438
                                              instance.name,
3439
                                              instance.hypervisor)
3440
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3441
                      prereq=True)
3442
    if not remote_info.payload: # not running already
3443
      _CheckNodeFreeMemory(self, instance.primary_node,
3444
                           "starting instance %s" % instance.name,
3445
                           bep[constants.BE_MEMORY], instance.hypervisor)
3446

    
3447
  def Exec(self, feedback_fn):
3448
    """Start the instance.
3449

3450
    """
3451
    instance = self.instance
3452
    force = self.op.force
3453

    
3454
    self.cfg.MarkInstanceUp(instance.name)
3455

    
3456
    node_current = instance.primary_node
3457

    
3458
    _StartInstanceDisks(self, instance, force)
3459

    
3460
    result = self.rpc.call_instance_start(node_current, instance,
3461
                                          self.hvparams, self.beparams)
3462
    msg = result.fail_msg
3463
    if msg:
3464
      _ShutdownInstanceDisks(self, instance)
3465
      raise errors.OpExecError("Could not start instance: %s" % msg)
3466

    
3467

    
3468
class LURebootInstance(LogicalUnit):
3469
  """Reboot an instance.
3470

3471
  """
3472
  HPATH = "instance-reboot"
3473
  HTYPE = constants.HTYPE_INSTANCE
3474
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
3475
  REQ_BGL = False
3476

    
3477
  def ExpandNames(self):
3478
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
3479
                                   constants.INSTANCE_REBOOT_HARD,
3480
                                   constants.INSTANCE_REBOOT_FULL]:
3481
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
3482
                                  (constants.INSTANCE_REBOOT_SOFT,
3483
                                   constants.INSTANCE_REBOOT_HARD,
3484
                                   constants.INSTANCE_REBOOT_FULL))
3485
    self._ExpandAndLockInstance()
3486

    
3487
  def BuildHooksEnv(self):
3488
    """Build hooks env.
3489

3490
    This runs on master, primary and secondary nodes of the instance.
3491

3492
    """
3493
    env = {
3494
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
3495
      "REBOOT_TYPE": self.op.reboot_type,
3496
      }
3497
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3498
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3499
    return env, nl, nl
3500

    
3501
  def CheckPrereq(self):
3502
    """Check prerequisites.
3503

3504
    This checks that the instance is in the cluster.
3505

3506
    """
3507
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3508
    assert self.instance is not None, \
3509
      "Cannot retrieve locked instance %s" % self.op.instance_name
3510

    
3511
    _CheckNodeOnline(self, instance.primary_node)
3512

    
3513
    # check bridges existence
3514
    _CheckInstanceBridgesExist(self, instance)
3515

    
3516
  def Exec(self, feedback_fn):
3517
    """Reboot the instance.
3518

3519
    """
3520
    instance = self.instance
3521
    ignore_secondaries = self.op.ignore_secondaries
3522
    reboot_type = self.op.reboot_type
3523

    
3524
    node_current = instance.primary_node
3525

    
3526
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
3527
                       constants.INSTANCE_REBOOT_HARD]:
3528
      for disk in instance.disks:
3529
        self.cfg.SetDiskID(disk, node_current)
3530
      result = self.rpc.call_instance_reboot(node_current, instance,
3531
                                             reboot_type)
3532
      result.Raise("Could not reboot instance")
3533
    else:
3534
      result = self.rpc.call_instance_shutdown(node_current, instance)
3535
      result.Raise("Could not shutdown instance for full reboot")
3536
      _ShutdownInstanceDisks(self, instance)
3537
      _StartInstanceDisks(self, instance, ignore_secondaries)
3538
      result = self.rpc.call_instance_start(node_current, instance, None, None)
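      # unlike LUStartupInstance, no extra hv/be parameters are passed here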
3539
      msg = result.fail_msg
3540
      if msg:
3541
        _ShutdownInstanceDisks(self, instance)
3542
        raise errors.OpExecError("Could not start instance for"
3543
                                 " full reboot: %s" % msg)
3544

    
3545
    self.cfg.MarkInstanceUp(instance.name)
3546

    
3547

    
3548
class LUShutdownInstance(LogicalUnit):
3549
  """Shutdown an instance.
3550

3551
  """
3552
  HPATH = "instance-stop"
3553
  HTYPE = constants.HTYPE_INSTANCE
3554
  _OP_REQP = ["instance_name"]
3555
  REQ_BGL = False
3556

    
3557
  def ExpandNames(self):
3558
    self._ExpandAndLockInstance()
3559

    
3560
  def BuildHooksEnv(self):
3561
    """Build hooks env.
3562

3563
    This runs on master, primary and secondary nodes of the instance.
3564

3565
    """
3566
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3567
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3568
    return env, nl, nl
3569

    
3570
  def CheckPrereq(self):
3571
    """Check prerequisites.
3572

3573
    This checks that the instance is in the cluster.
3574

3575
    """
3576
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3577
    assert self.instance is not None, \
3578
      "Cannot retrieve locked instance %s" % self.op.instance_name
3579
    _CheckNodeOnline(self, self.instance.primary_node)
3580

    
3581
  def Exec(self, feedback_fn):
3582
    """Shutdown the instance.
3583

3584
    """
3585
    instance = self.instance
3586
    node_current = instance.primary_node
3587
    self.cfg.MarkInstanceDown(instance.name)
3588
    result = self.rpc.call_instance_shutdown(node_current, instance)
3589
    msg = result.fail_msg
3590
    if msg:
3591
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)
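    # a failed shutdown is only a warning; we still shut down the disks below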
3592

    
3593
    _ShutdownInstanceDisks(self, instance)
3594

    
3595

    
3596
class LUReinstallInstance(LogicalUnit):
3597
  """Reinstall an instance.
3598

3599
  """
3600
  HPATH = "instance-reinstall"
3601
  HTYPE = constants.HTYPE_INSTANCE
3602
  _OP_REQP = ["instance_name"]
3603
  REQ_BGL = False
3604

    
3605
  def ExpandNames(self):
3606
    self._ExpandAndLockInstance()
3607

    
3608
  def BuildHooksEnv(self):
3609
    """Build hooks env.
3610

3611
    This runs on master, primary and secondary nodes of the instance.
3612

3613
    """
3614
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3615
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3616
    return env, nl, nl
3617

    
3618
  def CheckPrereq(self):
3619
    """Check prerequisites.
3620

3621
    This checks that the instance is in the cluster and is not running.
3622

3623
    """
3624
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3625
    assert instance is not None, \
3626
      "Cannot retrieve locked instance %s" % self.op.instance_name
3627
    _CheckNodeOnline(self, instance.primary_node)
3628

    
3629
    if instance.disk_template == constants.DT_DISKLESS:
3630
      raise errors.OpPrereqError("Instance '%s' has no disks" %
3631
                                 self.op.instance_name)
3632
    if instance.admin_up:
3633
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3634
                                 self.op.instance_name)
3635
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3636
                                              instance.name,
3637
                                              instance.hypervisor)
3638
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3639
                      prereq=True)
3640
    if remote_info.payload:
3641
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3642
                                 (self.op.instance_name,
3643
                                  instance.primary_node))
3644

    
3645
    self.op.os_type = getattr(self.op, "os_type", None)
3646
    if self.op.os_type is not None:
3647
      # OS verification
3648
      pnode = self.cfg.GetNodeInfo(
3649
        self.cfg.ExpandNodeName(instance.primary_node))
3650
      if pnode is None:
3651
        raise errors.OpPrereqError("Primary node '%s' is unknown" %
3652
                                   self.op.pnode)
3653
      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
3654
      result.Raise("OS '%s' not in supported OS list for primary node %s" %
3655
                   (self.op.os_type, pnode.name), prereq=True)
3656

    
3657
    self.instance = instance
3658

    
3659
  def Exec(self, feedback_fn):
3660
    """Reinstall the instance.
3661

3662
    """
3663
    inst = self.instance
3664

    
3665
    if self.op.os_type is not None:
3666
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
3667
      inst.os = self.op.os_type
3668
      self.cfg.Update(inst)
3669

    
3670
    _StartInstanceDisks(self, inst, None)
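    # bring the disks online for the OS create scripts; they are shut
    # down again in the finally clause below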
3671
    try:
3672
      feedback_fn("Running the instance OS create scripts...")
3673
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True)
3674
      result.Raise("Could not install OS for instance %s on node %s" %
3675
                   (inst.name, inst.primary_node))
3676
    finally:
3677
      _ShutdownInstanceDisks(self, inst)
3678

    
3679

    
3680
class LURecreateInstanceDisks(LogicalUnit):
3681
  """Recreate an instance's missing disks.
3682

3683
  """
3684
  HPATH = "instance-recreate-disks"
3685
  HTYPE = constants.HTYPE_INSTANCE
3686
  _OP_REQP = ["instance_name", "disks"]
3687
  REQ_BGL = False
3688

    
3689
  def CheckArguments(self):
3690
    """Check the arguments.
3691

3692
    """
3693
    if not isinstance(self.op.disks, list):
3694
      raise errors.OpPrereqError("Invalid disks parameter")
3695
    for item in self.op.disks:
3696
      if (not isinstance(item, int) or
3697
          item < 0):
3698
        raise errors.OpPrereqError("Invalid disk specification '%s'" %
3699
                                   str(item))
3700

    
3701
  def ExpandNames(self):
3702
    self._ExpandAndLockInstance()
3703

    
3704
  def BuildHooksEnv(self):
3705
    """Build hooks env.
3706

3707
    This runs on master, primary and secondary nodes of the instance.
3708

3709
    """
3710
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3711
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3712
    return env, nl, nl
3713

    
3714
  def CheckPrereq(self):
3715
    """Check prerequisites.
3716

3717
    This checks that the instance is in the cluster and is not running.
3718

3719
    """
3720
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3721
    assert instance is not None, \
3722
      "Cannot retrieve locked instance %s" % self.op.instance_name
3723
    _CheckNodeOnline(self, instance.primary_node)
3724

    
3725
    if instance.disk_template == constants.DT_DISKLESS:
3726
      raise errors.OpPrereqError("Instance '%s' has no disks" %
3727
                                 self.op.instance_name)
3728
    if instance.admin_up:
3729
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3730
                                 self.op.instance_name)
3731
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3732
                                              instance.name,
3733
                                              instance.hypervisor)
3734
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3735
                      prereq=True)
3736
    if remote_info.payload:
3737
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3738
                                 (self.op.instance_name,
3739
                                  instance.primary_node))
3740

    
3741
    if not self.op.disks:
3742
      self.op.disks = range(len(instance.disks))
3743
    else:
3744
      for idx in self.op.disks:
3745
        if idx >= len(instance.disks):
3746
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx)
3747

    
3748
    self.instance = instance
3749

    
3750
  def Exec(self, feedback_fn):
3751
    """Recreate the disks.
3752

3753
    """
3754
    to_skip = []
3755
    for idx, disk in enumerate(self.instance.disks):
3756
      if idx not in self.op.disks: # disk idx has not been passed in
3757
        to_skip.append(idx)
3758
        continue
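    # _CreateDisks recreates every disk whose index was not added to to_skip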
3759

    
3760
    _CreateDisks(self, self.instance, to_skip=to_skip)
3761

    
3762

    
3763
class LURenameInstance(LogicalUnit):
3764
  """Rename an instance.
3765

3766
  """
3767
  HPATH = "instance-rename"
3768
  HTYPE = constants.HTYPE_INSTANCE
3769
  _OP_REQP = ["instance_name", "new_name"]
3770

    
3771
  def BuildHooksEnv(self):
3772
    """Build hooks env.
3773

3774
    This runs on master, primary and secondary nodes of the instance.
3775

3776
    """
3777
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3778
    env["INSTANCE_NEW_NAME"] = self.op.new_name
3779
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3780
    return env, nl, nl
3781

    
3782
  def CheckPrereq(self):
3783
    """Check prerequisites.
3784

3785
    This checks that the instance is in the cluster and is not running.
3786

3787
    """
3788
    instance = self.cfg.GetInstanceInfo(
3789
      self.cfg.ExpandInstanceName(self.op.instance_name))
3790
    if instance is None:
3791
      raise errors.OpPrereqError("Instance '%s' not known" %
3792
                                 self.op.instance_name)
3793
    _CheckNodeOnline(self, instance.primary_node)
3794

    
3795
    if instance.admin_up:
3796
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3797
                                 self.op.instance_name)
3798
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3799
                                              instance.name,
3800
                                              instance.hypervisor)
3801
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3802
                      prereq=True)
3803
    if remote_info.payload:
3804
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3805
                                 (self.op.instance_name,
3806
                                  instance.primary_node))
3807
    self.instance = instance
3808

    
3809
    # new name verification
3810
    name_info = utils.HostInfo(self.op.new_name)
3811

    
3812
    self.op.new_name = new_name = name_info.name
3813
    instance_list = self.cfg.GetInstanceList()
3814
    if new_name in instance_list:
3815
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
3816
                                 new_name)
3817

    
3818
    if not getattr(self.op, "ignore_ip", False):
3819
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
3820
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
3821
                                   (name_info.ip, new_name))
3822

    
3823

    
3824
  def Exec(self, feedback_fn):
3825
    """Reinstall the instance.
3826

3827
    """
3828
    inst = self.instance
3829
    old_name = inst.name
3830

    
3831
    if inst.disk_template == constants.DT_FILE:
3832
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
3833

    
3834
    self.cfg.RenameInstance(inst.name, self.op.new_name)
3835
    # Change the instance lock. This is definitely safe while we hold the BGL
3836
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
3837
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
3838

    
3839
    # re-read the instance from the configuration after rename
3840
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
3841

    
3842
    if inst.disk_template == constants.DT_FILE:
3843
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
3844
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
3845
                                                     old_file_storage_dir,
3846
                                                     new_file_storage_dir)
3847
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
3848
                   " (but the instance has been renamed in Ganeti)" %
3849
                   (inst.primary_node, old_file_storage_dir,
3850
                    new_file_storage_dir))
3851

    
3852
    _StartInstanceDisks(self, inst, None)
3853
    try:
3854
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
3855
                                                 old_name)
3856
      msg = result.fail_msg
3857
      if msg:
3858
        msg = ("Could not run OS rename script for instance %s on node %s"
3859
               " (but the instance has been renamed in Ganeti): %s" %
3860
               (inst.name, inst.primary_node, msg))
3861
        self.proc.LogWarning(msg)
3862
    finally:
3863
      _ShutdownInstanceDisks(self, inst)
3864

    
3865

    
3866
class LURemoveInstance(LogicalUnit):
3867
  """Remove an instance.
3868

3869
  """
3870
  HPATH = "instance-remove"
3871
  HTYPE = constants.HTYPE_INSTANCE
3872
  _OP_REQP = ["instance_name", "ignore_failures"]
3873
  REQ_BGL = False
3874

    
3875
  def ExpandNames(self):
3876
    self._ExpandAndLockInstance()
3877
    self.needed_locks[locking.LEVEL_NODE] = []
3878
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3879

    
3880
  def DeclareLocks(self, level):
3881
    if level == locking.LEVEL_NODE:
3882
      self._LockInstancesNodes()
3883

    
3884
  def BuildHooksEnv(self):
3885
    """Build hooks env.
3886

3887
    This runs on master, primary and secondary nodes of the instance.
3888

3889
    """
3890
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3891
    nl = [self.cfg.GetMasterNode()]
3892
    return env, nl, nl
3893

    
3894
  def CheckPrereq(self):
3895
    """Check prerequisites.
3896

3897
    This checks that the instance is in the cluster.
3898

3899
    """
3900
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3901
    assert self.instance is not None, \
3902
      "Cannot retrieve locked instance %s" % self.op.instance_name
3903

    
3904
  def Exec(self, feedback_fn):
3905
    """Remove the instance.
3906

3907
    """
3908
    instance = self.instance
3909
    logging.info("Shutting down instance %s on node %s",
3910
                 instance.name, instance.primary_node)
3911

    
3912
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance)
3913
    msg = result.fail_msg
3914
    if msg:
3915
      if self.op.ignore_failures:
3916
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
3917
      else:
3918
        raise errors.OpExecError("Could not shutdown instance %s on"
3919
                                 " node %s: %s" %
3920
                                 (instance.name, instance.primary_node, msg))
3921

    
3922
    logging.info("Removing block devices for instance %s", instance.name)
3923

    
3924
    if not _RemoveDisks(self, instance):
3925
      if self.op.ignore_failures:
3926
        feedback_fn("Warning: can't remove instance's disks")
3927
      else:
3928
        raise errors.OpExecError("Can't remove instance's disks")
3929

    
3930
    logging.info("Removing instance %s out of cluster config", instance.name)
3931

    
3932
    self.cfg.RemoveInstance(instance.name)
3933
    self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
3934

    
3935

    
3936
class LUQueryInstances(NoHooksLU):
3937
  """Logical unit for querying instances.
3938

3939
  """
3940
  _OP_REQP = ["output_fields", "names", "use_locking"]
3941
  REQ_BGL = False
3942
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
3943
                                    "admin_state",
3944
                                    "disk_template", "ip", "mac", "bridge",
3945
                                    "nic_mode", "nic_link",
3946
                                    "sda_size", "sdb_size", "vcpus", "tags",
3947
                                    "network_port", "beparams",
3948
                                    r"(disk)\.(size)/([0-9]+)",
3949
                                    r"(disk)\.(sizes)", "disk_usage",
3950
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
3951
                                    r"(nic)\.(bridge)/([0-9]+)",
3952
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
3953
                                    r"(disk|nic)\.(count)",
3954
                                    "serial_no", "hypervisor", "hvparams",
3955
                                    "ctime", "mtime",
3956
                                    ] +
3957
                                  ["hv/%s" % name
3958
                                   for name in constants.HVS_PARAMETERS] +
3959
                                  ["be/%s" % name
3960
                                   for name in constants.BES_PARAMETERS])
3961
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
3962

    
3963

    
3964
  def ExpandNames(self):
3965
    _CheckOutputFields(static=self._FIELDS_STATIC,
3966
                       dynamic=self._FIELDS_DYNAMIC,
3967
                       selected=self.op.output_fields)
3968

    
3969
    self.needed_locks = {}
3970
    self.share_locks[locking.LEVEL_INSTANCE] = 1
3971
    self.share_locks[locking.LEVEL_NODE] = 1
3972

    
3973
    if self.op.names:
3974
      self.wanted = _GetWantedInstances(self, self.op.names)
3975
    else:
3976
      self.wanted = locking.ALL_SET
3977

    
3978
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3979
    self.do_locking = self.do_node_query and self.op.use_locking
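    # locks are only needed for live data and only if the caller allows it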
3980
    if self.do_locking:
3981
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
3982
      self.needed_locks[locking.LEVEL_NODE] = []
3983
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3984

    
3985
  def DeclareLocks(self, level):
3986
    if level == locking.LEVEL_NODE and self.do_locking:
3987
      self._LockInstancesNodes()
3988

    
3989
  def CheckPrereq(self):
3990
    """Check prerequisites.
3991

3992
    """
3993
    pass
3994

    
3995
  def Exec(self, feedback_fn):
3996
    """Computes the list of nodes and their attributes.
3997

3998
    """
3999
    all_info = self.cfg.GetAllInstancesInfo()
4000
    if self.wanted == locking.ALL_SET:
4001
      # caller didn't specify instance names, so ordering is not important
4002
      if self.do_locking:
4003
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4004
      else:
4005
        instance_names = all_info.keys()
4006
      instance_names = utils.NiceSort(instance_names)
4007
    else:
4008
      # caller did specify names, so we must keep the ordering
4009
      if self.do_locking:
4010
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4011
      else:
4012
        tgt_set = all_info.keys()
4013
      missing = set(self.wanted).difference(tgt_set)
4014
      if missing:
4015
        raise errors.OpExecError("Some instances were removed before"
4016
                                 " retrieving their data: %s" % missing)
4017
      instance_names = self.wanted
4018

    
4019
    instance_list = [all_info[iname] for iname in instance_names]
4020

    
4021
    # begin data gathering
4022

    
4023
    nodes = frozenset([inst.primary_node for inst in instance_list])
4024
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4025

    
4026
    bad_nodes = []
4027
    off_nodes = []
4028
    if self.do_node_query:
4029
      live_data = {}
4030
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4031
      for name in nodes:
4032
        result = node_data[name]
4033
        if result.offline:
4034
          # offline nodes will be in both lists
4035
          off_nodes.append(name)
4036
        if result.failed or result.fail_msg:
4037
          bad_nodes.append(name)
4038
        else:
4039
          if result.payload:
4040
            live_data.update(result.payload)
4041
          # else no instance is alive
4042
    else:
4043
      live_data = dict([(name, {}) for name in instance_names])
4044

    
4045
    # end data gathering
4046

    
4047
    HVPREFIX = "hv/"
4048
    BEPREFIX = "be/"
4049
    output = []
4050
    cluster = self.cfg.GetClusterInfo()
4051
    for instance in instance_list:
4052
      iout = []
4053
      i_hv = cluster.FillHV(instance)
4054
      i_be = cluster.FillBE(instance)
4055
      i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4056
                                 nic.nicparams) for nic in instance.nics]
4057
      for field in self.op.output_fields:
4058
        st_match = self._FIELDS_STATIC.Matches(field)
4059
        if field == "name":
4060
          val = instance.name
4061
        elif field == "os":
4062
          val = instance.os
4063
        elif field == "pnode":
4064
          val = instance.primary_node
4065
        elif field == "snodes":
4066
          val = list(instance.secondary_nodes)
4067
        elif field == "admin_state":
4068
          val = instance.admin_up
4069
        elif field == "oper_state":
4070
          if instance.primary_node in bad_nodes:
4071
            val = None
4072
          else:
4073
            val = bool(live_data.get(instance.name))
4074
        elif field == "status":
4075
          if instance.primary_node in off_nodes:
4076
            val = "ERROR_nodeoffline"
4077
          elif instance.primary_node in bad_nodes:
4078
            val = "ERROR_nodedown"
4079
          else:
4080
            running = bool(live_data.get(instance.name))
4081
            if running:
4082
              if instance.admin_up:
4083
                val = "running"
4084
              else:
4085
                val = "ERROR_up"
4086
            else:
4087
              if instance.admin_up:
4088
                val = "ERROR_down"
4089
              else:
4090
                val = "ADMIN_down"
4091
        elif field == "oper_ram":
4092
          if instance.primary_node in bad_nodes:
4093
            val = None
4094
          elif instance.name in live_data:
4095
            val = live_data[instance.name].get("memory", "?")
4096
          else:
4097
            val = "-"
4098
        elif field == "vcpus":
4099
          val = i_be[constants.BE_VCPUS]
4100
        elif field == "disk_template":
4101
          val = instance.disk_template
4102
        elif field == "ip":
4103
          if instance.nics:
4104
            val = instance.nics[0].ip
4105
          else:
4106
            val = None
4107
        elif field == "nic_mode":
4108
          if instance.nics:
4109
            val = i_nicp[0][constants.NIC_MODE]
4110
          else:
4111
            val = None
4112
        elif field == "nic_link":
4113
          if instance.nics:
4114
            val = i_nicp[0][constants.NIC_LINK]
4115
          else:
4116
            val = None
4117
        elif field == "bridge":
4118
          if (instance.nics and
4119
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4120
            val = i_nicp[0][constants.NIC_LINK]
4121
          else:
4122
            val = None
4123
        elif field == "mac":
4124
          if instance.nics:
4125
            val = instance.nics[0].mac
4126
          else:
4127
            val = None
4128
        elif field == "sda_size" or field == "sdb_size":
4129
          idx = ord(field[2]) - ord('a')
4130
          try:
4131
            val = instance.FindDisk(idx).size
4132
          except errors.OpPrereqError:
4133
            val = None
4134
        elif field == "disk_usage": # total disk usage per node
4135
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
4136
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4137
        elif field == "tags":
4138
          val = list(instance.GetTags())
4139
        elif field == "serial_no":
4140
          val = instance.serial_no
4141
        elif field == "ctime":
4142
          val = instance.ctime
4143
        elif field == "mtime":
4144
          val = instance.mtime
4145
        elif field == "network_port":
4146
          val = instance.network_port
4147
        elif field == "hypervisor":
4148
          val = instance.hypervisor
4149
        elif field == "hvparams":
4150
          val = i_hv
4151
        elif (field.startswith(HVPREFIX) and
4152
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS):
4153
          val = i_hv.get(field[len(HVPREFIX):], None)
4154
        elif field == "beparams":
4155
          val = i_be
4156
        elif (field.startswith(BEPREFIX) and
4157
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4158
          val = i_be.get(field[len(BEPREFIX):], None)
4159
        elif st_match and st_match.groups():
4160
          # matches a variable list
4161
          st_groups = st_match.groups()
4162
          if st_groups and st_groups[0] == "disk":
4163
            if st_groups[1] == "count":
4164
              val = len(instance.disks)
4165
            elif st_groups[1] == "sizes":
4166
              val = [disk.size for disk in instance.disks]
4167
            elif st_groups[1] == "size":
4168
              try:
4169
                val = instance.FindDisk(st_groups[2]).size
4170
              except errors.OpPrereqError:
4171
                val = None
4172
            else:
4173
              assert False, "Unhandled disk parameter"
4174
          elif st_groups[0] == "nic":
4175
            if st_groups[1] == "count":
4176
              val = len(instance.nics)
4177
            elif st_groups[1] == "macs":
4178
              val = [nic.mac for nic in instance.nics]
4179
            elif st_groups[1] == "ips":
4180
              val = [nic.ip for nic in instance.nics]
4181
            elif st_groups[1] == "modes":
4182
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4183
            elif st_groups[1] == "links":
4184
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4185
            elif st_groups[1] == "bridges":
4186
              val = []
4187
              for nicp in i_nicp:
4188
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4189
                  val.append(nicp[constants.NIC_LINK])
4190
                else:
4191
                  val.append(None)
4192
            else:
4193
              # index-based item
4194
              nic_idx = int(st_groups[2])
4195
              if nic_idx >= len(instance.nics):
4196
                val = None
4197
              else:
4198
                if st_groups[1] == "mac":
4199
                  val = instance.nics[nic_idx].mac
4200
                elif st_groups[1] == "ip":
4201
                  val = instance.nics[nic_idx].ip
4202
                elif st_groups[1] == "mode":
4203
                  val = i_nicp[nic_idx][constants.NIC_MODE]
4204
                elif st_groups[1] == "link":
4205
                  val = i_nicp[nic_idx][constants.NIC_LINK]
4206
                elif st_groups[1] == "bridge":
4207
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4208
                  if nic_mode == constants.NIC_MODE_BRIDGED:
4209
                    val = i_nicp[nic_idx][constants.NIC_LINK]
4210
                  else:
4211
                    val = None
4212
                else:
4213
                  assert False, "Unhandled NIC parameter"
4214
          else:
4215
            assert False, ("Declared but unhandled variable parameter '%s'" %
4216
                           field)
4217
        else:
4218
          assert False, "Declared but unhandled parameter '%s'" % field
4219
        iout.append(val)
4220
      output.append(iout)
4221

    
4222
    return output
4223

    
4224

    
4225
class LUFailoverInstance(LogicalUnit):
4226
  """Failover an instance.
4227

4228
  """
4229
  HPATH = "instance-failover"
4230
  HTYPE = constants.HTYPE_INSTANCE
4231
  _OP_REQP = ["instance_name", "ignore_consistency"]
4232
  REQ_BGL = False
4233

    
4234
  def ExpandNames(self):
4235
    self._ExpandAndLockInstance()
4236
    self.needed_locks[locking.LEVEL_NODE] = []
4237
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4238

    
4239
  def DeclareLocks(self, level):
4240
    if level == locking.LEVEL_NODE:
4241
      self._LockInstancesNodes()
4242

    
4243
  def BuildHooksEnv(self):
4244
    """Build hooks env.
4245

4246
    This runs on master, primary and secondary nodes of the instance.
4247

4248
    """
4249
    env = {
4250
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
4251
      }
4252
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4253
    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
4254
    return env, nl, nl
4255

    
4256
  def CheckPrereq(self):
4257
    """Check prerequisites.
4258

4259
    This checks that the instance is in the cluster.
4260

4261
    """
4262
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4263
    assert self.instance is not None, \
4264
      "Cannot retrieve locked instance %s" % self.op.instance_name
4265

    
4266
    bep = self.cfg.GetClusterInfo().FillBE(instance)
4267
    if instance.disk_template not in constants.DTS_NET_MIRROR:
4268
      raise errors.OpPrereqError("Instance's disk layout is not"
4269
                                 " network mirrored, cannot failover.")
4270

    
4271
    secondary_nodes = instance.secondary_nodes
4272
    if not secondary_nodes:
4273
      raise errors.ProgrammerError("no secondary node but using "
4274
                                   "a mirrored disk template")
4275

    
4276
    target_node = secondary_nodes[0]
4277
    _CheckNodeOnline(self, target_node)
4278
    _CheckNodeNotDrained(self, target_node)
4279
    if instance.admin_up:
4280
      # check memory requirements on the secondary node
4281
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
4282
                           instance.name, bep[constants.BE_MEMORY],
4283
                           instance.hypervisor)
4284
    else:
4285
      self.LogInfo("Not checking memory on the secondary node as"
4286
                   " instance will not be started")
4287

    
4288
    # check bridge existance
4289
    _CheckInstanceBridgesExist(self, instance, node=target_node)
4290

    
4291
  def Exec(self, feedback_fn):
4292
    """Failover an instance.
4293

4294
    The failover is done by shutting it down on its present node and
4295
    starting it on the secondary.
4296

4297
    """
4298
    instance = self.instance
4299

    
4300
    source_node = instance.primary_node
4301
    target_node = instance.secondary_nodes[0]
4302

    
4303
    feedback_fn("* checking disk consistency between source and target")
4304
    for dev in instance.disks:
4305
      # for drbd, these are drbd over lvm
4306
      if not _CheckDiskConsistency(self, dev, target_node, False):
4307
        if instance.admin_up and not self.op.ignore_consistency:
4308
          raise errors.OpExecError("Disk %s is degraded on target node,"
4309
                                   " aborting failover." % dev.iv_name)
4310

    
4311
    feedback_fn("* shutting down instance on source node")
4312
    logging.info("Shutting down instance %s on node %s",
4313
                 instance.name, source_node)
4314

    
4315
    result = self.rpc.call_instance_shutdown(source_node, instance)
4316
    msg = result.fail_msg
4317
    if msg:
4318
      if self.op.ignore_consistency:
4319
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
4320
                             " Proceeding anyway. Please make sure node"
4321
                             " %s is down. Error details: %s",
4322
                             instance.name, source_node, source_node, msg)
4323
      else:
4324
        raise errors.OpExecError("Could not shutdown instance %s on"
4325
                                 " node %s: %s" %
4326
                                 (instance.name, source_node, msg))
4327

    
4328
    feedback_fn("* deactivating the instance's disks on source node")
4329
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
4330
      raise errors.OpExecError("Can't shut down the instance's disks.")
4331

    
4332
    instance.primary_node = target_node
4333
    # distribute new instance config to the other nodes
4334
    self.cfg.Update(instance)
4335

    
4336
    # Only start the instance if it's marked as up
4337
    if instance.admin_up:
4338
      feedback_fn("* activating the instance's disks on target node")
4339
      logging.info("Starting instance %s on node %s",
4340
                   instance.name, target_node)
4341

    
4342
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
4343
                                               ignore_secondaries=True)
4344
      if not disks_ok:
4345
        _ShutdownInstanceDisks(self, instance)
4346
        raise errors.OpExecError("Can't activate the instance's disks")
4347

    
4348
      feedback_fn("* starting the instance on the target node")
4349
      result = self.rpc.call_instance_start(target_node, instance, None, None)
4350
      msg = result.fail_msg
4351
      if msg:
4352
        _ShutdownInstanceDisks(self, instance)
4353
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
4354
                                 (instance.name, target_node, msg))
4355

    
4356

    
4357
class LUMigrateInstance(LogicalUnit):
4358
  """Migrate an instance.
4359

4360
  This is migration without shutting down, compared to the failover,
4361
  which is done with shutdown.
4362

4363
  """
4364
  HPATH = "instance-migrate"
4365
  HTYPE = constants.HTYPE_INSTANCE
4366
  _OP_REQP = ["instance_name", "live", "cleanup"]
4367

    
4368
  REQ_BGL = False
4369

    
4370
  def ExpandNames(self):
4371
    self._ExpandAndLockInstance()
4372

    
4373
    self.needed_locks[locking.LEVEL_NODE] = []
4374
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4375

    
4376
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
4377
                                       self.op.live, self.op.cleanup)
4378
    self.tasklets = [self._migrater]
4379

    
4380
  def DeclareLocks(self, level):
4381
    if level == locking.LEVEL_NODE:
4382
      self._LockInstancesNodes()
4383

    
4384
  def BuildHooksEnv(self):
4385
    """Build hooks env.
4386

4387
    This runs on master, primary and secondary nodes of the instance.
4388

4389
    """
4390
    instance = self._migrater.instance
4391
    env = _BuildInstanceHookEnvByObject(self, instance)
4392
    env["MIGRATE_LIVE"] = self.op.live
4393
    env["MIGRATE_CLEANUP"] = self.op.cleanup
4394
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4395
    return env, nl, nl
4396

    
4397

    
4398
class LUMoveInstance(LogicalUnit):
4399
  """Move an instance by data-copying.
4400

4401
  """
4402
  HPATH = "instance-move"
4403
  HTYPE = constants.HTYPE_INSTANCE
4404
  _OP_REQP = ["instance_name", "target_node"]
4405
  REQ_BGL = False
4406

    
4407
  def ExpandNames(self):
4408
    self._ExpandAndLockInstance()
4409
    target_node = self.cfg.ExpandNodeName(self.op.target_node)
4410
    if target_node is None:
4411
      raise errors.OpPrereqError("Node '%s' not known" %
4412
                                  self.op.target_node)
4413
    self.op.target_node = target_node
4414
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
4415
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
4416

    
4417
  def DeclareLocks(self, level):
4418
    if level == locking.LEVEL_NODE:
4419
      self._LockInstancesNodes(primary_only=True)
4420

    
4421
  def BuildHooksEnv(self):
4422
    """Build hooks env.
4423

4424
    This runs on master, primary and secondary nodes of the instance.
4425

4426
    """
4427
    env = {
4428
      "TARGET_NODE": self.op.target_node,
4429
      }
4430
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4431
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
4432
                                       self.op.target_node]
4433
    return env, nl, nl
4434

    
4435
  def CheckPrereq(self):
4436
    """Check prerequisites.
4437

4438
    This checks that the instance is in the cluster.
4439

4440
    """
4441
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4442
    assert self.instance is not None, \
4443
      "Cannot retrieve locked instance %s" % self.op.instance_name
4444

    
4445
    node = self.cfg.GetNodeInfo(self.op.target_node)
4446
    assert node is not None, \
4447
      "Cannot retrieve locked node %s" % self.op.target_node
4448

    
4449
    self.target_node = target_node = node.name
4450

    
4451
    if target_node == instance.primary_node:
4452
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
4453
                                 (instance.name, target_node))
4454

    
4455
    bep = self.cfg.GetClusterInfo().FillBE(instance)
4456

    
4457
    for idx, dsk in enumerate(instance.disks):
4458
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
4459
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
4460
                                   " cannot copy")
4461

    
4462
    _CheckNodeOnline(self, target_node)
4463
    _CheckNodeNotDrained(self, target_node)
4464

    
4465
    if instance.admin_up:
4466
      # check memory requirements on the secondary node
4467
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
4468
                           instance.name, bep[constants.BE_MEMORY],
4469
                           instance.hypervisor)
4470
    else:
4471
      self.LogInfo("Not checking memory on the secondary node as"
4472
                   " instance will not be started")
4473

    
4474
    # check bridge existance
4475
    _CheckInstanceBridgesExist(self, instance, node=target_node)
4476

    
4477
  def Exec(self, feedback_fn):
4478
    """Move an instance.
4479

4480
    The move is done by shutting it down on its present node, copying
4481
    the data over (slow) and starting it on the new node.
4482

4483
    """
4484
    instance = self.instance
4485

    
4486
    source_node = instance.primary_node
4487
    target_node = self.target_node
4488

    
4489
    self.LogInfo("Shutting down instance %s on source node %s",
4490
                 instance.name, source_node)
4491

    
4492
    result = self.rpc.call_instance_shutdown(source_node, instance)
4493
    msg = result.fail_msg
4494
    if msg:
4495
      if self.op.ignore_consistency:
4496
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
4497
                             " Proceeding anyway. Please make sure node"
4498
                             " %s is down. Error details: %s",
4499
                             instance.name, source_node, source_node, msg)
4500
      else:
4501
        raise errors.OpExecError("Could not shutdown instance %s on"
4502
                                 " node %s: %s" %
4503
                                 (instance.name, source_node, msg))
4504

    
4505
    # create the target disks
4506
    try:
4507
      _CreateDisks(self, instance, target_node=target_node)
4508
    except errors.OpExecError:
4509
      self.LogWarning("Device creation failed, reverting...")
4510
      try:
4511
        _RemoveDisks(self, instance, target_node=target_node)
4512
      finally:
4513
        self.cfg.ReleaseDRBDMinors(instance.name)
4514
        raise
4515

    
4516
    cluster_name = self.cfg.GetClusterInfo().cluster_name
4517

    
4518
    errs = []
4519
    # activate, get path, copy the data over
4520
    for idx, disk in enumerate(instance.disks):
4521
      self.LogInfo("Copying data for disk %d", idx)
4522
      result = self.rpc.call_blockdev_assemble(target_node, disk,
4523
                                               instance.name, True)
4524
      if result.fail_msg:
4525
        self.LogWarning("Can't assemble newly created disk %d: %s",
4526
                        idx, result.fail_msg)
4527
        errs.append(result.fail_msg)
4528
        break
4529
      dev_path = result.payload
4530
      result = self.rpc.call_blockdev_export(source_node, disk,
4531
                                             target_node, dev_path,
4532
                                             cluster_name)
4533
      if result.fail_msg:
4534
        self.LogWarning("Can't copy data over for disk %d: %s",
4535
                        idx, result.fail_msg)
4536
        errs.append(result.fail_msg)
4537
        break
4538

    
4539
    if errs:
4540
      self.LogWarning("Some disks failed to copy, aborting")
4541
      try:
4542
        _RemoveDisks(self, instance, target_node=target_node)
4543
      finally:
4544
        self.cfg.ReleaseDRBDMinors(instance.name)
4545
        raise errors.OpExecError("Errors during disk copy: %s" %
4546
                                 (",".join(errs),))
4547

    
4548
    instance.primary_node = target_node
4549
    self.cfg.Update(instance)
4550

    
4551
    self.LogInfo("Removing the disks on the original node")
4552
    _RemoveDisks(self, instance, target_node=source_node)
4553

    
4554
    # Only start the instance if it's marked as up
4555
    if instance.admin_up:
4556
      self.LogInfo("Starting instance %s on node %s",
4557
                   instance.name, target_node)
4558

    
4559
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
4560
                                           ignore_secondaries=True)
4561
      if not disks_ok:
4562
        _ShutdownInstanceDisks(self, instance)
4563
        raise errors.OpExecError("Can't activate the instance's disks")
4564

    
4565
      result = self.rpc.call_instance_start(target_node, instance, None, None)
4566
      msg = result.fail_msg
4567
      if msg:
4568
        _ShutdownInstanceDisks(self, instance)
4569
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
4570
                                 (instance.name, target_node, msg))
4571

    
4572

    
4573
class LUMigrateNode(LogicalUnit):
4574
  """Migrate all instances from a node.
4575

4576
  """
4577
  HPATH = "node-migrate"
4578
  HTYPE = constants.HTYPE_NODE
4579
  _OP_REQP = ["node_name", "live"]
4580
  REQ_BGL = False
4581

    
4582
  def ExpandNames(self):
4583
    self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name)
4584
    if self.op.node_name is None:
4585
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name)
4586

    
4587
    self.needed_locks = {
4588
      locking.LEVEL_NODE: [self.op.node_name],
4589
      }
4590

    
4591
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
4592

    
4593
    # Create tasklets for migrating instances for all instances on this node
4594
    names = []
4595
    tasklets = []
4596

    
4597
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
4598
      logging.debug("Migrating instance %s", inst.name)
4599
      names.append(inst.name)
4600

    
4601
      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
4602

    
4603
    self.tasklets = tasklets
4604

    
4605
    # Declare instance locks
4606
    self.needed_locks[locking.LEVEL_INSTANCE] = names
4607

    
4608
  def DeclareLocks(self, level):
4609
    if level == locking.LEVEL_NODE:
4610
      self._LockInstancesNodes()
4611

    
4612
  def BuildHooksEnv(self):
4613
    """Build hooks env.
4614

4615
    This runs on the master, the primary and all the secondaries.
4616

4617
    """
4618
    env = {
4619
      "NODE_NAME": self.op.node_name,
4620
      }
4621

    
4622
    nl = [self.cfg.GetMasterNode()]
4623

    
4624
    return (env, nl, nl)
4625

    
4626

    
4627
class TLMigrateInstance(Tasklet):
4628
  def __init__(self, lu, instance_name, live, cleanup):
4629
    """Initializes this class.
4630

4631
    """
4632
    Tasklet.__init__(self, lu)
4633

    
4634
    # Parameters
4635
    self.instance_name = instance_name
4636
    self.live = live
4637
    self.cleanup = cleanup
4638

    
4639
  def CheckPrereq(self):
4640
    """Check prerequisites.
4641

4642
    This checks that the instance is in the cluster.
4643

4644
    """
4645
    instance = self.cfg.GetInstanceInfo(
4646
      self.cfg.ExpandInstanceName(self.instance_name))
4647
    if instance is None:
4648
      raise errors.OpPrereqError("Instance '%s' not known" %
4649
                                 self.instance_name)
4650

    
4651
    if instance.disk_template != constants.DT_DRBD8:
4652
      raise errors.OpPrereqError("Instance's disk layout is not"
4653
                                 " drbd8, cannot migrate.")
4654

    
4655
    secondary_nodes = instance.secondary_nodes
4656
    if not secondary_nodes:
4657
      raise errors.ConfigurationError("No secondary node but using"
4658
                                      " drbd8 disk template")
4659

    
4660
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
4661

    
4662
    target_node = secondary_nodes[0]
4663
    # check memory requirements on the secondary node
4664
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
4665
                         instance.name, i_be[constants.BE_MEMORY],
4666
                         instance.hypervisor)
4667

    
4668
    # check bridge existance
4669
    _CheckInstanceBridgesExist(self, instance, node=target_node)
4670

    
4671
    if not self.cleanup:
4672
      _CheckNodeNotDrained(self, target_node)
4673
      result = self.rpc.call_instance_migratable(instance.primary_node,
4674
                                                 instance)
4675
      result.Raise("Can't migrate, please use failover", prereq=True)
4676

    
4677
    self.instance = instance
4678

    
4679
  def _WaitUntilSync(self):
4680
    """Poll with custom rpc for disk sync.
4681

4682
    This uses our own step-based rpc call.
4683

4684
    """
4685
    self.feedback_fn("* wait until resync is done")
4686
    all_done = False
4687
    while not all_done:
4688
      all_done = True
4689
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
4690
                                            self.nodes_ip,
4691
                                            self.instance.disks)
4692
      min_percent = 100
4693
      for node, nres in result.items():
4694
        nres.Raise("Cannot resync disks on node %s" % node)
4695
        node_done, node_percent = nres.payload
4696
        all_done = all_done and node_done
4697
        if node_percent is not None:
4698
          min_percent = min(min_percent, node_percent)
4699
      if not all_done:
4700
        if min_percent < 100:
4701
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
4702
        time.sleep(2)
4703

    
4704
  def _EnsureSecondary(self, node):
4705
    """Demote a node to secondary.
4706

4707
    """
4708
    self.feedback_fn("* switching node %s to secondary mode" % node)
4709

    
4710
    for dev in self.instance.disks:
4711
      self.cfg.SetDiskID(dev, node)
4712

    
4713
    result = self.rpc.call_blockdev_close(node, self.instance.name,
4714
                                          self.instance.disks)
4715
    result.Raise("Cannot change disk to secondary on node %s" % node)
4716

    
4717
  def _GoStandalone(self):
4718
    """Disconnect from the network.
4719

4720
    """
4721
    self.feedback_fn("* changing into standalone mode")
4722
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
4723
                                               self.instance.disks)
4724
    for node, nres in result.items():
4725
      nres.Raise("Cannot disconnect disks node %s" % node)
4726

    
4727
  def _GoReconnect(self, multimaster):
4728
    """Reconnect to the network.
4729

4730
    """
4731
    if multimaster:
4732
      msg = "dual-master"
4733
    else:
4734
      msg = "single-master"
4735
    self.feedback_fn("* changing disks into %s mode" % msg)
4736
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
4737
                                           self.instance.disks,
4738
                                           self.instance.name, multimaster)
4739
    for node, nres in result.items():
4740
      nres.Raise("Cannot change disks config on node %s" % node)
4741

    
4742
  def _ExecCleanup(self):
4743
    """Try to cleanup after a failed migration.
4744

4745
    The cleanup is done by:
4746
      - check that the instance is running only on one node
4747
        (and update the config if needed)
4748
      - change disks on its secondary node to secondary
4749
      - wait until disks are fully synchronized
4750
      - disconnect from the network
4751
      - change disks into single-master mode
4752
      - wait again until disks are fully synchronized
4753

4754
    """
4755
    instance = self.instance
4756
    target_node = self.target_node
4757
    source_node = self.source_node
4758

    
4759
    # check running on only one node
4760
    self.feedback_fn("* checking where the instance actually runs"
4761
                     " (if this hangs, the hypervisor might be in"
4762
                     " a bad state)")
4763
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
4764
    for node, result in ins_l.items():
4765
      result.Raise("Can't contact node %s" % node)
4766

    
4767
    runningon_source = instance.name in ins_l[source_node].payload
4768
    runningon_target = instance.name in ins_l[target_node].payload
4769

    
4770
    if runningon_source and runningon_target:
4771
      raise errors.OpExecError("Instance seems to be running on two nodes,"
4772
                               " or the hypervisor is confused. You will have"
4773
                               " to ensure manually that it runs only on one"
4774
                               " and restart this operation.")
4775

    
4776
    if not (runningon_source or runningon_target):
4777
      raise errors.OpExecError("Instance does not seem to be running at all."
4778
                               " In this case, it's safer to repair by"
4779
                               " running 'gnt-instance stop' to ensure disk"
4780
                               " shutdown, and then restarting it.")
4781

    
4782
    if runningon_target:
4783
      # the migration has actually succeeded, we need to update the config
4784
      self.feedback_fn("* instance running on secondary node (%s),"
4785
                       " updating config" % target_node)
4786
      instance.primary_node = target_node
4787
      self.cfg.Update(instance)
4788
      demoted_node = source_node
4789
    else:
4790
      self.feedback_fn("* instance confirmed to be running on its"
4791
                       " primary node (%s)" % source_node)
4792
      demoted_node = target_node
4793

    
4794
    self._EnsureSecondary(demoted_node)
4795
    try:
4796
      self._WaitUntilSync()
4797
    except errors.OpExecError:
4798
      # we ignore here errors, since if the device is standalone, it
4799
      # won't be able to sync
4800
      pass
4801
    self._GoStandalone()
4802
    self._GoReconnect(False)
4803
    self._WaitUntilSync()
4804

    
4805
    self.feedback_fn("* done")
4806

    
4807
  def _RevertDiskStatus(self):
4808
    """Try to revert the disk status after a failed migration.
4809

4810
    """
4811
    target_node = self.target_node
4812
    try:
4813
      self._EnsureSecondary(target_node)
4814
      self._GoStandalone()
4815
      self._GoReconnect(False)
4816
      self._WaitUntilSync()
4817
    except errors.OpExecError, err:
4818
      self.lu.LogWarning("Migration failed and I can't reconnect the"
4819
                         " drives: error '%s'\n"
4820
                         "Please look and recover the instance status" %
4821
                         str(err))
4822

    
4823
  def _AbortMigration(self):
4824
    """Call the hypervisor code to abort a started migration.
4825

4826
    """
4827
    instance = self.instance
4828
    target_node = self.target_node
4829
    migration_info = self.migration_info
4830

    
4831
    abort_result = self.rpc.call_finalize_migration(target_node,
4832
                                                    instance,
4833
                                                    migration_info,
4834
                                                    False)
4835
    abort_msg = abort_result.fail_msg
4836
    if abort_msg:
4837
      logging.error("Aborting migration failed on target node %s: %s" %
4838
                    (target_node, abort_msg))
4839
      # Don't raise an exception here, as we stil have to try to revert the
4840
      # disk status, even if this step failed.
4841

    
4842
  def _ExecMigration(self):
4843
    """Migrate an instance.
4844

4845
    The migrate is done by:
4846
      - change the disks into dual-master mode
4847
      - wait until disks are fully synchronized again
4848
      - migrate the instance
4849
      - change disks on the new secondary node (the old primary) to secondary
4850
      - wait until disks are fully synchronized
4851
      - change disks into single-master mode
4852

4853
    """
4854
    instance = self.instance
4855
    target_node = self.target_node
4856
    source_node = self.source_node
4857

    
4858
    self.feedback_fn("* checking disk consistency between source and target")
4859
    for dev in instance.disks:
4860
      if not _CheckDiskConsistency(self, dev, target_node, False):
4861
        raise errors.OpExecError("Disk %s is degraded or not fully"
4862
                                 " synchronized on target node,"
4863
                                 " aborting migrate." % dev.iv_name)
4864

    
4865
    # First get the migration information from the remote node
4866
    result = self.rpc.call_migration_info(source_node, instance)
4867
    msg = result.fail_msg
4868
    if msg:
4869
      log_err = ("Failed fetching source migration information from %s: %s" %
4870
                 (source_node, msg))
4871
      logging.error(log_err)
4872
      raise errors.OpExecError(log_err)
4873

    
4874
    self.migration_info = migration_info = result.payload
4875

    
4876
    # Then switch the disks to master/master mode
4877
    self._EnsureSecondary(target_node)
4878
    self._GoStandalone()
4879
    self._GoReconnect(True)
4880
    self._WaitUntilSync()
4881

    
4882
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
4883
    result = self.rpc.call_accept_instance(target_node,
4884
                                           instance,
4885
                                           migration_info,
4886
                                           self.nodes_ip[target_node])
4887

    
4888
    msg = result.fail_msg
4889
    if msg:
4890
      logging.error("Instance pre-migration failed, trying to revert"
4891
                    " disk status: %s", msg)
4892
      self._AbortMigration()
4893
      self._RevertDiskStatus()
4894
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
4895
                               (instance.name, msg))
4896

    
4897
    self.feedback_fn("* migrating instance to %s" % target_node)
4898
    time.sleep(10)
4899
    result = self.rpc.call_instance_migrate(source_node, instance,
4900
                                            self.nodes_ip[target_node],
4901
                                            self.live)
4902
    msg = result.fail_msg
4903
    if msg:
4904
      logging.error("Instance migration failed, trying to revert"
4905
                    " disk status: %s", msg)
4906
      self._AbortMigration()
4907
      self._RevertDiskStatus()
4908
      raise errors.OpExecError("Could not migrate instance %s: %s" %
4909
                               (instance.name, msg))
4910
    time.sleep(10)
4911

    
4912
    instance.primary_node = target_node
4913
    # distribute new instance config to the other nodes
4914
    self.cfg.Update(instance)
4915

    
4916
    result = self.rpc.call_finalize_migration(target_node,
4917
                                              instance,
4918
                                              migration_info,
4919
                                              True)
4920
    msg = result.fail_msg
4921
    if msg:
4922
      logging.error("Instance migration succeeded, but finalization failed:"
4923
                    " %s" % msg)
4924
      raise errors.OpExecError("Could not finalize instance migration: %s" %
4925
                               msg)
4926

    
4927
    self._EnsureSecondary(source_node)
4928
    self._WaitUntilSync()
4929
    self._GoStandalone()
4930
    self._GoReconnect(False)
4931
    self._WaitUntilSync()
4932

    
4933
    self.feedback_fn("* done")
4934

    
4935
  def Exec(self, feedback_fn):
4936
    """Perform the migration.
4937

4938
    """
4939
    feedback_fn("Migrating instance %s" % self.instance.name)
4940

    
4941
    self.feedback_fn = feedback_fn
4942

    
4943
    self.source_node = self.instance.primary_node
4944
    self.target_node = self.instance.secondary_nodes[0]
4945
    self.all_nodes = [self.source_node, self.target_node]
4946
    self.nodes_ip = {
4947
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
4948
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
4949
      }
4950

    
4951
    if self.cleanup:
4952
      return self._ExecCleanup()
4953
    else:
4954
      return self._ExecMigration()
4955

    
4956

    
4957
def _CreateBlockDev(lu, node, instance, device, force_create,
4958
                    info, force_open):
4959
  """Create a tree of block devices on a given node.
4960

4961
  If this device type has to be created on secondaries, create it and
4962
  all its children.
4963

4964
  If not, just recurse to children keeping the same 'force' value.
4965

4966
  @param lu: the lu on whose behalf we execute
4967
  @param node: the node on which to create the device
4968
  @type instance: L{objects.Instance}
4969
  @param instance: the instance which owns the device
4970
  @type device: L{objects.Disk}
4971
  @param device: the device to create
4972
  @type force_create: boolean
4973
  @param force_create: whether to force creation of this device; this
4974
      will be change to True whenever we find a device which has
4975
      CreateOnSecondary() attribute
4976
  @param info: the extra 'metadata' we should attach to the device
4977
      (this will be represented as a LVM tag)
4978
  @type force_open: boolean
4979
  @param force_open: this parameter will be passes to the
4980
      L{backend.BlockdevCreate} function where it specifies
4981
      whether we run on primary or not, and it affects both
4982
      the child assembly and the device own Open() execution
4983

4984
  """
4985
  if device.CreateOnSecondary():
4986
    force_create = True
4987

    
4988
  if device.children:
4989
    for child in device.children:
4990
      _CreateBlockDev(lu, node, instance, child, force_create,
4991
                      info, force_open)
4992

    
4993
  if not force_create:
4994
    return
4995

    
4996
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
4997

    
4998

    
4999
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5000
  """Create a single block device on a given node.
5001

5002
  This will not recurse over children of the device, so they must be
5003
  created in advance.
5004

5005
  @param lu: the lu on whose behalf we execute
5006
  @param node: the node on which to create the device
5007
  @type instance: L{objects.Instance}
5008
  @param instance: the instance which owns the device
5009
  @type device: L{objects.Disk}
5010
  @param device: the device to create
5011
  @param info: the extra 'metadata' we should attach to the device
5012
      (this will be represented as a LVM tag)
5013
  @type force_open: boolean
5014
  @param force_open: this parameter will be passes to the
5015
      L{backend.BlockdevCreate} function where it specifies
5016
      whether we run on primary or not, and it affects both
5017
      the child assembly and the device own Open() execution
5018

5019
  """
5020
  lu.cfg.SetDiskID(device, node)
5021
  result = lu.rpc.call_blockdev_create(node, device, device.size,
5022
                                       instance.name, force_open, info)
5023
  result.Raise("Can't create block device %s on"
5024
               " node %s for instance %s" % (device, node, instance.name))
5025
  if device.physical_id is None:
5026
    device.physical_id = result.payload
5027

    
5028

    
5029
def _GenerateUniqueNames(lu, exts):
5030
  """Generate a suitable LV name.
5031

5032
  This will generate a logical volume name for the given instance.
5033

5034
  """
5035
  results = []
5036
  for val in exts:
5037
    new_id = lu.cfg.GenerateUniqueID()
5038
    results.append("%s%s" % (new_id, val))
5039
  return results
5040

    
5041

    
5042
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
5043
                         p_minor, s_minor):
5044
  """Generate a drbd8 device complete with its children.
5045

5046
  """
5047
  port = lu.cfg.AllocatePort()
5048
  vgname = lu.cfg.GetVGName()
5049
  shared_secret = lu.cfg.GenerateDRBDSecret()
5050
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5051
                          logical_id=(vgname, names[0]))
5052
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5053
                          logical_id=(vgname, names[1]))
5054
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
5055
                          logical_id=(primary, secondary, port,
5056
                                      p_minor, s_minor,
5057
                                      shared_secret),
5058
                          children=[dev_data, dev_meta],
5059
                          iv_name=iv_name)
5060
  return drbd_dev
5061

    
5062

    
5063
def _GenerateDiskTemplate(lu, template_name,
5064
                          instance_name, primary_node,
5065
                          secondary_nodes, disk_info,
5066
                          file_storage_dir, file_driver,
5067
                          base_index):
5068
  """Generate the entire disk layout for a given template type.
5069

5070
  """
5071
  #TODO: compute space requirements
5072

    
5073
  vgname = lu.cfg.GetVGName()
5074
  disk_count = len(disk_info)
5075
  disks = []
5076
  if template_name == constants.DT_DISKLESS:
5077
    pass
5078
  elif template_name == constants.DT_PLAIN:
5079
    if len(secondary_nodes) != 0:
5080
      raise errors.ProgrammerError("Wrong template configuration")
5081

    
5082
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5083
                                      for i in range(disk_count)])
5084
    for idx, disk in enumerate(disk_info):
5085
      disk_index = idx + base_index
5086
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
5087
                              logical_id=(vgname, names[idx]),
5088
                              iv_name="disk/%d" % disk_index,
5089
                              mode=disk["mode"])
5090
      disks.append(disk_dev)
5091
  elif template_name == constants.DT_DRBD8:
5092
    if len(secondary_nodes) != 1:
5093
      raise errors.ProgrammerError("Wrong template configuration")
5094
    remote_node = secondary_nodes[0]
5095
    minors = lu.cfg.AllocateDRBDMinor(
5096
      [primary_node, remote_node] * len(disk_info), instance_name)
5097

    
5098
    names = []
5099
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5100
                                               for i in range(disk_count)]):
5101
      names.append(lv_prefix + "_data")
5102
      names.append(lv_prefix + "_meta")
5103
    for idx, disk in enumerate(disk_info):
5104
      disk_index = idx + base_index
5105
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
5106
                                      disk["size"], names[idx*2:idx*2+2],
5107
                                      "disk/%d" % disk_index,
5108
                                      minors[idx*2], minors[idx*2+1])
5109
      disk_dev.mode = disk["mode"]
5110
      disks.append(disk_dev)
5111
  elif template_name == constants.DT_FILE:
5112
    if len(secondary_nodes) != 0:
5113
      raise errors.ProgrammerError("Wrong template configuration")
5114

    
5115
    for idx, disk in enumerate(disk_info):
5116
      disk_index = idx + base_index
5117
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
5118
                              iv_name="disk/%d" % disk_index,
5119
                              logical_id=(file_driver,
5120
                                          "%s/disk%d" % (file_storage_dir,
5121
                                                         disk_index)),
5122
                              mode=disk["mode"])
5123
      disks.append(disk_dev)
5124
  else:
5125
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
5126
  return disks
5127

    
5128

    
5129
def _GetInstanceInfoText(instance):
5130
  """Compute that text that should be added to the disk's metadata.
5131

5132
  """
5133
  return "originstname+%s" % instance.name
5134

    
5135

    
5136
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
5137
  """Create all disks for an instance.
5138

5139
  This abstracts away some work from AddInstance.
5140

5141
  @type lu: L{LogicalUnit}
5142
  @param lu: the logical unit on whose behalf we execute
5143
  @type instance: L{objects.Instance}
5144
  @param instance: the instance whose disks we should create
5145
  @type to_skip: list
5146
  @param to_skip: list of indices to skip
5147
  @type target_node: string
5148
  @param target_node: if passed, overrides the target node for creation
5149
  @rtype: boolean
5150
  @return: the success of the creation
5151

5152
  """
5153
  info = _GetInstanceInfoText(instance)
5154
  if target_node is None:
5155
    pnode = instance.primary_node
5156
    all_nodes = instance.all_nodes
5157
  else:
5158
    pnode = target_node
5159
    all_nodes = [pnode]
5160

    
5161
  if instance.disk_template == constants.DT_FILE:
5162
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5163
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
5164

    
5165
    result.Raise("Failed to create directory '%s' on"
5166
                 " node %s: %s" % (file_storage_dir, pnode))
5167

    
5168
  # Note: this needs to be kept in sync with adding of disks in
5169
  # LUSetInstanceParams
5170
  for idx, device in enumerate(instance.disks):
5171
    if to_skip and idx in to_skip:
5172
      continue
5173
    logging.info("Creating volume %s for instance %s",
5174
                 device.iv_name, instance.name)
5175
    #HARDCODE
5176
    for node in all_nodes:
5177
      f_create = node == pnode
5178
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
5179

    
5180

    
5181
def _RemoveDisks(lu, instance, target_node=None):
5182
  """Remove all disks for an instance.
5183

5184
  This abstracts away some work from `AddInstance()` and
5185
  `RemoveInstance()`. Note that in case some of the devices couldn't
5186
  be removed, the removal will continue with the other ones (compare
5187
  with `_CreateDisks()`).
5188

5189
  @type lu: L{LogicalUnit}
5190
  @param lu: the logical unit on whose behalf we execute
5191
  @type instance: L{objects.Instance}
5192
  @param instance: the instance whose disks we should remove
5193
  @type target_node: string
5194
  @param target_node: used to override the node on which to remove the disks
5195
  @rtype: boolean
5196
  @return: the success of the removal
5197

5198
  """
5199
  logging.info("Removing block devices for instance %s", instance.name)
5200

    
5201
  all_result = True
5202
  for device in instance.disks:
5203
    if target_node:
5204
      edata = [(target_node, device)]
5205
    else:
5206
      edata = device.ComputeNodeTree(instance.primary_node)
5207
    for node, disk in edata:
5208
      lu.cfg.SetDiskID(disk, node)
5209
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
5210
      if msg:
5211
        lu.LogWarning("Could not remove block device %s on node %s,"
5212
                      " continuing anyway: %s", device.iv_name, node, msg)
5213
        all_result = False
5214

    
5215
  if instance.disk_template == constants.DT_FILE:
5216
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5217
    if target_node is node:
5218
      tgt = instance.primary_node
5219
    else:
5220
      tgt = instance.target_node
5221
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
5222
    if result.fail_msg:
5223
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
5224
                    file_storage_dir, instance.primary_node, result.fail_msg)
5225
      all_result = False
5226

    
5227
  return all_result
5228

    
5229

    
5230
def _ComputeDiskSize(disk_template, disks):
5231
  """Compute disk size requirements in the volume group
5232

5233
  """
5234
  # Required free disk space as a function of disk and swap space
5235
  req_size_dict = {
5236
    constants.DT_DISKLESS: None,
5237
    constants.DT_PLAIN: sum(d["size"] for d in disks),
5238
    # 128 MB are added for drbd metadata for each disk
5239
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
5240
    constants.DT_FILE: None,
5241
  }
5242

    
5243
  if disk_template not in req_size_dict:
5244
    raise errors.ProgrammerError("Disk template '%s' size requirement"
5245
                                 " is unknown" %  disk_template)
5246

    
5247
  return req_size_dict[disk_template]
5248

    
5249

    
5250
def _CheckHVParams(lu, nodenames, hvname, hvparams):
5251
  """Hypervisor parameter validation.
5252

5253
  This function abstract the hypervisor parameter validation to be
5254
  used in both instance create and instance modify.
5255

5256
  @type lu: L{LogicalUnit}
5257
  @param lu: the logical unit for which we check
5258
  @type nodenames: list
5259
  @param nodenames: the list of nodes on which we should check
5260
  @type hvname: string
5261
  @param hvname: the name of the hypervisor we should use
5262
  @type hvparams: dict
5263
  @param hvparams: the parameters which we need to check
5264
  @raise errors.OpPrereqError: if the parameters are not valid
5265

5266
  """
5267
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
5268
                                                  hvname,
5269
                                                  hvparams)
5270
  for node in nodenames:
5271
    info = hvinfo[node]
5272
    if info.offline:
5273
      continue
5274
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
5275

    
5276

    
5277
class LUCreateInstance(LogicalUnit):
5278
  """Create an instance.
5279

5280
  """
5281
  HPATH = "instance-add"
5282
  HTYPE = constants.HTYPE_INSTANCE
5283
  _OP_REQP = ["instance_name", "disks", "disk_template",
5284
              "mode", "start",
5285
              "wait_for_sync", "ip_check", "nics",
5286
              "hvparams", "beparams"]
5287
  REQ_BGL = False
5288

    
5289
  def _ExpandNode(self, node):
5290
    """Expands and checks one node name.
5291

5292
    """
5293
    node_full = self.cfg.ExpandNodeName(node)
5294
    if node_full is None:
5295
      raise errors.OpPrereqError("Unknown node %s" % node)
5296
    return node_full
5297

    
5298
  def ExpandNames(self):
5299
    """ExpandNames for CreateInstance.
5300

5301
    Figure out the right locks for instance creation.
5302

5303
    """
5304
    self.needed_locks = {}
5305

    
5306
    # set optional parameters to none if they don't exist
5307
    for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
5308
      if not hasattr(self.op, attr):
5309
        setattr(self.op, attr, None)
5310

    
5311
    # cheap checks, mostly valid constants given
5312

    
5313
    # verify creation mode
5314
    if self.op.mode not in (constants.INSTANCE_CREATE,
5315
                            constants.INSTANCE_IMPORT):
5316
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
5317
                                 self.op.mode)
5318

    
5319
    # disk template and mirror node verification
5320
    if self.op.disk_template not in constants.DISK_TEMPLATES:
5321
      raise errors.OpPrereqError("Invalid disk template name")
5322

    
5323
    if self.op.hypervisor is None:
5324
      self.op.hypervisor = self.cfg.GetHypervisorType()
5325

    
5326
    cluster = self.cfg.GetClusterInfo()
5327
    enabled_hvs = cluster.enabled_hypervisors
5328
    if self.op.hypervisor not in enabled_hvs:
5329
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
5330
                                 " cluster (%s)" % (self.op.hypervisor,
5331
                                  ",".join(enabled_hvs)))
5332

    
5333
    # check hypervisor parameter syntax (locally)
5334
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5335
    filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
5336
                                  self.op.hvparams)
5337
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
5338
    hv_type.CheckParameterSyntax(filled_hvp)
5339
    self.hv_full = filled_hvp
5340

    
5341
    # fill and remember the beparams dict
5342
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5343
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
5344
                                    self.op.beparams)
5345

    
5346
    #### instance parameters check
5347

    
5348
    # instance name verification
5349
    hostname1 = utils.HostInfo(self.op.instance_name)
5350
    self.op.instance_name = instance_name = hostname1.name
5351

    
5352
    # this is just a preventive check, but someone might still add this
5353
    # instance in the meantime, and creation will fail at lock-add time
5354
    if instance_name in self.cfg.GetInstanceList():
5355
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5356
                                 instance_name)
5357

    
5358
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
5359

    
5360
    # NIC buildup
5361
    self.nics = []
5362
    for idx, nic in enumerate(self.op.nics):
5363
      nic_mode_req = nic.get("mode", None)
5364
      nic_mode = nic_mode_req
5365
      if nic_mode is None:
5366
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
5367

    
5368
      # in routed mode, for the first nic, the default ip is 'auto'
5369
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
5370
        default_ip_mode = constants.VALUE_AUTO
5371
      else:
5372
        default_ip_mode = constants.VALUE_NONE
5373

    
5374
      # ip validity checks
5375
      ip = nic.get("ip", default_ip_mode)
5376
      if ip is None or ip.lower() == constants.VALUE_NONE:
5377
        nic_ip = None
5378
      elif ip.lower() == constants.VALUE_AUTO:
5379
        nic_ip = hostname1.ip
5380
      else:
5381
        if not utils.IsValidIP(ip):
5382
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
5383
                                     " like a valid IP" % ip)
5384
        nic_ip = ip
5385

    
5386
      # TODO: check the ip for uniqueness !!
5387
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
5388
        raise errors.OpPrereqError("Routed nic mode requires an ip address")
5389

    
5390
      # MAC address verification
5391
      mac = nic.get("mac", constants.VALUE_AUTO)
5392
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
5393
        if not utils.IsValidMac(mac.lower()):
5394
          raise errors.OpPrereqError("Invalid MAC address specified: %s" %
5395
                                     mac)
5396
      # bridge verification
5397
      bridge = nic.get("bridge", None)
5398
      link = nic.get("link", None)
5399
      if bridge and link:
5400
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
5401
                                   " at the same time")
5402
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
5403
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic")
5404
      elif bridge:
5405
        link = bridge
5406

    
5407
      nicparams = {}
5408
      if nic_mode_req:
5409
        nicparams[constants.NIC_MODE] = nic_mode_req
5410
      if link:
5411
        nicparams[constants.NIC_LINK] = link
5412

    
5413
      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
5414
                                      nicparams)
5415
      objects.NIC.CheckParameterSyntax(check_params)
5416
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
5417

    
5418
    # disk checks/pre-build
5419
    self.disks = []
5420
    for disk in self.op.disks:
5421
      mode = disk.get("mode", constants.DISK_RDWR)
5422
      if mode not in constants.DISK_ACCESS_SET:
5423
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
5424
                                   mode)
5425
      size = disk.get("size", None)
5426
      if size is None:
5427
        raise errors.OpPrereqError("Missing disk size")
5428
      try:
5429
        size = int(size)
5430
      except ValueError:
5431
        raise errors.OpPrereqError("Invalid disk size '%s'" % size)
5432
      self.disks.append({"size": size, "mode": mode})
5433

    
5434
    # used in CheckPrereq for ip ping check
5435
    self.check_ip = hostname1.ip
5436

    
5437
    # file storage checks
5438
    if (self.op.file_driver and
5439
        not self.op.file_driver in constants.FILE_DRIVER):
5440
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
5441
                                 self.op.file_driver)
5442

    
5443
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
5444
      raise errors.OpPrereqError("File storage directory path not absolute")
5445

    
5446
    ### Node/iallocator related checks
5447
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
5448
      raise errors.OpPrereqError("One and only one of iallocator and primary"
5449
                                 " node must be given")
5450

    
5451
    if self.op.iallocator:
5452
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5453
    else:
5454
      self.op.pnode = self._ExpandNode(self.op.pnode)
5455
      nodelist = [self.op.pnode]
5456
      if self.op.snode is not None:
5457
        self.op.snode = self._ExpandNode(self.op.snode)
5458
        nodelist.append(self.op.snode)
5459
      self.needed_locks[locking.LEVEL_NODE] = nodelist
5460

    
5461
    # in case of import lock the source node too
5462
    if self.op.mode == constants.INSTANCE_IMPORT:
5463
      src_node = getattr(self.op, "src_node", None)
5464
      src_path = getattr(self.op, "src_path", None)
5465

    
5466
      if src_path is None:
5467
        self.op.src_path = src_path = self.op.instance_name
5468

    
5469
      if src_node is None:
5470
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5471
        self.op.src_node = None
5472
        if os.path.isabs(src_path):
5473
          raise errors.OpPrereqError("Importing an instance from an absolute"
5474
                                     " path requires a source node option.")
5475
      else:
5476
        self.op.src_node = src_node = self._ExpandNode(src_node)
5477
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5478
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
5479
        if not os.path.isabs(src_path):
5480
          self.op.src_path = src_path = \
5481
            os.path.join(constants.EXPORT_DIR, src_path)
5482

    
5483
    else: # INSTANCE_CREATE
5484
      if getattr(self.op, "os_type", None) is None:
5485
        raise errors.OpPrereqError("No guest OS specified")
5486

    
5487
  def _RunAllocator(self):
5488
    """Run the allocator based on input opcode.
5489

5490
    """
5491
    nics = [n.ToDict() for n in self.nics]
5492
    ial = IAllocator(self.cfg, self.rpc,
5493
                     mode=constants.IALLOCATOR_MODE_ALLOC,
5494
                     name=self.op.instance_name,
5495
                     disk_template=self.op.disk_template,
5496
                     tags=[],
5497
                     os=self.op.os_type,
5498
                     vcpus=self.be_full[constants.BE_VCPUS],
5499
                     mem_size=self.be_full[constants.BE_MEMORY],
5500
                     disks=self.disks,
5501
                     nics=nics,
5502
                     hypervisor=self.op.hypervisor,
5503
                     )
5504

    
5505
    ial.Run(self.op.iallocator)
5506

    
5507
    if not ial.success:
5508
      raise errors.OpPrereqError("Can't compute nodes using"
5509
                                 " iallocator '%s': %s" % (self.op.iallocator,
5510
                                                           ial.info))
5511
    if len(ial.nodes) != ial.required_nodes:
5512
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
5513
                                 " of nodes (%s), required %s" %
5514
                                 (self.op.iallocator, len(ial.nodes),
5515
                                  ial.required_nodes))
5516
    self.op.pnode = ial.nodes[0]
5517
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
5518
                 self.op.instance_name, self.op.iallocator,
5519
                 ", ".join(ial.nodes))
5520
    if ial.required_nodes == 2:
5521
      self.op.snode = ial.nodes[1]
5522

    
5523
  def BuildHooksEnv(self):
5524
    """Build hooks env.
5525

5526
    This runs on master, primary and secondary nodes of the instance.
5527

5528
    """
5529
    env = {
5530
      "ADD_MODE": self.op.mode,
5531
      }
5532
    if self.op.mode == constants.INSTANCE_IMPORT:
5533
      env["SRC_NODE"] = self.op.src_node
5534
      env["SRC_PATH"] = self.op.src_path
5535
      env["SRC_IMAGES"] = self.src_images
5536

    
5537
    env.update(_BuildInstanceHookEnv(
5538
      name=self.op.instance_name,
5539
      primary_node=self.op.pnode,
5540
      secondary_nodes=self.secondaries,
5541
      status=self.op.start,
5542
      os_type=self.op.os_type,
5543
      memory=self.be_full[constants.BE_MEMORY],
5544
      vcpus=self.be_full[constants.BE_VCPUS],
5545
      nics=_NICListToTuple(self, self.nics),
5546
      disk_template=self.op.disk_template,
5547
      disks=[(d["size"], d["mode"]) for d in self.disks],
5548
      bep=self.be_full,
5549
      hvp=self.hv_full,
5550
      hypervisor_name=self.op.hypervisor,
5551
    ))
5552

    
5553
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
5554
          self.secondaries)
5555
    return env, nl, nl
5556

    
5557

    
5558
  def CheckPrereq(self):
5559
    """Check prerequisites.
5560

5561
    """
5562
    if (not self.cfg.GetVGName() and
5563
        self.op.disk_template not in constants.DTS_NOT_LVM):
5564
      raise errors.OpPrereqError("Cluster does not support lvm-based"
5565
                                 " instances")
5566

    
5567
    if self.op.mode == constants.INSTANCE_IMPORT:
5568
      src_node = self.op.src_node
5569
      src_path = self.op.src_path
5570

    
5571
      if src_node is None:
5572
        locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
5573
        exp_list = self.rpc.call_export_list(locked_nodes)
5574
        found = False
5575
        for node in exp_list:
5576
          if exp_list[node].fail_msg:
5577
            continue
5578
          if src_path in exp_list[node].payload:
5579
            found = True
5580
            self.op.src_node = src_node = node
5581
            self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
5582
                                                       src_path)
5583
            break
5584
        if not found:
5585
          raise errors.OpPrereqError("No export found for relative path %s" %
5586
                                      src_path)
5587

    
5588
      _CheckNodeOnline(self, src_node)
5589
      result = self.rpc.call_export_info(src_node, src_path)
5590
      result.Raise("No export or invalid export found in dir %s" % src_path)
5591

    
5592
      export_info = objects.SerializableConfigParser.Loads(str(result.payload))
5593
      if not export_info.has_section(constants.INISECT_EXP):
5594
        raise errors.ProgrammerError("Corrupted export config")
5595

    
5596
      ei_version = export_info.get(constants.INISECT_EXP, 'version')
5597
      if (int(ei_version) != constants.EXPORT_VERSION):
5598
        raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
5599
                                   (ei_version, constants.EXPORT_VERSION))
5600

    
5601
      # Check that the new instance doesn't have less disks than the export
5602
      instance_disks = len(self.disks)
5603
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
5604
      if instance_disks < export_disks:
5605
        raise errors.OpPrereqError("Not enough disks to import."
5606
                                   " (instance: %d, export: %d)" %
5607
                                   (instance_disks, export_disks))
5608

    
5609
      self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
5610
      disk_images = []
5611
      for idx in range(export_disks):
5612
        option = 'disk%d_dump' % idx
5613
        if export_info.has_option(constants.INISECT_INS, option):
5614
          # FIXME: are the old os-es, disk sizes, etc. useful?
5615
          export_name = export_info.get(constants.INISECT_INS, option)
5616
          image = os.path.join(src_path, export_name)
5617
          disk_images.append(image)
5618
        else:
5619
          disk_images.append(False)
5620

    
5621
      self.src_images = disk_images
5622

    
5623
      old_name = export_info.get(constants.INISECT_INS, 'name')
5624
      # FIXME: int() here could throw a ValueError on broken exports
5625
      exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
5626
      if self.op.instance_name == old_name:
5627
        for idx, nic in enumerate(self.nics):
5628
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
5629
            nic_mac_ini = 'nic%d_mac' % idx
5630
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
5631

    
5632
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
5633
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
5634
    if self.op.start and not self.op.ip_check:
5635
      raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
5636
                                 " adding an instance in start mode")
5637

    
5638
    if self.op.ip_check:
5639
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
5640
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
5641
                                   (self.check_ip, self.op.instance_name))
5642

    
5643
    #### mac address generation
5644
    # By generating here the mac address both the allocator and the hooks get
5645
    # the real final mac address rather than the 'auto' or 'generate' value.
5646
    # There is a race condition between the generation and the instance object
5647
    # creation, which means that we know the mac is valid now, but we're not
5648
    # sure it will be when we actually add the instance. If things go bad
5649
    # adding the instance will abort because of a duplicate mac, and the
5650
    # creation job will fail.
5651
    for nic in self.nics:
5652
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
5653
        nic.mac = self.cfg.GenerateMAC()
5654

    
5655
    #### allocator run
5656

    
5657
    if self.op.iallocator is not None:
5658
      self._RunAllocator()
5659

    
5660
    #### node related checks
5661

    
5662
    # check primary node
5663
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
5664
    assert self.pnode is not None, \
5665
      "Cannot retrieve locked node %s" % self.op.pnode
5666
    if pnode.offline:
5667
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
5668
                                 pnode.name)
5669
    if pnode.drained:
5670
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
5671
                                 pnode.name)
5672

    
5673
    self.secondaries = []
5674

    
5675
    # mirror node verification
5676
    if self.op.disk_template in constants.DTS_NET_MIRROR:
5677
      if self.op.snode is None:
5678
        raise errors.OpPrereqError("The networked disk templates need"
5679
                                   " a mirror node")
5680
      if self.op.snode == pnode.name:
5681
        raise errors.OpPrereqError("The secondary node cannot be"
5682
                                   " the primary node.")
5683
      _CheckNodeOnline(self, self.op.snode)
5684
      _CheckNodeNotDrained(self, self.op.snode)
5685
      self.secondaries.append(self.op.snode)
5686

    
5687
    nodenames = [pnode.name] + self.secondaries
5688

    
5689
    req_size = _ComputeDiskSize(self.op.disk_template,
5690
                                self.disks)
5691

    
5692
    # Check lv size requirements
5693
    if req_size is not None:
5694
      nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
5695
                                         self.op.hypervisor)
5696
      for node in nodenames:
5697
        info = nodeinfo[node]
5698
        info.Raise("Cannot get current information from node %s" % node)
5699
        info = info.payload
5700
        vg_free = info.get('vg_free', None)
5701
        if not isinstance(vg_free, int):
5702
          raise errors.OpPrereqError("Can't compute free disk space on"
5703
                                     " node %s" % node)
5704
        if req_size > vg_free:
5705
          raise errors.OpPrereqError("Not enough disk space on target node %s."
5706
                                     " %d MB available, %d MB required" %
5707
                                     (node, vg_free, req_size))
5708

    
5709
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
5710

    
5711
    # os verification
5712
    result = self.rpc.call_os_get(pnode.name, self.op.os_type)
5713
    result.Raise("OS '%s' not in supported os list for primary node %s" %
5714
                 (self.op.os_type, pnode.name), prereq=True)
5715

    
5716
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
5717

    
5718
    # memory check on primary node
5719
    if self.op.start:
5720
      _CheckNodeFreeMemory(self, self.pnode.name,
5721
                           "creating instance %s" % self.op.instance_name,
5722
                           self.be_full[constants.BE_MEMORY],
5723
                           self.op.hypervisor)
5724

    
5725
    self.dry_run_result = list(nodenames)
5726

    
5727
  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    ##if self.op.vnc_bind_address is None:
    ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS

    # this is needed because os.path.join does not accept None arguments
    if self.op.file_storage_dir is None:
      string_file_storage_dir = ""
    else:
      string_file_storage_dir = self.op.file_storage_dir

    # build the full file storage dir path
    file_storage_dir = os.path.normpath(os.path.join(
                                        self.cfg.GetFileStorageDir(),
                                        string_file_storage_dir, instance))


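    # Build the disk objects for the requested disk template; these are only
    # definitions at this point, the actual block devices are created below.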
    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            )

    feedback_fn("* creating instance disks...")
    try:
      _CreateDisks(self, iobj)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, iobj)
      finally:
        self.cfg.ReleaseDRBDMinors(instance)
        raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj)
    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

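    # Wait for the disks to sync: either fully (wait_for_sync) or, for
    # mirrored templates, just long enough to check they are not degraded.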
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    feedback_fn("creating os for instance %s on node %s" %
                (instance, pnode_name))

    if iobj.disk_template != constants.DT_DISKLESS:
      if self.op.mode == constants.INSTANCE_CREATE:
        feedback_fn("* running the instance OS create scripts...")
        result = self.rpc.call_instance_os_add(pnode_name, iobj, False)
        result.Raise("Could not add os for instance %s"
                     " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")
        src_node = self.op.src_node
        src_images = self.src_images
        cluster_name = self.cfg.GetClusterName()
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
                                                         src_node, src_images,
                                                         cluster_name)
        msg = import_result.fail_msg
        if msg:
          self.LogWarning("Error while importing the disk images for instance"
                          " %s on node %s: %s" % (instance, pnode_name, msg))
      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      raise errors.OpExecError("Instance %s is not running." % instance.name)

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)

    # build ssh cmdline
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)


class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "mode", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None

    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
      if remote_node is None:
        raise errors.OpPrereqError("Node '%s' not known" %
                                   self.op.remote_node)

      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return env, nl, nl


class LUEvacuateNode(LogicalUnit):
  """Relocate the secondary instances from a node.

  """
  HPATH = "node-evacuate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None

    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
                                  self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if self.op.node_name is None:
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name)

    self.needed_locks = {}

    # Declare node locks
    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
      if remote_node is None:
        raise errors.OpPrereqError("Node '%s' not known" %
                                   self.op.remote_node)

      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      raise errors.OpPrereqError("Invalid parameters")

    # Create tasklets for replacing disks for all secondary instances on this
    # node
    names = []
    tasklets = []

    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
      logging.debug("Replacing disks for instance %s", inst.name)
      names.append(inst.name)

      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
                                self.op.iallocator, self.op.remote_node, [])
      tasklets.append(replacer)

    self.tasklets = tasklets
    self.instance_names = names

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    if self.op.remote_node is not None:
      env["NEW_SECONDARY"] = self.op.remote_node
      nl.append(self.op.remote_node)

    return (env, nl, nl)


class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given")

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both")

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node")

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=relocate_from)

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info))

    if len(ial.nodes) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name, len(ial.nodes),
                                  ial.required_nodes))

    remote_node_name = ial.nodes[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def _FindFaultyDisks(self, node_name):
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if self.instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances")

    if len(self.instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(self.instance.secondary_nodes))

    secondary_node = self.instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       self.instance.name, secondary_node)

    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.")

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.")

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced")

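    # In automatic mode, look for faulty disks ourselves and decide which
    # side (primary or secondary) needs its storage replaced.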
    if self.mode == constants.REPLACE_DISK_AUTO:
      faulty_primary = self._FindFaultyDisks(self.instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = self.instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = self.instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = self.instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = self.instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = self.instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Check whether disks are valid
    for disk_idx in self.disks:
      self.instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    node_2nd_ip = {}

    for node_name in [self.target_node, self.other_node, self.new_node]:
      if node_name is not None:
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = node_2nd_ip

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (", ".join([str(i) for i in self.disks]), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        return self._ExecDrbd8Secondary()
      else:
        return self._ExecDrbd8DiskOnly()

    finally:
      # Deactivate the instance disks if we're replacing them on a down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    vgname = self.cfg.GetVGName()
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

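      # Each DRBD disk is backed by two LVs: a data volume matching the disk
      # size and a small (128 MB) metadata volume used by DRBD itself.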
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vgname, names[0]))
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vgname, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = dev.children
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names

  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (dev, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ExecDrbd8DiskOnly(self):
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaces.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

      dev.children = new_lvs

      self.cfg.Update(self.instance)

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(5, steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance, unlock=True)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    self.lu.LogStep(6, steps_total, "Removing old storage")
    self._RemoveOldStorage(self.target_node, iv_names)

  def _ExecDrbd8Secondary(self):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: dbrd minors and drbd setups changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r" % (minors,))

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
                                               self.node_secondary_ip,
                                               self.instance.disks)\
                                               [self.instance.primary_node]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s", to_node,
                           msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(5, steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance, unlock=True)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    self.lu.LogStep(6, steps_total, "Removing old storage")
    self._RemoveOldStorage(self.target_node, iv_names)


class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if node_name is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)

    self.op.node_name = node_name

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                node_name, True):
      raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                 " node '%s'" % (instance.name, node_name))

  def CheckPrereq(self):
    """Check prerequisites.

    """
    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type)

    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      ]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)


    self.instance = instance

    if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.")

    self.disk = instance.FindDisk(self.op.disk)

    nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                       instance.hypervisor)
    for node in nodenames:
      info = nodeinfo[node]
      info.Raise("Cannot get current information from node %s" % node)
      vg_free = info.payload.get('vg_free', None)
      if not isinstance(vg_free, int):
        raise errors.OpPrereqError("Can't compute free disk space on"
                                   " node %s" % node)
      if self.op.amount > vg_free:
        raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                   " %d MiB available, %d MiB required" %
                                   (node, vg_free, self.op.amount))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk
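    # Grow the block device on every node of the instance (primary and any
    # secondaries) so mirrored templates stay consistent, then record the
    # new size in the configuration.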
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
      result.Raise("Grow request failed to node %s" % node)
    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance)
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance)
      if disk_abort:
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                             " status.\nPlease check the instance.")


class LUQueryInstanceData(NoHooksLU):
  """Query runtime instance data.

  """
  _OP_REQP = ["instances", "static"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'")

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = self.cfg.ExpandInstanceName(name)
        if full_name is None:
          raise errors.OpPrereqError("Instance '%s' not known" % name)
        self.wanted_names.append(full_name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
    else:
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]
    return

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
                      for child in dev.children]
    else:
      dev_children = []

    data = {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

    return data

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    for instance in self.wanted_instances:
      if not self.op.static:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"
      else:
        remote_state = None
      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = [self._ComputeDiskStatus(instance, None, device)
               for device in instance.disks]

      idict = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        }

      result[instance.name] = idict

    return result


class LUSetInstanceParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, 'nics'):
      self.op.nics = []
    if not hasattr(self.op, 'disks'):
      self.op.disks = []
    if not hasattr(self.op, 'beparams'):
      self.op.beparams = {}
    if not hasattr(self.op, 'hvparams'):
      self.op.hvparams = {}
    self.op.force = getattr(self.op, "force", False)
    if not (self.op.nics or self.op.disks or
            self.op.hvparams or self.op.beparams):
      raise errors.OpPrereqError("No changes submitted")

    # Disk validation
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index")
        if not isinstance(disk_dict, dict):
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
          raise errors.OpPrereqError(msg)

      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode)
        size = disk_dict.get('size', None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing")
        try:
          size = int(size)
        except ValueError, err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err))
        disk_dict['size'] = size
      else:
        # modification of disk
        if 'size' in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk")

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time")

    # NIC validation
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index")
        if not isinstance(nic_dict, dict):
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
          raise errors.OpPrereqError(msg)

      # nic_dict should be a dict
      nic_ip = nic_dict.get('ip', None)
      if nic_ip is not None:
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict['ip'] = None
        else:
          if not utils.IsValidIP(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip)

      nic_bridge = nic_dict.get('bridge', None)
      nic_link = nic_dict.get('link', None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time")
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict['bridge'] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict['link'] = None

      if nic_op == constants.DDM_ADD:
        nic_mac = nic_dict.get('mac', None)
        if nic_mac is None:
          nic_dict['mac'] = constants.VALUE_AUTO

      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          if not utils.IsValidMac(nic_mac):
            raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac)
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic")

    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time")

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
    if constants.BE_MEMORY in self.be_new:
      args['memory'] = self.be_new[constants.BE_MEMORY]
7106
    if constants.BE_VCPUS in self.be_new:
7107
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
7108
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
7109
    # information at all.
7110
    if self.op.nics:
7111
      args['nics'] = []
7112
      nic_override = dict(self.op.nics)
7113
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
7114
      for idx, nic in enumerate(self.instance.nics):
7115
        if idx in nic_override:
7116
          this_nic_override = nic_override[idx]
7117
        else:
7118
          this_nic_override = {}
7119
        if 'ip' in this_nic_override:
7120
          ip = this_nic_override['ip']
7121
        else:
7122
          ip = nic.ip
7123
        if 'mac' in this_nic_override:
7124
          mac = this_nic_override['mac']
7125
        else:
7126
          mac = nic.mac
7127
        if idx in self.nic_pnew:
7128
          nicparams = self.nic_pnew[idx]
7129
        else:
7130
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
7131
        mode = nicparams[constants.NIC_MODE]
7132
        link = nicparams[constants.NIC_LINK]
7133
        args['nics'].append((ip, mac, mode, link))
7134
      if constants.DDM_ADD in nic_override:
7135
        ip = nic_override[constants.DDM_ADD].get('ip', None)
7136
        mac = nic_override[constants.DDM_ADD]['mac']
7137
        nicparams = self.nic_pnew[constants.DDM_ADD]
7138
        mode = nicparams[constants.NIC_MODE]
7139
        link = nicparams[constants.NIC_LINK]
7140
        args['nics'].append((ip, mac, mode, link))
7141
      elif constants.DDM_REMOVE in nic_override:
7142
        del args['nics'][-1]
7143

    
7144
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
7145
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7146
    return env, nl, nl
7147

    
7148
  def _GetUpdatedParams(self, old_params, update_dict,
7149
                        default_values, parameter_types):
7150
    """Return the new params dict for the given params.
7151

7152
    @type old_params: dict
7153
    @param old_params: old parameters
7154
    @type update_dict: dict
7155
    @param update_dict: dict containing new parameter values,
7156
                        or constants.VALUE_DEFAULT to reset the
7157
                        parameter to its default value
7158
    @type default_values: dict
7159
    @param default_values: default values for the filled parameters
7160
    @type parameter_types: dict
7161
    @param parameter_types: dict mapping target dict keys to types
7162
                            in constants.ENFORCEABLE_TYPES
7163
    @rtype: (dict, dict)
7164
    @return: (new_parameters, filled_parameters)
7165

7166
    """
7167
    params_copy = copy.deepcopy(old_params)
7168
    for key, val in update_dict.iteritems():
7169
      if val == constants.VALUE_DEFAULT:
7170
        try:
7171
          del params_copy[key]
7172
        except KeyError:
7173
          pass
7174
      else:
7175
        params_copy[key] = val
7176
    utils.ForceDictType(params_copy, parameter_types)
7177
    params_filled = objects.FillDict(default_values, params_copy)
7178
    return (params_copy, params_filled)
7179

    
7180
  def CheckPrereq(self):
7181
    """Check prerequisites.
7182

7183
    This only checks the instance list against the existing names.
7184

7185
    """
7186
    self.force = self.op.force
7187

    
7188
    # checking the new params on the primary/secondary nodes
7189

    
7190
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7191
    cluster = self.cluster = self.cfg.GetClusterInfo()
7192
    assert self.instance is not None, \
7193
      "Cannot retrieve locked instance %s" % self.op.instance_name
7194
    pnode = instance.primary_node
7195
    nodelist = list(instance.all_nodes)
7196

    
7197
    # hvparams processing
7198
    if self.op.hvparams:
7199
      i_hvdict, hv_new = self._GetUpdatedParams(
7200
                             instance.hvparams, self.op.hvparams,
7201
                             cluster.hvparams[instance.hypervisor],
7202
                             constants.HVS_PARAMETER_TYPES)
7203
      # local check
7204
      hypervisor.GetHypervisor(
7205
        instance.hypervisor).CheckParameterSyntax(hv_new)
7206
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
7207
      self.hv_new = hv_new # the new actual values
7208
      self.hv_inst = i_hvdict # the new dict (without defaults)
7209
    else:
7210
      self.hv_new = self.hv_inst = {}
7211

    
7212
    # beparams processing
7213
    if self.op.beparams:
7214
      i_bedict, be_new = self._GetUpdatedParams(
7215
                             instance.beparams, self.op.beparams,
7216
                             cluster.beparams[constants.PP_DEFAULT],
7217
                             constants.BES_PARAMETER_TYPES)
7218
      self.be_new = be_new # the new actual values
7219
      self.be_inst = i_bedict # the new dict (without defaults)
7220
    else:
7221
      self.be_new = self.be_inst = {}
7222

    
7223
    self.warn = []
7224

    
7225
    if constants.BE_MEMORY in self.op.beparams and not self.force:
7226
      mem_check_list = [pnode]
7227
      if be_new[constants.BE_AUTO_BALANCE]:
7228
        # either we changed auto_balance to yes or it was from before
7229
        mem_check_list.extend(instance.secondary_nodes)
7230
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
7231
                                                  instance.hypervisor)
7232
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
7233
                                         instance.hypervisor)
7234
      pninfo = nodeinfo[pnode]
7235
      msg = pninfo.fail_msg
7236
      if msg:
7237
        # Assume the primary node is unreachable and go ahead
7238
        self.warn.append("Can't get info from primary node %s: %s" %
7239
                         (pnode,  msg))
7240
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
7241
        self.warn.append("Node data from primary node %s doesn't contain"
7242
                         " free memory information" % pnode)
7243
      elif instance_info.fail_msg:
7244
        self.warn.append("Can't get instance runtime information: %s" %
7245
                        instance_info.fail_msg)
7246
      else:
7247
        if instance_info.payload:
7248
          current_mem = int(instance_info.payload['memory'])
7249
        else:
7250
          # Assume instance not running
7251
          # (there is a slight race condition here, but it's not very probable,
7252
          # and we have no other way to check)
7253
          current_mem = 0
7254
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
7255
                    pninfo.payload['memory_free'])
7256
        if miss_mem > 0:
7257
          raise errors.OpPrereqError("This change will prevent the instance"
7258
                                     " from starting, due to %d MB of memory"
7259
                                     " missing on its primary node" % miss_mem)
7260

    
7261
      if be_new[constants.BE_AUTO_BALANCE]:
7262
        for node, nres in nodeinfo.items():
7263
          if node not in instance.secondary_nodes:
7264
            continue
7265
          msg = nres.fail_msg
7266
          if msg:
7267
            self.warn.append("Can't get info from secondary node %s: %s" %
7268
                             (node, msg))
7269
          elif not isinstance(nres.payload.get('memory_free', None), int):
7270
            self.warn.append("Secondary node %s didn't return free"
7271
                             " memory information" % node)
7272
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
7273
            self.warn.append("Not enough memory to failover instance to"
7274
                             " secondary node %s" % node)
7275

    
7276
    # NIC processing
7277
    self.nic_pnew = {}
7278
    self.nic_pinst = {}
7279
    for nic_op, nic_dict in self.op.nics:
7280
      if nic_op == constants.DDM_REMOVE:
7281
        if not instance.nics:
7282
          raise errors.OpPrereqError("Instance has no NICs, cannot remove")
7283
        continue
7284
      if nic_op != constants.DDM_ADD:
7285
        # an existing nic
7286
        if nic_op < 0 or nic_op >= len(instance.nics):
7287
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
7288
                                     " are 0 to %d" %
7289
                                     (nic_op, len(instance.nics)))
7290
        old_nic_params = instance.nics[nic_op].nicparams
7291
        old_nic_ip = instance.nics[nic_op].ip
7292
      else:
7293
        old_nic_params = {}
7294
        old_nic_ip = None
7295

    
7296
      update_params_dict = dict([(key, nic_dict[key])
7297
                                 for key in constants.NICS_PARAMETERS
7298
                                 if key in nic_dict])
7299

    
7300
      if 'bridge' in nic_dict:
7301
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
7302

    
7303
      new_nic_params, new_filled_nic_params = \
7304
          self._GetUpdatedParams(old_nic_params, update_params_dict,
7305
                                 cluster.nicparams[constants.PP_DEFAULT],
7306
                                 constants.NICS_PARAMETER_TYPES)
7307
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
7308
      self.nic_pinst[nic_op] = new_nic_params
7309
      self.nic_pnew[nic_op] = new_filled_nic_params
7310
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
7311

    
7312
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
7313
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
7314
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
7315
        if msg:
7316
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
7317
          if self.force:
7318
            self.warn.append(msg)
7319
          else:
7320
            raise errors.OpPrereqError(msg)
7321
      if new_nic_mode == constants.NIC_MODE_ROUTED:
7322
        if 'ip' in nic_dict:
7323
          nic_ip = nic_dict['ip']
7324
        else:
7325
          nic_ip = old_nic_ip
7326
        if nic_ip is None:
7327
          raise errors.OpPrereqError('Cannot set the nic ip to None'
7328
                                     ' on a routed nic')
7329
      if 'mac' in nic_dict:
7330
        nic_mac = nic_dict['mac']
7331
        if nic_mac is None:
7332
          raise errors.OpPrereqError('Cannot set the nic mac to None')
7333
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7334
          # otherwise generate the mac
7335
          nic_dict['mac'] = self.cfg.GenerateMAC()
7336
        else:
7337
          # or validate/reserve the current one
7338
          if self.cfg.IsMacInUse(nic_mac):
7339
            raise errors.OpPrereqError("MAC address %s already in use"
7340
                                       " in cluster" % nic_mac)
7341

    
7342
    # DISK processing
7343
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
7344
      raise errors.OpPrereqError("Disk operations not supported for"
7345
                                 " diskless instances")
7346
    for disk_op, disk_dict in self.op.disks:
7347
      if disk_op == constants.DDM_REMOVE:
7348
        if len(instance.disks) == 1:
7349
          raise errors.OpPrereqError("Cannot remove the last disk of"
7350
                                     " an instance")
7351
        ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
7352
        ins_l = ins_l[pnode]
7353
        msg = ins_l.fail_msg
7354
        if msg:
7355
          raise errors.OpPrereqError("Can't contact node %s: %s" %
7356
                                     (pnode, msg))
7357
        if instance.name in ins_l.payload:
7358
          raise errors.OpPrereqError("Instance is running, can't remove"
7359
                                     " disks.")
7360

    
7361
      if (disk_op == constants.DDM_ADD and
7362
          len(instance.nics) >= constants.MAX_DISKS):
7363
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
7364
                                   " add more" % constants.MAX_DISKS)
7365
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
7366
        # an existing disk
7367
        if disk_op < 0 or disk_op >= len(instance.disks):
7368
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
7369
                                     " are 0 to %d" %
7370
                                     (disk_op, len(instance.disks)))
7371

    
7372
    return
7373

    
7374
  def Exec(self, feedback_fn):
7375
    """Modifies an instance.
7376

7377
    All parameters take effect only at the next restart of the instance.
7378

7379
    """
7380
    # Process here the warnings from CheckPrereq, as we don't have a
7381
    # feedback_fn there.
7382
    for warn in self.warn:
7383
      feedback_fn("WARNING: %s" % warn)
7384

    
7385
    result = []
7386
    instance = self.instance
7387
    cluster = self.cluster
7388
    # disk changes
7389
    for disk_op, disk_dict in self.op.disks:
7390
      if disk_op == constants.DDM_REMOVE:
7391
        # remove the last disk
7392
        device = instance.disks.pop()
7393
        device_idx = len(instance.disks)
7394
        for node, disk in device.ComputeNodeTree(instance.primary_node):
7395
          self.cfg.SetDiskID(disk, node)
7396
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
7397
          if msg:
7398
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
7399
                            " continuing anyway", device_idx, node, msg)
7400
        result.append(("disk/%d" % device_idx, "remove"))
7401
      elif disk_op == constants.DDM_ADD:
7402
        # add a new disk
7403
        if instance.disk_template == constants.DT_FILE:
7404
          file_driver, file_path = instance.disks[0].logical_id
7405
          file_path = os.path.dirname(file_path)
7406
        else:
7407
          file_driver = file_path = None
7408
        disk_idx_base = len(instance.disks)
7409
        new_disk = _GenerateDiskTemplate(self,
7410
                                         instance.disk_template,
7411
                                         instance.name, instance.primary_node,
7412
                                         instance.secondary_nodes,
7413
                                         [disk_dict],
7414
                                         file_path,
7415
                                         file_driver,
7416
                                         disk_idx_base)[0]
7417
        instance.disks.append(new_disk)
7418
        info = _GetInstanceInfoText(instance)
7419

    
7420
        logging.info("Creating volume %s for instance %s",
7421
                     new_disk.iv_name, instance.name)
7422
        # Note: this needs to be kept in sync with _CreateDisks
7423
        #HARDCODE
7424
        for node in instance.all_nodes:
7425
          f_create = node == instance.primary_node
7426
          try:
7427
            _CreateBlockDev(self, node, instance, new_disk,
7428
                            f_create, info, f_create)
7429
          except errors.OpExecError, err:
7430
            self.LogWarning("Failed to create volume %s (%s) on"
7431
                            " node %s: %s",
7432
                            new_disk.iv_name, new_disk, node, err)
7433
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
7434
                       (new_disk.size, new_disk.mode)))
7435
      else:
7436
        # change a given disk
7437
        instance.disks[disk_op].mode = disk_dict['mode']
7438
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
7439
    # NIC changes
7440
    for nic_op, nic_dict in self.op.nics:
7441
      if nic_op == constants.DDM_REMOVE:
7442
        # remove the last nic
7443
        del instance.nics[-1]
7444
        result.append(("nic.%d" % len(instance.nics), "remove"))
7445
      elif nic_op == constants.DDM_ADD:
7446
        # mac and bridge should be set, by now
7447
        mac = nic_dict['mac']
7448
        ip = nic_dict.get('ip', None)
7449
        nicparams = self.nic_pinst[constants.DDM_ADD]
7450
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
7451
        instance.nics.append(new_nic)
7452
        result.append(("nic.%d" % (len(instance.nics) - 1),
7453
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
7454
                       (new_nic.mac, new_nic.ip,
7455
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
7456
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
7457
                       )))
7458
      else:
7459
        for key in 'mac', 'ip':
7460
          if key in nic_dict:
7461
            setattr(instance.nics[nic_op], key, nic_dict[key])
7462
        if nic_op in self.nic_pnew:
7463
          instance.nics[nic_op].nicparams = self.nic_pnew[nic_op]
7464
        for key, val in nic_dict.iteritems():
7465
          result.append(("nic.%s/%d" % (key, nic_op), val))
7466

    
7467
    # hvparams changes
7468
    if self.op.hvparams:
7469
      instance.hvparams = self.hv_inst
7470
      for key, val in self.op.hvparams.iteritems():
7471
        result.append(("hv/%s" % key, val))
7472

    
7473
    # beparams changes
7474
    if self.op.beparams:
7475
      instance.beparams = self.be_inst
7476
      for key, val in self.op.beparams.iteritems():
7477
        result.append(("be/%s" % key, val))
7478

    
7479
    self.cfg.Update(instance)
7480

    
7481
    return result
7482

    
7483

    
7484
class LUQueryExports(NoHooksLU):
7485
  """Query the exports list
7486

7487
  """
7488
  _OP_REQP = ['nodes']
7489
  REQ_BGL = False
7490

    
7491
  def ExpandNames(self):
7492
    self.needed_locks = {}
7493
    self.share_locks[locking.LEVEL_NODE] = 1
7494
    if not self.op.nodes:
7495
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7496
    else:
7497
      self.needed_locks[locking.LEVEL_NODE] = \
7498
        _GetWantedNodes(self, self.op.nodes)
7499

    
7500
  def CheckPrereq(self):
7501
    """Check prerequisites.
7502

7503
    """
7504
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
7505

    
7506
  def Exec(self, feedback_fn):
7507
    """Compute the list of all the exported system images.
7508

7509
    @rtype: dict
7510
    @return: a dictionary with the structure node->(export-list)
7511
        where export-list is a list of the instances exported on
7512
        that node.
7513

7514
    """
7515
    rpcresult = self.rpc.call_export_list(self.nodes)
7516
    result = {}
7517
    for node in rpcresult:
7518
      if rpcresult[node].fail_msg:
7519
        result[node] = False
7520
      else:
7521
        result[node] = rpcresult[node].payload
7522

    
7523
    return result
7524

    
7525

    
7526
class LUExportInstance(LogicalUnit):
7527
  """Export an instance to an image in the cluster.
7528

7529
  """
7530
  HPATH = "instance-export"
7531
  HTYPE = constants.HTYPE_INSTANCE
7532
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
7533
  REQ_BGL = False
7534

    
7535
  def ExpandNames(self):
7536
    self._ExpandAndLockInstance()
7537
    # FIXME: lock only instance primary and destination node
7538
    #
7539
    # Sad but true, for now we have do lock all nodes, as we don't know where
7540
    # the previous export might be, and and in this LU we search for it and
7541
    # remove it from its current node. In the future we could fix this by:
7542
    #  - making a tasklet to search (share-lock all), then create the new one,
7543
    #    then one to remove, after
7544
    #  - removing the removal operation altogether
7545
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7546

    
7547
  def DeclareLocks(self, level):
7548
    """Last minute lock declaration."""
7549
    # All nodes are locked anyway, so nothing to do here.
7550

    
7551
  def BuildHooksEnv(self):
7552
    """Build hooks env.
7553

7554
    This will run on the master, primary node and target node.
7555

7556
    """
7557
    env = {
7558
      "EXPORT_NODE": self.op.target_node,
7559
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
7560
      }
7561
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7562
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
7563
          self.op.target_node]
7564
    return env, nl, nl
7565

    
7566
  def CheckPrereq(self):
7567
    """Check prerequisites.
7568

7569
    This checks that the instance and node names are valid.
7570

7571
    """
7572
    instance_name = self.op.instance_name
7573
    self.instance = self.cfg.GetInstanceInfo(instance_name)
7574
    assert self.instance is not None, \
7575
          "Cannot retrieve locked instance %s" % self.op.instance_name
7576
    _CheckNodeOnline(self, self.instance.primary_node)
7577

    
7578
    self.dst_node = self.cfg.GetNodeInfo(
7579
      self.cfg.ExpandNodeName(self.op.target_node))
7580

    
7581
    if self.dst_node is None:
7582
      # This is wrong node name, not a non-locked node
7583
      raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node)
7584
    _CheckNodeOnline(self, self.dst_node.name)
7585
    _CheckNodeNotDrained(self, self.dst_node.name)
7586

    
7587
    # instance disk type verification
7588
    for disk in self.instance.disks:
7589
      if disk.dev_type == constants.LD_FILE:
7590
        raise errors.OpPrereqError("Export not supported for instances with"
7591
                                   " file-based disks")
7592

    
7593
  def Exec(self, feedback_fn):
7594
    """Export an instance to an image in the cluster.
7595

7596
    """
7597
    instance = self.instance
7598
    dst_node = self.dst_node
7599
    src_node = instance.primary_node
7600

    
7601
    if self.op.shutdown:
7602
      # shutdown the instance, but not the disks
7603
      feedback_fn("Shutting down instance %s" % instance.name)
7604
      result = self.rpc.call_instance_shutdown(src_node, instance)
7605
      result.Raise("Could not shutdown instance %s on"
7606
                   " node %s" % (instance.name, src_node))
7607

    
7608
    vgname = self.cfg.GetVGName()
7609

    
7610
    snap_disks = []
7611

    
7612
    # set the disks ID correctly since call_instance_start needs the
7613
    # correct drbd minor to create the symlinks
7614
    for disk in instance.disks:
7615
      self.cfg.SetDiskID(disk, src_node)
7616

    
7617
    # per-disk results
7618
    dresults = []
7619
    try:
7620
      for idx, disk in enumerate(instance.disks):
7621
        feedback_fn("Creating a snapshot of disk/%s on node %s" %
7622
                    (idx, src_node))
7623

    
7624
        # result.payload will be a snapshot of an lvm leaf of the one we passed
7625
        result = self.rpc.call_blockdev_snapshot(src_node, disk)
7626
        msg = result.fail_msg
7627
        if msg:
7628
          self.LogWarning("Could not snapshot disk/%s on node %s: %s",
7629
                          idx, src_node, msg)
7630
          snap_disks.append(False)
7631
        else:
7632
          disk_id = (vgname, result.payload)
7633
          new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
7634
                                 logical_id=disk_id, physical_id=disk_id,
7635
                                 iv_name=disk.iv_name)
7636
          snap_disks.append(new_dev)
7637

    
7638
    finally:
7639
      if self.op.shutdown and instance.admin_up:
7640
        feedback_fn("Starting instance %s" % instance.name)
7641
        result = self.rpc.call_instance_start(src_node, instance, None, None)
7642
        msg = result.fail_msg
7643
        if msg:
7644
          _ShutdownInstanceDisks(self, instance)
7645
          raise errors.OpExecError("Could not start instance: %s" % msg)
7646

    
7647
    # TODO: check for size
7648

    
7649
    cluster_name = self.cfg.GetClusterName()
7650
    for idx, dev in enumerate(snap_disks):
7651
      feedback_fn("Exporting snapshot %s from %s to %s" %
7652
                  (idx, src_node, dst_node.name))
7653
      if dev:
7654
        result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
7655
                                               instance, cluster_name, idx)
7656
        msg = result.fail_msg
7657
        if msg:
7658
          self.LogWarning("Could not export disk/%s from node %s to"
7659
                          " node %s: %s", idx, src_node, dst_node.name, msg)
7660
          dresults.append(False)
7661
        else:
7662
          dresults.append(True)
7663
        msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
7664
        if msg:
7665
          self.LogWarning("Could not remove snapshot for disk/%d from node"
7666
                          " %s: %s", idx, src_node, msg)
7667
      else:
7668
        dresults.append(False)
7669

    
7670
    feedback_fn("Finalizing export on %s" % dst_node.name)
7671
    result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
7672
    fin_resu = True
7673
    msg = result.fail_msg
7674
    if msg:
7675
      self.LogWarning("Could not finalize export for instance %s"
7676
                      " on node %s: %s", instance.name, dst_node.name, msg)
7677
      fin_resu = False
7678

    
7679
    nodelist = self.cfg.GetNodeList()
7680
    nodelist.remove(dst_node.name)
7681

    
7682
    # on one-node clusters nodelist will be empty after the removal
7683
    # if we proceed the backup would be removed because OpQueryExports
7684
    # substitutes an empty list with the full cluster node list.
7685
    iname = instance.name
7686
    if nodelist:
7687
      feedback_fn("Removing old exports for instance %s" % iname)
7688
      exportlist = self.rpc.call_export_list(nodelist)
7689
      for node in exportlist:
7690
        if exportlist[node].fail_msg:
7691
          continue
7692
        if iname in exportlist[node].payload:
7693
          msg = self.rpc.call_export_remove(node, iname).fail_msg
7694
          if msg:
7695
            self.LogWarning("Could not remove older export for instance %s"
7696
                            " on node %s: %s", iname, node, msg)
7697
    return fin_resu, dresults
7698

    
7699

    
7700
class LURemoveExport(NoHooksLU):
7701
  """Remove exports related to the named instance.
7702

7703
  """
7704
  _OP_REQP = ["instance_name"]
7705
  REQ_BGL = False
7706

    
7707
  def ExpandNames(self):
7708
    self.needed_locks = {}
7709
    # We need all nodes to be locked in order for RemoveExport to work, but we
7710
    # don't need to lock the instance itself, as nothing will happen to it (and
7711
    # we can remove exports also for a removed instance)
7712
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7713

    
7714
  def CheckPrereq(self):
7715
    """Check prerequisites.
7716
    """
7717
    pass
7718

    
7719
  def Exec(self, feedback_fn):
7720
    """Remove any export.
7721

7722
    """
7723
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
7724
    # If the instance was not found we'll try with the name that was passed in.
7725
    # This will only work if it was an FQDN, though.
7726
    fqdn_warn = False
7727
    if not instance_name:
7728
      fqdn_warn = True
7729
      instance_name = self.op.instance_name
7730

    
7731
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7732
    exportlist = self.rpc.call_export_list(locked_nodes)
7733
    found = False
7734
    for node in exportlist:
7735
      msg = exportlist[node].fail_msg
7736
      if msg:
7737
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
7738
        continue
7739
      if instance_name in exportlist[node].payload:
7740
        found = True
7741
        result = self.rpc.call_export_remove(node, instance_name)
7742
        msg = result.fail_msg
7743
        if msg:
7744
          logging.error("Could not remove export for instance %s"
7745
                        " on node %s: %s", instance_name, node, msg)
7746

    
7747
    if fqdn_warn and not found:
7748
      feedback_fn("Export not found. If trying to remove an export belonging"
7749
                  " to a deleted instance please use its Fully Qualified"
7750
                  " Domain Name.")
7751

    
7752

    
7753
class TagsLU(NoHooksLU):
7754
  """Generic tags LU.
7755

7756
  This is an abstract class which is the parent of all the other tags LUs.
7757

7758
  """
7759

    
7760
  def ExpandNames(self):
7761
    self.needed_locks = {}
7762
    if self.op.kind == constants.TAG_NODE:
7763
      name = self.cfg.ExpandNodeName(self.op.name)
7764
      if name is None:
7765
        raise errors.OpPrereqError("Invalid node name (%s)" %
7766
                                   (self.op.name,))
7767
      self.op.name = name
7768
      self.needed_locks[locking.LEVEL_NODE] = name
7769
    elif self.op.kind == constants.TAG_INSTANCE:
7770
      name = self.cfg.ExpandInstanceName(self.op.name)
7771
      if name is None:
7772
        raise errors.OpPrereqError("Invalid instance name (%s)" %
7773
                                   (self.op.name,))
7774
      self.op.name = name
7775
      self.needed_locks[locking.LEVEL_INSTANCE] = name
7776

    
7777
  def CheckPrereq(self):
7778
    """Check prerequisites.
7779

7780
    """
7781
    if self.op.kind == constants.TAG_CLUSTER:
7782
      self.target = self.cfg.GetClusterInfo()
7783
    elif self.op.kind == constants.TAG_NODE:
7784
      self.target = self.cfg.GetNodeInfo(self.op.name)
7785
    elif self.op.kind == constants.TAG_INSTANCE:
7786
      self.target = self.cfg.GetInstanceInfo(self.op.name)
7787
    else:
7788
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
7789
                                 str(self.op.kind))
7790

    
7791

    
7792
class LUGetTags(TagsLU):
7793
  """Returns the tags of a given object.
7794

7795
  """
7796
  _OP_REQP = ["kind", "name"]
7797
  REQ_BGL = False
7798

    
7799
  def Exec(self, feedback_fn):
7800
    """Returns the tag list.
7801

7802
    """
7803
    return list(self.target.GetTags())
7804

    
7805

    
7806
class LUSearchTags(NoHooksLU):
7807
  """Searches the tags for a given pattern.
7808

7809
  """
7810
  _OP_REQP = ["pattern"]
7811
  REQ_BGL = False
7812

    
7813
  def ExpandNames(self):
7814
    self.needed_locks = {}
7815

    
7816
  def CheckPrereq(self):
7817
    """Check prerequisites.
7818

7819
    This checks the pattern passed for validity by compiling it.
7820

7821
    """
7822
    try:
7823
      self.re = re.compile(self.op.pattern)
7824
    except re.error, err:
7825
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
7826
                                 (self.op.pattern, err))
7827

    
7828
  def Exec(self, feedback_fn):
7829
    """Returns the tag list.
7830

7831
    """
7832
    cfg = self.cfg
7833
    tgts = [("/cluster", cfg.GetClusterInfo())]
7834
    ilist = cfg.GetAllInstancesInfo().values()
7835
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
7836
    nlist = cfg.GetAllNodesInfo().values()
7837
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
7838
    results = []
7839
    for path, target in tgts:
7840
      for tag in target.GetTags():
7841
        if self.re.search(tag):
7842
          results.append((path, tag))
7843
    return results
7844

    
7845

    
7846
class LUAddTags(TagsLU):
7847
  """Sets a tag on a given object.
7848

7849
  """
7850
  _OP_REQP = ["kind", "name", "tags"]
7851
  REQ_BGL = False
7852

    
7853
  def CheckPrereq(self):
7854
    """Check prerequisites.
7855

7856
    This checks the type and length of the tag name and value.
7857

7858
    """
7859
    TagsLU.CheckPrereq(self)
7860
    for tag in self.op.tags:
7861
      objects.TaggableObject.ValidateTag(tag)
7862

    
7863
  def Exec(self, feedback_fn):
7864
    """Sets the tag.
7865

7866
    """
7867
    try:
7868
      for tag in self.op.tags:
7869
        self.target.AddTag(tag)
7870
    except errors.TagError, err:
7871
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
7872
    try:
7873
      self.cfg.Update(self.target)
7874
    except errors.ConfigurationError:
7875
      raise errors.OpRetryError("There has been a modification to the"
7876
                                " config file and the operation has been"
7877
                                " aborted. Please retry.")
7878

    
7879

    
7880
class LUDelTags(TagsLU):
7881
  """Delete a list of tags from a given object.
7882

7883
  """
7884
  _OP_REQP = ["kind", "name", "tags"]
7885
  REQ_BGL = False
7886

    
7887
  def CheckPrereq(self):
7888
    """Check prerequisites.
7889

7890
    This checks that we have the given tag.
7891

7892
    """
7893
    TagsLU.CheckPrereq(self)
7894
    for tag in self.op.tags:
7895
      objects.TaggableObject.ValidateTag(tag)
7896
    del_tags = frozenset(self.op.tags)
7897
    cur_tags = self.target.GetTags()
7898
    if not del_tags <= cur_tags:
7899
      diff_tags = del_tags - cur_tags
7900
      diff_names = ["'%s'" % tag for tag in diff_tags]
7901
      diff_names.sort()
7902
      raise errors.OpPrereqError("Tag(s) %s not found" %
7903
                                 (",".join(diff_names)))
7904

    
7905
  def Exec(self, feedback_fn):
7906
    """Remove the tag from the object.
7907

7908
    """
7909
    for tag in self.op.tags:
7910
      self.target.RemoveTag(tag)
7911
    try:
7912
      self.cfg.Update(self.target)
7913
    except errors.ConfigurationError:
7914
      raise errors.OpRetryError("There has been a modification to the"
7915
                                " config file and the operation has been"
7916
                                " aborted. Please retry.")
7917

    
7918

    
7919
class LUTestDelay(NoHooksLU):
7920
  """Sleep for a specified amount of time.
7921

7922
  This LU sleeps on the master and/or nodes for a specified amount of
7923
  time.
7924

7925
  """
7926
  _OP_REQP = ["duration", "on_master", "on_nodes"]
7927
  REQ_BGL = False
7928

    
7929
  def ExpandNames(self):
7930
    """Expand names and set required locks.
7931

7932
    This expands the node list, if any.
7933

7934
    """
7935
    self.needed_locks = {}
7936
    if self.op.on_nodes:
7937
      # _GetWantedNodes can be used here, but is not always appropriate to use
7938
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
7939
      # more information.
7940
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
7941
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
7942

    
7943
  def CheckPrereq(self):
7944
    """Check prerequisites.
7945

7946
    """
7947

    
7948
  def Exec(self, feedback_fn):
7949
    """Do the actual sleep.
7950

7951
    """
7952
    if self.op.on_master:
7953
      if not utils.TestDelay(self.op.duration):
7954
        raise errors.OpExecError("Error during master delay test")
7955
    if self.op.on_nodes:
7956
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
7957
      for node, node_result in result.items():
7958
        node_result.Raise("Failure during rpc call to node %s" % node)
7959

    
7960

    
7961
class IAllocator(object):
7962
  """IAllocator framework.
7963

7964
  An IAllocator instance has three sets of attributes:
7965
    - cfg that is needed to query the cluster
7966
    - input data (all members of the _KEYS class attribute are required)
7967
    - four buffer attributes (in|out_data|text), that represent the
7968
      input (to the external script) in text and data structure format,
7969
      and the output from it, again in two formats
7970
    - the result variables from the script (success, info, nodes) for
7971
      easy usage
7972

7973
  """
7974
  _ALLO_KEYS = [
7975
    "mem_size", "disks", "disk_template",
7976
    "os", "tags", "nics", "vcpus", "hypervisor",
7977
    ]
7978
  _RELO_KEYS = [
7979
    "relocate_from",
7980
    ]
7981

    
7982
  def __init__(self, cfg, rpc, mode, name, **kwargs):
7983
    self.cfg = cfg
7984
    self.rpc = rpc
7985
    # init buffer variables
7986
    self.in_text = self.out_text = self.in_data = self.out_data = None
7987
    # init all input fields so that pylint is happy
7988
    self.mode = mode
7989
    self.name = name
7990
    self.mem_size = self.disks = self.disk_template = None
7991
    self.os = self.tags = self.nics = self.vcpus = None
7992
    self.hypervisor = None
7993
    self.relocate_from = None
7994
    # computed fields
7995
    self.required_nodes = None
7996
    # init result fields
7997
    self.success = self.info = self.nodes = None
7998
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
7999
      keyset = self._ALLO_KEYS
8000
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
8001
      keyset = self._RELO_KEYS
8002
    else:
8003
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
8004
                                   " IAllocator" % self.mode)
8005
    for key in kwargs:
8006
      if key not in keyset:
8007
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
8008
                                     " IAllocator" % key)
8009
      setattr(self, key, kwargs[key])
8010
    for key in keyset:
8011
      if key not in kwargs:
8012
        raise errors.ProgrammerError("Missing input parameter '%s' to"
8013
                                     " IAllocator" % key)
8014
    self._BuildInputData()
8015

    
8016
  def _ComputeClusterData(self):
8017
    """Compute the generic allocator input data.
8018

8019
    This is the data that is independent of the actual operation.
8020

8021
    """
8022
    cfg = self.cfg
8023
    cluster_info = cfg.GetClusterInfo()
8024
    # cluster data
8025
    data = {
8026
      "version": constants.IALLOCATOR_VERSION,
8027
      "cluster_name": cfg.GetClusterName(),
8028
      "cluster_tags": list(cluster_info.GetTags()),
8029
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
8030
      # we don't have job IDs
8031
      }
8032
    iinfo = cfg.GetAllInstancesInfo().values()
8033
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
8034

    
8035
    # node data
8036
    node_results = {}
8037
    node_list = cfg.GetNodeList()
8038

    
8039
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8040
      hypervisor_name = self.hypervisor
8041
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
8042
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
8043

    
8044
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
8045
                                        hypervisor_name)
8046
    node_iinfo = \
8047
      self.rpc.call_all_instances_info(node_list,
8048
                                       cluster_info.enabled_hypervisors)
8049
    for nname, nresult in node_data.items():
8050
      # first fill in static (config-based) values
8051
      ninfo = cfg.GetNodeInfo(nname)
8052
      pnr = {
8053
        "tags": list(ninfo.GetTags()),
8054
        "primary_ip": ninfo.primary_ip,
8055
        "secondary_ip": ninfo.secondary_ip,
8056
        "offline": ninfo.offline,
8057
        "drained": ninfo.drained,
8058
        "master_candidate": ninfo.master_candidate,
8059
        }
8060

    
8061
      if not (ninfo.offline or ninfo.drained):
8062
        nresult.Raise("Can't get data for node %s" % nname)
8063
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
8064
                                nname)
8065
        remote_info = nresult.payload
8066

    
8067
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
8068
                     'vg_size', 'vg_free', 'cpu_total']:
8069
          if attr not in remote_info:
8070
            raise errors.OpExecError("Node '%s' didn't return attribute"
8071
                                     " '%s'" % (nname, attr))
8072
          if not isinstance(remote_info[attr], int):
8073
            raise errors.OpExecError("Node '%s' returned invalid value"
8074
                                     " for '%s': %s" %
8075
                                     (nname, attr, remote_info[attr]))
8076
        # compute memory used by primary instances
8077
        i_p_mem = i_p_up_mem = 0
8078
        for iinfo, beinfo in i_list:
8079
          if iinfo.primary_node == nname:
8080
            i_p_mem += beinfo[constants.BE_MEMORY]
8081
            if iinfo.name not in node_iinfo[nname].payload:
8082
              i_used_mem = 0
8083
            else:
8084
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
8085
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
8086
            remote_info['memory_free'] -= max(0, i_mem_diff)
8087

    
8088
            if iinfo.admin_up:
8089
              i_p_up_mem += beinfo[constants.BE_MEMORY]
8090

    
8091
        # compute memory used by instances
8092
        pnr_dyn = {
8093
          "total_memory": remote_info['memory_total'],
8094
          "reserved_memory": remote_info['memory_dom0'],
8095
          "free_memory": remote_info['memory_free'],
8096
          "total_disk": remote_info['vg_size'],
8097
          "free_disk": remote_info['vg_free'],
8098
          "total_cpus": remote_info['cpu_total'],
8099
          "i_pri_memory": i_p_mem,
8100
          "i_pri_up_memory": i_p_up_mem,
8101
          }
8102
        pnr.update(pnr_dyn)
8103

    
8104
      node_results[nname] = pnr
8105
    data["nodes"] = node_results
8106

    
8107
    # instance data
8108
    instance_data = {}
8109
    for iinfo, beinfo in i_list:
8110
      nic_data = []
8111
      for nic in iinfo.nics:
8112
        filled_params = objects.FillDict(
8113
            cluster_info.nicparams[constants.PP_DEFAULT],
8114
            nic.nicparams)
8115
        nic_dict = {"mac": nic.mac,
8116
                    "ip": nic.ip,
8117
                    "mode": filled_params[constants.NIC_MODE],
8118
                    "link": filled_params[constants.NIC_LINK],
8119
                   }
8120
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
8121
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
8122
        nic_data.append(nic_dict)
8123
      pir = {
8124
        "tags": list(iinfo.GetTags()),
8125
        "admin_up": iinfo.admin_up,
8126
        "vcpus": beinfo[constants.BE_VCPUS],
8127
        "memory": beinfo[constants.BE_MEMORY],
8128
        "os": iinfo.os,
8129
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
8130
        "nics": nic_data,
8131
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
8132
        "disk_template": iinfo.disk_template,
8133
        "hypervisor": iinfo.hypervisor,
8134
        }
8135
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
8136
                                                 pir["disks"])
8137
      instance_data[iinfo.name] = pir
8138

    
8139
    data["instances"] = instance_data
8140

    
8141
    self.in_data = data
8142

    
8143
  def _AddNewInstance(self):
8144
    """Add new instance data to allocator structure.
8145

8146
    This in combination with _AllocatorGetClusterData will create the
8147
    correct structure needed as input for the allocator.
8148

8149
    The checks for the completeness of the opcode must have already been
8150
    done.
8151

8152
    """
8153
    data = self.in_data
8154

    
8155
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
8156

    
8157
    if self.disk_template in constants.DTS_NET_MIRROR:
8158
      self.required_nodes = 2
8159
    else:
8160
      self.required_nodes = 1
8161
    request = {
8162
      "type": "allocate",
8163
      "name": self.name,
8164
      "disk_template": self.disk_template,
8165
      "tags": self.tags,
8166
      "os": self.os,
8167
      "vcpus": self.vcpus,
8168
      "memory": self.mem_size,
8169
      "disks": self.disks,
8170
      "disk_space_total": disk_space,
8171
      "nics": self.nics,
8172
      "required_nodes": self.required_nodes,
8173
      }
8174
    data["request"] = request
8175

    
8176
  def _AddRelocateInstance(self):
8177
    """Add relocate instance data to allocator structure.
8178

8179
    This in combination with _IAllocatorGetClusterData will create the
8180
    correct structure needed as input for the allocator.
8181

8182
    The checks for the completeness of the opcode must have already been
8183
    done.
8184

8185
    """
8186
    instance = self.cfg.GetInstanceInfo(self.name)
8187
    if instance is None:
8188
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
8189
                                   " IAllocator" % self.name)
8190

    
8191
    if instance.disk_template not in constants.DTS_NET_MIRROR:
8192
      raise errors.OpPrereqError("Can't relocate non-mirrored instances")
8193

    
8194
    if len(instance.secondary_nodes) != 1:
8195
      raise errors.OpPrereqError("Instance has not exactly one secondary node")
8196

    
8197
    self.required_nodes = 1
8198
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
8199
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
8200

    
8201
    request = {
8202
      "type": "relocate",
8203
      "name": self.name,
8204
      "disk_space_total": disk_space,
8205
      "required_nodes": self.required_nodes,
8206
      "relocate_from": self.relocate_from,
8207
      }
8208
    self.in_data["request"] = request
8209

    
8210
  def _BuildInputData(self):
8211
    """Build input data structures.
8212

8213
    """
8214
    self._ComputeClusterData()
8215

    
8216
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8217
      self._AddNewInstance()
8218
    else:
8219
      self._AddRelocateInstance()
8220

    
8221
    self.in_text = serializer.Dump(self.in_data)
8222

    
8223
  def Run(self, name, validate=True, call_fn=None):
8224
    """Run an instance allocator and return the results.
8225

8226
    """
8227
    if call_fn is None:
8228
      call_fn = self.rpc.call_iallocator_runner
8229

    
8230
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
8231
    result.Raise("Failure while running the iallocator script")
8232

    
8233
    self.out_text = result.payload
8234
    if validate:
8235
      self._ValidateResult()
8236

    
8237
  def _ValidateResult(self):
8238
    """Process the allocator results.
8239

8240
    This will process and if successful save the result in
8241
    self.out_data and the other parameters.
8242

8243
    """
8244
    try:
8245
      rdict = serializer.Load(self.out_text)
8246
    except Exception, err:
8247
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
8248

    
8249
    if not isinstance(rdict, dict):
8250
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
8251

    
8252
    for key in "success", "info", "nodes":
8253
      if key not in rdict:
8254
        raise errors.OpExecError("Can't parse iallocator results:"
8255
                                 " missing key '%s'" % key)
8256
      setattr(self, key, rdict[key])
8257

    
8258
    if not isinstance(rdict["nodes"], list):
8259
      raise errors.OpExecError("Can't parse iallocator results: 'nodes' key"
8260
                               " is not a list")
8261
    self.out_data = rdict
8262

    
8263

    
8264
class LUTestAllocator(NoHooksLU):
8265
  """Run allocator tests.
8266

8267
  This LU runs the allocator tests
8268

8269
  """
8270
  _OP_REQP = ["direction", "mode", "name"]
8271

    
8272
  def CheckPrereq(self):
8273
    """Check prerequisites.
8274

8275
    This checks the opcode parameters depending on the director and mode test.
8276

8277
    """
8278
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
8279
      for attr in ["name", "mem_size", "disks", "disk_template",
8280
                   "os", "tags", "nics", "vcpus"]:
8281
        if not hasattr(self.op, attr):
8282
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
8283
                                     attr)
8284
      iname = self.cfg.ExpandInstanceName(self.op.name)
8285
      if iname is not None:
8286
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
8287
                                   iname)
8288
      if not isinstance(self.op.nics, list):
8289
        raise errors.OpPrereqError("Invalid parameter 'nics'")
8290
      for row in self.op.nics:
8291
        if (not isinstance(row, dict) or
8292
            "mac" not in row or
8293
            "ip" not in row or
8294
            "bridge" not in row):
8295
          raise errors.OpPrereqError("Invalid contents of the"
8296
                                     " 'nics' parameter")
8297
      if not isinstance(self.op.disks, list):
8298
        raise errors.OpPrereqError("Invalid parameter 'disks'")
8299
      for row in self.op.disks:
8300
        if (not isinstance(row, dict) or
8301
            "size" not in row or
8302
            not isinstance(row["size"], int) or
8303
            "mode" not in row or
8304
            row["mode"] not in ['r', 'w']):
8305
          raise errors.OpPrereqError("Invalid contents of the"
8306
                                     " 'disks' parameter")
8307
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
8308
        self.op.hypervisor = self.cfg.GetHypervisorType()
8309
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
8310
      if not hasattr(self.op, "name"):
8311
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input")
8312
      fname = self.cfg.ExpandInstanceName(self.op.name)
8313
      if fname is None:
8314
        raise errors.OpPrereqError("Instance '%s' not found for relocation" %
8315
                                   self.op.name)
8316
      self.op.name = fname
8317
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
8318
    else:
8319
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
8320
                                 self.op.mode)
8321

    
8322
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
8323
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
8324
        raise errors.OpPrereqError("Missing allocator name")
8325
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
8326
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
8327
                                 self.op.direction)
8328

    
8329
  def Exec(self, feedback_fn):
8330
    """Run the allocator test.
8331

8332
    """
8333
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
8334
      ial = IAllocator(self.cfg, self.rpc,
8335
                       mode=self.op.mode,
8336
                       name=self.op.name,
8337
                       mem_size=self.op.mem_size,
8338
                       disks=self.op.disks,
8339
                       disk_template=self.op.disk_template,
8340
                       os=self.op.os,
8341
                       tags=self.op.tags,
8342
                       nics=self.op.nics,
8343
                       vcpus=self.op.vcpus,
8344
                       hypervisor=self.op.hypervisor,
8345
                       )
8346
    else:
8347
      ial = IAllocator(self.cfg, self.rpc,
8348
                       mode=self.op.mode,
8349
                       name=self.op.name,
8350
                       relocate_from=list(self.relocate_from),
8351
                       )
8352

    
8353
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
8354
      result = ial.in_text
8355
    else:
8356
      ial.Run(self.op.allocator, validate=False)
8357
      result = ial.out_text
8358
    return result