root / lib / cmdlib.py @ 19bed813

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0613,W0201
25

    
26
import os
27
import os.path
28
import time
29
import re
30
import platform
31
import logging
32
import copy
33

    
34
from ganeti import ssh
35
from ganeti import utils
36
from ganeti import errors
37
from ganeti import hypervisor
38
from ganeti import locking
39
from ganeti import constants
40
from ganeti import objects
41
from ganeti import serializer
42
from ganeti import ssconf
43

    
44

    
45
class LogicalUnit(object):
46
  """Logical Unit base class.
47

48
  Subclasses must follow these rules:
49
    - implement ExpandNames
50
    - implement CheckPrereq (except when tasklets are used)
51
    - implement Exec (except when tasklets are used)
52
    - implement BuildHooksEnv
53
    - redefine HPATH and HTYPE
54
    - optionally redefine their run requirements:
55
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
56

57
  Note that all commands require root permissions.
58

59
  @ivar dry_run_result: the value (if any) that will be returned to the caller
60
      in dry-run mode (signalled by opcode dry_run parameter)
61

62
  """
63
  HPATH = None
64
  HTYPE = None
65
  _OP_REQP = []
66
  REQ_BGL = True
67

    
68
  def __init__(self, processor, op, context, rpc):
69
    """Constructor for LogicalUnit.
70

71
    This needs to be overridden in derived classes in order to check op
72
    validity.
73

74
    """
75
    self.proc = processor
76
    self.op = op
77
    self.cfg = context.cfg
78
    self.context = context
79
    self.rpc = rpc
80
    # Dicts used to declare locking needs to mcpu
81
    self.needed_locks = None
82
    self.acquired_locks = {}
83
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
84
    self.add_locks = {}
85
    self.remove_locks = {}
86
    # Used to force good behavior when calling helper functions
87
    self.recalculate_locks = {}
88
    self.__ssh = None
89
    # logging
90
    self.LogWarning = processor.LogWarning
91
    self.LogInfo = processor.LogInfo
92
    self.LogStep = processor.LogStep
93
    # support for dry-run
94
    self.dry_run_result = None
95

    
96
    # Tasklets
97
    self.tasklets = None
98

    
99
    for attr_name in self._OP_REQP:
100
      attr_val = getattr(op, attr_name, None)
101
      if attr_val is None:
102
        raise errors.OpPrereqError("Required parameter '%s' missing" %
103
                                   attr_name)
104

    
105
    self.CheckArguments()
106

    
107
  def __GetSSH(self):
108
    """Returns the SshRunner object
109

110
    """
111
    if not self.__ssh:
112
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
113
    return self.__ssh
114

    
115
  ssh = property(fget=__GetSSH)
116

    
117
  def CheckArguments(self):
118
    """Check syntactic validity for the opcode arguments.
119

120
    This method is for doing a simple syntactic check and ensuring
121
    validity of opcode parameters, without any cluster-related
122
    checks. While the same can be accomplished in ExpandNames and/or
123
    CheckPrereq, doing these separately is better because:
124

125
      - ExpandNames is left as purely a lock-related function
126
      - CheckPrereq is run after we have acquired locks (and possibly
127
        waited for them)
128

129
    The function is allowed to change the self.op attribute so that
130
    later methods need not worry about missing parameters.
131

132
    """
133
    pass
134

    
135
  def ExpandNames(self):
136
    """Expand names for this LU.
137

138
    This method is called before starting to execute the opcode, and it should
139
    update all the parameters of the opcode to their canonical form (e.g. a
140
    short node name must be fully expanded after this method has successfully
141
    completed). This way locking, hooks, logging, etc. can work correctly.
142

143
    LUs which implement this method must also populate the self.needed_locks
144
    member, as a dict with lock levels as keys, and a list of needed lock names
145
    as values. Rules:
146

147
      - use an empty dict if you don't need any lock
148
      - if you don't need any lock at a particular level omit that level
149
      - don't put anything for the BGL level
150
      - if you want all locks at a level use locking.ALL_SET as a value
151

152
    If you need to share locks (rather than acquire them exclusively) at one
153
    level you can modify self.share_locks, setting a true value (usually 1) for
154
    that level. By default locks are not shared.
155

156
    This function can also define a list of tasklets, which then will be
157
    executed in order instead of the usual LU-level CheckPrereq and Exec
158
    functions, if those are not defined by the LU.
159

160
    Examples::
161

162
      # Acquire all nodes and one instance
163
      self.needed_locks = {
164
        locking.LEVEL_NODE: locking.ALL_SET,
165
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
166
      }
167
      # Acquire just two nodes
168
      self.needed_locks = {
169
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
170
      }
171
      # Acquire no locks
172
      self.needed_locks = {} # No, you can't leave it to the default value None
173

174
    """
175
    # The implementation of this method is mandatory only if the new LU is
176
    # concurrent, so that old LUs don't need to be changed all at the same
177
    # time.
178
    if self.REQ_BGL:
179
      self.needed_locks = {} # Exclusive LUs don't need locks.
180
    else:
181
      raise NotImplementedError
182

    
183
  def DeclareLocks(self, level):
184
    """Declare LU locking needs for a level
185

186
    While most LUs can just declare their locking needs at ExpandNames time,
187
    sometimes there's the need to calculate some locks after having acquired
188
    the ones before. This function is called just before acquiring locks at a
189
    particular level, but after acquiring the ones at lower levels, and permits
190
    such calculations. It can be used to modify self.needed_locks, and by
191
    default it does nothing.
192

193
    This function is only called if you have something already set in
194
    self.needed_locks for the level.
195

196
    @param level: Locking level which is going to be locked
197
    @type level: member of ganeti.locking.LEVELS
198

199
    """
200

    
201
  def CheckPrereq(self):
202
    """Check prerequisites for this LU.
203

204
    This method should check that the prerequisites for the execution
205
    of this LU are fulfilled. It can do internode communication, but
206
    it should be idempotent - no cluster or system changes are
207
    allowed.
208

209
    The method should raise errors.OpPrereqError in case something is
210
    not fulfilled. Its return value is ignored.
211

212
    This method should also update all the parameters of the opcode to
213
    their canonical form if it hasn't been done by ExpandNames before.
214

215
    """
216
    if self.tasklets is not None:
217
      for (idx, tl) in enumerate(self.tasklets):
218
        logging.debug("Checking prerequisites for tasklet %s/%s",
219
                      idx + 1, len(self.tasklets))
220
        tl.CheckPrereq()
221
    else:
222
      raise NotImplementedError
223

    
224
  def Exec(self, feedback_fn):
225
    """Execute the LU.
226

227
    This method should implement the actual work. It should raise
228
    errors.OpExecError for failures that are somewhat dealt with in
229
    code, or expected.
230

231
    """
232
    if self.tasklets is not None:
233
      for (idx, tl) in enumerate(self.tasklets):
234
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
235
        tl.Exec(feedback_fn)
236
    else:
237
      raise NotImplementedError
238

    
239
  def BuildHooksEnv(self):
240
    """Build hooks environment for this LU.
241

242
    This method should return a three-element tuple consisting of: a dict
243
    containing the environment that will be used for running the
244
    specific hook for this LU, a list of node names on which the hook
245
    should run before the execution, and a list of node names on which
246
    the hook should run after the execution.
247

248
    The keys of the dict must not be prefixed with 'GANETI_' as this will
249
    be handled in the hooks runner. Also note additional keys will be
250
    added by the hooks runner. If the LU doesn't define any
251
    environment, an empty dict (and not None) should be returned.
252

253
    If no nodes are needed, an empty list (and not None) should be returned.
254

255
    Note that if the HPATH for a LU class is None, this function will
256
    not be called.
257

258
    """
259
    raise NotImplementedError
260

    
261
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
262
    """Notify the LU about the results of its hooks.
263

264
    This method is called every time a hooks phase is executed, and notifies
265
    the Logical Unit about the hooks' result. The LU can then use it to alter
266
    its result based on the hooks.  By default the method does nothing and the
267
    previous result is passed back unchanged, but any LU can override it if it
268
    wants to use the local cluster hook-scripts somehow.
269

270
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
271
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
272
    @param hook_results: the results of the multi-node hooks rpc call
273
    @param feedback_fn: function used to send feedback back to the caller
274
    @param lu_result: the previous Exec result this LU had, or None
275
        in the PRE phase
276
    @return: the new Exec result, based on the previous result
277
        and hook results
278

279
    """
280
    return lu_result
281

    
282
  def _ExpandAndLockInstance(self):
283
    """Helper function to expand and lock an instance.
284

285
    Many LUs that work on an instance take its name in self.op.instance_name
286
    and need to expand it and then declare the expanded name for locking. This
287
    function does it, and then updates self.op.instance_name to the expanded
288
    name. It also initializes needed_locks as a dict, if this hasn't been done
289
    before.
290

291
    """
292
    if self.needed_locks is None:
293
      self.needed_locks = {}
294
    else:
295
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
296
        "_ExpandAndLockInstance called with instance-level locks set"
297
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
298
    if expanded_name is None:
299
      raise errors.OpPrereqError("Instance '%s' not known" %
300
                                  self.op.instance_name)
301
    self.needed_locks[locking.LEVEL_INSTANCE] = expanded_name
302
    self.op.instance_name = expanded_name
303

    
304
  def _LockInstancesNodes(self, primary_only=False):
305
    """Helper function to declare instances' nodes for locking.
306

307
    This function should be called after locking one or more instances to lock
308
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
309
    with all primary or secondary nodes for instances already locked and
310
    present in self.needed_locks[locking.LEVEL_INSTANCE].
311

312
    It should be called from DeclareLocks, and for safety only works if
313
    self.recalculate_locks[locking.LEVEL_NODE] is set.
314

315
    In the future it may grow parameters to just lock some instances' nodes, or
316
    to just lock primaries or secondary nodes, if needed.
317

318
    It should be called in DeclareLocks in a way similar to::
319

320
      if level == locking.LEVEL_NODE:
321
        self._LockInstancesNodes()
322

323
    @type primary_only: boolean
324
    @param primary_only: only lock primary nodes of locked instances
325

326
    """
327
    assert locking.LEVEL_NODE in self.recalculate_locks, \
328
      "_LockInstancesNodes helper function called with no nodes to recalculate"
329

    
330
    # TODO: check if we've really been called with the instance locks held
331

    
332
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
333
    # future we might want to have different behaviors depending on the value
334
    # of self.recalculate_locks[locking.LEVEL_NODE]
335
    wanted_nodes = []
336
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
337
      instance = self.context.cfg.GetInstanceInfo(instance_name)
338
      wanted_nodes.append(instance.primary_node)
339
      if not primary_only:
340
        wanted_nodes.extend(instance.secondary_nodes)
341

    
342
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
343
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
344
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
345
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
346

    
347
    del self.recalculate_locks[locking.LEVEL_NODE]
348

    
349

    
350
class NoHooksLU(LogicalUnit):
351
  """Simple LU which runs no hooks.
352

353
  This LU is intended as a parent for other LogicalUnits which will
354
  run no hooks, in order to reduce duplicate code.
355

356
  """
357
  HPATH = None
358
  HTYPE = None
359
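
# Illustrative sketch, not part of the original module: a hypothetical LU
# following the contract documented in LogicalUnit above.  The class name and
# the opcode attribute "instance_name" are assumptions for illustration only.
# It also shows the DeclareLocks/_LockInstancesNodes pattern described in the
# _LockInstancesNodes docstring.
class _LUExampleQueryInstanceNodes(NoHooksLU):
  """Example LU: return the node names an instance uses.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    # Expand the instance name and declare the instance-level lock
    self._ExpandAndLockInstance()
    # Node locks are computed later, once the instance lock is held
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    if self.instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)

  def Exec(self, feedback_fn):
    instance = self.instance
    return [instance.primary_node] + list(instance.secondary_nodes)
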

    
360

    
361
class Tasklet:
362
  """Tasklet base class.
363

364
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
365
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
366
  tasklets know nothing about locks.
367

368
  Subclasses must follow these rules:
369
    - Implement CheckPrereq
370
    - Implement Exec
371

372
  """
373
  def __init__(self, lu):
374
    self.lu = lu
375

    
376
    # Shortcuts
377
    self.cfg = lu.cfg
378
    self.rpc = lu.rpc
379

    
380
  def CheckPrereq(self):
381
    """Check prerequisites for this tasklets.
382

383
    This method should check whether the prerequisites for the execution of
384
    this tasklet are fulfilled. It can do internode communication, but it
385
    should be idempotent - no cluster or system changes are allowed.
386

387
    The method should raise errors.OpPrereqError in case something is not
388
    fulfilled. Its return value is ignored.
389

390
    This method should also update all parameters to their canonical form if it
391
    hasn't been done before.
392

393
    """
394
    raise NotImplementedError
395

    
396
  def Exec(self, feedback_fn):
397
    """Execute the tasklet.
398

399
    This method should implement the actual work. It should raise
400
    errors.OpExecError for failures that are somewhat dealt with in code, or
401
    expected.
402

403
    """
404
    raise NotImplementedError
405
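
# Illustrative sketch, not part of the original module: a hypothetical tasklet
# following the contract documented in Tasklet above.  An LU would typically
# wire such tasklets up from ExpandNames, e.g. (assuming an opcode attribute
# "nodes"):
#
#   self.tasklets = [_ExampleCheckNodeOnline(self, name)
#                    for name in self.op.nodes]
#
# after which the default LogicalUnit.CheckPrereq and Exec run each tasklet.
class _ExampleCheckNodeOnline(Tasklet):
  """Example tasklet: verify that a node is marked online.

  """
  def __init__(self, lu, node_name):
    Tasklet.__init__(self, lu)
    self.node_name = node_name

  def CheckPrereq(self):
    if self.cfg.GetNodeInfo(self.node_name) is None:
      raise errors.OpPrereqError("Node '%s' not known" % self.node_name)

  def Exec(self, feedback_fn):
    _CheckNodeOnline(self.lu, self.node_name)
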

    
406

    
407
def _GetWantedNodes(lu, nodes):
408
  """Returns list of checked and expanded node names.
409

410
  @type lu: L{LogicalUnit}
411
  @param lu: the logical unit on whose behalf we execute
412
  @type nodes: list
413
  @param nodes: list of node names or None for all nodes
414
  @rtype: list
415
  @return: the list of nodes, sorted
416
  @raise errors.OpPrereqError: if the nodes parameter is of the wrong type
417

418
  """
419
  if not isinstance(nodes, list):
420
    raise errors.OpPrereqError("Invalid argument type 'nodes'")
421

    
422
  if not nodes:
423
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
424
      " non-empty list of nodes whose name is to be expanded.")
425

    
426
  wanted = []
427
  for name in nodes:
428
    node = lu.cfg.ExpandNodeName(name)
429
    if node is None:
430
      raise errors.OpPrereqError("No such node name '%s'" % name)
431
    wanted.append(node)
432

    
433
  return utils.NiceSort(wanted)
434

    
435

    
436
def _GetWantedInstances(lu, instances):
437
  """Returns list of checked and expanded instance names.
438

439
  @type lu: L{LogicalUnit}
440
  @param lu: the logical unit on whose behalf we execute
441
  @type instances: list
442
  @param instances: list of instance names or None for all instances
443
  @rtype: list
444
  @return: the list of instances, sorted
445
  @raise errors.OpPrereqError: if the instances parameter is of the wrong type
446
  @raise errors.OpPrereqError: if any of the passed instances is not found
447

448
  """
449
  if not isinstance(instances, list):
450
    raise errors.OpPrereqError("Invalid argument type 'instances'")
451

    
452
  if instances:
453
    wanted = []
454

    
455
    for name in instances:
456
      instance = lu.cfg.ExpandInstanceName(name)
457
      if instance is None:
458
        raise errors.OpPrereqError("No such instance name '%s'" % name)
459
      wanted.append(instance)
460

    
461
  else:
462
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
463
  return wanted
464

    
465

    
466
def _CheckOutputFields(static, dynamic, selected):
467
  """Checks whether all selected fields are valid.
468

469
  @type static: L{utils.FieldSet}
470
  @param static: static fields set
471
  @type dynamic: L{utils.FieldSet}
472
  @param dynamic: dynamic fields set
473

474
  """
475
  f = utils.FieldSet()
476
  f.Extend(static)
477
  f.Extend(dynamic)
478

    
479
  delta = f.NonMatching(selected)
480
  if delta:
481
    raise errors.OpPrereqError("Unknown output fields selected: %s"
482
                               % ",".join(delta))
483

    
484

    
485
def _CheckBooleanOpField(op, name):
486
  """Validates boolean opcode parameters.
487

488
  This will ensure that an opcode parameter is either a boolean value,
489
  or None (but that it always exists).
490

491
  """
492
  val = getattr(op, name, None)
493
  if not (val is None or isinstance(val, bool)):
494
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
495
                               (name, str(val)))
496
  setattr(op, name, val)
497

    
498

    
499
def _CheckNodeOnline(lu, node):
500
  """Ensure that a given node is online.
501

502
  @param lu: the LU on behalf of which we make the check
503
  @param node: the node to check
504
  @raise errors.OpPrereqError: if the node is offline
505

506
  """
507
  if lu.cfg.GetNodeInfo(node).offline:
508
    raise errors.OpPrereqError("Can't use offline node %s" % node)
509

    
510

    
511
def _CheckNodeNotDrained(lu, node):
512
  """Ensure that a given node is not drained.
513

514
  @param lu: the LU on behalf of which we make the check
515
  @param node: the node to check
516
  @raise errors.OpPrereqError: if the node is drained
517

518
  """
519
  if lu.cfg.GetNodeInfo(node).drained:
520
    raise errors.OpPrereqError("Can't use drained node %s" % node)
521

    
522

    
523
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
524
                          memory, vcpus, nics, disk_template, disks,
525
                          bep, hvp, hypervisor_name):
526
  """Builds instance related env variables for hooks
527

528
  This builds the hook environment from individual variables.
529

530
  @type name: string
531
  @param name: the name of the instance
532
  @type primary_node: string
533
  @param primary_node: the name of the instance's primary node
534
  @type secondary_nodes: list
535
  @param secondary_nodes: list of secondary nodes as strings
536
  @type os_type: string
537
  @param os_type: the name of the instance's OS
538
  @type status: boolean
539
  @param status: the should_run status of the instance
540
  @type memory: string
541
  @param memory: the memory size of the instance
542
  @type vcpus: string
543
  @param vcpus: the count of VCPUs the instance has
544
  @type nics: list
545
  @param nics: list of tuples (ip, mac, mode, link) representing
546
      the NICs the instance has
547
  @type disk_template: string
548
  @param disk_template: the disk template of the instance
549
  @type disks: list
550
  @param disks: the list of (size, mode) pairs
551
  @type bep: dict
552
  @param bep: the backend parameters for the instance
553
  @type hvp: dict
554
  @param hvp: the hypervisor parameters for the instance
555
  @type hypervisor_name: string
556
  @param hypervisor_name: the hypervisor for the instance
557
  @rtype: dict
558
  @return: the hook environment for this instance
559

560
  """
561
  if status:
562
    str_status = "up"
563
  else:
564
    str_status = "down"
565
  env = {
566
    "OP_TARGET": name,
567
    "INSTANCE_NAME": name,
568
    "INSTANCE_PRIMARY": primary_node,
569
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
570
    "INSTANCE_OS_TYPE": os_type,
571
    "INSTANCE_STATUS": str_status,
572
    "INSTANCE_MEMORY": memory,
573
    "INSTANCE_VCPUS": vcpus,
574
    "INSTANCE_DISK_TEMPLATE": disk_template,
575
    "INSTANCE_HYPERVISOR": hypervisor_name,
576
  }
577

    
578
  if nics:
579
    nic_count = len(nics)
580
    for idx, (ip, mac, mode, link) in enumerate(nics):
581
      if ip is None:
582
        ip = ""
583
      env["INSTANCE_NIC%d_IP" % idx] = ip
584
      env["INSTANCE_NIC%d_MAC" % idx] = mac
585
      env["INSTANCE_NIC%d_MODE" % idx] = mode
586
      env["INSTANCE_NIC%d_LINK" % idx] = link
587
      if mode == constants.NIC_MODE_BRIDGED:
588
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
589
  else:
590
    nic_count = 0
591

    
592
  env["INSTANCE_NIC_COUNT"] = nic_count
593

    
594
  if disks:
595
    disk_count = len(disks)
596
    for idx, (size, mode) in enumerate(disks):
597
      env["INSTANCE_DISK%d_SIZE" % idx] = size
598
      env["INSTANCE_DISK%d_MODE" % idx] = mode
599
  else:
600
    disk_count = 0
601

    
602
  env["INSTANCE_DISK_COUNT"] = disk_count
603

    
604
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
605
    for key, value in source.items():
606
      env["INSTANCE_%s_%s" % (kind, key)] = value
607

    
608
  return env
609
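
# For illustration only (hypothetical values): for an instance
# "inst1.example.com" with one bridged NIC and one 10240 MB disk, the dict
# built above would contain, among others:
#
#   OP_TARGET=inst1.example.com      INSTANCE_PRIMARY=node1.example.com
#   INSTANCE_STATUS=up               INSTANCE_NIC_COUNT=1
#   INSTANCE_NIC0_MODE=bridged       INSTANCE_NIC0_BRIDGE=xen-br0
#   INSTANCE_DISK_COUNT=1            INSTANCE_DISK0_SIZE=10240
#
# plus one INSTANCE_BE_* / INSTANCE_HV_* entry per backend/hypervisor
# parameter; the hooks runner adds the "GANETI_" prefix to every key.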

    
610

    
611
def _NICListToTuple(lu, nics):
612
  """Build a list of nic information tuples.
613

614
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
615
  value in LUQueryInstanceData.
616

617
  @type lu:  L{LogicalUnit}
618
  @param lu: the logical unit on whose behalf we execute
619
  @type nics: list of L{objects.NIC}
620
  @param nics: list of nics to convert to hooks tuples
621

622
  """
623
  hooks_nics = []
624
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
625
  for nic in nics:
626
    ip = nic.ip
627
    mac = nic.mac
628
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
629
    mode = filled_params[constants.NIC_MODE]
630
    link = filled_params[constants.NIC_LINK]
631
    hooks_nics.append((ip, mac, mode, link))
632
  return hooks_nics
633

    
634

    
635
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
636
  """Builds instance related env variables for hooks from an object.
637

638
  @type lu: L{LogicalUnit}
639
  @param lu: the logical unit on whose behalf we execute
640
  @type instance: L{objects.Instance}
641
  @param instance: the instance for which we should build the
642
      environment
643
  @type override: dict
644
  @param override: dictionary with key/values that will override
645
      our values
646
  @rtype: dict
647
  @return: the hook environment dictionary
648

649
  """
650
  cluster = lu.cfg.GetClusterInfo()
651
  bep = cluster.FillBE(instance)
652
  hvp = cluster.FillHV(instance)
653
  args = {
654
    'name': instance.name,
655
    'primary_node': instance.primary_node,
656
    'secondary_nodes': instance.secondary_nodes,
657
    'os_type': instance.os,
658
    'status': instance.admin_up,
659
    'memory': bep[constants.BE_MEMORY],
660
    'vcpus': bep[constants.BE_VCPUS],
661
    'nics': _NICListToTuple(lu, instance.nics),
662
    'disk_template': instance.disk_template,
663
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
664
    'bep': bep,
665
    'hvp': hvp,
666
    'hypervisor_name': instance.hypervisor,
667
  }
668
  if override:
669
    args.update(override)
670
  return _BuildInstanceHookEnv(**args)
671

    
672

    
673
def _AdjustCandidatePool(lu):
674
  """Adjust the candidate pool after node operations.
675

676
  """
677
  mod_list = lu.cfg.MaintainCandidatePool()
678
  if mod_list:
679
    lu.LogInfo("Promoted nodes to master candidate role: %s",
680
               ", ".join(node.name for node in mod_list))
681
    for name in mod_list:
682
      lu.context.ReaddNode(name)
683
  mc_now, mc_max = lu.cfg.GetMasterCandidateStats()
684
  if mc_now > mc_max:
685
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
686
               (mc_now, mc_max))
687

    
688

    
689
def _CheckNicsBridgesExist(lu, target_nics, target_node,
690
                               profile=constants.PP_DEFAULT):
691
  """Check that the brigdes needed by a list of nics exist.
692

693
  """
694
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
695
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
696
                for nic in target_nics]
697
  brlist = [params[constants.NIC_LINK] for params in paramslist
698
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
699
  if brlist:
700
    result = lu.rpc.call_bridges_exist(target_node, brlist)
701
    result.Raise("Error checking bridges on destination node '%s'" %
702
                 target_node, prereq=True)
703

    
704

    
705
def _CheckInstanceBridgesExist(lu, instance, node=None):
706
  """Check that the brigdes needed by an instance exist.
707

708
  """
709
  if node is None:
710
    node = instance.primary_node
711
  _CheckNicsBridgesExist(lu, instance.nics, node)
712

    
713

    
714
def _GetNodeInstancesInner(cfg, fn):
715
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
716

    
717

    
718
def _GetNodeInstances(cfg, node_name):
719
  """Returns a list of all primary and secondary instances on a node.
720

721
  """
722

    
723
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
724

    
725

    
726
def _GetNodePrimaryInstances(cfg, node_name):
727
  """Returns primary instances on a node.
728

729
  """
730
  return _GetNodeInstancesInner(cfg,
731
                                lambda inst: node_name == inst.primary_node)
732

    
733

    
734
def _GetNodeSecondaryInstances(cfg, node_name):
735
  """Returns secondary instances on a node.
736

737
  """
738
  return _GetNodeInstancesInner(cfg,
739
                                lambda inst: node_name in inst.secondary_nodes)
740

    
741

    
742
def _GetStorageTypeArgs(cfg, storage_type):
743
  """Returns the arguments for a storage type.
744

745
  """
746
  # Special case for file storage
747
  if storage_type == constants.ST_FILE:
748
    # storage.FileStorage wants a list of storage directories
749
    return [[cfg.GetFileStorageDir()]]
750

    
751
  return []
752

    
753

    
754
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
755
  faulty = []
756

    
757
  for dev in instance.disks:
758
    cfg.SetDiskID(dev, node_name)
759

    
760
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
761
  result.Raise("Failed to get disk status from node %s" % node_name,
762
               prereq=prereq)
763

    
764
  for idx, bdev_status in enumerate(result.payload):
765
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
766
      faulty.append(idx)
767

    
768
  return faulty
769
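
# Usage sketch (illustrative): callers typically invoke this from CheckPrereq
# or Exec as
#
#   faulty = _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
#                                     node_name, True)
#
# and get back the list of disk indices whose status on the given node is
# constants.LDS_FAULTY.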

    
770

    
771
class LUPostInitCluster(LogicalUnit):
772
  """Logical unit for running hooks after cluster initialization.
773

774
  """
775
  HPATH = "cluster-init"
776
  HTYPE = constants.HTYPE_CLUSTER
777
  _OP_REQP = []
778

    
779
  def BuildHooksEnv(self):
780
    """Build hooks env.
781

782
    """
783
    env = {"OP_TARGET": self.cfg.GetClusterName()}
784
    mn = self.cfg.GetMasterNode()
785
    return env, [], [mn]
786

    
787
  def CheckPrereq(self):
788
    """No prerequisites to check.
789

790
    """
791
    return True
792

    
793
  def Exec(self, feedback_fn):
794
    """Nothing to do.
795

796
    """
797
    return True
798

    
799

    
800
class LUDestroyCluster(LogicalUnit):
801
  """Logical unit for destroying the cluster.
802

803
  """
804
  HPATH = "cluster-destroy"
805
  HTYPE = constants.HTYPE_CLUSTER
806
  _OP_REQP = []
807

    
808
  def BuildHooksEnv(self):
809
    """Build hooks env.
810

811
    """
812
    env = {"OP_TARGET": self.cfg.GetClusterName()}
813
    return env, [], []
814

    
815
  def CheckPrereq(self):
816
    """Check prerequisites.
817

818
    This checks whether the cluster is empty.
819

820
    Any errors are signaled by raising errors.OpPrereqError.
821

822
    """
823
    master = self.cfg.GetMasterNode()
824

    
825
    nodelist = self.cfg.GetNodeList()
826
    if len(nodelist) != 1 or nodelist[0] != master:
827
      raise errors.OpPrereqError("There are still %d node(s) in"
828
                                 " this cluster." % (len(nodelist) - 1))
829
    instancelist = self.cfg.GetInstanceList()
830
    if instancelist:
831
      raise errors.OpPrereqError("There are still %d instance(s) in"
832
                                 " this cluster." % len(instancelist))
833

    
834
  def Exec(self, feedback_fn):
835
    """Destroys the cluster.
836

837
    """
838
    master = self.cfg.GetMasterNode()
839

    
840
    # Run post hooks on master node before it's removed
841
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
842
    try:
843
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
844
    except:
845
      self.LogWarning("Errors occurred running hooks on %s" % master)
846

    
847
    result = self.rpc.call_node_stop_master(master, False)
848
    result.Raise("Could not disable the master role")
849
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
850
    utils.CreateBackup(priv_key)
851
    utils.CreateBackup(pub_key)
852
    return master
853

    
854

    
855
class LUVerifyCluster(LogicalUnit):
856
  """Verifies the cluster status.
857

858
  """
859
  HPATH = "cluster-verify"
860
  HTYPE = constants.HTYPE_CLUSTER
861
  _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
862
  REQ_BGL = False
863

    
864
  TCLUSTER = "cluster"
865
  TNODE = "node"
866
  TINSTANCE = "instance"
867

    
868
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
869
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
870
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
871
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
872
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
874
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
875
  ENODEDRBD = (TNODE, "ENODEDRBD")
876
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
877
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
878
  ENODEHV = (TNODE, "ENODEHV")
879
  ENODELVM = (TNODE, "ENODELVM")
880
  ENODEN1 = (TNODE, "ENODEN1")
881
  ENODENET = (TNODE, "ENODENET")
882
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
883
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
884
  ENODERPC = (TNODE, "ENODERPC")
885
  ENODESSH = (TNODE, "ENODESSH")
886
  ENODEVERSION = (TNODE, "ENODEVERSION")
887

    
888
  ETYPE_FIELD = "code"
889
  ETYPE_ERROR = "ERROR"
890
  ETYPE_WARNING = "WARNING"
891

    
892
  def ExpandNames(self):
893
    self.needed_locks = {
894
      locking.LEVEL_NODE: locking.ALL_SET,
895
      locking.LEVEL_INSTANCE: locking.ALL_SET,
896
    }
897
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
898

    
899
  def _Error(self, ecode, item, msg, *args, **kwargs):
900
    """Format an error message.
901

902
    Based on the opcode's error_codes parameter, either format a
903
    parseable error code, or a simpler error string.
904

905
    This must be called only from Exec and functions called from Exec.
906

907
    """
908
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
909
    itype, etxt = ecode
910
    # first complete the msg
911
    if args:
912
      msg = msg % args
913
    # then format the whole message
914
    if self.op.error_codes:
915
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
916
    else:
917
      if item:
918
        item = " " + item
919
      else:
920
        item = ""
921
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
922
    # and finally report it via the feedback_fn
923
    self._feedback_fn("  - %s" % msg)
924

    
925
  def _ErrorIf(self, cond, *args, **kwargs):
926
    """Log an error message if the passed condition is True.
927

928
    """
929
    cond = bool(cond) or self.op.debug_simulate_errors
930
    if cond:
931
      self._Error(*args, **kwargs)
932
    # do not mark the operation as failed for WARN cases only
933
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
934
      self.bad = self.bad or cond
935
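
  # For illustration: with the opcode's error_codes option set, _Error above
  # reports e.g.
  #   - ERROR:ENODELVM:node:node1.example.com:unable to check volume groups
  # while without it the same problem is reported as
  #   - ERROR: node node1.example.com: unable to check volume groups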

    
936
  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
937
                  node_result, master_files, drbd_map, vg_name):
938
    """Run multiple tests against a node.
939

940
    Test list:
941

942
      - compares ganeti version
943
      - checks vg existence and size > 20G
944
      - checks config file checksum
945
      - checks ssh to other nodes
946

947
    @type nodeinfo: L{objects.Node}
948
    @param nodeinfo: the node to check
949
    @param file_list: required list of files
950
    @param local_cksum: dictionary of local files and their checksums
951
    @param node_result: the results from the node
952
    @param master_files: list of files that only masters should have
953
    @param drbd_map: the used drbd minors for this node, in
954
        form of minor: (instance, must_exist) which correspond to instances
955
        and their running status
956
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())
957

958
    """
959
    node = nodeinfo.name
960
    _ErrorIf = self._ErrorIf
961

    
962
    # main result, node_result should be a non-empty dict
963
    test = not node_result or not isinstance(node_result, dict)
964
    _ErrorIf(test, self.ENODERPC, node,
965
                  "unable to verify node: no data returned")
966
    if test:
967
      return
968

    
969
    # compares ganeti version
970
    local_version = constants.PROTOCOL_VERSION
971
    remote_version = node_result.get('version', None)
972
    test = not (remote_version and
973
                isinstance(remote_version, (list, tuple)) and
974
                len(remote_version) == 2)
975
    _ErrorIf(test, self.ENODERPC, node,
976
             "connection to node returned invalid data")
977
    if test:
978
      return
979

    
980
    test = local_version != remote_version[0]
981
    _ErrorIf(test, self.ENODEVERSION, node,
982
             "incompatible protocol versions: master %s,"
983
             " node %s", local_version, remote_version[0])
984
    if test:
985
      return
986

    
987
    # node seems compatible, we can actually try to look into its results
988

    
989
    # full package version
990
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
991
                  self.ENODEVERSION, node,
992
                  "software version mismatch: master %s, node %s",
993
                  constants.RELEASE_VERSION, remote_version[1],
994
                  code=self.ETYPE_WARNING)
995

    
996
    # checks vg existence and size > 20G
997
    if vg_name is not None:
998
      vglist = node_result.get(constants.NV_VGLIST, None)
999
      test = not vglist
1000
      _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1001
      if not test:
1002
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1003
                                              constants.MIN_VG_SIZE)
1004
        _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1005

    
1006
    # checks config file checksum
1007

    
1008
    remote_cksum = node_result.get(constants.NV_FILELIST, None)
1009
    test = not isinstance(remote_cksum, dict)
1010
    _ErrorIf(test, self.ENODEFILECHECK, node,
1011
             "node hasn't returned file checksum data")
1012
    if not test:
1013
      for file_name in file_list:
1014
        node_is_mc = nodeinfo.master_candidate
1015
        must_have = (file_name not in master_files) or node_is_mc
1016
        # missing
1017
        test1 = file_name not in remote_cksum
1018
        # invalid checksum
1019
        test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1020
        # existing and good
1021
        test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1022
        _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1023
                 "file '%s' missing", file_name)
1024
        _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1025
                 "file '%s' has wrong checksum", file_name)
1026
        # not candidate and this is not a must-have file
1027
        _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1028
                 "file '%s' should not exist on non master"
1029
                 " candidates (and the file is outdated)", file_name)
1030
        # all good, except non-master/non-must have combination
1031
        _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1032
                 "file '%s' should not exist"
1033
                 " on non master candidates", file_name)
1034

    
1035
    # checks ssh to any
1036

    
1037
    test = constants.NV_NODELIST not in node_result
1038
    _ErrorIf(test, self.ENODESSH, node,
1039
             "node hasn't returned node ssh connectivity data")
1040
    if not test:
1041
      if node_result[constants.NV_NODELIST]:
1042
        for a_node, a_msg in node_result[constants.NV_NODELIST].items():
1043
          _ErrorIf(True, self.ENODESSH, node,
1044
                   "ssh communication with node '%s': %s", a_node, a_msg)
1045

    
1046
    test = constants.NV_NODENETTEST not in node_result
1047
    _ErrorIf(test, self.ENODENET, node,
1048
             "node hasn't returned node tcp connectivity data")
1049
    if not test:
1050
      if node_result[constants.NV_NODENETTEST]:
1051
        nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
1052
        for anode in nlist:
1053
          _ErrorIf(True, self.ENODENET, node,
1054
                   "tcp communication with node '%s': %s",
1055
                   anode, node_result[constants.NV_NODENETTEST][anode])
1056

    
1057
    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
1058
    if isinstance(hyp_result, dict):
1059
      for hv_name, hv_result in hyp_result.iteritems():
1060
        test = hv_result is not None
1061
        _ErrorIf(test, self.ENODEHV, node,
1062
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1063

    
1064
    # check used drbd list
1065
    if vg_name is not None:
1066
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
1067
      test = not isinstance(used_minors, (tuple, list))
1068
      _ErrorIf(test, self.ENODEDRBD, node,
1069
               "cannot parse drbd status file: %s", str(used_minors))
1070
      if not test:
1071
        for minor, (iname, must_exist) in drbd_map.items():
1072
          test = minor not in used_minors and must_exist
1073
          _ErrorIf(test, self.ENODEDRBD, node,
1074
                   "drbd minor %d of instance %s is not active",
1075
                   minor, iname)
1076
        for minor in used_minors:
1077
          test = minor not in drbd_map
1078
          _ErrorIf(test, self.ENODEDRBD, node,
1079
                   "unallocated drbd minor %d is in use", minor)
1080

    
1081
  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
1082
                      node_instance, n_offline):
1083
    """Verify an instance.
1084

1085
    This function checks to see if the required block devices are
1086
    available on the instance's node.
1087

1088
    """
1089
    _ErrorIf = self._ErrorIf
1090
    node_current = instanceconfig.primary_node
1091

    
1092
    node_vol_should = {}
1093
    instanceconfig.MapLVsByNode(node_vol_should)
1094

    
1095
    for node in node_vol_should:
1096
      if node in n_offline:
1097
        # ignore missing volumes on offline nodes
1098
        continue
1099
      for volume in node_vol_should[node]:
1100
        test = node not in node_vol_is or volume not in node_vol_is[node]
1101
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1102
                 "volume %s missing on node %s", volume, node)
1103

    
1104
    if instanceconfig.admin_up:
1105
      test = ((node_current not in node_instance or
1106
               not instance in node_instance[node_current]) and
1107
              node_current not in n_offline)
1108
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1109
               "instance not running on its primary node %s",
1110
               node_current)
1111

    
1112
    for node in node_instance:
1113
      if node != node_current:
1114
        test = instance in node_instance[node]
1115
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1116
                 "instance should not run on node %s", node)
1117

    
1118
  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is):
1119
    """Verify if there are any unknown volumes in the cluster.
1120

1121
    The .os, .swap and backup volumes are ignored. All other volumes are
1122
    reported as unknown.
1123

1124
    """
1125
    for node in node_vol_is:
1126
      for volume in node_vol_is[node]:
1127
        test = (node not in node_vol_should or
1128
                volume not in node_vol_should[node])
1129
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1130
                      "volume %s is unknown", volume)
1131

    
1132
  def _VerifyOrphanInstances(self, instancelist, node_instance):
1133
    """Verify the list of running instances.
1134

1135
    This checks what instances are running but unknown to the cluster.
1136

1137
    """
1138
    for node in node_instance:
1139
      for o_inst in node_instance[node]:
1140
        test = o_inst not in instancelist
1141
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1142
                      "instance %s on node %s should not exist", o_inst, node)
1143

    
1144
  def _VerifyNPlusOneMemory(self, node_info, instance_cfg):
1145
    """Verify N+1 Memory Resilience.
1146

1147
    Check that if one single node dies we can still start all the instances it
1148
    was primary for.
1149

1150
    """
1151
    for node, nodeinfo in node_info.iteritems():
1152
      # This code checks that every node which is now listed as secondary has
1153
      # enough memory to host all the instances it is secondary for, should a single
1154
      # other node in the cluster fail.
1155
      # FIXME: not ready for failover to an arbitrary node
1156
      # FIXME: does not support file-backed instances
1157
      # WARNING: we currently take into account down instances as well as up
1158
      # ones, considering that even if they're down someone might want to start
1159
      # them even in the event of a node failure.
1160
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
1161
        needed_mem = 0
1162
        for instance in instances:
1163
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1164
          if bep[constants.BE_AUTO_BALANCE]:
1165
            needed_mem += bep[constants.BE_MEMORY]
1166
        test = nodeinfo['mfree'] < needed_mem
1167
        self._ErrorIf(test, self.ENODEN1, node,
1168
                      "not enough memory on to accommodate"
1169
                      " failovers should peer node %s fail", prinode)
1170

    
1171
  def CheckPrereq(self):
1172
    """Check prerequisites.
1173

1174
    Transform the list of checks we're going to skip into a set and check that
1175
    all its members are valid.
1176

1177
    """
1178
    self.skip_set = frozenset(self.op.skip_checks)
1179
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1180
      raise errors.OpPrereqError("Invalid checks to be skipped specified")
1181

    
1182
  def BuildHooksEnv(self):
1183
    """Build hooks env.
1184

1185
    Cluster-Verify hooks are run only in the post phase; their failure makes
1186
    the output be logged in the verify output and the verification fail.
1187

1188
    """
1189
    all_nodes = self.cfg.GetNodeList()
1190
    env = {
1191
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1192
      }
1193
    for node in self.cfg.GetAllNodesInfo().values():
1194
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1195

    
1196
    return env, [], all_nodes
1197

    
1198
  def Exec(self, feedback_fn):
1199
    """Verify integrity of cluster, performing various test on nodes.
1200

1201
    """
1202
    self.bad = False
1203
    _ErrorIf = self._ErrorIf
1204
    verbose = self.op.verbose
1205
    self._feedback_fn = feedback_fn
1206
    feedback_fn("* Verifying global settings")
1207
    for msg in self.cfg.VerifyConfig():
1208
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1209

    
1210
    vg_name = self.cfg.GetVGName()
1211
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1212
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
1213
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1214
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1215
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1216
                        for iname in instancelist)
1217
    i_non_redundant = [] # Non redundant instances
1218
    i_non_a_balanced = [] # Non auto-balanced instances
1219
    n_offline = [] # List of offline nodes
1220
    n_drained = [] # List of nodes being drained
1221
    node_volume = {}
1222
    node_instance = {}
1223
    node_info = {}
1224
    instance_cfg = {}
1225

    
1226
    # FIXME: verify OS list
1227
    # do local checksums
1228
    master_files = [constants.CLUSTER_CONF_FILE]
1229

    
1230
    file_names = ssconf.SimpleStore().GetFileList()
1231
    file_names.append(constants.SSL_CERT_FILE)
1232
    file_names.append(constants.RAPI_CERT_FILE)
1233
    file_names.extend(master_files)
1234

    
1235
    local_checksums = utils.FingerprintFiles(file_names)
1236

    
1237
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1238
    node_verify_param = {
1239
      constants.NV_FILELIST: file_names,
1240
      constants.NV_NODELIST: [node.name for node in nodeinfo
1241
                              if not node.offline],
1242
      constants.NV_HYPERVISOR: hypervisors,
1243
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1244
                                  node.secondary_ip) for node in nodeinfo
1245
                                 if not node.offline],
1246
      constants.NV_INSTANCELIST: hypervisors,
1247
      constants.NV_VERSION: None,
1248
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1249
      }
1250
    if vg_name is not None:
1251
      node_verify_param[constants.NV_VGLIST] = None
1252
      node_verify_param[constants.NV_LVLIST] = vg_name
1253
      node_verify_param[constants.NV_DRBDLIST] = None
1254
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1255
                                           self.cfg.GetClusterName())
1256

    
1257
    cluster = self.cfg.GetClusterInfo()
1258
    master_node = self.cfg.GetMasterNode()
1259
    all_drbd_map = self.cfg.ComputeDRBDMap()
1260

    
1261
    feedback_fn("* Verifying node status")
1262
    for node_i in nodeinfo:
1263
      node = node_i.name
1264

    
1265
      if node_i.offline:
1266
        if verbose:
1267
          feedback_fn("* Skipping offline node %s" % (node,))
1268
        n_offline.append(node)
1269
        continue
1270

    
1271
      if node == master_node:
1272
        ntype = "master"
1273
      elif node_i.master_candidate:
1274
        ntype = "master candidate"
1275
      elif node_i.drained:
1276
        ntype = "drained"
1277
        n_drained.append(node)
1278
      else:
1279
        ntype = "regular"
1280
      if verbose:
1281
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1282

    
1283
      msg = all_nvinfo[node].fail_msg
1284
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
1285
      if msg:
1286
        continue
1287

    
1288
      nresult = all_nvinfo[node].payload
1289
      node_drbd = {}
1290
      for minor, instance in all_drbd_map[node].items():
1291
        test = instance not in instanceinfo
1292
        _ErrorIf(test, self.ECLUSTERCFG, None,
1293
                 "ghost instance '%s' in temporary DRBD map", instance)
1294
          # ghost instance should not be running, but otherwise we
1295
          # don't give double warnings (both ghost instance and
1296
          # unallocated minor in use)
1297
        if test:
1298
          node_drbd[minor] = (instance, False)
1299
        else:
1300
          instance = instanceinfo[instance]
1301
          node_drbd[minor] = (instance.name, instance.admin_up)
1302
      self._VerifyNode(node_i, file_names, local_checksums,
1303
                       nresult, master_files, node_drbd, vg_name)
1304

    
1305
      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1306
      if vg_name is None:
1307
        node_volume[node] = {}
1308
      elif isinstance(lvdata, basestring):
1309
        _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1310
                 utils.SafeEncode(lvdata))
1311
        node_volume[node] = {}
1312
      elif not isinstance(lvdata, dict):
1313
        _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1314
        continue
1315
      else:
1316
        node_volume[node] = lvdata
1317

    
1318
      # node_instance
1319
      idata = nresult.get(constants.NV_INSTANCELIST, None)
1320
      test = not isinstance(idata, list)
1321
      _ErrorIf(test, self.ENODEHV, node,
1322
               "rpc call to node failed (instancelist)")
1323
      if test:
1324
        continue
1325

    
1326
      node_instance[node] = idata
1327

    
1328
      # node_info
1329
      nodeinfo = nresult.get(constants.NV_HVINFO, None)
1330
      test = not isinstance(nodeinfo, dict)
1331
      _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1332
      if test:
1333
        continue
1334

    
1335
      try:
1336
        node_info[node] = {
1337
          "mfree": int(nodeinfo['memory_free']),
1338
          "pinst": [],
1339
          "sinst": [],
1340
          # dictionary holding all instances this node is secondary for,
1341
          # grouped by their primary node. Each key is a cluster node, and each
1342
          # value is a list of instances which have the key as primary and the
1343
          # current node as secondary.  this is handy to calculate N+1 memory
1344
          # availability if you can only failover from a primary to its
1345
          # secondary.
1346
          "sinst-by-pnode": {},
1347
        }
1348
        # FIXME: devise a free space model for file based instances as well
1349
        if vg_name is not None:
1350
          test = (constants.NV_VGLIST not in nresult or
1351
                  vg_name not in nresult[constants.NV_VGLIST])
1352
          _ErrorIf(test, self.ENODELVM, node,
1353
                   "node didn't return data for the volume group '%s'"
1354
                   " - it is either missing or broken", vg_name)
1355
          if test:
1356
            continue
1357
          node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
1358
      except (ValueError, KeyError):
1359
        _ErrorIf(True, self.ENODERPC, node,
1360
                 "node returned invalid nodeinfo, check lvm/hypervisor")
1361
        continue
1362

    
1363
    node_vol_should = {}
1364

    
1365
    feedback_fn("* Verifying instance status")
1366
    for instance in instancelist:
1367
      if verbose:
1368
        feedback_fn("* Verifying instance %s" % instance)
1369
      inst_config = instanceinfo[instance]
1370
      self._VerifyInstance(instance, inst_config, node_volume,
1371
                           node_instance, n_offline)
1372
      inst_nodes_offline = []
1373

    
1374
      inst_config.MapLVsByNode(node_vol_should)
1375

    
1376
      instance_cfg[instance] = inst_config
1377

    
1378
      pnode = inst_config.primary_node
1379
      _ErrorIf(pnode not in node_info and pnode not in n_offline,
1380
               self.ENODERPC, pnode, "instance %s, connection to"
1381
               " primary node failed", instance)
1382
      if pnode in node_info:
1383
        node_info[pnode]['pinst'].append(instance)
1384

    
1385
      if pnode in n_offline:
1386
        inst_nodes_offline.append(pnode)
1387

    
1388
      # If the instance is non-redundant we cannot survive losing its primary
1389
      # node, so we are not N+1 compliant. On the other hand we have no disk
1390
      # templates with more than one secondary so that situation is not well
1391
      # supported either.
1392
      # FIXME: does not support file-backed instances
1393
      if len(inst_config.secondary_nodes) == 0:
1394
        i_non_redundant.append(instance)
1395
      _ErrorIf(len(inst_config.secondary_nodes) > 1,
1396
               self.EINSTANCELAYOUT, instance,
1397
               "instance has multiple secondary nodes", code="WARNING")
1398

    
1399
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1400
        i_non_a_balanced.append(instance)
1401

    
1402
      for snode in inst_config.secondary_nodes:
1403
        _ErrorIf(snode not in node_info and snode not in n_offline,
1404
                 self.ENODERPC, snode,
1405
                 "instance %s, connection to secondary node"
1406
                 "failed", instance)
1407

    
1408
        if snode in node_info:
1409
          node_info[snode]['sinst'].append(instance)
1410
          if pnode not in node_info[snode]['sinst-by-pnode']:
1411
            node_info[snode]['sinst-by-pnode'][pnode] = []
1412
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)
1413

    
1414
        if snode in n_offline:
1415
          inst_nodes_offline.append(snode)
1416

    
1417
      # warn that the instance lives on offline nodes
1418
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
1419
               "instance lives on offline node(s) %s",
1420
               ", ".join(inst_nodes_offline))
1421

    
1422
    feedback_fn("* Verifying orphan volumes")
1423
    self._VerifyOrphanVolumes(node_vol_should, node_volume)
1424

    
1425
    feedback_fn("* Verifying remaining instances")
1426
    self._VerifyOrphanInstances(instancelist, node_instance)
1427

    
1428
    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1429
      feedback_fn("* Verifying N+1 Memory redundancy")
1430
      self._VerifyNPlusOneMemory(node_info, instance_cfg)
1431

    
1432
    feedback_fn("* Other Notes")
1433
    if i_non_redundant:
1434
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
1435
                  % len(i_non_redundant))
1436

    
1437
    if i_non_a_balanced:
1438
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
1439
                  % len(i_non_a_balanced))
1440

    
1441
    if n_offline:
1442
      feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))
1443

    
1444
    if n_drained:
1445
      feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))
1446

    
1447
    return not self.bad
1448

    
1449
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      indent_re = re.compile('^', re.M)
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        show_node_header = True
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if test:
          # override manually lu_result here as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = indent_re.sub('      ', output)
            feedback_fn("%s" % output)
            lu_result = 1

      return lu_result


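# Illustrative note on the reindentation done in HooksCallBack above: the
# regexp anchors on every line start in multi-line mode, e.g.
#   >>> import re
#   >>> re.compile('^', re.M).sub('      ', "line1\nline2")
#   '      line1\n      line2'
# so a hook script's multi-line output stays visually nested under the
# per-node feedback header.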
class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      # node_volume
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, lv_inactive, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result


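# A rough sketch of the data handled by LUVerifyDisks.Exec above (all names
# here are hypothetical): the reverse map of expected LVs looks like
#   nv_dict = {("node1", "xenvg/inst1-disk0"): <Instance inst1>, ...}
# and the returned tuple roughly like
#   ({"node2": "Error message from lv_list"},   # nodes whose LV query failed
#    ["inst1"],                                 # instances needing activate-disks
#    {"inst2": [("node1", "xenvg/inst1-disk0")]})  # LVs missing on disk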
class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  _OP_REQP = ["instances"]
  REQ_BGL = False

  def ExpandNames(self):
    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'")

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = self.cfg.ExpandInstanceName(name)
        if full_name is None:
          raise errors.OpPrereqError("Instance '%s' not known" % name)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      result = self.rpc.call_blockdev_getsizes(node, [v[2] for v in dskl])
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      if len(result.payload) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance)
          changed.append((instance.name, idx, size))
    return changed


class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = utils.HostInfo(self.op.name)

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed")
    if new_ip != old_ip:
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network. Aborting." %
                                   new_ip)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      result = self.rpc.call_upload_file(node_list,
                                         constants.SSH_KNOWN_HOSTS_FILE)
      for to_node, to_result in result.iteritems():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
          self.proc.LogWarning(msg)

    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)


def _RecursiveCheckIfLVMBased(disk):
  """Check if the given disk or its children are lvm-based.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: boolean indicating whether a LD_LV dev_type was found or not

  """
  if disk.children:
    for chdisk in disk.children:
      if _RecursiveCheckIfLVMBased(chdisk):
        return True
  return disk.dev_type == constants.LD_LV


class LUSetClusterParams(LogicalUnit):
1771
  """Change the parameters of the cluster.
1772

1773
  """
1774
  HPATH = "cluster-modify"
1775
  HTYPE = constants.HTYPE_CLUSTER
1776
  _OP_REQP = []
1777
  REQ_BGL = False
1778

    
1779
  def CheckArguments(self):
1780
    """Check parameters
1781

1782
    """
1783
    if not hasattr(self.op, "candidate_pool_size"):
1784
      self.op.candidate_pool_size = None
1785
    if self.op.candidate_pool_size is not None:
1786
      try:
1787
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
1788
      except (ValueError, TypeError), err:
1789
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
1790
                                   str(err))
1791
      if self.op.candidate_pool_size < 1:
1792
        raise errors.OpPrereqError("At least one master candidate needed")
1793

    
1794
  def ExpandNames(self):
1795
    # FIXME: in the future maybe other cluster params won't require checking on
1796
    # all nodes to be modified.
1797
    self.needed_locks = {
1798
      locking.LEVEL_NODE: locking.ALL_SET,
1799
    }
1800
    self.share_locks[locking.LEVEL_NODE] = 1
1801

    
1802
  def BuildHooksEnv(self):
1803
    """Build hooks env.
1804

1805
    """
1806
    env = {
1807
      "OP_TARGET": self.cfg.GetClusterName(),
1808
      "NEW_VG_NAME": self.op.vg_name,
1809
      }
1810
    mn = self.cfg.GetMasterNode()
1811
    return env, [mn], [mn]
1812

    
1813
  def CheckPrereq(self):
1814
    """Check prerequisites.
1815

1816
    This checks whether the given params don't conflict and
1817
    if the given volume group is valid.
1818

1819
    """
1820
    if self.op.vg_name is not None and not self.op.vg_name:
1821
      instances = self.cfg.GetAllInstancesInfo().values()
1822
      for inst in instances:
1823
        for disk in inst.disks:
1824
          if _RecursiveCheckIfLVMBased(disk):
1825
            raise errors.OpPrereqError("Cannot disable lvm storage while"
1826
                                       " lvm-based instances exist")
1827

    
1828
    node_list = self.acquired_locks[locking.LEVEL_NODE]
1829

    
1830
    # if vg_name not None, checks given volume group on all nodes
1831
    if self.op.vg_name:
1832
      vglist = self.rpc.call_vg_list(node_list)
1833
      for node in node_list:
1834
        msg = vglist[node].fail_msg
1835
        if msg:
1836
          # ignoring down node
1837
          self.LogWarning("Error while gathering data on node %s"
1838
                          " (ignoring node): %s", node, msg)
1839
          continue
1840
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
1841
                                              self.op.vg_name,
1842
                                              constants.MIN_VG_SIZE)
1843
        if vgstatus:
1844
          raise errors.OpPrereqError("Error on node '%s': %s" %
1845
                                     (node, vgstatus))
1846

    
1847
    self.cluster = cluster = self.cfg.GetClusterInfo()
1848
    # validate params changes
1849
    if self.op.beparams:
1850
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
1851
      self.new_beparams = objects.FillDict(
1852
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
1853

    
1854
    if self.op.nicparams:
1855
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
1856
      self.new_nicparams = objects.FillDict(
1857
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
1858
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
1859

    
1860
    # hypervisor list/parameters
1861
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
1862
    if self.op.hvparams:
1863
      if not isinstance(self.op.hvparams, dict):
1864
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input")
1865
      for hv_name, hv_dict in self.op.hvparams.items():
1866
        if hv_name not in self.new_hvparams:
1867
          self.new_hvparams[hv_name] = hv_dict
1868
        else:
1869
          self.new_hvparams[hv_name].update(hv_dict)
1870

    
1871
    if self.op.enabled_hypervisors is not None:
1872
      self.hv_list = self.op.enabled_hypervisors
1873
      if not self.hv_list:
1874
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
1875
                                   " least one member")
1876
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
1877
      if invalid_hvs:
1878
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
1879
                                   " entries: %s" %
1880
                                   utils.CommaJoin(invalid_hvs))
1881
    else:
1882
      self.hv_list = cluster.enabled_hypervisors
1883

    
1884
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
1885
      # either the enabled list has changed, or the parameters have, validate
1886
      for hv_name, hv_params in self.new_hvparams.items():
1887
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
1888
            (self.op.enabled_hypervisors and
1889
             hv_name in self.op.enabled_hypervisors)):
1890
          # either this is a new hypervisor, or its parameters have changed
1891
          hv_class = hypervisor.GetHypervisor(hv_name)
1892
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1893
          hv_class.CheckParameterSyntax(hv_params)
1894
          _CheckHVParams(self, node_list, hv_name, hv_params)
1895

    
1896
  def Exec(self, feedback_fn):
1897
    """Change the parameters of the cluster.
1898

1899
    """
1900
    if self.op.vg_name is not None:
1901
      new_volume = self.op.vg_name
1902
      if not new_volume:
1903
        new_volume = None
1904
      if new_volume != self.cfg.GetVGName():
1905
        self.cfg.SetVGName(new_volume)
1906
      else:
1907
        feedback_fn("Cluster LVM configuration already in desired"
1908
                    " state, not changing")
1909
    if self.op.hvparams:
1910
      self.cluster.hvparams = self.new_hvparams
1911
    if self.op.enabled_hypervisors is not None:
1912
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
1913
    if self.op.beparams:
1914
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
1915
    if self.op.nicparams:
1916
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
1917

    
1918
    if self.op.candidate_pool_size is not None:
1919
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
1920
      # we need to update the pool size here, otherwise the save will fail
1921
      _AdjustCandidatePool(self)
1922

    
1923
    self.cfg.Update(self.cluster)
1924

    
1925

    
1926
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
1927
  """Distribute additional files which are part of the cluster configuration.
1928

1929
  ConfigWriter takes care of distributing the config and ssconf files, but
1930
  there are more files which should be distributed to all nodes. This function
1931
  makes sure those are copied.
1932

1933
  @param lu: calling logical unit
1934
  @param additional_nodes: list of nodes not in the config to distribute to
1935

1936
  """
1937
  # 1. Gather target nodes
1938
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
1939
  dist_nodes = lu.cfg.GetNodeList()
1940
  if additional_nodes is not None:
1941
    dist_nodes.extend(additional_nodes)
1942
  if myself.name in dist_nodes:
1943
    dist_nodes.remove(myself.name)
1944
  # 2. Gather files to distribute
1945
  dist_files = set([constants.ETC_HOSTS,
1946
                    constants.SSH_KNOWN_HOSTS_FILE,
1947
                    constants.RAPI_CERT_FILE,
1948
                    constants.RAPI_USERS_FILE,
1949
                    constants.HMAC_CLUSTER_KEY,
1950
                   ])
1951

    
1952
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
1953
  for hv_name in enabled_hypervisors:
1954
    hv_class = hypervisor.GetHypervisor(hv_name)
1955
    dist_files.update(hv_class.GetAncillaryFiles())
1956

    
1957
  # 3. Perform the files upload
1958
  for fname in dist_files:
1959
    if os.path.exists(fname):
1960
      result = lu.rpc.call_upload_file(dist_nodes, fname)
1961
      for to_node, to_result in result.items():
1962
        msg = to_result.fail_msg
1963
        if msg:
1964
          msg = ("Copy of file %s to node %s failed: %s" %
1965
                 (fname, to_node, msg))
1966
          lu.proc.LogWarning(msg)
1967

    
1968

    
1969
class LURedistributeConfig(NoHooksLU):
1970
  """Force the redistribution of cluster configuration.
1971

1972
  This is a very simple LU.
1973

1974
  """
1975
  _OP_REQP = []
1976
  REQ_BGL = False
1977

    
1978
  def ExpandNames(self):
1979
    self.needed_locks = {
1980
      locking.LEVEL_NODE: locking.ALL_SET,
1981
    }
1982
    self.share_locks[locking.LEVEL_NODE] = 1
1983

    
1984
  def CheckPrereq(self):
1985
    """Check prerequisites.
1986

1987
    """
1988

    
1989
  def Exec(self, feedback_fn):
1990
    """Redistribute the configuration.
1991

1992
    """
1993
    self.cfg.Update(self.cfg.GetClusterInfo())
1994
    _RedistributeAncillaryFiles(self)
1995

    
1996

    
1997
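# Usage note: _RedistributeAncillaryFiles is invoked by LURedistributeConfig
# above without extra nodes, and by LUAddNode as
#   _RedistributeAncillaryFiles(self, additional_nodes=[node])
# because the node being added is not yet part of the configuration. Only
# files present on the master (the os.path.exists check) are pushed out.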
def _WaitForSync(lu, instance, oneshot=False, unlock=False):
1998
  """Sleep and poll for an instance's disk to sync.
1999

2000
  """
2001
  if not instance.disks:
2002
    return True
2003

    
2004
  if not oneshot:
2005
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2006

    
2007
  node = instance.primary_node
2008

    
2009
  for dev in instance.disks:
2010
    lu.cfg.SetDiskID(dev, node)
2011

    
2012
  retries = 0
2013
  degr_retries = 10 # in seconds, as we sleep 1 second each time
2014
  while True:
2015
    max_time = 0
2016
    done = True
2017
    cumul_degraded = False
2018
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
2019
    msg = rstats.fail_msg
2020
    if msg:
2021
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2022
      retries += 1
2023
      if retries >= 10:
2024
        raise errors.RemoteError("Can't contact node %s for mirror data,"
2025
                                 " aborting." % node)
2026
      time.sleep(6)
2027
      continue
2028
    rstats = rstats.payload
2029
    retries = 0
2030
    for i, mstat in enumerate(rstats):
2031
      if mstat is None:
2032
        lu.LogWarning("Can't compute data for node %s/%s",
2033
                           node, instance.disks[i].iv_name)
2034
        continue
2035

    
2036
      cumul_degraded = (cumul_degraded or
2037
                        (mstat.is_degraded and mstat.sync_percent is None))
2038
      if mstat.sync_percent is not None:
2039
        done = False
2040
        if mstat.estimated_time is not None:
2041
          rem_time = "%d estimated seconds remaining" % mstat.estimated_time
2042
          max_time = mstat.estimated_time
2043
        else:
2044
          rem_time = "no time estimate"
2045
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2046
                        (instance.disks[i].iv_name, mstat.sync_percent,
2047
                         rem_time))
2048

    
2049
    # if we're done but degraded, let's do a few small retries, to
2050
    # make sure we see a stable and not transient situation; therefore
2051
    # we force restart of the loop
2052
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
2053
      logging.info("Degraded disks found, %d retries left", degr_retries)
2054
      degr_retries -= 1
2055
      time.sleep(1)
2056
      continue
2057

    
2058
    if done or oneshot:
2059
      break
2060

    
2061
    time.sleep(min(60, max_time))
2062

    
2063
  if done:
2064
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2065
  return not cumul_degraded
2066

    
2067

    
2068
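# Usage sketch for _WaitForSync above: it polls blockdev_getmirrorstatus and
# inspects each mstat's sync_percent, estimated_time and is_degraded fields,
# returning True only when no disk is left degraded. A typical caller does
# something along the lines of (sketch only):
#   if not _WaitForSync(self, instance):
#     raise errors.OpExecError("Instance disks are degraded after sync")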
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2069
  """Check that mirrors are not degraded.
2070

2071
  The ldisk parameter, if True, will change the test from the
2072
  is_degraded attribute (which represents overall non-ok status for
2073
  the device(s)) to the ldisk (representing the local storage status).
2074

2075
  """
2076
  lu.cfg.SetDiskID(dev, node)
2077

    
2078
  result = True
2079

    
2080
  if on_primary or dev.AssembleOnSecondary():
2081
    rstats = lu.rpc.call_blockdev_find(node, dev)
2082
    msg = rstats.fail_msg
2083
    if msg:
2084
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2085
      result = False
2086
    elif not rstats.payload:
2087
      lu.LogWarning("Can't find disk on node %s", node)
2088
      result = False
2089
    else:
2090
      if ldisk:
2091
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2092
      else:
2093
        result = result and not rstats.payload.is_degraded
2094

    
2095
  if dev.children:
2096
    for child in dev.children:
2097
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2098

    
2099
  return result
2100

    
2101

    
2102
class LUDiagnoseOS(NoHooksLU):
2103
  """Logical unit for OS diagnose/query.
2104

2105
  """
2106
  _OP_REQP = ["output_fields", "names"]
2107
  REQ_BGL = False
2108
  _FIELDS_STATIC = utils.FieldSet()
2109
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status")
2110

    
2111
  def ExpandNames(self):
2112
    if self.op.names:
2113
      raise errors.OpPrereqError("Selective OS query not supported")
2114

    
2115
    _CheckOutputFields(static=self._FIELDS_STATIC,
2116
                       dynamic=self._FIELDS_DYNAMIC,
2117
                       selected=self.op.output_fields)
2118

    
2119
    # Lock all nodes, in shared mode
2120
    # Temporary removal of locks, should be reverted later
2121
    # TODO: reintroduce locks when they are lighter-weight
2122
    self.needed_locks = {}
2123
    #self.share_locks[locking.LEVEL_NODE] = 1
2124
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2125

    
2126
  def CheckPrereq(self):
2127
    """Check prerequisites.
2128

2129
    """
2130

    
2131
  @staticmethod
2132
  def _DiagnoseByOS(node_list, rlist):
2133
    """Remaps a per-node return list into an a per-os per-node dictionary
2134

2135
    @param node_list: a list with the names of all nodes
2136
    @param rlist: a map with node names as keys and OS objects as values
2137

2138
    @rtype: dict
2139
    @return: a dictionary with osnames as keys and as value another map, with
2140
        nodes as keys and tuples of (path, status, diagnose) as values, eg::
2141

2142
          {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
2143
                                     (/srv/..., False, "invalid api")],
2144
                           "node2": [(/srv/..., True, "")]}
2145
          }
2146

2147
    """
2148
    all_os = {}
2149
    # we build here the list of nodes that didn't fail the RPC (at RPC
2150
    # level), so that nodes with a non-responding node daemon don't
2151
    # make all OSes invalid
2152
    good_nodes = [node_name for node_name in rlist
2153
                  if not rlist[node_name].fail_msg]
2154
    for node_name, nr in rlist.items():
2155
      if nr.fail_msg or not nr.payload:
2156
        continue
2157
      for name, path, status, diagnose in nr.payload:
2158
        if name not in all_os:
2159
          # build a list of nodes for this os containing empty lists
2160
          # for each node in node_list
2161
          all_os[name] = {}
2162
          for nname in good_nodes:
2163
            all_os[name][nname] = []
2164
        all_os[name][node_name].append((path, status, diagnose))
2165
    return all_os
2166

    
2167
  def Exec(self, feedback_fn):
2168
    """Compute the list of OSes.
2169

2170
    """
2171
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2172
    node_data = self.rpc.call_os_diagnose(valid_nodes)
2173
    pol = self._DiagnoseByOS(valid_nodes, node_data)
2174
    output = []
2175
    for os_name, os_data in pol.items():
2176
      row = []
2177
      for field in self.op.output_fields:
2178
        if field == "name":
2179
          val = os_name
2180
        elif field == "valid":
2181
          val = utils.all([osl and osl[0][1] for osl in os_data.values()])
2182
        elif field == "node_status":
2183
          # this is just a copy of the dict
2184
          val = {}
2185
          for node_name, nos_list in os_data.items():
2186
            val[node_name] = nos_list
2187
        else:
2188
          raise errors.ParameterError(field)
2189
        row.append(val)
2190
      output.append(row)
2191

    
2192
    return output
2193

    
2194

    
2195
class LURemoveNode(LogicalUnit):
2196
  """Logical unit for removing a node.
2197

2198
  """
2199
  HPATH = "node-remove"
2200
  HTYPE = constants.HTYPE_NODE
2201
  _OP_REQP = ["node_name"]
2202

    
2203
  def BuildHooksEnv(self):
2204
    """Build hooks env.
2205

2206
    This doesn't run on the target node in the pre phase as a failed
2207
    node would then be impossible to remove.
2208

2209
    """
2210
    env = {
2211
      "OP_TARGET": self.op.node_name,
2212
      "NODE_NAME": self.op.node_name,
2213
      }
2214
    all_nodes = self.cfg.GetNodeList()
2215
    if self.op.node_name in all_nodes:
2216
      all_nodes.remove(self.op.node_name)
2217
    return env, all_nodes, all_nodes
2218

    
2219
  def CheckPrereq(self):
2220
    """Check prerequisites.
2221

2222
    This checks:
2223
     - the node exists in the configuration
2224
     - it does not have primary or secondary instances
2225
     - it's not the master
2226

2227
    Any errors are signaled by raising errors.OpPrereqError.
2228

2229
    """
2230
    node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
2231
    if node is None:
2232
      raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name)
2233

    
2234
    instance_list = self.cfg.GetInstanceList()
2235

    
2236
    masternode = self.cfg.GetMasterNode()
2237
    if node.name == masternode:
2238
      raise errors.OpPrereqError("Node is the master node,"
2239
                                 " you need to failover first.")
2240

    
2241
    for instance_name in instance_list:
2242
      instance = self.cfg.GetInstanceInfo(instance_name)
2243
      if node.name in instance.all_nodes:
2244
        raise errors.OpPrereqError("Instance %s is still running on the node,"
2245
                                   " please remove first." % instance_name)
2246
    self.op.node_name = node.name
2247
    self.node = node
2248

    
2249
  def Exec(self, feedback_fn):
2250
    """Removes the node from the cluster.
2251

2252
    """
2253
    node = self.node
2254
    logging.info("Stopping the node daemon and removing configs from node %s",
2255
                 node.name)
2256

    
2257
    self.context.RemoveNode(node.name)
2258

    
2259
    # Run post hooks on the node before it's removed
2260
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
2261
    try:
2262
      h_results = hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
2263
    except Exception:
2264
      self.LogWarning("Errors occurred running hooks on %s" % node.name)
2265

    
2266
    result = self.rpc.call_node_leave_cluster(node.name)
2267
    msg = result.fail_msg
2268
    if msg:
2269
      self.LogWarning("Errors encountered on the remote node while leaving"
2270
                      " the cluster: %s", msg)
2271

    
2272
    # Promote nodes to master candidate as needed
2273
    _AdjustCandidatePool(self)
2274

    
2275

    
2276
class LUQueryNodes(NoHooksLU):
2277
  """Logical unit for querying nodes.
2278

2279
  """
2280
  _OP_REQP = ["output_fields", "names", "use_locking"]
2281
  REQ_BGL = False
2282

    
2283
  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
2284
                    "master_candidate", "offline", "drained"]
2285

    
2286
  _FIELDS_DYNAMIC = utils.FieldSet(
2287
    "dtotal", "dfree",
2288
    "mtotal", "mnode", "mfree",
2289
    "bootid",
2290
    "ctotal", "cnodes", "csockets",
2291
    )
2292

    
2293
  _FIELDS_STATIC = utils.FieldSet(*[
2294
    "pinst_cnt", "sinst_cnt",
2295
    "pinst_list", "sinst_list",
2296
    "pip", "sip", "tags",
2297
    "master",
2298
    "role"] + _SIMPLE_FIELDS
2299
    )
2300

    
2301
  def ExpandNames(self):
2302
    _CheckOutputFields(static=self._FIELDS_STATIC,
2303
                       dynamic=self._FIELDS_DYNAMIC,
2304
                       selected=self.op.output_fields)
2305

    
2306
    self.needed_locks = {}
2307
    self.share_locks[locking.LEVEL_NODE] = 1
2308

    
2309
    if self.op.names:
2310
      self.wanted = _GetWantedNodes(self, self.op.names)
2311
    else:
2312
      self.wanted = locking.ALL_SET
2313

    
2314
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2315
    self.do_locking = self.do_node_query and self.op.use_locking
2316
    if self.do_locking:
2317
      # if we don't request only static fields, we need to lock the nodes
2318
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
2319

    
2320

    
2321
  def CheckPrereq(self):
2322
    """Check prerequisites.
2323

2324
    """
2325
    # The validation of the node list is done in the _GetWantedNodes,
2326
    # if non empty, and if empty, there's no validation to do
2327
    pass
2328

    
2329
  def Exec(self, feedback_fn):
2330
    """Computes the list of nodes and their attributes.
2331

2332
    """
2333
    all_info = self.cfg.GetAllNodesInfo()
2334
    if self.do_locking:
2335
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
2336
    elif self.wanted != locking.ALL_SET:
2337
      nodenames = self.wanted
2338
      missing = set(nodenames).difference(all_info.keys())
2339
      if missing:
2340
        raise errors.OpExecError(
2341
          "Some nodes were removed before retrieving their data: %s" % missing)
2342
    else:
2343
      nodenames = all_info.keys()
2344

    
2345
    nodenames = utils.NiceSort(nodenames)
2346
    nodelist = [all_info[name] for name in nodenames]
2347

    
2348
    # begin data gathering
2349

    
2350
    if self.do_node_query:
2351
      live_data = {}
2352
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2353
                                          self.cfg.GetHypervisorType())
2354
      for name in nodenames:
2355
        nodeinfo = node_data[name]
2356
        if not nodeinfo.fail_msg and nodeinfo.payload:
2357
          nodeinfo = nodeinfo.payload
2358
          fn = utils.TryConvert
2359
          live_data[name] = {
2360
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2361
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2362
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
2363
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2364
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
2365
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2366
            "bootid": nodeinfo.get('bootid', None),
2367
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2368
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2369
            }
2370
        else:
2371
          live_data[name] = {}
2372
    else:
2373
      live_data = dict.fromkeys(nodenames, {})
2374

    
2375
    node_to_primary = dict([(name, set()) for name in nodenames])
2376
    node_to_secondary = dict([(name, set()) for name in nodenames])
2377

    
2378
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
2379
                             "sinst_cnt", "sinst_list"))
2380
    if inst_fields & frozenset(self.op.output_fields):
2381
      instancelist = self.cfg.GetInstanceList()
2382

    
2383
      for instance_name in instancelist:
2384
        inst = self.cfg.GetInstanceInfo(instance_name)
2385
        if inst.primary_node in node_to_primary:
2386
          node_to_primary[inst.primary_node].add(inst.name)
2387
        for secnode in inst.secondary_nodes:
2388
          if secnode in node_to_secondary:
2389
            node_to_secondary[secnode].add(inst.name)
2390

    
2391
    master_node = self.cfg.GetMasterNode()
2392

    
2393
    # end data gathering
2394

    
2395
    output = []
2396
    for node in nodelist:
2397
      node_output = []
2398
      for field in self.op.output_fields:
2399
        if field in self._SIMPLE_FIELDS:
2400
          val = getattr(node, field)
2401
        elif field == "pinst_list":
2402
          val = list(node_to_primary[node.name])
2403
        elif field == "sinst_list":
2404
          val = list(node_to_secondary[node.name])
2405
        elif field == "pinst_cnt":
2406
          val = len(node_to_primary[node.name])
2407
        elif field == "sinst_cnt":
2408
          val = len(node_to_secondary[node.name])
2409
        elif field == "pip":
2410
          val = node.primary_ip
2411
        elif field == "sip":
2412
          val = node.secondary_ip
2413
        elif field == "tags":
2414
          val = list(node.GetTags())
2415
        elif field == "master":
2416
          val = node.name == master_node
2417
        elif self._FIELDS_DYNAMIC.Matches(field):
2418
          val = live_data[node.name].get(field, None)
2419
        elif field == "role":
2420
          if node.name == master_node:
2421
            val = "M"
2422
          elif node.master_candidate:
2423
            val = "C"
2424
          elif node.drained:
2425
            val = "D"
2426
          elif node.offline:
2427
            val = "O"
2428
          else:
2429
            val = "R"
2430
        else:
2431
          raise errors.ParameterError(field)
2432
        node_output.append(val)
2433
      output.append(node_output)
2434

    
2435
    return output
2436

    
2437

    
2438
class LUQueryNodeVolumes(NoHooksLU):
2439
  """Logical unit for getting volumes on node(s).
2440

2441
  """
2442
  _OP_REQP = ["nodes", "output_fields"]
2443
  REQ_BGL = False
2444
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
2445
  _FIELDS_STATIC = utils.FieldSet("node")
2446

    
2447
  def ExpandNames(self):
2448
    _CheckOutputFields(static=self._FIELDS_STATIC,
2449
                       dynamic=self._FIELDS_DYNAMIC,
2450
                       selected=self.op.output_fields)
2451

    
2452
    self.needed_locks = {}
2453
    self.share_locks[locking.LEVEL_NODE] = 1
2454
    if not self.op.nodes:
2455
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2456
    else:
2457
      self.needed_locks[locking.LEVEL_NODE] = \
2458
        _GetWantedNodes(self, self.op.nodes)
2459

    
2460
  def CheckPrereq(self):
2461
    """Check prerequisites.
2462

2463
    This checks that the fields required are valid output fields.
2464

2465
    """
2466
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2467

    
2468
  def Exec(self, feedback_fn):
2469
    """Computes the list of nodes and their attributes.
2470

2471
    """
2472
    nodenames = self.nodes
2473
    volumes = self.rpc.call_node_volumes(nodenames)
2474

    
2475
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
2476
             in self.cfg.GetInstanceList()]
2477

    
2478
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
2479

    
2480
    output = []
2481
    for node in nodenames:
2482
      nresult = volumes[node]
2483
      if nresult.offline:
2484
        continue
2485
      msg = nresult.fail_msg
2486
      if msg:
2487
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
2488
        continue
2489

    
2490
      node_vols = nresult.payload[:]
2491
      node_vols.sort(key=lambda vol: vol['dev'])
2492

    
2493
      for vol in node_vols:
2494
        node_output = []
2495
        for field in self.op.output_fields:
2496
          if field == "node":
2497
            val = node
2498
          elif field == "phys":
2499
            val = vol['dev']
2500
          elif field == "vg":
2501
            val = vol['vg']
2502
          elif field == "name":
2503
            val = vol['name']
2504
          elif field == "size":
2505
            val = int(float(vol['size']))
2506
          elif field == "instance":
2507
            for inst in ilist:
2508
              if node not in lv_by_node[inst]:
2509
                continue
2510
              if vol['name'] in lv_by_node[inst][node]:
2511
                val = inst.name
2512
                break
2513
            else:
2514
              val = '-'
2515
          else:
2516
            raise errors.ParameterError(field)
2517
          node_output.append(str(val))
2518

    
2519
        output.append(node_output)
2520

    
2521
    return output
2522

    
2523

    
2524
class LUQueryNodeStorage(NoHooksLU):
2525
  """Logical unit for getting information on storage units on node(s).
2526

2527
  """
2528
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
2529
  REQ_BGL = False
2530
  _FIELDS_STATIC = utils.FieldSet("node")
2531

    
2532
  def ExpandNames(self):
2533
    storage_type = self.op.storage_type
2534

    
2535
    if storage_type not in constants.VALID_STORAGE_FIELDS:
2536
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type)
2537

    
2538
    dynamic_fields = constants.VALID_STORAGE_FIELDS[storage_type]
2539

    
2540
    _CheckOutputFields(static=self._FIELDS_STATIC,
2541
                       dynamic=utils.FieldSet(*dynamic_fields),
2542
                       selected=self.op.output_fields)
2543

    
2544
    self.needed_locks = {}
2545
    self.share_locks[locking.LEVEL_NODE] = 1
2546

    
2547
    if self.op.nodes:
2548
      self.needed_locks[locking.LEVEL_NODE] = \
2549
        _GetWantedNodes(self, self.op.nodes)
2550
    else:
2551
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2552

    
2553
  def CheckPrereq(self):
2554
    """Check prerequisites.
2555

2556
    This checks that the fields required are valid output fields.
2557

2558
    """
2559
    self.op.name = getattr(self.op, "name", None)
2560

    
2561
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2562

    
2563
  def Exec(self, feedback_fn):
2564
    """Computes the list of nodes and their attributes.
2565

2566
    """
2567
    # Always get name to sort by
2568
    if constants.SF_NAME in self.op.output_fields:
2569
      fields = self.op.output_fields[:]
2570
    else:
2571
      fields = [constants.SF_NAME] + self.op.output_fields
2572

    
2573
    # Never ask for node as it's only known to the LU
2574
    while "node" in fields:
2575
      fields.remove("node")
2576

    
2577
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
2578
    name_idx = field_idx[constants.SF_NAME]
2579

    
2580
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
2581
    data = self.rpc.call_storage_list(self.nodes,
2582
                                      self.op.storage_type, st_args,
2583
                                      self.op.name, fields)
2584

    
2585
    result = []
2586

    
2587
    for node in utils.NiceSort(self.nodes):
2588
      nresult = data[node]
2589
      if nresult.offline:
2590
        continue
2591

    
2592
      msg = nresult.fail_msg
2593
      if msg:
2594
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
2595
        continue
2596

    
2597
      rows = dict([(row[name_idx], row) for row in nresult.payload])
2598

    
2599
      for name in utils.NiceSort(rows.keys()):
2600
        row = rows[name]
2601

    
2602
        out = []
2603

    
2604
        for field in self.op.output_fields:
2605
          if field == "node":
2606
            val = node
2607
          elif field in field_idx:
2608
            val = row[field_idx[field]]
2609
          else:
2610
            raise errors.ParameterError(field)
2611

    
2612
          out.append(val)
2613

    
2614
        result.append(out)
2615

    
2616
    return result
2617

    
2618

    
2619
class LUModifyNodeStorage(NoHooksLU):
2620
  """Logical unit for modifying a storage volume on a node.
2621

2622
  """
2623
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
2624
  REQ_BGL = False
2625

    
2626
  def CheckArguments(self):
2627
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2628
    if node_name is None:
2629
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2630

    
2631
    self.op.node_name = node_name
2632

    
2633
    storage_type = self.op.storage_type
2634
    if storage_type not in constants.VALID_STORAGE_FIELDS:
2635
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type)
2636

    
2637
  def ExpandNames(self):
2638
    self.needed_locks = {
2639
      locking.LEVEL_NODE: self.op.node_name,
2640
      }
2641

    
2642
  def CheckPrereq(self):
2643
    """Check prerequisites.
2644

2645
    """
2646
    storage_type = self.op.storage_type
2647

    
2648
    try:
2649
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
2650
    except KeyError:
2651
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
2652
                                 " modified" % storage_type)
2653

    
2654
    diff = set(self.op.changes.keys()) - modifiable
2655
    if diff:
2656
      raise errors.OpPrereqError("The following fields can not be modified for"
2657
                                 " storage units of type '%s': %r" %
2658
                                 (storage_type, list(diff)))
2659

    
2660
  def Exec(self, feedback_fn):
2661
    """Computes the list of nodes and their attributes.
2662

2663
    """
2664
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
2665
    result = self.rpc.call_storage_modify(self.op.node_name,
2666
                                          self.op.storage_type, st_args,
2667
                                          self.op.name, self.op.changes)
2668
    result.Raise("Failed to modify storage unit '%s' on %s" %
2669
                 (self.op.name, self.op.node_name))
2670

    
2671

    
2672
class LUAddNode(LogicalUnit):
2673
  """Logical unit for adding node to the cluster.
2674

2675
  """
2676
  HPATH = "node-add"
2677
  HTYPE = constants.HTYPE_NODE
2678
  _OP_REQP = ["node_name"]
2679

    
2680
  def BuildHooksEnv(self):
2681
    """Build hooks env.
2682

2683
    This will run on all nodes before, and on all nodes + the new node after.
2684

2685
    """
2686
    env = {
2687
      "OP_TARGET": self.op.node_name,
2688
      "NODE_NAME": self.op.node_name,
2689
      "NODE_PIP": self.op.primary_ip,
2690
      "NODE_SIP": self.op.secondary_ip,
2691
      }
2692
    nodes_0 = self.cfg.GetNodeList()
2693
    nodes_1 = nodes_0 + [self.op.node_name, ]
2694
    return env, nodes_0, nodes_1
2695

    
2696
  def CheckPrereq(self):
2697
    """Check prerequisites.
2698

2699
    This checks:
2700
     - the new node is not already in the config
2701
     - it is resolvable
2702
     - its parameters (single/dual homed) matches the cluster
2703

2704
    Any errors are signaled by raising errors.OpPrereqError.
2705

2706
    """
2707
    node_name = self.op.node_name
2708
    cfg = self.cfg
2709

    
2710
    dns_data = utils.HostInfo(node_name)
2711

    
2712
    node = dns_data.name
2713
    primary_ip = self.op.primary_ip = dns_data.ip
2714
    secondary_ip = getattr(self.op, "secondary_ip", None)
2715
    if secondary_ip is None:
2716
      secondary_ip = primary_ip
2717
    if not utils.IsValidIP(secondary_ip):
2718
      raise errors.OpPrereqError("Invalid secondary IP given")
2719
    self.op.secondary_ip = secondary_ip
2720

    
2721
    node_list = cfg.GetNodeList()
2722
    if not self.op.readd and node in node_list:
2723
      raise errors.OpPrereqError("Node %s is already in the configuration" %
2724
                                 node)
2725
    elif self.op.readd and node not in node_list:
2726
      raise errors.OpPrereqError("Node %s is not in the configuration" % node)
2727

    
2728
    for existing_node_name in node_list:
2729
      existing_node = cfg.GetNodeInfo(existing_node_name)
2730

    
2731
      if self.op.readd and node == existing_node_name:
2732
        if (existing_node.primary_ip != primary_ip or
2733
            existing_node.secondary_ip != secondary_ip):
2734
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
2735
                                     " address configuration as before")
2736
        continue
2737

    
2738
      if (existing_node.primary_ip == primary_ip or
2739
          existing_node.secondary_ip == primary_ip or
2740
          existing_node.primary_ip == secondary_ip or
2741
          existing_node.secondary_ip == secondary_ip):
2742
        raise errors.OpPrereqError("New node ip address(es) conflict with"
2743
                                   " existing node %s" % existing_node.name)
2744

    
2745
    # check that the type of the node (single versus dual homed) is the
2746
    # same as for the master
2747
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
2748
    master_singlehomed = myself.secondary_ip == myself.primary_ip
2749
    newbie_singlehomed = secondary_ip == primary_ip
2750
    if master_singlehomed != newbie_singlehomed:
2751
      if master_singlehomed:
2752
        raise errors.OpPrereqError("The master has no private ip but the"
2753
                                   " new node has one")
2754
      else:
2755
        raise errors.OpPrereqError("The master has a private ip but the"
2756
                                   " new node doesn't have one")
2757

    
2758
    # checks reachability
2759
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
2760
      raise errors.OpPrereqError("Node not reachable by ping")
2761

    
2762
    if not newbie_singlehomed:
2763
      # check reachability from my secondary ip to newbie's secondary ip
2764
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
2765
                           source=myself.secondary_ip):
2766
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
2767
                                   " based ping to noded port")
2768

    
2769
    cp_size = self.cfg.GetClusterInfo().candidate_pool_size
2770
    if self.op.readd:
2771
      exceptions = [node]
2772
    else:
2773
      exceptions = []
2774
    mc_now, mc_max = self.cfg.GetMasterCandidateStats(exceptions)
2775
    # the new node will increase mc_max with one, so:
2776
    mc_max = min(mc_max + 1, cp_size)
2777
    self.master_candidate = mc_now < mc_max
2778

    
2779
    if self.op.readd:
2780
      self.new_node = self.cfg.GetNodeInfo(node)
2781
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
2782
    else:
2783
      self.new_node = objects.Node(name=node,
2784
                                   primary_ip=primary_ip,
2785
                                   secondary_ip=secondary_ip,
2786
                                   master_candidate=self.master_candidate,
2787
                                   offline=False, drained=False)
2788

    
2789
  def Exec(self, feedback_fn):
2790
    """Adds the new node to the cluster.
2791

2792
    """
2793
    new_node = self.new_node
2794
    node = new_node.name
2795

    
2796
    # for re-adds, reset the offline/drained/master-candidate flags;
2797
    # we need to reset here, otherwise offline would prevent RPC calls
2798
    # later in the procedure; this also means that if the re-add
2799
    # fails, we are left with a non-offlined, broken node
2800
    if self.op.readd:
2801
      new_node.drained = new_node.offline = False
2802
      self.LogInfo("Readding a node, the offline/drained flags were reset")
2803
      # if we demote the node, we do cleanup later in the procedure
2804
      new_node.master_candidate = self.master_candidate
2805

    
2806
    # notify the user about any possible mc promotion
2807
    if new_node.master_candidate:
2808
      self.LogInfo("Node will be a master candidate")
2809

    
2810
    # check connectivity
2811
    result = self.rpc.call_version([node])[node]
2812
    result.Raise("Can't get version information from node %s" % node)
2813
    if constants.PROTOCOL_VERSION == result.payload:
2814
      logging.info("Communication to node %s fine, sw version %s match",
2815
                   node, result.payload)
2816
    else:
2817
      raise errors.OpExecError("Version mismatch master version %s,"
2818
                               " node version %s" %
2819
                               (constants.PROTOCOL_VERSION, result.payload))
2820

    
2821
    # setup ssh on node
2822
    logging.info("Copy ssh key to node %s", node)
2823
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
2824
    keyarray = []
2825
    keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
2826
                constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
2827
                priv_key, pub_key]
2828

    
2829
    for i in keyfiles:
2830
      keyarray.append(utils.ReadFile(i))
2831

    
2832
    result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
2833
                                    keyarray[2],
2834
                                    keyarray[3], keyarray[4], keyarray[5])
2835
    result.Raise("Cannot transfer ssh keys to the new node")
2836

    
2837
    # Add node to our /etc/hosts, and add key to known_hosts
2838
    if self.cfg.GetClusterInfo().modify_etc_hosts:
2839
      utils.AddHostToEtcHosts(new_node.name)
2840

    
2841
    if new_node.secondary_ip != new_node.primary_ip:
2842
      result = self.rpc.call_node_has_ip_address(new_node.name,
2843
                                                 new_node.secondary_ip)
2844
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
2845
                   prereq=True)
2846
      if not result.payload:
2847
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
2848
                                 " you gave (%s). Please fix and re-run this"
2849
                                 " command." % new_node.secondary_ip)
2850

    
2851
    node_verify_list = [self.cfg.GetMasterNode()]
2852
    node_verify_param = {
2853
      constants.NV_NODELIST: [node],
2854
      # TODO: do a node-net-test as well?
2855
    }
2856

    
2857
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
2858
                                       self.cfg.GetClusterName())
2859
    for verifier in node_verify_list:
2860
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
2861
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
2862
      if nl_payload:
2863
        for failed in nl_payload:
2864
          feedback_fn("ssh/hostname verification failed %s -> %s" %
2865
                      (verifier, nl_payload[failed]))
2866
        raise errors.OpExecError("ssh/hostname verification failed.")
2867

    
2868
    if self.op.readd:
2869
      _RedistributeAncillaryFiles(self)
2870
      self.context.ReaddNode(new_node)
2871
      # make sure we redistribute the config
2872
      self.cfg.Update(new_node)
2873
      # and make sure the new node will not have old files around
2874
      if not new_node.master_candidate:
2875
        result = self.rpc.call_node_demote_from_mc(new_node.name)
2876
        msg = result.fail_msg
2877
        if msg:
2878
          self.LogWarning("Node failed to demote itself from master"
2879
                          " candidate status: %s" % msg)
2880
    else:
2881
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
2882
      self.context.AddNode(new_node)
2883

    
2884

    
2885
class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if node_name is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
    self.op.node_name = node_name
    _CheckBooleanOpField(self.op, 'master_candidate')
    _CheckBooleanOpField(self.op, 'offline')
    _CheckBooleanOpField(self.op, 'drained')
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
    if all_mods.count(None) == 3:
      raise errors.OpPrereqError("Please pass at least one modification")
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time")

  def ExpandNames(self):
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the requested flag changes are valid for the node.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via masterfailover")

    if ((self.op.master_candidate == False or self.op.offline == True or
         self.op.drained == True) and node.master_candidate):
      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
      num_candidates, _ = self.cfg.GetMasterCandidateStats()
      if num_candidates <= cp_size:
        msg = ("Not enough master candidates (desired"
               " %d, new value will be %d)" % (cp_size, num_candidates-1))
        if self.op.force:
          self.LogWarning(msg)
        else:
          raise errors.OpPrereqError(msg)

    if (self.op.master_candidate == True and
        ((node.offline and not self.op.offline == False) or
         (node.drained and not self.op.drained == False))):
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
                                 " to master_candidate" % node.name)

    return

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node

    result = []
    changed_mc = False

    if self.op.offline is not None:
      node.offline = self.op.offline
      result.append(("offline", str(self.op.offline)))
      if self.op.offline == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to offline"))
        if node.drained:
          node.drained = False
          result.append(("drained", "clear drained status due to offline"))

    if self.op.master_candidate is not None:
      node.master_candidate = self.op.master_candidate
      changed_mc = True
      result.append(("master_candidate", str(self.op.master_candidate)))
      if self.op.master_candidate == False:
        rrc = self.rpc.call_node_demote_from_mc(node.name)
        msg = rrc.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s" % msg)

    if self.op.drained is not None:
      node.drained = self.op.drained
      result.append(("drained", str(self.op.drained)))
      if self.op.drained == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to drain"))
          rrc = self.rpc.call_node_demote_from_mc(node.name)
          msg = rrc.fail_msg
          if msg:
            self.LogWarning("Node failed to demote itself: %s" % msg)
        if node.offline:
          node.offline = False
          result.append(("offline", "clear offline status due to drain"))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node)
    # this will trigger job queue propagation or cleanup
    if changed_mc:
      self.context.ReaddNode(node)

    return result

class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_REQP = ["node_name", "force"]
  REQ_BGL = False

  def CheckArguments(self):
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if node_name is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
    self.op.node_name = node_name
    if node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set")

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This LU has no prereqs.

    """
    pass

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload

class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """No prerequisites needed for this LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "beparams": cluster.beparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "file_storage_dir": cluster.file_storage_dir,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "tags": list(cluster.GetTags()),
      }

    return result

class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_REQP = []
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause")

  def ExpandNames(self):
    self.needed_locks = {}

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def CheckPrereq(self):
    """No prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        # assign instead of returning early, so the remaining requested
        # fields are still processed and a proper list is returned
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values

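# Note (editor's illustrative sketch, not part of the original source): for a
# request with self.op.output_fields = ["cluster_name", "drain_flag"], the
# Exec() above returns one entry per requested field, in request order, e.g.
# ["cluster.example.com", False]; the concrete values are hypothetical.
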
class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)
    if not hasattr(self.op, "ignore_size"):
      self.op.ignore_size = False

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info

def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in instance.disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in instance.disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
    device_info.append((instance.primary_node, inst_disk.iv_name,
                        result.payload))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in instance.disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info

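# Note (editor's illustrative sketch, not part of the original source): the
# device_info list returned above maps node devices to instance devices as
# (primary node, instance-visible disk name, node-reported device), e.g.
# roughly [("node1.example.com", "disk/0", "/dev/drbd0"), ...]; the exact
# device string depends on the disk template and is hypothetical here.
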
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")

class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks.

    """
    instance = self.instance
    _SafeShutdownInstanceDisks(self, instance)

def _SafeShutdownInstanceDisks(lu, instance):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s" % pnode)

  if instance.name in ins_l.payload:
    raise errors.OpExecError("Instance is running, can't shutdown"
                             " block devices.")

  _ShutdownInstanceDisks(lu, instance)

def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is true, errors on the primary node are ignored;
  otherwise any failed shutdown makes the function return False.

  """
  all_result = True
  for disk in instance.disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if not ignore_primary or node != instance.primary_node:
          all_result = False
  return all_result

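# Note (editor's illustrative sketch, not part of the original source):
# callers that expect the primary node to be unreachable pass
# ignore_primary=True, e.g. the failover path further below does
#   _ShutdownInstanceDisks(self, instance, ignore_primary=True)
# so that a dead primary node does not abort the operation.
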
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node, prereq=True)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem))
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem))

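# Note (editor's illustrative sketch, mirroring the calls made by the LUs
# below): a typical invocation is
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)
# which raises OpPrereqError if the node cannot provide the requested MiB.
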
class LUStartupInstance(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "force"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra beparams
    self.beparams = getattr(self.op, "beparams", {})
    if self.beparams:
      if not isinstance(self.beparams, dict):
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
                                   " dict" % (type(self.beparams), ))
      # fill the beparams dict
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
      self.op.beparams = self.beparams

    # extra hvparams
    self.hvparams = getattr(self.op, "hvparams", {})
    if self.hvparams:
      if not isinstance(self.hvparams, dict):
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
                                   " dict" % (type(self.hvparams), ))

      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
                                    instance.hvparams)
      filled_hvp.update(self.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
      self.op.hvparams = self.hvparams

    _CheckNodeOnline(self, instance.primary_node)

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True)
    if not remote_info.payload: # not running already
      _CheckNodeFreeMemory(self, instance.primary_node,
                           "starting instance %s" % instance.name,
                           bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    node_current = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    result = self.rpc.call_instance_start(node_current, instance,
                                          self.hvparams, self.beparams)
    msg = result.fail_msg
    if msg:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % msg)

class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL]:
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
                                  (constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL))
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    node_current = instance.primary_node

    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                       constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type)
      result.Raise("Could not reboot instance")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance)
      result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)

class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    self.cfg.MarkInstanceDown(instance.name)
    result = self.rpc.call_instance_shutdown(node_current, instance)
    msg = result.fail_msg
    if msg:
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)

    _ShutdownInstanceDisks(self, instance)

class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name)
    if instance.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name)
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True)
    if remote_info.payload:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name,
                                  instance.primary_node))

    self.op.os_type = getattr(self.op, "os_type", None)
    if self.op.os_type is not None:
      # OS verification
      pnode = self.cfg.GetNodeInfo(
        self.cfg.ExpandNodeName(instance.primary_node))
      if pnode is None:
        raise errors.OpPrereqError("Primary node '%s' is unknown" %
                                   instance.primary_node)
      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
      result.Raise("OS '%s' not in supported OS list for primary node %s" %
                   (self.op.os_type, pnode.name), prereq=True)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      self.cfg.Update(inst)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)

class LURecreateInstanceDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    if not isinstance(self.op.disks, list):
      raise errors.OpPrereqError("Invalid disks parameter")
    for item in self.op.disks:
      if (not isinstance(item, int) or
          item < 0):
        raise errors.OpPrereqError("Invalid disk specification '%s'" %
                                   str(item))

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name)
    if instance.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name)
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True)
    if remote_info.payload:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name,
                                  instance.primary_node))

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    to_skip = []
    for idx, disk in enumerate(self.instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue

    _CreateDisks(self, self.instance, to_skip=to_skip)

class LURenameInstance(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "new_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)
    _CheckNodeOnline(self, instance.primary_node)

    if instance.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name)
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True)
    if remote_info.payload:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name,
                                  instance.primary_node))
    self.instance = instance

    # new name verification
    name_info = utils.HostInfo(self.op.new_name)

    self.op.new_name = new_name = name_info.name
    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name)

    if not getattr(self.op, "ignore_ip", False):
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (name_info.ip, new_name))

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    if inst.disk_template == constants.DT_FILE:
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if inst.disk_template == constants.DT_FILE:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_failures"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    logging.info("Removing block devices for instance %s", instance.name)

    if not _RemoveDisks(self, instance):
      if self.op.ignore_failures:
        feedback_fn("Warning: can't remove instance's disks")
      else:
        raise errors.OpExecError("Can't remove instance's disks")

    logging.info("Removing instance %s out of cluster config", instance.name)

    self.cfg.RemoveInstance(instance.name)
    self.remove_locks[locking.LEVEL_INSTANCE] = instance.name

class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  """
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
                    "serial_no", "ctime", "mtime", "uuid"]
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                    "admin_state",
                                    "disk_template", "ip", "mac", "bridge",
                                    "nic_mode", "nic_link",
                                    "sda_size", "sdb_size", "vcpus", "tags",
                                    "network_port", "beparams",
                                    r"(disk)\.(size)/([0-9]+)",
                                    r"(disk)\.(sizes)", "disk_usage",
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
                                    r"(nic)\.(bridge)/([0-9]+)",
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
                                    r"(disk|nic)\.(count)",
                                    "hvparams",
                                    ] + _SIMPLE_FIELDS +
                                  ["hv/%s" % name
                                   for name in constants.HVS_PARAMETERS] +
                                  ["be/%s" % name
                                   for name in constants.BES_PARAMETERS])
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")

  def ExpandNames(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_INSTANCE] = 1
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedInstances(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.do_locking:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Computes the list of instances and their attributes.

    """
    all_info = self.cfg.GetAllInstancesInfo()
    if self.wanted == locking.ALL_SET:
      # caller didn't specify instance names, so ordering is not important
      if self.do_locking:
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        instance_names = all_info.keys()
      instance_names = utils.NiceSort(instance_names)
    else:
      # caller did specify names, so we must keep the ordering
      if self.do_locking:
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        tgt_set = all_info.keys()
      missing = set(self.wanted).difference(tgt_set)
      if missing:
        raise errors.OpExecError("Some instances were removed before"
                                 " retrieving their data: %s" % missing)
      instance_names = self.wanted

    instance_list = [all_info[iname] for iname in instance_names]

    # begin data gathering

    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))

    bad_nodes = []
    off_nodes = []
    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          off_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        else:
          if result.payload:
            live_data.update(result.payload)
          # else no instance is alive
    else:
      live_data = dict([(name, {}) for name in instance_names])

    # end data gathering

    HVPREFIX = "hv/"
    BEPREFIX = "be/"
    output = []
    cluster = self.cfg.GetClusterInfo()
    for instance in instance_list:
      iout = []
      i_hv = cluster.FillHV(instance)
      i_be = cluster.FillBE(instance)
      i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                 nic.nicparams) for nic in instance.nics]
      for field in self.op.output_fields:
        st_match = self._FIELDS_STATIC.Matches(field)
        if field in self._SIMPLE_FIELDS:
          val = getattr(instance, field)
        elif field == "pnode":
          val = instance.primary_node
        elif field == "snodes":
          val = list(instance.secondary_nodes)
        elif field == "admin_state":
          val = instance.admin_up
        elif field == "oper_state":
          if instance.primary_node in bad_nodes:
            val = None
          else:
            val = bool(live_data.get(instance.name))
        elif field == "status":
          if instance.primary_node in off_nodes:
            val = "ERROR_nodeoffline"
          elif instance.primary_node in bad_nodes:
            val = "ERROR_nodedown"
          else:
            running = bool(live_data.get(instance.name))
            if running:
              if instance.admin_up:
                val = "running"
              else:
                val = "ERROR_up"
            else:
              if instance.admin_up:
                val = "ERROR_down"
              else:
                val = "ADMIN_down"
        elif field == "oper_ram":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("memory", "?")
          else:
            val = "-"
        elif field == "vcpus":
          val = i_be[constants.BE_VCPUS]
        elif field == "disk_template":
          val = instance.disk_template
        elif field == "ip":
          if instance.nics:
            val = instance.nics[0].ip
          else:
            val = None
        elif field == "nic_mode":
          if instance.nics:
            val = i_nicp[0][constants.NIC_MODE]
          else:
            val = None
        elif field == "nic_link":
          if instance.nics:
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "bridge":
          if (instance.nics and
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "mac":
          if instance.nics:
            val = instance.nics[0].mac
          else:
            val = None
        elif field == "sda_size" or field == "sdb_size":
          idx = ord(field[2]) - ord('a')
          try:
            val = instance.FindDisk(idx).size
          except errors.OpPrereqError:
            val = None
        elif field == "disk_usage": # total disk usage per node
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
        elif field == "tags":
          val = list(instance.GetTags())
        elif field == "hvparams":
          val = i_hv
        elif (field.startswith(HVPREFIX) and
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS):
          val = i_hv.get(field[len(HVPREFIX):], None)
        elif field == "beparams":
          val = i_be
        elif (field.startswith(BEPREFIX) and
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
          val = i_be.get(field[len(BEPREFIX):], None)
        elif st_match and st_match.groups():
          # matches a variable list
          st_groups = st_match.groups()
          if st_groups and st_groups[0] == "disk":
            if st_groups[1] == "count":
              val = len(instance.disks)
            elif st_groups[1] == "sizes":
              val = [disk.size for disk in instance.disks]
            elif st_groups[1] == "size":
              try:
                val = instance.FindDisk(st_groups[2]).size
              except errors.OpPrereqError:
                val = None
            else:
              assert False, "Unhandled disk parameter"
          elif st_groups[0] == "nic":
            if st_groups[1] == "count":
              val = len(instance.nics)
            elif st_groups[1] == "macs":
              val = [nic.mac for nic in instance.nics]
            elif st_groups[1] == "ips":
              val = [nic.ip for nic in instance.nics]
            elif st_groups[1] == "modes":
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
            elif st_groups[1] == "links":
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
            elif st_groups[1] == "bridges":
              val = []
              for nicp in i_nicp:
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
                  val.append(nicp[constants.NIC_LINK])
                else:
                  val.append(None)
            else:
              # index-based item
              nic_idx = int(st_groups[2])
              if nic_idx >= len(instance.nics):
                val = None
              else:
                if st_groups[1] == "mac":
                  val = instance.nics[nic_idx].mac
                elif st_groups[1] == "ip":
                  val = instance.nics[nic_idx].ip
                elif st_groups[1] == "mode":
                  val = i_nicp[nic_idx][constants.NIC_MODE]
                elif st_groups[1] == "link":
                  val = i_nicp[nic_idx][constants.NIC_LINK]
                elif st_groups[1] == "bridge":
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
                  if nic_mode == constants.NIC_MODE_BRIDGED:
                    val = i_nicp[nic_idx][constants.NIC_LINK]
                  else:
                    val = None
                else:
                  assert False, "Unhandled NIC parameter"
          else:
            assert False, ("Declared but unhandled variable parameter '%s'" %
                           field)
        else:
          assert False, "Declared but unhandled parameter '%s'" % field
        iout.append(val)
      output.append(iout)

    return output

class LUFailoverInstance(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_consistency"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.")

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    target_node = secondary_nodes[0]
    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]

    feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      # for drbd, these are drbd over lvm
      if not _CheckDiskConsistency(self, dev, target_node, False):
        if instance.admin_up and not self.op.ignore_consistency:
          raise errors.OpExecError("Disk %s is degraded on target node,"
                                   " aborting failover." % dev.iv_name)

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      feedback_fn("* activating the instance's disks on target node")
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                               ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      feedback_fn("* starting the instance on the target node")
      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


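# Note (added commentary, not part of the original module): LUFailoverInstance
# above deliberately keeps going when ignore_consistency is set: degraded
# disks on the target node and a failed shutdown RPC are only logged, on the
# assumption that the operator knows the source node is dead. Without that
# flag, either condition aborts the failover before the config is updated.

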
class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "live", "cleanup"]

  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       self.op.live, self.op.cleanup)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    env = _BuildInstanceHookEnvByObject(self, instance)
    env["MIGRATE_LIVE"] = self.op.live
    env["MIGRATE_CLEANUP"] = self.op.cleanup
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return env, nl, nl


class LUMoveInstance(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = self.cfg.ExpandNodeName(self.op.target_node)
    if target_node is None:
      raise errors.OpPrereqError("Node '%s' not known" %
                                  self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
                                       self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node))

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)

    if instance.admin_up:
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUMigrateNode(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name", "live"]
  REQ_BGL = False

  def ExpandNames(self):
    self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if self.op.node_name is None:
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name)

    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Create tasklets for migrating all instances on this node
    names = []
    tasklets = []

    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      logging.debug("Migrating instance %s", inst.name)
      names.append(inst.name)

      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))

    self.tasklets = tasklets

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = names

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    return (env, nl, nl)


class TLMigrateInstance(Tasklet):
  def __init__(self, lu, instance_name, live, cleanup):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.live = live
    self.cleanup = cleanup

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.instance_name)

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " drbd8, cannot migrate.")

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ConfigurationError("No secondary node but using"
                                      " drbd8 disk template")

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    target_node = secondary_nodes[0]
    # check memory requirements on the secondary node
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
                         instance.name, i_be[constants.BE_MEMORY],
                         instance.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self, target_node)
      result = self.rpc.call_instance_migratable(instance.primary_node,
                                                 instance)
      result.Raise("Can't migrate, please use failover", prereq=True)

    self.instance = instance

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to clean up after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused. You will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation.")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all."
                               " In this case, it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it.")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    self._EnsureSecondary(demoted_node)
    try:
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore here errors, since if the device is standalone, it
      # won't be able to sync
      pass
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the"
                         " drives: error '%s'\n"
                         "Please look and recover the instance status" %
                         str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s" %
                    (target_node, abort_msg))
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
    """Migrate an instance.

    The migration is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    time.sleep(10)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
    time.sleep(10)

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s" % msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    feedback_fn("Migrating instance %s" % self.instance.name)

    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }

    if self.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()


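# Note (added commentary, not part of the original module): TLMigrateInstance
# drives the DRBD disks through a fixed sequence of states during a live
# migration, roughly:
#
#   _EnsureSecondary(target); _GoStandalone(); _GoReconnect(True); _WaitUntilSync()
#   ... call_instance_migrate() on the source node ...
#   _EnsureSecondary(source); _WaitUntilSync(); _GoStandalone();
#   _GoReconnect(False); _WaitUntilSync()
#
# _ExecCleanup() reuses the same helpers to bring a half-migrated instance
# back to a consistent single-master state.

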
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


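# Note (added commentary, not part of the original module): _CreateBlockDev
# recurses depth-first, so children are created before the device itself, and
# force_create is switched on for any subtree rooted at a device whose
# CreateOnSecondary() returns True; such devices, and everything below them,
# therefore also get created on secondary nodes.

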
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID()
    results.append("%s%s" % (new_id, val))
  return results


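# Illustrative sketch (added commentary, not part of the original module):
# a call such as _GenerateUniqueNames(lu, [".disk0", ".disk1"]) returns one
# name per extension, each built as the id from cfg.GenerateUniqueID()
# followed by the extension; the exact shape of the id is up to the config
# implementation.

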
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret()
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev


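# Note (added commentary, not part of the original module): the device tree
# returned by _GenerateDRBD8Branch looks roughly like
#
#   LD_DRBD8, size=size, logical_id=(primary, secondary, port, p_minor,
#                                    s_minor, shared_secret)
#     +- LD_LV data LV,  size=size,   logical_id=(vgname, names[0])
#     +- LD_LV meta LV,  size=128 MB, logical_id=(vgname, names[1])

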
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vgname, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks


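# Illustrative sketch (added commentary, not part of the original module):
# for DT_DRBD8 with two disks and base_index 0, _GenerateDiskTemplate
# allocates four DRBD minors ([primary_node, remote_node] * 2) and four LV
# names (two "_data"/"_meta" pairs from the unique ".disk0"/".disk1"
# prefixes), and pairs them up via _GenerateDRBD8Branch into the "disk/0"
# and "disk/1" devices.

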
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


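# Illustrative sketch (added commentary, not part of the original module):
# for an instance named "inst1.example.com" the text attached to its disks
# (as an LVM tag) would be "originstname+inst1.example.com".

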
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result


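# Note (added commentary, not part of the original module): unlike the
# creation path, which raises on the first failed blockdev_create call,
# _RemoveDisks never raises on a per-device failure; it logs a warning,
# keeps going and returns False, so callers that care have to check the
# returned boolean themselves.

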
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


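# Worked example (added commentary, not part of the original module): for two
# disks of 1024 MB and 256 MB, _ComputeDiskSize returns 1280 MB for DT_PLAIN
# and (1024 + 128) + (256 + 128) = 1536 MB for DT_DRBD8, while DT_DISKLESS
# and DT_FILE need no volume-group space at all (None).

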
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


class LUCreateInstance(LogicalUnit):
5299
  """Create an instance.
5300

5301
  """
5302
  HPATH = "instance-add"
5303
  HTYPE = constants.HTYPE_INSTANCE
5304
  _OP_REQP = ["instance_name", "disks", "disk_template",
5305
              "mode", "start",
5306
              "wait_for_sync", "ip_check", "nics",
5307
              "hvparams", "beparams"]
5308
  REQ_BGL = False
5309

    
5310
  def _ExpandNode(self, node):
5311
    """Expands and checks one node name.
5312

5313
    """
5314
    node_full = self.cfg.ExpandNodeName(node)
5315
    if node_full is None:
5316
      raise errors.OpPrereqError("Unknown node %s" % node)
5317
    return node_full
5318

    
5319
  def ExpandNames(self):
5320
    """ExpandNames for CreateInstance.
5321

5322
    Figure out the right locks for instance creation.
5323

5324
    """
5325
    self.needed_locks = {}
5326

    
5327
    # set optional parameters to none if they don't exist
5328
    for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
5329
      if not hasattr(self.op, attr):
5330
        setattr(self.op, attr, None)
5331

    
5332
    # cheap checks, mostly valid constants given
5333

    
5334
    # verify creation mode
5335
    if self.op.mode not in (constants.INSTANCE_CREATE,
5336
                            constants.INSTANCE_IMPORT):
5337
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
5338
                                 self.op.mode)
5339

    
5340
    # disk template and mirror node verification
5341
    if self.op.disk_template not in constants.DISK_TEMPLATES:
5342
      raise errors.OpPrereqError("Invalid disk template name")
5343

    
5344
    if self.op.hypervisor is None:
5345
      self.op.hypervisor = self.cfg.GetHypervisorType()
5346

    
5347
    cluster = self.cfg.GetClusterInfo()
5348
    enabled_hvs = cluster.enabled_hypervisors
5349
    if self.op.hypervisor not in enabled_hvs:
5350
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
5351
                                 " cluster (%s)" % (self.op.hypervisor,
5352
                                  ",".join(enabled_hvs)))
5353

    
5354
    # check hypervisor parameter syntax (locally)
5355
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5356
    filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
5357
                                  self.op.hvparams)
5358
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
5359
    hv_type.CheckParameterSyntax(filled_hvp)
5360
    self.hv_full = filled_hvp
5361

    
5362
    # fill and remember the beparams dict
5363
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5364
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
5365
                                    self.op.beparams)
5366

    
5367
    #### instance parameters check
5368

    
5369
    # instance name verification
5370
    hostname1 = utils.HostInfo(self.op.instance_name)
5371
    self.op.instance_name = instance_name = hostname1.name
5372

    
5373
    # this is just a preventive check, but someone might still add this
5374
    # instance in the meantime, and creation will fail at lock-add time
5375
    if instance_name in self.cfg.GetInstanceList():
5376
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5377
                                 instance_name)
5378

    
5379
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
5380

    
5381
    # NIC buildup
5382
    self.nics = []
5383
    for idx, nic in enumerate(self.op.nics):
5384
      nic_mode_req = nic.get("mode", None)
5385
      nic_mode = nic_mode_req
5386
      if nic_mode is None:
5387
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
5388

    
5389
      # in routed mode, for the first nic, the default ip is 'auto'
5390
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
5391
        default_ip_mode = constants.VALUE_AUTO
5392
      else:
5393
        default_ip_mode = constants.VALUE_NONE
5394

    
5395
      # ip validity checks
5396
      ip = nic.get("ip", default_ip_mode)
5397
      if ip is None or ip.lower() == constants.VALUE_NONE:
5398
        nic_ip = None
5399
      elif ip.lower() == constants.VALUE_AUTO:
5400
        nic_ip = hostname1.ip
5401
      else:
5402
        if not utils.IsValidIP(ip):
5403
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
5404
                                     " like a valid IP" % ip)
5405
        nic_ip = ip
5406

    
5407
      # TODO: check the ip for uniqueness !!
5408
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
5409
        raise errors.OpPrereqError("Routed nic mode requires an ip address")
5410

    
5411
      # MAC address verification
5412
      mac = nic.get("mac", constants.VALUE_AUTO)
5413
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
5414
        if not utils.IsValidMac(mac.lower()):
5415
          raise errors.OpPrereqError("Invalid MAC address specified: %s" %
5416
                                     mac)
5417
        else:
5418
          # or validate/reserve the current one
5419
          if self.cfg.IsMacInUse(mac):
5420
            raise errors.OpPrereqError("MAC address %s already in use"
5421
                                       " in cluster" % mac)
5422

    
5423
      # bridge verification
5424
      bridge = nic.get("bridge", None)
5425
      link = nic.get("link", None)
5426
      if bridge and link:
5427
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
5428
                                   " at the same time")
5429
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
5430
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic")
5431
      elif bridge:
5432
        link = bridge
5433

    
5434
      nicparams = {}
5435
      if nic_mode_req:
5436
        nicparams[constants.NIC_MODE] = nic_mode_req
5437
      if link:
5438
        nicparams[constants.NIC_LINK] = link
5439

    
5440
      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
5441
                                      nicparams)
5442
      objects.NIC.CheckParameterSyntax(check_params)
5443
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
5444

    
5445
    # disk checks/pre-build
5446
    self.disks = []
5447
    for disk in self.op.disks:
5448
      mode = disk.get("mode", constants.DISK_RDWR)
5449
      if mode not in constants.DISK_ACCESS_SET:
5450
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
5451
                                   mode)
5452
      size = disk.get("size", None)
5453
      if size is None:
5454
        raise errors.OpPrereqError("Missing disk size")
5455
      try:
5456
        size = int(size)
5457
      except ValueError:
5458
        raise errors.OpPrereqError("Invalid disk size '%s'" % size)
5459
      self.disks.append({"size": size, "mode": mode})
5460

    
5461
    # used in CheckPrereq for ip ping check
5462
    self.check_ip = hostname1.ip
5463

    
5464
    # file storage checks
5465
    if (self.op.file_driver and
5466
        not self.op.file_driver in constants.FILE_DRIVER):
5467
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
5468
                                 self.op.file_driver)
5469

    
5470
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
5471
      raise errors.OpPrereqError("File storage directory path not absolute")
5472

    
5473
    ### Node/iallocator related checks
5474
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
5475
      raise errors.OpPrereqError("One and only one of iallocator and primary"
5476
                                 " node must be given")
5477

    
5478
    if self.op.iallocator:
5479
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5480
    else:
5481
      self.op.pnode = self._ExpandNode(self.op.pnode)
5482
      nodelist = [self.op.pnode]
5483
      if self.op.snode is not None:
5484
        self.op.snode = self._ExpandNode(self.op.snode)
5485
        nodelist.append(self.op.snode)
5486
      self.needed_locks[locking.LEVEL_NODE] = nodelist
5487

    
5488
    # in case of import lock the source node too
5489
    if self.op.mode == constants.INSTANCE_IMPORT:
5490
      src_node = getattr(self.op, "src_node", None)
5491
      src_path = getattr(self.op, "src_path", None)
5492

    
5493
      if src_path is None:
5494
        self.op.src_path = src_path = self.op.instance_name
5495

    
5496
      if src_node is None:
5497
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5498
        self.op.src_node = None
5499
        if os.path.isabs(src_path):
5500
          raise errors.OpPrereqError("Importing an instance from an absolute"
5501
                                     " path requires a source node option.")
5502
      else:
5503
        self.op.src_node = src_node = self._ExpandNode(src_node)
5504
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5505
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
5506
        if not os.path.isabs(src_path):
5507
          self.op.src_path = src_path = \
5508
            os.path.join(constants.EXPORT_DIR, src_path)
5509

    
5510
    else: # INSTANCE_CREATE
5511
      if getattr(self.op, "os_type", None) is None:
5512
        raise errors.OpPrereqError("No guest OS specified")
5513

    
5514
  def _RunAllocator(self):
5515
    """Run the allocator based on input opcode.
5516

5517
    """
5518
    nics = [n.ToDict() for n in self.nics]
5519
    ial = IAllocator(self.cfg, self.rpc,
5520
                     mode=constants.IALLOCATOR_MODE_ALLOC,
5521
                     name=self.op.instance_name,
5522
                     disk_template=self.op.disk_template,
5523
                     tags=[],
5524
                     os=self.op.os_type,
5525
                     vcpus=self.be_full[constants.BE_VCPUS],
5526
                     mem_size=self.be_full[constants.BE_MEMORY],
5527
                     disks=self.disks,
5528
                     nics=nics,
5529
                     hypervisor=self.op.hypervisor,
5530
                     )
5531

    
5532
    ial.Run(self.op.iallocator)
5533

    
5534
    if not ial.success:
5535
      raise errors.OpPrereqError("Can't compute nodes using"
5536
                                 " iallocator '%s': %s" % (self.op.iallocator,
5537
                                                           ial.info))
5538
    if len(ial.nodes) != ial.required_nodes:
5539
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
5540
                                 " of nodes (%s), required %s" %
5541
                                 (self.op.iallocator, len(ial.nodes),
5542
                                  ial.required_nodes))
5543
    self.op.pnode = ial.nodes[0]
5544
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
5545
                 self.op.instance_name, self.op.iallocator,
5546
                 ", ".join(ial.nodes))
5547
    if ial.required_nodes == 2:
5548
      self.op.snode = ial.nodes[1]
5549

    
5550
  def BuildHooksEnv(self):
5551
    """Build hooks env.
5552

5553
    This runs on master, primary and secondary nodes of the instance.
5554

5555
    """
5556
    env = {
5557
      "ADD_MODE": self.op.mode,
5558
      }
5559
    if self.op.mode == constants.INSTANCE_IMPORT:
5560
      env["SRC_NODE"] = self.op.src_node
5561
      env["SRC_PATH"] = self.op.src_path
5562
      env["SRC_IMAGES"] = self.src_images
5563

    
5564
    env.update(_BuildInstanceHookEnv(
5565
      name=self.op.instance_name,
5566
      primary_node=self.op.pnode,
5567
      secondary_nodes=self.secondaries,
5568
      status=self.op.start,
5569
      os_type=self.op.os_type,
5570
      memory=self.be_full[constants.BE_MEMORY],
5571
      vcpus=self.be_full[constants.BE_VCPUS],
5572
      nics=_NICListToTuple(self, self.nics),
5573
      disk_template=self.op.disk_template,
5574
      disks=[(d["size"], d["mode"]) for d in self.disks],
5575
      bep=self.be_full,
5576
      hvp=self.hv_full,
5577
      hypervisor_name=self.op.hypervisor,
5578
    ))
5579

    
5580
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
5581
          self.secondaries)
5582
    return env, nl, nl
5583

    
5584

    
5585
  def CheckPrereq(self):
5586
    """Check prerequisites.
5587

5588
    """
5589
    if (not self.cfg.GetVGName() and
5590
        self.op.disk_template not in constants.DTS_NOT_LVM):
5591
      raise errors.OpPrereqError("Cluster does not support lvm-based"
5592
                                 " instances")
5593

    
5594
    if self.op.mode == constants.INSTANCE_IMPORT:
5595
      src_node = self.op.src_node
5596
      src_path = self.op.src_path
5597

    
5598
      if src_node is None:
5599
        locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
5600
        exp_list = self.rpc.call_export_list(locked_nodes)
5601
        found = False
5602
        for node in exp_list:
5603
          if exp_list[node].fail_msg:
5604
            continue
5605
          if src_path in exp_list[node].payload:
5606
            found = True
5607
            self.op.src_node = src_node = node
5608
            self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
5609
                                                       src_path)
5610
            break
5611
        if not found:
5612
          raise errors.OpPrereqError("No export found for relative path %s" %
5613
                                      src_path)
5614

    
5615
      _CheckNodeOnline(self, src_node)
5616
      result = self.rpc.call_export_info(src_node, src_path)
5617
      result.Raise("No export or invalid export found in dir %s" % src_path)
5618

    
5619
      export_info = objects.SerializableConfigParser.Loads(str(result.payload))
5620
      if not export_info.has_section(constants.INISECT_EXP):
5621
        raise errors.ProgrammerError("Corrupted export config")
5622

    
5623
      ei_version = export_info.get(constants.INISECT_EXP, 'version')
5624
      if (int(ei_version) != constants.EXPORT_VERSION):
5625
        raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
5626
                                   (ei_version, constants.EXPORT_VERSION))
5627

    
5628
      # Check that the new instance doesn't have less disks than the export
5629
      instance_disks = len(self.disks)
5630
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
5631
      if instance_disks < export_disks:
5632
        raise errors.OpPrereqError("Not enough disks to import."
5633
                                   " (instance: %d, export: %d)" %
5634
                                   (instance_disks, export_disks))
5635

    
5636
      self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
5637
      disk_images = []
5638
      for idx in range(export_disks):
5639
        option = 'disk%d_dump' % idx
5640
        if export_info.has_option(constants.INISECT_INS, option):
5641
          # FIXME: are the old os-es, disk sizes, etc. useful?
5642
          export_name = export_info.get(constants.INISECT_INS, option)
5643
          image = os.path.join(src_path, export_name)
5644
          disk_images.append(image)
5645
        else:
5646
          disk_images.append(False)
5647

    
5648
      self.src_images = disk_images
5649

    
5650
      old_name = export_info.get(constants.INISECT_INS, 'name')
5651
      # FIXME: int() here could throw a ValueError on broken exports
5652
      exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
5653
      if self.op.instance_name == old_name:
5654
        for idx, nic in enumerate(self.nics):
5655
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
5656
            nic_mac_ini = 'nic%d_mac' % idx
5657
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
5658

    
5659
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
5660
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
5661
    if self.op.start and not self.op.ip_check:
5662
      raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
5663
                                 " adding an instance in start mode")
5664

    
5665
    if self.op.ip_check:
5666
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
5667
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
5668
                                   (self.check_ip, self.op.instance_name))
5669

    
5670
    #### mac address generation
    # By generating the mac address here, both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad,
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC()

    #### allocator run
5683

    
5684
    if self.op.iallocator is not None:
5685
      self._RunAllocator()
5686

    
5687
    #### node related checks
5688

    
5689
    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name)

    self.secondaries = []
5701

    
5702
    # mirror node verification
5703
    if self.op.disk_template in constants.DTS_NET_MIRROR:
5704
      if self.op.snode is None:
5705
        raise errors.OpPrereqError("The networked disk templates need"
5706
                                   " a mirror node")
5707
      if self.op.snode == pnode.name:
5708
        raise errors.OpPrereqError("The secondary node cannot be"
5709
                                   " the primary node.")
5710
      _CheckNodeOnline(self, self.op.snode)
5711
      _CheckNodeNotDrained(self, self.op.snode)
5712
      self.secondaries.append(self.op.snode)
5713

    
5714
    nodenames = [pnode.name] + self.secondaries
5715

    
5716
    req_size = _ComputeDiskSize(self.op.disk_template,
5717
                                self.disks)
5718

    
5719
    # Check lv size requirements
    if req_size is not None:
      nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                         self.op.hypervisor)
      for node in nodenames:
        info = nodeinfo[node]
        info.Raise("Cannot get current information from node %s" % node)
        info = info.payload
        vg_free = info.get('vg_free', None)
        if not isinstance(vg_free, int):
          raise errors.OpPrereqError("Can't compute free disk space on"
                                     " node %s" % node)
        if req_size > vg_free:
          raise errors.OpPrereqError("Not enough disk space on target node %s."
                                     " %d MB available, %d MB required" %
                                     (node, vg_free, req_size))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
5737

    
5738
    # os verification
5739
    result = self.rpc.call_os_get(pnode.name, self.op.os_type)
5740
    result.Raise("OS '%s' not in supported os list for primary node %s" %
5741
                 (self.op.os_type, pnode.name), prereq=True)
5742

    
5743
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
5744

    
5745
    # memory check on primary node
5746
    if self.op.start:
5747
      _CheckNodeFreeMemory(self, self.pnode.name,
5748
                           "creating instance %s" % self.op.instance_name,
5749
                           self.be_full[constants.BE_MEMORY],
5750
                           self.op.hypervisor)
5751

    
5752
    self.dry_run_result = list(nodenames)
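
    # For dry-run requests the caller gets back the node names (primary plus
    # secondaries) that would have been used.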
5753

    
5754
  def Exec(self, feedback_fn):
5755
    """Create and add the instance to the cluster.
5756

5757
    """
5758
    instance = self.op.instance_name
5759
    pnode_name = self.pnode.name
5760

    
5761
    ht_kind = self.op.hypervisor
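    # Hypervisors listed in HTS_REQ_PORT need a cluster-unique TCP port (used
    # for things like the VNC console); AllocatePort reserves one from the
    # pool tracked in the cluster configuration.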
5762
    if ht_kind in constants.HTS_REQ_PORT:
5763
      network_port = self.cfg.AllocatePort()
5764
    else:
5765
      network_port = None
5766

    
5767
    ##if self.op.vnc_bind_address is None:
5768
    ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS
5769

    
5770
    # this is needed because os.path.join does not accept None arguments
5771
    if self.op.file_storage_dir is None:
5772
      string_file_storage_dir = ""
5773
    else:
5774
      string_file_storage_dir = self.op.file_storage_dir
5775

    
5776
    # build the full file storage dir path
5777
    file_storage_dir = os.path.normpath(os.path.join(
5778
                                        self.cfg.GetFileStorageDir(),
5779
                                        string_file_storage_dir, instance))
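    # For example, with a cluster file storage dir of /srv/ganeti/file-storage
    # and no per-instance subdirectory, this yields
    # /srv/ganeti/file-storage/<instance name> (only relevant for file-based
    # disk templates).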
5780

    
5781

    
5782
    disks = _GenerateDiskTemplate(self,
5783
                                  self.op.disk_template,
5784
                                  instance, pnode_name,
5785
                                  self.secondaries,
5786
                                  self.disks,
5787
                                  file_storage_dir,
5788
                                  self.op.file_driver,
5789
                                  0)
5790

    
5791
    iobj = objects.Instance(name=instance, os=self.op.os_type,
5792
                            primary_node=pnode_name,
5793
                            nics=self.nics, disks=disks,
5794
                            disk_template=self.op.disk_template,
5795
                            admin_up=False,
5796
                            network_port=network_port,
5797
                            beparams=self.op.beparams,
5798
                            hvparams=self.op.hvparams,
5799
                            hypervisor=self.op.hypervisor,
5800
                            )
5801

    
5802
    feedback_fn("* creating instance disks...")
5803
    try:
5804
      _CreateDisks(self, iobj)
5805
    except errors.OpExecError:
5806
      self.LogWarning("Device creation failed, reverting...")
5807
      try:
5808
        _RemoveDisks(self, iobj)
5809
      finally:
5810
        self.cfg.ReleaseDRBDMinors(instance)
5811
        raise
5812

    
5813
    feedback_fn("adding instance %s to cluster config" % instance)
5814

    
5815
    self.cfg.AddInstance(iobj)
5816
    # Declare that we don't want to remove the instance lock anymore, as we've
5817
    # added the instance to the config
5818
    del self.remove_locks[locking.LEVEL_INSTANCE]
5819
    # Unlock all the nodes
5820
    if self.op.mode == constants.INSTANCE_IMPORT:
5821
      nodes_keep = [self.op.src_node]
5822
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
5823
                       if node != self.op.src_node]
5824
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
5825
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
5826
    else:
5827
      self.context.glm.release(locking.LEVEL_NODE)
5828
      del self.acquired_locks[locking.LEVEL_NODE]
5829

    
5830
    if self.op.wait_for_sync:
5831
      disk_abort = not _WaitForSync(self, iobj)
5832
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
5833
      # make sure the disks are not degraded (still sync-ing is ok)
5834
      time.sleep(15)
5835
      feedback_fn("* checking mirrors status")
5836
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
5837
    else:
5838
      disk_abort = False
5839

    
5840
    if disk_abort:
5841
      _RemoveDisks(self, iobj)
5842
      self.cfg.RemoveInstance(iobj.name)
5843
      # Make sure the instance lock gets removed
5844
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
5845
      raise errors.OpExecError("There are some degraded disks for"
5846
                               " this instance")
5847

    
5848
    feedback_fn("creating os for instance %s on node %s" %
5849
                (instance, pnode_name))
5850

    
5851
    if iobj.disk_template != constants.DT_DISKLESS:
5852
      if self.op.mode == constants.INSTANCE_CREATE:
5853
        feedback_fn("* running the instance OS create scripts...")
5854
        result = self.rpc.call_instance_os_add(pnode_name, iobj, False)
5855
        result.Raise("Could not add os for instance %s"
5856
                     " on node %s" % (instance, pnode_name))
5857

    
5858
      elif self.op.mode == constants.INSTANCE_IMPORT:
5859
        feedback_fn("* running the instance OS import scripts...")
5860
        src_node = self.op.src_node
5861
        src_images = self.src_images
5862
        cluster_name = self.cfg.GetClusterName()
5863
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
5864
                                                         src_node, src_images,
5865
                                                         cluster_name)
5866
        msg = import_result.fail_msg
5867
        if msg:
5868
          self.LogWarning("Error while importing the disk images for instance"
5869
                          " %s on node %s: %s" % (instance, pnode_name, msg))
5870
      else:
5871
        # also checked in the prereq part
5872
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
5873
                                     % self.op.mode)
5874

    
5875
    if self.op.start:
5876
      iobj.admin_up = True
5877
      self.cfg.Update(iobj)
5878
      logging.info("Starting instance %s on node %s", instance, pnode_name)
5879
      feedback_fn("* starting instance...")
5880
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
5881
      result.Raise("Could not start instance")
5882

    
5883
    return list(iobj.all_nodes)
5884

    
5885

    
5886
class LUConnectConsole(NoHooksLU):
5887
  """Connect to an instance's console.
5888

5889
  This is somewhat special in that it returns the command line that
5890
  you need to run on the master node in order to connect to the
5891
  console.
5892

5893
  """
5894
  _OP_REQP = ["instance_name"]
5895
  REQ_BGL = False
5896

    
5897
  def ExpandNames(self):
5898
    self._ExpandAndLockInstance()
5899

    
5900
  def CheckPrereq(self):
5901
    """Check prerequisites.
5902

5903
    This checks that the instance is in the cluster.
5904

5905
    """
5906
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5907
    assert self.instance is not None, \
5908
      "Cannot retrieve locked instance %s" % self.op.instance_name
5909
    _CheckNodeOnline(self, self.instance.primary_node)
5910

    
5911
  def Exec(self, feedback_fn):
5912
    """Connect to the console of an instance
5913

5914
    """
5915
    instance = self.instance
5916
    node = instance.primary_node
5917

    
5918
    node_insts = self.rpc.call_instance_list([node],
5919
                                             [instance.hypervisor])[node]
5920
    node_insts.Raise("Can't get node information from %s" % node)
5921

    
5922
    if instance.name not in node_insts.payload:
5923
      raise errors.OpExecError("Instance %s is not running." % instance.name)
5924

    
5925
    logging.debug("Connecting to console of %s on %s", instance.name, node)
5926

    
5927
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)

    # build ssh cmdline
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
5937

    
5938

    
5939
class LUReplaceDisks(LogicalUnit):
5940
  """Replace the disks of an instance.
5941

5942
  """
5943
  HPATH = "mirrors-replace"
5944
  HTYPE = constants.HTYPE_INSTANCE
5945
  _OP_REQP = ["instance_name", "mode", "disks"]
5946
  REQ_BGL = False
5947

    
5948
  def CheckArguments(self):
5949
    if not hasattr(self.op, "remote_node"):
5950
      self.op.remote_node = None
5951
    if not hasattr(self.op, "iallocator"):
5952
      self.op.iallocator = None
5953

    
5954
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
5955
                                  self.op.iallocator)
5956

    
5957
  def ExpandNames(self):
5958
    self._ExpandAndLockInstance()
5959

    
5960
    if self.op.iallocator is not None:
5961
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5962

    
5963
    elif self.op.remote_node is not None:
5964
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
5965
      if remote_node is None:
5966
        raise errors.OpPrereqError("Node '%s' not known" %
5967
                                   self.op.remote_node)
5968

    
5969
      self.op.remote_node = remote_node
5970

    
5971
      # Warning: do not remove the locking of the new secondary here
5972
      # unless DRBD8.AddChildren is changed to work in parallel;
5973
      # currently it doesn't since parallel invocations of
5974
      # FindUnusedMinor will conflict
5975
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
5976
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5977

    
5978
    else:
5979
      self.needed_locks[locking.LEVEL_NODE] = []
5980
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5981

    
5982
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
5983
                                   self.op.iallocator, self.op.remote_node,
5984
                                   self.op.disks)
5985

    
5986
    self.tasklets = [self.replacer]
5987

    
5988
  def DeclareLocks(self, level):
5989
    # If we're not already locking all nodes in the set we have to declare the
5990
    # instance's primary/secondary nodes.
5991
    if (level == locking.LEVEL_NODE and
5992
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
5993
      self._LockInstancesNodes()
5994

    
5995
  def BuildHooksEnv(self):
5996
    """Build hooks env.
5997

5998
    This runs on the master, the primary and all the secondaries.
5999

6000
    """
6001
    instance = self.replacer.instance
6002
    env = {
6003
      "MODE": self.op.mode,
6004
      "NEW_SECONDARY": self.op.remote_node,
6005
      "OLD_SECONDARY": instance.secondary_nodes[0],
6006
      }
6007
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6008
    nl = [
6009
      self.cfg.GetMasterNode(),
6010
      instance.primary_node,
6011
      ]
6012
    if self.op.remote_node is not None:
6013
      nl.append(self.op.remote_node)
6014
    return env, nl, nl
6015

    
6016

    
6017
class LUEvacuateNode(LogicalUnit):
6018
  """Relocate the secondary instances from a node.
6019

6020
  """
6021
  HPATH = "node-evacuate"
6022
  HTYPE = constants.HTYPE_NODE
6023
  _OP_REQP = ["node_name"]
6024
  REQ_BGL = False
6025

    
6026
  def CheckArguments(self):
6027
    if not hasattr(self.op, "remote_node"):
6028
      self.op.remote_node = None
6029
    if not hasattr(self.op, "iallocator"):
6030
      self.op.iallocator = None
6031

    
6032
    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
6033
                                  self.op.remote_node,
6034
                                  self.op.iallocator)
6035

    
6036
  def ExpandNames(self):
6037
    self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name)
6038
    if self.op.node_name is None:
6039
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name)
6040

    
6041
    self.needed_locks = {}
6042

    
6043
    # Declare node locks
6044
    if self.op.iallocator is not None:
6045
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6046

    
6047
    elif self.op.remote_node is not None:
6048
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
6049
      if remote_node is None:
6050
        raise errors.OpPrereqError("Node '%s' not known" %
6051
                                   self.op.remote_node)
6052

    
6053
      self.op.remote_node = remote_node
6054

    
6055
      # Warning: do not remove the locking of the new secondary here
6056
      # unless DRBD8.AddChildren is changed to work in parallel;
6057
      # currently it doesn't since parallel invocations of
6058
      # FindUnusedMinor will conflict
6059
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
6060
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6061

    
6062
    else:
6063
      raise errors.OpPrereqError("Invalid parameters")
6064

    
6065
    # Create tasklets for replacing disks for all secondary instances on this
6066
    # node
6067
    names = []
6068
    tasklets = []
6069

    
6070
    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
6071
      logging.debug("Replacing disks for instance %s", inst.name)
6072
      names.append(inst.name)
6073

    
6074
      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
6075
                                self.op.iallocator, self.op.remote_node, [])
6076
      tasklets.append(replacer)
6077

    
6078
    self.tasklets = tasklets
6079
    self.instance_names = names
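
    # Node evacuation thus becomes a batch of secondary-replacement tasklets,
    # one per instance that uses this node as its secondary.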
6080

    
6081
    # Declare instance locks
6082
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
6083

    
6084
  def DeclareLocks(self, level):
6085
    # If we're not already locking all nodes in the set we have to declare the
6086
    # instance's primary/secondary nodes.
6087
    if (level == locking.LEVEL_NODE and
6088
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6089
      self._LockInstancesNodes()
6090

    
6091
  def BuildHooksEnv(self):
6092
    """Build hooks env.
6093

6094
    This runs on the master, the primary and all the secondaries.
6095

6096
    """
6097
    env = {
6098
      "NODE_NAME": self.op.node_name,
6099
      }
6100

    
6101
    nl = [self.cfg.GetMasterNode()]
6102

    
6103
    if self.op.remote_node is not None:
6104
      env["NEW_SECONDARY"] = self.op.remote_node
6105
      nl.append(self.op.remote_node)
6106

    
6107
    return (env, nl, nl)
6108

    
6109

    
6110
class TLReplaceDisks(Tasklet):
6111
  """Replaces disks for an instance.
6112

6113
  Note: Locking is not within the scope of this class.
6114

6115
  """
6116
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
6117
               disks):
6118
    """Initializes this class.
6119

6120
    """
6121
    Tasklet.__init__(self, lu)
6122

    
6123
    # Parameters
6124
    self.instance_name = instance_name
6125
    self.mode = mode
6126
    self.iallocator_name = iallocator_name
6127
    self.remote_node = remote_node
6128
    self.disks = disks
6129

    
6130
    # Runtime data
6131
    self.instance = None
6132
    self.new_node = None
6133
    self.target_node = None
6134
    self.other_node = None
6135
    self.remote_node_info = None
6136
    self.node_secondary_ip = None
6137

    
6138
  @staticmethod
6139
  def CheckArguments(mode, remote_node, iallocator):
6140
    """Helper function for users of this class.
6141

6142
    """
6143
    # check for valid parameter combination
6144
    if mode == constants.REPLACE_DISK_CHG:
6145
      if remote_node is None and iallocator is None:
6146
        raise errors.OpPrereqError("When changing the secondary either an"
6147
                                   " iallocator script must be used or the"
6148
                                   " new node given")
6149

    
6150
      if remote_node is not None and iallocator is not None:
6151
        raise errors.OpPrereqError("Give either the iallocator or the new"
6152
                                   " secondary, not both")
6153

    
6154
    elif remote_node is not None or iallocator is not None:
6155
      # Not replacing the secondary
6156
      raise errors.OpPrereqError("The iallocator and new node options can"
6157
                                 " only be used when changing the"
6158
                                 " secondary node")
6159

    
6160
  @staticmethod
6161
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
6162
    """Compute a new secondary node using an IAllocator.
6163

6164
    """
6165
    ial = IAllocator(lu.cfg, lu.rpc,
6166
                     mode=constants.IALLOCATOR_MODE_RELOC,
6167
                     name=instance_name,
6168
                     relocate_from=relocate_from)
6169

    
6170
    ial.Run(iallocator_name)
6171

    
6172
    if not ial.success:
6173
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
6174
                                 " %s" % (iallocator_name, ial.info))
6175

    
6176
    if len(ial.nodes) != ial.required_nodes:
6177
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6178
                                 " of nodes (%s), required %s" %
6179
                                 (len(ial.nodes), ial.required_nodes))
6180

    
6181
    remote_node_name = ial.nodes[0]
6182

    
6183
    lu.LogInfo("Selected new secondary for instance '%s': %s",
6184
               instance_name, remote_node_name)
6185

    
6186
    return remote_node_name
6187

    
6188
  def _FindFaultyDisks(self, node_name):
6189
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
6190
                                    node_name, True)
6191

    
6192
  def CheckPrereq(self):
6193
    """Check prerequisites.
6194

6195
    This checks that the instance is in the cluster.
6196

6197
    """
6198
    self.instance = self.cfg.GetInstanceInfo(self.instance_name)
6199
    assert self.instance is not None, \
6200
      "Cannot retrieve locked instance %s" % self.instance_name
6201

    
6202
    if self.instance.disk_template != constants.DT_DRBD8:
6203
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
6204
                                 " instances")
6205

    
6206
    if len(self.instance.secondary_nodes) != 1:
6207
      raise errors.OpPrereqError("The instance has a strange layout,"
6208
                                 " expected one secondary but found %d" %
6209
                                 len(self.instance.secondary_nodes))
6210

    
6211
    secondary_node = self.instance.secondary_nodes[0]
6212

    
6213
    if self.iallocator_name is None:
6214
      remote_node = self.remote_node
6215
    else:
6216
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
6217
                                       self.instance.name, secondary_node)
6218

    
6219
    if remote_node is not None:
6220
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
6221
      assert self.remote_node_info is not None, \
6222
        "Cannot retrieve locked node %s" % remote_node
6223
    else:
6224
      self.remote_node_info = None
6225

    
6226
    if remote_node == self.instance.primary_node:
6227
      raise errors.OpPrereqError("The specified node is the primary node of"
6228
                                 " the instance.")
6229

    
6230
    if remote_node == secondary_node:
6231
      raise errors.OpPrereqError("The specified node is already the"
6232
                                 " secondary node of the instance.")
6233

    
6234
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
6235
                                    constants.REPLACE_DISK_CHG):
6236
      raise errors.OpPrereqError("Cannot specify disks to be replaced")
6237

    
6238
    if self.mode == constants.REPLACE_DISK_AUTO:
6239
      faulty_primary = self._FindFaultyDisks(self.instance.primary_node)
6240
      faulty_secondary = self._FindFaultyDisks(secondary_node)
6241

    
6242
      if faulty_primary and faulty_secondary:
6243
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
6244
                                   " one node and can not be repaired"
6245
                                   " automatically" % self.instance_name)
6246

    
6247
      if faulty_primary:
6248
        self.disks = faulty_primary
6249
        self.target_node = self.instance.primary_node
6250
        self.other_node = secondary_node
6251
        check_nodes = [self.target_node, self.other_node]
6252
      elif faulty_secondary:
6253
        self.disks = faulty_secondary
6254
        self.target_node = secondary_node
6255
        self.other_node = self.instance.primary_node
6256
        check_nodes = [self.target_node, self.other_node]
6257
      else:
6258
        self.disks = []
6259
        check_nodes = []
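
      # In auto mode we only replace the disks actually reported as faulty,
      # and only on the node that reported them; with no faulty disks there is
      # nothing to do.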
6260

    
6261
    else:
6262
      # Non-automatic modes
6263
      if self.mode == constants.REPLACE_DISK_PRI:
6264
        self.target_node = self.instance.primary_node
6265
        self.other_node = secondary_node
6266
        check_nodes = [self.target_node, self.other_node]
6267

    
6268
      elif self.mode == constants.REPLACE_DISK_SEC:
6269
        self.target_node = secondary_node
6270
        self.other_node = self.instance.primary_node
6271
        check_nodes = [self.target_node, self.other_node]
6272

    
6273
      elif self.mode == constants.REPLACE_DISK_CHG:
6274
        self.new_node = remote_node
6275
        self.other_node = self.instance.primary_node
6276
        self.target_node = secondary_node
6277
        check_nodes = [self.new_node, self.other_node]
6278

    
6279
        _CheckNodeNotDrained(self.lu, remote_node)
6280

    
6281
      else:
6282
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
6283
                                     self.mode)
6284

    
6285
      # If not specified all disks should be replaced
6286
      if not self.disks:
6287
        self.disks = range(len(self.instance.disks))
6288

    
6289
    for node in check_nodes:
6290
      _CheckNodeOnline(self.lu, node)
6291

    
6292
    # Check whether disks are valid
6293
    for disk_idx in self.disks:
6294
      self.instance.FindDisk(disk_idx)
6295

    
6296
    # Get secondary node IP addresses
6297
    node_2nd_ip = {}
6298

    
6299
    for node_name in [self.target_node, self.other_node, self.new_node]:
6300
      if node_name is not None:
6301
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
6302

    
6303
    self.node_secondary_ip = node_2nd_ip
6304

    
6305
  def Exec(self, feedback_fn):
6306
    """Execute disk replacement.
6307

6308
    This dispatches the disk replacement to the appropriate handler.
6309

6310
    """
6311
    if not self.disks:
6312
      feedback_fn("No disks need replacement")
6313
      return
6314

    
6315
    feedback_fn("Replacing disk(s) %s for %s" %
6316
                (", ".join([str(i) for i in self.disks]), self.instance.name))
6317

    
6318
    activate_disks = (not self.instance.admin_up)
6319

    
6320
    # Activate the instance disks if we're replacing them on a down instance
6321
    if activate_disks:
6322
      _StartInstanceDisks(self.lu, self.instance, True)
6323

    
6324
    try:
6325
      # Should we replace the secondary node?
6326
      if self.new_node is not None:
6327
        return self._ExecDrbd8Secondary()
6328
      else:
6329
        return self._ExecDrbd8DiskOnly()
6330

    
6331
    finally:
6332
      # Deactivate the instance disks if we're replacing them on a down instance
6333
      if activate_disks:
6334
        _SafeShutdownInstanceDisks(self.lu, self.instance)
6335

    
6336
  def _CheckVolumeGroup(self, nodes):
6337
    self.lu.LogInfo("Checking volume groups")
6338

    
6339
    vgname = self.cfg.GetVGName()
6340

    
6341
    # Make sure volume group exists on all involved nodes
6342
    results = self.rpc.call_vg_list(nodes)
6343
    if not results:
6344
      raise errors.OpExecError("Can't list volume groups on the nodes")
6345

    
6346
    for node in nodes:
6347
      res = results[node]
6348
      res.Raise("Error checking node %s" % node)
6349
      if vgname not in res.payload:
6350
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
6351
                                 (vgname, node))
6352

    
6353
  def _CheckDisksExistence(self, nodes):
6354
    # Check disk existence
6355
    for idx, dev in enumerate(self.instance.disks):
6356
      if idx not in self.disks:
6357
        continue
6358

    
6359
      for node in nodes:
6360
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
6361
        self.cfg.SetDiskID(dev, node)
6362

    
6363
        result = self.rpc.call_blockdev_find(node, dev)
6364

    
6365
        msg = result.fail_msg
6366
        if msg or not result.payload:
6367
          if not msg:
6368
            msg = "disk not found"
6369
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
6370
                                   (idx, node, msg))
6371

    
6372
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
6373
    for idx, dev in enumerate(self.instance.disks):
6374
      if idx not in self.disks:
6375
        continue
6376

    
6377
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
6378
                      (idx, node_name))
6379

    
6380
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
6381
                                   ldisk=ldisk):
6382
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
6383
                                 " replace disks for instance %s" %
6384
                                 (node_name, self.instance.name))
6385

    
6386
  def _CreateNewStorage(self, node_name):
6387
    vgname = self.cfg.GetVGName()
6388
    iv_names = {}
6389

    
6390
    for idx, dev in enumerate(self.instance.disks):
6391
      if idx not in self.disks:
6392
        continue
6393

    
6394
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
6395

    
6396
      self.cfg.SetDiskID(dev, node_name)
6397

    
6398
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
6399
      names = _GenerateUniqueNames(self.lu, lv_names)
6400

    
6401
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
6402
                             logical_id=(vgname, names[0]))
6403
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6404
                             logical_id=(vgname, names[1]))
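      # Each DRBD disk is backed by a pair of LVs: a data LV of the disk's
      # size and a fixed 128 MB metadata LV.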
6405

    
6406
      new_lvs = [lv_data, lv_meta]
6407
      old_lvs = dev.children
6408
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
6409

    
6410
      # we pass force_create=True to force the LVM creation
6411
      for new_lv in new_lvs:
6412
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
6413
                        _GetInstanceInfoText(self.instance), False)
6414

    
6415
    return iv_names
6416

    
6417
  def _CheckDevices(self, node_name, iv_names):
6418
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
6419
      self.cfg.SetDiskID(dev, node_name)
6420

    
6421
      result = self.rpc.call_blockdev_find(node_name, dev)
6422

    
6423
      msg = result.fail_msg
6424
      if msg or not result.payload:
6425
        if not msg:
6426
          msg = "disk not found"
6427
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
6428
                                 (name, msg))
6429

    
6430
      if result.payload.is_degraded:
6431
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
6432

    
6433
  def _RemoveOldStorage(self, node_name, iv_names):
6434
    for name, (dev, old_lvs, _) in iv_names.iteritems():
6435
      self.lu.LogInfo("Remove logical volumes for %s" % name)
6436

    
6437
      for lv in old_lvs:
6438
        self.cfg.SetDiskID(lv, node_name)
6439

    
6440
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
6441
        if msg:
6442
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
6443
                             hint="remove unused LVs manually")
6444

    
6445
  def _ExecDrbd8DiskOnly(self):
6446
    """Replace a disk on the primary or secondary for DRBD 8.
6447

6448
    The algorithm for replace is quite complicated:
6449

6450
      1. for each disk to be replaced:
6451

6452
        1. create new LVs on the target node with unique names
6453
        1. detach old LVs from the drbd device
6454
        1. rename old LVs to name_replaced.<time_t>
6455
        1. rename new LVs to old LVs
6456
        1. attach the new LVs (with the old names now) to the drbd device
6457

6458
      1. wait for sync across all devices
6459

6460
      1. for each modified disk:
6461

6462
        1. remove old LVs (which have the name name_replaces.<time_t>)
6463

6464
    Failures are not very well handled.
6465

6466
    """
6467
    steps_total = 6
6468

    
6469
    # Step: check device activation
6470
    self.lu.LogStep(1, steps_total, "Check device existence")
6471
    self._CheckDisksExistence([self.other_node, self.target_node])
6472
    self._CheckVolumeGroup([self.target_node, self.other_node])
6473

    
6474
    # Step: check other node consistency
6475
    self.lu.LogStep(2, steps_total, "Check peer consistency")
6476
    self._CheckDisksConsistency(self.other_node,
6477
                                self.other_node == self.instance.primary_node,
6478
                                False)
6479

    
6480
    # Step: create new storage
6481
    self.lu.LogStep(3, steps_total, "Allocate new storage")
6482
    iv_names = self._CreateNewStorage(self.target_node)
6483

    
6484
    # Step: for each lv, detach+rename*2+attach
6485
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
6486
    for dev, old_lvs, new_lvs in iv_names.itervalues():
6487
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
6488

    
6489
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
6490
                                                     old_lvs)
6491
      result.Raise("Can't detach drbd from local storage on node"
6492
                   " %s for device %s" % (self.target_node, dev.iv_name))
6493
      #dev.children = []
6494
      #cfg.Update(instance)
6495

    
6496
      # ok, we created the new LVs, so now we know we have the needed
6497
      # storage; as such, we proceed on the target node to rename
6498
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
6499
      # using the assumption that logical_id == physical_id (which in
6500
      # turn is the unique_id on that node)
6501

    
6502
      # FIXME(iustin): use a better name for the replaced LVs
6503
      temp_suffix = int(time.time())
6504
      ren_fn = lambda d, suff: (d.physical_id[0],
6505
                                d.physical_id[1] + "_replaced-%s" % suff)
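      # e.g. an old data LV (vg, "<unique-id>.disk0_data") is renamed to
      # (vg, "<unique-id>.disk0_data_replaced-<time_t>"), freeing its name for
      # the corresponding new LV.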
6506

    
6507
      # Build the rename list based on what LVs exist on the node
6508
      rename_old_to_new = []
6509
      for to_ren in old_lvs:
6510
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
6511
        if not result.fail_msg and result.payload:
6512
          # device exists
6513
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
6514

    
6515
      self.lu.LogInfo("Renaming the old LVs on the target node")
6516
      result = self.rpc.call_blockdev_rename(self.target_node,
6517
                                             rename_old_to_new)
6518
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
6519

    
6520
      # Now we rename the new LVs to the old LVs
6521
      self.lu.LogInfo("Renaming the new LVs on the target node")
6522
      rename_new_to_old = [(new, old.physical_id)
6523
                           for old, new in zip(old_lvs, new_lvs)]
6524
      result = self.rpc.call_blockdev_rename(self.target_node,
6525
                                             rename_new_to_old)
6526
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
6527

    
6528
      for old, new in zip(old_lvs, new_lvs):
6529
        new.logical_id = old.logical_id
6530
        self.cfg.SetDiskID(new, self.target_node)
6531

    
6532
      for disk in old_lvs:
6533
        disk.logical_id = ren_fn(disk, temp_suffix)
6534
        self.cfg.SetDiskID(disk, self.target_node)
6535

    
6536
      # Now that the new lvs have the old name, we can add them to the device
6537
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
6538
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
6539
                                                  new_lvs)
6540
      msg = result.fail_msg
6541
      if msg:
6542
        for new_lv in new_lvs:
6543
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
6544
                                               new_lv).fail_msg
6545
          if msg2:
6546
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
6547
                               hint=("cleanup manually the unused logical"
6548
                                     "volumes"))
6549
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
6550

    
6551
      dev.children = new_lvs
6552

    
6553
      self.cfg.Update(self.instance)
6554

    
6555
    # Wait for sync
6556
    # This can fail as the old devices are degraded and _WaitForSync
6557
    # does a combined result over all disks, so we don't check its return value
6558
    self.lu.LogStep(5, steps_total, "Sync devices")
6559
    _WaitForSync(self.lu, self.instance, unlock=True)
6560

    
6561
    # Check all devices manually
6562
    self._CheckDevices(self.instance.primary_node, iv_names)
6563

    
6564
    # Step: remove old storage
6565
    self.lu.LogStep(6, steps_total, "Removing old storage")
6566
    self._RemoveOldStorage(self.target_node, iv_names)
6567

    
6568
  def _ExecDrbd8Secondary(self):
6569
    """Replace the secondary node for DRBD 8.
6570

6571
    The algorithm for replace is quite complicated:
6572
      - for all disks of the instance:
6573
        - create new LVs on the new node with same names
6574
        - shutdown the drbd device on the old secondary
6575
        - disconnect the drbd network on the primary
6576
        - create the drbd device on the new secondary
6577
        - network attach the drbd on the primary, using an artifice:
6578
          the drbd code for Attach() will connect to the network if it
6579
          finds a device which is connected to the good local disks but
6580
          not network enabled
6581
      - wait for sync across all devices
6582
      - remove all disks from the old secondary
6583

6584
    Failures are not very well handled.
6585

6586
    """
6587
    steps_total = 6
6588

    
6589
    # Step: check device activation
6590
    self.lu.LogStep(1, steps_total, "Check device existence")
6591
    self._CheckDisksExistence([self.instance.primary_node])
6592
    self._CheckVolumeGroup([self.instance.primary_node])
6593

    
6594
    # Step: check other node consistency
6595
    self.lu.LogStep(2, steps_total, "Check peer consistency")
6596
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
6597

    
6598
    # Step: create new storage
6599
    self.lu.LogStep(3, steps_total, "Allocate new storage")
6600
    for idx, dev in enumerate(self.instance.disks):
6601
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
6602
                      (self.new_node, idx))
6603
      # we pass force_create=True to force LVM creation
6604
      for new_lv in dev.children:
6605
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
6606
                        _GetInstanceInfoText(self.instance), False)
6607

    
6608
    # Step 4: drbd minors and drbd setup changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
6612
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    
6617
    iv_names = {}
6618
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
6619
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
6620
                      (self.new_node, idx))
6621
      # create new devices on new_node; note that we create two IDs:
6622
      # one without port, so the drbd will be activated without
6623
      # networking information on the new node at this stage, and one
6624
      # with network, for the latter activation in step 4
6625
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
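      # For DRBD8 the logical_id is the 6-tuple
      # (node_A, node_B, port, minor_A, minor_B, shared_secret); we keep the
      # primary's current minor and pair it with the newly allocated one.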
6626
      if self.instance.primary_node == o_node1:
6627
        p_minor = o_minor1
6628
      else:
6629
        p_minor = o_minor2
6630

    
6631
      new_alone_id = (self.instance.primary_node, self.new_node, None,
6632
                      p_minor, new_minor, o_secret)
6633
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
6634
                    p_minor, new_minor, o_secret)
6635

    
6636
      iv_names[idx] = (dev, dev.children, new_net_id)
6637
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
6638
                    new_net_id)
6639
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
6640
                              logical_id=new_alone_id,
6641
                              children=dev.children,
6642
                              size=dev.size)
6643
      try:
6644
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
6645
                              _GetInstanceInfoText(self.instance), False)
6646
      except errors.GenericError:
6647
        self.cfg.ReleaseDRBDMinors(self.instance.name)
6648
        raise
6649

    
6650
    # We have new devices, shut down the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    
6661
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
6662
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
6663
                                               self.node_secondary_ip,
6664
                                               self.instance.disks)\
6665
                                              [self.instance.primary_node]
6666

    
6667
    msg = result.fail_msg
6668
    if msg:
6669
      # detaches didn't succeed (unlikely)
6670
      self.cfg.ReleaseDRBDMinors(self.instance.name)
6671
      raise errors.OpExecError("Can't detach the disks from the network on"
6672
                               " old node: %s" % (msg,))
6673

    
6674
    # if we managed to detach at least one, we update all the disks of
6675
    # the instance to point to the new secondary
6676
    self.lu.LogInfo("Updating instance configuration")
6677
    for dev, _, new_logical_id in iv_names.itervalues():
6678
      dev.logical_id = new_logical_id
6679
      self.cfg.SetDiskID(dev, self.instance.primary_node)
6680

    
6681
    self.cfg.Update(self.instance)
6682

    
6683
    # and now perform the drbd attach
6684
    self.lu.LogInfo("Attaching primary drbds to new secondary"
6685
                    " (standalone => connected)")
6686
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
6687
                                            self.new_node],
6688
                                           self.node_secondary_ip,
6689
                                           self.instance.disks,
6690
                                           self.instance.name,
6691
                                           False)
6692
    for to_node, to_result in result.items():
6693
      msg = to_result.fail_msg
6694
      if msg:
6695
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
6696
                           to_node, msg,
6697
                           hint=("please do a gnt-instance info to see the"
6698
                                 " status of disks"))
6699

    
6700
    # Wait for sync
6701
    # This can fail as the old devices are degraded and _WaitForSync
6702
    # does a combined result over all disks, so we don't check its return value
6703
    self.lu.LogStep(5, steps_total, "Sync devices")
6704
    _WaitForSync(self.lu, self.instance, unlock=True)
6705

    
6706
    # Check all devices manually
6707
    self._CheckDevices(self.instance.primary_node, iv_names)
6708

    
6709
    # Step: remove old storage
6710
    self.lu.LogStep(6, steps_total, "Removing old storage")
6711
    self._RemoveOldStorage(self.target_node, iv_names)
6712

    
6713

    
6714
class LURepairNodeStorage(NoHooksLU):
6715
  """Repairs the volume group on a node.
6716

6717
  """
6718
  _OP_REQP = ["node_name"]
6719
  REQ_BGL = False
6720

    
6721
  def CheckArguments(self):
6722
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
6723
    if node_name is None:
6724
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
6725

    
6726
    self.op.node_name = node_name
6727

    
6728
  def ExpandNames(self):
6729
    self.needed_locks = {
6730
      locking.LEVEL_NODE: [self.op.node_name],
6731
      }
6732

    
6733
  def _CheckFaultyDisks(self, instance, node_name):
6734
    if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
6735
                                node_name, True):
6736
      raise errors.OpPrereqError("Instance '%s' has faulty disks on"
6737
                                 " node '%s'" % (instance.name, node_name))
6738

    
6739
  def CheckPrereq(self):
6740
    """Check prerequisites.
6741

6742
    """
6743
    storage_type = self.op.storage_type
6744

    
6745
    if (constants.SO_FIX_CONSISTENCY not in
6746
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
6747
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
6748
                                 " repaired" % storage_type)
6749

    
6750
    # Check whether any instance on this node has faulty disks
6751
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
6752
      check_nodes = set(inst.all_nodes)
6753
      check_nodes.discard(self.op.node_name)
6754
      for inst_node_name in check_nodes:
6755
        self._CheckFaultyDisks(inst, inst_node_name)
6756

    
6757
  def Exec(self, feedback_fn):
6758
    feedback_fn("Repairing storage unit '%s' on %s ..." %
6759
                (self.op.name, self.op.node_name))
6760

    
6761
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
6762
    result = self.rpc.call_storage_execute(self.op.node_name,
6763
                                           self.op.storage_type, st_args,
6764
                                           self.op.name,
6765
                                           constants.SO_FIX_CONSISTENCY)
6766
    result.Raise("Failed to repair storage unit '%s' on %s" %
6767
                 (self.op.name, self.op.node_name))
6768

    
6769

    
6770
class LUGrowDisk(LogicalUnit):
6771
  """Grow a disk of an instance.
6772

6773
  """
6774
  HPATH = "disk-grow"
6775
  HTYPE = constants.HTYPE_INSTANCE
6776
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
6777
  REQ_BGL = False
6778

    
6779
  def ExpandNames(self):
6780
    self._ExpandAndLockInstance()
6781
    self.needed_locks[locking.LEVEL_NODE] = []
6782
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6783

    
6784
  def DeclareLocks(self, level):
6785
    if level == locking.LEVEL_NODE:
6786
      self._LockInstancesNodes()
6787

    
6788
  def BuildHooksEnv(self):
6789
    """Build hooks env.
6790

6791
    This runs on the master, the primary and all the secondaries.
6792

6793
    """
6794
    env = {
6795
      "DISK": self.op.disk,
6796
      "AMOUNT": self.op.amount,
6797
      }
6798
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6799
    nl = [
6800
      self.cfg.GetMasterNode(),
6801
      self.instance.primary_node,
6802
      ]
6803
    return env, nl, nl
6804

    
6805
  def CheckPrereq(self):
6806
    """Check prerequisites.
6807

6808
    This checks that the instance is in the cluster.
6809

6810
    """
6811
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6812
    assert instance is not None, \
6813
      "Cannot retrieve locked instance %s" % self.op.instance_name
6814
    nodenames = list(instance.all_nodes)
6815
    for node in nodenames:
6816
      _CheckNodeOnline(self, node)
6817

    
6818

    
6819
    self.instance = instance
6820

    
6821
    if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
6822
      raise errors.OpPrereqError("Instance's disk layout does not support"
6823
                                 " growing.")
6824

    
6825
    self.disk = instance.FindDisk(self.op.disk)
6826

    
6827
    nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
6828
                                       instance.hypervisor)
6829
    for node in nodenames:
6830
      info = nodeinfo[node]
6831
      info.Raise("Cannot get current information from node %s" % node)
6832
      vg_free = info.payload.get('vg_free', None)
6833
      if not isinstance(vg_free, int):
6834
        raise errors.OpPrereqError("Can't compute free disk space on"
6835
                                   " node %s" % node)
6836
      if self.op.amount > vg_free:
6837
        raise errors.OpPrereqError("Not enough disk space on target node %s:"
6838
                                   " %d MiB available, %d MiB required" %
6839
                                   (node, vg_free, self.op.amount))
6840

    
6841
  def Exec(self, feedback_fn):
6842
    """Execute disk grow.
6843

6844
    """
6845
    instance = self.instance
6846
    disk = self.disk
6847
    for node in instance.all_nodes:
6848
      self.cfg.SetDiskID(disk, node)
6849
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
6850
      result.Raise("Grow request failed to node %s" % node)
6851
    disk.RecordGrow(self.op.amount)
6852
    self.cfg.Update(instance)
6853
    if self.op.wait_for_sync:
6854
      disk_abort = not _WaitForSync(self, instance)
6855
      if disk_abort:
6856
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
6857
                             " status.\nPlease check the instance.")
6858

    
6859

    
6860
class LUQueryInstanceData(NoHooksLU):
6861
  """Query runtime instance data.
6862

6863
  """
6864
  _OP_REQP = ["instances", "static"]
6865
  REQ_BGL = False
6866

    
6867
  def ExpandNames(self):
6868
    self.needed_locks = {}
6869
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
6870

    
6871
    if not isinstance(self.op.instances, list):
6872
      raise errors.OpPrereqError("Invalid argument type 'instances'")
6873

    
6874
    if self.op.instances:
6875
      self.wanted_names = []
6876
      for name in self.op.instances:
6877
        full_name = self.cfg.ExpandInstanceName(name)
6878
        if full_name is None:
6879
          raise errors.OpPrereqError("Instance '%s' not known" % name)
6880
        self.wanted_names.append(full_name)
6881
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
6882
    else:
6883
      self.wanted_names = None
6884
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
6885

    
6886
    self.needed_locks[locking.LEVEL_NODE] = []
6887
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6888

    
6889
  def DeclareLocks(self, level):
6890
    if level == locking.LEVEL_NODE:
6891
      self._LockInstancesNodes()
6892

    
6893
  def CheckPrereq(self):
6894
    """Check prerequisites.
6895

6896
    This only checks the optional instance list against the existing names.
6897

6898
    """
6899
    if self.wanted_names is None:
6900
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
6901

    
6902
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
6903
                             in self.wanted_names]
6904
    return
6905

    
6906
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
                      for child in dev.children]
    else:
      dev_children = []

    data = {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

    return data

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    for instance in self.wanted_instances:
      if not self.op.static:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"
      else:
        remote_state = None
      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = [self._ComputeDiskStatus(instance, None, device)
               for device in instance.disks]

      idict = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        }

      result[instance.name] = idict

    return result


class LUSetInstanceParams(LogicalUnit):
  """Modifies an instances's parameters.
7019

7020
  """
7021
  HPATH = "instance-modify"
7022
  HTYPE = constants.HTYPE_INSTANCE
7023
  _OP_REQP = ["instance_name"]
7024
  REQ_BGL = False
7025

    
7026
  def CheckArguments(self):
7027
    if not hasattr(self.op, 'nics'):
7028
      self.op.nics = []
7029
    if not hasattr(self.op, 'disks'):
7030
      self.op.disks = []
7031
    if not hasattr(self.op, 'beparams'):
7032
      self.op.beparams = {}
7033
    if not hasattr(self.op, 'hvparams'):
7034
      self.op.hvparams = {}
7035
    self.op.force = getattr(self.op, "force", False)
7036
    if not (self.op.nics or self.op.disks or
7037
            self.op.hvparams or self.op.beparams):
7038
      raise errors.OpPrereqError("No changes submitted")
7039

    
7040
    # Disk validation
7041
    disk_addremove = 0
7042
    for disk_op, disk_dict in self.op.disks:
7043
      if disk_op == constants.DDM_REMOVE:
7044
        disk_addremove += 1
7045
        continue
7046
      elif disk_op == constants.DDM_ADD:
7047
        disk_addremove += 1
7048
      else:
7049
        if not isinstance(disk_op, int):
7050
          raise errors.OpPrereqError("Invalid disk index")
7051
        if not isinstance(disk_dict, dict):
7052
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
7053
          raise errors.OpPrereqError(msg)
7054

    
7055
      if disk_op == constants.DDM_ADD:
7056
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
7057
        if mode not in constants.DISK_ACCESS_SET:
7058
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode)
7059
        size = disk_dict.get('size', None)
7060
        if size is None:
7061
          raise errors.OpPrereqError("Required disk parameter size missing")
7062
        try:
7063
          size = int(size)
7064
        except ValueError, err:
7065
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
7066
                                     str(err))
7067
        disk_dict['size'] = size
7068
      else:
7069
        # modification of disk
7070
        if 'size' in disk_dict:
7071
          raise errors.OpPrereqError("Disk size change not possible, use"
7072
                                     " grow-disk")
7073

    
7074
    if disk_addremove > 1:
7075
      raise errors.OpPrereqError("Only one disk add or remove operation"
7076
                                 " supported at a time")
7077

    
7078
    # NIC validation
7079
    nic_addremove = 0
7080
    for nic_op, nic_dict in self.op.nics:
7081
      if nic_op == constants.DDM_REMOVE:
7082
        nic_addremove += 1
7083
        continue
7084
      elif nic_op == constants.DDM_ADD:
7085
        nic_addremove += 1
7086
      else:
7087
        if not isinstance(nic_op, int):
7088
          raise errors.OpPrereqError("Invalid nic index")
7089
        if not isinstance(nic_dict, dict):
7090
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
7091
          raise errors.OpPrereqError(msg)
7092

    
7093
      # nic_dict should be a dict
7094
      nic_ip = nic_dict.get('ip', None)
7095
      if nic_ip is not None:
7096
        if nic_ip.lower() == constants.VALUE_NONE:
7097
          nic_dict['ip'] = None
7098
        else:
7099
          if not utils.IsValidIP(nic_ip):
7100
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip)
7101

    
7102
      nic_bridge = nic_dict.get('bridge', None)
7103
      nic_link = nic_dict.get('link', None)
7104
      if nic_bridge and nic_link:
7105
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7106
                                   " at the same time")
7107
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
7108
        nic_dict['bridge'] = None
7109
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
7110
        nic_dict['link'] = None
7111

    
7112
      if nic_op == constants.DDM_ADD:
7113
        nic_mac = nic_dict.get('mac', None)
7114
        if nic_mac is None:
7115
          nic_dict['mac'] = constants.VALUE_AUTO
7116

    
7117
      if 'mac' in nic_dict:
7118
        nic_mac = nic_dict['mac']
7119
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7120
          if not utils.IsValidMac(nic_mac):
7121
            raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac)
7122
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
7123
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
7124
                                     " modifying an existing nic")
7125

    
7126
    if nic_addremove > 1:
7127
      raise errors.OpPrereqError("Only one NIC add or remove operation"
7128
                                 " supported at a time")
7129

    
7130
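  # Note: self.op.disks and self.op.nics, validated above, are lists of
  # (operation, parameters) pairs, where the operation is either the index of
  # an existing device, constants.DDM_ADD or constants.DDM_REMOVE. A purely
  # illustrative value (not taken from a real opcode) would be:
  #   disks = [(constants.DDM_ADD, {'size': 1024, 'mode': 'rw'})]
  #   nics = [(0, {'ip': '192.0.2.10'})]
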
  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
    if constants.BE_MEMORY in self.be_new:
      args['memory'] = self.be_new[constants.BE_MEMORY]
    if constants.BE_VCPUS in self.be_new:
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      args['nics'] = []
      nic_override = dict(self.op.nics)
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
      for idx, nic in enumerate(self.instance.nics):
        if idx in nic_override:
          this_nic_override = nic_override[idx]
        else:
          this_nic_override = {}
        if 'ip' in this_nic_override:
          ip = this_nic_override['ip']
        else:
          ip = nic.ip
        if 'mac' in this_nic_override:
          mac = this_nic_override['mac']
        else:
          mac = nic.mac
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      if constants.DDM_ADD in nic_override:
        ip = nic_override[constants.DDM_ADD].get('ip', None)
        mac = nic_override[constants.DDM_ADD]['mac']
        nicparams = self.nic_pnew[constants.DDM_ADD]
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      elif constants.DDM_REMOVE in nic_override:
        del args['nics'][-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

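  # Each entry appended to args['nics'] above is an (ip, mac, mode, link)
  # tuple; an illustrative example (all values invented) would be
  # ("192.0.2.10", "aa:00:00:35:cd:7a", constants.NIC_MODE_BRIDGED, "xen-br0").
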
  def _GetUpdatedParams(self, old_params, update_dict,
                        default_values, parameter_types):
    """Return the new params dict for the given params.

    @type old_params: dict
    @param old_params: old parameters
    @type update_dict: dict
    @param update_dict: dict containing new parameter values,
                        or constants.VALUE_DEFAULT to reset the
                        parameter to its default value
    @type default_values: dict
    @param default_values: default values for the filled parameters
    @type parameter_types: dict
    @param parameter_types: dict mapping target dict keys to types
                            in constants.ENFORCEABLE_TYPES
    @rtype: (dict, dict)
    @return: (new_parameters, filled_parameters)

    """
    params_copy = copy.deepcopy(old_params)
    for key, val in update_dict.iteritems():
      if val == constants.VALUE_DEFAULT:
        try:
          del params_copy[key]
        except KeyError:
          pass
      else:
        params_copy[key] = val
    utils.ForceDictType(params_copy, parameter_types)
    params_filled = objects.FillDict(default_values, params_copy)
    return (params_copy, params_filled)

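  # Illustrative sketch of the helper above (values invented): with
  #   old_params = {'memory': 512}
  #   update_dict = {'memory': constants.VALUE_DEFAULT, 'vcpus': 2}
  #   default_values = {'memory': 128, 'vcpus': 1}
  # it returns ({'vcpus': 2}, {'memory': 128, 'vcpus': 2}): 'memory' is
  # dropped from the instance-level dict and falls back to its default in
  # the filled dict, while 'vcpus' is overridden in both.
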
  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    self.force = self.op.force

    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    # hvparams processing
    if self.op.hvparams:
      i_hvdict, hv_new = self._GetUpdatedParams(
                             instance.hvparams, self.op.hvparams,
                             cluster.hvparams[instance.hypervisor],
                             constants.HVS_PARAMETER_TYPES)
      # local check
      hypervisor.GetHypervisor(
        instance.hypervisor).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict, be_new = self._GetUpdatedParams(
                             instance.beparams, self.op.beparams,
                             cluster.beparams[constants.PP_DEFAULT],
                             constants.BES_PARAMETER_TYPES)
      self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}

    self.warn = []

    if constants.BE_MEMORY in self.op.beparams and not self.force:
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                         instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload['memory'])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
                    pninfo.payload['memory_free'])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          msg = nres.fail_msg
          if msg:
            self.warn.append("Can't get info from secondary node %s: %s" %
                             (node, msg))
          elif not isinstance(nres.payload.get('memory_free', None), int):
            self.warn.append("Secondary node %s didn't return free"
                             " memory information" % node)
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
            self.warn.append("Not enough memory to failover instance to"
                             " secondary node %s" % node)

    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove")
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics)))
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      if 'bridge' in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']

      new_nic_params, new_filled_nic_params = \
          self._GetUpdatedParams(old_nic_params, update_params_dict,
                                 cluster.nicparams[constants.PP_DEFAULT],
                                 constants.NICS_PARAMETER_TYPES)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if 'ip' in nic_dict:
          nic_ip = nic_dict['ip']
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError('Cannot set the nic ip to None'
                                     ' on a routed nic')
      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac is None:
          raise errors.OpPrereqError('Cannot set the nic mac to None')
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict['mac'] = self.cfg.GenerateMAC()
        else:
          # or validate/reserve the current one
          if self.cfg.IsMacInUse(nic_mac):
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances")
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance")
        ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
        ins_l = ins_l[pnode]
        msg = ins_l.fail_msg
        if msg:
          raise errors.OpPrereqError("Can't contact node %s: %s" %
                                     (pnode, msg))
        if instance.name in ins_l.payload:
          raise errors.OpPrereqError("Instance is running, can't remove"
                                     " disks.")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks)))

    return

  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
    instance = self.instance
    cluster = self.cluster
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template == constants.DT_FILE:
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict['mode']
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict['mac']
        ip = nic_dict.get('ip', None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        for key in 'mac', 'ip':
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pnew:
          instance.nics[nic_op].nicparams = self.nic_pnew[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    self.cfg.Update(instance)

    return result


class LUQueryExports(NoHooksLU):
  """Query the exports list

  """
  _OP_REQP = ['nodes']
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result


class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # FIXME: lock only instance primary and destination node
    #
    # Sad but true, for now we have to lock all nodes, as we don't know where
    # the previous export might be, and in this LU we search for it and
    # remove it from its current node. In the future we could fix this by:
    #  - making a tasklet to search (share-lock all), then create the new one,
    #    then one to remove, after
    #  - removing the removal operation altogether
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
          self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self.dst_node = self.cfg.GetNodeInfo(
      self.cfg.ExpandNodeName(self.op.target_node))

    if self.dst_node is None:
      # This is wrong node name, not a non-locked node
      raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node)
    _CheckNodeOnline(self, self.dst_node.name)
    _CheckNodeNotDrained(self, self.dst_node.name)

    # instance disk type verification
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks")

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    instance = self.instance
    dst_node = self.dst_node
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance)
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    vgname = self.cfg.GetVGName()

    snap_disks = []

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    # per-disk results
    dresults = []
    try:
      for idx, disk in enumerate(instance.disks):
        feedback_fn("Creating a snapshot of disk/%s on node %s" %
                    (idx, src_node))

        # result.payload will be a snapshot of an lvm leaf of the one we passed
        result = self.rpc.call_blockdev_snapshot(src_node, disk)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Could not snapshot disk/%s on node %s: %s",
                          idx, src_node, msg)
          snap_disks.append(False)
        else:
          disk_id = (vgname, result.payload)
          new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
                                 logical_id=disk_id, physical_id=disk_id,
                                 iv_name=disk.iv_name)
          snap_disks.append(new_dev)

    finally:
      if self.op.shutdown and instance.admin_up:
        feedback_fn("Starting instance %s" % instance.name)
        result = self.rpc.call_instance_start(src_node, instance, None, None)
        msg = result.fail_msg
        if msg:
          _ShutdownInstanceDisks(self, instance)
          raise errors.OpExecError("Could not start instance: %s" % msg)

    # TODO: check for size

    cluster_name = self.cfg.GetClusterName()
    for idx, dev in enumerate(snap_disks):
      feedback_fn("Exporting snapshot %s from %s to %s" %
                  (idx, src_node, dst_node.name))
      if dev:
        result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
                                               instance, cluster_name, idx)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Could not export disk/%s from node %s to"
                          " node %s: %s", idx, src_node, dst_node.name, msg)
          dresults.append(False)
        else:
          dresults.append(True)
        msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
        if msg:
          self.LogWarning("Could not remove snapshot for disk/%d from node"
                          " %s: %s", idx, src_node, msg)
      else:
        dresults.append(False)

    feedback_fn("Finalizing export on %s" % dst_node.name)
    result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
    fin_resu = True
    msg = result.fail_msg
    if msg:
      self.LogWarning("Could not finalize export for instance %s"
                      " on node %s: %s", instance.name, dst_node.name, msg)
      fin_resu = False

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)
    return fin_resu, dresults


class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.
    """
    pass

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class TagsLU(NoHooksLU):
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      name = self.cfg.ExpandNodeName(self.op.name)
      if name is None:
        raise errors.OpPrereqError("Invalid node name (%s)" %
                                   (self.op.name,))
      self.op.name = name
      self.needed_locks[locking.LEVEL_NODE] = name
    elif self.op.kind == constants.TAG_INSTANCE:
      name = self.cfg.ExpandInstanceName(self.op.name)
      if name is None:
        raise errors.OpPrereqError("Invalid instance name (%s)" %
                                   (self.op.name,))
      self.op.name = name
      self.needed_locks[locking.LEVEL_INSTANCE] = name

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind))


class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_REQP = ["kind", "name"]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  _OP_REQP = ["pattern"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err))

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    try:
      self.cfg.Update(self.target)
    except errors.ConfigurationError:
      raise errors.OpRetryError("There has been a modification to the"
                                " config file and the operation has been"
                                " aborted. Please retry.")


class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      diff_names = ["'%s'" % tag for tag in diff_tags]
      diff_names.sort()
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)))

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    try:
      self.cfg.Update(self.target)
    except errors.ConfigurationError:
      raise errors.OpRetryError("There has been a modification to the"
                                " config file and the operation has been"
                                " aborted. Please retry.")


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  _ALLO_KEYS = [
    "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "relocate_from",
    ]

  def __init__(self, cfg, rpc, mode, name, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.name = name
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.nodes = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])
    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData()

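  # A minimal usage sketch (instance and node names invented): a relocation
  # request would be built as
  #   IAllocator(self.cfg, self.rpc, mode=constants.IALLOCATOR_MODE_RELOC,
  #              name="inst1.example.com", relocate_from=["node2.example.com"])
  # whereas allocation mode requires every key listed in _ALLO_KEYS to be
  # passed as a keyword argument.
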
  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = objects.FillDict(
            cluster_info.nicparams[constants.PP_DEFAULT],
            nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _AllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    data = self.in_data

    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "type": "allocate",
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    data["request"] = request

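  # An illustrative (abbreviated, all values invented) allocate request as
  # built above:
  #   {"type": "allocate", "name": "inst1.example.com",
  #    "disk_template": "drbd", "disks": [{"size": 1024, "mode": "rw"}],
  #    "memory": 512, "vcpus": 1, "required_nodes": 2, ...}
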
  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _IAllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances")

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node")

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "type": "relocate",
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    self.in_data["request"] = request

  def _BuildInputData(self):
    """Build input data structures.

    """
    self._ComputeClusterData()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      self._AddNewInstance()
    else:
      self._AddRelocateInstance()

    self.in_text = serializer.Dump(self.in_data)

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

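  # An illustrative reply that passes the validation below (node names
  # invented):
  #   {"success": true, "info": "allocation successful",
  #    "nodes": ["node1.example.com", "node2.example.com"]}
  # "success", "info" and "nodes" are the only keys required by
  # _ValidateResult; "nodes" must be a list.
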
  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    for key in "success", "info", "nodes":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["nodes"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'nodes' key"
                               " is not a list")
    self.out_data = rdict


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'")
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'nics' parameter")
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'")
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'disks' parameter")
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input")
      fname = self.cfg.ExpandInstanceName(self.op.name)
      if fname is None:
        raise errors.OpPrereqError("Instance '%s' not found for relocation" %
                                   self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name")
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result