1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0613,W0201
25

    
26
import os
27
import os.path
28
import time
29
import re
30
import platform
31
import logging
32
import copy
33

    
34
from ganeti import ssh
35
from ganeti import utils
36
from ganeti import errors
37
from ganeti import hypervisor
38
from ganeti import locking
39
from ganeti import constants
40
from ganeti import objects
41
from ganeti import serializer
42
from ganeti import ssconf
43

    
44

    
45
class LogicalUnit(object):
46
  """Logical Unit base class.
47

48
  Subclasses must follow these rules:
49
    - implement ExpandNames
50
    - implement CheckPrereq (except when tasklets are used)
51
    - implement Exec (except when tasklets are used)
52
    - implement BuildHooksEnv
53
    - redefine HPATH and HTYPE
54
    - optionally redefine their run requirements:
55
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
56

57
  Note that all commands require root permissions.
58

59
  @ivar dry_run_result: the value (if any) that will be returned to the caller
60
      in dry-run mode (signalled by opcode dry_run parameter)
61

62
  """
63
  HPATH = None
64
  HTYPE = None
65
  _OP_REQP = []
66
  REQ_BGL = True
67

    
68
  def __init__(self, processor, op, context, rpc):
69
    """Constructor for LogicalUnit.
70

71
    This needs to be overridden in derived classes in order to check op
72
    validity.
73

74
    """
75
    self.proc = processor
76
    self.op = op
77
    self.cfg = context.cfg
78
    self.context = context
79
    self.rpc = rpc
80
    # Dicts used to declare locking needs to mcpu
81
    self.needed_locks = None
82
    self.acquired_locks = {}
83
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
84
    self.add_locks = {}
85
    self.remove_locks = {}
86
    # Used to force good behavior when calling helper functions
87
    self.recalculate_locks = {}
88
    self.__ssh = None
89
    # logging
90
    self.LogWarning = processor.LogWarning
91
    self.LogInfo = processor.LogInfo
92
    self.LogStep = processor.LogStep
93
    # support for dry-run
94
    self.dry_run_result = None
95

    
96
    # Tasklets
97
    self.tasklets = None
98

    
99
    for attr_name in self._OP_REQP:
100
      attr_val = getattr(op, attr_name, None)
101
      if attr_val is None:
102
        raise errors.OpPrereqError("Required parameter '%s' missing" %
103
                                   attr_name)
104

    
105
    self.CheckArguments()
106

    
107
  def __GetSSH(self):
108
    """Returns the SshRunner object
109

110
    """
111
    if not self.__ssh:
112
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
113
    return self.__ssh
114

    
115
  ssh = property(fget=__GetSSH)
116

    
117
  def CheckArguments(self):
118
    """Check syntactic validity for the opcode arguments.
119

120
    This method is for doing a simple syntactic check and ensuring
121
    validity of opcode parameters, without any cluster-related
122
    checks. While the same can be accomplished in ExpandNames and/or
123
    CheckPrereq, doing these separately is better because:
124

125
      - ExpandNames is left purely as a lock-related function
126
      - CheckPrereq is run after we have acquired locks (and possibly
127
        waited for them)
128

129
    The function is allowed to change the self.op attribute so that
130
    later methods no longer need to worry about missing parameters.
131

132
    """
133
    pass
134

    
135
  def ExpandNames(self):
136
    """Expand names for this LU.
137

138
    This method is called before starting to execute the opcode, and it should
139
    update all the parameters of the opcode to their canonical form (e.g. a
140
    short node name must be fully expanded after this method has successfully
141
    completed). This way locking, hooks, logging, etc. can work correctly.
142

143
    LUs which implement this method must also populate the self.needed_locks
144
    member, as a dict with lock levels as keys, and a list of needed lock names
145
    as values. Rules:
146

147
      - use an empty dict if you don't need any lock
148
      - if you don't need any lock at a particular level omit that level
149
      - don't put anything for the BGL level
150
      - if you want all locks at a level use locking.ALL_SET as a value
151

152
    If you need to share locks (rather than acquire them exclusively) at one
153
    level you can modify self.share_locks, setting a true value (usually 1) for
154
    that level. By default locks are not shared.
155

156
    This function can also define a list of tasklets, which then will be
157
    executed in order instead of the usual LU-level CheckPrereq and Exec
158
    functions, if those are not defined by the LU.
159

160
    Examples::
161

162
      # Acquire all nodes and one instance
163
      self.needed_locks = {
164
        locking.LEVEL_NODE: locking.ALL_SET,
165
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
166
      }
167
      # Acquire just two nodes
168
      self.needed_locks = {
169
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
170
      }
171
      # Acquire no locks
172
      self.needed_locks = {} # No, you can't leave it to the default value None
173

174
    """
175
    # The implementation of this method is mandatory only if the new LU is
176
    # concurrent, so that old LUs don't need to be changed all at the same
177
    # time.
178
    if self.REQ_BGL:
179
      self.needed_locks = {} # Exclusive LUs don't need locks.
180
    else:
181
      raise NotImplementedError
182

    
183
  def DeclareLocks(self, level):
184
    """Declare LU locking needs for a level
185

186
    While most LUs can just declare their locking needs at ExpandNames time,
187
    sometimes there's the need to calculate some locks after having acquired
188
    the ones before. This function is called just before acquiring locks at a
189
    particular level, but after acquiring the ones at lower levels, and permits
190
    such calculations. It can be used to modify self.needed_locks, and by
191
    default it does nothing.
192

193
    This function is only called if you have something already set in
194
    self.needed_locks for the level.
195

196
    @param level: Locking level which is going to be locked
197
    @type level: member of ganeti.locking.LEVELS
198

199
    """
200

    
201
  def CheckPrereq(self):
202
    """Check prerequisites for this LU.
203

204
    This method should check that the prerequisites for the execution
205
    of this LU are fulfilled. It can do internode communication, but
206
    it should be idempotent - no cluster or system changes are
207
    allowed.
208

209
    The method should raise errors.OpPrereqError in case something is
210
    not fulfilled. Its return value is ignored.
211

212
    This method should also update all the parameters of the opcode to
213
    their canonical form if it hasn't been done by ExpandNames before.
214

215
    """
216
    if self.tasklets is not None:
217
      for (idx, tl) in enumerate(self.tasklets):
218
        logging.debug("Checking prerequisites for tasklet %s/%s",
219
                      idx + 1, len(self.tasklets))
220
        tl.CheckPrereq()
221
    else:
222
      raise NotImplementedError
223

    
224
  def Exec(self, feedback_fn):
225
    """Execute the LU.
226

227
    This method should implement the actual work. It should raise
228
    errors.OpExecError for failures that are somewhat dealt with in
229
    code, or expected.
230

231
    """
232
    if self.tasklets is not None:
233
      for (idx, tl) in enumerate(self.tasklets):
234
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
235
        tl.Exec(feedback_fn)
236
    else:
237
      raise NotImplementedError
238

    
239
  def BuildHooksEnv(self):
240
    """Build hooks environment for this LU.
241

242
    This method should return a three-element tuple consisting of: a dict
243
    containing the environment that will be used for running the
244
    specific hook for this LU, a list of node names on which the hook
245
    should run before the execution, and a list of node names on which
246
    the hook should run after the execution.
247

248
    The keys of the dict must not be prefixed with 'GANETI_', as this will
249
    be handled in the hooks runner. Also note additional keys will be
250
    added by the hooks runner. If the LU doesn't define any
251
    environment, an empty dict (and not None) should be returned.
252

253
    If no nodes are to be returned, an empty list (and not None) should be used.
254

255
    Note that if the HPATH for a LU class is None, this function will
256
    not be called.
257

258
    """
259
    raise NotImplementedError
260

    
261
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
262
    """Notify the LU about the results of its hooks.
263

264
    This method is called every time a hooks phase is executed, and notifies
265
    the Logical Unit about the hooks' result. The LU can then use it to alter
266
    its result based on the hooks.  By default the method does nothing and the
267
    previous result is passed back unchanged, but any LU can override it if it
268
    wants to use the local cluster hook-scripts somehow.
269

270
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
271
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
272
    @param hook_results: the results of the multi-node hooks rpc call
273
    @param feedback_fn: function used to send feedback back to the caller
274
    @param lu_result: the previous Exec result this LU had, or None
275
        in the PRE phase
276
    @return: the new Exec result, based on the previous result
277
        and hook results
278

279
    """
280
    return lu_result
281

    
282
  def _ExpandAndLockInstance(self):
283
    """Helper function to expand and lock an instance.
284

285
    Many LUs that work on an instance take its name in self.op.instance_name
286
    and need to expand it and then declare the expanded name for locking. This
287
    function does it, and then updates self.op.instance_name to the expanded
288
    name. It also initializes needed_locks as a dict, if this hasn't been done
289
    before.
290

291
    """
292
    if self.needed_locks is None:
293
      self.needed_locks = {}
294
    else:
295
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
296
        "_ExpandAndLockInstance called with instance-level locks set"
297
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
298
    if expanded_name is None:
299
      raise errors.OpPrereqError("Instance '%s' not known" %
300
                                  self.op.instance_name)
301
    self.needed_locks[locking.LEVEL_INSTANCE] = expanded_name
302
    self.op.instance_name = expanded_name
303

    
304
  def _LockInstancesNodes(self, primary_only=False):
305
    """Helper function to declare instances' nodes for locking.
306

307
    This function should be called after locking one or more instances to lock
308
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
309
    with all primary or secondary nodes for instances already locked and
310
    present in self.needed_locks[locking.LEVEL_INSTANCE].
311

312
    It should be called from DeclareLocks, and for safety only works if
313
    self.recalculate_locks[locking.LEVEL_NODE] is set.
314

315
    In the future it may grow parameters to just lock some instance's nodes, or
316
    to just lock primary or secondary nodes, if needed.
317

318
    It should be called in DeclareLocks in a way similar to::
319

320
      if level == locking.LEVEL_NODE:
321
        self._LockInstancesNodes()
322

323
    @type primary_only: boolean
324
    @param primary_only: only lock primary nodes of locked instances
325

326
    """
327
    assert locking.LEVEL_NODE in self.recalculate_locks, \
328
      "_LockInstancesNodes helper function called with no nodes to recalculate"
329

    
330
    # TODO: check if we've really been called with the instance locks held
331

    
332
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
333
    # future we might want to have different behaviors depending on the value
334
    # of self.recalculate_locks[locking.LEVEL_NODE]
335
    wanted_nodes = []
336
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
337
      instance = self.context.cfg.GetInstanceInfo(instance_name)
338
      wanted_nodes.append(instance.primary_node)
339
      if not primary_only:
340
        wanted_nodes.extend(instance.secondary_nodes)
341

    
342
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
343
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
344
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
345
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
346

    
347
    del self.recalculate_locks[locking.LEVEL_NODE]
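
# Illustrative sketch, not part of the upstream module: a minimal concurrent
# LU showing how the pieces documented above fit together. The LU itself and
# its opcode field "instance_name" are hypothetical; the helpers and constants
# it uses (_ExpandAndLockInstance, _LockInstancesNodes, locking.LEVEL_NODE,
# constants.LOCKS_REPLACE) are the ones defined in this module.
class _LUExampleInstanceNoop(LogicalUnit):
  """Example LU: lock an instance and its nodes, then just report them.

  """
  HPATH = None  # no hooks for this example, so BuildHooksEnv is never called
  HTYPE = None
  _OP_REQP = ["instance_name"]
  REQ_BGL = False  # declare locks explicitly instead of taking the BGL

  def ExpandNames(self):
    # Canonicalize self.op.instance_name and lock it at the instance level
    self._ExpandAndLockInstance()
    # Node locks are computed later, once the instance lock is held
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # Replace the (empty) node list with the locked instance's own nodes
      self._LockInstancesNodes()

  def CheckPrereq(self):
    # The instance lock is held, so the instance cannot disappear here
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    feedback_fn("Instance %s uses nodes %s" %
                (self.instance.name,
                 [self.instance.primary_node] +
                 list(self.instance.secondary_nodes)))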
348

    
349

    
350
class NoHooksLU(LogicalUnit):
351
  """Simple LU which runs no hooks.
352

353
  This LU is intended as a parent for other LogicalUnits which will
354
  run no hooks, in order to reduce duplicate code.
355

356
  """
357
  HPATH = None
358
  HTYPE = None
359

    
360

    
361
class Tasklet:
362
  """Tasklet base class.
363

364
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
365
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
366
  tasklets know nothing about locks.
367

368
  Subclasses must follow these rules:
369
    - Implement CheckPrereq
370
    - Implement Exec
371

372
  """
373
  def __init__(self, lu):
374
    self.lu = lu
375

    
376
    # Shortcuts
377
    self.cfg = lu.cfg
378
    self.rpc = lu.rpc
379

    
380
  def CheckPrereq(self):
381
    """Check prerequisites for this tasklets.
382

383
    This method should check whether the prerequisites for the execution of
384
    this tasklet are fulfilled. It can do internode communication, but it
385
    should be idempotent - no cluster or system changes are allowed.
386

387
    The method should raise errors.OpPrereqError in case something is not
388
    fulfilled. Its return value is ignored.
389

390
    This method should also update all parameters to their canonical form if it
391
    hasn't been done before.
392

393
    """
394
    raise NotImplementedError
395

    
396
  def Exec(self, feedback_fn):
397
    """Execute the tasklet.
398

399
    This method should implement the actual work. It should raise
400
    errors.OpExecError for failures that are somewhat dealt with in code, or
401
    expected.
402

403
    """
404
    raise NotImplementedError
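
# Illustrative sketch, not part of the upstream module: a trivial tasklet and
# an LU that consists only of tasklets. Both classes are hypothetical; the
# wiring (setting self.tasklets in ExpandNames and letting the default
# LogicalUnit.CheckPrereq/Exec iterate over them) is exactly what the base
# classes above implement.
class _TLExampleNoop(Tasklet):
  """Example tasklet which only reports the cluster name.

  """
  def CheckPrereq(self):
    # Nothing to verify; all locking was done by the owning LU
    pass

  def Exec(self, feedback_fn):
    feedback_fn("Cluster name is %s" % self.cfg.GetClusterName())


class _LUExampleTaskletUser(NoHooksLU):
  """Example LU built entirely out of tasklets.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # With self.tasklets set, the inherited CheckPrereq and Exec simply run
    # each tasklet in order, so the LU needs no implementations of its own
    self.tasklets = [_TLExampleNoop(self)]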
405

    
406

    
407
def _GetWantedNodes(lu, nodes):
408
  """Returns list of checked and expanded node names.
409

410
  @type lu: L{LogicalUnit}
411
  @param lu: the logical unit on whose behalf we execute
412
  @type nodes: list
413
  @param nodes: list of node names or None for all nodes
414
  @rtype: list
415
  @return: the list of nodes, sorted
416
  @raise errors.OpPrereqError: if the nodes parameter is wrong type
417

418
  """
419
  if not isinstance(nodes, list):
420
    raise errors.OpPrereqError("Invalid argument type 'nodes'")
421

    
422
  if not nodes:
423
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
424
      " non-empty list of nodes whose name is to be expanded.")
425

    
426
  wanted = []
427
  for name in nodes:
428
    node = lu.cfg.ExpandNodeName(name)
429
    if node is None:
430
      raise errors.OpPrereqError("No such node name '%s'" % name)
431
    wanted.append(node)
432

    
433
  return utils.NiceSort(wanted)
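
# Typical (hypothetical) use of _GetWantedNodes from an LU whose opcode
# carries a "nodes" attribute with possibly-shortened node names:
#
#   self.op.nodes = _GetWantedNodes(self, self.op.nodes)
#
# After the call the names are canonical and sorted, so they can be used
# directly, e.g. as self.needed_locks[locking.LEVEL_NODE].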
434

    
435

    
436
def _GetWantedInstances(lu, instances):
437
  """Returns list of checked and expanded instance names.
438

439
  @type lu: L{LogicalUnit}
440
  @param lu: the logical unit on whose behalf we execute
441
  @type instances: list
442
  @param instances: list of instance names or None for all instances
443
  @rtype: list
444
  @return: the list of instances, sorted
445
  @raise errors.OpPrereqError: if the instances parameter is wrong type
446
  @raise errors.OpPrereqError: if any of the passed instances is not found
447

448
  """
449
  if not isinstance(instances, list):
450
    raise errors.OpPrereqError("Invalid argument type 'instances'")
451

    
452
  if instances:
453
    wanted = []
454

    
455
    for name in instances:
456
      instance = lu.cfg.ExpandInstanceName(name)
457
      if instance is None:
458
        raise errors.OpPrereqError("No such instance name '%s'" % name)
459
      wanted.append(instance)
460

    
461
  else:
462
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
463
  return wanted
464

    
465

    
466
def _CheckOutputFields(static, dynamic, selected):
467
  """Checks whether all selected fields are valid.
468

469
  @type static: L{utils.FieldSet}
470
  @param static: static fields set
471
  @type dynamic: L{utils.FieldSet}
472
  @param dynamic: dynamic fields set
473

474
  """
475
  f = utils.FieldSet()
476
  f.Extend(static)
477
  f.Extend(dynamic)
478

    
479
  delta = f.NonMatching(selected)
480
  if delta:
481
    raise errors.OpPrereqError("Unknown output fields selected: %s"
482
                               % ",".join(delta))
483

    
484

    
485
def _CheckBooleanOpField(op, name):
486
  """Validates boolean opcode parameters.
487

488
  This will ensure that an opcode parameter is either a boolean value,
489
  or None (but that it always exists).
490

491
  """
492
  val = getattr(op, name, None)
493
  if not (val is None or isinstance(val, bool)):
494
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
495
                               (name, str(val)))
496
  setattr(op, name, val)
497

    
498

    
499
def _CheckNodeOnline(lu, node):
500
  """Ensure that a given node is online.
501

502
  @param lu: the LU on behalf of which we make the check
503
  @param node: the node to check
504
  @raise errors.OpPrereqError: if the node is offline
505

506
  """
507
  if lu.cfg.GetNodeInfo(node).offline:
508
    raise errors.OpPrereqError("Can't use offline node %s" % node)
509

    
510

    
511
def _CheckNodeNotDrained(lu, node):
512
  """Ensure that a given node is not drained.
513

514
  @param lu: the LU on behalf of which we make the check
515
  @param node: the node to check
516
  @raise errors.OpPrereqError: if the node is drained
517

518
  """
519
  if lu.cfg.GetNodeInfo(node).drained:
520
    raise errors.OpPrereqError("Can't use drained node %s" % node)
521

    
522

    
523
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
524
                          memory, vcpus, nics, disk_template, disks,
525
                          bep, hvp, hypervisor_name):
526
  """Builds instance related env variables for hooks
527

528
  This builds the hook environment from individual variables.
529

530
  @type name: string
531
  @param name: the name of the instance
532
  @type primary_node: string
533
  @param primary_node: the name of the instance's primary node
534
  @type secondary_nodes: list
535
  @param secondary_nodes: list of secondary nodes as strings
536
  @type os_type: string
537
  @param os_type: the name of the instance's OS
538
  @type status: boolean
539
  @param status: the should_run status of the instance
540
  @type memory: string
541
  @param memory: the memory size of the instance
542
  @type vcpus: string
543
  @param vcpus: the count of VCPUs the instance has
544
  @type nics: list
545
  @param nics: list of tuples (ip, mac, mode, link) representing
546
      the NICs the instance has
547
  @type disk_template: string
548
  @param disk_template: the disk template of the instance
549
  @type disks: list
550
  @param disks: the list of (size, mode) pairs
551
  @type bep: dict
552
  @param bep: the backend parameters for the instance
553
  @type hvp: dict
554
  @param hvp: the hypervisor parameters for the instance
555
  @type hypervisor_name: string
556
  @param hypervisor_name: the hypervisor for the instance
557
  @rtype: dict
558
  @return: the hook environment for this instance
559

560
  """
561
  if status:
562
    str_status = "up"
563
  else:
564
    str_status = "down"
565
  env = {
566
    "OP_TARGET": name,
567
    "INSTANCE_NAME": name,
568
    "INSTANCE_PRIMARY": primary_node,
569
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
570
    "INSTANCE_OS_TYPE": os_type,
571
    "INSTANCE_STATUS": str_status,
572
    "INSTANCE_MEMORY": memory,
573
    "INSTANCE_VCPUS": vcpus,
574
    "INSTANCE_DISK_TEMPLATE": disk_template,
575
    "INSTANCE_HYPERVISOR": hypervisor_name,
576
  }
577

    
578
  if nics:
579
    nic_count = len(nics)
580
    for idx, (ip, mac, mode, link) in enumerate(nics):
581
      if ip is None:
582
        ip = ""
583
      env["INSTANCE_NIC%d_IP" % idx] = ip
584
      env["INSTANCE_NIC%d_MAC" % idx] = mac
585
      env["INSTANCE_NIC%d_MODE" % idx] = mode
586
      env["INSTANCE_NIC%d_LINK" % idx] = link
587
      if mode == constants.NIC_MODE_BRIDGED:
588
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
589
  else:
590
    nic_count = 0
591

    
592
  env["INSTANCE_NIC_COUNT"] = nic_count
593

    
594
  if disks:
595
    disk_count = len(disks)
596
    for idx, (size, mode) in enumerate(disks):
597
      env["INSTANCE_DISK%d_SIZE" % idx] = size
598
      env["INSTANCE_DISK%d_MODE" % idx] = mode
599
  else:
600
    disk_count = 0
601

    
602
  env["INSTANCE_DISK_COUNT"] = disk_count
603

    
604
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
605
    for key, value in source.items():
606
      env["INSTANCE_%s_%s" % (kind, key)] = value
607

    
608
  return env
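
# Illustrative sketch, not part of the upstream module: a hypothetical call
# to _BuildInstanceHookEnv showing the shape of the environment it returns.
# All instance data below is made up.
def _ExampleInstanceHookEnv():
  """Return a sample hook environment for a made-up instance.

  """
  env = _BuildInstanceHookEnv("inst1.example.com", "node1.example.com", [],
                              "example-os", True, 512, 1,
                              [("198.51.100.10", "aa:00:00:11:22:33",
                                constants.NIC_MODE_BRIDGED, "xen-br0")],
                              "plain", [(10240, "rw")], {}, {}, "example-hv")
  # Among others the result contains INSTANCE_NAME, INSTANCE_PRIMARY,
  # INSTANCE_STATUS ("up"), INSTANCE_NIC_COUNT (1), INSTANCE_NIC0_BRIDGE
  # ("xen-br0") and INSTANCE_DISK0_SIZE (10240)
  return env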
609

    
610

    
611
def _NICListToTuple(lu, nics):
612
  """Build a list of nic information tuples.
613

614
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
615
  value in LUQueryInstanceData.
616

617
  @type lu:  L{LogicalUnit}
618
  @param lu: the logical unit on whose behalf we execute
619
  @type nics: list of L{objects.NIC}
620
  @param nics: list of nics to convert to hooks tuples
621

622
  """
623
  hooks_nics = []
624
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
625
  for nic in nics:
626
    ip = nic.ip
627
    mac = nic.mac
628
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
629
    mode = filled_params[constants.NIC_MODE]
630
    link = filled_params[constants.NIC_LINK]
631
    hooks_nics.append((ip, mac, mode, link))
632
  return hooks_nics
633

    
634

    
635
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
636
  """Builds instance related env variables for hooks from an object.
637

638
  @type lu: L{LogicalUnit}
639
  @param lu: the logical unit on whose behalf we execute
640
  @type instance: L{objects.Instance}
641
  @param instance: the instance for which we should build the
642
      environment
643
  @type override: dict
644
  @param override: dictionary with key/values that will override
645
      our values
646
  @rtype: dict
647
  @return: the hook environment dictionary
648

649
  """
650
  cluster = lu.cfg.GetClusterInfo()
651
  bep = cluster.FillBE(instance)
652
  hvp = cluster.FillHV(instance)
653
  args = {
654
    'name': instance.name,
655
    'primary_node': instance.primary_node,
656
    'secondary_nodes': instance.secondary_nodes,
657
    'os_type': instance.os,
658
    'status': instance.admin_up,
659
    'memory': bep[constants.BE_MEMORY],
660
    'vcpus': bep[constants.BE_VCPUS],
661
    'nics': _NICListToTuple(lu, instance.nics),
662
    'disk_template': instance.disk_template,
663
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
664
    'bep': bep,
665
    'hvp': hvp,
666
    'hypervisor_name': instance.hypervisor,
667
  }
668
  if override:
669
    args.update(override)
670
  return _BuildInstanceHookEnv(**args)
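
# Illustrative, hypothetical BuildHooksEnv of an instance-level LU, showing
# the intended use of _BuildInstanceHookEnvByObject; "self.instance" is
# assumed to have been looked up in CheckPrereq:
#
#   def BuildHooksEnv(self):
#     env = _BuildInstanceHookEnvByObject(self, self.instance)
#     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
#     return env, nl, nl
#
# i.e. the usual (env, pre-hook nodes, post-hook nodes) triple described in
# LogicalUnit.BuildHooksEnv, with the environment taken from the instance.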
671

    
672

    
673
def _AdjustCandidatePool(lu):
674
  """Adjust the candidate pool after node operations.
675

676
  """
677
  mod_list = lu.cfg.MaintainCandidatePool()
678
  if mod_list:
679
    lu.LogInfo("Promoted nodes to master candidate role: %s",
680
               ", ".join(node.name for node in mod_list))
681
    for name in mod_list:
682
      lu.context.ReaddNode(name)
683
  mc_now, mc_max = lu.cfg.GetMasterCandidateStats()
684
  if mc_now > mc_max:
685
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
686
               (mc_now, mc_max))
687

    
688

    
689
def _CheckNicsBridgesExist(lu, target_nics, target_node,
690
                               profile=constants.PP_DEFAULT):
691
  """Check that the brigdes needed by a list of nics exist.
692

693
  """
694
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
695
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
696
                for nic in target_nics]
697
  brlist = [params[constants.NIC_LINK] for params in paramslist
698
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
699
  if brlist:
700
    result = lu.rpc.call_bridges_exist(target_node, brlist)
701
    result.Raise("Error checking bridges on destination node '%s'" %
702
                 target_node, prereq=True)
703

    
704

    
705
def _CheckInstanceBridgesExist(lu, instance, node=None):
706
  """Check that the brigdes needed by an instance exist.
707

708
  """
709
  if node is None:
710
    node = instance.primary_node
711
  _CheckNicsBridgesExist(lu, instance.nics, node)
712

    
713

    
714
def _GetNodeInstancesInner(cfg, fn):
715
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
716

    
717

    
718
def _GetNodeInstances(cfg, node_name):
719
  """Returns a list of all primary and secondary instances on a node.
720

721
  """
722

    
723
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
724

    
725

    
726
def _GetNodePrimaryInstances(cfg, node_name):
727
  """Returns primary instances on a node.
728

729
  """
730
  return _GetNodeInstancesInner(cfg,
731
                                lambda inst: node_name == inst.primary_node)
732

    
733

    
734
def _GetNodeSecondaryInstances(cfg, node_name):
735
  """Returns secondary instances on a node.
736

737
  """
738
  return _GetNodeInstancesInner(cfg,
739
                                lambda inst: node_name in inst.secondary_nodes)
740

    
741

    
742
def _GetStorageTypeArgs(cfg, storage_type):
743
  """Returns the arguments for a storage type.
744

745
  """
746
  # Special case for file storage
747
  if storage_type == constants.ST_FILE:
748
    # storage.FileStorage wants a list of storage directories
749
    return [[cfg.GetFileStorageDir()]]
750

    
751
  return []
752

    
753

    
754
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
755
  faulty = []
756

    
757
  for dev in instance.disks:
758
    cfg.SetDiskID(dev, node_name)
759

    
760
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
761
  result.Raise("Failed to get disk status from node %s" % node_name,
762
               prereq=prereq)
763

    
764
  for idx, bdev_status in enumerate(result.payload):
765
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
766
      faulty.append(idx)
767

    
768
  return faulty
769

    
770

    
771
class LUPostInitCluster(LogicalUnit):
772
  """Logical unit for running hooks after cluster initialization.
773

774
  """
775
  HPATH = "cluster-init"
776
  HTYPE = constants.HTYPE_CLUSTER
777
  _OP_REQP = []
778

    
779
  def BuildHooksEnv(self):
780
    """Build hooks env.
781

782
    """
783
    env = {"OP_TARGET": self.cfg.GetClusterName()}
784
    mn = self.cfg.GetMasterNode()
785
    return env, [], [mn]
786

    
787
  def CheckPrereq(self):
788
    """No prerequisites to check.
789

790
    """
791
    return True
792

    
793
  def Exec(self, feedback_fn):
794
    """Nothing to do.
795

796
    """
797
    return True
798

    
799

    
800
class LUDestroyCluster(LogicalUnit):
801
  """Logical unit for destroying the cluster.
802

803
  """
804
  HPATH = "cluster-destroy"
805
  HTYPE = constants.HTYPE_CLUSTER
806
  _OP_REQP = []
807

    
808
  def BuildHooksEnv(self):
809
    """Build hooks env.
810

811
    """
812
    env = {"OP_TARGET": self.cfg.GetClusterName()}
813
    return env, [], []
814

    
815
  def CheckPrereq(self):
816
    """Check prerequisites.
817

818
    This checks whether the cluster is empty.
819

820
    Any errors are signaled by raising errors.OpPrereqError.
821

822
    """
823
    master = self.cfg.GetMasterNode()
824

    
825
    nodelist = self.cfg.GetNodeList()
826
    if len(nodelist) != 1 or nodelist[0] != master:
827
      raise errors.OpPrereqError("There are still %d node(s) in"
828
                                 " this cluster." % (len(nodelist) - 1))
829
    instancelist = self.cfg.GetInstanceList()
830
    if instancelist:
831
      raise errors.OpPrereqError("There are still %d instance(s) in"
832
                                 " this cluster." % len(instancelist))
833

    
834
  def Exec(self, feedback_fn):
835
    """Destroys the cluster.
836

837
    """
838
    master = self.cfg.GetMasterNode()
839

    
840
    # Run post hooks on master node before it's removed
841
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
842
    try:
843
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
844
    except:
845
      self.LogWarning("Errors occurred running hooks on %s" % master)
846

    
847
    result = self.rpc.call_node_stop_master(master, False)
848
    result.Raise("Could not disable the master role")
849
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
850
    utils.CreateBackup(priv_key)
851
    utils.CreateBackup(pub_key)
852
    return master
853

    
854

    
855
class LUVerifyCluster(LogicalUnit):
856
  """Verifies the cluster status.
857

858
  """
859
  HPATH = "cluster-verify"
860
  HTYPE = constants.HTYPE_CLUSTER
861
  _OP_REQP = ["skip_checks"]
862
  REQ_BGL = False
863

    
864
  def ExpandNames(self):
865
    self.needed_locks = {
866
      locking.LEVEL_NODE: locking.ALL_SET,
867
      locking.LEVEL_INSTANCE: locking.ALL_SET,
868
    }
869
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
870

    
871
  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
872
                  node_result, feedback_fn, master_files,
873
                  drbd_map, vg_name):
874
    """Run multiple tests against a node.
875

876
    Test list:
877

878
      - compares ganeti version
879
      - checks vg existence and size > 20G
880
      - checks config file checksum
881
      - checks ssh to other nodes
882

883
    @type nodeinfo: L{objects.Node}
884
    @param nodeinfo: the node to check
885
    @param file_list: required list of files
886
    @param local_cksum: dictionary of local files and their checksums
887
    @param node_result: the results from the node
888
    @param feedback_fn: function used to accumulate results
889
    @param master_files: list of files that only masters should have
890
    @param drbd_map: the used drbd minors for this node, in
891
        form of minor: (instance, must_exist) which correspond to instances
892
        and their running status
893
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())
894

895
    """
896
    node = nodeinfo.name
897

    
898
    # main result, node_result should be a non-empty dict
899
    if not node_result or not isinstance(node_result, dict):
900
      feedback_fn("  - ERROR: unable to verify node %s." % (node,))
901
      return True
902

    
903
    # compares ganeti version
904
    local_version = constants.PROTOCOL_VERSION
905
    remote_version = node_result.get('version', None)
906
    if not (remote_version and isinstance(remote_version, (list, tuple)) and
907
            len(remote_version) == 2):
908
      feedback_fn("  - ERROR: connection to %s failed" % (node))
909
      return True
910

    
911
    if local_version != remote_version[0]:
912
      feedback_fn("  - ERROR: incompatible protocol versions: master %s,"
913
                  " node %s %s" % (local_version, node, remote_version[0]))
914
      return True
915

    
916
    # node seems compatible, we can actually try to look into its results
917

    
918
    bad = False
919

    
920
    # full package version
921
    if constants.RELEASE_VERSION != remote_version[1]:
922
      feedback_fn("  - WARNING: software version mismatch: master %s,"
923
                  " node %s %s" %
924
                  (constants.RELEASE_VERSION, node, remote_version[1]))
925

    
926
    # checks vg existence and size > 20G
927
    if vg_name is not None:
928
      vglist = node_result.get(constants.NV_VGLIST, None)
929
      if not vglist:
930
        feedback_fn("  - ERROR: unable to check volume groups on node %s." %
931
                        (node,))
932
        bad = True
933
      else:
934
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
935
                                              constants.MIN_VG_SIZE)
936
        if vgstatus:
937
          feedback_fn("  - ERROR: %s on node %s" % (vgstatus, node))
938
          bad = True
939

    
940
    # checks config file checksum
941

    
942
    remote_cksum = node_result.get(constants.NV_FILELIST, None)
943
    if not isinstance(remote_cksum, dict):
944
      bad = True
945
      feedback_fn("  - ERROR: node hasn't returned file checksum data")
946
    else:
947
      for file_name in file_list:
948
        node_is_mc = nodeinfo.master_candidate
949
        must_have_file = file_name not in master_files
950
        if file_name not in remote_cksum:
951
          if node_is_mc or must_have_file:
952
            bad = True
953
            feedback_fn("  - ERROR: file '%s' missing" % file_name)
954
        elif remote_cksum[file_name] != local_cksum[file_name]:
955
          if node_is_mc or must_have_file:
956
            bad = True
957
            feedback_fn("  - ERROR: file '%s' has wrong checksum" % file_name)
958
          else:
959
            # not candidate and this is not a must-have file
960
            bad = True
961
            feedback_fn("  - ERROR: file '%s' should not exist on non master"
962
                        " candidates (and the file is outdated)" % file_name)
963
        else:
964
          # all good, except non-master/non-must have combination
965
          if not node_is_mc and not must_have_file:
966
            feedback_fn("  - ERROR: file '%s' should not exist on non master"
967
                        " candidates" % file_name)
968

    
969
    # checks ssh to any
970

    
971
    if constants.NV_NODELIST not in node_result:
972
      bad = True
973
      feedback_fn("  - ERROR: node hasn't returned node ssh connectivity data")
974
    else:
975
      if node_result[constants.NV_NODELIST]:
976
        bad = True
977
        for node in node_result[constants.NV_NODELIST]:
978
          feedback_fn("  - ERROR: ssh communication with node '%s': %s" %
979
                          (node, node_result[constants.NV_NODELIST][node]))
980

    
981
    if constants.NV_NODENETTEST not in node_result:
982
      bad = True
983
      feedback_fn("  - ERROR: node hasn't returned node tcp connectivity data")
984
    else:
985
      if node_result[constants.NV_NODENETTEST]:
986
        bad = True
987
        nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
988
        for node in nlist:
989
          feedback_fn("  - ERROR: tcp communication with node '%s': %s" %
990
                          (node, node_result[constants.NV_NODENETTEST][node]))
991

    
992
    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
993
    if isinstance(hyp_result, dict):
994
      for hv_name, hv_result in hyp_result.iteritems():
995
        if hv_result is not None:
996
          feedback_fn("  - ERROR: hypervisor %s verify failure: '%s'" %
997
                      (hv_name, hv_result))
998

    
999
    # check used drbd list
1000
    if vg_name is not None:
1001
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
1002
      if not isinstance(used_minors, (tuple, list)):
1003
        feedback_fn("  - ERROR: cannot parse drbd status file: %s" %
1004
                    str(used_minors))
1005
      else:
1006
        for minor, (iname, must_exist) in drbd_map.items():
1007
          if minor not in used_minors and must_exist:
1008
            feedback_fn("  - ERROR: drbd minor %d of instance %s is"
1009
                        " not active" % (minor, iname))
1010
            bad = True
1011
        for minor in used_minors:
1012
          if minor not in drbd_map:
1013
            feedback_fn("  - ERROR: unallocated drbd minor %d is in use" %
1014
                        minor)
1015
            bad = True
1016

    
1017
    return bad
1018

    
1019
  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
1020
                      node_instance, feedback_fn, n_offline):
1021
    """Verify an instance.
1022

1023
    This function checks to see if the required block devices are
1024
    available on the instance's node.
1025

1026
    """
1027
    bad = False
1028

    
1029
    node_current = instanceconfig.primary_node
1030

    
1031
    node_vol_should = {}
1032
    instanceconfig.MapLVsByNode(node_vol_should)
1033

    
1034
    for node in node_vol_should:
1035
      if node in n_offline:
1036
        # ignore missing volumes on offline nodes
1037
        continue
1038
      for volume in node_vol_should[node]:
1039
        if node not in node_vol_is or volume not in node_vol_is[node]:
1040
          feedback_fn("  - ERROR: volume %s missing on node %s" %
1041
                          (volume, node))
1042
          bad = True
1043

    
1044
    if instanceconfig.admin_up:
1045
      if ((node_current not in node_instance or
1046
          not instance in node_instance[node_current]) and
1047
          node_current not in n_offline):
1048
        feedback_fn("  - ERROR: instance %s not running on node %s" %
1049
                        (instance, node_current))
1050
        bad = True
1051

    
1052
    for node in node_instance:
1053
      if (not node == node_current):
1054
        if instance in node_instance[node]:
1055
          feedback_fn("  - ERROR: instance %s should not run on node %s" %
1056
                          (instance, node))
1057
          bad = True
1058

    
1059
    return bad
1060

    
1061
  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is, feedback_fn):
1062
    """Verify if there are any unknown volumes in the cluster.
1063

1064
    The .os, .swap and backup volumes are ignored. All other volumes are
1065
    reported as unknown.
1066

1067
    """
1068
    bad = False
1069

    
1070
    for node in node_vol_is:
1071
      for volume in node_vol_is[node]:
1072
        if node not in node_vol_should or volume not in node_vol_should[node]:
1073
          feedback_fn("  - ERROR: volume %s on node %s should not exist" %
1074
                      (volume, node))
1075
          bad = True
1076
    return bad
1077

    
1078
  def _VerifyOrphanInstances(self, instancelist, node_instance, feedback_fn):
1079
    """Verify the list of running instances.
1080

1081
    This checks what instances are running but unknown to the cluster.
1082

1083
    """
1084
    bad = False
1085
    for node in node_instance:
1086
      for runninginstance in node_instance[node]:
1087
        if runninginstance not in instancelist:
1088
          feedback_fn("  - ERROR: instance %s on node %s should not exist" %
1089
                          (runninginstance, node))
1090
          bad = True
1091
    return bad
1092

    
1093
  def _VerifyNPlusOneMemory(self, node_info, instance_cfg, feedback_fn):
1094
    """Verify N+1 Memory Resilience.
1095

1096
    Check that if one single node dies we can still start all the instances it
1097
    was primary for.
1098

1099
    """
1100
    bad = False
1101

    
1102
    for node, nodeinfo in node_info.iteritems():
1103
      # This code checks that every node which is now listed as secondary has
1104
      # enough memory to host all instances it is supposed to, should a single
1105
      # other node in the cluster fail.
1106
      # FIXME: not ready for failover to an arbitrary node
1107
      # FIXME: does not support file-backed instances
1108
      # WARNING: we currently take into account down instances as well as up
1109
      # ones, considering that even if they're down someone might want to start
1110
      # them even in the event of a node failure.
1111
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
1112
        needed_mem = 0
1113
        for instance in instances:
1114
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1115
          if bep[constants.BE_AUTO_BALANCE]:
1116
            needed_mem += bep[constants.BE_MEMORY]
1117
        if nodeinfo['mfree'] < needed_mem:
1118
          feedback_fn("  - ERROR: not enough memory on node %s to accommodate"
1119
                      " failovers should node %s fail" % (node, prinode))
1120
          bad = True
1121
    return bad
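
  # Worked example for the N+1 check above (all numbers made up): if node A
  # reports mfree=1024 and is secondary for instances i1 (BE_MEMORY=512) and
  # i2 (BE_MEMORY=768) whose primary is node B, then sinst-by-pnode[B] on A
  # needs 512 + 768 = 1280 MiB, which exceeds mfree, so failing node B over
  # to A could not start both instances and an N+1 error is reported.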
1122

    
1123
  def CheckPrereq(self):
1124
    """Check prerequisites.
1125

1126
    Transform the list of checks we're going to skip into a set and check that
1127
    all its members are valid.
1128

1129
    """
1130
    self.skip_set = frozenset(self.op.skip_checks)
1131
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1132
      raise errors.OpPrereqError("Invalid checks to be skipped specified")
1133

    
1134
  def BuildHooksEnv(self):
1135
    """Build hooks env.
1136

1137
    Cluster-Verify hooks are run only in the post phase, and their failure causes
1138
    the output to be logged in the verify output and the verification to fail.
1139

1140
    """
1141
    all_nodes = self.cfg.GetNodeList()
1142
    env = {
1143
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1144
      }
1145
    for node in self.cfg.GetAllNodesInfo().values():
1146
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1147

    
1148
    return env, [], all_nodes
1149

    
1150
  def Exec(self, feedback_fn):
1151
    """Verify integrity of cluster, performing various test on nodes.
1152

1153
    """
1154
    bad = False
1155
    feedback_fn("* Verifying global settings")
1156
    for msg in self.cfg.VerifyConfig():
1157
      feedback_fn("  - ERROR: %s" % msg)
1158

    
1159
    vg_name = self.cfg.GetVGName()
1160
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1161
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
1162
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1163
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1164
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1165
                        for iname in instancelist)
1166
    i_non_redundant = [] # Non redundant instances
1167
    i_non_a_balanced = [] # Non auto-balanced instances
1168
    n_offline = [] # List of offline nodes
1169
    n_drained = [] # List of nodes being drained
1170
    node_volume = {}
1171
    node_instance = {}
1172
    node_info = {}
1173
    instance_cfg = {}
1174

    
1175
    # FIXME: verify OS list
1176
    # do local checksums
1177
    master_files = [constants.CLUSTER_CONF_FILE]
1178

    
1179
    file_names = ssconf.SimpleStore().GetFileList()
1180
    file_names.append(constants.SSL_CERT_FILE)
1181
    file_names.append(constants.RAPI_CERT_FILE)
1182
    file_names.extend(master_files)
1183

    
1184
    local_checksums = utils.FingerprintFiles(file_names)
1185

    
1186
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1187
    node_verify_param = {
1188
      constants.NV_FILELIST: file_names,
1189
      constants.NV_NODELIST: [node.name for node in nodeinfo
1190
                              if not node.offline],
1191
      constants.NV_HYPERVISOR: hypervisors,
1192
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1193
                                  node.secondary_ip) for node in nodeinfo
1194
                                 if not node.offline],
1195
      constants.NV_INSTANCELIST: hypervisors,
1196
      constants.NV_VERSION: None,
1197
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1198
      }
1199
    if vg_name is not None:
1200
      node_verify_param[constants.NV_VGLIST] = None
1201
      node_verify_param[constants.NV_LVLIST] = vg_name
1202
      node_verify_param[constants.NV_DRBDLIST] = None
1203
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1204
                                           self.cfg.GetClusterName())
1205

    
1206
    cluster = self.cfg.GetClusterInfo()
1207
    master_node = self.cfg.GetMasterNode()
1208
    all_drbd_map = self.cfg.ComputeDRBDMap()
1209

    
1210
    for node_i in nodeinfo:
1211
      node = node_i.name
1212

    
1213
      if node_i.offline:
1214
        feedback_fn("* Skipping offline node %s" % (node,))
1215
        n_offline.append(node)
1216
        continue
1217

    
1218
      if node == master_node:
1219
        ntype = "master"
1220
      elif node_i.master_candidate:
1221
        ntype = "master candidate"
1222
      elif node_i.drained:
1223
        ntype = "drained"
1224
        n_drained.append(node)
1225
      else:
1226
        ntype = "regular"
1227
      feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1228

    
1229
      msg = all_nvinfo[node].fail_msg
1230
      if msg:
1231
        feedback_fn("  - ERROR: while contacting node %s: %s" % (node, msg))
1232
        bad = True
1233
        continue
1234

    
1235
      nresult = all_nvinfo[node].payload
1236
      node_drbd = {}
1237
      for minor, instance in all_drbd_map[node].items():
1238
        if instance not in instanceinfo:
1239
          feedback_fn("  - ERROR: ghost instance '%s' in temporary DRBD map" %
1240
                      instance)
1241
          # ghost instance should not be running, but otherwise we
1242
          # don't give double warnings (both ghost instance and
1243
          # unallocated minor in use)
1244
          node_drbd[minor] = (instance, False)
1245
        else:
1246
          instance = instanceinfo[instance]
1247
          node_drbd[minor] = (instance.name, instance.admin_up)
1248
      result = self._VerifyNode(node_i, file_names, local_checksums,
1249
                                nresult, feedback_fn, master_files,
1250
                                node_drbd, vg_name)
1251
      bad = bad or result
1252

    
1253
      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1254
      if vg_name is None:
1255
        node_volume[node] = {}
1256
      elif isinstance(lvdata, basestring):
1257
        feedback_fn("  - ERROR: LVM problem on node %s: %s" %
1258
                    (node, utils.SafeEncode(lvdata)))
1259
        bad = True
1260
        node_volume[node] = {}
1261
      elif not isinstance(lvdata, dict):
1262
        feedback_fn("  - ERROR: connection to %s failed (lvlist)" % (node,))
1263
        bad = True
1264
        continue
1265
      else:
1266
        node_volume[node] = lvdata
1267

    
1268
      # node_instance
1269
      idata = nresult.get(constants.NV_INSTANCELIST, None)
1270
      if not isinstance(idata, list):
1271
        feedback_fn("  - ERROR: connection to %s failed (instancelist)" %
1272
                    (node,))
1273
        bad = True
1274
        continue
1275

    
1276
      node_instance[node] = idata
1277

    
1278
      # node_info
1279
      nodeinfo = nresult.get(constants.NV_HVINFO, None)
1280
      if not isinstance(nodeinfo, dict):
1281
        feedback_fn("  - ERROR: connection to %s failed (hvinfo)" % (node,))
1282
        bad = True
1283
        continue
1284

    
1285
      try:
1286
        node_info[node] = {
1287
          "mfree": int(nodeinfo['memory_free']),
1288
          "pinst": [],
1289
          "sinst": [],
1290
          # dictionary holding all instances this node is secondary for,
1291
          # grouped by their primary node. Each key is a cluster node, and each
1292
          # value is a list of instances which have the key as primary and the
1293
          # current node as secondary.  This is handy to calculate N+1 memory
1294
          # availability if you can only failover from a primary to its
1295
          # secondary.
1296
          "sinst-by-pnode": {},
1297
        }
1298
        # FIXME: devise a free space model for file based instances as well
1299
        if vg_name is not None:
1300
          if (constants.NV_VGLIST not in nresult or
1301
              vg_name not in nresult[constants.NV_VGLIST]):
1302
            feedback_fn("  - ERROR: node %s didn't return data for the"
1303
                        " volume group '%s' - it is either missing or broken" %
1304
                        (node, vg_name))
1305
            bad = True
1306
            continue
1307
          node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
1308
      except (ValueError, KeyError):
1309
        feedback_fn("  - ERROR: invalid nodeinfo value returned"
1310
                    " from node %s" % (node,))
1311
        bad = True
1312
        continue
1313

    
1314
    node_vol_should = {}
1315

    
1316
    for instance in instancelist:
1317
      feedback_fn("* Verifying instance %s" % instance)
1318
      inst_config = instanceinfo[instance]
1319
      result =  self._VerifyInstance(instance, inst_config, node_volume,
1320
                                     node_instance, feedback_fn, n_offline)
1321
      bad = bad or result
1322
      inst_nodes_offline = []
1323

    
1324
      inst_config.MapLVsByNode(node_vol_should)
1325

    
1326
      instance_cfg[instance] = inst_config
1327

    
1328
      pnode = inst_config.primary_node
1329
      if pnode in node_info:
1330
        node_info[pnode]['pinst'].append(instance)
1331
      elif pnode not in n_offline:
1332
        feedback_fn("  - ERROR: instance %s, connection to primary node"
1333
                    " %s failed" % (instance, pnode))
1334
        bad = True
1335

    
1336
      if pnode in n_offline:
1337
        inst_nodes_offline.append(pnode)
1338

    
1339
      # If the instance is non-redundant we cannot survive losing its primary
1340
      # node, so we are not N+1 compliant. On the other hand we have no disk
1341
      # templates with more than one secondary so that situation is not well
1342
      # supported either.
1343
      # FIXME: does not support file-backed instances
1344
      if len(inst_config.secondary_nodes) == 0:
1345
        i_non_redundant.append(instance)
1346
      elif len(inst_config.secondary_nodes) > 1:
1347
        feedback_fn("  - WARNING: multiple secondaries for instance %s"
1348
                    % instance)
1349

    
1350
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1351
        i_non_a_balanced.append(instance)
1352

    
1353
      for snode in inst_config.secondary_nodes:
1354
        if snode in node_info:
1355
          node_info[snode]['sinst'].append(instance)
1356
          if pnode not in node_info[snode]['sinst-by-pnode']:
1357
            node_info[snode]['sinst-by-pnode'][pnode] = []
1358
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)
1359
        elif snode not in n_offline:
1360
          feedback_fn("  - ERROR: instance %s, connection to secondary node"
1361
                      " %s failed" % (instance, snode))
1362
          bad = True
1363
        if snode in n_offline:
1364
          inst_nodes_offline.append(snode)
1365

    
1366
      if inst_nodes_offline:
1367
        # warn that the instance lives on offline nodes, and set bad=True
1368
        feedback_fn("  - ERROR: instance lives on offline node(s) %s" %
1369
                    ", ".join(inst_nodes_offline))
1370
        bad = True
1371

    
1372
    feedback_fn("* Verifying orphan volumes")
1373
    result = self._VerifyOrphanVolumes(node_vol_should, node_volume,
1374
                                       feedback_fn)
1375
    bad = bad or result
1376

    
1377
    feedback_fn("* Verifying remaining instances")
1378
    result = self._VerifyOrphanInstances(instancelist, node_instance,
1379
                                         feedback_fn)
1380
    bad = bad or result
1381

    
1382
    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1383
      feedback_fn("* Verifying N+1 Memory redundancy")
1384
      result = self._VerifyNPlusOneMemory(node_info, instance_cfg, feedback_fn)
1385
      bad = bad or result
1386

    
1387
    feedback_fn("* Other Notes")
1388
    if i_non_redundant:
1389
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
1390
                  % len(i_non_redundant))
1391

    
1392
    if i_non_a_balanced:
1393
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
1394
                  % len(i_non_a_balanced))
1395

    
1396
    if n_offline:
1397
      feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))
1398

    
1399
    if n_drained:
1400
      feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))
1401

    
1402
    return not bad
1403

    
1404
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1405
    """Analyze the post-hooks' result
1406

1407
    This method analyses the hook result, handles it, and sends some
1408
    nicely-formatted feedback back to the user.
1409

1410
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
1411
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1412
    @param hooks_results: the results of the multi-node hooks rpc call
1413
    @param feedback_fn: function used to send feedback back to the caller
1414
    @param lu_result: previous Exec result
1415
    @return: the new Exec result, based on the previous result
1416
        and hook results
1417

1418
    """
1419
    # We only really run POST phase hooks, and are only interested in
1420
    # their results
1421
    if phase == constants.HOOKS_PHASE_POST:
1422
      # Used to change hooks' output to proper indentation
1423
      indent_re = re.compile('^', re.M)
1424
      feedback_fn("* Hooks Results")
1425
      if not hooks_results:
1426
        feedback_fn("  - ERROR: general communication failure")
1427
        lu_result = 1
1428
      else:
1429
        for node_name in hooks_results:
1430
          show_node_header = True
1431
          res = hooks_results[node_name]
1432
          msg = res.fail_msg
1433
          if msg:
1434
            if res.offline:
1435
              # no need to warn or set fail return value
1436
              continue
1437
            feedback_fn("    Communication failure in hooks execution: %s" %
1438
                        msg)
1439
            lu_result = 1
1440
            continue
1441
          for script, hkr, output in res.payload:
1442
            if hkr == constants.HKR_FAIL:
1443
              # The node header is only shown once, if there are
1444
              # failing hooks on that node
1445
              if show_node_header:
1446
                feedback_fn("  Node %s:" % node_name)
1447
                show_node_header = False
1448
              feedback_fn("    ERROR: Script %s failed, output:" % script)
1449
              output = indent_re.sub('      ', output)
1450
              feedback_fn("%s" % output)
1451
              lu_result = 1
1452

    
1453
      return lu_result
1454

    
1455

    
1456
class LUVerifyDisks(NoHooksLU):
1457
  """Verifies the cluster disks status.
1458

1459
  """
1460
  _OP_REQP = []
1461
  REQ_BGL = False
1462

    
1463
  def ExpandNames(self):
1464
    self.needed_locks = {
1465
      locking.LEVEL_NODE: locking.ALL_SET,
1466
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1467
    }
1468
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1469

    
1470
  def CheckPrereq(self):
1471
    """Check prerequisites.
1472

1473
    This has no prerequisites.
1474

1475
    """
1476
    pass
1477

    
1478
  def Exec(self, feedback_fn):
1479
    """Verify integrity of cluster disks.
1480

1481
    @rtype: tuple of three items
1482
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)
1485

1486
    """
1487
    result = res_nodes, res_instances, res_missing = {}, [], {}
1488

    
1489
    vg_name = self.cfg.GetVGName()
1490
    nodes = utils.NiceSort(self.cfg.GetNodeList())
1491
    instances = [self.cfg.GetInstanceInfo(name)
1492
                 for name in self.cfg.GetInstanceList()]
1493

    
1494
    nv_dict = {}
1495
    for inst in instances:
1496
      inst_lvs = {}
1497
      if (not inst.admin_up or
1498
          inst.disk_template not in constants.DTS_NET_MIRROR):
1499
        continue
1500
      inst.MapLVsByNode(inst_lvs)
1501
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
1502
      for node, vol_list in inst_lvs.iteritems():
1503
        for vol in vol_list:
1504
          nv_dict[(node, vol)] = inst
1505

    
1506
    if not nv_dict:
1507
      return result
1508

    
1509
    node_lvs = self.rpc.call_lv_list(nodes, vg_name)
1510

    
1511
    for node in nodes:
1512
      # node_volume
1513
      node_res = node_lvs[node]
1514
      if node_res.offline:
1515
        continue
1516
      msg = node_res.fail_msg
1517
      if msg:
1518
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
1519
        res_nodes[node] = msg
1520
        continue
1521

    
1522
      lvs = node_res.payload
1523
      for lv_name, (_, lv_inactive, lv_online) in lvs.items():
1524
        inst = nv_dict.pop((node, lv_name), None)
1525
        if (not lv_online and inst is not None
1526
            and inst.name not in res_instances):
1527
          res_instances.append(inst.name)
1528

    
1529
    # any leftover items in nv_dict are missing LVs, let's arrange the
1530
    # data better
1531
    for key, inst in nv_dict.iteritems():
1532
      if inst.name not in res_missing:
1533
        res_missing[inst.name] = []
1534
      res_missing[inst.name].append(key)
1535

    
1536
    return result
1537
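# Illustrative sketch, not part of the original module: how a caller could
# unpack and report the (res_nodes, res_instances, res_missing) tuple that
# LUVerifyDisks.Exec above returns; feedback_fn is assumed to behave like the
# feedback functions used elsewhere in this file.
def _ExampleReportVerifyDisks(verify_result, feedback_fn):
  """Pretty-print the LUVerifyDisks result tuple (example only).

  """
  res_nodes, res_instances, res_missing = verify_result
  for node, err in res_nodes.items():
    feedback_fn("  - ERROR: node %s: %s" % (node, err))
  for iname in res_instances:
    feedback_fn("  - instance %s needs activate-disks" % iname)
  for iname, keys in res_missing.items():
    for node, vol in keys:
      feedback_fn("  - instance %s: missing volume %s on node %s" %
                  (iname, vol, node))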

    
1538

    
1539
class LURepairDiskSizes(NoHooksLU):
1540
  """Verifies the cluster disks sizes.
1541

1542
  """
1543
  _OP_REQP = ["instances"]
1544
  REQ_BGL = False
1545

    
1546
  def ExpandNames(self):
1547

    
1548
    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'")

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = self.cfg.ExpandInstanceName(name)
        if full_name is None:
          raise errors.OpPrereqError("Instance '%s' not known" % name)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
1571

    
1572
  def DeclareLocks(self, level):
1573
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
1574
      self._LockInstancesNodes(primary_only=True)
1575

    
1576
  def CheckPrereq(self):
1577
    """Check prerequisites.
1578

1579
    This only checks the optional instance list against the existing names.
1580

1581
    """
1582
    if self.wanted_names is None:
1583
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
1584

    
1585
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
1586
                             in self.wanted_names]
1587

    
1588
  def Exec(self, feedback_fn):
1589
    """Verify the size of cluster disks.
1590

1591
    """
1592
    # TODO: check child disks too
1593
    # TODO: check differences in size between primary/secondary nodes
1594
    per_node_disks = {}
1595
    for instance in self.wanted_instances:
1596
      pnode = instance.primary_node
1597
      if pnode not in per_node_disks:
1598
        per_node_disks[pnode] = []
1599
      for idx, disk in enumerate(instance.disks):
1600
        per_node_disks[pnode].append((instance, idx, disk))
1601

    
1602
    changed = []
1603
    for node, dskl in per_node_disks.items():
1604
      result = self.rpc.call_blockdev_getsizes(node, [v[2] for v in dskl])
1605
      if result.failed:
1606
        self.LogWarning("Failure in blockdev_getsizes call to node"
1607
                        " %s, ignoring", node)
1608
        continue
1609
      if len(result.data) != len(dskl):
1610
        self.LogWarning("Invalid result from node %s, ignoring node results",
1611
                        node)
1612
        continue
1613
      for ((instance, idx, disk), size) in zip(dskl, result.data):
1614
        if size is None:
1615
          self.LogWarning("Disk %d of instance %s did not return size"
1616
                          " information, ignoring", idx, instance.name)
1617
          continue
1618
        if not isinstance(size, (int, long)):
1619
          self.LogWarning("Disk %d of instance %s did not return valid"
1620
                          " size information, ignoring", idx, instance.name)
1621
          continue
1622
        size = size >> 20
1623
        if size != disk.size:
1624
          self.LogInfo("Disk %d of instance %s has mismatched size,"
1625
                       " correcting: recorded %d, actual %d", idx,
1626
                       instance.name, disk.size, size)
1627
          disk.size = size
1628
          self.cfg.Update(instance)
1629
          changed.append((instance.name, idx, size))
1630
    return changed
1631
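# Minimal sketch, not part of the original module, of the grouping pattern
# used by LURepairDiskSizes.Exec above: disks are bucketed by primary node so
# that a single RPC per node can query all of that node's disk sizes at once.
def _ExampleGroupDisksByNode(instances):
  """Map each primary node to its [(instance, disk_index, disk)] list.

  """
  per_node_disks = {}
  for instance in instances:
    for idx, disk in enumerate(instance.disks):
      per_node_disks.setdefault(instance.primary_node, []).append(
        (instance, idx, disk))
  return per_node_disks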

    
1632

    
1633
class LURenameCluster(LogicalUnit):
1634
  """Rename the cluster.
1635

1636
  """
1637
  HPATH = "cluster-rename"
1638
  HTYPE = constants.HTYPE_CLUSTER
1639
  _OP_REQP = ["name"]
1640

    
1641
  def BuildHooksEnv(self):
1642
    """Build hooks env.
1643

1644
    """
1645
    env = {
1646
      "OP_TARGET": self.cfg.GetClusterName(),
1647
      "NEW_NAME": self.op.name,
1648
      }
1649
    mn = self.cfg.GetMasterNode()
1650
    return env, [mn], [mn]
1651

    
1652
  def CheckPrereq(self):
1653
    """Verify that the passed name is a valid one.
1654

1655
    """
1656
    hostname = utils.HostInfo(self.op.name)
1657

    
1658
    new_name = hostname.name
1659
    self.ip = new_ip = hostname.ip
1660
    old_name = self.cfg.GetClusterName()
1661
    old_ip = self.cfg.GetMasterIP()
1662
    if new_name == old_name and new_ip == old_ip:
1663
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
1664
                                 " cluster has changed")
1665
    if new_ip != old_ip:
1666
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
1667
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
1668
                                   " reachable on the network. Aborting." %
1669
                                   new_ip)
1670

    
1671
    self.op.name = new_name
1672

    
1673
  def Exec(self, feedback_fn):
1674
    """Rename the cluster.
1675

1676
    """
1677
    clustername = self.op.name
1678
    ip = self.ip
1679

    
1680
    # shutdown the master IP
1681
    master = self.cfg.GetMasterNode()
1682
    result = self.rpc.call_node_stop_master(master, False)
1683
    result.Raise("Could not disable the master role")
1684

    
1685
    try:
1686
      cluster = self.cfg.GetClusterInfo()
1687
      cluster.cluster_name = clustername
1688
      cluster.master_ip = ip
1689
      self.cfg.Update(cluster)
1690

    
1691
      # update the known hosts file
1692
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
1693
      node_list = self.cfg.GetNodeList()
1694
      try:
1695
        node_list.remove(master)
1696
      except ValueError:
1697
        pass
1698
      result = self.rpc.call_upload_file(node_list,
1699
                                         constants.SSH_KNOWN_HOSTS_FILE)
1700
      for to_node, to_result in result.iteritems():
1701
        msg = to_result.fail_msg
1702
        if msg:
1703
          msg = ("Copy of file %s to node %s failed: %s" %
1704
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
1705
          self.proc.LogWarning(msg)
1706

    
1707
    finally:
1708
      result = self.rpc.call_node_start_master(master, False, False)
1709
      msg = result.fail_msg
1710
      if msg:
1711
        self.LogWarning("Could not re-enable the master role on"
1712
                        " the master, please restart manually: %s", msg)
1713

    
1714

    
1715
def _RecursiveCheckIfLVMBased(disk):
1716
  """Check if the given disk or its children are lvm-based.
1717

1718
  @type disk: L{objects.Disk}
1719
  @param disk: the disk to check
1720
  @rtype: boolean
1721
  @return: boolean indicating whether a LD_LV dev_type was found or not
1722

1723
  """
1724
  if disk.children:
1725
    for chdisk in disk.children:
1726
      if _RecursiveCheckIfLVMBased(chdisk):
1727
        return True
1728
  return disk.dev_type == constants.LD_LV
1729
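# Illustrative usage sketch, not part of the original module: this is the kind
# of check LUSetClusterParams.CheckPrereq below performs when LVM storage is
# being disabled, expressed here as a standalone helper.
def _ExampleAnyLVMBasedDisk(instances):
  """Check whether any disk of the given instances is LVM-based (example).

  """
  for inst in instances:
    for disk in inst.disks:
      if _RecursiveCheckIfLVMBased(disk):
        return True
  return False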

    
1730

    
1731
class LUSetClusterParams(LogicalUnit):
1732
  """Change the parameters of the cluster.
1733

1734
  """
1735
  HPATH = "cluster-modify"
1736
  HTYPE = constants.HTYPE_CLUSTER
1737
  _OP_REQP = []
1738
  REQ_BGL = False
1739

    
1740
  def CheckArguments(self):
1741
    """Check parameters
1742

1743
    """
1744
    if not hasattr(self.op, "candidate_pool_size"):
1745
      self.op.candidate_pool_size = None
1746
    if self.op.candidate_pool_size is not None:
1747
      try:
1748
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
1749
      except (ValueError, TypeError), err:
1750
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
1751
                                   str(err))
1752
      if self.op.candidate_pool_size < 1:
1753
        raise errors.OpPrereqError("At least one master candidate needed")
1754

    
1755
  def ExpandNames(self):
1756
    # FIXME: in the future maybe other cluster params won't require checking on
1757
    # all nodes to be modified.
1758
    self.needed_locks = {
1759
      locking.LEVEL_NODE: locking.ALL_SET,
1760
    }
1761
    self.share_locks[locking.LEVEL_NODE] = 1
1762

    
1763
  def BuildHooksEnv(self):
1764
    """Build hooks env.
1765

1766
    """
1767
    env = {
1768
      "OP_TARGET": self.cfg.GetClusterName(),
1769
      "NEW_VG_NAME": self.op.vg_name,
1770
      }
1771
    mn = self.cfg.GetMasterNode()
1772
    return env, [mn], [mn]
1773

    
1774
  def CheckPrereq(self):
1775
    """Check prerequisites.
1776

1777
    This checks whether the given params don't conflict and
1778
    if the given volume group is valid.
1779

1780
    """
1781
    if self.op.vg_name is not None and not self.op.vg_name:
1782
      instances = self.cfg.GetAllInstancesInfo().values()
1783
      for inst in instances:
1784
        for disk in inst.disks:
1785
          if _RecursiveCheckIfLVMBased(disk):
1786
            raise errors.OpPrereqError("Cannot disable lvm storage while"
1787
                                       " lvm-based instances exist")
1788

    
1789
    node_list = self.acquired_locks[locking.LEVEL_NODE]
1790

    
1791
    # if vg_name not None, checks given volume group on all nodes
1792
    if self.op.vg_name:
1793
      vglist = self.rpc.call_vg_list(node_list)
1794
      for node in node_list:
1795
        msg = vglist[node].fail_msg
1796
        if msg:
1797
          # ignoring down node
1798
          self.LogWarning("Error while gathering data on node %s"
1799
                          " (ignoring node): %s", node, msg)
1800
          continue
1801
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
1802
                                              self.op.vg_name,
1803
                                              constants.MIN_VG_SIZE)
1804
        if vgstatus:
1805
          raise errors.OpPrereqError("Error on node '%s': %s" %
1806
                                     (node, vgstatus))
1807

    
1808
    self.cluster = cluster = self.cfg.GetClusterInfo()
1809
    # validate params changes
1810
    if self.op.beparams:
1811
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
1812
      self.new_beparams = objects.FillDict(
1813
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
1814

    
1815
    if self.op.nicparams:
1816
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
1817
      self.new_nicparams = objects.FillDict(
1818
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
1819
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
1820

    
1821
    # hypervisor list/parameters
1822
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
1823
    if self.op.hvparams:
1824
      if not isinstance(self.op.hvparams, dict):
1825
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input")
1826
      for hv_name, hv_dict in self.op.hvparams.items():
1827
        if hv_name not in self.new_hvparams:
1828
          self.new_hvparams[hv_name] = hv_dict
1829
        else:
1830
          self.new_hvparams[hv_name].update(hv_dict)
1831

    
1832
    if self.op.enabled_hypervisors is not None:
1833
      self.hv_list = self.op.enabled_hypervisors
1834
      if not self.hv_list:
1835
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
1836
                                   " least one member")
1837
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
1838
      if invalid_hvs:
1839
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
1840
                                   " entries: %s" %
1841
                                   utils.CommaJoin(invalid_hvs))
1842
    else:
1843
      self.hv_list = cluster.enabled_hypervisors
1844

    
1845
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
1846
      # either the enabled list has changed, or the parameters have, validate
1847
      for hv_name, hv_params in self.new_hvparams.items():
1848
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
1849
            (self.op.enabled_hypervisors and
1850
             hv_name in self.op.enabled_hypervisors)):
1851
          # either this is a new hypervisor, or its parameters have changed
1852
          hv_class = hypervisor.GetHypervisor(hv_name)
1853
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1854
          hv_class.CheckParameterSyntax(hv_params)
1855
          _CheckHVParams(self, node_list, hv_name, hv_params)
1856

    
1857
  def Exec(self, feedback_fn):
1858
    """Change the parameters of the cluster.
1859

1860
    """
1861
    if self.op.vg_name is not None:
1862
      new_volume = self.op.vg_name
1863
      if not new_volume:
1864
        new_volume = None
1865
      if new_volume != self.cfg.GetVGName():
1866
        self.cfg.SetVGName(new_volume)
1867
      else:
1868
        feedback_fn("Cluster LVM configuration already in desired"
1869
                    " state, not changing")
1870
    if self.op.hvparams:
1871
      self.cluster.hvparams = self.new_hvparams
1872
    if self.op.enabled_hypervisors is not None:
1873
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
1874
    if self.op.beparams:
1875
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
1876
    if self.op.nicparams:
1877
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
1878

    
1879
    if self.op.candidate_pool_size is not None:
1880
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
1881
      # we need to update the pool size here, otherwise the save will fail
1882
      _AdjustCandidatePool(self)
1883

    
1884
    self.cfg.Update(self.cluster)
1885
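# Minimal sketch, not part of the original module, of the parameter-merge
# pattern used by LUSetClusterParams.CheckPrereq above: the cluster-level
# defaults are copied (per-hypervisor dicts copied here for clarity) and then
# updated with the user-supplied overrides.
def _ExampleMergeHvParams(cluster_hvparams, op_hvparams):
  """Merge per-hypervisor overrides into a copy of the cluster defaults.

  """
  new_hvparams = dict((hv_name, params.copy())
                      for hv_name, params in cluster_hvparams.items())
  for hv_name, hv_dict in op_hvparams.items():
    new_hvparams.setdefault(hv_name, {}).update(hv_dict)
  return new_hvparams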

    
1886

    
1887
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
1888
  """Distribute additional files which are part of the cluster configuration.
1889

1890
  ConfigWriter takes care of distributing the config and ssconf files, but
1891
  there are more files which should be distributed to all nodes. This function
1892
  makes sure those are copied.
1893

1894
  @param lu: calling logical unit
1895
  @param additional_nodes: list of nodes not in the config to distribute to
1896

1897
  """
1898
  # 1. Gather target nodes
1899
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
1900
  dist_nodes = lu.cfg.GetNodeList()
1901
  if additional_nodes is not None:
1902
    dist_nodes.extend(additional_nodes)
1903
  if myself.name in dist_nodes:
1904
    dist_nodes.remove(myself.name)
1905
  # 2. Gather files to distribute
1906
  dist_files = set([constants.ETC_HOSTS,
1907
                    constants.SSH_KNOWN_HOSTS_FILE,
1908
                    constants.RAPI_CERT_FILE,
1909
                    constants.RAPI_USERS_FILE,
1910
                    constants.HMAC_CLUSTER_KEY,
1911
                   ])
1912

    
1913
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
1914
  for hv_name in enabled_hypervisors:
1915
    hv_class = hypervisor.GetHypervisor(hv_name)
1916
    dist_files.update(hv_class.GetAncillaryFiles())
1917

    
1918
  # 3. Perform the files upload
1919
  for fname in dist_files:
1920
    if os.path.exists(fname):
1921
      result = lu.rpc.call_upload_file(dist_nodes, fname)
1922
      for to_node, to_result in result.items():
1923
        msg = to_result.fail_msg
1924
        if msg:
1925
          msg = ("Copy of file %s to node %s failed: %s" %
1926
                 (fname, to_node, msg))
1927
          lu.proc.LogWarning(msg)
1928

    
1929

    
1930
class LURedistributeConfig(NoHooksLU):
1931
  """Force the redistribution of cluster configuration.
1932

1933
  This is a very simple LU.
1934

1935
  """
1936
  _OP_REQP = []
1937
  REQ_BGL = False
1938

    
1939
  def ExpandNames(self):
1940
    self.needed_locks = {
1941
      locking.LEVEL_NODE: locking.ALL_SET,
1942
    }
1943
    self.share_locks[locking.LEVEL_NODE] = 1
1944

    
1945
  def CheckPrereq(self):
1946
    """Check prerequisites.
1947

1948
    """
1949

    
1950
  def Exec(self, feedback_fn):
1951
    """Redistribute the configuration.
1952

1953
    """
1954
    self.cfg.Update(self.cfg.GetClusterInfo())
1955
    _RedistributeAncillaryFiles(self)
1956

    
1957

    
1958
def _WaitForSync(lu, instance, oneshot=False, unlock=False):
1959
  """Sleep and poll for an instance's disk to sync.
1960

1961
  """
1962
  if not instance.disks:
1963
    return True
1964

    
1965
  if not oneshot:
1966
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
1967

    
1968
  node = instance.primary_node
1969

    
1970
  for dev in instance.disks:
1971
    lu.cfg.SetDiskID(dev, node)
1972

    
1973
  retries = 0
1974
  degr_retries = 10 # in seconds, as we sleep 1 second each time
1975
  while True:
1976
    max_time = 0
1977
    done = True
1978
    cumul_degraded = False
1979
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
1980
    msg = rstats.fail_msg
1981
    if msg:
1982
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
1983
      retries += 1
1984
      if retries >= 10:
1985
        raise errors.RemoteError("Can't contact node %s for mirror data,"
1986
                                 " aborting." % node)
1987
      time.sleep(6)
1988
      continue
1989
    rstats = rstats.payload
1990
    retries = 0
1991
    for i, mstat in enumerate(rstats):
1992
      if mstat is None:
1993
        lu.LogWarning("Can't compute data for node %s/%s",
1994
                           node, instance.disks[i].iv_name)
1995
        continue
1996

    
1997
      cumul_degraded = (cumul_degraded or
1998
                        (mstat.is_degraded and mstat.sync_percent is None))
1999
      if mstat.sync_percent is not None:
2000
        done = False
2001
        if mstat.estimated_time is not None:
2002
          rem_time = "%d estimated seconds remaining" % mstat.estimated_time
2003
          max_time = mstat.estimated_time
2004
        else:
2005
          rem_time = "no time estimate"
2006
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2007
                        (instance.disks[i].iv_name, mstat.sync_percent, rem_time))
2008

    
2009
    # if we're done but degraded, let's do a few small retries, to
2010
    # make sure we see a stable and not transient situation; therefore
2011
    # we force restart of the loop
2012
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
2013
      logging.info("Degraded disks found, %d retries left", degr_retries)
2014
      degr_retries -= 1
2015
      time.sleep(1)
2016
      continue
2017

    
2018
    if done or oneshot:
2019
      break
2020

    
2021
    time.sleep(min(60, max_time))
2022

    
2023
  if done:
2024
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2025
  return not cumul_degraded
2026
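# A minimal, self-contained sketch, not part of the original module, of the
# retry pattern used by _WaitForSync above: when the operation looks finished
# but still degraded, poll a few more times before trusting the result.
# check_fn, degr_retries and delay are assumptions for illustration only.
def _ExamplePollUntilStable(check_fn, degr_retries=10, delay=1.0):
  """Poll check_fn() until it reports a stable (non-degraded) result.

  check_fn is assumed to return a (done, degraded) tuple.

  """
  while True:
    done, degraded = check_fn()
    if done and degraded and degr_retries > 0:
      # looks finished but degraded: re-check a few times before trusting it
      degr_retries -= 1
      time.sleep(delay)
      continue
    if done:
      return not degraded
    time.sleep(delay)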

    
2027

    
2028
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2029
  """Check that mirrors are not degraded.
2030

2031
  The ldisk parameter, if True, will change the test from the
2032
  is_degraded attribute (which represents overall non-ok status for
2033
  the device(s)) to the ldisk (representing the local storage status).
2034

2035
  """
2036
  lu.cfg.SetDiskID(dev, node)
2037

    
2038
  result = True
2039

    
2040
  if on_primary or dev.AssembleOnSecondary():
2041
    rstats = lu.rpc.call_blockdev_find(node, dev)
2042
    msg = rstats.fail_msg
2043
    if msg:
2044
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2045
      result = False
2046
    elif not rstats.payload:
2047
      lu.LogWarning("Can't find disk on node %s", node)
2048
      result = False
2049
    else:
2050
      if ldisk:
2051
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2052
      else:
2053
        result = result and not rstats.payload.is_degraded
2054

    
2055
  if dev.children:
2056
    for child in dev.children:
2057
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2058

    
2059
  return result
2060

    
2061

    
2062
class LUDiagnoseOS(NoHooksLU):
2063
  """Logical unit for OS diagnose/query.
2064

2065
  """
2066
  _OP_REQP = ["output_fields", "names"]
2067
  REQ_BGL = False
2068
  _FIELDS_STATIC = utils.FieldSet()
2069
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status")
2070

    
2071
  def ExpandNames(self):
2072
    if self.op.names:
2073
      raise errors.OpPrereqError("Selective OS query not supported")
2074

    
2075
    _CheckOutputFields(static=self._FIELDS_STATIC,
2076
                       dynamic=self._FIELDS_DYNAMIC,
2077
                       selected=self.op.output_fields)
2078

    
2079
    # Lock all nodes, in shared mode
2080
    # Temporary removal of locks, should be reverted later
2081
    # TODO: reintroduce locks when they are lighter-weight
2082
    self.needed_locks = {}
2083
    #self.share_locks[locking.LEVEL_NODE] = 1
2084
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2085

    
2086
  def CheckPrereq(self):
2087
    """Check prerequisites.
2088

2089
    """
2090

    
2091
  @staticmethod
2092
  def _DiagnoseByOS(node_list, rlist):
2093
    """Remaps a per-node return list into an a per-os per-node dictionary
2094

2095
    @param node_list: a list with the names of all nodes
2096
    @param rlist: a map with node names as keys and OS objects as values
2097

2098
    @rtype: dict
2099
    @return: a dictionary with osnames as keys and as value another map, with
2100
        nodes as keys and tuples of (path, status, diagnose) as values, eg::
2101

2102
          {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
2103
                                     (/srv/..., False, "invalid api")],
2104
                           "node2": [(/srv/..., True, "")]}
2105
          }
2106

2107
    """
2108
    all_os = {}
2109
    # we build here the list of nodes that didn't fail the RPC (at RPC
2110
    # level), so that nodes with a non-responding node daemon don't
2111
    # make all OSes invalid
2112
    good_nodes = [node_name for node_name in rlist
2113
                  if not rlist[node_name].fail_msg]
2114
    for node_name, nr in rlist.items():
2115
      if nr.fail_msg or not nr.payload:
2116
        continue
2117
      for name, path, status, diagnose in nr.payload:
2118
        if name not in all_os:
2119
          # build a list of nodes for this os containing empty lists
2120
          # for each node in node_list
2121
          all_os[name] = {}
2122
          for nname in good_nodes:
2123
            all_os[name][nname] = []
2124
        all_os[name][node_name].append((path, status, diagnose))
2125
    return all_os
2126

    
2127
  def Exec(self, feedback_fn):
2128
    """Compute the list of OSes.
2129

2130
    """
2131
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2132
    node_data = self.rpc.call_os_diagnose(valid_nodes)
2133
    pol = self._DiagnoseByOS(valid_nodes, node_data)
2134
    output = []
2135
    for os_name, os_data in pol.items():
2136
      row = []
2137
      for field in self.op.output_fields:
2138
        if field == "name":
2139
          val = os_name
2140
        elif field == "valid":
2141
          val = utils.all([osl and osl[0][1] for osl in os_data.values()])
2142
        elif field == "node_status":
2143
          # this is just a copy of the dict
2144
          val = {}
2145
          for node_name, nos_list in os_data.items():
2146
            val[node_name] = nos_list
2147
        else:
2148
          raise errors.ParameterError(field)
2149
        row.append(val)
2150
      output.append(row)
2151

    
2152
    return output
2153

    
2154

    
2155
class LURemoveNode(LogicalUnit):
2156
  """Logical unit for removing a node.
2157

2158
  """
2159
  HPATH = "node-remove"
2160
  HTYPE = constants.HTYPE_NODE
2161
  _OP_REQP = ["node_name"]
2162

    
2163
  def BuildHooksEnv(self):
2164
    """Build hooks env.
2165

2166
    This doesn't run on the target node in the pre phase as a failed
2167
    node would then be impossible to remove.
2168

2169
    """
2170
    env = {
2171
      "OP_TARGET": self.op.node_name,
2172
      "NODE_NAME": self.op.node_name,
2173
      }
2174
    all_nodes = self.cfg.GetNodeList()
2175
    if self.op.node_name in all_nodes:
2176
      all_nodes.remove(self.op.node_name)
2177
    return env, all_nodes, all_nodes
2178

    
2179
  def CheckPrereq(self):
2180
    """Check prerequisites.
2181

2182
    This checks:
2183
     - the node exists in the configuration
2184
     - it does not have primary or secondary instances
2185
     - it's not the master
2186

2187
    Any errors are signaled by raising errors.OpPrereqError.
2188

2189
    """
2190
    node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
2191
    if node is None:
2192
      raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name)
2193

    
2194
    instance_list = self.cfg.GetInstanceList()
2195

    
2196
    masternode = self.cfg.GetMasterNode()
2197
    if node.name == masternode:
2198
      raise errors.OpPrereqError("Node is the master node,"
2199
                                 " you need to failover first.")
2200

    
2201
    for instance_name in instance_list:
2202
      instance = self.cfg.GetInstanceInfo(instance_name)
2203
      if node.name in instance.all_nodes:
2204
        raise errors.OpPrereqError("Instance %s is still running on the node,"
2205
                                   " please remove first." % instance_name)
2206
    self.op.node_name = node.name
2207
    self.node = node
2208

    
2209
  def Exec(self, feedback_fn):
2210
    """Removes the node from the cluster.
2211

2212
    """
2213
    node = self.node
2214
    logging.info("Stopping the node daemon and removing configs from node %s",
2215
                 node.name)
2216

    
2217
    self.context.RemoveNode(node.name)
2218

    
2219
    # Run post hooks on the node before it's removed
2220
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
2221
    try:
2222
      h_results = hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
2223
    except:
2224
      self.LogWarning("Errors occurred running hooks on %s" % node.name)
2225

    
2226
    result = self.rpc.call_node_leave_cluster(node.name)
2227
    msg = result.fail_msg
2228
    if msg:
2229
      self.LogWarning("Errors encountered on the remote node while leaving"
2230
                      " the cluster: %s", msg)
2231

    
2232
    # Promote nodes to master candidate as needed
2233
    _AdjustCandidatePool(self)
2234

    
2235

    
2236
class LUQueryNodes(NoHooksLU):
2237
  """Logical unit for querying nodes.
2238

2239
  """
2240
  _OP_REQP = ["output_fields", "names", "use_locking"]
2241
  REQ_BGL = False
2242
  _FIELDS_DYNAMIC = utils.FieldSet(
2243
    "dtotal", "dfree",
2244
    "mtotal", "mnode", "mfree",
2245
    "bootid",
2246
    "ctotal", "cnodes", "csockets",
2247
    )
2248

    
2249
  _FIELDS_STATIC = utils.FieldSet(
2250
    "name", "pinst_cnt", "sinst_cnt",
2251
    "pinst_list", "sinst_list",
2252
    "pip", "sip", "tags",
2253
    "serial_no", "ctime", "mtime",
2254
    "master_candidate",
2255
    "master",
2256
    "offline",
2257
    "drained",
2258
    "role",
2259
    )
2260

    
2261
  def ExpandNames(self):
2262
    _CheckOutputFields(static=self._FIELDS_STATIC,
2263
                       dynamic=self._FIELDS_DYNAMIC,
2264
                       selected=self.op.output_fields)
2265

    
2266
    self.needed_locks = {}
2267
    self.share_locks[locking.LEVEL_NODE] = 1
2268

    
2269
    if self.op.names:
2270
      self.wanted = _GetWantedNodes(self, self.op.names)
2271
    else:
2272
      self.wanted = locking.ALL_SET
2273

    
2274
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2275
    self.do_locking = self.do_node_query and self.op.use_locking
2276
    if self.do_locking:
2277
      # if we don't request only static fields, we need to lock the nodes
2278
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
2279

    
2280

    
2281
  def CheckPrereq(self):
2282
    """Check prerequisites.
2283

2284
    """
2285
    # The validation of the node list is done in the _GetWantedNodes,
2286
    # if non empty, and if empty, there's no validation to do
2287
    pass
2288

    
2289
  def Exec(self, feedback_fn):
2290
    """Computes the list of nodes and their attributes.
2291

2292
    """
2293
    all_info = self.cfg.GetAllNodesInfo()
2294
    if self.do_locking:
2295
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
2296
    elif self.wanted != locking.ALL_SET:
2297
      nodenames = self.wanted
2298
      missing = set(nodenames).difference(all_info.keys())
2299
      if missing:
2300
        raise errors.OpExecError(
2301
          "Some nodes were removed before retrieving their data: %s" % missing)
2302
    else:
2303
      nodenames = all_info.keys()
2304

    
2305
    nodenames = utils.NiceSort(nodenames)
2306
    nodelist = [all_info[name] for name in nodenames]
2307

    
2308
    # begin data gathering
2309

    
2310
    if self.do_node_query:
2311
      live_data = {}
2312
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2313
                                          self.cfg.GetHypervisorType())
2314
      for name in nodenames:
2315
        nodeinfo = node_data[name]
2316
        if not nodeinfo.fail_msg and nodeinfo.payload:
2317
          nodeinfo = nodeinfo.payload
2318
          fn = utils.TryConvert
2319
          live_data[name] = {
2320
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2321
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2322
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
2323
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2324
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
2325
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2326
            "bootid": nodeinfo.get('bootid', None),
2327
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2328
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2329
            }
2330
        else:
2331
          live_data[name] = {}
2332
    else:
2333
      live_data = dict.fromkeys(nodenames, {})
2334

    
2335
    node_to_primary = dict([(name, set()) for name in nodenames])
2336
    node_to_secondary = dict([(name, set()) for name in nodenames])
2337

    
2338
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
2339
                             "sinst_cnt", "sinst_list"))
2340
    if inst_fields & frozenset(self.op.output_fields):
2341
      instancelist = self.cfg.GetInstanceList()
2342

    
2343
      for instance_name in instancelist:
2344
        inst = self.cfg.GetInstanceInfo(instance_name)
2345
        if inst.primary_node in node_to_primary:
2346
          node_to_primary[inst.primary_node].add(inst.name)
2347
        for secnode in inst.secondary_nodes:
2348
          if secnode in node_to_secondary:
2349
            node_to_secondary[secnode].add(inst.name)
2350

    
2351
    master_node = self.cfg.GetMasterNode()
2352

    
2353
    # end data gathering
2354

    
2355
    output = []
2356
    for node in nodelist:
2357
      node_output = []
2358
      for field in self.op.output_fields:
2359
        if field == "name":
2360
          val = node.name
2361
        elif field == "pinst_list":
2362
          val = list(node_to_primary[node.name])
2363
        elif field == "sinst_list":
2364
          val = list(node_to_secondary[node.name])
2365
        elif field == "pinst_cnt":
2366
          val = len(node_to_primary[node.name])
2367
        elif field == "sinst_cnt":
2368
          val = len(node_to_secondary[node.name])
2369
        elif field == "pip":
2370
          val = node.primary_ip
2371
        elif field == "sip":
2372
          val = node.secondary_ip
2373
        elif field == "tags":
2374
          val = list(node.GetTags())
2375
        elif field == "serial_no":
2376
          val = node.serial_no
2377
        elif field == "ctime":
2378
          val = node.ctime
2379
        elif field == "mtime":
2380
          val = node.mtime
2381
        elif field == "master_candidate":
2382
          val = node.master_candidate
2383
        elif field == "master":
2384
          val = node.name == master_node
2385
        elif field == "offline":
2386
          val = node.offline
2387
        elif field == "drained":
2388
          val = node.drained
2389
        elif self._FIELDS_DYNAMIC.Matches(field):
2390
          val = live_data[node.name].get(field, None)
2391
        elif field == "role":
2392
          if node.name == master_node:
2393
            val = "M"
2394
          elif node.master_candidate:
2395
            val = "C"
2396
          elif node.drained:
2397
            val = "D"
2398
          elif node.offline:
2399
            val = "O"
2400
          else:
2401
            val = "R"
2402
        else:
2403
          raise errors.ParameterError(field)
2404
        node_output.append(val)
2405
      output.append(node_output)
2406

    
2407
    return output
2408
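# Illustrative sketch, not part of the original module: the query LUs above
# return rows ordered like op.output_fields, so a caller could pair the values
# back up with their field names like this.
def _ExampleRowsToDicts(output_fields, rows):
  """Zip each result row with the requested field names (example only).

  """
  return [dict(zip(output_fields, row)) for row in rows]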

    
2409

    
2410
class LUQueryNodeVolumes(NoHooksLU):
2411
  """Logical unit for getting volumes on node(s).
2412

2413
  """
2414
  _OP_REQP = ["nodes", "output_fields"]
2415
  REQ_BGL = False
2416
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
2417
  _FIELDS_STATIC = utils.FieldSet("node")
2418

    
2419
  def ExpandNames(self):
2420
    _CheckOutputFields(static=self._FIELDS_STATIC,
2421
                       dynamic=self._FIELDS_DYNAMIC,
2422
                       selected=self.op.output_fields)
2423

    
2424
    self.needed_locks = {}
2425
    self.share_locks[locking.LEVEL_NODE] = 1
2426
    if not self.op.nodes:
2427
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2428
    else:
2429
      self.needed_locks[locking.LEVEL_NODE] = \
2430
        _GetWantedNodes(self, self.op.nodes)
2431

    
2432
  def CheckPrereq(self):
2433
    """Check prerequisites.
2434

2435
    This checks that the fields required are valid output fields.
2436

2437
    """
2438
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2439

    
2440
  def Exec(self, feedback_fn):
2441
    """Computes the list of nodes and their attributes.
2442

2443
    """
2444
    nodenames = self.nodes
2445
    volumes = self.rpc.call_node_volumes(nodenames)
2446

    
2447
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
2448
             in self.cfg.GetInstanceList()]
2449

    
2450
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
2451

    
2452
    output = []
2453
    for node in nodenames:
2454
      nresult = volumes[node]
2455
      if nresult.offline:
2456
        continue
2457
      msg = nresult.fail_msg
2458
      if msg:
2459
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
2460
        continue
2461

    
2462
      node_vols = nresult.payload[:]
2463
      node_vols.sort(key=lambda vol: vol['dev'])
2464

    
2465
      for vol in node_vols:
2466
        node_output = []
2467
        for field in self.op.output_fields:
2468
          if field == "node":
2469
            val = node
2470
          elif field == "phys":
2471
            val = vol['dev']
2472
          elif field == "vg":
2473
            val = vol['vg']
2474
          elif field == "name":
2475
            val = vol['name']
2476
          elif field == "size":
2477
            val = int(float(vol['size']))
2478
          elif field == "instance":
2479
            for inst in ilist:
2480
              if node not in lv_by_node[inst]:
2481
                continue
2482
              if vol['name'] in lv_by_node[inst][node]:
2483
                val = inst.name
2484
                break
2485
            else:
2486
              val = '-'
2487
          else:
2488
            raise errors.ParameterError(field)
2489
          node_output.append(str(val))
2490

    
2491
        output.append(node_output)
2492

    
2493
    return output
2494

    
2495

    
2496
class LUQueryNodeStorage(NoHooksLU):
2497
  """Logical unit for getting information on storage units on node(s).
2498

2499
  """
2500
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
2501
  REQ_BGL = False
2502
  _FIELDS_STATIC = utils.FieldSet("node")
2503

    
2504
  def ExpandNames(self):
2505
    storage_type = self.op.storage_type
2506

    
2507
    if storage_type not in constants.VALID_STORAGE_FIELDS:
2508
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type)
2509

    
2510
    dynamic_fields = constants.VALID_STORAGE_FIELDS[storage_type]
2511

    
2512
    _CheckOutputFields(static=self._FIELDS_STATIC,
2513
                       dynamic=utils.FieldSet(*dynamic_fields),
2514
                       selected=self.op.output_fields)
2515

    
2516
    self.needed_locks = {}
2517
    self.share_locks[locking.LEVEL_NODE] = 1
2518

    
2519
    if self.op.nodes:
2520
      self.needed_locks[locking.LEVEL_NODE] = \
2521
        _GetWantedNodes(self, self.op.nodes)
2522
    else:
2523
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2524

    
2525
  def CheckPrereq(self):
2526
    """Check prerequisites.
2527

2528
    This checks that the fields required are valid output fields.
2529

2530
    """
2531
    self.op.name = getattr(self.op, "name", None)
2532

    
2533
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2534

    
2535
  def Exec(self, feedback_fn):
2536
    """Computes the list of nodes and their attributes.
2537

2538
    """
2539
    # Always get name to sort by
2540
    if constants.SF_NAME in self.op.output_fields:
2541
      fields = self.op.output_fields[:]
2542
    else:
2543
      fields = [constants.SF_NAME] + self.op.output_fields
2544

    
2545
    # Never ask for node as it's only known to the LU
2546
    while "node" in fields:
2547
      fields.remove("node")
2548

    
2549
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
2550
    name_idx = field_idx[constants.SF_NAME]
2551

    
2552
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
2553
    data = self.rpc.call_storage_list(self.nodes,
2554
                                      self.op.storage_type, st_args,
2555
                                      self.op.name, fields)
2556

    
2557
    result = []
2558

    
2559
    for node in utils.NiceSort(self.nodes):
2560
      nresult = data[node]
2561
      if nresult.offline:
2562
        continue
2563

    
2564
      msg = nresult.fail_msg
2565
      if msg:
2566
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
2567
        continue
2568

    
2569
      rows = dict([(row[name_idx], row) for row in nresult.payload])
2570

    
2571
      for name in utils.NiceSort(rows.keys()):
2572
        row = rows[name]
2573

    
2574
        out = []
2575

    
2576
        for field in self.op.output_fields:
2577
          if field == "node":
2578
            val = node
2579
          elif field in field_idx:
2580
            val = row[field_idx[field]]
2581
          else:
2582
            raise errors.ParameterError(field)
2583

    
2584
          out.append(val)
2585

    
2586
        result.append(out)
2587

    
2588
    return result
2589

    
2590

    
2591
class LUModifyNodeStorage(NoHooksLU):
2592
  """Logical unit for modifying a storage volume on a node.
2593

2594
  """
2595
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
2596
  REQ_BGL = False
2597

    
2598
  def CheckArguments(self):
2599
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2600
    if node_name is None:
2601
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2602

    
2603
    self.op.node_name = node_name
2604

    
2605
    storage_type = self.op.storage_type
2606
    if storage_type not in constants.VALID_STORAGE_FIELDS:
2607
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type)
2608

    
2609
  def ExpandNames(self):
2610
    self.needed_locks = {
2611
      locking.LEVEL_NODE: self.op.node_name,
2612
      }
2613

    
2614
  def CheckPrereq(self):
2615
    """Check prerequisites.
2616

2617
    """
2618
    storage_type = self.op.storage_type
2619

    
2620
    try:
2621
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
2622
    except KeyError:
2623
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
2624
                                 " modified" % storage_type)
2625

    
2626
    diff = set(self.op.changes.keys()) - modifiable
2627
    if diff:
2628
      raise errors.OpPrereqError("The following fields can not be modified for"
2629
                                 " storage units of type '%s': %r" %
2630
                                 (storage_type, list(diff)))
2631

    
2632
  def Exec(self, feedback_fn):
2633
    """Computes the list of nodes and their attributes.
2634

2635
    """
2636
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
2637
    result = self.rpc.call_storage_modify(self.op.node_name,
2638
                                          self.op.storage_type, st_args,
2639
                                          self.op.name, self.op.changes)
2640
    result.Raise("Failed to modify storage unit '%s' on %s" %
2641
                 (self.op.name, self.op.node_name))
2642

    
2643

    
2644
class LUAddNode(LogicalUnit):
2645
  """Logical unit for adding node to the cluster.
2646

2647
  """
2648
  HPATH = "node-add"
2649
  HTYPE = constants.HTYPE_NODE
2650
  _OP_REQP = ["node_name"]
2651

    
2652
  def BuildHooksEnv(self):
2653
    """Build hooks env.
2654

2655
    This will run on all nodes before, and on all nodes + the new node after.
2656

2657
    """
2658
    env = {
2659
      "OP_TARGET": self.op.node_name,
2660
      "NODE_NAME": self.op.node_name,
2661
      "NODE_PIP": self.op.primary_ip,
2662
      "NODE_SIP": self.op.secondary_ip,
2663
      }
2664
    nodes_0 = self.cfg.GetNodeList()
2665
    nodes_1 = nodes_0 + [self.op.node_name, ]
2666
    return env, nodes_0, nodes_1
2667

    
2668
  def CheckPrereq(self):
2669
    """Check prerequisites.
2670

2671
    This checks:
2672
     - the new node is not already in the config
2673
     - it is resolvable
2674
     - its parameters (single/dual homed) matches the cluster
2675

2676
    Any errors are signaled by raising errors.OpPrereqError.
2677

2678
    """
2679
    node_name = self.op.node_name
2680
    cfg = self.cfg
2681

    
2682
    dns_data = utils.HostInfo(node_name)
2683

    
2684
    node = dns_data.name
2685
    primary_ip = self.op.primary_ip = dns_data.ip
2686
    secondary_ip = getattr(self.op, "secondary_ip", None)
2687
    if secondary_ip is None:
2688
      secondary_ip = primary_ip
2689
    if not utils.IsValidIP(secondary_ip):
2690
      raise errors.OpPrereqError("Invalid secondary IP given")
2691
    self.op.secondary_ip = secondary_ip
2692

    
2693
    node_list = cfg.GetNodeList()
2694
    if not self.op.readd and node in node_list:
2695
      raise errors.OpPrereqError("Node %s is already in the configuration" %
2696
                                 node)
2697
    elif self.op.readd and node not in node_list:
2698
      raise errors.OpPrereqError("Node %s is not in the configuration" % node)
2699

    
2700
    for existing_node_name in node_list:
2701
      existing_node = cfg.GetNodeInfo(existing_node_name)
2702

    
2703
      if self.op.readd and node == existing_node_name:
2704
        if (existing_node.primary_ip != primary_ip or
2705
            existing_node.secondary_ip != secondary_ip):
2706
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
2707
                                     " address configuration as before")
2708
        continue
2709

    
2710
      if (existing_node.primary_ip == primary_ip or
2711
          existing_node.secondary_ip == primary_ip or
2712
          existing_node.primary_ip == secondary_ip or
2713
          existing_node.secondary_ip == secondary_ip):
2714
        raise errors.OpPrereqError("New node ip address(es) conflict with"
2715
                                   " existing node %s" % existing_node.name)
2716

    
2717
    # check that the type of the node (single versus dual homed) is the
2718
    # same as for the master
2719
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
2720
    master_singlehomed = myself.secondary_ip == myself.primary_ip
2721
    newbie_singlehomed = secondary_ip == primary_ip
2722
    if master_singlehomed != newbie_singlehomed:
2723
      if master_singlehomed:
2724
        raise errors.OpPrereqError("The master has no private ip but the"
2725
                                   " new node has one")
2726
      else:
2727
        raise errors.OpPrereqError("The master has a private ip but the"
2728
                                   " new node doesn't have one")
2729

    
2730
    # checks reachability
2731
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
2732
      raise errors.OpPrereqError("Node not reachable by ping")
2733

    
2734
    if not newbie_singlehomed:
2735
      # check reachability from my secondary ip to newbie's secondary ip
2736
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
2737
                           source=myself.secondary_ip):
2738
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
2739
                                   " based ping to noded port")
2740

    
2741
    cp_size = self.cfg.GetClusterInfo().candidate_pool_size
2742
    if self.op.readd:
2743
      exceptions = [node]
2744
    else:
2745
      exceptions = []
2746
    mc_now, mc_max = self.cfg.GetMasterCandidateStats(exceptions)
2747
    # the new node will increase mc_max with one, so:
2748
    mc_max = min(mc_max + 1, cp_size)
2749
    self.master_candidate = mc_now < mc_max
2750

    
2751
    if self.op.readd:
2752
      self.new_node = self.cfg.GetNodeInfo(node)
2753
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
2754
    else:
2755
      self.new_node = objects.Node(name=node,
2756
                                   primary_ip=primary_ip,
2757
                                   secondary_ip=secondary_ip,
2758
                                   master_candidate=self.master_candidate,
2759
                                   offline=False, drained=False)
2760
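  # Worked example (illustrative, not from the original sources) for the
  # master-candidate arithmetic in CheckPrereq above, assuming
  # GetMasterCandidateStats returns (current, maximum possible) candidates:
  # with candidate_pool_size = 10 and three nodes that are all candidates,
  # mc_now = mc_max = 3; the node being added raises mc_max to
  # min(3 + 1, 10) = 4, and since mc_now (3) < mc_max (4) the new node will
  # be marked as a master candidate.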

    
2761
  def Exec(self, feedback_fn):
2762
    """Adds the new node to the cluster.
2763

2764
    """
2765
    new_node = self.new_node
2766
    node = new_node.name
2767

    
2768
    # for re-adds, reset the offline/drained/master-candidate flags;
2769
    # we need to reset here, otherwise offline would prevent RPC calls
2770
    # later in the procedure; this also means that if the re-add
2771
    # fails, we are left with a non-offlined, broken node
2772
    if self.op.readd:
2773
      new_node.drained = new_node.offline = False
2774
      self.LogInfo("Readding a node, the offline/drained flags were reset")
2775
      # if we demote the node, we do cleanup later in the procedure
2776
      new_node.master_candidate = self.master_candidate
2777

    
2778
    # notify the user about any possible mc promotion
2779
    if new_node.master_candidate:
2780
      self.LogInfo("Node will be a master candidate")
2781

    
2782
    # check connectivity
2783
    result = self.rpc.call_version([node])[node]
2784
    result.Raise("Can't get version information from node %s" % node)
2785
    if constants.PROTOCOL_VERSION == result.payload:
2786
      logging.info("Communication to node %s fine, sw version %s match",
2787
                   node, result.payload)
2788
    else:
2789
      raise errors.OpExecError("Version mismatch master version %s,"
2790
                               " node version %s" %
2791
                               (constants.PROTOCOL_VERSION, result.payload))
2792

    
2793
    # setup ssh on node
2794
    logging.info("Copy ssh key to node %s", node)
2795
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
2796
    keyarray = []
2797
    keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
2798
                constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
2799
                priv_key, pub_key]
2800

    
2801
    for i in keyfiles:
2802
      keyarray.append(utils.ReadFile(i))
2803

    
2804
    result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
2805
                                    keyarray[2],
2806
                                    keyarray[3], keyarray[4], keyarray[5])
2807
    result.Raise("Cannot transfer ssh keys to the new node")
2808

    
2809
    # Add node to our /etc/hosts, and add key to known_hosts
2810
    if self.cfg.GetClusterInfo().modify_etc_hosts:
2811
      utils.AddHostToEtcHosts(new_node.name)
2812

    
2813
    if new_node.secondary_ip != new_node.primary_ip:
2814
      result = self.rpc.call_node_has_ip_address(new_node.name,
2815
                                                 new_node.secondary_ip)
2816
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
2817
                   prereq=True)
2818
      if not result.payload:
2819
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
2820
                                 " you gave (%s). Please fix and re-run this"
2821
                                 " command." % new_node.secondary_ip)
2822

    
2823
    node_verify_list = [self.cfg.GetMasterNode()]
2824
    node_verify_param = {
2825
      'nodelist': [node],
2826
      # TODO: do a node-net-test as well?
2827
    }
2828

    
2829
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
2830
                                       self.cfg.GetClusterName())
2831
    for verifier in node_verify_list:
2832
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
2833
      nl_payload = result[verifier].payload['nodelist']
2834
      if nl_payload:
2835
        for failed in nl_payload:
2836
          feedback_fn("ssh/hostname verification failed %s -> %s" %
2837
                      (verifier, nl_payload[failed]))
2838
        raise errors.OpExecError("ssh/hostname verification failed.")
2839

    
2840
    if self.op.readd:
2841
      _RedistributeAncillaryFiles(self)
2842
      self.context.ReaddNode(new_node)
2843
      # make sure we redistribute the config
2844
      self.cfg.Update(new_node)
2845
      # and make sure the new node will not have old files around
2846
      if not new_node.master_candidate:
2847
        result = self.rpc.call_node_demote_from_mc(new_node.name)
2848
        msg = result.fail_msg
2849
        if msg:
2850
          self.LogWarning("Node failed to demote itself from master"
2851
                          " candidate status: %s" % msg)
2852
    else:
2853
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
2854
      self.context.AddNode(new_node)
2855

    
2856

    
2857
class LUSetNodeParams(LogicalUnit):
2858
  """Modifies the parameters of a node.
2859

2860
  """
2861
  HPATH = "node-modify"
2862
  HTYPE = constants.HTYPE_NODE
2863
  _OP_REQP = ["node_name"]
2864
  REQ_BGL = False
2865

    
2866
  def CheckArguments(self):
2867
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2868
    if node_name is None:
2869
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2870
    self.op.node_name = node_name
2871
    _CheckBooleanOpField(self.op, 'master_candidate')
2872
    _CheckBooleanOpField(self.op, 'offline')
2873
    _CheckBooleanOpField(self.op, 'drained')
2874
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
2875
    if all_mods.count(None) == 3:
2876
      raise errors.OpPrereqError("Please pass at least one modification")
2877
    if all_mods.count(True) > 1:
2878
      raise errors.OpPrereqError("Can't set the node into more than one"
2879
                                 " state at the same time")
2880
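  # Note (illustrative, not from the original sources): each of the offline,
  # drained and master_candidate flags checked above is tri-state: None means
  # "leave unchanged", while True or False request a change. Therefore
  # "all_mods.count(None) == 3" means no modification was requested at all,
  # and "all_mods.count(True) > 1" would ask for two conflicting states.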

    
2881
  def ExpandNames(self):
2882
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
2883

    
2884
  def BuildHooksEnv(self):
2885
    """Build hooks env.
2886

2887
    This runs on the master node.
2888

2889
    """
2890
    env = {
2891
      "OP_TARGET": self.op.node_name,
2892
      "MASTER_CANDIDATE": str(self.op.master_candidate),
2893
      "OFFLINE": str(self.op.offline),
2894
      "DRAINED": str(self.op.drained),
2895
      }
2896
    nl = [self.cfg.GetMasterNode(),
2897
          self.op.node_name]
2898
    return env, nl, nl
2899

    
2900
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the requested flag changes against the node's current state.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via masterfailover")

    if ((self.op.master_candidate == False or self.op.offline == True or
         self.op.drained == True) and node.master_candidate):
      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
      num_candidates, _ = self.cfg.GetMasterCandidateStats()
      if num_candidates <= cp_size:
        msg = ("Not enough master candidates (desired"
               " %d, new value will be %d)" % (cp_size, num_candidates-1))
        if self.op.force:
          self.LogWarning(msg)
        else:
          raise errors.OpPrereqError(msg)
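    # Illustrative example (comment added for clarity): with
    # candidate_pool_size = 10 and exactly 10 current master candidates,
    # demoting/offlining/draining this candidate would leave only 9, so
    # the request is refused unless the force flag is set.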

    if (self.op.master_candidate == True and
        ((node.offline and not self.op.offline == False) or
         (node.drained and not self.op.drained == False))):
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
                                 " to master_candidate" % node.name)

    return

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node

    result = []
    changed_mc = False
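    # "result" collects (parameter, value-or-explanation) pairs that are
    # returned to the caller, e.g. ("offline", "True") or
    # ("master_candidate", "auto-demotion due to offline").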

    if self.op.offline is not None:
      node.offline = self.op.offline
      result.append(("offline", str(self.op.offline)))
      if self.op.offline == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to offline"))
        if node.drained:
          node.drained = False
          result.append(("drained", "clear drained status due to offline"))

    if self.op.master_candidate is not None:
      node.master_candidate = self.op.master_candidate
      changed_mc = True
      result.append(("master_candidate", str(self.op.master_candidate)))
      if self.op.master_candidate == False:
        rrc = self.rpc.call_node_demote_from_mc(node.name)
        msg = rrc.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s" % msg)

    if self.op.drained is not None:
      node.drained = self.op.drained
      result.append(("drained", str(self.op.drained)))
      if self.op.drained == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to drain"))
          rrc = self.rpc.call_node_demote_from_mc(node.name)
          msg = rrc.RemoteFailMsg()
          if msg:
            self.LogWarning("Node failed to demote itself: %s" % msg)
        if node.offline:
          node.offline = False
          result.append(("offline", "clear offline status due to drain"))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node)
    # this will trigger job queue propagation or cleanup
    if changed_mc:
      self.context.ReaddNode(node)

    return result


class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_REQP = ["node_name", "force"]
  REQ_BGL = False

  def CheckArguments(self):
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if node_name is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
    self.op.node_name = node_name
    if node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set")

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This LU has no prereqs.

    """
    pass

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUQueryClusterInfo(NoHooksLU):
3036
  """Query cluster configuration.
3037

3038
  """
3039
  _OP_REQP = []
3040
  REQ_BGL = False
3041

    
3042
  def ExpandNames(self):
3043
    self.needed_locks = {}
3044

    
3045
  def CheckPrereq(self):
3046
    """No prerequsites needed for this LU.
3047

3048
    """
3049
    pass
3050

    
3051
  def Exec(self, feedback_fn):
3052
    """Return cluster config.
3053

3054
    """
3055
    cluster = self.cfg.GetClusterInfo()
3056
    result = {
3057
      "software_version": constants.RELEASE_VERSION,
3058
      "protocol_version": constants.PROTOCOL_VERSION,
3059
      "config_version": constants.CONFIG_VERSION,
3060
      "os_api_version": max(constants.OS_API_VERSIONS),
3061
      "export_version": constants.EXPORT_VERSION,
3062
      "architecture": (platform.architecture()[0], platform.machine()),
3063
      "name": cluster.cluster_name,
3064
      "master": cluster.master_node,
3065
      "default_hypervisor": cluster.enabled_hypervisors[0],
3066
      "enabled_hypervisors": cluster.enabled_hypervisors,
3067
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3068
                        for hypervisor_name in cluster.enabled_hypervisors]),
3069
      "beparams": cluster.beparams,
3070
      "nicparams": cluster.nicparams,
3071
      "candidate_pool_size": cluster.candidate_pool_size,
3072
      "master_netdev": cluster.master_netdev,
3073
      "volume_group_name": cluster.volume_group_name,
3074
      "file_storage_dir": cluster.file_storage_dir,
3075
      "ctime": cluster.ctime,
3076
      "mtime": cluster.mtime,
3077
      "tags": list(cluster.GetTags()),
3078
      }
3079

    
3080
    return result
3081

    
3082

    
3083
class LUQueryConfigValues(NoHooksLU):
3084
  """Return configuration values.
3085

3086
  """
3087
  _OP_REQP = []
3088
  REQ_BGL = False
3089
  _FIELDS_DYNAMIC = utils.FieldSet()
3090
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
3091
                                  "watcher_pause")
3092

    
3093
  def ExpandNames(self):
3094
    self.needed_locks = {}
3095

    
3096
    _CheckOutputFields(static=self._FIELDS_STATIC,
3097
                       dynamic=self._FIELDS_DYNAMIC,
3098
                       selected=self.op.output_fields)
3099

    
3100
  def CheckPrereq(self):
3101
    """No prerequisites.
3102

3103
    """
3104
    pass
3105

    
3106
  def Exec(self, feedback_fn):
3107
    """Dump a representation of the cluster config to the standard output.
3108

3109
    """
3110
    values = []
3111
    for field in self.op.output_fields:
3112
      if field == "cluster_name":
3113
        entry = self.cfg.GetClusterName()
3114
      elif field == "master_node":
3115
        entry = self.cfg.GetMasterNode()
3116
      elif field == "drain_flag":
3117
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
3118
      elif field == "watcher_pause":
3119
        return utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
3120
      else:
3121
        raise errors.ParameterError(field)
3122
      values.append(entry)
3123
    return values
3124

    
3125

    
3126
class LUActivateInstanceDisks(NoHooksLU):
3127
  """Bring up an instance's disks.
3128

3129
  """
3130
  _OP_REQP = ["instance_name"]
3131
  REQ_BGL = False
3132

    
3133
  def ExpandNames(self):
3134
    self._ExpandAndLockInstance()
3135
    self.needed_locks[locking.LEVEL_NODE] = []
3136
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3137

    
3138
  def DeclareLocks(self, level):
3139
    if level == locking.LEVEL_NODE:
3140
      self._LockInstancesNodes()
3141

    
3142
  def CheckPrereq(self):
3143
    """Check prerequisites.
3144

3145
    This checks that the instance is in the cluster.
3146

3147
    """
3148
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3149
    assert self.instance is not None, \
3150
      "Cannot retrieve locked instance %s" % self.op.instance_name
3151
    _CheckNodeOnline(self, self.instance.primary_node)
3152
    if not hasattr(self.op, "ignore_size"):
3153
      self.op.ignore_size = False
3154

    
3155
  def Exec(self, feedback_fn):
3156
    """Activate the disks.
3157

3158
    """
3159
    disks_ok, disks_info = \
3160
              _AssembleInstanceDisks(self, self.instance,
3161
                                     ignore_size=self.op.ignore_size)
3162
    if not disks_ok:
3163
      raise errors.OpExecError("Cannot activate block devices")
3164

    
3165
    return disks_info
3166

    
3167

    
3168
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: a tuple of (disks_ok, device_info); disks_ok is False if the
      operation failed, and device_info is a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in instance.disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in instance.disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
    device_info.append((instance.primary_node, inst_disk.iv_name,
                        result.payload))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in instance.disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


3248
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUDeactivateInstanceDisks(NoHooksLU):
3264
  """Shutdown an instance's disks.
3265

3266
  """
3267
  _OP_REQP = ["instance_name"]
3268
  REQ_BGL = False
3269

    
3270
  def ExpandNames(self):
3271
    self._ExpandAndLockInstance()
3272
    self.needed_locks[locking.LEVEL_NODE] = []
3273
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3274

    
3275
  def DeclareLocks(self, level):
3276
    if level == locking.LEVEL_NODE:
3277
      self._LockInstancesNodes()
3278

    
3279
  def CheckPrereq(self):
3280
    """Check prerequisites.
3281

3282
    This checks that the instance is in the cluster.
3283

3284
    """
3285
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3286
    assert self.instance is not None, \
3287
      "Cannot retrieve locked instance %s" % self.op.instance_name
3288

    
3289
  def Exec(self, feedback_fn):
3290
    """Deactivate the disks
3291

3292
    """
3293
    instance = self.instance
3294
    _SafeShutdownInstanceDisks(self, instance)
3295

    
3296

    
3297
def _SafeShutdownInstanceDisks(lu, instance):
3298
  """Shutdown block devices of an instance.
3299

3300
  This function checks if an instance is running, before calling
3301
  _ShutdownInstanceDisks.
3302

3303
  """
3304
  pnode = instance.primary_node
3305
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
3306
  ins_l.Raise("Can't contact node %s" % pnode)
3307

    
3308
  if instance.name in ins_l.payload:
3309
    raise errors.OpExecError("Instance is running, can't shutdown"
3310
                             " block devices.")
3311

    
3312
  _ShutdownInstanceDisks(lu, instance)
3313

    
3314

    
3315
def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
3316
  """Shutdown block devices of an instance.
3317

3318
  This does the shutdown on all nodes of the instance.
3319

3320
  If the ignore_primary is false, errors on the primary node are
3321
  ignored.
3322

3323
  """
3324
  all_result = True
3325
  for disk in instance.disks:
3326
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
3327
      lu.cfg.SetDiskID(top_disk, node)
3328
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
3329
      msg = result.fail_msg
3330
      if msg:
3331
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
3332
                      disk.iv_name, node, msg)
3333
        if not ignore_primary or node != instance.primary_node:
3334
          all_result = False
3335
  return all_result
3336

    
3337

    
3338
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node, prereq=True)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem))
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem))


class LUStartupInstance(LogicalUnit):
3373
  """Starts an instance.
3374

3375
  """
3376
  HPATH = "instance-start"
3377
  HTYPE = constants.HTYPE_INSTANCE
3378
  _OP_REQP = ["instance_name", "force"]
3379
  REQ_BGL = False
3380

    
3381
  def ExpandNames(self):
3382
    self._ExpandAndLockInstance()
3383

    
3384
  def BuildHooksEnv(self):
3385
    """Build hooks env.
3386

3387
    This runs on master, primary and secondary nodes of the instance.
3388

3389
    """
3390
    env = {
3391
      "FORCE": self.op.force,
3392
      }
3393
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3394
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3395
    return env, nl, nl
3396

    
3397
  def CheckPrereq(self):
3398
    """Check prerequisites.
3399

3400
    This checks that the instance is in the cluster.
3401

3402
    """
3403
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3404
    assert self.instance is not None, \
3405
      "Cannot retrieve locked instance %s" % self.op.instance_name
3406

    
3407
    # extra beparams
3408
    self.beparams = getattr(self.op, "beparams", {})
3409
    if self.beparams:
3410
      if not isinstance(self.beparams, dict):
3411
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
3412
                                   " dict" % (type(self.beparams), ))
3413
      # fill the beparams dict
3414
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
3415
      self.op.beparams = self.beparams
3416

    
3417
    # extra hvparams
    self.hvparams = getattr(self.op, "hvparams", {})
    if self.hvparams:
      if not isinstance(self.hvparams, dict):
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
                                   " dict" % (type(self.hvparams), ))

      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
                                    instance.hvparams)
      filled_hvp.update(self.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
      self.op.hvparams = self.hvparams
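    # Layering used above (sketch, comment added for clarity): the
    # cluster-level defaults for the instance's hypervisor are taken
    # first, overridden by the instance's own hvparams, and finally by
    # the one-off parameters passed with this opcode; only the merged
    # dict is syntax-checked and validated on the nodes.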

    
3435
    _CheckNodeOnline(self, instance.primary_node)
3436

    
3437
    bep = self.cfg.GetClusterInfo().FillBE(instance)
3438
    # check bridges existence
3439
    _CheckInstanceBridgesExist(self, instance)
3440

    
3441
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3442
                                              instance.name,
3443
                                              instance.hypervisor)
3444
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3445
                      prereq=True)
3446
    if not remote_info.payload: # not running already
3447
      _CheckNodeFreeMemory(self, instance.primary_node,
3448
                           "starting instance %s" % instance.name,
3449
                           bep[constants.BE_MEMORY], instance.hypervisor)
3450

    
3451
  def Exec(self, feedback_fn):
3452
    """Start the instance.
3453

3454
    """
3455
    instance = self.instance
3456
    force = self.op.force
3457

    
3458
    self.cfg.MarkInstanceUp(instance.name)
3459

    
3460
    node_current = instance.primary_node
3461

    
3462
    _StartInstanceDisks(self, instance, force)
3463

    
3464
    result = self.rpc.call_instance_start(node_current, instance,
3465
                                          self.hvparams, self.beparams)
3466
    msg = result.fail_msg
3467
    if msg:
3468
      _ShutdownInstanceDisks(self, instance)
3469
      raise errors.OpExecError("Could not start instance: %s" % msg)
3470

    
3471

    
3472
class LURebootInstance(LogicalUnit):
3473
  """Reboot an instance.
3474

3475
  """
3476
  HPATH = "instance-reboot"
3477
  HTYPE = constants.HTYPE_INSTANCE
3478
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
3479
  REQ_BGL = False
3480

    
3481
  def ExpandNames(self):
3482
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
3483
                                   constants.INSTANCE_REBOOT_HARD,
3484
                                   constants.INSTANCE_REBOOT_FULL]:
3485
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
3486
                                  (constants.INSTANCE_REBOOT_SOFT,
3487
                                   constants.INSTANCE_REBOOT_HARD,
3488
                                   constants.INSTANCE_REBOOT_FULL))
3489
    self._ExpandAndLockInstance()
3490

    
3491
  def BuildHooksEnv(self):
3492
    """Build hooks env.
3493

3494
    This runs on master, primary and secondary nodes of the instance.
3495

3496
    """
3497
    env = {
3498
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
3499
      "REBOOT_TYPE": self.op.reboot_type,
3500
      }
3501
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3502
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3503
    return env, nl, nl
3504

    
3505
  def CheckPrereq(self):
3506
    """Check prerequisites.
3507

3508
    This checks that the instance is in the cluster.
3509

3510
    """
3511
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3512
    assert self.instance is not None, \
3513
      "Cannot retrieve locked instance %s" % self.op.instance_name
3514

    
3515
    _CheckNodeOnline(self, instance.primary_node)
3516

    
3517
    # check bridges existence
3518
    _CheckInstanceBridgesExist(self, instance)
3519

    
3520
  def Exec(self, feedback_fn):
3521
    """Reboot the instance.
3522

3523
    """
3524
    instance = self.instance
3525
    ignore_secondaries = self.op.ignore_secondaries
3526
    reboot_type = self.op.reboot_type
3527

    
3528
    node_current = instance.primary_node
3529

    
3530
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
3531
                       constants.INSTANCE_REBOOT_HARD]:
3532
      for disk in instance.disks:
3533
        self.cfg.SetDiskID(disk, node_current)
3534
      result = self.rpc.call_instance_reboot(node_current, instance,
3535
                                             reboot_type)
3536
      result.Raise("Could not reboot instance")
3537
    else:
3538
      result = self.rpc.call_instance_shutdown(node_current, instance)
3539
      result.Raise("Could not shutdown instance for full reboot")
3540
      _ShutdownInstanceDisks(self, instance)
3541
      _StartInstanceDisks(self, instance, ignore_secondaries)
3542
      result = self.rpc.call_instance_start(node_current, instance, None, None)
3543
      msg = result.fail_msg
3544
      if msg:
3545
        _ShutdownInstanceDisks(self, instance)
3546
        raise errors.OpExecError("Could not start instance for"
3547
                                 " full reboot: %s" % msg)
3548

    
3549
    self.cfg.MarkInstanceUp(instance.name)
3550

    
3551

    
3552
class LUShutdownInstance(LogicalUnit):
3553
  """Shutdown an instance.
3554

3555
  """
3556
  HPATH = "instance-stop"
3557
  HTYPE = constants.HTYPE_INSTANCE
3558
  _OP_REQP = ["instance_name"]
3559
  REQ_BGL = False
3560

    
3561
  def ExpandNames(self):
3562
    self._ExpandAndLockInstance()
3563

    
3564
  def BuildHooksEnv(self):
3565
    """Build hooks env.
3566

3567
    This runs on master, primary and secondary nodes of the instance.
3568

3569
    """
3570
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3571
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3572
    return env, nl, nl
3573

    
3574
  def CheckPrereq(self):
3575
    """Check prerequisites.
3576

3577
    This checks that the instance is in the cluster.
3578

3579
    """
3580
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3581
    assert self.instance is not None, \
3582
      "Cannot retrieve locked instance %s" % self.op.instance_name
3583
    _CheckNodeOnline(self, self.instance.primary_node)
3584

    
3585
  def Exec(self, feedback_fn):
3586
    """Shutdown the instance.
3587

3588
    """
3589
    instance = self.instance
3590
    node_current = instance.primary_node
3591
    self.cfg.MarkInstanceDown(instance.name)
3592
    result = self.rpc.call_instance_shutdown(node_current, instance)
3593
    msg = result.fail_msg
3594
    if msg:
3595
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)
3596

    
3597
    _ShutdownInstanceDisks(self, instance)
3598

    
3599

    
3600
class LUReinstallInstance(LogicalUnit):
3601
  """Reinstall an instance.
3602

3603
  """
3604
  HPATH = "instance-reinstall"
3605
  HTYPE = constants.HTYPE_INSTANCE
3606
  _OP_REQP = ["instance_name"]
3607
  REQ_BGL = False
3608

    
3609
  def ExpandNames(self):
3610
    self._ExpandAndLockInstance()
3611

    
3612
  def BuildHooksEnv(self):
3613
    """Build hooks env.
3614

3615
    This runs on master, primary and secondary nodes of the instance.
3616

3617
    """
3618
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3619
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3620
    return env, nl, nl
3621

    
3622
  def CheckPrereq(self):
3623
    """Check prerequisites.
3624

3625
    This checks that the instance is in the cluster and is not running.
3626

3627
    """
3628
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3629
    assert instance is not None, \
3630
      "Cannot retrieve locked instance %s" % self.op.instance_name
3631
    _CheckNodeOnline(self, instance.primary_node)
3632

    
3633
    if instance.disk_template == constants.DT_DISKLESS:
3634
      raise errors.OpPrereqError("Instance '%s' has no disks" %
3635
                                 self.op.instance_name)
3636
    if instance.admin_up:
3637
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3638
                                 self.op.instance_name)
3639
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3640
                                              instance.name,
3641
                                              instance.hypervisor)
3642
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3643
                      prereq=True)
3644
    if remote_info.payload:
3645
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3646
                                 (self.op.instance_name,
3647
                                  instance.primary_node))
3648

    
3649
    self.op.os_type = getattr(self.op, "os_type", None)
3650
    if self.op.os_type is not None:
3651
      # OS verification
3652
      pnode = self.cfg.GetNodeInfo(
3653
        self.cfg.ExpandNodeName(instance.primary_node))
3654
      if pnode is None:
3655
        raise errors.OpPrereqError("Primary node '%s' is unknown" %
3656
                                   self.op.pnode)
3657
      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
3658
      result.Raise("OS '%s' not in supported OS list for primary node %s" %
3659
                   (self.op.os_type, pnode.name), prereq=True)
3660

    
3661
    self.instance = instance
3662

    
3663
  def Exec(self, feedback_fn):
3664
    """Reinstall the instance.
3665

3666
    """
3667
    inst = self.instance
3668

    
3669
    if self.op.os_type is not None:
3670
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
3671
      inst.os = self.op.os_type
3672
      self.cfg.Update(inst)
3673

    
3674
    _StartInstanceDisks(self, inst, None)
3675
    try:
3676
      feedback_fn("Running the instance OS create scripts...")
3677
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True)
3678
      result.Raise("Could not install OS for instance %s on node %s" %
3679
                   (inst.name, inst.primary_node))
3680
    finally:
3681
      _ShutdownInstanceDisks(self, inst)
3682

    
3683

    
3684
class LURecreateInstanceDisks(LogicalUnit):
3685
  """Recreate an instance's missing disks.
3686

3687
  """
3688
  HPATH = "instance-recreate-disks"
3689
  HTYPE = constants.HTYPE_INSTANCE
3690
  _OP_REQP = ["instance_name", "disks"]
3691
  REQ_BGL = False
3692

    
3693
  def CheckArguments(self):
3694
    """Check the arguments.
3695

3696
    """
3697
    if not isinstance(self.op.disks, list):
3698
      raise errors.OpPrereqError("Invalid disks parameter")
3699
    for item in self.op.disks:
3700
      if (not isinstance(item, int) or
3701
          item < 0):
3702
        raise errors.OpPrereqError("Invalid disk specification '%s'" %
3703
                                   str(item))
3704

    
3705
  def ExpandNames(self):
3706
    self._ExpandAndLockInstance()
3707

    
3708
  def BuildHooksEnv(self):
3709
    """Build hooks env.
3710

3711
    This runs on master, primary and secondary nodes of the instance.
3712

3713
    """
3714
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3715
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3716
    return env, nl, nl
3717

    
3718
  def CheckPrereq(self):
3719
    """Check prerequisites.
3720

3721
    This checks that the instance is in the cluster and is not running.
3722

3723
    """
3724
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3725
    assert instance is not None, \
3726
      "Cannot retrieve locked instance %s" % self.op.instance_name
3727
    _CheckNodeOnline(self, instance.primary_node)
3728

    
3729
    if instance.disk_template == constants.DT_DISKLESS:
3730
      raise errors.OpPrereqError("Instance '%s' has no disks" %
3731
                                 self.op.instance_name)
3732
    if instance.admin_up:
3733
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3734
                                 self.op.instance_name)
3735
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3736
                                              instance.name,
3737
                                              instance.hypervisor)
3738
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3739
                      prereq=True)
3740
    if remote_info.payload:
3741
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3742
                                 (self.op.instance_name,
3743
                                  instance.primary_node))
3744

    
3745
    if not self.op.disks:
3746
      self.op.disks = range(len(instance.disks))
3747
    else:
3748
      for idx in self.op.disks:
3749
        if idx >= len(instance.disks):
3750
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx)
3751

    
3752
    self.instance = instance
3753

    
3754
  def Exec(self, feedback_fn):
3755
    """Recreate the disks.
3756

3757
    """
3758
    to_skip = []
3759
    for idx, disk in enumerate(self.instance.disks):
3760
      if idx not in self.op.disks: # disk idx has not been passed in
3761
        to_skip.append(idx)
3762
        continue
3763

    
3764
    _CreateDisks(self, self.instance, to_skip=to_skip)
3765

    
3766

    
3767
class LURenameInstance(LogicalUnit):
3768
  """Rename an instance.
3769

3770
  """
3771
  HPATH = "instance-rename"
3772
  HTYPE = constants.HTYPE_INSTANCE
3773
  _OP_REQP = ["instance_name", "new_name"]
3774

    
3775
  def BuildHooksEnv(self):
3776
    """Build hooks env.
3777

3778
    This runs on master, primary and secondary nodes of the instance.
3779

3780
    """
3781
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3782
    env["INSTANCE_NEW_NAME"] = self.op.new_name
3783
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3784
    return env, nl, nl
3785

    
3786
  def CheckPrereq(self):
3787
    """Check prerequisites.
3788

3789
    This checks that the instance is in the cluster and is not running.
3790

3791
    """
3792
    instance = self.cfg.GetInstanceInfo(
3793
      self.cfg.ExpandInstanceName(self.op.instance_name))
3794
    if instance is None:
3795
      raise errors.OpPrereqError("Instance '%s' not known" %
3796
                                 self.op.instance_name)
3797
    _CheckNodeOnline(self, instance.primary_node)
3798

    
3799
    if instance.admin_up:
3800
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3801
                                 self.op.instance_name)
3802
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3803
                                              instance.name,
3804
                                              instance.hypervisor)
3805
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3806
                      prereq=True)
3807
    if remote_info.payload:
3808
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3809
                                 (self.op.instance_name,
3810
                                  instance.primary_node))
3811
    self.instance = instance
3812

    
3813
    # new name verification
3814
    name_info = utils.HostInfo(self.op.new_name)
3815

    
3816
    self.op.new_name = new_name = name_info.name
3817
    instance_list = self.cfg.GetInstanceList()
3818
    if new_name in instance_list:
3819
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
3820
                                 new_name)
3821

    
3822
    if not getattr(self.op, "ignore_ip", False):
3823
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
3824
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
3825
                                   (name_info.ip, new_name))
3826

    
3827

    
3828
  def Exec(self, feedback_fn):
3829
    """Reinstall the instance.
3830

3831
    """
3832
    inst = self.instance
3833
    old_name = inst.name
3834

    
3835
    if inst.disk_template == constants.DT_FILE:
3836
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
3837

    
3838
    self.cfg.RenameInstance(inst.name, self.op.new_name)
3839
    # Change the instance lock. This is definitely safe while we hold the BGL
3840
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
3841
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
3842

    
3843
    # re-read the instance from the configuration after rename
3844
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
3845

    
3846
    if inst.disk_template == constants.DT_FILE:
3847
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
3848
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
3849
                                                     old_file_storage_dir,
3850
                                                     new_file_storage_dir)
3851
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
3852
                   " (but the instance has been renamed in Ganeti)" %
3853
                   (inst.primary_node, old_file_storage_dir,
3854
                    new_file_storage_dir))
3855

    
3856
    _StartInstanceDisks(self, inst, None)
3857
    try:
3858
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
3859
                                                 old_name)
3860
      msg = result.fail_msg
3861
      if msg:
3862
        msg = ("Could not run OS rename script for instance %s on node %s"
3863
               " (but the instance has been renamed in Ganeti): %s" %
3864
               (inst.name, inst.primary_node, msg))
3865
        self.proc.LogWarning(msg)
3866
    finally:
3867
      _ShutdownInstanceDisks(self, inst)
3868

    
3869

    
3870
class LURemoveInstance(LogicalUnit):
3871
  """Remove an instance.
3872

3873
  """
3874
  HPATH = "instance-remove"
3875
  HTYPE = constants.HTYPE_INSTANCE
3876
  _OP_REQP = ["instance_name", "ignore_failures"]
3877
  REQ_BGL = False
3878

    
3879
  def ExpandNames(self):
3880
    self._ExpandAndLockInstance()
3881
    self.needed_locks[locking.LEVEL_NODE] = []
3882
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3883

    
3884
  def DeclareLocks(self, level):
3885
    if level == locking.LEVEL_NODE:
3886
      self._LockInstancesNodes()
3887

    
3888
  def BuildHooksEnv(self):
3889
    """Build hooks env.
3890

3891
    This runs on master, primary and secondary nodes of the instance.
3892

3893
    """
3894
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3895
    nl = [self.cfg.GetMasterNode()]
3896
    return env, nl, nl
3897

    
3898
  def CheckPrereq(self):
3899
    """Check prerequisites.
3900

3901
    This checks that the instance is in the cluster.
3902

3903
    """
3904
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3905
    assert self.instance is not None, \
3906
      "Cannot retrieve locked instance %s" % self.op.instance_name
3907

    
3908
  def Exec(self, feedback_fn):
3909
    """Remove the instance.
3910

3911
    """
3912
    instance = self.instance
3913
    logging.info("Shutting down instance %s on node %s",
3914
                 instance.name, instance.primary_node)
3915

    
3916
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance)
3917
    msg = result.fail_msg
3918
    if msg:
3919
      if self.op.ignore_failures:
3920
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
3921
      else:
3922
        raise errors.OpExecError("Could not shutdown instance %s on"
3923
                                 " node %s: %s" %
3924
                                 (instance.name, instance.primary_node, msg))
3925

    
3926
    logging.info("Removing block devices for instance %s", instance.name)
3927

    
3928
    if not _RemoveDisks(self, instance):
3929
      if self.op.ignore_failures:
3930
        feedback_fn("Warning: can't remove instance's disks")
3931
      else:
3932
        raise errors.OpExecError("Can't remove instance's disks")
3933

    
3934
    logging.info("Removing instance %s out of cluster config", instance.name)
3935

    
3936
    self.cfg.RemoveInstance(instance.name)
3937
    self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
3938

    
3939

    
3940
class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  """
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                    "admin_state",
                                    "disk_template", "ip", "mac", "bridge",
                                    "nic_mode", "nic_link",
                                    "sda_size", "sdb_size", "vcpus", "tags",
                                    "network_port", "beparams",
                                    r"(disk)\.(size)/([0-9]+)",
                                    r"(disk)\.(sizes)", "disk_usage",
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
                                    r"(nic)\.(bridge)/([0-9]+)",
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
                                    r"(disk|nic)\.(count)",
                                    "serial_no", "hypervisor", "hvparams",
                                    "ctime", "mtime",
                                    ] +
                                  ["hv/%s" % name
                                   for name in constants.HVS_PARAMETERS] +
                                  ["be/%s" % name
                                   for name in constants.BES_PARAMETERS])
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
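  # Illustrative field names accepted by the patterns above (comment
  # added for clarity, not an exhaustive list): "disk.count",
  # "disk.sizes", "disk.size/0", "nic.macs", "nic.mac/1", "nic.bridge/0",
  # plus "hv/<param>" and "be/<param>" for hypervisor and backend
  # parameters.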


  def ExpandNames(self):
3969
    _CheckOutputFields(static=self._FIELDS_STATIC,
3970
                       dynamic=self._FIELDS_DYNAMIC,
3971
                       selected=self.op.output_fields)
3972

    
3973
    self.needed_locks = {}
3974
    self.share_locks[locking.LEVEL_INSTANCE] = 1
3975
    self.share_locks[locking.LEVEL_NODE] = 1
3976

    
3977
    if self.op.names:
3978
      self.wanted = _GetWantedInstances(self, self.op.names)
3979
    else:
3980
      self.wanted = locking.ALL_SET
3981

    
3982
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3983
    self.do_locking = self.do_node_query and self.op.use_locking
3984
    if self.do_locking:
3985
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
3986
      self.needed_locks[locking.LEVEL_NODE] = []
3987
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3988

    
3989
  def DeclareLocks(self, level):
3990
    if level == locking.LEVEL_NODE and self.do_locking:
3991
      self._LockInstancesNodes()
3992

    
3993
  def CheckPrereq(self):
3994
    """Check prerequisites.
3995

3996
    """
3997
    pass
3998

    
3999
  def Exec(self, feedback_fn):
4000
    """Computes the list of nodes and their attributes.
4001

4002
    """
4003
    all_info = self.cfg.GetAllInstancesInfo()
4004
    if self.wanted == locking.ALL_SET:
4005
      # caller didn't specify instance names, so ordering is not important
4006
      if self.do_locking:
4007
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4008
      else:
4009
        instance_names = all_info.keys()
4010
      instance_names = utils.NiceSort(instance_names)
4011
    else:
4012
      # caller did specify names, so we must keep the ordering
4013
      if self.do_locking:
4014
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4015
      else:
4016
        tgt_set = all_info.keys()
4017
      missing = set(self.wanted).difference(tgt_set)
4018
      if missing:
4019
        raise errors.OpExecError("Some instances were removed before"
4020
                                 " retrieving their data: %s" % missing)
4021
      instance_names = self.wanted
4022

    
4023
    instance_list = [all_info[iname] for iname in instance_names]
4024

    
4025
    # begin data gathering
4026

    
4027
    nodes = frozenset([inst.primary_node for inst in instance_list])
4028
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4029

    
4030
    bad_nodes = []
4031
    off_nodes = []
4032
    if self.do_node_query:
4033
      live_data = {}
4034
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4035
      for name in nodes:
4036
        result = node_data[name]
4037
        if result.offline:
4038
          # offline nodes will be in both lists
4039
          off_nodes.append(name)
4040
        if result.failed or result.fail_msg:
4041
          bad_nodes.append(name)
4042
        else:
4043
          if result.payload:
4044
            live_data.update(result.payload)
4045
          # else no instance is alive
4046
    else:
4047
      live_data = dict([(name, {}) for name in instance_names])
4048

    
4049
    # end data gathering
4050

    
4051
    HVPREFIX = "hv/"
4052
    BEPREFIX = "be/"
4053
    output = []
4054
    cluster = self.cfg.GetClusterInfo()
4055
    for instance in instance_list:
4056
      iout = []
4057
      i_hv = cluster.FillHV(instance)
4058
      i_be = cluster.FillBE(instance)
4059
      i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4060
                                 nic.nicparams) for nic in instance.nics]
4061
      for field in self.op.output_fields:
4062
        st_match = self._FIELDS_STATIC.Matches(field)
4063
        if field == "name":
4064
          val = instance.name
4065
        elif field == "os":
4066
          val = instance.os
4067
        elif field == "pnode":
4068
          val = instance.primary_node
4069
        elif field == "snodes":
4070
          val = list(instance.secondary_nodes)
4071
        elif field == "admin_state":
4072
          val = instance.admin_up
4073
        elif field == "oper_state":
4074
          if instance.primary_node in bad_nodes:
4075
            val = None
4076
          else:
4077
            val = bool(live_data.get(instance.name))
4078
        elif field == "status":
4079
          if instance.primary_node in off_nodes:
4080
            val = "ERROR_nodeoffline"
4081
          elif instance.primary_node in bad_nodes:
4082
            val = "ERROR_nodedown"
4083
          else:
4084
            running = bool(live_data.get(instance.name))
4085
            if running:
4086
              if instance.admin_up:
4087
                val = "running"
4088
              else:
4089
                val = "ERROR_up"
4090
            else:
4091
              if instance.admin_up:
4092
                val = "ERROR_down"
4093
              else:
4094
                val = "ADMIN_down"
4095
        elif field == "oper_ram":
4096
          if instance.primary_node in bad_nodes:
4097
            val = None
4098
          elif instance.name in live_data:
4099
            val = live_data[instance.name].get("memory", "?")
4100
          else:
4101
            val = "-"
4102
        elif field == "vcpus":
4103
          val = i_be[constants.BE_VCPUS]
4104
        elif field == "disk_template":
4105
          val = instance.disk_template
4106
        elif field == "ip":
4107
          if instance.nics:
4108
            val = instance.nics[0].ip
4109
          else:
4110
            val = None
4111
        elif field == "nic_mode":
4112
          if instance.nics:
4113
            val = i_nicp[0][constants.NIC_MODE]
4114
          else:
4115
            val = None
4116
        elif field == "nic_link":
4117
          if instance.nics:
4118
            val = i_nicp[0][constants.NIC_LINK]
4119
          else:
4120
            val = None
4121
        elif field == "bridge":
4122
          if (instance.nics and
4123
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4124
            val = i_nicp[0][constants.NIC_LINK]
4125
          else:
4126
            val = None
4127
        elif field == "mac":
4128
          if instance.nics:
4129
            val = instance.nics[0].mac
4130
          else:
4131
            val = None
4132
        elif field == "sda_size" or field == "sdb_size":
4133
          idx = ord(field[2]) - ord('a')
4134
          try:
4135
            val = instance.FindDisk(idx).size
4136
          except errors.OpPrereqError:
4137
            val = None
4138
        elif field == "disk_usage": # total disk usage per node
4139
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
4140
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4141
        elif field == "tags":
4142
          val = list(instance.GetTags())
4143
        elif field == "serial_no":
4144
          val = instance.serial_no
4145
        elif field == "ctime":
4146
          val = instance.ctime
4147
        elif field == "mtime":
4148
          val = instance.mtime
4149
        elif field == "network_port":
4150
          val = instance.network_port
4151
        elif field == "hypervisor":
4152
          val = instance.hypervisor
4153
        elif field == "hvparams":
4154
          val = i_hv
4155
        elif (field.startswith(HVPREFIX) and
4156
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS):
4157
          val = i_hv.get(field[len(HVPREFIX):], None)
4158
        elif field == "beparams":
4159
          val = i_be
4160
        elif (field.startswith(BEPREFIX) and
4161
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4162
          val = i_be.get(field[len(BEPREFIX):], None)
4163
        elif st_match and st_match.groups():
4164
          # matches a variable list
4165
          st_groups = st_match.groups()
4166
          if st_groups and st_groups[0] == "disk":
4167
            if st_groups[1] == "count":
4168
              val = len(instance.disks)
4169
            elif st_groups[1] == "sizes":
4170
              val = [disk.size for disk in instance.disks]
4171
            elif st_groups[1] == "size":
4172
              try:
4173
                val = instance.FindDisk(st_groups[2]).size
4174
              except errors.OpPrereqError:
4175
                val = None
4176
            else:
4177
              assert False, "Unhandled disk parameter"
4178
          elif st_groups[0] == "nic":
4179
            if st_groups[1] == "count":
4180
              val = len(instance.nics)
4181
            elif st_groups[1] == "macs":
4182
              val = [nic.mac for nic in instance.nics]
4183
            elif st_groups[1] == "ips":
4184
              val = [nic.ip for nic in instance.nics]
4185
            elif st_groups[1] == "modes":
4186
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4187
            elif st_groups[1] == "links":
4188
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4189
            elif st_groups[1] == "bridges":
4190
              val = []
4191
              for nicp in i_nicp:
4192
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4193
                  val.append(nicp[constants.NIC_LINK])
4194
                else:
4195
                  val.append(None)
4196
            else:
4197
              # index-based item
4198
              nic_idx = int(st_groups[2])
4199
              if nic_idx >= len(instance.nics):
4200
                val = None
4201
              else:
4202
                if st_groups[1] == "mac":
4203
                  val = instance.nics[nic_idx].mac
4204
                elif st_groups[1] == "ip":
4205
                  val = instance.nics[nic_idx].ip
4206
                elif st_groups[1] == "mode":
4207
                  val = i_nicp[nic_idx][constants.NIC_MODE]
4208
                elif st_groups[1] == "link":
4209
                  val = i_nicp[nic_idx][constants.NIC_LINK]
4210
                elif st_groups[1] == "bridge":
4211
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4212
                  if nic_mode == constants.NIC_MODE_BRIDGED:
4213
                    val = i_nicp[nic_idx][constants.NIC_LINK]
4214
                  else:
4215
                    val = None
4216
                else:
4217
                  assert False, "Unhandled NIC parameter"
4218
          else:
4219
            assert False, ("Declared but unhandled variable parameter '%s'" %
4220
                           field)
4221
        else:
4222
          assert False, "Declared but unhandled parameter '%s'" % field
4223
        iout.append(val)
4224
      output.append(iout)
4225

    
4226
    return output
4227

    
4228

    
4229
class LUFailoverInstance(LogicalUnit):
4230
  """Failover an instance.
4231

4232
  """
4233
  HPATH = "instance-failover"
4234
  HTYPE = constants.HTYPE_INSTANCE
4235
  _OP_REQP = ["instance_name", "ignore_consistency"]
4236
  REQ_BGL = False
4237

    
4238
  def ExpandNames(self):
4239
    self._ExpandAndLockInstance()
4240
    self.needed_locks[locking.LEVEL_NODE] = []
4241
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4242

    
4243
  def DeclareLocks(self, level):
4244
    if level == locking.LEVEL_NODE:
4245
      self._LockInstancesNodes()
4246

    
4247
  def BuildHooksEnv(self):
4248
    """Build hooks env.
4249

4250
    This runs on master, primary and secondary nodes of the instance.
4251

4252
    """
4253
    env = {
4254
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
4255
      }
4256
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4257
    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
4258
    return env, nl, nl
4259

    
4260
  def CheckPrereq(self):
4261
    """Check prerequisites.
4262

4263
    This checks that the instance is in the cluster.
4264

4265
    """
4266
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4267
    assert self.instance is not None, \
4268
      "Cannot retrieve locked instance %s" % self.op.instance_name
4269

    
4270
    bep = self.cfg.GetClusterInfo().FillBE(instance)
4271
    if instance.disk_template not in constants.DTS_NET_MIRROR:
4272
      raise errors.OpPrereqError("Instance's disk layout is not"
4273
                                 " network mirrored, cannot failover.")
4274

    
4275
    secondary_nodes = instance.secondary_nodes
4276
    if not secondary_nodes:
4277
      raise errors.ProgrammerError("no secondary node but using "
4278
                                   "a mirrored disk template")
4279

    
4280
    target_node = secondary_nodes[0]
4281
    _CheckNodeOnline(self, target_node)
4282
    _CheckNodeNotDrained(self, target_node)
4283
    if instance.admin_up:
4284
      # check memory requirements on the secondary node
4285
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
4286
                           instance.name, bep[constants.BE_MEMORY],
4287
                           instance.hypervisor)
4288
    else:
4289
      self.LogInfo("Not checking memory on the secondary node as"
4290
                   " instance will not be started")
4291

    
4292
    # check bridge existance
4293
    _CheckInstanceBridgesExist(self, instance, node=target_node)
4294

    
4295
  def Exec(self, feedback_fn):
4296
    """Failover an instance.
4297

4298
    The failover is done by shutting it down on its present node and
4299
    starting it on the secondary.
4300

4301
    """
4302
    instance = self.instance
4303

    
4304
    source_node = instance.primary_node
4305
    target_node = instance.secondary_nodes[0]
4306

    
4307
    feedback_fn("* checking disk consistency between source and target")
4308
    for dev in instance.disks:
4309
      # for drbd, these are drbd over lvm
4310
      if not _CheckDiskConsistency(self, dev, target_node, False):
4311
        if instance.admin_up and not self.op.ignore_consistency:
4312
          raise errors.OpExecError("Disk %s is degraded on target node,"
4313
                                   " aborting failover." % dev.iv_name)
4314

    
4315
    feedback_fn("* shutting down instance on source node")
4316
    logging.info("Shutting down instance %s on node %s",
4317
                 instance.name, source_node)
4318

    
4319
    result = self.rpc.call_instance_shutdown(source_node, instance)
4320
    msg = result.fail_msg
4321
    if msg:
4322
      if self.op.ignore_consistency:
4323
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
4324
                             " Proceeding anyway. Please make sure node"
4325
                             " %s is down. Error details: %s",
4326
                             instance.name, source_node, source_node, msg)
4327
      else:
4328
        raise errors.OpExecError("Could not shutdown instance %s on"
4329
                                 " node %s: %s" %
4330
                                 (instance.name, source_node, msg))
4331

    
4332
    feedback_fn("* deactivating the instance's disks on source node")
4333
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
4334
      raise errors.OpExecError("Can't shut down the instance's disks.")
4335

    
4336
    instance.primary_node = target_node
4337
    # distribute new instance config to the other nodes
4338
    self.cfg.Update(instance)
4339

    
4340
    # Only start the instance if it's marked as up
4341
    if instance.admin_up:
4342
      feedback_fn("* activating the instance's disks on target node")
4343
      logging.info("Starting instance %s on node %s",
4344
                   instance.name, target_node)
4345

    
4346
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
4347
                                           ignore_secondaries=True)
4348
      if not disks_ok:
4349
        _ShutdownInstanceDisks(self, instance)
4350
        raise errors.OpExecError("Can't activate the instance's disks")
4351

    
4352
      feedback_fn("* starting the instance on the target node")
4353
      result = self.rpc.call_instance_start(target_node, instance, None, None)
4354
      msg = result.fail_msg
4355
      if msg:
4356
        _ShutdownInstanceDisks(self, instance)
4357
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
4358
                                 (instance.name, target_node, msg))
4359

    
4360

    
4361
class LUMigrateInstance(LogicalUnit):
4362
  """Migrate an instance.
4363

4364
  This is migration without shutting down, compared to the failover,
4365
  which is done with shutdown.
4366

4367
  """
4368
  HPATH = "instance-migrate"
4369
  HTYPE = constants.HTYPE_INSTANCE
4370
  _OP_REQP = ["instance_name", "live", "cleanup"]
4371

    
4372
  REQ_BGL = False
4373

    
4374
  def ExpandNames(self):
4375
    self._ExpandAndLockInstance()
4376

    
4377
    self.needed_locks[locking.LEVEL_NODE] = []
4378
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4379

    
4380
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
4381
                                       self.op.live, self.op.cleanup)
4382
    self.tasklets = [self._migrater]
4383

    
4384
  def DeclareLocks(self, level):
4385
    if level == locking.LEVEL_NODE:
4386
      self._LockInstancesNodes()
4387

    
4388
  def BuildHooksEnv(self):
4389
    """Build hooks env.
4390

4391
    This runs on master, primary and secondary nodes of the instance.
4392

4393
    """
4394
    instance = self._migrater.instance
4395
    env = _BuildInstanceHookEnvByObject(self, instance)
4396
    env["MIGRATE_LIVE"] = self.op.live
4397
    env["MIGRATE_CLEANUP"] = self.op.cleanup
4398
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4399
    return env, nl, nl
4400

    
4401

    
4402
class LUMoveInstance(LogicalUnit):
4403
  """Move an instance by data-copying.
4404

4405
  """
4406
  HPATH = "instance-move"
4407
  HTYPE = constants.HTYPE_INSTANCE
4408
  _OP_REQP = ["instance_name", "target_node"]
4409
  REQ_BGL = False
4410

    
4411
  def ExpandNames(self):
4412
    self._ExpandAndLockInstance()
4413
    target_node = self.cfg.ExpandNodeName(self.op.target_node)
4414
    if target_node is None:
4415
      raise errors.OpPrereqError("Node '%s' not known" %
4416
                                  self.op.target_node)
4417
    self.op.target_node = target_node
4418
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
4419
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
4420

    
4421
  def DeclareLocks(self, level):
4422
    if level == locking.LEVEL_NODE:
4423
      self._LockInstancesNodes(primary_only=True)
4424

    
4425
  def BuildHooksEnv(self):
4426
    """Build hooks env.
4427

4428
    This runs on master, primary and secondary nodes of the instance.
4429

4430
    """
4431
    env = {
4432
      "TARGET_NODE": self.op.target_node,
4433
      }
4434
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4435
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
4436
                                       self.op.target_node]
4437
    return env, nl, nl
4438

    
4439
  def CheckPrereq(self):
4440
    """Check prerequisites.
4441

4442
    This checks that the instance is in the cluster.
4443

4444
    """
4445
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4446
    assert self.instance is not None, \
4447
      "Cannot retrieve locked instance %s" % self.op.instance_name
4448

    
4449
    node = self.cfg.GetNodeInfo(self.op.target_node)
4450
    assert node is not None, \
4451
      "Cannot retrieve locked node %s" % self.op.target_node
4452

    
4453
    self.target_node = target_node = node.name
4454

    
4455
    if target_node == instance.primary_node:
4456
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
4457
                                 (instance.name, target_node))
4458

    
4459
    bep = self.cfg.GetClusterInfo().FillBE(instance)
4460

    
4461
    for idx, dsk in enumerate(instance.disks):
4462
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
4463
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
4464
                                   " cannot copy")
4465

    
4466
    _CheckNodeOnline(self, target_node)
4467
    _CheckNodeNotDrained(self, target_node)
4468

    
4469
    if instance.admin_up:
4470
      # check memory requirements on the secondary node
4471
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
4472
                           instance.name, bep[constants.BE_MEMORY],
4473
                           instance.hypervisor)
4474
    else:
4475
      self.LogInfo("Not checking memory on the secondary node as"
4476
                   " instance will not be started")
4477

    
4478
    # check bridge existence
4479
    _CheckInstanceBridgesExist(self, instance, node=target_node)
4480

    
4481
  def Exec(self, feedback_fn):
4482
    """Move an instance.
4483

4484
    The move is done by shutting it down on its present node, copying
4485
    the data over (slow) and starting it on the new node.
4486

4487
    """
4488
    instance = self.instance
4489

    
4490
    source_node = instance.primary_node
4491
    target_node = self.target_node
4492

    
4493
    self.LogInfo("Shutting down instance %s on source node %s",
4494
                 instance.name, source_node)
4495

    
4496
    result = self.rpc.call_instance_shutdown(source_node, instance)
4497
    msg = result.fail_msg
4498
    if msg:
4499
      if self.op.ignore_consistency:
4500
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
4501
                             " Proceeding anyway. Please make sure node"
4502
                             " %s is down. Error details: %s",
4503
                             instance.name, source_node, source_node, msg)
4504
      else:
4505
        raise errors.OpExecError("Could not shutdown instance %s on"
4506
                                 " node %s: %s" %
4507
                                 (instance.name, source_node, msg))
4508

    
4509
    # create the target disks
4510
    try:
4511
      _CreateDisks(self, instance, target_node=target_node)
4512
    except errors.OpExecError:
4513
      self.LogWarning("Device creation failed, reverting...")
4514
      try:
4515
        _RemoveDisks(self, instance, target_node=target_node)
4516
      finally:
4517
        self.cfg.ReleaseDRBDMinors(instance.name)
4518
        raise
4519

    
4520
    cluster_name = self.cfg.GetClusterInfo().cluster_name
4521

    
4522
    errs = []
4523
    # activate, get path, copy the data over
4524
    for idx, disk in enumerate(instance.disks):
4525
      self.LogInfo("Copying data for disk %d", idx)
4526
      result = self.rpc.call_blockdev_assemble(target_node, disk,
4527
                                               instance.name, True)
4528
      if result.fail_msg:
4529
        self.LogWarning("Can't assemble newly created disk %d: %s",
4530
                        idx, result.fail_msg)
4531
        errs.append(result.fail_msg)
4532
        break
4533
      dev_path = result.payload
4534
      result = self.rpc.call_blockdev_export(source_node, disk,
4535
                                             target_node, dev_path,
4536
                                             cluster_name)
4537
      if result.fail_msg:
4538
        self.LogWarning("Can't copy data over for disk %d: %s",
4539
                        idx, result.fail_msg)
4540
        errs.append(result.fail_msg)
4541
        break
4542

    
4543
    if errs:
4544
      self.LogWarning("Some disks failed to copy, aborting")
4545
      try:
4546
        _RemoveDisks(self, instance, target_node=target_node)
4547
      finally:
4548
        self.cfg.ReleaseDRBDMinors(instance.name)
4549
        raise errors.OpExecError("Errors during disk copy: %s" %
4550
                                 (",".join(errs),))
4551

    
4552
    instance.primary_node = target_node
4553
    self.cfg.Update(instance)
4554

    
4555
    self.LogInfo("Removing the disks on the original node")
4556
    _RemoveDisks(self, instance, target_node=source_node)
4557

    
4558
    # Only start the instance if it's marked as up
4559
    if instance.admin_up:
4560
      self.LogInfo("Starting instance %s on node %s",
4561
                   instance.name, target_node)
4562

    
4563
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
4564
                                           ignore_secondaries=True)
4565
      if not disks_ok:
4566
        _ShutdownInstanceDisks(self, instance)
4567
        raise errors.OpExecError("Can't activate the instance's disks")
4568

    
4569
      result = self.rpc.call_instance_start(target_node, instance, None, None)
4570
      msg = result.fail_msg
4571
      if msg:
4572
        _ShutdownInstanceDisks(self, instance)
4573
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
4574
                                 (instance.name, target_node, msg))
4575

    
4576

    
4577
class LUMigrateNode(LogicalUnit):
4578
  """Migrate all instances from a node.
4579

4580
  """
4581
  HPATH = "node-migrate"
4582
  HTYPE = constants.HTYPE_NODE
4583
  _OP_REQP = ["node_name", "live"]
4584
  REQ_BGL = False
4585

    
4586
  def ExpandNames(self):
4587
    self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name)
4588
    if self.op.node_name is None:
4589
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name)
4590

    
4591
    self.needed_locks = {
4592
      locking.LEVEL_NODE: [self.op.node_name],
4593
      }
4594

    
4595
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
4596

    
4597
    # Create tasklets for migrating instances for all instances on this node
4598
    names = []
4599
    tasklets = []
4600

    
4601
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
4602
      logging.debug("Migrating instance %s", inst.name)
4603
      names.append(inst.name)
4604

    
4605
      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
4606

    
4607
    self.tasklets = tasklets
4608

    
4609
    # Declare instance locks
4610
    self.needed_locks[locking.LEVEL_INSTANCE] = names
4611

    
4612
  def DeclareLocks(self, level):
4613
    if level == locking.LEVEL_NODE:
4614
      self._LockInstancesNodes()
4615

    
4616
  def BuildHooksEnv(self):
4617
    """Build hooks env.
4618

4619
    This runs on the master, the primary and all the secondaries.
4620

4621
    """
4622
    env = {
4623
      "NODE_NAME": self.op.node_name,
4624
      }
4625

    
4626
    nl = [self.cfg.GetMasterNode()]
4627

    
4628
    return (env, nl, nl)
4629

    
4630

    
4631
class TLMigrateInstance(Tasklet):
4632
  def __init__(self, lu, instance_name, live, cleanup):
4633
    """Initializes this class.
4634

4635
    """
4636
    Tasklet.__init__(self, lu)
4637

    
4638
    # Parameters
4639
    self.instance_name = instance_name
4640
    self.live = live
4641
    self.cleanup = cleanup
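    # Editor's note (illustrative, not from the original source): callers
    # construct this tasklet as TLMigrateInstance(lu, instance_name, live,
    # cleanup); LUMigrateInstance passes its opcode's instance_name, live and
    # cleanup values, while LUMigrateNode passes cleanup=False for every
    # primary instance of the source node.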
4642

    
4643
  def CheckPrereq(self):
4644
    """Check prerequisites.
4645

4646
    This checks that the instance is in the cluster.
4647

4648
    """
4649
    instance = self.cfg.GetInstanceInfo(
4650
      self.cfg.ExpandInstanceName(self.instance_name))
4651
    if instance is None:
4652
      raise errors.OpPrereqError("Instance '%s' not known" %
4653
                                 self.instance_name)
4654

    
4655
    if instance.disk_template != constants.DT_DRBD8:
4656
      raise errors.OpPrereqError("Instance's disk layout is not"
4657
                                 " drbd8, cannot migrate.")
4658

    
4659
    secondary_nodes = instance.secondary_nodes
4660
    if not secondary_nodes:
4661
      raise errors.ConfigurationError("No secondary node but using"
4662
                                      " drbd8 disk template")
4663

    
4664
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
4665

    
4666
    target_node = secondary_nodes[0]
4667
    # check memory requirements on the secondary node
4668
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
4669
                         instance.name, i_be[constants.BE_MEMORY],
4670
                         instance.hypervisor)
4671

    
4672
    # check bridge existence
4673
    _CheckInstanceBridgesExist(self, instance, node=target_node)
4674

    
4675
    if not self.cleanup:
4676
      _CheckNodeNotDrained(self, target_node)
4677
      result = self.rpc.call_instance_migratable(instance.primary_node,
4678
                                                 instance)
4679
      result.Raise("Can't migrate, please use failover", prereq=True)
4680

    
4681
    self.instance = instance
4682

    
4683
  def _WaitUntilSync(self):
4684
    """Poll with custom rpc for disk sync.
4685

4686
    This uses our own step-based rpc call.
4687

4688
    """
4689
    self.feedback_fn("* wait until resync is done")
4690
    all_done = False
4691
    while not all_done:
4692
      all_done = True
4693
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
4694
                                            self.nodes_ip,
4695
                                            self.instance.disks)
4696
      min_percent = 100
4697
      for node, nres in result.items():
4698
        nres.Raise("Cannot resync disks on node %s" % node)
4699
        node_done, node_percent = nres.payload
4700
        all_done = all_done and node_done
4701
        if node_percent is not None:
4702
          min_percent = min(min_percent, node_percent)
4703
      if not all_done:
4704
        if min_percent < 100:
4705
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
4706
        time.sleep(2)
4707

    
4708
  def _EnsureSecondary(self, node):
4709
    """Demote a node to secondary.
4710

4711
    """
4712
    self.feedback_fn("* switching node %s to secondary mode" % node)
4713

    
4714
    for dev in self.instance.disks:
4715
      self.cfg.SetDiskID(dev, node)
4716

    
4717
    result = self.rpc.call_blockdev_close(node, self.instance.name,
4718
                                          self.instance.disks)
4719
    result.Raise("Cannot change disk to secondary on node %s" % node)
4720

    
4721
  def _GoStandalone(self):
4722
    """Disconnect from the network.
4723

4724
    """
4725
    self.feedback_fn("* changing into standalone mode")
4726
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
4727
                                               self.instance.disks)
4728
    for node, nres in result.items():
4729
      nres.Raise("Cannot disconnect disks node %s" % node)
4730

    
4731
  def _GoReconnect(self, multimaster):
4732
    """Reconnect to the network.
4733

4734
    """
4735
    if multimaster:
4736
      msg = "dual-master"
4737
    else:
4738
      msg = "single-master"
4739
    self.feedback_fn("* changing disks into %s mode" % msg)
4740
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
4741
                                           self.instance.disks,
4742
                                           self.instance.name, multimaster)
4743
    for node, nres in result.items():
4744
      nres.Raise("Cannot change disks config on node %s" % node)
4745

    
4746
  def _ExecCleanup(self):
4747
    """Try to cleanup after a failed migration.
4748

4749
    The cleanup is done by:
4750
      - check that the instance is running only on one node
4751
        (and update the config if needed)
4752
      - change disks on its secondary node to secondary
4753
      - wait until disks are fully synchronized
4754
      - disconnect from the network
4755
      - change disks into single-master mode
4756
      - wait again until disks are fully synchronized
4757

4758
    """
4759
    instance = self.instance
4760
    target_node = self.target_node
4761
    source_node = self.source_node
4762

    
4763
    # check running on only one node
4764
    self.feedback_fn("* checking where the instance actually runs"
4765
                     " (if this hangs, the hypervisor might be in"
4766
                     " a bad state)")
4767
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
4768
    for node, result in ins_l.items():
4769
      result.Raise("Can't contact node %s" % node)
4770

    
4771
    runningon_source = instance.name in ins_l[source_node].payload
4772
    runningon_target = instance.name in ins_l[target_node].payload
4773

    
4774
    if runningon_source and runningon_target:
4775
      raise errors.OpExecError("Instance seems to be running on two nodes,"
4776
                               " or the hypervisor is confused. You will have"
4777
                               " to ensure manually that it runs only on one"
4778
                               " and restart this operation.")
4779

    
4780
    if not (runningon_source or runningon_target):
4781
      raise errors.OpExecError("Instance does not seem to be running at all."
4782
                               " In this case, it's safer to repair by"
4783
                               " running 'gnt-instance stop' to ensure disk"
4784
                               " shutdown, and then restarting it.")
4785

    
4786
    if runningon_target:
4787
      # the migration has actually succeeded, we need to update the config
4788
      self.feedback_fn("* instance running on secondary node (%s),"
4789
                       " updating config" % target_node)
4790
      instance.primary_node = target_node
4791
      self.cfg.Update(instance)
4792
      demoted_node = source_node
4793
    else:
4794
      self.feedback_fn("* instance confirmed to be running on its"
4795
                       " primary node (%s)" % source_node)
4796
      demoted_node = target_node
4797

    
4798
    self._EnsureSecondary(demoted_node)
4799
    try:
4800
      self._WaitUntilSync()
4801
    except errors.OpExecError:
4802
      # we ignore here errors, since if the device is standalone, it
4803
      # won't be able to sync
4804
      pass
4805
    self._GoStandalone()
4806
    self._GoReconnect(False)
4807
    self._WaitUntilSync()
4808

    
4809
    self.feedback_fn("* done")
4810

    
4811
  def _RevertDiskStatus(self):
4812
    """Try to revert the disk status after a failed migration.
4813

4814
    """
4815
    target_node = self.target_node
4816
    try:
4817
      self._EnsureSecondary(target_node)
4818
      self._GoStandalone()
4819
      self._GoReconnect(False)
4820
      self._WaitUntilSync()
4821
    except errors.OpExecError, err:
4822
      self.lu.LogWarning("Migration failed and I can't reconnect the"
4823
                         " drives: error '%s'\n"
4824
                         "Please look and recover the instance status" %
4825
                         str(err))
4826

    
4827
  def _AbortMigration(self):
4828
    """Call the hypervisor code to abort a started migration.
4829

4830
    """
4831
    instance = self.instance
4832
    target_node = self.target_node
4833
    migration_info = self.migration_info
4834

    
4835
    abort_result = self.rpc.call_finalize_migration(target_node,
4836
                                                    instance,
4837
                                                    migration_info,
4838
                                                    False)
4839
    abort_msg = abort_result.fail_msg
4840
    if abort_msg:
4841
      logging.error("Aborting migration failed on target node %s: %s" %
4842
                    (target_node, abort_msg))
4843
      # Don't raise an exception here, as we still have to try to revert the
4844
      # disk status, even if this step failed.
4845

    
4846
  def _ExecMigration(self):
4847
    """Migrate an instance.
4848

4849
    The migrate is done by:
4850
      - change the disks into dual-master mode
4851
      - wait until disks are fully synchronized again
4852
      - migrate the instance
4853
      - change disks on the new secondary node (the old primary) to secondary
4854
      - wait until disks are fully synchronized
4855
      - change disks into single-master mode
4856

4857
    """
4858
    instance = self.instance
4859
    target_node = self.target_node
4860
    source_node = self.source_node
4861

    
4862
    self.feedback_fn("* checking disk consistency between source and target")
4863
    for dev in instance.disks:
4864
      if not _CheckDiskConsistency(self, dev, target_node, False):
4865
        raise errors.OpExecError("Disk %s is degraded or not fully"
4866
                                 " synchronized on target node,"
4867
                                 " aborting migrate." % dev.iv_name)
4868

    
4869
    # First get the migration information from the remote node
4870
    result = self.rpc.call_migration_info(source_node, instance)
4871
    msg = result.fail_msg
4872
    if msg:
4873
      log_err = ("Failed fetching source migration information from %s: %s" %
4874
                 (source_node, msg))
4875
      logging.error(log_err)
4876
      raise errors.OpExecError(log_err)
4877

    
4878
    self.migration_info = migration_info = result.payload
4879

    
4880
    # Then switch the disks to master/master mode
4881
    self._EnsureSecondary(target_node)
4882
    self._GoStandalone()
4883
    self._GoReconnect(True)
4884
    self._WaitUntilSync()
4885

    
4886
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
4887
    result = self.rpc.call_accept_instance(target_node,
4888
                                           instance,
4889
                                           migration_info,
4890
                                           self.nodes_ip[target_node])
4891

    
4892
    msg = result.fail_msg
4893
    if msg:
4894
      logging.error("Instance pre-migration failed, trying to revert"
4895
                    " disk status: %s", msg)
4896
      self._AbortMigration()
4897
      self._RevertDiskStatus()
4898
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
4899
                               (instance.name, msg))
4900

    
4901
    self.feedback_fn("* migrating instance to %s" % target_node)
4902
    time.sleep(10)
4903
    result = self.rpc.call_instance_migrate(source_node, instance,
4904
                                            self.nodes_ip[target_node],
4905
                                            self.live)
4906
    msg = result.fail_msg
4907
    if msg:
4908
      logging.error("Instance migration failed, trying to revert"
4909
                    " disk status: %s", msg)
4910
      self._AbortMigration()
4911
      self._RevertDiskStatus()
4912
      raise errors.OpExecError("Could not migrate instance %s: %s" %
4913
                               (instance.name, msg))
4914
    time.sleep(10)
4915

    
4916
    instance.primary_node = target_node
4917
    # distribute new instance config to the other nodes
4918
    self.cfg.Update(instance)
4919

    
4920
    result = self.rpc.call_finalize_migration(target_node,
4921
                                              instance,
4922
                                              migration_info,
4923
                                              True)
4924
    msg = result.fail_msg
4925
    if msg:
4926
      logging.error("Instance migration succeeded, but finalization failed:"
4927
                    " %s" % msg)
4928
      raise errors.OpExecError("Could not finalize instance migration: %s" %
4929
                               msg)
4930

    
4931
    self._EnsureSecondary(source_node)
4932
    self._WaitUntilSync()
4933
    self._GoStandalone()
4934
    self._GoReconnect(False)
4935
    self._WaitUntilSync()
4936

    
4937
    self.feedback_fn("* done")
4938

    
4939
  def Exec(self, feedback_fn):
4940
    """Perform the migration.
4941

4942
    """
4943
    feedback_fn("Migrating instance %s" % self.instance.name)
4944

    
4945
    self.feedback_fn = feedback_fn
4946

    
4947
    self.source_node = self.instance.primary_node
4948
    self.target_node = self.instance.secondary_nodes[0]
4949
    self.all_nodes = [self.source_node, self.target_node]
4950
    self.nodes_ip = {
4951
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
4952
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
4953
      }
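    # Note (illustrative): nodes_ip maps each of the two nodes to its
    # secondary IP; these addresses are what the DRBD disconnect/attach
    # helpers and call_accept_instance/call_instance_migrate in
    # _ExecMigration use to reach the peer node.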
4954

    
4955
    if self.cleanup:
4956
      return self._ExecCleanup()
4957
    else:
4958
      return self._ExecMigration()
4959

    
4960

    
4961
def _CreateBlockDev(lu, node, instance, device, force_create,
4962
                    info, force_open):
4963
  """Create a tree of block devices on a given node.
4964

4965
  If this device type has to be created on secondaries, create it and
4966
  all its children.
4967

4968
  If not, just recurse to children keeping the same 'force' value.
4969

4970
  @param lu: the lu on whose behalf we execute
4971
  @param node: the node on which to create the device
4972
  @type instance: L{objects.Instance}
4973
  @param instance: the instance which owns the device
4974
  @type device: L{objects.Disk}
4975
  @param device: the device to create
4976
  @type force_create: boolean
4977
  @param force_create: whether to force creation of this device; this
4978
      will be changed to True whenever we find a device for which
      CreateOnSecondary() is true
4980
  @param info: the extra 'metadata' we should attach to the device
4981
      (this will be represented as a LVM tag)
4982
  @type force_open: boolean
4983
  @param force_open: this parameter will be passed to the
4984
      L{backend.BlockdevCreate} function where it specifies
4985
      whether we run on primary or not, and it affects both
4986
      the child assembly and the device's own Open() execution
4987

4988
  """
4989
  if device.CreateOnSecondary():
4990
    force_create = True
4991

    
4992
  if device.children:
4993
    for child in device.children:
4994
      _CreateBlockDev(lu, node, instance, child, force_create,
4995
                      info, force_open)
4996

    
4997
  if not force_create:
4998
    return
4999

    
5000
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
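# Illustrative usage (editor's sketch, mirroring _CreateDisks below): when
# creating an instance's volumes, the caller typically does
#   f_create = (node == instance.primary_node)
#   _CreateBlockDev(lu, node, instance, disk, f_create, info, f_create)
# so creation is only forced on the primary node (or wherever a device
# reports CreateOnSecondary()), and force_open follows the same
# primary-only rule.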
5001

    
5002

    
5003
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5004
  """Create a single block device on a given node.
5005

5006
  This will not recurse over children of the device, so they must be
5007
  created in advance.
5008

5009
  @param lu: the lu on whose behalf we execute
5010
  @param node: the node on which to create the device
5011
  @type instance: L{objects.Instance}
5012
  @param instance: the instance which owns the device
5013
  @type device: L{objects.Disk}
5014
  @param device: the device to create
5015
  @param info: the extra 'metadata' we should attach to the device
5016
      (this will be represented as a LVM tag)
5017
  @type force_open: boolean
5018
  @param force_open: this parameter will be passed to the
5019
      L{backend.BlockdevCreate} function where it specifies
5020
      whether we run on primary or not, and it affects both
5021
      the child assembly and the device's own Open() execution
5022

5023
  """
5024
  lu.cfg.SetDiskID(device, node)
5025
  result = lu.rpc.call_blockdev_create(node, device, device.size,
5026
                                       instance.name, force_open, info)
5027
  result.Raise("Can't create block device %s on"
5028
               " node %s for instance %s" % (device, node, instance.name))
5029
  if device.physical_id is None:
5030
    device.physical_id = result.payload
5031

    
5032

    
5033
def _GenerateUniqueNames(lu, exts):
5034
  """Generate a suitable LV name.
5035

5036
  This will generate a logical volume name for the given instance.
5037

5038
  """
5039
  results = []
5040
  for val in exts:
5041
    new_id = lu.cfg.GenerateUniqueID()
5042
    results.append("%s%s" % (new_id, val))
5043
  return results
5044

    
5045

    
5046
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
5047
                         p_minor, s_minor):
5048
  """Generate a drbd8 device complete with its children.
5049

5050
  """
5051
  port = lu.cfg.AllocatePort()
5052
  vgname = lu.cfg.GetVGName()
5053
  shared_secret = lu.cfg.GenerateDRBDSecret()
5054
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5055
                          logical_id=(vgname, names[0]))
5056
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5057
                          logical_id=(vgname, names[1]))
5058
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
5059
                          logical_id=(primary, secondary, port,
5060
                                      p_minor, s_minor,
5061
                                      shared_secret),
5062
                          children=[dev_data, dev_meta],
5063
                          iv_name=iv_name)
5064
  return drbd_dev
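# Note (illustrative): the device returned above is a DRBD8 disk whose two
# LV children hold the data (`size` MB) and 128 MB of DRBD metadata; its
# logical_id carries both node names, the allocated port, the two minors and
# the shared secret.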
5065

    
5066

    
5067
def _GenerateDiskTemplate(lu, template_name,
5068
                          instance_name, primary_node,
5069
                          secondary_nodes, disk_info,
5070
                          file_storage_dir, file_driver,
5071
                          base_index):
5072
  """Generate the entire disk layout for a given template type.
5073

5074
  """
5075
  #TODO: compute space requirements
5076

    
5077
  vgname = lu.cfg.GetVGName()
5078
  disk_count = len(disk_info)
5079
  disks = []
5080
  if template_name == constants.DT_DISKLESS:
5081
    pass
5082
  elif template_name == constants.DT_PLAIN:
5083
    if len(secondary_nodes) != 0:
5084
      raise errors.ProgrammerError("Wrong template configuration")
5085

    
5086
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5087
                                      for i in range(disk_count)])
5088
    for idx, disk in enumerate(disk_info):
5089
      disk_index = idx + base_index
5090
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
5091
                              logical_id=(vgname, names[idx]),
5092
                              iv_name="disk/%d" % disk_index,
5093
                              mode=disk["mode"])
5094
      disks.append(disk_dev)
5095
  elif template_name == constants.DT_DRBD8:
5096
    if len(secondary_nodes) != 1:
5097
      raise errors.ProgrammerError("Wrong template configuration")
5098
    remote_node = secondary_nodes[0]
5099
    minors = lu.cfg.AllocateDRBDMinor(
5100
      [primary_node, remote_node] * len(disk_info), instance_name)
5101

    
5102
    names = []
5103
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5104
                                               for i in range(disk_count)]):
5105
      names.append(lv_prefix + "_data")
5106
      names.append(lv_prefix + "_meta")
5107
    for idx, disk in enumerate(disk_info):
5108
      disk_index = idx + base_index
5109
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
5110
                                      disk["size"], names[idx*2:idx*2+2],
5111
                                      "disk/%d" % disk_index,
5112
                                      minors[idx*2], minors[idx*2+1])
5113
      disk_dev.mode = disk["mode"]
5114
      disks.append(disk_dev)
5115
  elif template_name == constants.DT_FILE:
5116
    if len(secondary_nodes) != 0:
5117
      raise errors.ProgrammerError("Wrong template configuration")
5118

    
5119
    for idx, disk in enumerate(disk_info):
5120
      disk_index = idx + base_index
5121
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
5122
                              iv_name="disk/%d" % disk_index,
5123
                              logical_id=(file_driver,
5124
                                          "%s/disk%d" % (file_storage_dir,
5125
                                                         disk_index)),
5126
                              mode=disk["mode"])
5127
      disks.append(disk_dev)
5128
  else:
5129
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
5130
  return disks
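# Illustrative example (editor's note): for DT_DRBD8 with two disks and
# base_index 0, the generated LV names look like "<unique-id>.disk0_data",
# "<unique-id>.disk0_meta", "<unique-id>.disk1_data", ..., and DRBD minors
# are allocated pairwise (primary, secondary) for each disk.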
5131

    
5132

    
5133
def _GetInstanceInfoText(instance):
5134
  """Compute that text that should be added to the disk's metadata.
5135

5136
  """
5137
  return "originstname+%s" % instance.name
5138

    
5139

    
5140
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
5141
  """Create all disks for an instance.
5142

5143
  This abstracts away some work from AddInstance.
5144

5145
  @type lu: L{LogicalUnit}
5146
  @param lu: the logical unit on whose behalf we execute
5147
  @type instance: L{objects.Instance}
5148
  @param instance: the instance whose disks we should create
5149
  @type to_skip: list
5150
  @param to_skip: list of indices to skip
5151
  @type target_node: string
5152
  @param target_node: if passed, overrides the target node for creation
5153
  @rtype: boolean
5154
  @return: the success of the creation
5155

5156
  """
5157
  info = _GetInstanceInfoText(instance)
5158
  if target_node is None:
5159
    pnode = instance.primary_node
5160
    all_nodes = instance.all_nodes
5161
  else:
5162
    pnode = target_node
5163
    all_nodes = [pnode]
5164

    
5165
  if instance.disk_template == constants.DT_FILE:
5166
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5167
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
5168

    
5169
    result.Raise("Failed to create directory '%s' on"
5170
                 " node %s: %s" % (file_storage_dir, pnode))
5171

    
5172
  # Note: this needs to be kept in sync with adding of disks in
5173
  # LUSetInstanceParams
5174
  for idx, device in enumerate(instance.disks):
5175
    if to_skip and idx in to_skip:
5176
      continue
5177
    logging.info("Creating volume %s for instance %s",
5178
                 device.iv_name, instance.name)
5179
    #HARDCODE
5180
    for node in all_nodes:
5181
      f_create = node == pnode
5182
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
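# Illustrative usage (editor's note): LUMoveInstance calls
# _CreateDisks(self, instance, target_node=target_node) so the new copies
# are created only on the move target, while the normal creation path uses
# the instance's own node list (primary plus any secondaries).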
5183

    
5184

    
5185
def _RemoveDisks(lu, instance, target_node=None):
5186
  """Remove all disks for an instance.
5187

5188
  This abstracts away some work from `AddInstance()` and
5189
  `RemoveInstance()`. Note that in case some of the devices couldn't
5190
  be removed, the removal will continue with the other ones (compare
5191
  with `_CreateDisks()`).
5192

5193
  @type lu: L{LogicalUnit}
5194
  @param lu: the logical unit on whose behalf we execute
5195
  @type instance: L{objects.Instance}
5196
  @param instance: the instance whose disks we should remove
5197
  @type target_node: string
5198
  @param target_node: used to override the node on which to remove the disks
5199
  @rtype: boolean
5200
  @return: the success of the removal
5201

5202
  """
5203
  logging.info("Removing block devices for instance %s", instance.name)
5204

    
5205
  all_result = True
5206
  for device in instance.disks:
5207
    if target_node:
5208
      edata = [(target_node, device)]
5209
    else:
5210
      edata = device.ComputeNodeTree(instance.primary_node)
5211
    for node, disk in edata:
5212
      lu.cfg.SetDiskID(disk, node)
5213
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
5214
      if msg:
5215
        lu.LogWarning("Could not remove block device %s on node %s,"
5216
                      " continuing anyway: %s", device.iv_name, node, msg)
5217
        all_result = False
5218

    
5219
  if instance.disk_template == constants.DT_FILE:
5220
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5221
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
5225
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
5226
    if result.fail_msg:
5227
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
5228
                    file_storage_dir, instance.primary_node, result.fail_msg)
5229
      all_result = False
5230

    
5231
  return all_result
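# Illustrative usage (editor's note): after a successful move,
# LUMoveInstance calls _RemoveDisks(self, instance, target_node=source_node)
# to drop the now-unused copies on the original node; without target_node
# the disks are removed from the nodes in each device's node tree.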
5232

    
5233

    
5234
def _ComputeDiskSize(disk_template, disks):
5235
  """Compute disk size requirements in the volume group
5236

5237
  """
5238
  # Required free disk space as a function of disk and swap space
5239
  req_size_dict = {
5240
    constants.DT_DISKLESS: None,
5241
    constants.DT_PLAIN: sum(d["size"] for d in disks),
5242
    # 128 MB are added for drbd metadata for each disk
5243
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
5244
    constants.DT_FILE: None,
5245
  }
5246

    
5247
  if disk_template not in req_size_dict:
5248
    raise errors.ProgrammerError("Disk template '%s' size requirement"
5249
                                 " is unknown" %  disk_template)
5250

    
5251
  return req_size_dict[disk_template]
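# Worked example (illustrative): for two 1024 MB disks the required volume
# group space is 2048 MB with DT_PLAIN and 2 * (1024 + 128) = 2304 MB with
# DT_DRBD8 (128 MB of DRBD metadata per disk); DT_DISKLESS and DT_FILE need
# no LVM space and yield None.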
5252

    
5253

    
5254
def _CheckHVParams(lu, nodenames, hvname, hvparams):
5255
  """Hypervisor parameter validation.
5256

5257
  This function abstracts the hypervisor parameter validation to be
5258
  used in both instance create and instance modify.
5259

5260
  @type lu: L{LogicalUnit}
5261
  @param lu: the logical unit for which we check
5262
  @type nodenames: list
5263
  @param nodenames: the list of nodes on which we should check
5264
  @type hvname: string
5265
  @param hvname: the name of the hypervisor we should use
5266
  @type hvparams: dict
5267
  @param hvparams: the parameters which we need to check
5268
  @raise errors.OpPrereqError: if the parameters are not valid
5269

5270
  """
5271
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
5272
                                                  hvname,
5273
                                                  hvparams)
5274
  for node in nodenames:
5275
    info = hvinfo[node]
5276
    if info.offline:
5277
      continue
5278
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
5279

    
5280

    
5281
class LUCreateInstance(LogicalUnit):
5282
  """Create an instance.
5283

5284
  """
5285
  HPATH = "instance-add"
5286
  HTYPE = constants.HTYPE_INSTANCE
5287
  _OP_REQP = ["instance_name", "disks", "disk_template",
5288
              "mode", "start",
5289
              "wait_for_sync", "ip_check", "nics",
5290
              "hvparams", "beparams"]
5291
  REQ_BGL = False
5292

    
5293
  def _ExpandNode(self, node):
5294
    """Expands and checks one node name.
5295

5296
    """
5297
    node_full = self.cfg.ExpandNodeName(node)
5298
    if node_full is None:
5299
      raise errors.OpPrereqError("Unknown node %s" % node)
5300
    return node_full
5301

    
5302
  def ExpandNames(self):
5303
    """ExpandNames for CreateInstance.
5304

5305
    Figure out the right locks for instance creation.
5306

5307
    """
5308
    self.needed_locks = {}
5309

    
5310
    # set optional parameters to none if they don't exist
5311
    for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
5312
      if not hasattr(self.op, attr):
5313
        setattr(self.op, attr, None)
5314

    
5315
    # cheap checks, mostly valid constants given
5316

    
5317
    # verify creation mode
5318
    if self.op.mode not in (constants.INSTANCE_CREATE,
5319
                            constants.INSTANCE_IMPORT):
5320
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
5321
                                 self.op.mode)
5322

    
5323
    # disk template and mirror node verification
5324
    if self.op.disk_template not in constants.DISK_TEMPLATES:
5325
      raise errors.OpPrereqError("Invalid disk template name")
5326

    
5327
    if self.op.hypervisor is None:
5328
      self.op.hypervisor = self.cfg.GetHypervisorType()
5329

    
5330
    cluster = self.cfg.GetClusterInfo()
5331
    enabled_hvs = cluster.enabled_hypervisors
5332
    if self.op.hypervisor not in enabled_hvs:
5333
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
5334
                                 " cluster (%s)" % (self.op.hypervisor,
5335
                                  ",".join(enabled_hvs)))
5336

    
5337
    # check hypervisor parameter syntax (locally)
5338
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5339
    filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
5340
                                  self.op.hvparams)
5341
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
5342
    hv_type.CheckParameterSyntax(filled_hvp)
5343
    self.hv_full = filled_hvp
5344

    
5345
    # fill and remember the beparams dict
5346
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5347
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
5348
                                    self.op.beparams)
5349

    
5350
    #### instance parameters check
5351

    
5352
    # instance name verification
5353
    hostname1 = utils.HostInfo(self.op.instance_name)
5354
    self.op.instance_name = instance_name = hostname1.name
5355

    
5356
    # this is just a preventive check, but someone might still add this
5357
    # instance in the meantime, and creation will fail at lock-add time
5358
    if instance_name in self.cfg.GetInstanceList():
5359
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5360
                                 instance_name)
5361

    
5362
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
5363

    
5364
    # NIC buildup
5365
    self.nics = []
5366
    for idx, nic in enumerate(self.op.nics):
5367
      nic_mode_req = nic.get("mode", None)
5368
      nic_mode = nic_mode_req
5369
      if nic_mode is None:
5370
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
5371

    
5372
      # in routed mode, for the first nic, the default ip is 'auto'
5373
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
5374
        default_ip_mode = constants.VALUE_AUTO
5375
      else:
5376
        default_ip_mode = constants.VALUE_NONE
5377

    
5378
      # ip validity checks
5379
      ip = nic.get("ip", default_ip_mode)
5380
      if ip is None or ip.lower() == constants.VALUE_NONE:
5381
        nic_ip = None
5382
      elif ip.lower() == constants.VALUE_AUTO:
5383
        nic_ip = hostname1.ip
5384
      else:
5385
        if not utils.IsValidIP(ip):
5386
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
5387
                                     " like a valid IP" % ip)
5388
        nic_ip = ip
5389

    
5390
      # TODO: check the ip for uniqueness !!
5391
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
5392
        raise errors.OpPrereqError("Routed nic mode requires an ip address")
5393

    
5394
      # MAC address verification
5395
      mac = nic.get("mac", constants.VALUE_AUTO)
5396
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
5397
        if not utils.IsValidMac(mac.lower()):
5398
          raise errors.OpPrereqError("Invalid MAC address specified: %s" %
5399
                                     mac)
5400
      # bridge verification
5401
      bridge = nic.get("bridge", None)
5402
      link = nic.get("link", None)
5403
      if bridge and link:
5404
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
5405
                                   " at the same time")
5406
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
5407
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic")
5408
      elif bridge:
5409
        link = bridge
5410

    
5411
      nicparams = {}
5412
      if nic_mode_req:
5413
        nicparams[constants.NIC_MODE] = nic_mode_req
5414
      if link:
5415
        nicparams[constants.NIC_LINK] = link
5416

    
5417
      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
5418
                                      nicparams)
5419
      objects.NIC.CheckParameterSyntax(check_params)
5420
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
5421

    
5422
    # disk checks/pre-build
5423
    self.disks = []
5424
    for disk in self.op.disks:
5425
      mode = disk.get("mode", constants.DISK_RDWR)
5426
      if mode not in constants.DISK_ACCESS_SET:
5427
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
5428
                                   mode)
5429
      size = disk.get("size", None)
5430
      if size is None:
5431
        raise errors.OpPrereqError("Missing disk size")
5432
      try:
5433
        size = int(size)
5434
      except ValueError:
5435
        raise errors.OpPrereqError("Invalid disk size '%s'" % size)
5436
      self.disks.append({"size": size, "mode": mode})
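    # Note (illustrative): after this loop self.disks holds plain dicts such
    # as [{"size": 10240, "mode": constants.DISK_RDWR}]; sizes are in MB and
    # have been validated as integers above.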
5437

    
5438
    # used in CheckPrereq for ip ping check
5439
    self.check_ip = hostname1.ip
5440

    
5441
    # file storage checks
5442
    if (self.op.file_driver and
5443
        not self.op.file_driver in constants.FILE_DRIVER):
5444
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
5445
                                 self.op.file_driver)
5446

    
5447
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
5448
      raise errors.OpPrereqError("File storage directory path not absolute")
5449

    
5450
    ### Node/iallocator related checks
5451
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
5452
      raise errors.OpPrereqError("One and only one of iallocator and primary"
5453
                                 " node must be given")
5454

    
5455
    if self.op.iallocator:
5456
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5457
    else:
5458
      self.op.pnode = self._ExpandNode(self.op.pnode)
5459
      nodelist = [self.op.pnode]
5460
      if self.op.snode is not None:
5461
        self.op.snode = self._ExpandNode(self.op.snode)
5462
        nodelist.append(self.op.snode)
5463
      self.needed_locks[locking.LEVEL_NODE] = nodelist
5464

    
5465
    # in case of import lock the source node too
5466
    if self.op.mode == constants.INSTANCE_IMPORT:
5467
      src_node = getattr(self.op, "src_node", None)
5468
      src_path = getattr(self.op, "src_path", None)
5469

    
5470
      if src_path is None:
5471
        self.op.src_path = src_path = self.op.instance_name
5472

    
5473
      if src_node is None:
5474
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5475
        self.op.src_node = None
5476
        if os.path.isabs(src_path):
5477
          raise errors.OpPrereqError("Importing an instance from an absolute"
5478
                                     " path requires a source node option.")
5479
      else:
5480
        self.op.src_node = src_node = self._ExpandNode(src_node)
5481
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5482
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
5483
        if not os.path.isabs(src_path):
5484
          self.op.src_path = src_path = \
5485
            os.path.join(constants.EXPORT_DIR, src_path)
5486

    
5487
    else: # INSTANCE_CREATE
5488
      if getattr(self.op, "os_type", None) is None:
5489
        raise errors.OpPrereqError("No guest OS specified")
5490

    
5491
  def _RunAllocator(self):
5492
    """Run the allocator based on input opcode.
5493

5494
    """
5495
    nics = [n.ToDict() for n in self.nics]
5496
    ial = IAllocator(self.cfg, self.rpc,
5497
                     mode=constants.IALLOCATOR_MODE_ALLOC,
5498
                     name=self.op.instance_name,
5499
                     disk_template=self.op.disk_template,
5500
                     tags=[],
5501
                     os=self.op.os_type,
5502
                     vcpus=self.be_full[constants.BE_VCPUS],
5503
                     mem_size=self.be_full[constants.BE_MEMORY],
5504
                     disks=self.disks,
5505
                     nics=nics,
5506
                     hypervisor=self.op.hypervisor,
5507
                     )
5508

    
5509
    ial.Run(self.op.iallocator)
5510

    
5511
    if not ial.success:
5512
      raise errors.OpPrereqError("Can't compute nodes using"
5513
                                 " iallocator '%s': %s" % (self.op.iallocator,
5514
                                                           ial.info))
5515
    if len(ial.nodes) != ial.required_nodes:
5516
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
5517
                                 " of nodes (%s), required %s" %
5518
                                 (self.op.iallocator, len(ial.nodes),
5519
                                  ial.required_nodes))
5520
    self.op.pnode = ial.nodes[0]
5521
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
5522
                 self.op.instance_name, self.op.iallocator,
5523
                 ", ".join(ial.nodes))
5524
    if ial.required_nodes == 2:
5525
      self.op.snode = ial.nodes[1]
5526

    
5527
  def BuildHooksEnv(self):
5528
    """Build hooks env.
5529

5530
    This runs on master, primary and secondary nodes of the instance.
5531

5532
    """
5533
    env = {
5534
      "ADD_MODE": self.op.mode,
5535
      }
5536
    if self.op.mode == constants.INSTANCE_IMPORT:
5537
      env["SRC_NODE"] = self.op.src_node
5538
      env["SRC_PATH"] = self.op.src_path
5539
      env["SRC_IMAGES"] = self.src_images
5540

    
5541
    env.update(_BuildInstanceHookEnv(
5542
      name=self.op.instance_name,
5543
      primary_node=self.op.pnode,
5544
      secondary_nodes=self.secondaries,
5545
      status=self.op.start,
5546
      os_type=self.op.os_type,
5547
      memory=self.be_full[constants.BE_MEMORY],
5548
      vcpus=self.be_full[constants.BE_VCPUS],
5549
      nics=_NICListToTuple(self, self.nics),
5550
      disk_template=self.op.disk_template,
5551
      disks=[(d["size"], d["mode"]) for d in self.disks],
5552
      bep=self.be_full,
5553
      hvp=self.hv_full,
5554
      hypervisor_name=self.op.hypervisor,
5555
    ))
5556

    
5557
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
5558
          self.secondaries)
5559
    return env, nl, nl
5560

    
5561

    
5562
  def CheckPrereq(self):
5563
    """Check prerequisites.
5564

5565
    """
5566
    if (not self.cfg.GetVGName() and
5567
        self.op.disk_template not in constants.DTS_NOT_LVM):
5568
      raise errors.OpPrereqError("Cluster does not support lvm-based"
5569
                                 " instances")
5570

    
5571
    if self.op.mode == constants.INSTANCE_IMPORT:
5572
      src_node = self.op.src_node
5573
      src_path = self.op.src_path
5574

    
5575
      if src_node is None:
5576
        locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
5577
        exp_list = self.rpc.call_export_list(locked_nodes)
5578
        found = False
5579
        for node in exp_list:
5580
          if exp_list[node].fail_msg:
5581
            continue
5582
          if src_path in exp_list[node].payload:
5583
            found = True
5584
            self.op.src_node = src_node = node
5585
            self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
5586
                                                       src_path)
5587
            break
5588
        if not found:
5589
          raise errors.OpPrereqError("No export found for relative path %s" %
5590
                                      src_path)
5591

    
5592
      _CheckNodeOnline(self, src_node)
5593
      result = self.rpc.call_export_info(src_node, src_path)
5594
      result.Raise("No export or invalid export found in dir %s" % src_path)
5595

    
5596
      export_info = objects.SerializableConfigParser.Loads(str(result.payload))
5597
      if not export_info.has_section(constants.INISECT_EXP):
5598
        raise errors.ProgrammerError("Corrupted export config")
5599

    
5600
      ei_version = export_info.get(constants.INISECT_EXP, 'version')
5601
      if (int(ei_version) != constants.EXPORT_VERSION):
5602
        raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
5603
                                   (ei_version, constants.EXPORT_VERSION))
5604

    
5605
      # Check that the new instance doesn't have less disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks))

      self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
      disk_images = []
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = os.path.join(src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, 'name')
      # FIXME: int() here could throw a ValueError on broken exports
      exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = 'nic%d_mac' % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.start and not self.op.ip_check:
      raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
                                 " adding an instance in start mode")

    if self.op.ip_check:
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name))

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC()

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_NET_MIRROR:
      if self.op.snode is None:
        raise errors.OpPrereqError("The networked disk templates need"
                                   " a mirror node")
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be"
                                   " the primary node.")
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    req_size = _ComputeDiskSize(self.op.disk_template,
                                self.disks)

    # Check lv size requirements
    if req_size is not None:
      nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                         self.op.hypervisor)
      for node in nodenames:
        info = nodeinfo[node]
        info.Raise("Cannot get current information from node %s" % node)
        info = info.payload
        vg_free = info.get('vg_free', None)
        if not isinstance(vg_free, int):
          raise errors.OpPrereqError("Can't compute free disk space on"
                                     " node %s" % node)
        if req_size > vg_free:
          raise errors.OpPrereqError("Not enough disk space on target node %s."
                                     " %d MB available, %d MB required" %
                                     (node, vg_free, req_size))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    # os verification
    result = self.rpc.call_os_get(pnode.name, self.op.os_type)
    result.Raise("OS '%s' not in supported os list for primary node %s" %
                 (self.op.os_type, pnode.name), prereq=True)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

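  # The Exec method below performs the actual work in this order: allocate a
  # network port if the hypervisor needs one, generate the disk objects and
  # create the disks, add the instance to the configuration, wait for the
  # disks to sync, run the OS create or import scripts and finally, if
  # requested, start the instance.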
  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    ##if self.op.vnc_bind_address is None:
    ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS

    # this is needed because os.path.join does not accept None arguments
    if self.op.file_storage_dir is None:
      string_file_storage_dir = ""
    else:
      string_file_storage_dir = self.op.file_storage_dir

    # build the full file storage dir path
    file_storage_dir = os.path.normpath(os.path.join(
                                        self.cfg.GetFileStorageDir(),
                                        string_file_storage_dir, instance))

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            )

    feedback_fn("* creating instance disks...")
    try:
      _CreateDisks(self, iobj)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, iobj)
      finally:
        self.cfg.ReleaseDRBDMinors(instance)
        raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj)
    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    feedback_fn("creating os for instance %s on node %s" %
                (instance, pnode_name))

    if iobj.disk_template != constants.DT_DISKLESS:
      if self.op.mode == constants.INSTANCE_CREATE:
        feedback_fn("* running the instance OS create scripts...")
        result = self.rpc.call_instance_os_add(pnode_name, iobj, False)
        result.Raise("Could not add os for instance %s"
                     " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")
        src_node = self.op.src_node
        src_images = self.src_images
        cluster_name = self.cfg.GetClusterName()
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
                                                         src_node, src_images,
                                                         cluster_name)
        msg = import_result.fail_msg
        if msg:
          self.LogWarning("Error while importing the disk images for instance"
                          " %s on node %s: %s" % (instance, pnode_name, msg))
      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


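# The LU below returns the SSH command line to be run on the master node; the
# exact console command comes from the hypervisor's GetShellCommandForConsole
# and is not fixed here. As an illustration only, the result is roughly of
# the form:
#   ssh -t root@<primary node> '<hypervisor console command> <instance name>'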
class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      raise errors.OpExecError("Instance %s is not running." % instance.name)

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)

    # build ssh cmdline
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)


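# Note: LUReplaceDisks itself only validates arguments, sets up locking and
# builds the hooks environment; the actual replacement logic lives in the
# TLReplaceDisks tasklet defined further below.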
class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "mode", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None

    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
      if remote_node is None:
        raise errors.OpPrereqError("Node '%s' not known" %
                                   self.op.remote_node)

      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return env, nl, nl


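# Note: node evacuation is implemented as one TLReplaceDisks tasklet per
# instance that has this node as its secondary, always in
# constants.REPLACE_DISK_CHG mode (change the secondary), so either a new
# secondary node or an iallocator must be given.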
class LUEvacuateNode(LogicalUnit):
  """Relocate the secondary instances from a node.

  """
  HPATH = "node-evacuate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None

    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
                                  self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if self.op.node_name is None:
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name)

    self.needed_locks = {}

    # Declare node locks
    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
      if remote_node is None:
        raise errors.OpPrereqError("Node '%s' not known" %
                                   self.op.remote_node)

      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      raise errors.OpPrereqError("Invalid parameters")

    # Create tasklets for replacing disks for all secondary instances on this
    # node
    names = []
    tasklets = []

    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
      logging.debug("Replacing disks for instance %s", inst.name)
      names.append(inst.name)

      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
                                self.op.iallocator, self.op.remote_node, [])
      tasklets.append(replacer)

    self.tasklets = tasklets
    self.instance_names = names

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    if self.op.remote_node is not None:
      env["NEW_SECONDARY"] = self.op.remote_node
      nl.append(self.op.remote_node)

    return (env, nl, nl)


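# A minimal sketch of how the tasklet below is driven by its users (see
# LUReplaceDisks and LUEvacuateNode above); the instance name shown is
# hypothetical:
#
#   replacer = TLReplaceDisks(self, "instance1.example.com",
#                             constants.REPLACE_DISK_CHG,
#                             self.op.iallocator, self.op.remote_node, [])
#   self.tasklets = [replacer]
#
# The tasklet's CheckPrereq and Exec are then run by the LU machinery instead
# of the owning LU's own methods.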
class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given")

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both")

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node")

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=relocate_from)

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info))

    if len(ial.nodes) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(ial.nodes), ial.required_nodes))

    remote_node_name = ial.nodes[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def _FindFaultyDisks(self, node_name):
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if self.instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances")

    if len(self.instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(self.instance.secondary_nodes))

    secondary_node = self.instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       self.instance.name, secondary_node)

    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.")

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.")

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced")

    if self.mode == constants.REPLACE_DISK_AUTO:
      faulty_primary = self._FindFaultyDisks(self.instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = self.instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = self.instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = self.instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = self.instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = self.instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Check whether disks are valid
    for disk_idx in self.disks:
      self.instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    node_2nd_ip = {}

    for node_name in [self.target_node, self.other_node, self.new_node]:
      if node_name is not None:
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = node_2nd_ip

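  # Note for Exec below: if the instance is down (admin_up is False) its disks
  # are activated first and safely shut down again afterwards, so replacement
  # also works for stopped instances.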
  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (", ".join([str(i) for i in self.disks]), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        return self._ExecDrbd8Secondary()
      else:
        return self._ExecDrbd8DiskOnly()

    finally:
      # Deactivate the instance disks if we're replacing them on a down
      # instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    vgname = self.cfg.GetVGName()
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vgname, names[0]))
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vgname, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = dev.children
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names

  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (dev, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ExecDrbd8DiskOnly(self):
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaces.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

      dev.children = new_lvs

      self.cfg.Update(self.instance)

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return
    # value
    self.lu.LogStep(5, steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance, unlock=True)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    self.lu.LogStep(6, steps_total, "Removing old storage")
    self._RemoveOldStorage(self.target_node, iv_names)

  def _ExecDrbd8Secondary(self):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setups changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r" % (minors,))

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
                                               self.node_secondary_ip,
                                               self.instance.disks)\
                                               [self.instance.primary_node]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return
    # value
    self.lu.LogStep(5, steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance, unlock=True)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    self.lu.LogStep(6, steps_total, "Removing old storage")
    self._RemoveOldStorage(self.target_node, iv_names)


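# Note: this LU only accepts storage types whose valid operations include
# constants.SO_FIX_CONSISTENCY; the repair itself is delegated to the
# storage_execute RPC on the target node after checking that no instance
# using that node has faulty disks elsewhere.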
class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if node_name is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)

    self.op.node_name = node_name

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                node_name, True):
      raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                 " node '%s'" % (instance.name, node_name))

  def CheckPrereq(self):
    """Check prerequisites.

    """
    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type)

    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


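# Note: the LU below consumes the opcode fields listed in its _OP_REQP
# (instance_name, disk index, amount, wait_for_sync); the amount is expressed
# in MiB and must fit in the free space of the volume group on every node
# holding the disk, which CheckPrereq verifies before any node is touched.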
class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      ]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.")

    self.disk = instance.FindDisk(self.op.disk)

    nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                       instance.hypervisor)
    for node in nodenames:
      info = nodeinfo[node]
      info.Raise("Cannot get current information from node %s" % node)
      vg_free = info.payload.get('vg_free', None)
      if not isinstance(vg_free, int):
        raise errors.OpPrereqError("Can't compute free disk space on"
                                   " node %s" % node)
      if self.op.amount > vg_free:
        raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                   " %d MiB available, %d MiB required" %
                                   (node, vg_free, self.op.amount))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
      result.Raise("Grow request failed to node %s" % node)
    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance)
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance)
      if disk_abort:
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                             " status.\nPlease check the instance.")


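# The result of the LU below is a dict keyed by instance name; each value is
# itself a dict with, among others, "config_state", "run_state", "pnode",
# "snodes", "os", "nics", "disks", "hypervisor" and the hv/be parameter
# mappings, as built in Exec.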
class LUQueryInstanceData(NoHooksLU):
  """Query runtime instance data.

  """
  _OP_REQP = ["instances", "static"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'")

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = self.cfg.ExpandInstanceName(name)
        if full_name is None:
          raise errors.OpPrereqError("Instance '%s' not known" % name)
        self.wanted_names.append(full_name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
    else:
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]
    return

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
                      for child in dev.children]
    else:
      dev_children = []

    data = {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

    return data

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    for instance in self.wanted_instances:
      if not self.op.static:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"
      else:
        remote_state = None
      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = [self._ComputeDiskStatus(instance, None, device)
               for device in instance.disks]

      idict = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        }

      result[instance.name] = idict

    return result


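# Note: in the opcode for the LU below, self.op.nics and self.op.disks are
# lists of (operation, parameters) pairs, where the operation is either
# constants.DDM_ADD, constants.DDM_REMOVE or the integer index of an existing
# device to modify, and the parameters are a dict; CheckArguments validates
# exactly this format.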
class LUSetInstanceParams(LogicalUnit):
6980
  """Modifies an instances's parameters.
6981

6982
  """
6983
  HPATH = "instance-modify"
6984
  HTYPE = constants.HTYPE_INSTANCE
6985
  _OP_REQP = ["instance_name"]
6986
  REQ_BGL = False
6987

    
6988
  def CheckArguments(self):
6989
    if not hasattr(self.op, 'nics'):
6990
      self.op.nics = []
6991
    if not hasattr(self.op, 'disks'):
6992
      self.op.disks = []
6993
    if not hasattr(self.op, 'beparams'):
6994
      self.op.beparams = {}
6995
    if not hasattr(self.op, 'hvparams'):
6996
      self.op.hvparams = {}
6997
    self.op.force = getattr(self.op, "force", False)
6998
    if not (self.op.nics or self.op.disks or
6999
            self.op.hvparams or self.op.beparams):
7000
      raise errors.OpPrereqError("No changes submitted")
7001

    
7002
    # Disk validation
7003
    disk_addremove = 0
7004
    for disk_op, disk_dict in self.op.disks:
7005
      if disk_op == constants.DDM_REMOVE:
7006
        disk_addremove += 1
7007
        continue
7008
      elif disk_op == constants.DDM_ADD:
7009
        disk_addremove += 1
7010
      else:
7011
        if not isinstance(disk_op, int):
7012
          raise errors.OpPrereqError("Invalid disk index")
7013
        if not isinstance(disk_dict, dict):
7014
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
7015
          raise errors.OpPrereqError(msg)
7016

    
7017
      if disk_op == constants.DDM_ADD:
7018
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
7019
        if mode not in constants.DISK_ACCESS_SET:
7020
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode)
7021
        size = disk_dict.get('size', None)
7022
        if size is None:
7023
          raise errors.OpPrereqError("Required disk parameter size missing")
7024
        try:
7025
          size = int(size)
7026
        except ValueError, err:
7027
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
7028
                                     str(err))
7029
        disk_dict['size'] = size
7030
      else:
7031
        # modification of disk
7032
        if 'size' in disk_dict:
7033
          raise errors.OpPrereqError("Disk size change not possible, use"
7034
                                     " grow-disk")
7035

    
7036
    if disk_addremove > 1:
7037
      raise errors.OpPrereqError("Only one disk add or remove operation"
7038
                                 " supported at a time")

    # NIC validation
7041
    nic_addremove = 0
7042
    for nic_op, nic_dict in self.op.nics:
7043
      if nic_op == constants.DDM_REMOVE:
7044
        nic_addremove += 1
7045
        continue
7046
      elif nic_op == constants.DDM_ADD:
7047
        nic_addremove += 1
7048
      else:
7049
        if not isinstance(nic_op, int):
7050
          raise errors.OpPrereqError("Invalid nic index")
7051
        if not isinstance(nic_dict, dict):
7052
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
7053
          raise errors.OpPrereqError(msg)
7054

    
7055
      # nic_dict should be a dict
7056
      nic_ip = nic_dict.get('ip', None)
7057
      if nic_ip is not None:
7058
        if nic_ip.lower() == constants.VALUE_NONE:
7059
          nic_dict['ip'] = None
7060
        else:
7061
          if not utils.IsValidIP(nic_ip):
7062
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip)
7063

    
7064
      nic_bridge = nic_dict.get('bridge', None)
7065
      nic_link = nic_dict.get('link', None)
7066
      if nic_bridge and nic_link:
7067
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7068
                                   " at the same time")
7069
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
7070
        nic_dict['bridge'] = None
7071
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
7072
        nic_dict['link'] = None
7073

    
7074
      if nic_op == constants.DDM_ADD:
7075
        nic_mac = nic_dict.get('mac', None)
7076
        if nic_mac is None:
7077
          nic_dict['mac'] = constants.VALUE_AUTO
7078

    
7079
      if 'mac' in nic_dict:
7080
        nic_mac = nic_dict['mac']
7081
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7082
          if not utils.IsValidMac(nic_mac):
7083
            raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac)
7084
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
7085
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
7086
                                     " modifying an existing nic")
7087

    
7088
    if nic_addremove > 1:
7089
      raise errors.OpPrereqError("Only one NIC add or remove operation"
7090
                                 " supported at a time")

  def ExpandNames(self):
7093
    self._ExpandAndLockInstance()
7094
    self.needed_locks[locking.LEVEL_NODE] = []
7095
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7096

    
7097
  def DeclareLocks(self, level):
7098
    if level == locking.LEVEL_NODE:
7099
      self._LockInstancesNodes()
7100

    
7101
  def BuildHooksEnv(self):
7102
    """Build hooks env.
7103

7104
    This runs on the master, primary and secondaries.
7105

7106
    """
7107
    args = dict()
7108
    if constants.BE_MEMORY in self.be_new:
7109
      args['memory'] = self.be_new[constants.BE_MEMORY]
7110
    if constants.BE_VCPUS in self.be_new:
7111
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
7112
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
7113
    # information at all.
7114
    if self.op.nics:
7115
      args['nics'] = []
7116
      nic_override = dict(self.op.nics)
7117
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
7118
      for idx, nic in enumerate(self.instance.nics):
7119
        if idx in nic_override:
7120
          this_nic_override = nic_override[idx]
7121
        else:
7122
          this_nic_override = {}
7123
        if 'ip' in this_nic_override:
7124
          ip = this_nic_override['ip']
7125
        else:
7126
          ip = nic.ip
7127
        if 'mac' in this_nic_override:
7128
          mac = this_nic_override['mac']
7129
        else:
7130
          mac = nic.mac
7131
        if idx in self.nic_pnew:
7132
          nicparams = self.nic_pnew[idx]
7133
        else:
7134
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
7135
        mode = nicparams[constants.NIC_MODE]
7136
        link = nicparams[constants.NIC_LINK]
7137
        args['nics'].append((ip, mac, mode, link))
7138
      if constants.DDM_ADD in nic_override:
7139
        ip = nic_override[constants.DDM_ADD].get('ip', None)
7140
        mac = nic_override[constants.DDM_ADD]['mac']
7141
        nicparams = self.nic_pnew[constants.DDM_ADD]
7142
        mode = nicparams[constants.NIC_MODE]
7143
        link = nicparams[constants.NIC_LINK]
7144
        args['nics'].append((ip, mac, mode, link))
7145
      elif constants.DDM_REMOVE in nic_override:
7146
        del args['nics'][-1]
7147

    
7148
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
7149
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7150
    return env, nl, nl
7151

    
7152
  def _GetUpdatedParams(self, old_params, update_dict,
7153
                        default_values, parameter_types):
7154
    """Return the new params dict for the given params.
7155

7156
    @type old_params: dict
7157
    @param old_params: old parameters
7158
    @type update_dict: dict
7159
    @param update_dict: dict containing new parameter values,
7160
                        or constants.VALUE_DEFAULT to reset the
7161
                        parameter to its default value
7162
    @type default_values: dict
7163
    @param default_values: default values for the filled parameters
7164
    @type parameter_types: dict
7165
    @param parameter_types: dict mapping target dict keys to types
7166
                            in constants.ENFORCEABLE_TYPES
7167
    @rtype: (dict, dict)
7168
    @return: (new_parameters, filled_parameters)
7169

7170
    """
7171
    params_copy = copy.deepcopy(old_params)
7172
    for key, val in update_dict.iteritems():
7173
      if val == constants.VALUE_DEFAULT:
7174
        try:
7175
          del params_copy[key]
7176
        except KeyError:
7177
          pass
7178
      else:
7179
        params_copy[key] = val
7180
    utils.ForceDictType(params_copy, parameter_types)
7181
    params_filled = objects.FillDict(default_values, params_copy)
7182
    return (params_copy, params_filled)
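
    # Illustrative example (not part of the original code), type enforcement
    # aside: with old_params={'memory': 512}, update_dict={'memory':
    # constants.VALUE_DEFAULT, 'vcpus': 2} and default_values={'memory': 128,
    # 'vcpus': 1}, this returns ({'vcpus': 2}, {'memory': 128, 'vcpus': 2}):
    # the first dict keeps only explicit overrides, the second one is filled
    # up with the defaults.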

  def CheckPrereq(self):
7185
    """Check prerequisites.
7186

7187
    This only checks the instance list against the existing names.
7188

7189
    """
7190
    self.force = self.op.force
7191

    
7192
    # checking the new params on the primary/secondary nodes
7193

    
7194
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7195
    cluster = self.cluster = self.cfg.GetClusterInfo()
7196
    assert self.instance is not None, \
7197
      "Cannot retrieve locked instance %s" % self.op.instance_name
7198
    pnode = instance.primary_node
7199
    nodelist = list(instance.all_nodes)
7200

    
7201
    # hvparams processing
7202
    if self.op.hvparams:
7203
      i_hvdict, hv_new = self._GetUpdatedParams(
7204
                             instance.hvparams, self.op.hvparams,
7205
                             cluster.hvparams[instance.hypervisor],
7206
                             constants.HVS_PARAMETER_TYPES)
7207
      # local check
7208
      hypervisor.GetHypervisor(
7209
        instance.hypervisor).CheckParameterSyntax(hv_new)
7210
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
7211
      self.hv_new = hv_new # the new actual values
7212
      self.hv_inst = i_hvdict # the new dict (without defaults)
7213
    else:
7214
      self.hv_new = self.hv_inst = {}
7215

    
7216
    # beparams processing
7217
    if self.op.beparams:
7218
      i_bedict, be_new = self._GetUpdatedParams(
7219
                             instance.beparams, self.op.beparams,
7220
                             cluster.beparams[constants.PP_DEFAULT],
7221
                             constants.BES_PARAMETER_TYPES)
7222
      self.be_new = be_new # the new actual values
7223
      self.be_inst = i_bedict # the new dict (without defaults)
7224
    else:
7225
      self.be_new = self.be_inst = {}
7226

    
7227
    self.warn = []
7228

    
7229
    if constants.BE_MEMORY in self.op.beparams and not self.force:
7230
      mem_check_list = [pnode]
7231
      if be_new[constants.BE_AUTO_BALANCE]:
7232
        # either we changed auto_balance to yes or it was from before
7233
        mem_check_list.extend(instance.secondary_nodes)
7234
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
7235
                                                  instance.hypervisor)
7236
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
7237
                                         instance.hypervisor)
7238
      pninfo = nodeinfo[pnode]
7239
      msg = pninfo.fail_msg
7240
      if msg:
7241
        # Assume the primary node is unreachable and go ahead
7242
        self.warn.append("Can't get info from primary node %s: %s" %
7243
                         (pnode,  msg))
7244
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
7245
        self.warn.append("Node data from primary node %s doesn't contain"
7246
                         " free memory information" % pnode)
7247
      elif instance_info.fail_msg:
7248
        self.warn.append("Can't get instance runtime information: %s" %
7249
                        instance_info.fail_msg)
7250
      else:
7251
        if instance_info.payload:
7252
          current_mem = int(instance_info.payload['memory'])
7253
        else:
7254
          # Assume instance not running
7255
          # (there is a slight race condition here, but it's not very probable,
7256
          # and we have no other way to check)
7257
          current_mem = 0
7258
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
7259
                    pninfo.payload['memory_free'])
7260
        if miss_mem > 0:
7261
          raise errors.OpPrereqError("This change will prevent the instance"
7262
                                     " from starting, due to %d MB of memory"
7263
                                     " missing on its primary node" % miss_mem)

      if be_new[constants.BE_AUTO_BALANCE]:
7266
        for node, nres in nodeinfo.items():
7267
          if node not in instance.secondary_nodes:
7268
            continue
7269
          msg = nres.fail_msg
7270
          if msg:
7271
            self.warn.append("Can't get info from secondary node %s: %s" %
7272
                             (node, msg))
7273
          elif not isinstance(nres.payload.get('memory_free', None), int):
7274
            self.warn.append("Secondary node %s didn't return free"
7275
                             " memory information" % node)
7276
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
7277
            self.warn.append("Not enough memory to failover instance to"
7278
                             " secondary node %s" % node)
7279

    
7280
    # NIC processing
7281
    self.nic_pnew = {}
7282
    self.nic_pinst = {}
7283
    for nic_op, nic_dict in self.op.nics:
7284
      if nic_op == constants.DDM_REMOVE:
7285
        if not instance.nics:
7286
          raise errors.OpPrereqError("Instance has no NICs, cannot remove")
7287
        continue
7288
      if nic_op != constants.DDM_ADD:
7289
        # an existing nic
7290
        if nic_op < 0 or nic_op >= len(instance.nics):
7291
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
7292
                                     " are 0 to %d" %
7293
                                     (nic_op, len(instance.nics)))
7294
        old_nic_params = instance.nics[nic_op].nicparams
7295
        old_nic_ip = instance.nics[nic_op].ip
7296
      else:
7297
        old_nic_params = {}
7298
        old_nic_ip = None
7299

    
7300
      update_params_dict = dict([(key, nic_dict[key])
7301
                                 for key in constants.NICS_PARAMETERS
7302
                                 if key in nic_dict])
7303

    
7304
      if 'bridge' in nic_dict:
7305
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
7306

    
7307
      new_nic_params, new_filled_nic_params = \
7308
          self._GetUpdatedParams(old_nic_params, update_params_dict,
7309
                                 cluster.nicparams[constants.PP_DEFAULT],
7310
                                 constants.NICS_PARAMETER_TYPES)
7311
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
7312
      self.nic_pinst[nic_op] = new_nic_params
7313
      self.nic_pnew[nic_op] = new_filled_nic_params
7314
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
7315

    
7316
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
7317
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
7318
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
7319
        if msg:
7320
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
7321
          if self.force:
7322
            self.warn.append(msg)
7323
          else:
7324
            raise errors.OpPrereqError(msg)
7325
      if new_nic_mode == constants.NIC_MODE_ROUTED:
7326
        if 'ip' in nic_dict:
7327
          nic_ip = nic_dict['ip']
7328
        else:
7329
          nic_ip = old_nic_ip
7330
        if nic_ip is None:
7331
          raise errors.OpPrereqError('Cannot set the nic ip to None'
7332
                                     ' on a routed nic')
7333
      if 'mac' in nic_dict:
7334
        nic_mac = nic_dict['mac']
7335
        if nic_mac is None:
7336
          raise errors.OpPrereqError('Cannot set the nic mac to None')
7337
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7338
          # otherwise generate the mac
7339
          nic_dict['mac'] = self.cfg.GenerateMAC()
7340
        else:
7341
          # or validate/reserve the current one
7342
          if self.cfg.IsMacInUse(nic_mac):
7343
            raise errors.OpPrereqError("MAC address %s already in use"
7344
                                       " in cluster" % nic_mac)
7345

    
7346
    # DISK processing
7347
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
7348
      raise errors.OpPrereqError("Disk operations not supported for"
7349
                                 " diskless instances")
7350
    for disk_op, disk_dict in self.op.disks:
7351
      if disk_op == constants.DDM_REMOVE:
7352
        if len(instance.disks) == 1:
7353
          raise errors.OpPrereqError("Cannot remove the last disk of"
7354
                                     " an instance")
7355
        ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
7356
        ins_l = ins_l[pnode]
7357
        msg = ins_l.fail_msg
7358
        if msg:
7359
          raise errors.OpPrereqError("Can't contact node %s: %s" %
7360
                                     (pnode, msg))
7361
        if instance.name in ins_l.payload:
7362
          raise errors.OpPrereqError("Instance is running, can't remove"
7363
                                     " disks.")
7364

    
7365
      if (disk_op == constants.DDM_ADD and
7366
          len(instance.disks) >= constants.MAX_DISKS):
7367
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
7368
                                   " add more" % constants.MAX_DISKS)
7369
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
7370
        # an existing disk
7371
        if disk_op < 0 or disk_op >= len(instance.disks):
7372
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
7373
                                     " are 0 to %d" %
7374
                                     (disk_op, len(instance.disks)))
7375

    
7376
    return
7377

    
7378
  def Exec(self, feedback_fn):
7379
    """Modifies an instance.
7380

7381
    All parameters take effect only at the next restart of the instance.
7382

7383
    """
7384
    # Process here the warnings from CheckPrereq, as we don't have a
7385
    # feedback_fn there.
7386
    for warn in self.warn:
7387
      feedback_fn("WARNING: %s" % warn)

    result = []
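    # 'result' collects (parameter, new value) pairs that are reported back to
    # the caller, e.g. ("disk/1", "add:size=1024,mode=rw") or
    # ("be/memory", 2048) (the concrete values here are illustrative only).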
7390
    instance = self.instance
7391
    cluster = self.cluster
7392
    # disk changes
7393
    for disk_op, disk_dict in self.op.disks:
7394
      if disk_op == constants.DDM_REMOVE:
7395
        # remove the last disk
7396
        device = instance.disks.pop()
7397
        device_idx = len(instance.disks)
7398
        for node, disk in device.ComputeNodeTree(instance.primary_node):
7399
          self.cfg.SetDiskID(disk, node)
7400
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
7401
          if msg:
7402
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
7403
                            " continuing anyway", device_idx, node, msg)
7404
        result.append(("disk/%d" % device_idx, "remove"))
7405
      elif disk_op == constants.DDM_ADD:
7406
        # add a new disk
7407
        if instance.disk_template == constants.DT_FILE:
7408
          file_driver, file_path = instance.disks[0].logical_id
7409
          file_path = os.path.dirname(file_path)
7410
        else:
7411
          file_driver = file_path = None
7412
        disk_idx_base = len(instance.disks)
7413
        new_disk = _GenerateDiskTemplate(self,
7414
                                         instance.disk_template,
7415
                                         instance.name, instance.primary_node,
7416
                                         instance.secondary_nodes,
7417
                                         [disk_dict],
7418
                                         file_path,
7419
                                         file_driver,
7420
                                         disk_idx_base)[0]
7421
        instance.disks.append(new_disk)
7422
        info = _GetInstanceInfoText(instance)
7423

    
7424
        logging.info("Creating volume %s for instance %s",
7425
                     new_disk.iv_name, instance.name)
7426
        # Note: this needs to be kept in sync with _CreateDisks
7427
        #HARDCODE
7428
        for node in instance.all_nodes:
7429
          f_create = node == instance.primary_node
7430
          try:
7431
            _CreateBlockDev(self, node, instance, new_disk,
7432
                            f_create, info, f_create)
7433
          except errors.OpExecError, err:
7434
            self.LogWarning("Failed to create volume %s (%s) on"
7435
                            " node %s: %s",
7436
                            new_disk.iv_name, new_disk, node, err)
7437
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
7438
                       (new_disk.size, new_disk.mode)))
7439
      else:
7440
        # change a given disk
7441
        instance.disks[disk_op].mode = disk_dict['mode']
7442
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
7443
    # NIC changes
7444
    for nic_op, nic_dict in self.op.nics:
7445
      if nic_op == constants.DDM_REMOVE:
7446
        # remove the last nic
7447
        del instance.nics[-1]
7448
        result.append(("nic.%d" % len(instance.nics), "remove"))
7449
      elif nic_op == constants.DDM_ADD:
7450
        # mac and bridge should be set by now
7451
        mac = nic_dict['mac']
7452
        ip = nic_dict.get('ip', None)
7453
        nicparams = self.nic_pinst[constants.DDM_ADD]
7454
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
7455
        instance.nics.append(new_nic)
7456
        result.append(("nic.%d" % (len(instance.nics) - 1),
7457
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
7458
                       (new_nic.mac, new_nic.ip,
7459
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
7460
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
7461
                       )))
7462
      else:
7463
        for key in 'mac', 'ip':
7464
          if key in nic_dict:
7465
            setattr(instance.nics[nic_op], key, nic_dict[key])
7466
        if nic_op in self.nic_pnew:
7467
          instance.nics[nic_op].nicparams = self.nic_pnew[nic_op]
7468
        for key, val in nic_dict.iteritems():
7469
          result.append(("nic.%s/%d" % (key, nic_op), val))
7470

    
7471
    # hvparams changes
7472
    if self.op.hvparams:
7473
      instance.hvparams = self.hv_inst
7474
      for key, val in self.op.hvparams.iteritems():
7475
        result.append(("hv/%s" % key, val))
7476

    
7477
    # beparams changes
7478
    if self.op.beparams:
7479
      instance.beparams = self.be_inst
7480
      for key, val in self.op.beparams.iteritems():
7481
        result.append(("be/%s" % key, val))
7482

    
7483
    self.cfg.Update(instance)
7484

    
7485
    return result
7486

    
7487

    
7488
class LUQueryExports(NoHooksLU):
  """Query the exports list

  """
  _OP_REQP = ['nodes']
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
7503

    
7504
  def CheckPrereq(self):
7505
    """Check prerequisites.
7506

7507
    """
7508
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
7509

    
7510
  def Exec(self, feedback_fn):
7511
    """Compute the list of all the exported system images.
7512

7513
    @rtype: dict
7514
    @return: a dictionary with the structure node->(export-list)
7515
        where export-list is a list of the instances exported on
7516
        that node.
7517

7518
    """
    rpcresult = self.rpc.call_export_list(self.nodes)
7520
    result = {}
7521
    for node in rpcresult:
7522
      if rpcresult[node].fail_msg:
7523
        result[node] = False
7524
      else:
7525
        result[node] = rpcresult[node].payload
7526

    
7527
    return result
7528

    
7529

    
7530
class LUExportInstance(LogicalUnit):
7531
  """Export an instance to an image in the cluster.
7532

7533
  """
7534
  HPATH = "instance-export"
7535
  HTYPE = constants.HTYPE_INSTANCE
7536
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
7537
  REQ_BGL = False
7538

    
7539
  def ExpandNames(self):
7540
    self._ExpandAndLockInstance()
7541
    # FIXME: lock only instance primary and destination node
7542
    #
7543
    # Sad but true, for now we have to lock all nodes, as we don't know where
    # the previous export might be, and in this LU we search for it and
7545
    # remove it from its current node. In the future we could fix this by:
7546
    #  - making a tasklet to search (share-lock all), then create the new one,
7547
    #    then one to remove, after
7548
    #  - removing the removal operation altogether
7549
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7550

    
7551
  def DeclareLocks(self, level):
7552
    """Last minute lock declaration."""
7553
    # All nodes are locked anyway, so nothing to do here.
7554

    
7555
  def BuildHooksEnv(self):
7556
    """Build hooks env.
7557

7558
    This will run on the master, primary node and target node.
7559

7560
    """
7561
    env = {
7562
      "EXPORT_NODE": self.op.target_node,
7563
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
7564
      }
7565
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7566
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
7567
          self.op.target_node]
7568
    return env, nl, nl
7569

    
7570
  def CheckPrereq(self):
7571
    """Check prerequisites.
7572

7573
    This checks that the instance and node names are valid.
7574

7575
    """
7576
    instance_name = self.op.instance_name
7577
    self.instance = self.cfg.GetInstanceInfo(instance_name)
7578
    assert self.instance is not None, \
7579
          "Cannot retrieve locked instance %s" % self.op.instance_name
7580
    _CheckNodeOnline(self, self.instance.primary_node)
7581

    
7582
    self.dst_node = self.cfg.GetNodeInfo(
7583
      self.cfg.ExpandNodeName(self.op.target_node))
7584

    
7585
    if self.dst_node is None:
7586
      # This means the node name is wrong, not that the node is unlocked
7587
      raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node)
7588
    _CheckNodeOnline(self, self.dst_node.name)
7589
    _CheckNodeNotDrained(self, self.dst_node.name)
7590

    
7591
    # instance disk type verification
7592
    for disk in self.instance.disks:
7593
      if disk.dev_type == constants.LD_FILE:
7594
        raise errors.OpPrereqError("Export not supported for instances with"
7595
                                   " file-based disks")
7596

    
7597
  def Exec(self, feedback_fn):
7598
    """Export an instance to an image in the cluster.
7599

7600
    """
    instance = self.instance
7602
    dst_node = self.dst_node
7603
    src_node = instance.primary_node
7604

    
7605
    if self.op.shutdown:
7606
      # shutdown the instance, but not the disks
7607
      feedback_fn("Shutting down instance %s" % instance.name)
7608
      result = self.rpc.call_instance_shutdown(src_node, instance)
7609
      result.Raise("Could not shutdown instance %s on"
7610
                   " node %s" % (instance.name, src_node))
7611

    
7612
    vgname = self.cfg.GetVGName()
7613

    
7614
    snap_disks = []
7615

    
7616
    # set the disks ID correctly since call_instance_start needs the
7617
    # correct drbd minor to create the symlinks
7618
    for disk in instance.disks:
7619
      self.cfg.SetDiskID(disk, src_node)
7620

    
7621
    # per-disk results
7622
    dresults = []
7623
    try:
7624
      for idx, disk in enumerate(instance.disks):
7625
        feedback_fn("Creating a snapshot of disk/%s on node %s" %
7626
                    (idx, src_node))
7627

    
7628
        # result.payload will be a snapshot of an lvm leaf of the one we passed
7629
        result = self.rpc.call_blockdev_snapshot(src_node, disk)
7630
        msg = result.fail_msg
7631
        if msg:
7632
          self.LogWarning("Could not snapshot disk/%s on node %s: %s",
7633
                          idx, src_node, msg)
7634
          snap_disks.append(False)
7635
        else:
7636
          disk_id = (vgname, result.payload)
7637
          new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
7638
                                 logical_id=disk_id, physical_id=disk_id,
7639
                                 iv_name=disk.iv_name)
7640
          snap_disks.append(new_dev)
7641

    
7642
    finally:
7643
      if self.op.shutdown and instance.admin_up:
7644
        feedback_fn("Starting instance %s" % instance.name)
7645
        result = self.rpc.call_instance_start(src_node, instance, None, None)
7646
        msg = result.fail_msg
7647
        if msg:
7648
          _ShutdownInstanceDisks(self, instance)
7649
          raise errors.OpExecError("Could not start instance: %s" % msg)
7650

    
7651
    # TODO: check for size
7652

    
7653
    cluster_name = self.cfg.GetClusterName()
7654
    for idx, dev in enumerate(snap_disks):
7655
      feedback_fn("Exporting snapshot %s from %s to %s" %
7656
                  (idx, src_node, dst_node.name))
7657
      if dev:
7658
        result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
7659
                                               instance, cluster_name, idx)
7660
        msg = result.fail_msg
7661
        if msg:
7662
          self.LogWarning("Could not export disk/%s from node %s to"
7663
                          " node %s: %s", idx, src_node, dst_node.name, msg)
7664
          dresults.append(False)
7665
        else:
7666
          dresults.append(True)
7667
        msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
7668
        if msg:
7669
          self.LogWarning("Could not remove snapshot for disk/%d from node"
7670
                          " %s: %s", idx, src_node, msg)
7671
      else:
7672
        dresults.append(False)
7673

    
7674
    feedback_fn("Finalizing export on %s" % dst_node.name)
7675
    result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
7676
    fin_resu = True
7677
    msg = result.fail_msg
7678
    if msg:
7679
      self.LogWarning("Could not finalize export for instance %s"
7680
                      " on node %s: %s", instance.name, dst_node.name, msg)
7681
      fin_resu = False
7682

    
7683
    nodelist = self.cfg.GetNodeList()
7684
    nodelist.remove(dst_node.name)
7685

    
7686
    # on one-node clusters nodelist will be empty after the removal
7687
    # if we proceed the backup would be removed because OpQueryExports
7688
    # substitutes an empty list with the full cluster node list.
7689
    iname = instance.name
7690
    if nodelist:
7691
      feedback_fn("Removing old exports for instance %s" % iname)
7692
      exportlist = self.rpc.call_export_list(nodelist)
7693
      for node in exportlist:
7694
        if exportlist[node].fail_msg:
7695
          continue
7696
        if iname in exportlist[node].payload:
7697
          msg = self.rpc.call_export_remove(node, iname).fail_msg
7698
          if msg:
7699
            self.LogWarning("Could not remove older export for instance %s"
7700
                            " on node %s: %s", iname, node, msg)
7701
    return fin_resu, dresults
7702

    
7703

    
7704
class LURemoveExport(NoHooksLU):
7705
  """Remove exports related to the named instance.
7706

7707
  """
7708
  _OP_REQP = ["instance_name"]
7709
  REQ_BGL = False
7710

    
7711
  def ExpandNames(self):
7712
    self.needed_locks = {}
7713
    # We need all nodes to be locked in order for RemoveExport to work, but we
7714
    # don't need to lock the instance itself, as nothing will happen to it (and
7715
    # we can remove exports also for a removed instance)
7716
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7717

    
7718
  def CheckPrereq(self):
7719
    """Check prerequisites.
7720
    """
7721
    pass
7722

    
7723
  def Exec(self, feedback_fn):
7724
    """Remove any export.
7725

7726
    """
7727
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
7728
    # If the instance was not found we'll try with the name that was passed in.
7729
    # This will only work if it was an FQDN, though.
7730
    fqdn_warn = False
7731
    if not instance_name:
7732
      fqdn_warn = True
7733
      instance_name = self.op.instance_name
7734

    
7735
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7736
    exportlist = self.rpc.call_export_list(locked_nodes)
7737
    found = False
7738
    for node in exportlist:
7739
      msg = exportlist[node].fail_msg
7740
      if msg:
7741
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
7742
        continue
7743
      if instance_name in exportlist[node].payload:
7744
        found = True
7745
        result = self.rpc.call_export_remove(node, instance_name)
7746
        msg = result.fail_msg
7747
        if msg:
7748
          logging.error("Could not remove export for instance %s"
7749
                        " on node %s: %s", instance_name, node, msg)
7750

    
7751
    if fqdn_warn and not found:
7752
      feedback_fn("Export not found. If trying to remove an export belonging"
7753
                  " to a deleted instance please use its Fully Qualified"
7754
                  " Domain Name.")
7755

    
7756

    
7757
class TagsLU(NoHooksLU):
7758
  """Generic tags LU.
7759

7760
  This is an abstract class which is the parent of all the other tags LUs.
7761

7762
  """
7763

    
7764
  def ExpandNames(self):
7765
    self.needed_locks = {}
7766
    if self.op.kind == constants.TAG_NODE:
7767
      name = self.cfg.ExpandNodeName(self.op.name)
7768
      if name is None:
7769
        raise errors.OpPrereqError("Invalid node name (%s)" %
7770
                                   (self.op.name,))
7771
      self.op.name = name
7772
      self.needed_locks[locking.LEVEL_NODE] = name
7773
    elif self.op.kind == constants.TAG_INSTANCE:
7774
      name = self.cfg.ExpandInstanceName(self.op.name)
7775
      if name is None:
7776
        raise errors.OpPrereqError("Invalid instance name (%s)" %
7777
                                   (self.op.name,))
7778
      self.op.name = name
7779
      self.needed_locks[locking.LEVEL_INSTANCE] = name
7780

    
7781
  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind))
7794

    
7795

    
7796
class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_REQP = ["kind", "name"]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())
7808

    
7809

    
7810
class LUSearchTags(NoHooksLU):
7811
  """Searches the tags for a given pattern.
7812

7813
  """
7814
  _OP_REQP = ["pattern"]
7815
  REQ_BGL = False
7816

    
7817
  def ExpandNames(self):
7818
    self.needed_locks = {}
7819

    
7820
  def CheckPrereq(self):
7821
    """Check prerequisites.
7822

7823
    This checks the pattern passed for validity by compiling it.
7824

7825
    """
7826
    try:
7827
      self.re = re.compile(self.op.pattern)
7828
    except re.error, err:
7829
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
7830
                                 (self.op.pattern, err))
7831

    
7832
  def Exec(self, feedback_fn):
7833
    """Returns the tag list.
7834

7835
    """
7836
    cfg = self.cfg
7837
    tgts = [("/cluster", cfg.GetClusterInfo())]
7838
    ilist = cfg.GetAllInstancesInfo().values()
7839
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
7840
    nlist = cfg.GetAllNodesInfo().values()
7841
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
7842
    results = []
7843
    for path, target in tgts:
7844
      for tag in target.GetTags():
7845
        if self.re.search(tag):
7846
          results.append((path, tag))
7847
    return results
7848

    
7849

    
7850
class LUAddTags(TagsLU):
7851
  """Sets a tag on a given object.
7852

7853
  """
7854
  _OP_REQP = ["kind", "name", "tags"]
7855
  REQ_BGL = False
7856

    
7857
  def CheckPrereq(self):
7858
    """Check prerequisites.
7859

7860
    This checks the type and length of the tag name and value.
7861

7862
    """
7863
    TagsLU.CheckPrereq(self)
7864
    for tag in self.op.tags:
7865
      objects.TaggableObject.ValidateTag(tag)
7866

    
7867
  def Exec(self, feedback_fn):
7868
    """Sets the tag.
7869

7870
    """
7871
    try:
7872
      for tag in self.op.tags:
7873
        self.target.AddTag(tag)
7874
    except errors.TagError, err:
7875
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
7876
    try:
7877
      self.cfg.Update(self.target)
7878
    except errors.ConfigurationError:
7879
      raise errors.OpRetryError("There has been a modification to the"
7880
                                " config file and the operation has been"
7881
                                " aborted. Please retry.")
7882

    
7883

    
7884
class LUDelTags(TagsLU):
7885
  """Delete a list of tags from a given object.
7886

7887
  """
7888
  _OP_REQP = ["kind", "name", "tags"]
7889
  REQ_BGL = False
7890

    
7891
  def CheckPrereq(self):
7892
    """Check prerequisites.
7893

7894
    This checks that we have the given tag.
7895

7896
    """
7897
    TagsLU.CheckPrereq(self)
7898
    for tag in self.op.tags:
7899
      objects.TaggableObject.ValidateTag(tag)
7900
    del_tags = frozenset(self.op.tags)
7901
    cur_tags = self.target.GetTags()
7902
    if not del_tags <= cur_tags:
7903
      diff_tags = del_tags - cur_tags
7904
      diff_names = ["'%s'" % tag for tag in diff_tags]
7905
      diff_names.sort()
7906
      raise errors.OpPrereqError("Tag(s) %s not found" %
7907
                                 (",".join(diff_names)))
7908

    
7909
  def Exec(self, feedback_fn):
7910
    """Remove the tag from the object.
7911

7912
    """
7913
    for tag in self.op.tags:
7914
      self.target.RemoveTag(tag)
7915
    try:
7916
      self.cfg.Update(self.target)
7917
    except errors.ConfigurationError:
7918
      raise errors.OpRetryError("There has been a modification to the"
7919
                                " config file and the operation has been"
7920
                                " aborted. Please retry.")
7921

    
7922

    
7923
class LUTestDelay(NoHooksLU):
7924
  """Sleep for a specified amount of time.
7925

7926
  This LU sleeps on the master and/or nodes for a specified amount of
7927
  time.
7928

7929
  """
7930
  _OP_REQP = ["duration", "on_master", "on_nodes"]
7931
  REQ_BGL = False
7932

    
7933
  def ExpandNames(self):
7934
    """Expand names and set required locks.
7935

7936
    This expands the node list, if any.
7937

7938
    """
7939
    self.needed_locks = {}
7940
    if self.op.on_nodes:
7941
      # _GetWantedNodes can be used here, but is not always appropriate to use
7942
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
7943
      # more information.
7944
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
7945
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
7946

    
7947
  def CheckPrereq(self):
7948
    """Check prerequisites.
7949

7950
    """
7951

    
7952
  def Exec(self, feedback_fn):
7953
    """Do the actual sleep.
7954

7955
    """
7956
    if self.op.on_master:
7957
      if not utils.TestDelay(self.op.duration):
7958
        raise errors.OpExecError("Error during master delay test")
7959
    if self.op.on_nodes:
7960
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
7961
      for node, node_result in result.items():
7962
        node_result.Raise("Failure during rpc call to node %s" % node)
7963

    
7964

    
7965
class IAllocator(object):
7966
  """IAllocator framework.
7967

7968
  An IAllocator instance has four sets of attributes:
7969
    - cfg that is needed to query the cluster
7970
    - input data (all members of the _KEYS class attribute are required)
7971
    - four buffer attributes (in|out_data|text), that represent the
7972
      input (to the external script) in text and data structure format,
7973
      and the output from it, again in two formats
7974
    - the result variables from the script (success, info, nodes) for
7975
      easy usage
7976

7977
  """
7978
  _ALLO_KEYS = [
7979
    "mem_size", "disks", "disk_template",
7980
    "os", "tags", "nics", "vcpus", "hypervisor",
7981
    ]
7982
  _RELO_KEYS = [
7983
    "relocate_from",
7984
    ]
7985

    
7986
  def __init__(self, cfg, rpc, mode, name, **kwargs):
7987
    self.cfg = cfg
7988
    self.rpc = rpc
7989
    # init buffer variables
7990
    self.in_text = self.out_text = self.in_data = self.out_data = None
7991
    # init all input fields so that pylint is happy
7992
    self.mode = mode
7993
    self.name = name
7994
    self.mem_size = self.disks = self.disk_template = None
7995
    self.os = self.tags = self.nics = self.vcpus = None
7996
    self.hypervisor = None
7997
    self.relocate_from = None
7998
    # computed fields
7999
    self.required_nodes = None
8000
    # init result fields
8001
    self.success = self.info = self.nodes = None
8002
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8003
      keyset = self._ALLO_KEYS
8004
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
8005
      keyset = self._RELO_KEYS
8006
    else:
8007
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
8008
                                   " IAllocator" % self.mode)
8009
    for key in kwargs:
8010
      if key not in keyset:
8011
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
8012
                                     " IAllocator" % key)
8013
      setattr(self, key, kwargs[key])
8014
    for key in keyset:
8015
      if key not in kwargs:
8016
        raise errors.ProgrammerError("Missing input parameter '%s' to"
8017
                                     " IAllocator" % key)
8018
    self._BuildInputData()
8019

    
8020
  def _ComputeClusterData(self):
8021
    """Compute the generic allocator input data.
8022

8023
    This is the data that is independent of the actual operation.
8024

8025
    """
8026
    cfg = self.cfg
8027
    cluster_info = cfg.GetClusterInfo()
8028
    # cluster data
8029
    data = {
8030
      "version": constants.IALLOCATOR_VERSION,
8031
      "cluster_name": cfg.GetClusterName(),
8032
      "cluster_tags": list(cluster_info.GetTags()),
8033
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
8034
      # we don't have job IDs
8035
      }
8036
    iinfo = cfg.GetAllInstancesInfo().values()
8037
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
8038

    
8039
    # node data
8040
    node_results = {}
8041
    node_list = cfg.GetNodeList()
8042

    
8043
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8044
      hypervisor_name = self.hypervisor
8045
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
8046
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
8047

    
8048
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
8049
                                        hypervisor_name)
8050
    node_iinfo = \
8051
      self.rpc.call_all_instances_info(node_list,
8052
                                       cluster_info.enabled_hypervisors)
8053
    for nname, nresult in node_data.items():
8054
      # first fill in static (config-based) values
8055
      ninfo = cfg.GetNodeInfo(nname)
8056
      pnr = {
8057
        "tags": list(ninfo.GetTags()),
8058
        "primary_ip": ninfo.primary_ip,
8059
        "secondary_ip": ninfo.secondary_ip,
8060
        "offline": ninfo.offline,
8061
        "drained": ninfo.drained,
8062
        "master_candidate": ninfo.master_candidate,
8063
        }
8064

    
8065
      if not (ninfo.offline or ninfo.drained):
8066
        nresult.Raise("Can't get data for node %s" % nname)
8067
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
8068
                                nname)
8069
        remote_info = nresult.payload
8070

    
8071
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
8072
                     'vg_size', 'vg_free', 'cpu_total']:
8073
          if attr not in remote_info:
8074
            raise errors.OpExecError("Node '%s' didn't return attribute"
8075
                                     " '%s'" % (nname, attr))
8076
          if not isinstance(remote_info[attr], int):
8077
            raise errors.OpExecError("Node '%s' returned invalid value"
8078
                                     " for '%s': %s" %
8079
                                     (nname, attr, remote_info[attr]))
8080
        # compute memory used by primary instances
8081
        i_p_mem = i_p_up_mem = 0
8082
        for iinfo, beinfo in i_list:
8083
          if iinfo.primary_node == nname:
8084
            i_p_mem += beinfo[constants.BE_MEMORY]
8085
            if iinfo.name not in node_iinfo[nname].payload:
8086
              i_used_mem = 0
8087
            else:
8088
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
8089
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
8090
            remote_info['memory_free'] -= max(0, i_mem_diff)
8091

    
8092
            if iinfo.admin_up:
8093
              i_p_up_mem += beinfo[constants.BE_MEMORY]
8094

    
8095
        # compute memory used by instances
8096
        pnr_dyn = {
8097
          "total_memory": remote_info['memory_total'],
8098
          "reserved_memory": remote_info['memory_dom0'],
8099
          "free_memory": remote_info['memory_free'],
8100
          "total_disk": remote_info['vg_size'],
8101
          "free_disk": remote_info['vg_free'],
8102
          "total_cpus": remote_info['cpu_total'],
8103
          "i_pri_memory": i_p_mem,
8104
          "i_pri_up_memory": i_p_up_mem,
8105
          }
8106
        pnr.update(pnr_dyn)
8107

    
8108
      node_results[nname] = pnr
8109
    data["nodes"] = node_results
8110

    
8111
    # instance data
8112
    instance_data = {}
8113
    for iinfo, beinfo in i_list:
8114
      nic_data = []
8115
      for nic in iinfo.nics:
8116
        filled_params = objects.FillDict(
8117
            cluster_info.nicparams[constants.PP_DEFAULT],
8118
            nic.nicparams)
8119
        nic_dict = {"mac": nic.mac,
8120
                    "ip": nic.ip,
8121
                    "mode": filled_params[constants.NIC_MODE],
8122
                    "link": filled_params[constants.NIC_LINK],
8123
                   }
8124
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
8125
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
8126
        nic_data.append(nic_dict)
8127
      pir = {
8128
        "tags": list(iinfo.GetTags()),
8129
        "admin_up": iinfo.admin_up,
8130
        "vcpus": beinfo[constants.BE_VCPUS],
8131
        "memory": beinfo[constants.BE_MEMORY],
8132
        "os": iinfo.os,
8133
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
8134
        "nics": nic_data,
8135
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
8136
        "disk_template": iinfo.disk_template,
8137
        "hypervisor": iinfo.hypervisor,
8138
        }
8139
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
8140
                                                 pir["disks"])
8141
      instance_data[iinfo.name] = pir
8142

    
8143
    data["instances"] = instance_data
8144

    
8145
    self.in_data = data
8146

    
8147
  def _AddNewInstance(self):
8148
    """Add new instance data to allocator structure.
8149

8150
    This in combination with _AllocatorGetClusterData will create the
8151
    correct structure needed as input for the allocator.
8152

8153
    The checks for the completeness of the opcode must have already been
8154
    done.
8155

8156
    """
8157
    data = self.in_data
8158

    
8159
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
8160

    
8161
    if self.disk_template in constants.DTS_NET_MIRROR:
8162
      self.required_nodes = 2
8163
    else:
8164
      self.required_nodes = 1
8165
    request = {
8166
      "type": "allocate",
8167
      "name": self.name,
8168
      "disk_template": self.disk_template,
8169
      "tags": self.tags,
8170
      "os": self.os,
8171
      "vcpus": self.vcpus,
8172
      "memory": self.mem_size,
8173
      "disks": self.disks,
8174
      "disk_space_total": disk_space,
8175
      "nics": self.nics,
8176
      "required_nodes": self.required_nodes,
8177
      }
8178
    data["request"] = request

  def _AddRelocateInstance(self):
8181
    """Add relocate instance data to allocator structure.
8182

8183
    This in combination with _IAllocatorGetClusterData will create the
8184
    correct structure needed as input for the allocator.
8185

8186
    The checks for the completeness of the opcode must have already been
8187
    done.
8188

8189
    """
8190
    instance = self.cfg.GetInstanceInfo(self.name)
8191
    if instance is None:
8192
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
8193
                                   " IAllocator" % self.name)
8194

    
8195
    if instance.disk_template not in constants.DTS_NET_MIRROR:
8196
      raise errors.OpPrereqError("Can't relocate non-mirrored instances")
8197

    
8198
    if len(instance.secondary_nodes) != 1:
8199
      raise errors.OpPrereqError("Instance has not exactly one secondary node")
8200

    
8201
    self.required_nodes = 1
8202
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
8203
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
8204

    
8205
    request = {
8206
      "type": "relocate",
8207
      "name": self.name,
8208
      "disk_space_total": disk_space,
8209
      "required_nodes": self.required_nodes,
8210
      "relocate_from": self.relocate_from,
8211
      }
8212
    self.in_data["request"] = request
8213

    
8214
  def _BuildInputData(self):
8215
    """Build input data structures.
8216

8217
    """
8218
    self._ComputeClusterData()
8219

    
8220
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8221
      self._AddNewInstance()
8222
    else:
8223
      self._AddRelocateInstance()
8224

    
8225
    self.in_text = serializer.Dump(self.in_data)
8226

    
8227
  def Run(self, name, validate=True, call_fn=None):
8228
    """Run an instance allocator and return the results.
8229

8230
    """
8231
    if call_fn is None:
8232
      call_fn = self.rpc.call_iallocator_runner
8233

    
8234
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
8235
    result.Raise("Failure while running the iallocator script")
8236

    
8237
    self.out_text = result.payload
8238
    if validate:
8239
      self._ValidateResult()
8240

    
8241
  def _ValidateResult(self):
8242
    """Process the allocator results.
8243

8244
    This will process and if successful save the result in
8245
    self.out_data and the other parameters.
8246

8247
    """
8248
    try:
8249
      rdict = serializer.Load(self.out_text)
8250
    except Exception, err:
8251
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
8252

    
8253
    if not isinstance(rdict, dict):
8254
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
8255

    
8256
    for key in "success", "info", "nodes":
8257
      if key not in rdict:
8258
        raise errors.OpExecError("Can't parse iallocator results:"
8259
                                 " missing key '%s'" % key)
8260
      setattr(self, key, rdict[key])
8261

    
8262
    if not isinstance(rdict["nodes"], list):
8263
      raise errors.OpExecError("Can't parse iallocator results: 'nodes' key"
8264
                               " is not a list")
8265
    self.out_data = rdict
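
    # Illustrative example of a well-formed reply after parsing (not from the
    # original code):
    #   {"success": True, "info": "allocation successful",
    #    "nodes": ["node1.example.com", "node2.example.com"]}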


class LUTestAllocator(NoHooksLU):
8269
  """Run allocator tests.
8270

8271
  This LU runs the allocator tests
8272

8273
  """
8274
  _OP_REQP = ["direction", "mode", "name"]
8275

    
8276
  def CheckPrereq(self):
8277
    """Check prerequisites.
8278

8279
    This checks the opcode parameters depending on the direction and mode of
    the test.
8280

8281
    """
8282
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
8283
      for attr in ["name", "mem_size", "disks", "disk_template",
8284
                   "os", "tags", "nics", "vcpus"]:
8285
        if not hasattr(self.op, attr):
8286
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
8287
                                     attr)
8288
      iname = self.cfg.ExpandInstanceName(self.op.name)
8289
      if iname is not None:
8290
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
8291
                                   iname)
8292
      if not isinstance(self.op.nics, list):
8293
        raise errors.OpPrereqError("Invalid parameter 'nics'")
8294
      for row in self.op.nics:
8295
        if (not isinstance(row, dict) or
8296
            "mac" not in row or
8297
            "ip" not in row or
8298
            "bridge" not in row):
8299
          raise errors.OpPrereqError("Invalid contents of the"
8300
                                     " 'nics' parameter")
8301
      if not isinstance(self.op.disks, list):
8302
        raise errors.OpPrereqError("Invalid parameter 'disks'")
8303
      for row in self.op.disks:
8304
        if (not isinstance(row, dict) or
8305
            "size" not in row or
8306
            not isinstance(row["size"], int) or
8307
            "mode" not in row or
8308
            row["mode"] not in ['r', 'w']):
8309
          raise errors.OpPrereqError("Invalid contents of the"
8310
                                     " 'disks' parameter")
8311
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
8312
        self.op.hypervisor = self.cfg.GetHypervisorType()
8313
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
8314
      if not hasattr(self.op, "name"):
8315
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input")
8316
      fname = self.cfg.ExpandInstanceName(self.op.name)
8317
      if fname is None:
8318
        raise errors.OpPrereqError("Instance '%s' not found for relocation" %
8319
                                   self.op.name)
8320
      self.op.name = fname
8321
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
8322
    else:
8323
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
8324
                                 self.op.mode)
8325

    
8326
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
8327
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
8328
        raise errors.OpPrereqError("Missing allocator name")
8329
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
8330
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
8331
                                 self.op.direction)
8332

    
8333
  def Exec(self, feedback_fn):
8334
    """Run the allocator test.
8335

8336
    """
8337
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
8338
      ial = IAllocator(self.cfg, self.rpc,
8339
                       mode=self.op.mode,
8340
                       name=self.op.name,
8341
                       mem_size=self.op.mem_size,
8342
                       disks=self.op.disks,
8343
                       disk_template=self.op.disk_template,
8344
                       os=self.op.os,
8345
                       tags=self.op.tags,
8346
                       nics=self.op.nics,
8347
                       vcpus=self.op.vcpus,
8348
                       hypervisor=self.op.hypervisor,
8349
                       )
8350
    else:
8351
      ial = IAllocator(self.cfg, self.rpc,
8352
                       mode=self.op.mode,
8353
                       name=self.op.name,
8354
                       relocate_from=list(self.relocate_from),
8355
                       )
8356

    
8357
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
8358
      result = ial.in_text
8359
    else:
8360
      ial.Run(self.op.allocator, validate=False)
8361
      result = ial.out_text
8362
    return result