#
#

# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0201

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.
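
  As a purely illustrative sketch (LUHypotheticalNoop is a made-up name, not
  an LU defined in this module), a minimal subclass could look like::

    class LUHypotheticalNoop(NoHooksLU):
      _OP_REQP = []
      REQ_BGL = False

      def ExpandNames(self):
        self.needed_locks = {}

      def CheckPrereq(self):
        pass

      def Exec(self, feedback_fn):
        feedback_fn("nothing to do")
        return True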

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name, errors.ECODE_INVAL)

    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

204
    @param level: Locking level which is going to be locked
205
    @type level: member of ganeti.locking.LEVELS
206

207
    """
208

    
209
  def CheckPrereq(self):
210
    """Check prerequisites for this LU.
211

212
    This method should check that the prerequisites for the execution
213
    of this LU are fulfilled. It can do internode communication, but
214
    it should be idempotent - no cluster or system changes are
215
    allowed.
216

217
    The method should raise errors.OpPrereqError in case something is
218
    not fulfilled. Its return value is ignored.
219

220
    This method should also update all the parameters of the opcode to
221
    their canonical form if it hasn't been done by ExpandNames before.
222

223
    """
224
    if self.tasklets is not None:
225
      for (idx, tl) in enumerate(self.tasklets):
226
        logging.debug("Checking prerequisites for tasklet %s/%s",
227
                      idx + 1, len(self.tasklets))
228
        tl.CheckPrereq()
229
    else:
230
      raise NotImplementedError
231

    
232
  def Exec(self, feedback_fn):
233
    """Execute the LU.
234

235
    This method should implement the actual work. It should raise
236
    errors.OpExecError for failures that are somewhat dealt with in
237
    code, or expected.
238

239
    """
240
    if self.tasklets is not None:
241
      for (idx, tl) in enumerate(self.tasklets):
242
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
243
        tl.Exec(feedback_fn)
244
    else:
245
      raise NotImplementedError
246

    
247
  def BuildHooksEnv(self):
248
    """Build hooks environment for this LU.
249

250
    This method should return a three-node tuple consisting of: a dict
251
    containing the environment that will be used for running the
252
    specific hook for this LU, a list of node names on which the hook
253
    should run before the execution, and a list of node names on which
254
    the hook should run after the execution.
255

256
    The keys of the dict must not have 'GANETI_' prefixed as this will
257
    be handled in the hooks runner. Also note additional keys will be
258
    added by the hooks runner. If the LU doesn't define any
259
    environment, an empty dict (and not None) should be returned.
260

261
    No nodes should be returned as an empty list (and not None).
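
    As an illustrative sketch only (mirroring the cluster LUs below), a
    cluster-wide LU could return::

      env = {"OP_TARGET": self.cfg.GetClusterName()}
      mn = self.cfg.GetMasterNode()
      return env, [mn], [mn]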

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged, but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused-argument and
    # could-be-a-function warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec
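
  As a purely illustrative sketch (TLHypotheticalNoop is a made-up name, not a
  tasklet defined in this module)::

    class TLHypotheticalNoop(Tasklet):
      def CheckPrereq(self):
        pass

      def Exec(self, feedback_fn):
        feedback_fn("nothing to do")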

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'",
                               errors.ECODE_INVAL)

  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'",
                               errors.ECODE_INVAL)

  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckBooleanOpField(op, name):
  """Validates boolean opcode parameters.

  This will ensure that an opcode parameter is either a boolean value,
  or None (but that it always exists).

  """
  val = getattr(op, name, None)
  if not (val is None or isinstance(val, bool)):
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
                               (name, str(val)), errors.ECODE_INVAL)
  setattr(op, name, val)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_INVAL)


def _CheckDiskTemplate(template):
  """Ensure a given disk template is valid.

  """
  if template not in constants.DISK_TEMPLATES:
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.
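
  For illustration only (hypothetical instance values), a single-NIC,
  single-disk instance would yield, among other keys::

    {
      "OP_TARGET": "inst1.example.com",
      "INSTANCE_NAME": "inst1.example.com",
      "INSTANCE_PRIMARY": "node1.example.com",
      "INSTANCE_STATUS": "up",
      "INSTANCE_NIC_COUNT": 1,
      "INSTANCE_NIC0_MODE": "bridged",
      "INSTANCE_DISK_COUNT": 1,
      "INSTANCE_DISK0_SIZE": 10240,
    }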

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CheckNicsBridgesExist(lu, target_nics, target_node,
                               profile=constants.PP_DEFAULT):
  """Check that the bridges needed by a list of nics exist.

  """
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
                for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

787
  @type os_obj: L{objects.OS}
788
  @param os_obj: OS object to check
789
  @type name: string
790
  @param name: OS name passed by the user, to check for validity
791

792
  """
793
  if not os_obj.supported_variants:
794
    return
795
  try:
796
    variant = name.split("+", 1)[1]
797
  except IndexError:
798
    raise errors.OpPrereqError("OS name must include a variant",
799
                               errors.ECODE_INVAL)
800

    
801
  if variant not in os_obj.supported_variants:
802
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
803

    
804

    
805
def _GetNodeInstancesInner(cfg, fn):
806
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
807

    
808

    
809
def _GetNodeInstances(cfg, node_name):
810
  """Returns a list of all primary and secondary instances on a node.
811

812
  """
813

    
814
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
815

    
816

    
817
def _GetNodePrimaryInstances(cfg, node_name):
818
  """Returns primary instances on a node.
819

820
  """
821
  return _GetNodeInstancesInner(cfg,
822
                                lambda inst: node_name == inst.primary_node)
823

    
824

    
825
def _GetNodeSecondaryInstances(cfg, node_name):
826
  """Returns secondary instances on a node.
827

828
  """
829
  return _GetNodeInstancesInner(cfg,
830
                                lambda inst: node_name in inst.secondary_nodes)
831

    
832

    
833
def _GetStorageTypeArgs(cfg, storage_type):
834
  """Returns the arguments for a storage type.
835

836
  """
837
  # Special case for file storage
838
  if storage_type == constants.ST_FILE:
839
    # storage.FileStorage wants a list of storage directories
840
    return [[cfg.GetFileStorageDir()]]
841

    
842
  return []
843

    
844

    
845
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
846
  faulty = []
847

    
848
  for dev in instance.disks:
849
    cfg.SetDiskID(dev, node_name)
850

    
851
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
852
  result.Raise("Failed to get disk status from node %s" % node_name,
853
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
854

    
855
  for idx, bdev_status in enumerate(result.payload):
856
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
857
      faulty.append(idx)
858

    
859
  return faulty
860

    
861

    
def _FormatTimestamp(secs):
  """Formats a Unix timestamp with the local timezone.

  """
  return time.strftime("%F %T %Z", time.localtime(secs))


class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def CheckPrereq(self):
    """No prerequisites to check.

    """
    return True

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    if modify_ssh_setup:
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      utils.CreateBackup(priv_key)
      utils.CreateBackup(pub_key)

    return master


def _VerifyCertificateInner(filename, expired, not_before, not_after, now,
                            warn_days=constants.SSL_CERT_EXPIRATION_WARN,
                            error_days=constants.SSL_CERT_EXPIRATION_ERROR):
  """Verifies certificate details for LUVerifyCluster.

  """
  if expired:
    msg = "Certificate %s is expired" % filename

    if not_before is not None and not_after is not None:
      msg += (" (valid from %s to %s)" %
              (_FormatTimestamp(not_before),
               _FormatTimestamp(not_after)))
    elif not_before is not None:
      msg += " (valid from %s)" % _FormatTimestamp(not_before)
    elif not_after is not None:
      msg += " (valid until %s)" % _FormatTimestamp(not_after)

    return (LUVerifyCluster.ETYPE_ERROR, msg)

  elif not_before is not None and not_before > now:
    return (LUVerifyCluster.ETYPE_WARNING,
            "Certificate %s not yet valid (valid from %s)" %
            (filename, _FormatTimestamp(not_before)))

  elif not_after is not None:
    remaining_days = int((not_after - now) / (24 * 3600))

    msg = ("Certificate %s expires in %d days" % (filename, remaining_days))

    if remaining_days <= error_days:
      return (LUVerifyCluster.ETYPE_ERROR, msg)

    if remaining_days <= warn_days:
      return (LUVerifyCluster.ETYPE_WARNING, msg)

  return (None, None)


def _VerifyCertificate(filename):
  """Verifies a certificate for LUVerifyCluster.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUVerifyCluster.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  # Depending on the pyOpenSSL version, this can just return (None, None)
  (not_before, not_after) = utils.GetX509CertValidity(cert)

  return _VerifyCertificateInner(filename, cert.has_expired(),
                                 not_before, not_after, time.time())


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.
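
    As an illustrative sketch (made-up node name), an ENODELVM error on node1
    would be reported either as::

      ERROR:ENODELVM:node:node1:Can't get PV list from node

    or, with error_codes unset, as::

      ERROR: node node1: Can't get PV list from node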

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg)

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond

  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
                  node_result, master_files, drbd_map, vg_name):
    """Run multiple tests against a node.

    Test list:

      - compares ganeti version
      - checks vg existence and size > 20G
      - checks config file checksum
      - checks ssh to other nodes

    @type nodeinfo: L{objects.Node}
    @param nodeinfo: the node to check
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param node_result: the results from the node
    @param master_files: list of files that only masters should have
    @param drbd_map: the used DRBD minors for this node, in
        form of minor: (instance, must_exist) which correspond to instances
        and their running status
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())

    """
    node = nodeinfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, node_result should be a non-empty dict
    test = not node_result or not isinstance(node_result, dict)
    _ErrorIf(test, self.ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = node_result.get('version', None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    # checks vg existence and size > 20G
    if vg_name is not None:
      vglist = node_result.get(constants.NV_VGLIST, None)
      test = not vglist
      _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
      if not test:
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                              constants.MIN_VG_SIZE)
        _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # checks config file checksum

    remote_cksum = node_result.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if not test:
      for file_name in file_list:
        node_is_mc = nodeinfo.master_candidate
        must_have = (file_name not in master_files) or node_is_mc
        # missing
        test1 = file_name not in remote_cksum
        # invalid checksum
        test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
        # existing and good
        test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
        _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
                 "file '%s' missing", file_name)
        _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
                 "file '%s' has wrong checksum", file_name)
        # not candidate and this is not a must-have file
        _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
                 "file '%s' should not exist on non master"
                 " candidates (and the file is outdated)", file_name)
        # all good, except non-master/non-must have combination
        _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
                 "file '%s' should not exist"
                 " on non master candidates", file_name)

    # checks ssh to any

    test = constants.NV_NODELIST not in node_result
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if node_result[constants.NV_NODELIST]:
        for a_node, a_msg in node_result[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in node_result
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if node_result[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, node_result[constants.NV_NODENETTEST][anode])

    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    # check used drbd list
    if vg_name is not None:
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
      test = not isinstance(used_minors, (tuple, list))
      _ErrorIf(test, self.ENODEDRBD, node,
               "cannot parse drbd status file: %s", str(used_minors))
      if not test:
        for minor, (iname, must_exist) in drbd_map.items():
          test = minor not in used_minors and must_exist
          _ErrorIf(test, self.ENODEDRBD, node,
                   "drbd minor %d of instance %s is not active",
                   minor, iname)
        for minor in used_minors:
          test = minor not in drbd_map
          _ErrorIf(test, self.ENODEDRBD, node,
                   "unallocated drbd minor %d is in use", minor)
    test = node_result.get(constants.NV_NODESETUP,
                           ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    # check pv names
    if vg_name is not None:
      pvlist = node_result.get(constants.NV_PVLIST, None)
      test = pvlist is None
      _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
      if not test:
        # check that ':' is not present in PV names, since it's a
        # special character for lvcreate (denotes the range of PEs to
        # use on the PV)
        for _, pvname, owner_vg in pvlist:
          test = ":" in pvname
          _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                   " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
                      node_instance, n_offline):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      if node in n_offline:
        # ignore missing volumes on offline nodes
        continue
      for volume in node_vol_should[node]:
        test = node not in node_vol_is or volume not in node_vol_is[node]
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      test = ((node_current not in node_instance or
               not instance in node_instance[node_current]) and
              node_current not in n_offline)
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node in node_instance:
      if (not node == node_current):
        test = instance in node_instance[node]
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    """
    for node in node_vol_is:
      for volume in node_vol_is[node]:
        test = (node not in node_vol_should or
                volume not in node_vol_should[node])
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_instance):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node in node_instance:
      for o_inst in node_instance[node]:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_info, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the instances it
    was primary for.
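
    For illustration (made-up numbers): if this node reports 4096 MB free
    memory but is secondary for auto-balanced instances totalling 6144 MB
    whose primary is peer node nodeX, an ENODEN1 error is flagged, since a
    failover from nodeX could not be accommodated.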

    """
    for node, nodeinfo in node_info.iteritems():
      # This code checks that every node which is now listed as secondary has
      # enough memory to host all instances it is supposed to should a single
      # other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well as up
      # ones, considering that even if they're down someone might want to start
      # them even in the event of a node failure.
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = nodeinfo['mfree'] < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate"
                      " failovers should peer node %s fail", prinode)

  def CheckPrereq(self):
    """Check prerequisites.

    Transform the list of checks we're going to skip into a set and check that
    all its members are valid.

    """
    self.skip_set = frozenset(self.op.skip_checks)
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
      raise errors.OpPrereqError("Invalid checks to be skipped specified",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks just run in the post phase and their failure makes
    the output be logged in the verify output and the verification fail.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    vg_name = self.cfg.GetVGName()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = [] # List of offline nodes
    n_drained = [] # List of nodes being drained
    node_volume = {}
    node_instance = {}
    node_info = {}
    instance_cfg = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.extend(constants.ALL_CERT_FILES)
    file_names.extend(master_files)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      }

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    cluster = self.cfg.GetClusterInfo()
    master_node = self.cfg.GetMasterNode()
    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Verifying node status")
    for node_i in nodeinfo:
      node = node_i.name

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline.append(node)
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained.append(node)
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        continue

      nresult = all_nvinfo[node].payload
      node_drbd = {}
      for minor, instance in all_drbd_map[node].items():
        test = instance not in instanceinfo
        _ErrorIf(test, self.ECLUSTERCFG, None,
                 "ghost instance '%s' in temporary DRBD map", instance)
          # ghost instance should not be running, but otherwise we
          # don't give double warnings (both ghost instance and
          # unallocated minor in use)
        if test:
          node_drbd[minor] = (instance, False)
        else:
          instance = instanceinfo[instance]
          node_drbd[minor] = (instance.name, instance.admin_up)

      self._VerifyNode(node_i, file_names, local_checksums,
                       nresult, master_files, node_drbd, vg_name)

      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
      if vg_name is None:
        node_volume[node] = {}
      elif isinstance(lvdata, basestring):
        _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
                 utils.SafeEncode(lvdata))
        node_volume[node] = {}
      elif not isinstance(lvdata, dict):
        _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
        continue
      else:
        node_volume[node] = lvdata

      # node_instance
      idata = nresult.get(constants.NV_INSTANCELIST, None)
      test = not isinstance(idata, list)
      _ErrorIf(test, self.ENODEHV, node,
               "rpc call to node failed (instancelist): %s",
               utils.SafeEncode(str(idata)))
      if test:
        continue

      node_instance[node] = idata

      # node_info
      nodeinfo = nresult.get(constants.NV_HVINFO, None)
      test = not isinstance(nodeinfo, dict)
      _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
      if test:
        continue

      # Node time
      ntime = nresult.get(constants.NV_TIME, None)
      try:
        ntime_merged = utils.MergeTime(ntime)
      except (ValueError, TypeError):
1544
        _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1545

    
1546
      if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1547
        ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1548
      elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1549
        ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1550
      else:
1551
        ntime_diff = None
1552

    
1553
      _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1554
               "Node time diverges by at least %s from master node time",
1555
               ntime_diff)
1556

    
1557
      if ntime_diff is not None:
1558
        continue
1559

    
1560
      try:
1561
        node_info[node] = {
1562
          "mfree": int(nodeinfo['memory_free']),
1563
          "pinst": [],
1564
          "sinst": [],
1565
          # dictionary holding all instances this node is secondary for,
1566
          # grouped by their primary node. Each key is a cluster node, and each
1567
          # value is a list of instances which have the key as primary and the
1568
          # current node as secondary.  this is handy to calculate N+1 memory
1569
          # availability if you can only failover from a primary to its
1570
          # secondary.
1571
          "sinst-by-pnode": {},
1572
        }
1573
        # FIXME: devise a free space model for file based instances as well
1574
        if vg_name is not None:
1575
          test = (constants.NV_VGLIST not in nresult or
1576
                  vg_name not in nresult[constants.NV_VGLIST])
1577
          _ErrorIf(test, self.ENODELVM, node,
1578
                   "node didn't return data for the volume group '%s'"
1579
                   " - it is either missing or broken", vg_name)
1580
          if test:
1581
            continue
1582
          node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
1583
      except (ValueError, KeyError):
1584
        _ErrorIf(True, self.ENODERPC, node,
1585
                 "node returned invalid nodeinfo, check lvm/hypervisor")
1586
        continue
1587

    
1588
    node_vol_should = {}
1589

    
1590
    feedback_fn("* Verifying instance status")
1591
    for instance in instancelist:
1592
      if verbose:
1593
        feedback_fn("* Verifying instance %s" % instance)
1594
      inst_config = instanceinfo[instance]
1595
      self._VerifyInstance(instance, inst_config, node_volume,
1596
                           node_instance, n_offline)
1597
      inst_nodes_offline = []
1598

    
1599
      inst_config.MapLVsByNode(node_vol_should)
1600

    
1601
      instance_cfg[instance] = inst_config
1602

    
1603
      pnode = inst_config.primary_node
1604
      _ErrorIf(pnode not in node_info and pnode not in n_offline,
1605
               self.ENODERPC, pnode, "instance %s, connection to"
1606
               " primary node failed", instance)
1607
      if pnode in node_info:
1608
        node_info[pnode]['pinst'].append(instance)
1609

    
1610
      if pnode in n_offline:
1611
        inst_nodes_offline.append(pnode)
1612

    
1613
      # If the instance is non-redundant we cannot survive losing its primary
1614
      # node, so we are not N+1 compliant. On the other hand we have no disk
1615
      # templates with more than one secondary so that situation is not well
1616
      # supported either.
1617
      # FIXME: does not support file-backed instances
1618
      if len(inst_config.secondary_nodes) == 0:
1619
        i_non_redundant.append(instance)
1620
      _ErrorIf(len(inst_config.secondary_nodes) > 1,
1621
               self.EINSTANCELAYOUT, instance,
1622
               "instance has multiple secondary nodes", code="WARNING")
1623

    
1624
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1625
        i_non_a_balanced.append(instance)
1626

    
1627
      for snode in inst_config.secondary_nodes:
1628
        _ErrorIf(snode not in node_info and snode not in n_offline,
1629
                 self.ENODERPC, snode,
1630
                 "instance %s, connection to secondary node"
1631
                 " failed", instance)
1632

    
1633
        if snode in node_info:
1634
          node_info[snode]['sinst'].append(instance)
1635
          if pnode not in node_info[snode]['sinst-by-pnode']:
1636
            node_info[snode]['sinst-by-pnode'][pnode] = []
1637
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)
1638

    
1639
        if snode in n_offline:
1640
          inst_nodes_offline.append(snode)
1641

    
1642
      # warn that the instance lives on offline nodes
1643
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
1644
               "instance lives on offline node(s) %s",
1645
               utils.CommaJoin(inst_nodes_offline))
1646

    
1647
    feedback_fn("* Verifying orphan volumes")
1648
    self._VerifyOrphanVolumes(node_vol_should, node_volume)
1649

    
1650
    feedback_fn("* Verifying remaining instances")
1651
    self._VerifyOrphanInstances(instancelist, node_instance)
1652

    
1653
    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1654
      feedback_fn("* Verifying N+1 Memory redundancy")
1655
      self._VerifyNPlusOneMemory(node_info, instance_cfg)
1656

    
1657
    feedback_fn("* Other Notes")
1658
    if i_non_redundant:
1659
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
1660
                  % len(i_non_redundant))
1661

    
1662
    if i_non_a_balanced:
1663
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
1664
                  % len(i_non_a_balanced))
1665

    
1666
    if n_offline:
1667
      feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))
1668

    
1669
    if n_drained:
1670
      feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))
1671

    
1672
    return not self.bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      indent_re = re.compile('^', re.M)
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave an error.
          # override manually lu_result here as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = indent_re.sub('      ', output)
            feedback_fn("%s" % output)
            lu_result = 0

      return lu_result


class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      # node_volume
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, _, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result


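# Illustrative note on LUVerifyDisks.Exec above, not part of the original
# file: the three result items are filled in place, so a caller could unpack
# them roughly as follows (hypothetical variable names):
#   bad_nodes, need_activate, missing = result
#   # bad_nodes:     {node_name: error_message} for nodes that failed the RPC
#   # need_activate: [instance_name, ...] whose LVs were found offline
#   # missing:       {instance_name: [(node_name, volume_name), ...]}
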
class LURepairDiskSizes(NoHooksLU):
1805
  """Verifies the cluster disks sizes.
1806

1807
  """
1808
  _OP_REQP = ["instances"]
1809
  REQ_BGL = False
1810

    
1811
  def ExpandNames(self):
1812
    if not isinstance(self.op.instances, list):
1813
      raise errors.OpPrereqError("Invalid argument type 'instances'",
1814
                                 errors.ECODE_INVAL)
1815

    
1816
    if self.op.instances:
1817
      self.wanted_names = []
1818
      for name in self.op.instances:
1819
        full_name = _ExpandInstanceName(self.cfg, name)
1820
        self.wanted_names.append(full_name)
1821
      self.needed_locks = {
1822
        locking.LEVEL_NODE: [],
1823
        locking.LEVEL_INSTANCE: self.wanted_names,
1824
        }
1825
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
1826
    else:
1827
      self.wanted_names = None
1828
      self.needed_locks = {
1829
        locking.LEVEL_NODE: locking.ALL_SET,
1830
        locking.LEVEL_INSTANCE: locking.ALL_SET,
1831
        }
1832
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
1833

    
1834
  def DeclareLocks(self, level):
1835
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
1836
      self._LockInstancesNodes(primary_only=True)
1837

    
1838
  def CheckPrereq(self):
1839
    """Check prerequisites.
1840

1841
    This only checks the optional instance list against the existing names.
1842

1843
    """
1844
    if self.wanted_names is None:
1845
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
1846

    
1847
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
1848
                             in self.wanted_names]
1849

    
1850
  def _EnsureChildSizes(self, disk):
1851
    """Ensure children of the disk have the needed disk size.
1852

1853
    This is valid mainly for DRBD8 and fixes an issue where the
1854
    children have smaller disk size.
1855

1856
    @param disk: an L{ganeti.objects.Disk} object
1857

1858
    """
1859
    if disk.dev_type == constants.LD_DRBD8:
1860
      assert disk.children, "Empty children for DRBD8?"
1861
      fchild = disk.children[0]
1862
      mismatch = fchild.size < disk.size
1863
      if mismatch:
1864
        self.LogInfo("Child disk has size %d, parent %d, fixing",
1865
                     fchild.size, disk.size)
1866
        fchild.size = disk.size
1867

    
1868
      # and we recurse on this child only, not on the metadev
1869
      return self._EnsureChildSizes(fchild) or mismatch
1870
    else:
1871
      return False
1872

    
1873
  def Exec(self, feedback_fn):
1874
    """Verify the size of cluster disks.
1875

1876
    """
1877
    # TODO: check child disks too
1878
    # TODO: check differences in size between primary/secondary nodes
1879
    per_node_disks = {}
1880
    for instance in self.wanted_instances:
1881
      pnode = instance.primary_node
1882
      if pnode not in per_node_disks:
1883
        per_node_disks[pnode] = []
1884
      for idx, disk in enumerate(instance.disks):
1885
        per_node_disks[pnode].append((instance, idx, disk))
1886

    
1887
    changed = []
1888
    for node, dskl in per_node_disks.items():
1889
      newl = [v[2].Copy() for v in dskl]
1890
      for dsk in newl:
1891
        self.cfg.SetDiskID(dsk, node)
1892
      result = self.rpc.call_blockdev_getsizes(node, newl)
1893
      if result.fail_msg:
1894
        self.LogWarning("Failure in blockdev_getsizes call to node"
1895
                        " %s, ignoring", node)
1896
        continue
1897
      if len(result.data) != len(dskl):
1898
        self.LogWarning("Invalid result from node %s, ignoring node results",
1899
                        node)
1900
        continue
1901
      for ((instance, idx, disk), size) in zip(dskl, result.data):
1902
        if size is None:
1903
          self.LogWarning("Disk %d of instance %s did not return size"
1904
                          " information, ignoring", idx, instance.name)
1905
          continue
1906
        if not isinstance(size, (int, long)):
1907
          self.LogWarning("Disk %d of instance %s did not return valid"
1908
                          " size information, ignoring", idx, instance.name)
1909
          continue
1910
        size = size >> 20
1911
        if size != disk.size:
1912
          self.LogInfo("Disk %d of instance %s has mismatched size,"
1913
                       " correcting: recorded %d, actual %d", idx,
1914
                       instance.name, disk.size, size)
1915
          disk.size = size
1916
          self.cfg.Update(instance, feedback_fn)
1917
          changed.append((instance.name, idx, size))
1918
        if self._EnsureChildSizes(disk):
1919
          self.cfg.Update(instance, feedback_fn)
1920
          changed.append((instance.name, idx, disk.size))
1921
    return changed
1922

    
1923

    
1924
class LURenameCluster(LogicalUnit):
1925
  """Rename the cluster.
1926

1927
  """
1928
  HPATH = "cluster-rename"
1929
  HTYPE = constants.HTYPE_CLUSTER
1930
  _OP_REQP = ["name"]
1931

    
1932
  def BuildHooksEnv(self):
1933
    """Build hooks env.
1934

1935
    """
1936
    env = {
1937
      "OP_TARGET": self.cfg.GetClusterName(),
1938
      "NEW_NAME": self.op.name,
1939
      }
1940
    mn = self.cfg.GetMasterNode()
1941
    all_nodes = self.cfg.GetNodeList()
1942
    return env, [mn], all_nodes
1943

    
1944
  def CheckPrereq(self):
1945
    """Verify that the passed name is a valid one.
1946

1947
    """
1948
    hostname = utils.GetHostInfo(self.op.name)
1949

    
1950
    new_name = hostname.name
1951
    self.ip = new_ip = hostname.ip
1952
    old_name = self.cfg.GetClusterName()
1953
    old_ip = self.cfg.GetMasterIP()
1954
    if new_name == old_name and new_ip == old_ip:
1955
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
1956
                                 " cluster has changed",
1957
                                 errors.ECODE_INVAL)
1958
    if new_ip != old_ip:
1959
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
1960
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
1961
                                   " reachable on the network. Aborting." %
1962
                                   new_ip, errors.ECODE_NOTUNIQUE)
1963

    
1964
    self.op.name = new_name
1965

    
1966
  def Exec(self, feedback_fn):
1967
    """Rename the cluster.
1968

1969
    """
1970
    clustername = self.op.name
1971
    ip = self.ip
1972

    
1973
    # shutdown the master IP
1974
    master = self.cfg.GetMasterNode()
1975
    result = self.rpc.call_node_stop_master(master, False)
1976
    result.Raise("Could not disable the master role")
1977

    
1978
    try:
1979
      cluster = self.cfg.GetClusterInfo()
1980
      cluster.cluster_name = clustername
1981
      cluster.master_ip = ip
1982
      self.cfg.Update(cluster, feedback_fn)
1983

    
1984
      # update the known hosts file
1985
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
1986
      node_list = self.cfg.GetNodeList()
1987
      try:
1988
        node_list.remove(master)
1989
      except ValueError:
1990
        pass
1991
      result = self.rpc.call_upload_file(node_list,
1992
                                         constants.SSH_KNOWN_HOSTS_FILE)
1993
      for to_node, to_result in result.iteritems():
1994
        msg = to_result.fail_msg
1995
        if msg:
1996
          msg = ("Copy of file %s to node %s failed: %s" %
1997
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
1998
          self.proc.LogWarning(msg)
1999

    
2000
    finally:
2001
      result = self.rpc.call_node_start_master(master, False, False)
2002
      msg = result.fail_msg
2003
      if msg:
2004
        self.LogWarning("Could not re-enable the master role on"
2005
                        " the master, please restart manually: %s", msg)


def _RecursiveCheckIfLVMBased(disk):
  """Check if the given disk or its children are lvm-based.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: boolean indicating whether a LD_LV dev_type was found or not

  """
  if disk.children:
    for chdisk in disk.children:
      if _RecursiveCheckIfLVMBased(chdisk):
        return True
  return disk.dev_type == constants.LD_LV


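# Illustrative example of the helper above, not part of the original source;
# it assumes a DRBD8 disk whose children are plain LVs:
#   lv_a = objects.Disk(dev_type=constants.LD_LV)
#   lv_b = objects.Disk(dev_type=constants.LD_LV)
#   drbd = objects.Disk(dev_type=constants.LD_DRBD8, children=[lv_a, lv_b])
#   _RecursiveCheckIfLVMBased(drbd)  # => True, found via the LV children
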
class LUSetClusterParams(LogicalUnit):
2025
  """Change the parameters of the cluster.
2026

2027
  """
2028
  HPATH = "cluster-modify"
2029
  HTYPE = constants.HTYPE_CLUSTER
2030
  _OP_REQP = []
2031
  REQ_BGL = False
2032

    
2033
  def CheckArguments(self):
2034
    """Check parameters
2035

2036
    """
2037
    if not hasattr(self.op, "candidate_pool_size"):
2038
      self.op.candidate_pool_size = None
2039
    if self.op.candidate_pool_size is not None:
2040
      try:
2041
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2042
      except (ValueError, TypeError), err:
2043
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2044
                                   str(err), errors.ECODE_INVAL)
2045
      if self.op.candidate_pool_size < 1:
2046
        raise errors.OpPrereqError("At least one master candidate needed",
2047
                                   errors.ECODE_INVAL)
2048

    
2049
  def ExpandNames(self):
2050
    # FIXME: in the future maybe other cluster params won't require checking on
2051
    # all nodes to be modified.
2052
    self.needed_locks = {
2053
      locking.LEVEL_NODE: locking.ALL_SET,
2054
    }
2055
    self.share_locks[locking.LEVEL_NODE] = 1
2056

    
2057
  def BuildHooksEnv(self):
2058
    """Build hooks env.
2059

2060
    """
2061
    env = {
2062
      "OP_TARGET": self.cfg.GetClusterName(),
2063
      "NEW_VG_NAME": self.op.vg_name,
2064
      }
2065
    mn = self.cfg.GetMasterNode()
2066
    return env, [mn], [mn]
2067

    
2068
  def CheckPrereq(self):
2069
    """Check prerequisites.
2070

2071
    This checks whether the given params don't conflict and
2072
    if the given volume group is valid.
2073

2074
    """
2075
    if self.op.vg_name is not None and not self.op.vg_name:
2076
      instances = self.cfg.GetAllInstancesInfo().values()
2077
      for inst in instances:
2078
        for disk in inst.disks:
2079
          if _RecursiveCheckIfLVMBased(disk):
2080
            raise errors.OpPrereqError("Cannot disable lvm storage while"
2081
                                       " lvm-based instances exist",
2082
                                       errors.ECODE_INVAL)
2083

    
2084
    node_list = self.acquired_locks[locking.LEVEL_NODE]
2085

    
2086
    # if vg_name not None, checks given volume group on all nodes
2087
    if self.op.vg_name:
2088
      vglist = self.rpc.call_vg_list(node_list)
2089
      for node in node_list:
2090
        msg = vglist[node].fail_msg
2091
        if msg:
2092
          # ignoring down node
2093
          self.LogWarning("Error while gathering data on node %s"
2094
                          " (ignoring node): %s", node, msg)
2095
          continue
2096
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2097
                                              self.op.vg_name,
2098
                                              constants.MIN_VG_SIZE)
2099
        if vgstatus:
2100
          raise errors.OpPrereqError("Error on node '%s': %s" %
2101
                                     (node, vgstatus), errors.ECODE_ENVIRON)
2102

    
2103
    self.cluster = cluster = self.cfg.GetClusterInfo()
2104
    # validate params changes
2105
    if self.op.beparams:
2106
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2107
      self.new_beparams = objects.FillDict(
2108
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
2109

    
2110
    if self.op.nicparams:
2111
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2112
      self.new_nicparams = objects.FillDict(
2113
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
2114
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
2115
      nic_errors = []
2116

    
2117
      # check all instances for consistency
2118
      for instance in self.cfg.GetAllInstancesInfo().values():
2119
        for nic_idx, nic in enumerate(instance.nics):
2120
          params_copy = copy.deepcopy(nic.nicparams)
2121
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
2122

    
2123
          # check parameter syntax
2124
          try:
2125
            objects.NIC.CheckParameterSyntax(params_filled)
2126
          except errors.ConfigurationError, err:
2127
            nic_errors.append("Instance %s, nic/%d: %s" %
2128
                              (instance.name, nic_idx, err))
2129

    
2130
          # if we're moving instances to routed, check that they have an ip
2131
          target_mode = params_filled[constants.NIC_MODE]
2132
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2133
            nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2134
                              (instance.name, nic_idx))
2135
      if nic_errors:
2136
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2137
                                   "\n".join(nic_errors))
2138

    
2139
    # hypervisor list/parameters
2140
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
2141
    if self.op.hvparams:
2142
      if not isinstance(self.op.hvparams, dict):
2143
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2144
                                   errors.ECODE_INVAL)
2145
      for hv_name, hv_dict in self.op.hvparams.items():
2146
        if hv_name not in self.new_hvparams:
2147
          self.new_hvparams[hv_name] = hv_dict
2148
        else:
2149
          self.new_hvparams[hv_name].update(hv_dict)
2150

    
2151
    # os hypervisor parameters
2152
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2153
    if self.op.os_hvp:
2154
      if not isinstance(self.op.os_hvp, dict):
2155
        raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
2156
                                   errors.ECODE_INVAL)
2157
      for os_name, hvs in self.op.os_hvp.items():
2158
        if not isinstance(hvs, dict):
2159
          raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
2160
                                      " input"), errors.ECODE_INVAL)
2161
        if os_name not in self.new_os_hvp:
2162
          self.new_os_hvp[os_name] = hvs
2163
        else:
2164
          for hv_name, hv_dict in hvs.items():
2165
            if hv_name not in self.new_os_hvp[os_name]:
2166
              self.new_os_hvp[os_name][hv_name] = hv_dict
2167
            else:
2168
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
2169

    
2170
    if self.op.enabled_hypervisors is not None:
2171
      self.hv_list = self.op.enabled_hypervisors
2172
      if not self.hv_list:
2173
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2174
                                   " least one member",
2175
                                   errors.ECODE_INVAL)
2176
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2177
      if invalid_hvs:
2178
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2179
                                   " entries: %s" %
2180
                                   utils.CommaJoin(invalid_hvs),
2181
                                   errors.ECODE_INVAL)
2182
    else:
2183
      self.hv_list = cluster.enabled_hypervisors
2184

    
2185
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
2186
      # either the enabled list has changed, or the parameters have, validate
2187
      for hv_name, hv_params in self.new_hvparams.items():
2188
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
2189
            (self.op.enabled_hypervisors and
2190
             hv_name in self.op.enabled_hypervisors)):
2191
          # either this is a new hypervisor, or its parameters have changed
2192
          hv_class = hypervisor.GetHypervisor(hv_name)
2193
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2194
          hv_class.CheckParameterSyntax(hv_params)
2195
          _CheckHVParams(self, node_list, hv_name, hv_params)
2196

    
2197
    if self.op.os_hvp:
2198
      # no need to check any newly-enabled hypervisors, since the
2199
      # defaults have already been checked in the above code-block
2200
      for os_name, os_hvp in self.new_os_hvp.items():
2201
        for hv_name, hv_params in os_hvp.items():
2202
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2203
          # we need to fill in the new os_hvp on top of the actual hv_p
2204
          cluster_defaults = self.new_hvparams.get(hv_name, {})
2205
          new_osp = objects.FillDict(cluster_defaults, hv_params)
2206
          hv_class = hypervisor.GetHypervisor(hv_name)
2207
          hv_class.CheckParameterSyntax(new_osp)
2208
          _CheckHVParams(self, node_list, hv_name, new_osp)
2209

    
2210

    
2211
  def Exec(self, feedback_fn):
2212
    """Change the parameters of the cluster.
2213

2214
    """
2215
    if self.op.vg_name is not None:
2216
      new_volume = self.op.vg_name
2217
      if not new_volume:
2218
        new_volume = None
2219
      if new_volume != self.cfg.GetVGName():
2220
        self.cfg.SetVGName(new_volume)
2221
      else:
2222
        feedback_fn("Cluster LVM configuration already in desired"
2223
                    " state, not changing")
2224
    if self.op.hvparams:
2225
      self.cluster.hvparams = self.new_hvparams
2226
    if self.op.os_hvp:
2227
      self.cluster.os_hvp = self.new_os_hvp
2228
    if self.op.enabled_hypervisors is not None:
2229
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2230
    if self.op.beparams:
2231
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2232
    if self.op.nicparams:
2233
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2234

    
2235
    if self.op.candidate_pool_size is not None:
2236
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
2237
      # we need to update the pool size here, otherwise the save will fail
2238
      _AdjustCandidatePool(self, [])
2239

    
2240
    self.cfg.Update(self.cluster, feedback_fn)


def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  # 1. Gather target nodes
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  dist_nodes = lu.cfg.GetOnlineNodeList()
  if additional_nodes is not None:
    dist_nodes.extend(additional_nodes)
  if myself.name in dist_nodes:
    dist_nodes.remove(myself.name)

  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                    constants.HMAC_CLUSTER_KEY,
                   ])

  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
  for hv_name in enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    dist_files.update(hv_class.GetAncillaryFiles())

  # 3. Perform the files upload
  for fname in dist_files:
    if os.path.exists(fname):
      result = lu.rpc.call_upload_file(dist_nodes, fname)
      for to_node, to_result in result.items():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (fname, to_node, msg))
          lu.proc.LogWarning(msg)
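
# Illustrative usage sketch, not part of the original source: a logical unit
# that has just added a node could push the ancillary files to it as well,
# for example:
#   _RedistributeAncillaryFiles(self, additional_nodes=[new_node.name])
# Only files that actually exist on the master are uploaded, and the master
# itself is removed from the target list.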


class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks:
    return True

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in instance.disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, instance.disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = "%d estimated seconds remaining" % mstat.estimated_time
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (instance.disks[i].iv_name, mstat.sync_percent,
                         rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
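
# Timing sketch for the loop above (illustrative, not in the original file):
# RPC failures are retried with a fixed 6 second sleep and give up after 10
# consecutive errors, a "done but degraded" state is re-checked up to 10
# times with a 1 second sleep, and while a sync is in progress the poll
# interval follows the reported estimate, capped at one minute:
#   time.sleep(min(60, max_time))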


def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result
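
# Illustrative call, not part of the original source, e.g. for checking a
# DRBD device on a secondary node before acting on it:
#   ok = _CheckDiskConsistency(self, dev, secondary_node, False, ldisk=True)
# With ldisk=True only the local-disk status is considered, instead of the
# overall is_degraded flag.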
2420

    
2421

    
2422
class LUDiagnoseOS(NoHooksLU):
2423
  """Logical unit for OS diagnose/query.
2424

2425
  """
2426
  _OP_REQP = ["output_fields", "names"]
2427
  REQ_BGL = False
2428
  _FIELDS_STATIC = utils.FieldSet()
2429
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
2430
  # Fields that need calculation of global os validity
2431
  _FIELDS_NEEDVALID = frozenset(["valid", "variants"])
2432

    
2433
  def ExpandNames(self):
2434
    if self.op.names:
2435
      raise errors.OpPrereqError("Selective OS query not supported",
2436
                                 errors.ECODE_INVAL)
2437

    
2438
    _CheckOutputFields(static=self._FIELDS_STATIC,
2439
                       dynamic=self._FIELDS_DYNAMIC,
2440
                       selected=self.op.output_fields)
2441

    
2442
    # Lock all nodes, in shared mode
2443
    # Temporary removal of locks, should be reverted later
2444
    # TODO: reintroduce locks when they are lighter-weight
2445
    self.needed_locks = {}
2446
    #self.share_locks[locking.LEVEL_NODE] = 1
2447
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2448

    
2449
  def CheckPrereq(self):
2450
    """Check prerequisites.
2451

2452
    """
2453

    
2454
  @staticmethod
2455
  def _DiagnoseByOS(rlist):
2456
    """Remaps a per-node return list into an a per-os per-node dictionary
2457

2458
    @param rlist: a map with node names as keys and OS objects as values
2459

2460
    @rtype: dict
2461
    @return: a dictionary with osnames as keys and as value another map, with
2462
        nodes as keys and tuples of (path, status, diagnose) as values, eg::
2463

2464
          {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
2465
                                     (/srv/..., False, "invalid api")],
2466
                           "node2": [(/srv/..., True, "")]}
2467
          }
2468

2469
    """
2470
    all_os = {}
2471
    # we build here the list of nodes that didn't fail the RPC (at RPC
2472
    # level), so that nodes with a non-responding node daemon don't
2473
    # make all OSes invalid
2474
    good_nodes = [node_name for node_name in rlist
2475
                  if not rlist[node_name].fail_msg]
2476
    for node_name, nr in rlist.items():
2477
      if nr.fail_msg or not nr.payload:
2478
        continue
2479
      for name, path, status, diagnose, variants in nr.payload:
2480
        if name not in all_os:
2481
          # build a list of nodes for this os containing empty lists
2482
          # for each node in node_list
2483
          all_os[name] = {}
2484
          for nname in good_nodes:
2485
            all_os[name][nname] = []
2486
        all_os[name][node_name].append((path, status, diagnose, variants))
2487
    return all_os
2488

    
2489
  def Exec(self, feedback_fn):
2490
    """Compute the list of OSes.
2491

2492
    """
2493
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2494
    node_data = self.rpc.call_os_diagnose(valid_nodes)
2495
    pol = self._DiagnoseByOS(node_data)
2496
    output = []
2497
    calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
2498
    calc_variants = "variants" in self.op.output_fields
2499

    
2500
    for os_name, os_data in pol.items():
2501
      row = []
2502
      if calc_valid:
2503
        valid = True
2504
        variants = None
2505
        for osl in os_data.values():
2506
          valid = valid and osl and osl[0][1]
2507
          if not valid:
2508
            variants = None
2509
            break
2510
          if calc_variants:
2511
            node_variants = osl[0][3]
2512
            if variants is None:
2513
              variants = node_variants
2514
            else:
2515
              variants = [v for v in variants if v in node_variants]
2516

    
2517
      for field in self.op.output_fields:
2518
        if field == "name":
2519
          val = os_name
2520
        elif field == "valid":
2521
          val = valid
2522
        elif field == "node_status":
2523
          # this is just a copy of the dict
2524
          val = {}
2525
          for node_name, nos_list in os_data.items():
2526
            val[node_name] = nos_list
2527
        elif field == "variants":
2528
          val = variants
2529
        else:
2530
          raise errors.ParameterError(field)
2531
        row.append(val)
2532
      output.append(row)
2533

    
2534
    return output
2535

    
2536

    
2537
class LURemoveNode(LogicalUnit):
2538
  """Logical unit for removing a node.
2539

2540
  """
2541
  HPATH = "node-remove"
2542
  HTYPE = constants.HTYPE_NODE
2543
  _OP_REQP = ["node_name"]
2544

    
2545
  def BuildHooksEnv(self):
2546
    """Build hooks env.
2547

2548
    This doesn't run on the target node in the pre phase as a failed
2549
    node would then be impossible to remove.
2550

2551
    """
2552
    env = {
2553
      "OP_TARGET": self.op.node_name,
2554
      "NODE_NAME": self.op.node_name,
2555
      }
2556
    all_nodes = self.cfg.GetNodeList()
2557
    try:
2558
      all_nodes.remove(self.op.node_name)
2559
    except ValueError:
2560
      logging.warning("Node %s which is about to be removed not found"
2561
                      " in the all nodes list", self.op.node_name)
2562
    return env, all_nodes, all_nodes
2563

    
2564
  def CheckPrereq(self):
2565
    """Check prerequisites.
2566

2567
    This checks:
2568
     - the node exists in the configuration
2569
     - it does not have primary or secondary instances
2570
     - it's not the master
2571

2572
    Any errors are signaled by raising errors.OpPrereqError.
2573

2574
    """
2575
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2576
    node = self.cfg.GetNodeInfo(self.op.node_name)
2577
    assert node is not None
2578

    
2579
    instance_list = self.cfg.GetInstanceList()
2580

    
2581
    masternode = self.cfg.GetMasterNode()
2582
    if node.name == masternode:
2583
      raise errors.OpPrereqError("Node is the master node,"
2584
                                 " you need to failover first.",
2585
                                 errors.ECODE_INVAL)
2586

    
2587
    for instance_name in instance_list:
2588
      instance = self.cfg.GetInstanceInfo(instance_name)
2589
      if node.name in instance.all_nodes:
2590
        raise errors.OpPrereqError("Instance %s is still running on the node,"
2591
                                   " please remove first." % instance_name,
2592
                                   errors.ECODE_INVAL)
2593
    self.op.node_name = node.name
2594
    self.node = node
2595

    
2596
  def Exec(self, feedback_fn):
2597
    """Removes the node from the cluster.
2598

2599
    """
2600
    node = self.node
2601
    logging.info("Stopping the node daemon and removing configs from node %s",
2602
                 node.name)
2603

    
2604
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
2605

    
2606
    # Promote nodes to master candidate as needed
2607
    _AdjustCandidatePool(self, exceptions=[node.name])
2608
    self.context.RemoveNode(node.name)
2609

    
2610
    # Run post hooks on the node before it's removed
2611
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
2612
    try:
2613
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
2614
    except:
2615
      # pylint: disable-msg=W0702
2616
      self.LogWarning("Errors occurred running hooks on %s" % node.name)
2617

    
2618
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
2619
    msg = result.fail_msg
2620
    if msg:
2621
      self.LogWarning("Errors encountered on the remote node while leaving"
2622
                      " the cluster: %s", msg)
2623

    
2624

    
2625
class LUQueryNodes(NoHooksLU):
2626
  """Logical unit for querying nodes.
2627

2628
  """
2629
  # pylint: disable-msg=W0142
2630
  _OP_REQP = ["output_fields", "names", "use_locking"]
2631
  REQ_BGL = False
2632

    
2633
  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
2634
                    "master_candidate", "offline", "drained"]
2635

    
2636
  _FIELDS_DYNAMIC = utils.FieldSet(
2637
    "dtotal", "dfree",
2638
    "mtotal", "mnode", "mfree",
2639
    "bootid",
2640
    "ctotal", "cnodes", "csockets",
2641
    )
2642

    
2643
  _FIELDS_STATIC = utils.FieldSet(*[
2644
    "pinst_cnt", "sinst_cnt",
2645
    "pinst_list", "sinst_list",
2646
    "pip", "sip", "tags",
2647
    "master",
2648
    "role"] + _SIMPLE_FIELDS
2649
    )
2650

    
2651
  def ExpandNames(self):
2652
    _CheckOutputFields(static=self._FIELDS_STATIC,
2653
                       dynamic=self._FIELDS_DYNAMIC,
2654
                       selected=self.op.output_fields)
2655

    
2656
    self.needed_locks = {}
2657
    self.share_locks[locking.LEVEL_NODE] = 1
2658

    
2659
    if self.op.names:
2660
      self.wanted = _GetWantedNodes(self, self.op.names)
2661
    else:
2662
      self.wanted = locking.ALL_SET
2663

    
2664
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2665
    self.do_locking = self.do_node_query and self.op.use_locking
2666
    if self.do_locking:
2667
      # if we don't request only static fields, we need to lock the nodes
2668
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
2669

    
2670
  def CheckPrereq(self):
2671
    """Check prerequisites.
2672

2673
    """
2674
    # The validation of the node list is done in the _GetWantedNodes,
2675
    # if non empty, and if empty, there's no validation to do
2676
    pass
2677

    
2678
  def Exec(self, feedback_fn):
2679
    """Computes the list of nodes and their attributes.
2680

2681
    """
2682
    all_info = self.cfg.GetAllNodesInfo()
2683
    if self.do_locking:
2684
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
2685
    elif self.wanted != locking.ALL_SET:
2686
      nodenames = self.wanted
2687
      missing = set(nodenames).difference(all_info.keys())
2688
      if missing:
2689
        raise errors.OpExecError(
2690
          "Some nodes were removed before retrieving their data: %s" % missing)
2691
    else:
2692
      nodenames = all_info.keys()
2693

    
2694
    nodenames = utils.NiceSort(nodenames)
2695
    nodelist = [all_info[name] for name in nodenames]
2696

    
2697
    # begin data gathering
2698

    
2699
    if self.do_node_query:
2700
      live_data = {}
2701
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2702
                                          self.cfg.GetHypervisorType())
2703
      for name in nodenames:
2704
        nodeinfo = node_data[name]
2705
        if not nodeinfo.fail_msg and nodeinfo.payload:
2706
          nodeinfo = nodeinfo.payload
2707
          fn = utils.TryConvert
2708
          live_data[name] = {
2709
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2710
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2711
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
2712
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2713
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
2714
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2715
            "bootid": nodeinfo.get('bootid', None),
2716
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2717
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2718
            }
2719
        else:
2720
          live_data[name] = {}
2721
    else:
2722
      live_data = dict.fromkeys(nodenames, {})
2723

    
2724
    node_to_primary = dict([(name, set()) for name in nodenames])
2725
    node_to_secondary = dict([(name, set()) for name in nodenames])
2726

    
2727
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
2728
                             "sinst_cnt", "sinst_list"))
2729
    if inst_fields & frozenset(self.op.output_fields):
2730
      inst_data = self.cfg.GetAllInstancesInfo()
2731

    
2732
      for inst in inst_data.values():
2733
        if inst.primary_node in node_to_primary:
2734
          node_to_primary[inst.primary_node].add(inst.name)
2735
        for secnode in inst.secondary_nodes:
2736
          if secnode in node_to_secondary:
2737
            node_to_secondary[secnode].add(inst.name)
2738

    
2739
    master_node = self.cfg.GetMasterNode()
2740

    
2741
    # end data gathering
2742

    
2743
    output = []
2744
    for node in nodelist:
2745
      node_output = []
2746
      for field in self.op.output_fields:
2747
        if field in self._SIMPLE_FIELDS:
2748
          val = getattr(node, field)
2749
        elif field == "pinst_list":
2750
          val = list(node_to_primary[node.name])
2751
        elif field == "sinst_list":
2752
          val = list(node_to_secondary[node.name])
2753
        elif field == "pinst_cnt":
2754
          val = len(node_to_primary[node.name])
2755
        elif field == "sinst_cnt":
2756
          val = len(node_to_secondary[node.name])
2757
        elif field == "pip":
2758
          val = node.primary_ip
2759
        elif field == "sip":
2760
          val = node.secondary_ip
2761
        elif field == "tags":
2762
          val = list(node.GetTags())
2763
        elif field == "master":
2764
          val = node.name == master_node
2765
        elif self._FIELDS_DYNAMIC.Matches(field):
2766
          val = live_data[node.name].get(field, None)
2767
        elif field == "role":
2768
          if node.name == master_node:
2769
            val = "M"
2770
          elif node.master_candidate:
2771
            val = "C"
2772
          elif node.drained:
2773
            val = "D"
2774
          elif node.offline:
2775
            val = "O"
2776
          else:
2777
            val = "R"
2778
        else:
2779
          raise errors.ParameterError(field)
2780
        node_output.append(val)
2781
      output.append(node_output)
2782

    
2783
    return output
2784

    
2785

    
2786
class LUQueryNodeVolumes(NoHooksLU):
2787
  """Logical unit for getting volumes on node(s).
2788

2789
  """
2790
  _OP_REQP = ["nodes", "output_fields"]
2791
  REQ_BGL = False
2792
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
2793
  _FIELDS_STATIC = utils.FieldSet("node")
2794

    
2795
  def ExpandNames(self):
2796
    _CheckOutputFields(static=self._FIELDS_STATIC,
2797
                       dynamic=self._FIELDS_DYNAMIC,
2798
                       selected=self.op.output_fields)
2799

    
2800
    self.needed_locks = {}
2801
    self.share_locks[locking.LEVEL_NODE] = 1
2802
    if not self.op.nodes:
2803
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2804
    else:
2805
      self.needed_locks[locking.LEVEL_NODE] = \
2806
        _GetWantedNodes(self, self.op.nodes)
2807

    
2808
  def CheckPrereq(self):
2809
    """Check prerequisites.
2810

2811
    This checks that the fields required are valid output fields.
2812

2813
    """
2814
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2815

    
2816
  def Exec(self, feedback_fn):
2817
    """Computes the list of nodes and their attributes.
2818

2819
    """
2820
    nodenames = self.nodes
2821
    volumes = self.rpc.call_node_volumes(nodenames)
2822

    
2823
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
2824
             in self.cfg.GetInstanceList()]
2825

    
2826
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
2827

    
2828
    output = []
2829
    for node in nodenames:
2830
      nresult = volumes[node]
2831
      if nresult.offline:
2832
        continue
2833
      msg = nresult.fail_msg
2834
      if msg:
2835
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
2836
        continue
2837

    
2838
      node_vols = nresult.payload[:]
2839
      node_vols.sort(key=lambda vol: vol['dev'])
2840

    
2841
      for vol in node_vols:
2842
        node_output = []
2843
        for field in self.op.output_fields:
2844
          if field == "node":
2845
            val = node
2846
          elif field == "phys":
2847
            val = vol['dev']
2848
          elif field == "vg":
2849
            val = vol['vg']
2850
          elif field == "name":
2851
            val = vol['name']
2852
          elif field == "size":
2853
            val = int(float(vol['size']))
2854
          elif field == "instance":
2855
            for inst in ilist:
2856
              if node not in lv_by_node[inst]:
2857
                continue
2858
              if vol['name'] in lv_by_node[inst][node]:
2859
                val = inst.name
2860
                break
2861
            else:
2862
              val = '-'
2863
          else:
2864
            raise errors.ParameterError(field)
2865
          node_output.append(str(val))
2866

    
2867
        output.append(node_output)
2868

    
2869
    return output
2870

    
2871

    
2872
class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)

  def ExpandNames(self):
    storage_type = self.op.storage_type

    if storage_type not in constants.VALID_STORAGE_TYPES:
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the fields required are valid output fields.

    """
    self.op.name = getattr(self.op, "name", None)

    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of storage units and their attributes.

    """
    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result

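# Illustrative sketch (the storage field names here are assumptions, not
# taken from this revision): for output_fields=[constants.SF_NODE,
# constants.SF_SIZE] the LU above would ask each node only for
# [SF_NAME, SF_SIZE] -- SF_NODE and SF_TYPE are filled in locally and
# SF_NAME is always fetched so the rows can be sorted by name -- and then
# emit one row per storage unit, e.g. ["node1.example.com", 10240].
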
class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type
    if storage_type not in constants.VALID_STORAGE_TYPES:
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Modifies the requested storage unit.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))

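# Usage sketch for the LU above (the concrete field and value are assumptions,
# shown for illustration only): a typical "changes" dictionary would look like
#
#   changes = {constants.SF_ALLOCATABLE: True}
#
# CheckPrereq rejects any key that is not listed for the storage type in
# constants.MODIFIABLE_STORAGE_FIELDS, and Exec forwards the dictionary
# unchanged to the node via the storage_modify RPC.
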
class LUAddNode(LogicalUnit):
3022
  """Logical unit for adding node to the cluster.
3023

3024
  """
3025
  HPATH = "node-add"
3026
  HTYPE = constants.HTYPE_NODE
3027
  _OP_REQP = ["node_name"]
3028

    
3029
  def CheckArguments(self):
3030
    # validate/normalize the node name
3031
    self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3032

    
3033
  def BuildHooksEnv(self):
3034
    """Build hooks env.
3035

3036
    This will run on all nodes before, and on all nodes + the new node after.
3037

3038
    """
3039
    env = {
3040
      "OP_TARGET": self.op.node_name,
3041
      "NODE_NAME": self.op.node_name,
3042
      "NODE_PIP": self.op.primary_ip,
3043
      "NODE_SIP": self.op.secondary_ip,
3044
      }
3045
    nodes_0 = self.cfg.GetNodeList()
3046
    nodes_1 = nodes_0 + [self.op.node_name, ]
3047
    return env, nodes_0, nodes_1
3048

    
3049
  def CheckPrereq(self):
3050
    """Check prerequisites.
3051

3052
    This checks:
3053
     - the new node is not already in the config
3054
     - it is resolvable
3055
     - its parameters (single/dual homed) matches the cluster
3056

3057
    Any errors are signaled by raising errors.OpPrereqError.
3058

3059
    """
3060
    node_name = self.op.node_name
3061
    cfg = self.cfg
3062

    
3063
    dns_data = utils.GetHostInfo(node_name)
3064

    
3065
    node = dns_data.name
3066
    primary_ip = self.op.primary_ip = dns_data.ip
3067
    secondary_ip = getattr(self.op, "secondary_ip", None)
3068
    if secondary_ip is None:
3069
      secondary_ip = primary_ip
3070
    if not utils.IsValidIP(secondary_ip):
3071
      raise errors.OpPrereqError("Invalid secondary IP given",
3072
                                 errors.ECODE_INVAL)
3073
    self.op.secondary_ip = secondary_ip
3074

    
3075
    node_list = cfg.GetNodeList()
3076
    if not self.op.readd and node in node_list:
3077
      raise errors.OpPrereqError("Node %s is already in the configuration" %
3078
                                 node, errors.ECODE_EXISTS)
3079
    elif self.op.readd and node not in node_list:
3080
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3081
                                 errors.ECODE_NOENT)
3082

    
3083
    for existing_node_name in node_list:
3084
      existing_node = cfg.GetNodeInfo(existing_node_name)
3085

    
3086
      if self.op.readd and node == existing_node_name:
3087
        if (existing_node.primary_ip != primary_ip or
3088
            existing_node.secondary_ip != secondary_ip):
3089
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
3090
                                     " address configuration as before",
3091
                                     errors.ECODE_INVAL)
3092
        continue
3093

    
3094
      if (existing_node.primary_ip == primary_ip or
3095
          existing_node.secondary_ip == primary_ip or
3096
          existing_node.primary_ip == secondary_ip or
3097
          existing_node.secondary_ip == secondary_ip):
3098
        raise errors.OpPrereqError("New node ip address(es) conflict with"
3099
                                   " existing node %s" % existing_node.name,
3100
                                   errors.ECODE_NOTUNIQUE)
3101

    
3102
    # check that the type of the node (single versus dual homed) is the
3103
    # same as for the master
3104
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3105
    master_singlehomed = myself.secondary_ip == myself.primary_ip
3106
    newbie_singlehomed = secondary_ip == primary_ip
3107
    if master_singlehomed != newbie_singlehomed:
3108
      if master_singlehomed:
3109
        raise errors.OpPrereqError("The master has no private ip but the"
3110
                                   " new node has one",
3111
                                   errors.ECODE_INVAL)
3112
      else:
3113
        raise errors.OpPrereqError("The master has a private ip but the"
3114
                                   " new node doesn't have one",
3115
                                   errors.ECODE_INVAL)
3116

    
3117
    # checks reachability
3118
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3119
      raise errors.OpPrereqError("Node not reachable by ping",
3120
                                 errors.ECODE_ENVIRON)
3121

    
3122
    if not newbie_singlehomed:
3123
      # check reachability from my secondary ip to newbie's secondary ip
3124
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3125
                           source=myself.secondary_ip):
3126
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3127
                                   " based ping to noded port",
3128
                                   errors.ECODE_ENVIRON)
3129

    
3130
    if self.op.readd:
3131
      exceptions = [node]
3132
    else:
3133
      exceptions = []
3134

    
3135
    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3136

    
3137
    if self.op.readd:
3138
      self.new_node = self.cfg.GetNodeInfo(node)
3139
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
3140
    else:
3141
      self.new_node = objects.Node(name=node,
3142
                                   primary_ip=primary_ip,
3143
                                   secondary_ip=secondary_ip,
3144
                                   master_candidate=self.master_candidate,
3145
                                   offline=False, drained=False)
3146

    
3147
  def Exec(self, feedback_fn):
3148
    """Adds the new node to the cluster.
3149

3150
    """
3151
    new_node = self.new_node
3152
    node = new_node.name
3153

    
3154
    # for re-adds, reset the offline/drained/master-candidate flags;
3155
    # we need to reset here, otherwise offline would prevent RPC calls
3156
    # later in the procedure; this also means that if the re-add
3157
    # fails, we are left with a non-offlined, broken node
3158
    if self.op.readd:
3159
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3160
      self.LogInfo("Readding a node, the offline/drained flags were reset")
3161
      # if we demote the node, we do cleanup later in the procedure
3162
      new_node.master_candidate = self.master_candidate
3163

    
3164
    # notify the user about any possible mc promotion
3165
    if new_node.master_candidate:
3166
      self.LogInfo("Node will be a master candidate")
3167

    
3168
    # check connectivity
3169
    result = self.rpc.call_version([node])[node]
3170
    result.Raise("Can't get version information from node %s" % node)
3171
    if constants.PROTOCOL_VERSION == result.payload:
3172
      logging.info("Communication to node %s fine, sw version %s match",
3173
                   node, result.payload)
3174
    else:
3175
      raise errors.OpExecError("Version mismatch master version %s,"
3176
                               " node version %s" %
3177
                               (constants.PROTOCOL_VERSION, result.payload))
3178

    
3179
    # setup ssh on node
3180
    if self.cfg.GetClusterInfo().modify_ssh_setup:
3181
      logging.info("Copy ssh key to node %s", node)
3182
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3183
      keyarray = []
3184
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3185
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3186
                  priv_key, pub_key]
3187

    
3188
      for i in keyfiles:
3189
        keyarray.append(utils.ReadFile(i))
3190

    
3191
      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3192
                                      keyarray[2], keyarray[3], keyarray[4],
3193
                                      keyarray[5])
3194
      result.Raise("Cannot transfer ssh keys to the new node")
3195

    
3196
    # Add node to our /etc/hosts, and add key to known_hosts
3197
    if self.cfg.GetClusterInfo().modify_etc_hosts:
3198
      utils.AddHostToEtcHosts(new_node.name)
3199

    
3200
    if new_node.secondary_ip != new_node.primary_ip:
3201
      result = self.rpc.call_node_has_ip_address(new_node.name,
3202
                                                 new_node.secondary_ip)
3203
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3204
                   prereq=True, ecode=errors.ECODE_ENVIRON)
3205
      if not result.payload:
3206
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3207
                                 " you gave (%s). Please fix and re-run this"
3208
                                 " command." % new_node.secondary_ip)
3209

    
3210
    node_verify_list = [self.cfg.GetMasterNode()]
3211
    node_verify_param = {
3212
      constants.NV_NODELIST: [node],
3213
      # TODO: do a node-net-test as well?
3214
    }
3215

    
3216
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3217
                                       self.cfg.GetClusterName())
3218
    for verifier in node_verify_list:
3219
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
3220
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
3221
      if nl_payload:
3222
        for failed in nl_payload:
3223
          feedback_fn("ssh/hostname verification failed"
3224
                      " (checking from %s): %s" %
3225
                      (verifier, nl_payload[failed]))
3226
        raise errors.OpExecError("ssh/hostname verification failed.")
3227

    
3228
    if self.op.readd:
3229
      _RedistributeAncillaryFiles(self)
3230
      self.context.ReaddNode(new_node)
3231
      # make sure we redistribute the config
3232
      self.cfg.Update(new_node, feedback_fn)
3233
      # and make sure the new node will not have old files around
3234
      if not new_node.master_candidate:
3235
        result = self.rpc.call_node_demote_from_mc(new_node.name)
3236
        msg = result.fail_msg
3237
        if msg:
3238
          self.LogWarning("Node failed to demote itself from master"
3239
                          " candidate status: %s" % msg)
3240
    else:
3241
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
3242
      self.context.AddNode(new_node, self.proc.GetECId())
3243

    
3244

    
3245
class LUSetNodeParams(LogicalUnit):
3246
  """Modifies the parameters of a node.
3247

3248
  """
3249
  HPATH = "node-modify"
3250
  HTYPE = constants.HTYPE_NODE
3251
  _OP_REQP = ["node_name"]
3252
  REQ_BGL = False
3253

    
3254
  def CheckArguments(self):
3255
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3256
    _CheckBooleanOpField(self.op, 'master_candidate')
3257
    _CheckBooleanOpField(self.op, 'offline')
3258
    _CheckBooleanOpField(self.op, 'drained')
3259
    _CheckBooleanOpField(self.op, 'auto_promote')
3260
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3261
    if all_mods.count(None) == 3:
3262
      raise errors.OpPrereqError("Please pass at least one modification",
3263
                                 errors.ECODE_INVAL)
3264
    if all_mods.count(True) > 1:
3265
      raise errors.OpPrereqError("Can't set the node into more than one"
3266
                                 " state at the same time",
3267
                                 errors.ECODE_INVAL)
3268

    
3269
    # Boolean value that tells us whether we're offlining or draining the node
3270
    self.offline_or_drain = (self.op.offline == True or
3271
                             self.op.drained == True)
3272
    self.deoffline_or_drain = (self.op.offline == False or
3273
                               self.op.drained == False)
3274
    self.might_demote = (self.op.master_candidate == False or
3275
                         self.offline_or_drain)
3276

    
3277
    self.lock_all = self.op.auto_promote and self.might_demote
3278

    
3279

    
3280
  def ExpandNames(self):
3281
    if self.lock_all:
3282
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3283
    else:
3284
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3285

    
3286
  def BuildHooksEnv(self):
3287
    """Build hooks env.
3288

3289
    This runs on the master node.
3290

3291
    """
3292
    env = {
3293
      "OP_TARGET": self.op.node_name,
3294
      "MASTER_CANDIDATE": str(self.op.master_candidate),
3295
      "OFFLINE": str(self.op.offline),
3296
      "DRAINED": str(self.op.drained),
3297
      }
3298
    nl = [self.cfg.GetMasterNode(),
3299
          self.op.node_name]
3300
    return env, nl, nl
3301

    
3302
  def CheckPrereq(self):
3303
    """Check prerequisites.
3304

3305
    This checks the node's current state against the requested flag changes.
3306

3307
    """
3308
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3309

    
3310
    if (self.op.master_candidate is not None or
3311
        self.op.drained is not None or
3312
        self.op.offline is not None):
3313
      # we can't change the master's node flags
3314
      if self.op.node_name == self.cfg.GetMasterNode():
3315
        raise errors.OpPrereqError("The master role can be changed"
3316
                                   " only via masterfailover",
3317
                                   errors.ECODE_INVAL)
3318

    
3319

    
3320
    if node.master_candidate and self.might_demote and not self.lock_all:
3321
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
3322
      # check if after removing the current node, we're missing master
3323
      # candidates
3324
      (mc_remaining, mc_should, _) = \
3325
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3326
      if mc_remaining != mc_should:
3327
        raise errors.OpPrereqError("Not enough master candidates, please"
3328
                                   " pass auto_promote to allow promotion",
3329
                                   errors.ECODE_INVAL)
3330

    
3331
    if (self.op.master_candidate == True and
3332
        ((node.offline and not self.op.offline == False) or
3333
         (node.drained and not self.op.drained == False))):
3334
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3335
                                 " to master_candidate" % node.name,
3336
                                 errors.ECODE_INVAL)
3337

    
3338
    # If we're being deofflined/drained, we'll MC ourself if needed
3339
    if (self.deoffline_or_drain and not self.offline_or_drain and not
3340
        self.op.master_candidate == True and not node.master_candidate):
3341
      self.op.master_candidate = _DecideSelfPromotion(self)
3342
      if self.op.master_candidate:
3343
        self.LogInfo("Autopromoting node to master candidate")
3344

    
3345
    return
3346

    
3347
  def Exec(self, feedback_fn):
3348
    """Modifies a node.
3349

3350
    """
3351
    node = self.node
3352

    
3353
    result = []
3354
    changed_mc = False
3355

    
3356
    if self.op.offline is not None:
3357
      node.offline = self.op.offline
3358
      result.append(("offline", str(self.op.offline)))
3359
      if self.op.offline == True:
3360
        if node.master_candidate:
3361
          node.master_candidate = False
3362
          changed_mc = True
3363
          result.append(("master_candidate", "auto-demotion due to offline"))
3364
        if node.drained:
3365
          node.drained = False
3366
          result.append(("drained", "clear drained status due to offline"))
3367

    
3368
    if self.op.master_candidate is not None:
3369
      node.master_candidate = self.op.master_candidate
3370
      changed_mc = True
3371
      result.append(("master_candidate", str(self.op.master_candidate)))
3372
      if self.op.master_candidate == False:
3373
        rrc = self.rpc.call_node_demote_from_mc(node.name)
3374
        msg = rrc.fail_msg
3375
        if msg:
3376
          self.LogWarning("Node failed to demote itself: %s" % msg)
3377

    
3378
    if self.op.drained is not None:
3379
      node.drained = self.op.drained
3380
      result.append(("drained", str(self.op.drained)))
3381
      if self.op.drained == True:
3382
        if node.master_candidate:
3383
          node.master_candidate = False
3384
          changed_mc = True
3385
          result.append(("master_candidate", "auto-demotion due to drain"))
3386
          rrc = self.rpc.call_node_demote_from_mc(node.name)
3387
          msg = rrc.fail_msg
3388
          if msg:
3389
            self.LogWarning("Node failed to demote itself: %s" % msg)
3390
        if node.offline:
3391
          node.offline = False
3392
          result.append(("offline", "clear offline status due to drain"))
3393

    
3394
    # we locked all nodes, we adjust the CP before updating this node
3395
    if self.lock_all:
3396
      _AdjustCandidatePool(self, [node.name])
3397

    
3398
    # this will trigger configuration file update, if needed
3399
    self.cfg.Update(node, feedback_fn)
3400

    
3401
    # this will trigger job queue propagation or cleanup
3402
    if changed_mc:
3403
      self.context.ReaddNode(node)
3404

    
3405
    return result
3406

    
3407

    
3408
class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_REQP = ["node_name", "force"]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This LU has no prereqs.

    """
    pass

  def Exec(self, feedback_fn):
    """Powercycles the node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload

class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """No prerequisites needed for this LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "file_storage_dir": cluster.file_storage_dir,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      }

    return result

class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_REQP = []
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause")

  def ExpandNames(self):
    self.needed_locks = {}

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def CheckPrereq(self):
    """No prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Return the requested configuration values.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values

class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)
    if not hasattr(self.op, "ignore_size"):
      self.op.ignore_size = False

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info

def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: a tuple of (disks_ok, device_info), where device_info is a
      list of (host, instance_visible_name, node_visible_name) tuples
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before the handshake occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in instance.disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in instance.disks:
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in instance.disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info

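# Illustrative example (not part of the original code): for a two-disk DRBD
# instance whose primary node is "node1.example.com", a successful call to
# _AssembleInstanceDisks could return
#
#   (True, [("node1.example.com", "disk/0", "/dev/drbd0"),
#           ("node1.example.com", "disk/1", "/dev/drbd1")])
#
# i.e. a success flag plus one (node, instance-visible name, node device path)
# tuple per disk; dev_path is None for a disk whose primary-side assembly
# failed.
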
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")

class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks.

    """
    instance = self.instance
    _SafeShutdownInstanceDisks(self, instance)

def _SafeShutdownInstanceDisks(lu, instance):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s" % pnode)

  if instance.name in ins_l.payload:
    raise errors.OpExecError("Instance is running, can't shutdown"
                             " block devices.")

  _ShutdownInstanceDisks(lu, instance)

def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is true, errors on the primary node are ignored
  when computing the result; errors on any other node always cause a
  False result.

  """
  all_result = True
  for disk in instance.disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if not ignore_primary or node != instance.primary_node:
          all_result = False
  return all_result

def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks whether a given node has the needed amount of
  free memory. If the node has less memory, or if we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)

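# Usage sketch: LUStartupInstance below calls this helper as
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)
#
# so an OpPrereqError is raised before any state is changed if the primary
# node reports less free memory than the instance's memory setting.
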
def _CheckNodesFreeDisk(lu, nodenames, requested):
  """Checks if nodes have enough free disk space in the default VG.

  This function checks whether all given nodes have the needed amount of
  free disk. If any node has less disk, or if we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if a node doesn't have enough disk, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
                                   lu.cfg.GetHypervisorType())
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
                                 " result was '%s'" % (node, vg_free),
                                 errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                 " required %d MiB, available %d MiB" %
                                 (node, requested, vg_free),
                                 errors.ECODE_NORES)

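# Usage sketch (the call site shown here is an assumption, for illustration
# only): callers that are about to allocate new disks pass the node names and
# the total size they will request, e.g.
#
#   _CheckNodesFreeDisk(self, [pnode.name, snode.name], req_size)
#
# where req_size is the required space in MiB; the helper raises
# OpPrereqError if any of the nodes reports less free space in the VG.
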
class LUStartupInstance(LogicalUnit):
3841
  """Starts an instance.
3842

3843
  """
3844
  HPATH = "instance-start"
3845
  HTYPE = constants.HTYPE_INSTANCE
3846
  _OP_REQP = ["instance_name", "force"]
3847
  REQ_BGL = False
3848

    
3849
  def ExpandNames(self):
3850
    self._ExpandAndLockInstance()
3851

    
3852
  def BuildHooksEnv(self):
3853
    """Build hooks env.
3854

3855
    This runs on master, primary and secondary nodes of the instance.
3856

3857
    """
3858
    env = {
3859
      "FORCE": self.op.force,
3860
      }
3861
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3862
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3863
    return env, nl, nl
3864

    
3865
  def CheckPrereq(self):
3866
    """Check prerequisites.
3867

3868
    This checks that the instance is in the cluster.
3869

3870
    """
3871
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3872
    assert self.instance is not None, \
3873
      "Cannot retrieve locked instance %s" % self.op.instance_name
3874

    
3875
    # extra beparams
3876
    self.beparams = getattr(self.op, "beparams", {})
3877
    if self.beparams:
3878
      if not isinstance(self.beparams, dict):
3879
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
3880
                                   " dict" % (type(self.beparams), ),
3881
                                   errors.ECODE_INVAL)
3882
      # fill the beparams dict
3883
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
3884
      self.op.beparams = self.beparams
3885

    
3886
    # extra hvparams
3887
    self.hvparams = getattr(self.op, "hvparams", {})
3888
    if self.hvparams:
3889
      if not isinstance(self.hvparams, dict):
3890
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
3891
                                   " dict" % (type(self.hvparams), ),
3892
                                   errors.ECODE_INVAL)
3893

    
3894
      # check hypervisor parameter syntax (locally)
3895
      cluster = self.cfg.GetClusterInfo()
3896
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
3897
      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
3898
                                    instance.hvparams)
3899
      filled_hvp.update(self.hvparams)
3900
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
3901
      hv_type.CheckParameterSyntax(filled_hvp)
3902
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
3903
      self.op.hvparams = self.hvparams
3904

    
3905
    _CheckNodeOnline(self, instance.primary_node)
3906

    
3907
    bep = self.cfg.GetClusterInfo().FillBE(instance)
3908
    # check bridges existence
3909
    _CheckInstanceBridgesExist(self, instance)
3910

    
3911
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3912
                                              instance.name,
3913
                                              instance.hypervisor)
3914
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3915
                      prereq=True, ecode=errors.ECODE_ENVIRON)
3916
    if not remote_info.payload: # not running already
3917
      _CheckNodeFreeMemory(self, instance.primary_node,
3918
                           "starting instance %s" % instance.name,
3919
                           bep[constants.BE_MEMORY], instance.hypervisor)
3920

    
3921
  def Exec(self, feedback_fn):
3922
    """Start the instance.
3923

3924
    """
3925
    instance = self.instance
3926
    force = self.op.force
3927

    
3928
    self.cfg.MarkInstanceUp(instance.name)
3929

    
3930
    node_current = instance.primary_node
3931

    
3932
    _StartInstanceDisks(self, instance, force)
3933

    
3934
    result = self.rpc.call_instance_start(node_current, instance,
3935
                                          self.hvparams, self.beparams)
3936
    msg = result.fail_msg
3937
    if msg:
3938
      _ShutdownInstanceDisks(self, instance)
3939
      raise errors.OpExecError("Could not start instance: %s" % msg)
3940

    
3941

    
3942
class LURebootInstance(LogicalUnit):
3943
  """Reboot an instance.
3944

3945
  """
3946
  HPATH = "instance-reboot"
3947
  HTYPE = constants.HTYPE_INSTANCE
3948
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
3949
  REQ_BGL = False
3950

    
3951
  def CheckArguments(self):
3952
    """Check the arguments.
3953

3954
    """
3955
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
3956
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
3957

    
3958
  def ExpandNames(self):
3959
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
3960
                                   constants.INSTANCE_REBOOT_HARD,
3961
                                   constants.INSTANCE_REBOOT_FULL]:
3962
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
3963
                                  (constants.INSTANCE_REBOOT_SOFT,
3964
                                   constants.INSTANCE_REBOOT_HARD,
3965
                                   constants.INSTANCE_REBOOT_FULL))
3966
    self._ExpandAndLockInstance()
3967

    
3968
  def BuildHooksEnv(self):
3969
    """Build hooks env.
3970

3971
    This runs on master, primary and secondary nodes of the instance.
3972

3973
    """
3974
    env = {
3975
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
3976
      "REBOOT_TYPE": self.op.reboot_type,
3977
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
3978
      }
3979
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3980
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3981
    return env, nl, nl
3982

    
3983
  def CheckPrereq(self):
3984
    """Check prerequisites.
3985

3986
    This checks that the instance is in the cluster.
3987

3988
    """
3989
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3990
    assert self.instance is not None, \
3991
      "Cannot retrieve locked instance %s" % self.op.instance_name
3992

    
3993
    _CheckNodeOnline(self, instance.primary_node)
3994

    
3995
    # check bridges existence
3996
    _CheckInstanceBridgesExist(self, instance)
3997

    
3998
  def Exec(self, feedback_fn):
3999
    """Reboot the instance.
4000

4001
    """
4002
    instance = self.instance
4003
    ignore_secondaries = self.op.ignore_secondaries
4004
    reboot_type = self.op.reboot_type
4005

    
4006
    node_current = instance.primary_node
4007

    
4008
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4009
                       constants.INSTANCE_REBOOT_HARD]:
4010
      for disk in instance.disks:
4011
        self.cfg.SetDiskID(disk, node_current)
4012
      result = self.rpc.call_instance_reboot(node_current, instance,
4013
                                             reboot_type,
4014
                                             self.shutdown_timeout)
4015
      result.Raise("Could not reboot instance")
4016
    else:
4017
      result = self.rpc.call_instance_shutdown(node_current, instance,
4018
                                               self.shutdown_timeout)
4019
      result.Raise("Could not shutdown instance for full reboot")
4020
      _ShutdownInstanceDisks(self, instance)
4021
      _StartInstanceDisks(self, instance, ignore_secondaries)
4022
      result = self.rpc.call_instance_start(node_current, instance, None, None)
4023
      msg = result.fail_msg
4024
      if msg:
4025
        _ShutdownInstanceDisks(self, instance)
4026
        raise errors.OpExecError("Could not start instance for"
4027
                                 " full reboot: %s" % msg)
4028

    
4029
    self.cfg.MarkInstanceUp(instance.name)
4030

    
4031

    
4032
class LUShutdownInstance(LogicalUnit):
4033
  """Shutdown an instance.
4034

4035
  """
4036
  HPATH = "instance-stop"
4037
  HTYPE = constants.HTYPE_INSTANCE
4038
  _OP_REQP = ["instance_name"]
4039
  REQ_BGL = False
4040

    
4041
  def CheckArguments(self):
4042
    """Check the arguments.
4043

4044
    """
4045
    self.timeout = getattr(self.op, "timeout",
4046
                           constants.DEFAULT_SHUTDOWN_TIMEOUT)
4047

    
4048
  def ExpandNames(self):
4049
    self._ExpandAndLockInstance()
4050

    
4051
  def BuildHooksEnv(self):
4052
    """Build hooks env.
4053

4054
    This runs on master, primary and secondary nodes of the instance.
4055

4056
    """
4057
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4058
    env["TIMEOUT"] = self.timeout
4059
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4060
    return env, nl, nl
4061

    
4062
  def CheckPrereq(self):
4063
    """Check prerequisites.
4064

4065
    This checks that the instance is in the cluster.
4066

4067
    """
4068
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4069
    assert self.instance is not None, \
4070
      "Cannot retrieve locked instance %s" % self.op.instance_name
4071
    _CheckNodeOnline(self, self.instance.primary_node)
4072

    
4073
  def Exec(self, feedback_fn):
4074
    """Shutdown the instance.
4075

4076
    """
4077
    instance = self.instance
4078
    node_current = instance.primary_node
4079
    timeout = self.timeout
4080
    self.cfg.MarkInstanceDown(instance.name)
4081
    result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4082
    msg = result.fail_msg
4083
    if msg:
4084
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4085

    
4086
    _ShutdownInstanceDisks(self, instance)
4087

    
4088

    
4089
class LUReinstallInstance(LogicalUnit):
4090
  """Reinstall an instance.
4091

4092
  """
4093
  HPATH = "instance-reinstall"
4094
  HTYPE = constants.HTYPE_INSTANCE
4095
  _OP_REQP = ["instance_name"]
4096
  REQ_BGL = False
4097

    
4098
  def ExpandNames(self):
4099
    self._ExpandAndLockInstance()
4100

    
4101
  def BuildHooksEnv(self):
4102
    """Build hooks env.
4103

4104
    This runs on master, primary and secondary nodes of the instance.
4105

4106
    """
4107
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4108
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4109
    return env, nl, nl
4110

    
4111
  def CheckPrereq(self):
4112
    """Check prerequisites.
4113

4114
    This checks that the instance is in the cluster and is not running.
4115

4116
    """
4117
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4118
    assert instance is not None, \
4119
      "Cannot retrieve locked instance %s" % self.op.instance_name
4120
    _CheckNodeOnline(self, instance.primary_node)
4121

    
4122
    if instance.disk_template == constants.DT_DISKLESS:
4123
      raise errors.OpPrereqError("Instance '%s' has no disks" %
4124
                                 self.op.instance_name,
4125
                                 errors.ECODE_INVAL)
4126
    if instance.admin_up:
4127
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
4128
                                 self.op.instance_name,
4129
                                 errors.ECODE_STATE)
4130
    remote_info = self.rpc.call_instance_info(instance.primary_node,
4131
                                              instance.name,
4132
                                              instance.hypervisor)
4133
    remote_info.Raise("Error checking node %s" % instance.primary_node,
4134
                      prereq=True, ecode=errors.ECODE_ENVIRON)
4135
    if remote_info.payload:
4136
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
4137
                                 (self.op.instance_name,
4138
                                  instance.primary_node),
4139
                                 errors.ECODE_STATE)
4140

    
4141
    self.op.os_type = getattr(self.op, "os_type", None)
4142
    self.op.force_variant = getattr(self.op, "force_variant", False)
4143
    if self.op.os_type is not None:
4144
      # OS verification
4145
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4146
      result = self.rpc.call_os_get(pnode, self.op.os_type)
4147
      result.Raise("OS '%s' not in supported OS list for primary node %s" %
4148
                   (self.op.os_type, pnode),
4149
                   prereq=True, ecode=errors.ECODE_INVAL)
4150
      if not self.op.force_variant:
4151
        _CheckOSVariant(result.payload, self.op.os_type)
4152

    
4153
    self.instance = instance
4154

    
4155
  def Exec(self, feedback_fn):
4156
    """Reinstall the instance.
4157

4158
    """
4159
    inst = self.instance
4160

    
4161
    if self.op.os_type is not None:
4162
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4163
      inst.os = self.op.os_type
4164
      self.cfg.Update(inst, feedback_fn)
4165

    
4166
    _StartInstanceDisks(self, inst, None)
4167
    try:
4168
      feedback_fn("Running the instance OS create scripts...")
4169
      # FIXME: pass debug option from opcode to backend
4170
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4171
                                             self.op.debug_level)
4172
      result.Raise("Could not install OS for instance %s on node %s" %
4173
                   (inst.name, inst.primary_node))
4174
    finally:
4175
      _ShutdownInstanceDisks(self, inst)
4176

    
4177

    
4178
class LURecreateInstanceDisks(LogicalUnit):
4179
  """Recreate an instance's missing disks.
4180

4181
  """
4182
  HPATH = "instance-recreate-disks"
4183
  HTYPE = constants.HTYPE_INSTANCE
4184
  _OP_REQP = ["instance_name", "disks"]
4185
  REQ_BGL = False
4186

    
4187
  def CheckArguments(self):
4188
    """Check the arguments.
4189

4190
    """
4191
    if not isinstance(self.op.disks, list):
4192
      raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
4193
    for item in self.op.disks:
4194
      if (not isinstance(item, int) or
4195
          item < 0):
4196
        raise errors.OpPrereqError("Invalid disk specification '%s'" %
4197
                                   str(item), errors.ECODE_INVAL)
4198

    
4199
  def ExpandNames(self):
4200
    self._ExpandAndLockInstance()
4201

    
4202
  def BuildHooksEnv(self):
4203
    """Build hooks env.
4204

4205
    This runs on master, primary and secondary nodes of the instance.
4206

4207
    """
4208
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4209
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4210
    return env, nl, nl
4211

    
4212
  def CheckPrereq(self):
4213
    """Check prerequisites.
4214

4215
    This checks that the instance is in the cluster and is not running.
4216

4217
    """
4218
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4219
    assert instance is not None, \
4220
      "Cannot retrieve locked instance %s" % self.op.instance_name
4221
    _CheckNodeOnline(self, instance.primary_node)
4222

    
4223
    if instance.disk_template == constants.DT_DISKLESS:
4224
      raise errors.OpPrereqError("Instance '%s' has no disks" %
4225
                                 self.op.instance_name, errors.ECODE_INVAL)
4226
    if instance.admin_up:
4227
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
4228
                                 self.op.instance_name, errors.ECODE_STATE)
4229
    remote_info = self.rpc.call_instance_info(instance.primary_node,
4230
                                              instance.name,
4231
                                              instance.hypervisor)
4232
    remote_info.Raise("Error checking node %s" % instance.primary_node,
4233
                      prereq=True, ecode=errors.ECODE_ENVIRON)
4234
    if remote_info.payload:
4235
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
4236
                                 (self.op.instance_name,
4237
                                  instance.primary_node), errors.ECODE_STATE)
4238

    
4239
    if not self.op.disks:
4240
      self.op.disks = range(len(instance.disks))
4241
    else:
4242
      for idx in self.op.disks:
4243
        if idx >= len(instance.disks):
4244
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4245
                                     errors.ECODE_INVAL)
4246

    
4247
    self.instance = instance
4248

    
4249
  def Exec(self, feedback_fn):
4250
    """Recreate the disks.
4251

4252
    """
4253
    to_skip = []
4254
    for idx, _ in enumerate(self.instance.disks):
4255
      if idx not in self.op.disks: # disk idx has not been passed in
4256
        to_skip.append(idx)
4257
        continue
4258

    
4259
    _CreateDisks(self, self.instance, to_skip=to_skip)
4260

    
4261

    
4262
class LURenameInstance(LogicalUnit):
4263
  """Rename an instance.
4264

4265
  """
4266
  HPATH = "instance-rename"
4267
  HTYPE = constants.HTYPE_INSTANCE
4268
  _OP_REQP = ["instance_name", "new_name"]
4269

    
4270
  def BuildHooksEnv(self):
4271
    """Build hooks env.
4272

4273
    This runs on master, primary and secondary nodes of the instance.
4274

4275
    """
4276
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4277
    env["INSTANCE_NEW_NAME"] = self.op.new_name
4278
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4279
    return env, nl, nl
4280

    
4281
  def CheckPrereq(self):
4282
    """Check prerequisites.
4283

4284
    This checks that the instance is in the cluster and is not running.
4285

4286
    """
4287
    self.op.instance_name = _ExpandInstanceName(self.cfg,
4288
                                                self.op.instance_name)
4289
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4290
    assert instance is not None
4291
    _CheckNodeOnline(self, instance.primary_node)
4292

    
4293
    if instance.admin_up:
4294
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
4295
                                 self.op.instance_name, errors.ECODE_STATE)
4296
    remote_info = self.rpc.call_instance_info(instance.primary_node,
4297
                                              instance.name,
4298
                                              instance.hypervisor)
4299
    remote_info.Raise("Error checking node %s" % instance.primary_node,
4300
                      prereq=True, ecode=errors.ECODE_ENVIRON)
4301
    if remote_info.payload:
4302
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
4303
                                 (self.op.instance_name,
4304
                                  instance.primary_node), errors.ECODE_STATE)
4305
    self.instance = instance
4306

    
4307
    # new name verification
4308
    name_info = utils.GetHostInfo(self.op.new_name)
4309

    
4310
    self.op.new_name = new_name = name_info.name
4311
    instance_list = self.cfg.GetInstanceList()
4312
    if new_name in instance_list:
4313
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4314
                                 new_name, errors.ECODE_EXISTS)
4315

    
4316
    if not getattr(self.op, "ignore_ip", False):
4317
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4318
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
4319
                                   (name_info.ip, new_name),
4320
                                   errors.ECODE_NOTUNIQUE)
4321

    
4322

    
4323
  def Exec(self, feedback_fn):
4324
    """Reinstall the instance.
4325

4326
    """
4327
    inst = self.instance
4328
    old_name = inst.name
4329

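    # For file-based instances the storage directory is derived from the
    # instance name, so remember the old path now and rename the directory
    # on the primary node after the configuration has been updated.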
    if inst.disk_template == constants.DT_FILE:
4331
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4332

    
4333
    self.cfg.RenameInstance(inst.name, self.op.new_name)
4334
    # Change the instance lock. This is definitely safe while we hold the BGL
4335
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4336
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4337

    
4338
    # re-read the instance from the configuration after rename
4339
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
4340

    
4341
    if inst.disk_template == constants.DT_FILE:
4342
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4343
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4344
                                                     old_file_storage_dir,
4345
                                                     new_file_storage_dir)
4346
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
4347
                   " (but the instance has been renamed in Ganeti)" %
4348
                   (inst.primary_node, old_file_storage_dir,
4349
                    new_file_storage_dir))
4350

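    # Temporarily activate the disks so the OS rename script can run on the
    # primary node; they are shut down again in the finally clause below.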
    _StartInstanceDisks(self, inst, None)
4352
    try:
4353
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4354
                                                 old_name, self.op.debug_level)
4355
      msg = result.fail_msg
4356
      if msg:
4357
        msg = ("Could not run OS rename script for instance %s on node %s"
4358
               " (but the instance has been renamed in Ganeti): %s" %
4359
               (inst.name, inst.primary_node, msg))
4360
        self.proc.LogWarning(msg)
4361
    finally:
4362
      _ShutdownInstanceDisks(self, inst)
4363

    
4364

    
4365
class LURemoveInstance(LogicalUnit):
4366
  """Remove an instance.
4367

4368
  """
4369
  HPATH = "instance-remove"
4370
  HTYPE = constants.HTYPE_INSTANCE
4371
  _OP_REQP = ["instance_name", "ignore_failures"]
4372
  REQ_BGL = False
4373

    
4374
  def CheckArguments(self):
4375
    """Check the arguments.
4376

4377
    """
4378
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4379
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
4380

    
4381
  def ExpandNames(self):
4382
    self._ExpandAndLockInstance()
4383
    self.needed_locks[locking.LEVEL_NODE] = []
4384
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4385

    
4386
  def DeclareLocks(self, level):
4387
    if level == locking.LEVEL_NODE:
4388
      self._LockInstancesNodes()
4389

    
4390
  def BuildHooksEnv(self):
4391
    """Build hooks env.
4392

4393
    This runs on master, primary and secondary nodes of the instance.
4394

4395
    """
4396
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4397
    env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
4398
    nl = [self.cfg.GetMasterNode()]
4399
    nl_post = list(self.instance.all_nodes) + nl
4400
    return env, nl, nl_post
4401

    
4402
  def CheckPrereq(self):
4403
    """Check prerequisites.
4404

4405
    This checks that the instance is in the cluster.
4406

4407
    """
4408
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4409
    assert self.instance is not None, \
4410
      "Cannot retrieve locked instance %s" % self.op.instance_name
4411

    
4412
  def Exec(self, feedback_fn):
4413
    """Remove the instance.
4414

4415
    """
4416
    instance = self.instance
4417
    logging.info("Shutting down instance %s on node %s",
4418
                 instance.name, instance.primary_node)
4419

    
4420
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4421
                                             self.shutdown_timeout)
4422
    msg = result.fail_msg
4423
    if msg:
4424
      if self.op.ignore_failures:
4425
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
4426
      else:
4427
        raise errors.OpExecError("Could not shutdown instance %s on"
4428
                                 " node %s: %s" %
4429
                                 (instance.name, instance.primary_node, msg))
4430

    
4431
    logging.info("Removing block devices for instance %s", instance.name)
4432

    
4433
    if not _RemoveDisks(self, instance):
4434
      if self.op.ignore_failures:
4435
        feedback_fn("Warning: can't remove instance's disks")
4436
      else:
4437
        raise errors.OpExecError("Can't remove instance's disks")
4438

    
4439
    logging.info("Removing instance %s out of cluster config", instance.name)
4440

    
4441
    self.cfg.RemoveInstance(instance.name)
4442
    self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4443

    
4444

    
4445
class LUQueryInstances(NoHooksLU):
4446
  """Logical unit for querying instances.
4447

4448
  """
4449
  # pylint: disable-msg=W0142
4450
  _OP_REQP = ["output_fields", "names", "use_locking"]
4451
  REQ_BGL = False
4452
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4453
                    "serial_no", "ctime", "mtime", "uuid"]
4454
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4455
                                    "admin_state",
4456
                                    "disk_template", "ip", "mac", "bridge",
4457
                                    "nic_mode", "nic_link",
4458
                                    "sda_size", "sdb_size", "vcpus", "tags",
4459
                                    "network_port", "beparams",
4460
                                    r"(disk)\.(size)/([0-9]+)",
4461
                                    r"(disk)\.(sizes)", "disk_usage",
4462
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4463
                                    r"(nic)\.(bridge)/([0-9]+)",
4464
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
4465
                                    r"(disk|nic)\.(count)",
4466
                                    "hvparams",
4467
                                    ] + _SIMPLE_FIELDS +
4468
                                  ["hv/%s" % name
4469
                                   for name in constants.HVS_PARAMETERS
4470
                                   if name not in constants.HVC_GLOBALS] +
4471
                                  ["be/%s" % name
4472
                                   for name in constants.BES_PARAMETERS])
4473
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
4474

    
4475

    
4476
  def ExpandNames(self):
4477
    _CheckOutputFields(static=self._FIELDS_STATIC,
4478
                       dynamic=self._FIELDS_DYNAMIC,
4479
                       selected=self.op.output_fields)
4480

    
4481
    self.needed_locks = {}
4482
    self.share_locks[locking.LEVEL_INSTANCE] = 1
4483
    self.share_locks[locking.LEVEL_NODE] = 1
4484

    
4485
    if self.op.names:
4486
      self.wanted = _GetWantedInstances(self, self.op.names)
4487
    else:
4488
      self.wanted = locking.ALL_SET
4489

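    # Live data (any non-static output field) requires RPC calls to the
    # nodes; locking is only worthwhile in that case, and only if the
    # caller asked for it.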
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4491
    self.do_locking = self.do_node_query and self.op.use_locking
4492
    if self.do_locking:
4493
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4494
      self.needed_locks[locking.LEVEL_NODE] = []
4495
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4496

    
4497
  def DeclareLocks(self, level):
4498
    if level == locking.LEVEL_NODE and self.do_locking:
4499
      self._LockInstancesNodes()
4500

    
4501
  def CheckPrereq(self):
4502
    """Check prerequisites.
4503

4504
    """
4505
    pass
4506

    
4507
  def Exec(self, feedback_fn):
4508
    """Computes the list of nodes and their attributes.
4509

4510
    """
4511
    # pylint: disable-msg=R0912
4512
    # way too many branches here
4513
    all_info = self.cfg.GetAllInstancesInfo()
4514
    if self.wanted == locking.ALL_SET:
4515
      # caller didn't specify instance names, so ordering is not important
4516
      if self.do_locking:
4517
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4518
      else:
4519
        instance_names = all_info.keys()
4520
      instance_names = utils.NiceSort(instance_names)
4521
    else:
4522
      # caller did specify names, so we must keep the ordering
4523
      if self.do_locking:
4524
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4525
      else:
4526
        tgt_set = all_info.keys()
4527
      missing = set(self.wanted).difference(tgt_set)
4528
      if missing:
4529
        raise errors.OpExecError("Some instances were removed before"
4530
                                 " retrieving their data: %s" % missing)
4531
      instance_names = self.wanted
4532

    
4533
    instance_list = [all_info[iname] for iname in instance_names]
4534

    
4535
    # begin data gathering
4536

    
4537
    nodes = frozenset([inst.primary_node for inst in instance_list])
4538
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4539

    
4540
    bad_nodes = []
4541
    off_nodes = []
4542
    if self.do_node_query:
4543
      live_data = {}
4544
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4545
      for name in nodes:
4546
        result = node_data[name]
4547
        if result.offline:
4548
          # offline nodes will be in both lists
4549
          off_nodes.append(name)
4550
        if result.fail_msg:
4551
          bad_nodes.append(name)
4552
        else:
4553
          if result.payload:
4554
            live_data.update(result.payload)
4555
          # else no instance is alive
4556
    else:
4557
      live_data = dict([(name, {}) for name in instance_names])
4558

    
4559
    # end data gathering
4560

    
4561
    HVPREFIX = "hv/"
4562
    BEPREFIX = "be/"
4563
    output = []
4564
    cluster = self.cfg.GetClusterInfo()
4565
    for instance in instance_list:
4566
      iout = []
4567
      i_hv = cluster.FillHV(instance, skip_globals=True)
4568
      i_be = cluster.FillBE(instance)
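      # effective NIC parameters: cluster defaults overridden by the
      # per-NIC settings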
      i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4570
                                 nic.nicparams) for nic in instance.nics]
4571
      for field in self.op.output_fields:
4572
        st_match = self._FIELDS_STATIC.Matches(field)
4573
        if field in self._SIMPLE_FIELDS:
4574
          val = getattr(instance, field)
4575
        elif field == "pnode":
4576
          val = instance.primary_node
4577
        elif field == "snodes":
4578
          val = list(instance.secondary_nodes)
4579
        elif field == "admin_state":
4580
          val = instance.admin_up
4581
        elif field == "oper_state":
4582
          if instance.primary_node in bad_nodes:
4583
            val = None
4584
          else:
4585
            val = bool(live_data.get(instance.name))
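        # "status" combines the admin state with the observed state: normal
        # values are "running" and "ADMIN_down", while ERROR_* values flag
        # mismatches or nodes that are offline or unreachable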
        elif field == "status":
4587
          if instance.primary_node in off_nodes:
4588
            val = "ERROR_nodeoffline"
4589
          elif instance.primary_node in bad_nodes:
4590
            val = "ERROR_nodedown"
4591
          else:
4592
            running = bool(live_data.get(instance.name))
4593
            if running:
4594
              if instance.admin_up:
4595
                val = "running"
4596
              else:
4597
                val = "ERROR_up"
4598
            else:
4599
              if instance.admin_up:
4600
                val = "ERROR_down"
4601
              else:
4602
                val = "ADMIN_down"
4603
        elif field == "oper_ram":
4604
          if instance.primary_node in bad_nodes:
4605
            val = None
4606
          elif instance.name in live_data:
4607
            val = live_data[instance.name].get("memory", "?")
4608
          else:
4609
            val = "-"
4610
        elif field == "vcpus":
4611
          val = i_be[constants.BE_VCPUS]
4612
        elif field == "disk_template":
4613
          val = instance.disk_template
4614
        elif field == "ip":
4615
          if instance.nics:
4616
            val = instance.nics[0].ip
4617
          else:
4618
            val = None
4619
        elif field == "nic_mode":
4620
          if instance.nics:
4621
            val = i_nicp[0][constants.NIC_MODE]
4622
          else:
4623
            val = None
4624
        elif field == "nic_link":
4625
          if instance.nics:
4626
            val = i_nicp[0][constants.NIC_LINK]
4627
          else:
4628
            val = None
4629
        elif field == "bridge":
4630
          if (instance.nics and
4631
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4632
            val = i_nicp[0][constants.NIC_LINK]
4633
          else:
4634
            val = None
4635
        elif field == "mac":
4636
          if instance.nics:
4637
            val = instance.nics[0].mac
4638
          else:
4639
            val = None
4640
        elif field == "sda_size" or field == "sdb_size":
4641
          idx = ord(field[2]) - ord('a')
4642
          try:
4643
            val = instance.FindDisk(idx).size
4644
          except errors.OpPrereqError:
4645
            val = None
4646
        elif field == "disk_usage": # total disk usage per node
4647
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
4648
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4649
        elif field == "tags":
4650
          val = list(instance.GetTags())
4651
        elif field == "hvparams":
4652
          val = i_hv
4653
        elif (field.startswith(HVPREFIX) and
4654
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
4655
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
4656
          val = i_hv.get(field[len(HVPREFIX):], None)
4657
        elif field == "beparams":
4658
          val = i_be
4659
        elif (field.startswith(BEPREFIX) and
4660
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4661
          val = i_be.get(field[len(BEPREFIX):], None)
4662
        elif st_match and st_match.groups():
4663
          # matches a variable list
4664
          st_groups = st_match.groups()
4665
          if st_groups and st_groups[0] == "disk":
4666
            if st_groups[1] == "count":
4667
              val = len(instance.disks)
4668
            elif st_groups[1] == "sizes":
4669
              val = [disk.size for disk in instance.disks]
4670
            elif st_groups[1] == "size":
4671
              try:
4672
                val = instance.FindDisk(st_groups[2]).size
4673
              except errors.OpPrereqError:
4674
                val = None
4675
            else:
4676
              assert False, "Unhandled disk parameter"
4677
          elif st_groups[0] == "nic":
4678
            if st_groups[1] == "count":
4679
              val = len(instance.nics)
4680
            elif st_groups[1] == "macs":
4681
              val = [nic.mac for nic in instance.nics]
4682
            elif st_groups[1] == "ips":
4683
              val = [nic.ip for nic in instance.nics]
4684
            elif st_groups[1] == "modes":
4685
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4686
            elif st_groups[1] == "links":
4687
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4688
            elif st_groups[1] == "bridges":
4689
              val = []
4690
              for nicp in i_nicp:
4691
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4692
                  val.append(nicp[constants.NIC_LINK])
4693
                else:
4694
                  val.append(None)
4695
            else:
4696
              # index-based item
4697
              nic_idx = int(st_groups[2])
4698
              if nic_idx >= len(instance.nics):
4699
                val = None
4700
              else:
4701
                if st_groups[1] == "mac":
4702
                  val = instance.nics[nic_idx].mac
4703
                elif st_groups[1] == "ip":
4704
                  val = instance.nics[nic_idx].ip
4705
                elif st_groups[1] == "mode":
4706
                  val = i_nicp[nic_idx][constants.NIC_MODE]
4707
                elif st_groups[1] == "link":
4708
                  val = i_nicp[nic_idx][constants.NIC_LINK]
4709
                elif st_groups[1] == "bridge":
4710
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4711
                  if nic_mode == constants.NIC_MODE_BRIDGED:
4712
                    val = i_nicp[nic_idx][constants.NIC_LINK]
4713
                  else:
4714
                    val = None
4715
                else:
4716
                  assert False, "Unhandled NIC parameter"
4717
          else:
4718
            assert False, ("Declared but unhandled variable parameter '%s'" %
4719
                           field)
4720
        else:
4721
          assert False, "Declared but unhandled parameter '%s'" % field
4722
        iout.append(val)
4723
      output.append(iout)
4724

    
4725
    return output
4726

    
4727

    
4728
class LUFailoverInstance(LogicalUnit):
4729
  """Failover an instance.
4730

4731
  """
4732
  HPATH = "instance-failover"
4733
  HTYPE = constants.HTYPE_INSTANCE
4734
  _OP_REQP = ["instance_name", "ignore_consistency"]
4735
  REQ_BGL = False
4736

    
4737
  def CheckArguments(self):
4738
    """Check the arguments.
4739

4740
    """
4741
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4742
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
4743

    
4744
  def ExpandNames(self):
4745
    self._ExpandAndLockInstance()
4746
    self.needed_locks[locking.LEVEL_NODE] = []
4747
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4748

    
4749
  def DeclareLocks(self, level):
4750
    if level == locking.LEVEL_NODE:
4751
      self._LockInstancesNodes()
4752

    
4753
  def BuildHooksEnv(self):
4754
    """Build hooks env.
4755

4756
    This runs on master, primary and secondary nodes of the instance.
4757

4758
    """
4759
    instance = self.instance
4760
    source_node = instance.primary_node
4761
    target_node = instance.secondary_nodes[0]
4762
    env = {
4763
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
4764
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4765
      "OLD_PRIMARY": source_node,
4766
      "OLD_SECONDARY": target_node,
4767
      "NEW_PRIMARY": target_node,
4768
      "NEW_SECONDARY": source_node,
4769
      }
4770
    env.update(_BuildInstanceHookEnvByObject(self, instance))
4771
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4772
    nl_post = list(nl)
4773
    nl_post.append(source_node)
4774
    return env, nl, nl_post
4775

    
4776
  def CheckPrereq(self):
4777
    """Check prerequisites.
4778

4779
    This checks that the instance is in the cluster.
4780

4781
    """
4782
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4783
    assert self.instance is not None, \
4784
      "Cannot retrieve locked instance %s" % self.op.instance_name
4785

    
4786
    bep = self.cfg.GetClusterInfo().FillBE(instance)
4787
    if instance.disk_template not in constants.DTS_NET_MIRROR:
4788
      raise errors.OpPrereqError("Instance's disk layout is not"
4789
                                 " network mirrored, cannot failover.",
4790
                                 errors.ECODE_STATE)
4791

    
4792
    secondary_nodes = instance.secondary_nodes
4793
    if not secondary_nodes:
4794
      raise errors.ProgrammerError("no secondary node but using "
4795
                                   "a mirrored disk template")
4796

    
4797
    target_node = secondary_nodes[0]
4798
    _CheckNodeOnline(self, target_node)
4799
    _CheckNodeNotDrained(self, target_node)
4800
    if instance.admin_up:
4801
      # check memory requirements on the secondary node
4802
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
4803
                           instance.name, bep[constants.BE_MEMORY],
4804
                           instance.hypervisor)
4805
    else:
4806
      self.LogInfo("Not checking memory on the secondary node as"
4807
                   " instance will not be started")
4808

    
4809
    # check bridge existence
4810
    _CheckInstanceBridgesExist(self, instance, node=target_node)
4811

    
4812
  def Exec(self, feedback_fn):
4813
    """Failover an instance.
4814

4815
    The failover is done by shutting it down on its present node and
4816
    starting it on the secondary.
4817

4818
    """
4819
    instance = self.instance
4820

    
4821
    source_node = instance.primary_node
4822
    target_node = instance.secondary_nodes[0]
4823

    
4824
    if instance.admin_up:
4825
      feedback_fn("* checking disk consistency between source and target")
4826
      for dev in instance.disks:
4827
        # for drbd, these are drbd over lvm
4828
        if not _CheckDiskConsistency(self, dev, target_node, False):
4829
          if not self.op.ignore_consistency:
4830
            raise errors.OpExecError("Disk %s is degraded on target node,"
4831
                                     " aborting failover." % dev.iv_name)
4832
    else:
4833
      feedback_fn("* not checking disk consistency as instance is not running")
4834

    
4835
    feedback_fn("* shutting down instance on source node")
4836
    logging.info("Shutting down instance %s on node %s",
4837
                 instance.name, source_node)
4838

    
4839
    result = self.rpc.call_instance_shutdown(source_node, instance,
4840
                                             self.shutdown_timeout)
4841
    msg = result.fail_msg
4842
    if msg:
4843
      if self.op.ignore_consistency:
4844
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
4845
                             " Proceeding anyway. Please make sure node"
4846
                             " %s is down. Error details: %s",
4847
                             instance.name, source_node, source_node, msg)
4848
      else:
4849
        raise errors.OpExecError("Could not shutdown instance %s on"
4850
                                 " node %s: %s" %
4851
                                 (instance.name, source_node, msg))
4852

    
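    # The source node may be unreachable (often the very reason for the
    # failover), so errors while deactivating its disks are tolerated via
    # ignore_primary=True.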
4854
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
4855
      raise errors.OpExecError("Can't shut down the instance's disks.")
4856

    
4857
    instance.primary_node = target_node
4858
    # distribute new instance config to the other nodes
4859
    self.cfg.Update(instance, feedback_fn)
4860

    
4861
    # Only start the instance if it's marked as up
4862
    if instance.admin_up:
4863
      feedback_fn("* activating the instance's disks on target node")
4864
      logging.info("Starting instance %s on node %s",
4865
                   instance.name, target_node)
4866

    
4867
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
4868
                                               ignore_secondaries=True)
4869
      if not disks_ok:
4870
        _ShutdownInstanceDisks(self, instance)
4871
        raise errors.OpExecError("Can't activate the instance's disks")
4872

    
4873
      feedback_fn("* starting the instance on the target node")
4874
      result = self.rpc.call_instance_start(target_node, instance, None, None)
4875
      msg = result.fail_msg
4876
      if msg:
4877
        _ShutdownInstanceDisks(self, instance)
4878
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
4879
                                 (instance.name, target_node, msg))
4880

    
4881

    
4882
class LUMigrateInstance(LogicalUnit):
4883
  """Migrate an instance.
4884

4885
  This is migration without shutting down, compared to the failover,
4886
  which is done with shutdown.
4887

4888
  """
4889
  HPATH = "instance-migrate"
4890
  HTYPE = constants.HTYPE_INSTANCE
4891
  _OP_REQP = ["instance_name", "live", "cleanup"]
4892

    
4893
  REQ_BGL = False
4894

    
4895
  def ExpandNames(self):
4896
    self._ExpandAndLockInstance()
4897

    
4898
    self.needed_locks[locking.LEVEL_NODE] = []
4899
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4900

    
4901
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
4902
                                       self.op.live, self.op.cleanup)
4903
    self.tasklets = [self._migrater]
4904

    
4905
  def DeclareLocks(self, level):
4906
    if level == locking.LEVEL_NODE:
4907
      self._LockInstancesNodes()
4908

    
4909
  def BuildHooksEnv(self):
4910
    """Build hooks env.
4911

4912
    This runs on master, primary and secondary nodes of the instance.
4913

4914
    """
4915
    instance = self._migrater.instance
4916
    source_node = instance.primary_node
4917
    target_node = instance.secondary_nodes[0]
4918
    env = _BuildInstanceHookEnvByObject(self, instance)
4919
    env["MIGRATE_LIVE"] = self.op.live
4920
    env["MIGRATE_CLEANUP"] = self.op.cleanup
4921
    env.update({
4922
        "OLD_PRIMARY": source_node,
4923
        "OLD_SECONDARY": target_node,
4924
        "NEW_PRIMARY": target_node,
4925
        "NEW_SECONDARY": source_node,
4926
        })
4927
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4928
    nl_post = list(nl)
4929
    nl_post.append(source_node)
4930
    return env, nl, nl_post
4931

    
4932

    
4933
class LUMoveInstance(LogicalUnit):
4934
  """Move an instance by data-copying.
4935

4936
  """
4937
  HPATH = "instance-move"
4938
  HTYPE = constants.HTYPE_INSTANCE
4939
  _OP_REQP = ["instance_name", "target_node"]
4940
  REQ_BGL = False
4941

    
4942
  def CheckArguments(self):
4943
    """Check the arguments.
4944

4945
    """
4946
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4947
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
4948

    
4949
  def ExpandNames(self):
4950
    self._ExpandAndLockInstance()
4951
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
4952
    self.op.target_node = target_node
4953
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
4954
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
4955

    
4956
  def DeclareLocks(self, level):
4957
    if level == locking.LEVEL_NODE:
4958
      self._LockInstancesNodes(primary_only=True)
4959

    
4960
  def BuildHooksEnv(self):
4961
    """Build hooks env.
4962

4963
    This runs on master, primary and secondary nodes of the instance.
4964

4965
    """
4966
    env = {
4967
      "TARGET_NODE": self.op.target_node,
4968
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4969
      }
4970
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4971
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
4972
                                       self.op.target_node]
4973
    return env, nl, nl
4974

    
4975
  def CheckPrereq(self):
4976
    """Check prerequisites.
4977

4978
    This checks that the instance is in the cluster.
4979

4980
    """
4981
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4982
    assert self.instance is not None, \
4983
      "Cannot retrieve locked instance %s" % self.op.instance_name
4984

    
4985
    node = self.cfg.GetNodeInfo(self.op.target_node)
4986
    assert node is not None, \
4987
      "Cannot retrieve locked node %s" % self.op.target_node
4988

    
4989
    self.target_node = target_node = node.name
4990

    
4991
    if target_node == instance.primary_node:
4992
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
4993
                                 (instance.name, target_node),
4994
                                 errors.ECODE_STATE)
4995

    
4996
    bep = self.cfg.GetClusterInfo().FillBE(instance)
4997

    
4998
    for idx, dsk in enumerate(instance.disks):
4999
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5000
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5001
                                   " cannot copy" % idx, errors.ECODE_STATE)
5002

    
5003
    _CheckNodeOnline(self, target_node)
5004
    _CheckNodeNotDrained(self, target_node)
5005

    
5006
    if instance.admin_up:
5007
      # check memory requirements on the secondary node
5008
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5009
                           instance.name, bep[constants.BE_MEMORY],
5010
                           instance.hypervisor)
5011
    else:
5012
      self.LogInfo("Not checking memory on the secondary node as"
5013
                   " instance will not be started")
5014

    
5015
    # check bridge existence
5016
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5017

    
5018
  def Exec(self, feedback_fn):
5019
    """Move an instance.
5020

5021
    The move is done by shutting it down on its present node, copying
5022
    the data over (slow) and starting it on the new node.
5023

5024
    """
5025
    instance = self.instance
5026

    
5027
    source_node = instance.primary_node
5028
    target_node = self.target_node
5029

    
5030
    self.LogInfo("Shutting down instance %s on source node %s",
5031
                 instance.name, source_node)
5032

    
5033
    result = self.rpc.call_instance_shutdown(source_node, instance,
5034
                                             self.shutdown_timeout)
5035
    msg = result.fail_msg
5036
    if msg:
5037
      if self.op.ignore_consistency:
5038
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5039
                             " Proceeding anyway. Please make sure node"
5040
                             " %s is down. Error details: %s",
5041
                             instance.name, source_node, source_node, msg)
5042
      else:
5043
        raise errors.OpExecError("Could not shutdown instance %s on"
5044
                                 " node %s: %s" %
5045
                                 (instance.name, source_node, msg))
5046

    
5047
    # create the target disks
5048
    try:
5049
      _CreateDisks(self, instance, target_node=target_node)
5050
    except errors.OpExecError:
5051
      self.LogWarning("Device creation failed, reverting...")
5052
      try:
5053
        _RemoveDisks(self, instance, target_node=target_node)
5054
      finally:
5055
        self.cfg.ReleaseDRBDMinors(instance.name)
5056
        raise
5057

    
5058
    cluster_name = self.cfg.GetClusterInfo().cluster_name
5059

    
5060
    errs = []
5061
    # activate, get path, copy the data over
5062
    for idx, disk in enumerate(instance.disks):
5063
      self.LogInfo("Copying data for disk %d", idx)
5064
      result = self.rpc.call_blockdev_assemble(target_node, disk,
5065
                                               instance.name, True)
5066
      if result.fail_msg:
5067
        self.LogWarning("Can't assemble newly created disk %d: %s",
5068
                        idx, result.fail_msg)
5069
        errs.append(result.fail_msg)
5070
        break
5071
      dev_path = result.payload
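      # stream the data of the source block device over the network into
      # the newly assembled device on the target node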
      result = self.rpc.call_blockdev_export(source_node, disk,
5073
                                             target_node, dev_path,
5074
                                             cluster_name)
5075
      if result.fail_msg:
5076
        self.LogWarning("Can't copy data over for disk %d: %s",
5077
                        idx, result.fail_msg)
5078
        errs.append(result.fail_msg)
5079
        break
5080

    
5081
    if errs:
5082
      self.LogWarning("Some disks failed to copy, aborting")
5083
      try:
5084
        _RemoveDisks(self, instance, target_node=target_node)
5085
      finally:
5086
        self.cfg.ReleaseDRBDMinors(instance.name)
5087
        raise errors.OpExecError("Errors during disk copy: %s" %
5088
                                 (",".join(errs),))
5089

    
5090
    instance.primary_node = target_node
5091
    self.cfg.Update(instance, feedback_fn)
5092

    
5093
    self.LogInfo("Removing the disks on the original node")
5094
    _RemoveDisks(self, instance, target_node=source_node)
5095

    
5096
    # Only start the instance if it's marked as up
5097
    if instance.admin_up:
5098
      self.LogInfo("Starting instance %s on node %s",
5099
                   instance.name, target_node)
5100

    
5101
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5102
                                           ignore_secondaries=True)
5103
      if not disks_ok:
5104
        _ShutdownInstanceDisks(self, instance)
5105
        raise errors.OpExecError("Can't activate the instance's disks")
5106

    
5107
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5108
      msg = result.fail_msg
5109
      if msg:
5110
        _ShutdownInstanceDisks(self, instance)
5111
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5112
                                 (instance.name, target_node, msg))
5113

    
5114

    
5115
class LUMigrateNode(LogicalUnit):
5116
  """Migrate all instances from a node.
5117

5118
  """
5119
  HPATH = "node-migrate"
5120
  HTYPE = constants.HTYPE_NODE
5121
  _OP_REQP = ["node_name", "live"]
5122
  REQ_BGL = False
5123

    
5124
  def ExpandNames(self):
5125
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5126

    
5127
    self.needed_locks = {
5128
      locking.LEVEL_NODE: [self.op.node_name],
5129
      }
5130

    
5131
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5132

    
5133
    # Create tasklets for migrating instances for all instances on this node
5134
    names = []
5135
    tasklets = []
5136

    
5137
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5138
      logging.debug("Migrating instance %s", inst.name)
5139
      names.append(inst.name)
5140

    
5141
      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5142

    
5143
    self.tasklets = tasklets
5144

    
5145
    # Declare instance locks
5146
    self.needed_locks[locking.LEVEL_INSTANCE] = names
5147

    
5148
  def DeclareLocks(self, level):
5149
    if level == locking.LEVEL_NODE:
5150
      self._LockInstancesNodes()
5151

    
5152
  def BuildHooksEnv(self):
5153
    """Build hooks env.
5154

5155
    This runs on the master, the primary and all the secondaries.
5156

5157
    """
5158
    env = {
5159
      "NODE_NAME": self.op.node_name,
5160
      }
5161

    
5162
    nl = [self.cfg.GetMasterNode()]
5163

    
5164
    return (env, nl, nl)
5165

    
5166

    
5167
class TLMigrateInstance(Tasklet):
5168
  def __init__(self, lu, instance_name, live, cleanup):
5169
    """Initializes this class.
5170

5171
    """
5172
    Tasklet.__init__(self, lu)
5173

    
5174
    # Parameters
5175
    self.instance_name = instance_name
5176
    self.live = live
5177
    self.cleanup = cleanup
5178

    
5179
  def CheckPrereq(self):
5180
    """Check prerequisites.
5181

5182
    This checks that the instance is in the cluster.
5183

5184
    """
5185
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5186
    instance = self.cfg.GetInstanceInfo(instance_name)
5187
    assert instance is not None
5188

    
5189
    if instance.disk_template != constants.DT_DRBD8:
5190
      raise errors.OpPrereqError("Instance's disk layout is not"
5191
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)
5192

    
5193
    secondary_nodes = instance.secondary_nodes
5194
    if not secondary_nodes:
5195
      raise errors.ConfigurationError("No secondary node but using"
5196
                                      " drbd8 disk template")
5197

    
5198
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
5199

    
5200
    target_node = secondary_nodes[0]
5201
    # check memory requirements on the secondary node
5202
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
5203
                         instance.name, i_be[constants.BE_MEMORY],
5204
                         instance.hypervisor)
5205

    
5206
    # check bridge existence
5207
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5208

    
5209
    if not self.cleanup:
5210
      _CheckNodeNotDrained(self, target_node)
5211
      result = self.rpc.call_instance_migratable(instance.primary_node,
5212
                                                 instance)
5213
      result.Raise("Can't migrate, please use failover",
5214
                   prereq=True, ecode=errors.ECODE_STATE)
5215

    
5216
    self.instance = instance
5217

    
5218
  def _WaitUntilSync(self):
5219
    """Poll with custom rpc for disk sync.
5220

5221
    This uses our own step-based rpc call.
5222

5223
    """
5224
    self.feedback_fn("* wait until resync is done")
5225
    all_done = False
5226
    while not all_done:
5227
      all_done = True
5228
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5229
                                            self.nodes_ip,
5230
                                            self.instance.disks)
5231
      min_percent = 100
5232
      for node, nres in result.items():
5233
        nres.Raise("Cannot resync disks on node %s" % node)
5234
        node_done, node_percent = nres.payload
5235
        all_done = all_done and node_done
5236
        if node_percent is not None:
5237
          min_percent = min(min_percent, node_percent)
5238
      if not all_done:
5239
        if min_percent < 100:
5240
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
5241
        time.sleep(2)
5242

    
5243
  def _EnsureSecondary(self, node):
5244
    """Demote a node to secondary.
5245

5246
    """
5247
    self.feedback_fn("* switching node %s to secondary mode" % node)
5248

    
5249
    for dev in self.instance.disks:
5250
      self.cfg.SetDiskID(dev, node)
5251

    
5252
    result = self.rpc.call_blockdev_close(node, self.instance.name,
5253
                                          self.instance.disks)
5254
    result.Raise("Cannot change disk to secondary on node %s" % node)
5255

    
5256
  def _GoStandalone(self):
5257
    """Disconnect from the network.
5258

5259
    """
5260
    self.feedback_fn("* changing into standalone mode")
5261
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5262
                                               self.instance.disks)
5263
    for node, nres in result.items():
5264
      nres.Raise("Cannot disconnect disks node %s" % node)
5265

    
5266
  def _GoReconnect(self, multimaster):
5267
    """Reconnect to the network.
5268

5269
    """
5270
    if multimaster:
5271
      msg = "dual-master"
5272
    else:
5273
      msg = "single-master"
5274
    self.feedback_fn("* changing disks into %s mode" % msg)
5275
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5276
                                           self.instance.disks,
5277
                                           self.instance.name, multimaster)
5278
    for node, nres in result.items():
5279
      nres.Raise("Cannot change disks config on node %s" % node)
5280

    
5281
  def _ExecCleanup(self):
5282
    """Try to cleanup after a failed migration.
5283

5284
    The cleanup is done by:
5285
      - check that the instance is running only on one node
5286
        (and update the config if needed)
5287
      - change disks on its secondary node to secondary
5288
      - wait until disks are fully synchronized
5289
      - disconnect from the network
5290
      - change disks into single-master mode
5291
      - wait again until disks are fully synchronized
5292

5293
    """
5294
    instance = self.instance
5295
    target_node = self.target_node
5296
    source_node = self.source_node
5297

    
5298
    # check running on only one node
5299
    self.feedback_fn("* checking where the instance actually runs"
5300
                     " (if this hangs, the hypervisor might be in"
5301
                     " a bad state)")
5302
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5303
    for node, result in ins_l.items():
5304
      result.Raise("Can't contact node %s" % node)
5305

    
5306
    runningon_source = instance.name in ins_l[source_node].payload
5307
    runningon_target = instance.name in ins_l[target_node].payload
5308

    
5309
    if runningon_source and runningon_target:
5310
      raise errors.OpExecError("Instance seems to be running on two nodes,"
5311
                               " or the hypervisor is confused. You will have"
5312
                               " to ensure manually that it runs only on one"
5313
                               " and restart this operation.")
5314

    
5315
    if not (runningon_source or runningon_target):
5316
      raise errors.OpExecError("Instance does not seem to be running at all."
5317
                               " In this case, it's safer to repair by"
5318
                               " running 'gnt-instance stop' to ensure disk"
5319
                               " shutdown, and then restarting it.")
5320

    
5321
    if runningon_target:
5322
      # the migration has actually succeeded, we need to update the config
5323
      self.feedback_fn("* instance running on secondary node (%s),"
5324
                       " updating config" % target_node)
5325
      instance.primary_node = target_node
5326
      self.cfg.Update(instance, self.feedback_fn)
5327
      demoted_node = source_node
5328
    else:
5329
      self.feedback_fn("* instance confirmed to be running on its"
5330
                       " primary node (%s)" % source_node)
5331
      demoted_node = target_node
5332

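    # Re-establish a clean primary/secondary pair: demote the node that is
    # not running the instance, then cycle the disks through standalone and
    # single-master mode and wait for the resync to finish.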
    self._EnsureSecondary(demoted_node)
5334
    try:
5335
      self._WaitUntilSync()
5336
    except errors.OpExecError:
5337
      # we ignore here errors, since if the device is standalone, it
5338
      # won't be able to sync
5339
      pass
5340
    self._GoStandalone()
5341
    self._GoReconnect(False)
5342
    self._WaitUntilSync()
5343

    
5344
    self.feedback_fn("* done")
5345

    
5346
  def _RevertDiskStatus(self):
5347
    """Try to revert the disk status after a failed migration.
5348

5349
    """
5350
    target_node = self.target_node
5351
    try:
5352
      self._EnsureSecondary(target_node)
5353
      self._GoStandalone()
5354
      self._GoReconnect(False)
5355
      self._WaitUntilSync()
5356
    except errors.OpExecError, err:
5357
      self.lu.LogWarning("Migration failed and I can't reconnect the"
5358
                         " drives: error '%s'\n"
5359
                         "Please look and recover the instance status" %
5360
                         str(err))
5361

    
5362
  def _AbortMigration(self):
5363
    """Call the hypervisor code to abort a started migration.
5364

5365
    """
5366
    instance = self.instance
5367
    target_node = self.target_node
5368
    migration_info = self.migration_info
5369

    
5370
    abort_result = self.rpc.call_finalize_migration(target_node,
5371
                                                    instance,
5372
                                                    migration_info,
5373
                                                    False)
5374
    abort_msg = abort_result.fail_msg
5375
    if abort_msg:
5376
      logging.error("Aborting migration failed on target node %s: %s",
5377
                    target_node, abort_msg)
5378
      # Don't raise an exception here, as we still have to try to revert the
5379
      # disk status, even if this step failed.
5380

    
5381
  def _ExecMigration(self):
5382
    """Migrate an instance.
5383

5384
    The migrate is done by:
5385
      - change the disks into dual-master mode
5386
      - wait until disks are fully synchronized again
5387
      - migrate the instance
5388
      - change disks on the new secondary node (the old primary) to secondary
5389
      - wait until disks are fully synchronized
5390
      - change disks into single-master mode
5391

5392
    """
5393
    instance = self.instance
5394
    target_node = self.target_node
5395
    source_node = self.source_node
5396

    
5397
    self.feedback_fn("* checking disk consistency between source and target")
5398
    for dev in instance.disks:
5399
      if not _CheckDiskConsistency(self, dev, target_node, False):
5400
        raise errors.OpExecError("Disk %s is degraded or not fully"
5401
                                 " synchronized on target node,"
5402
                                 " aborting migrate." % dev.iv_name)
5403

    
5404
    # First get the migration information from the remote node
5405
    result = self.rpc.call_migration_info(source_node, instance)
5406
    msg = result.fail_msg
5407
    if msg:
5408
      log_err = ("Failed fetching source migration information from %s: %s" %
5409
                 (source_node, msg))
5410
      logging.error(log_err)
5411
      raise errors.OpExecError(log_err)
5412

    
5413
    self.migration_info = migration_info = result.payload
5414

    
5415
    # Then switch the disks to master/master mode
5416
    self._EnsureSecondary(target_node)
5417
    self._GoStandalone()
5418
    self._GoReconnect(True)
5419
    self._WaitUntilSync()
5420

    
5421
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
5422
    result = self.rpc.call_accept_instance(target_node,
5423
                                           instance,
5424
                                           migration_info,
5425
                                           self.nodes_ip[target_node])
5426

    
5427
    msg = result.fail_msg
5428
    if msg:
5429
      logging.error("Instance pre-migration failed, trying to revert"
5430
                    " disk status: %s", msg)
5431
      self.feedback_fn("Pre-migration failed, aborting")
5432
      self._AbortMigration()
5433
      self._RevertDiskStatus()
5434
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5435
                               (instance.name, msg))
5436

    
5437
    self.feedback_fn("* migrating instance to %s" % target_node)
5438
    time.sleep(10)
5439
    result = self.rpc.call_instance_migrate(source_node, instance,
5440
                                            self.nodes_ip[target_node],
5441
                                            self.live)
5442
    msg = result.fail_msg
5443
    if msg:
5444
      logging.error("Instance migration failed, trying to revert"
5445
                    " disk status: %s", msg)
5446
      self.feedback_fn("Migration failed, aborting")
5447
      self._AbortMigration()
5448
      self._RevertDiskStatus()
5449
      raise errors.OpExecError("Could not migrate instance %s: %s" %
5450
                               (instance.name, msg))
5451
    time.sleep(10)
5452

    
5453
    instance.primary_node = target_node
5454
    # distribute new instance config to the other nodes
5455
    self.cfg.Update(instance, self.feedback_fn)
5456

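    # Tell the target node that the migration succeeded, so the hypervisor
    # can finalize the instance on its side.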
    result = self.rpc.call_finalize_migration(target_node,
5458
                                              instance,
5459
                                              migration_info,
5460
                                              True)
5461
    msg = result.fail_msg
5462
    if msg:
5463
      logging.error("Instance migration succeeded, but finalization failed:"
5464
                    " %s", msg)
5465
      raise errors.OpExecError("Could not finalize instance migration: %s" %
5466
                               msg)
5467

    
5468
    self._EnsureSecondary(source_node)
5469
    self._WaitUntilSync()
5470
    self._GoStandalone()
5471
    self._GoReconnect(False)
5472
    self._WaitUntilSync()
5473

    
5474
    self.feedback_fn("* done")
5475

    
5476
  def Exec(self, feedback_fn):
5477
    """Perform the migration.
5478

5479
    """
5480
    feedback_fn("Migrating instance %s" % self.instance.name)
5481

    
5482
    self.feedback_fn = feedback_fn
5483

    
5484
    self.source_node = self.instance.primary_node
5485
    self.target_node = self.instance.secondary_nodes[0]
5486
    self.all_nodes = [self.source_node, self.target_node]
5487
    self.nodes_ip = {
5488
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5489
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5490
      }
5491

    
5492
    if self.cleanup:
5493
      return self._ExecCleanup()
5494
    else:
5495
      return self._ExecMigration()
5496

    
5497

    
5498
def _CreateBlockDev(lu, node, instance, device, force_create,
5499
                    info, force_open):
5500
  """Create a tree of block devices on a given node.
5501

5502
  If this device type has to be created on secondaries, create it and
5503
  all its children.
5504

5505
  If not, just recurse to children keeping the same 'force' value.
5506

5507
  @param lu: the lu on whose behalf we execute
5508
  @param node: the node on which to create the device
5509
  @type instance: L{objects.Instance}
5510
  @param instance: the instance which owns the device
5511
  @type device: L{objects.Disk}
5512
  @param device: the device to create
5513
  @type force_create: boolean
5514
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has the
      CreateOnSecondary() attribute
5517
  @param info: the extra 'metadata' we should attach to the device
5518
      (this will be represented as a LVM tag)
5519
  @type force_open: boolean
5520
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
5524

5525
  """
5526
  if device.CreateOnSecondary():
5527
    force_create = True
5528

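  # Create the children first, so that the lower layers (e.g. the LVs
  # backing a DRBD device) exist before the device itself is created.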
  if device.children:
5530
    for child in device.children:
5531
      _CreateBlockDev(lu, node, instance, child, force_create,
5532
                      info, force_open)
5533

    
5534
  if not force_create:
5535
    return
5536

    
5537
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5538

    
5539

    
5540
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5541
  """Create a single block device on a given node.
5542

5543
  This will not recurse over children of the device, so they must be
5544
  created in advance.
5545

5546
  @param lu: the lu on whose behalf we execute
5547
  @param node: the node on which to create the device
5548
  @type instance: L{objects.Instance}
5549
  @param instance: the instance which owns the device
5550
  @type device: L{objects.Disk}
5551
  @param device: the device to create
5552
  @param info: the extra 'metadata' we should attach to the device
5553
      (this will be represented as a LVM tag)
5554
  @type force_open: boolean
5555
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
5559

5560
  """
5561
  lu.cfg.SetDiskID(device, node)
5562
  result = lu.rpc.call_blockdev_create(node, device, device.size,
5563
                                       instance.name, force_open, info)
5564
  result.Raise("Can't create block device %s on"
5565
               " node %s for instance %s" % (device, node, instance.name))
5566
  if device.physical_id is None:
5567
    device.physical_id = result.payload
5568

    
5569

    
5570
def _GenerateUniqueNames(lu, exts):
5571
  """Generate a suitable LV name.
5572

5573
  This will generate a logical volume name for the given instance.
5574

5575
  """
5576
  results = []
5577
  for val in exts:
5578
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5579
    results.append("%s%s" % (new_id, val))
5580
  return results
5581

    
5582

    
5583
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
5584
                         p_minor, s_minor):
5585
  """Generate a drbd8 device complete with its children.
5586

5587
  """
  port = lu.cfg.AllocatePort()
5589
  vgname = lu.cfg.GetVGName()
5590
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
5591
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5592
                          logical_id=(vgname, names[0]))
5593
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5594
                          logical_id=(vgname, names[1]))
5595
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
5596
                          logical_id=(primary, secondary, port,
5597
                                      p_minor, s_minor,
5598
                                      shared_secret),
5599
                          children=[dev_data, dev_meta],
5600
                          iv_name=iv_name)
5601
  return drbd_dev
5602

    
5603

    
5604
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  """
  # TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vgname, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks


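# Illustrative example for _GenerateDiskTemplate above (a sketch, not part
# of the original module): for constants.DT_DRBD8 with two disks and
# base_index 0, the unique names generated for ".disk0" and ".disk1" are
# suffixed with "_data" and "_meta", AllocateDRBDMinor is asked for one
# (primary, secondary) minor pair per disk, and each disk becomes a DRBD8
# branch with iv_name "disk/0" and "disk/1" respectively.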
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @raise errors.OpExecError: in case of an error during device creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


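# Descriptive note for _CreateDisks above (not part of the original module):
# devices are created on every node in instance.all_nodes (or only on
# target_node when given), and f_create is True only on the primary node,
# so force_create and force_open are both passed as True there and as False
# on the secondaries.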
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result


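# Descriptive note for _RemoveDisks above (not part of the original module):
# removal failures are only logged as warnings; the function keeps going and
# returns False in that case, so callers can decide whether a partial
# cleanup is acceptable.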
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group.

  """
  # Required free disk space as a function of the disk template and disk sizes
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


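# Worked example for _ComputeDiskSize above (illustrative, not part of the
# original module): for constants.DT_DRBD8 with disks of 1024 MB and
# 2048 MB the result is (1024 + 128) + (2048 + 128) = 3328 MB, since each
# disk needs an extra 128 MB for DRBD metadata; diskless and file-based
# templates need no volume group space and yield None.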
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


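# Illustrative usage sketch for _CheckHVParams above (not part of the
# original module): a logical unit would typically call
#   _CheckHVParams(self, [pnode.name] + self.secondaries,
#                  self.op.hypervisor, self.op.hvparams)
# and rely on the error raised when validation fails on any online node.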
class LUCreateInstance(LogicalUnit):
5819
  """Create an instance.
5820

5821
  """
5822
  HPATH = "instance-add"
5823
  HTYPE = constants.HTYPE_INSTANCE
5824
  _OP_REQP = ["instance_name", "disks", "disk_template",
5825
              "mode", "start",
5826
              "wait_for_sync", "ip_check", "nics",
5827
              "hvparams", "beparams"]
5828
  REQ_BGL = False
5829

    
5830
  def CheckArguments(self):
5831
    """Check arguments.
5832

5833
    """
5834
    # set optional parameters to none if they don't exist
5835
    for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
5836
      if not hasattr(self.op, attr):
5837
        setattr(self.op, attr, None)
5838

    
5839
    # do not require name_check to ease forward/backward compatibility
5840
    # for tools
5841
    if not hasattr(self.op, "name_check"):
5842
      self.op.name_check = True
5843
    # validate/normalize the instance name
5844
    self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
5845
    if self.op.ip_check and not self.op.name_check:
5846
      # TODO: make the ip check more flexible and not depend on the name check
5847
      raise errors.OpPrereqError("Cannot do ip checks without a name check",
5848
                                 errors.ECODE_INVAL)
5849
    if (self.op.disk_template == constants.DT_FILE and
5850
        not constants.ENABLE_FILE_STORAGE):
5851
      raise errors.OpPrereqError("File storage disabled at configure time",
5852
                                 errors.ECODE_INVAL)
5853
    # check disk information: either all adopt, or no adopt
5854
    has_adopt = has_no_adopt = False
5855
    for disk in self.op.disks:
5856
      if "adopt" in disk:
5857
        has_adopt = True
5858
      else:
5859
        has_no_adopt = True
5860
    if has_adopt and has_no_adopt:
5861
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
5863
    if has_adopt:
5864
      if self.op.disk_template != constants.DT_PLAIN:
5865
        raise errors.OpPrereqError("Disk adoption is only supported for the"
5866
                                   " 'plain' disk template",
5867
                                   errors.ECODE_INVAL)
5868
      if self.op.iallocator is not None:
5869
        raise errors.OpPrereqError("Disk adoption not allowed with an"
5870
                                   " iallocator script", errors.ECODE_INVAL)
5871
      if self.op.mode == constants.INSTANCE_IMPORT:
5872
        raise errors.OpPrereqError("Disk adoption not allowed for"
5873
                                   " instance import", errors.ECODE_INVAL)
5874

    
5875
    self.adopt_disks = has_adopt
5876

    
5877
  def ExpandNames(self):
5878
    """ExpandNames for CreateInstance.
5879

5880
    Figure out the right locks for instance creation.
5881

5882
    """
5883
    self.needed_locks = {}
5884

    
5885
    # cheap checks, mostly valid constants given
5886

    
5887
    # verify creation mode
5888
    if self.op.mode not in (constants.INSTANCE_CREATE,
5889
                            constants.INSTANCE_IMPORT):
5890
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
5891
                                 self.op.mode, errors.ECODE_INVAL)
5892

    
5893
    # disk template and mirror node verification
5894
    _CheckDiskTemplate(self.op.disk_template)
5895

    
5896
    if self.op.hypervisor is None:
5897
      self.op.hypervisor = self.cfg.GetHypervisorType()
5898

    
5899
    cluster = self.cfg.GetClusterInfo()
5900
    enabled_hvs = cluster.enabled_hypervisors
5901
    if self.op.hypervisor not in enabled_hvs:
5902
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
5903
                                 " cluster (%s)" % (self.op.hypervisor,
5904
                                  ",".join(enabled_hvs)),
5905
                                 errors.ECODE_STATE)
5906

    
5907
    # check hypervisor parameter syntax (locally)
5908
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5909
    filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
5910
                                  self.op.hvparams)
5911
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
5912
    hv_type.CheckParameterSyntax(filled_hvp)
5913
    self.hv_full = filled_hvp
5914
    # check that we don't specify global parameters on an instance
5915
    _CheckGlobalHvParams(self.op.hvparams)
5916

    
5917
    # fill and remember the beparams dict
5918
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5919
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
5920
                                    self.op.beparams)
5921

    
5922
    #### instance parameters check
5923

    
5924
    # instance name verification
5925
    if self.op.name_check:
5926
      hostname1 = utils.GetHostInfo(self.op.instance_name)
5927
      self.op.instance_name = instance_name = hostname1.name
5928
      # used in CheckPrereq for ip ping check
5929
      self.check_ip = hostname1.ip
5930
    else:
5931
      instance_name = self.op.instance_name
5932
      self.check_ip = None
5933

    
5934
    # this is just a preventive check, but someone might still add this
5935
    # instance in the meantime, and creation will fail at lock-add time
5936
    if instance_name in self.cfg.GetInstanceList():
5937
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5938
                                 instance_name, errors.ECODE_EXISTS)
5939

    
5940
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
5941

    
5942
    # NIC buildup
5943
    self.nics = []
5944
    for idx, nic in enumerate(self.op.nics):
5945
      nic_mode_req = nic.get("mode", None)
5946
      nic_mode = nic_mode_req
5947
      if nic_mode is None:
5948
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
5949

    
5950
      # in routed mode, for the first nic, the default ip is 'auto'
5951
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
5952
        default_ip_mode = constants.VALUE_AUTO
5953
      else:
5954
        default_ip_mode = constants.VALUE_NONE
5955

    
5956
      # ip validity checks
5957
      ip = nic.get("ip", default_ip_mode)
5958
      if ip is None or ip.lower() == constants.VALUE_NONE:
5959
        nic_ip = None
5960
      elif ip.lower() == constants.VALUE_AUTO:
5961
        if not self.op.name_check:
5962
          raise errors.OpPrereqError("IP address set to auto but name checks"
5963
                                     " have been skipped. Aborting.",
5964
                                     errors.ECODE_INVAL)
5965
        nic_ip = hostname1.ip
5966
      else:
5967
        if not utils.IsValidIP(ip):
5968
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
5969
                                     " like a valid IP" % ip,
5970
                                     errors.ECODE_INVAL)
5971
        nic_ip = ip
5972

    
5973
      # TODO: check the ip address for uniqueness
5974
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
5975
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
5976
                                   errors.ECODE_INVAL)
5977

    
5978
      # MAC address verification
5979
      mac = nic.get("mac", constants.VALUE_AUTO)
5980
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
5981
        mac = utils.NormalizeAndValidateMac(mac)
5982

    
5983
        try:
5984
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
5985
        except errors.ReservationError:
5986
          raise errors.OpPrereqError("MAC address %s already in use"
5987
                                     " in cluster" % mac,
5988
                                     errors.ECODE_NOTUNIQUE)
5989

    
5990
      # bridge verification
5991
      bridge = nic.get("bridge", None)
5992
      link = nic.get("link", None)
5993
      if bridge and link:
5994
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
5995
                                   " at the same time", errors.ECODE_INVAL)
5996
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
5997
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
5998
                                   errors.ECODE_INVAL)
5999
      elif bridge:
6000
        link = bridge
6001

    
6002
      nicparams = {}
6003
      if nic_mode_req:
6004
        nicparams[constants.NIC_MODE] = nic_mode_req
6005
      if link:
6006
        nicparams[constants.NIC_LINK] = link
6007

    
6008
      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
6009
                                      nicparams)
6010
      objects.NIC.CheckParameterSyntax(check_params)
6011
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6012

    
6013
    # disk checks/pre-build
6014
    self.disks = []
6015
    for disk in self.op.disks:
6016
      mode = disk.get("mode", constants.DISK_RDWR)
6017
      if mode not in constants.DISK_ACCESS_SET:
6018
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6019
                                   mode, errors.ECODE_INVAL)
6020
      size = disk.get("size", None)
6021
      if size is None:
6022
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6023
      try:
6024
        size = int(size)
6025
      except (TypeError, ValueError):
6026
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6027
                                   errors.ECODE_INVAL)
6028
      new_disk = {"size": size, "mode": mode}
6029
      if "adopt" in disk:
6030
        new_disk["adopt"] = disk["adopt"]
6031
      self.disks.append(new_disk)
6032
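    # Illustrative note (not part of the original code): after this loop
    # self.disks holds one dict per requested disk, e.g.
    # {"size": 1024, "mode": constants.DISK_RDWR}, plus an "adopt" key when
    # LV adoption was requested for that disk.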

    
6033
    # file storage checks
6034
    if (self.op.file_driver and
6035
        not self.op.file_driver in constants.FILE_DRIVER):
6036
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
6037
                                 self.op.file_driver, errors.ECODE_INVAL)
6038

    
6039
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6040
      raise errors.OpPrereqError("File storage directory path not absolute",
6041
                                 errors.ECODE_INVAL)
6042

    
6043
    ### Node/iallocator related checks
6044
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
6045
      raise errors.OpPrereqError("One and only one of iallocator and primary"
6046
                                 " node must be given",
6047
                                 errors.ECODE_INVAL)
6048

    
6049
    if self.op.iallocator:
6050
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6051
    else:
6052
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6053
      nodelist = [self.op.pnode]
6054
      if self.op.snode is not None:
6055
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6056
        nodelist.append(self.op.snode)
6057
      self.needed_locks[locking.LEVEL_NODE] = nodelist
6058

    
6059
    # in case of import lock the source node too
6060
    if self.op.mode == constants.INSTANCE_IMPORT:
6061
      src_node = getattr(self.op, "src_node", None)
6062
      src_path = getattr(self.op, "src_path", None)
6063

    
6064
      if src_path is None:
6065
        self.op.src_path = src_path = self.op.instance_name
6066

    
6067
      if src_node is None:
6068
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6069
        self.op.src_node = None
6070
        if os.path.isabs(src_path):
6071
          raise errors.OpPrereqError("Importing an instance from an absolute"
6072
                                     " path requires a source node option.",
6073
                                     errors.ECODE_INVAL)
6074
      else:
6075
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6076
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6077
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
6078
        if not os.path.isabs(src_path):
6079
          self.op.src_path = src_path = \
6080
            utils.PathJoin(constants.EXPORT_DIR, src_path)
6081

    
6082
      # On import force_variant must be True, because if we forced it at
6083
      # initial install, our only chance when importing it back is that it
6084
      # works again!
6085
      self.op.force_variant = True
6086

    
6087
    else: # INSTANCE_CREATE
6088
      if getattr(self.op, "os_type", None) is None:
6089
        raise errors.OpPrereqError("No guest OS specified",
6090
                                   errors.ECODE_INVAL)
6091
      self.op.force_variant = getattr(self.op, "force_variant", False)
6092

    
6093
  def _RunAllocator(self):
6094
    """Run the allocator based on input opcode.
6095

6096
    """
6097
    nics = [n.ToDict() for n in self.nics]
6098
    ial = IAllocator(self.cfg, self.rpc,
6099
                     mode=constants.IALLOCATOR_MODE_ALLOC,
6100
                     name=self.op.instance_name,
6101
                     disk_template=self.op.disk_template,
6102
                     tags=[],
6103
                     os=self.op.os_type,
6104
                     vcpus=self.be_full[constants.BE_VCPUS],
6105
                     mem_size=self.be_full[constants.BE_MEMORY],
6106
                     disks=self.disks,
6107
                     nics=nics,
6108
                     hypervisor=self.op.hypervisor,
6109
                     )
6110

    
6111
    ial.Run(self.op.iallocator)
6112

    
6113
    if not ial.success:
6114
      raise errors.OpPrereqError("Can't compute nodes using"
6115
                                 " iallocator '%s': %s" %
6116
                                 (self.op.iallocator, ial.info),
6117
                                 errors.ECODE_NORES)
6118
    if len(ial.result) != ial.required_nodes:
6119
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6120
                                 " of nodes (%s), required %s" %
6121
                                 (self.op.iallocator, len(ial.result),
6122
                                  ial.required_nodes), errors.ECODE_FAULT)
6123
    self.op.pnode = ial.result[0]
6124
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6125
                 self.op.instance_name, self.op.iallocator,
6126
                 utils.CommaJoin(ial.result))
6127
    if ial.required_nodes == 2:
6128
      self.op.snode = ial.result[1]
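
    # Descriptive note (not part of the original code): ial.result is a list
    # of node names; the first entry becomes the primary node and, when the
    # allocator was asked for two nodes, the second entry becomes the
    # secondary node.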
6129

    
6130
  def BuildHooksEnv(self):
6131
    """Build hooks env.
6132

6133
    This runs on master, primary and secondary nodes of the instance.
6134

6135
    """
6136
    env = {
6137
      "ADD_MODE": self.op.mode,
6138
      }
6139
    if self.op.mode == constants.INSTANCE_IMPORT:
6140
      env["SRC_NODE"] = self.op.src_node
6141
      env["SRC_PATH"] = self.op.src_path
6142
      env["SRC_IMAGES"] = self.src_images
6143

    
6144
    env.update(_BuildInstanceHookEnv(
6145
      name=self.op.instance_name,
6146
      primary_node=self.op.pnode,
6147
      secondary_nodes=self.secondaries,
6148
      status=self.op.start,
6149
      os_type=self.op.os_type,
6150
      memory=self.be_full[constants.BE_MEMORY],
6151
      vcpus=self.be_full[constants.BE_VCPUS],
6152
      nics=_NICListToTuple(self, self.nics),
6153
      disk_template=self.op.disk_template,
6154
      disks=[(d["size"], d["mode"]) for d in self.disks],
6155
      bep=self.be_full,
6156
      hvp=self.hv_full,
6157
      hypervisor_name=self.op.hypervisor,
6158
    ))
6159

    
6160
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6161
          self.secondaries)
6162
    return env, nl, nl
6163

    
6164

    
6165
  def CheckPrereq(self):
6166
    """Check prerequisites.
6167

6168
    """
6169
    if (not self.cfg.GetVGName() and
6170
        self.op.disk_template not in constants.DTS_NOT_LVM):
6171
      raise errors.OpPrereqError("Cluster does not support lvm-based"
6172
                                 " instances", errors.ECODE_STATE)
6173

    
6174
    if self.op.mode == constants.INSTANCE_IMPORT:
6175
      src_node = self.op.src_node
6176
      src_path = self.op.src_path
6177

    
6178
      if src_node is None:
6179
        locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6180
        exp_list = self.rpc.call_export_list(locked_nodes)
6181
        found = False
6182
        for node in exp_list:
6183
          if exp_list[node].fail_msg:
6184
            continue
6185
          if src_path in exp_list[node].payload:
6186
            found = True
6187
            self.op.src_node = src_node = node
6188
            self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6189
                                                         src_path)
6190
            break
6191
        if not found:
6192
          raise errors.OpPrereqError("No export found for relative path %s" %
6193
                                      src_path, errors.ECODE_INVAL)
6194

    
6195
      _CheckNodeOnline(self, src_node)
6196
      result = self.rpc.call_export_info(src_node, src_path)
6197
      result.Raise("No export or invalid export found in dir %s" % src_path)
6198

    
6199
      export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6200
      if not export_info.has_section(constants.INISECT_EXP):
6201
        raise errors.ProgrammerError("Corrupted export config",
6202
                                     errors.ECODE_ENVIRON)
6203

    
6204
      ei_version = export_info.get(constants.INISECT_EXP, 'version')
6205
      if (int(ei_version) != constants.EXPORT_VERSION):
6206
        raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6207
                                   (ei_version, constants.EXPORT_VERSION),
6208
                                   errors.ECODE_ENVIRON)
6209

    
6210
      # Check that the new instance doesn't have less disks than the export
6211
      instance_disks = len(self.disks)
6212
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6213
      if instance_disks < export_disks:
6214
        raise errors.OpPrereqError("Not enough disks to import."
6215
                                   " (instance: %d, export: %d)" %
6216
                                   (instance_disks, export_disks),
6217
                                   errors.ECODE_INVAL)
6218

    
6219
      self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
6220
      disk_images = []
6221
      for idx in range(export_disks):
6222
        option = 'disk%d_dump' % idx
6223
        if export_info.has_option(constants.INISECT_INS, option):
6224
          # FIXME: are the old os-es, disk sizes, etc. useful?
6225
          export_name = export_info.get(constants.INISECT_INS, option)
6226
          image = utils.PathJoin(src_path, export_name)
6227
          disk_images.append(image)
6228
        else:
6229
          disk_images.append(False)
6230

    
6231
      self.src_images = disk_images
6232

    
6233
      old_name = export_info.get(constants.INISECT_INS, 'name')
6234
      # FIXME: int() here could throw a ValueError on broken exports
6235
      exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
6236
      if self.op.instance_name == old_name:
6237
        for idx, nic in enumerate(self.nics):
6238
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6239
            nic_mac_ini = 'nic%d_mac' % idx
6240
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6241

    
6242
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6243

    
6244
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
6245
    if self.op.ip_check:
6246
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6247
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6248
                                   (self.check_ip, self.op.instance_name),
6249
                                   errors.ECODE_NOTUNIQUE)
6250

    
6251
    #### mac address generation
6252
    # By generating here the mac address both the allocator and the hooks get
6253
    # the real final mac address rather than the 'auto' or 'generate' value.
6254
    # There is a race condition between the generation and the instance object
6255
    # creation, which means that we know the mac is valid now, but we're not
6256
    # sure it will be when we actually add the instance. If things go bad
6257
    # adding the instance will abort because of a duplicate mac, and the
6258
    # creation job will fail.
6259
    for nic in self.nics:
6260
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6261
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6262

    
6263
    #### allocator run
6264

    
6265
    if self.op.iallocator is not None:
6266
      self._RunAllocator()
6267

    
6268
    #### node related checks
6269

    
6270
    # check primary node
6271
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6272
    assert self.pnode is not None, \
6273
      "Cannot retrieve locked node %s" % self.op.pnode
6274
    if pnode.offline:
6275
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6276
                                 pnode.name, errors.ECODE_STATE)
6277
    if pnode.drained:
6278
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6279
                                 pnode.name, errors.ECODE_STATE)
6280

    
6281
    self.secondaries = []
6282

    
6283
    # mirror node verification
6284
    if self.op.disk_template in constants.DTS_NET_MIRROR:
6285
      if self.op.snode is None:
6286
        raise errors.OpPrereqError("The networked disk templates need"
6287
                                   " a mirror node", errors.ECODE_INVAL)
6288
      if self.op.snode == pnode.name:
6289
        raise errors.OpPrereqError("The secondary node cannot be the"
6290
                                   " primary node.", errors.ECODE_INVAL)
6291
      _CheckNodeOnline(self, self.op.snode)
6292
      _CheckNodeNotDrained(self, self.op.snode)
6293
      self.secondaries.append(self.op.snode)
6294

    
6295
    nodenames = [pnode.name] + self.secondaries
6296

    
6297
    req_size = _ComputeDiskSize(self.op.disk_template,
6298
                                self.disks)
6299

    
6300
    # Check lv size requirements, if not adopting
6301
    if req_size is not None and not self.adopt_disks:
6302
      _CheckNodesFreeDisk(self, nodenames, req_size)
6303

    
6304
    if self.adopt_disks: # instead, we must check the adoption data
6305
      all_lvs = set([i["adopt"] for i in self.disks])
6306
      if len(all_lvs) != len(self.disks):
6307
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
6308
                                   errors.ECODE_INVAL)
6309
      for lv_name in all_lvs:
6310
        try:
6311
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6312
        except errors.ReservationError:
6313
          raise errors.OpPrereqError("LV named %s used by another instance" %
6314
                                     lv_name, errors.ECODE_NOTUNIQUE)
6315

    
6316
      node_lvs = self.rpc.call_lv_list([pnode.name],
6317
                                       self.cfg.GetVGName())[pnode.name]
6318
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6319
      node_lvs = node_lvs.payload
6320
      delta = all_lvs.difference(node_lvs.keys())
6321
      if delta:
6322
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
6323
                                   utils.CommaJoin(delta),
6324
                                   errors.ECODE_INVAL)
6325
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6326
      if online_lvs:
6327
        raise errors.OpPrereqError("Online logical volumes found, cannot"
6328
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
6329
                                   errors.ECODE_STATE)
6330
      # update the size of disk based on what is found
6331
      for dsk in self.disks:
6332
        dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
6333

    
6334
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6335

    
6336
    # os verification
6337
    result = self.rpc.call_os_get(pnode.name, self.op.os_type)
6338
    result.Raise("OS '%s' not in supported os list for primary node %s" %
6339
                 (self.op.os_type, pnode.name),
6340
                 prereq=True, ecode=errors.ECODE_INVAL)
6341
    if not self.op.force_variant:
6342
      _CheckOSVariant(result.payload, self.op.os_type)
6343

    
6344
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6345

    
6346
    # memory check on primary node
6347
    if self.op.start:
6348
      _CheckNodeFreeMemory(self, self.pnode.name,
6349
                           "creating instance %s" % self.op.instance_name,
6350
                           self.be_full[constants.BE_MEMORY],
6351
                           self.op.hypervisor)
6352

    
6353
    self.dry_run_result = list(nodenames)
6354

    
6355
  def Exec(self, feedback_fn):
6356
    """Create and add the instance to the cluster.
6357

6358
    """
6359
    instance = self.op.instance_name
6360
    pnode_name = self.pnode.name
6361

    
6362
    ht_kind = self.op.hypervisor
6363
    if ht_kind in constants.HTS_REQ_PORT:
6364
      network_port = self.cfg.AllocatePort()
6365
    else:
6366
      network_port = None
6367

    
6368
    ##if self.op.vnc_bind_address is None:
6369
    ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS
6370

    
6371
    # this is needed because os.path.join does not accept None arguments
6372
    if self.op.file_storage_dir is None:
6373
      string_file_storage_dir = ""
6374
    else:
6375
      string_file_storage_dir = self.op.file_storage_dir
6376

    
6377
    # build the full file storage dir path
6378
    file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
6379
                                      string_file_storage_dir, instance)
6380

    
6381

    
6382
    disks = _GenerateDiskTemplate(self,
6383
                                  self.op.disk_template,
6384
                                  instance, pnode_name,
6385
                                  self.secondaries,
6386
                                  self.disks,
6387
                                  file_storage_dir,
6388
                                  self.op.file_driver,
6389
                                  0)
6390

    
6391
    iobj = objects.Instance(name=instance, os=self.op.os_type,
6392
                            primary_node=pnode_name,
6393
                            nics=self.nics, disks=disks,
6394
                            disk_template=self.op.disk_template,
6395
                            admin_up=False,
6396
                            network_port=network_port,
6397
                            beparams=self.op.beparams,
6398
                            hvparams=self.op.hvparams,
6399
                            hypervisor=self.op.hypervisor,
6400
                            )
6401

    
6402
    if self.adopt_disks:
6403
      # rename LVs to the newly-generated names; we need to construct
6404
      # 'fake' LV disks with the old data, plus the new unique_id
6405
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
6406
      rename_to = []
6407
      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
6408
        rename_to.append(t_dsk.logical_id)
6409
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
6410
        self.cfg.SetDiskID(t_dsk, pnode_name)
6411
      result = self.rpc.call_blockdev_rename(pnode_name,
6412
                                             zip(tmp_disks, rename_to))
6413
      result.Raise("Failed to rename adopted LVs")
6414
    else:
6415
      feedback_fn("* creating instance disks...")
6416
      try:
6417
        _CreateDisks(self, iobj)
6418
      except errors.OpExecError:
6419
        self.LogWarning("Device creation failed, reverting...")
6420
        try:
6421
          _RemoveDisks(self, iobj)
6422
        finally:
6423
          self.cfg.ReleaseDRBDMinors(instance)
6424
          raise
6425

    
6426
    feedback_fn("adding instance %s to cluster config" % instance)
6427

    
6428
    self.cfg.AddInstance(iobj, self.proc.GetECId())
6429

    
6430
    # Declare that we don't want to remove the instance lock anymore, as we've
6431
    # added the instance to the config
6432
    del self.remove_locks[locking.LEVEL_INSTANCE]
6433
    # Unlock all the nodes
6434
    if self.op.mode == constants.INSTANCE_IMPORT:
6435
      nodes_keep = [self.op.src_node]
6436
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
6437
                       if node != self.op.src_node]
6438
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
6439
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6440
    else:
6441
      self.context.glm.release(locking.LEVEL_NODE)
6442
      del self.acquired_locks[locking.LEVEL_NODE]
6443

    
6444
    if self.op.wait_for_sync:
6445
      disk_abort = not _WaitForSync(self, iobj)
6446
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
6447
      # make sure the disks are not degraded (still sync-ing is ok)
6448
      time.sleep(15)
6449
      feedback_fn("* checking mirrors status")
6450
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
6451
    else:
6452
      disk_abort = False
6453

    
6454
    if disk_abort:
6455
      _RemoveDisks(self, iobj)
6456
      self.cfg.RemoveInstance(iobj.name)
6457
      # Make sure the instance lock gets removed
6458
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
6459
      raise errors.OpExecError("There are some degraded disks for"
6460
                               " this instance")
6461

    
6462
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
6463
      if self.op.mode == constants.INSTANCE_CREATE:
6464
        feedback_fn("* running the instance OS create scripts...")
6465
        # FIXME: pass debug option from opcode to backend
6466
        result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
6467
                                               self.op.debug_level)
6468
        result.Raise("Could not add os for instance %s"
6469
                     " on node %s" % (instance, pnode_name))
6470

    
6471
      elif self.op.mode == constants.INSTANCE_IMPORT:
6472
        feedback_fn("* running the instance OS import scripts...")
6473
        src_node = self.op.src_node
6474
        src_images = self.src_images
6475
        cluster_name = self.cfg.GetClusterName()
6476
        # FIXME: pass debug option from opcode to backend
6477
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
6478
                                                         src_node, src_images,
6479
                                                         cluster_name,
6480
                                                         self.op.debug_level)
6481
        msg = import_result.fail_msg
6482
        if msg:
6483
          self.LogWarning("Error while importing the disk images for instance"
6484
                          " %s on node %s: %s" % (instance, pnode_name, msg))
6485
      else:
6486
        # also checked in the prereq part
6487
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
6488
                                     % self.op.mode)
6489

    
6490
    if self.op.start:
6491
      iobj.admin_up = True
6492
      self.cfg.Update(iobj, feedback_fn)
6493
      logging.info("Starting instance %s on node %s", instance, pnode_name)
6494
      feedback_fn("* starting instance...")
6495
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
6496
      result.Raise("Could not start instance")
6497

    
6498
    return list(iobj.all_nodes)
6499

    
6500

    
6501
class LUConnectConsole(NoHooksLU):
6502
  """Connect to an instance's console.
6503

6504
  This is somewhat special in that it returns the command line that
6505
  you need to run on the master node in order to connect to the
6506
  console.
6507

6508
  """
6509
  _OP_REQP = ["instance_name"]
6510
  REQ_BGL = False
6511

    
6512
  def ExpandNames(self):
6513
    self._ExpandAndLockInstance()
6514

    
6515
  def CheckPrereq(self):
6516
    """Check prerequisites.
6517

6518
    This checks that the instance is in the cluster.
6519

6520
    """
6521
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6522
    assert self.instance is not None, \
6523
      "Cannot retrieve locked instance %s" % self.op.instance_name
6524
    _CheckNodeOnline(self, self.instance.primary_node)
6525

    
6526
  def Exec(self, feedback_fn):
6527
    """Connect to the console of an instance
6528

6529
    """
6530
    instance = self.instance
6531
    node = instance.primary_node
6532

    
6533
    node_insts = self.rpc.call_instance_list([node],
6534
                                             [instance.hypervisor])[node]
6535
    node_insts.Raise("Can't get node information from %s" % node)
6536

    
6537
    if instance.name not in node_insts.payload:
6538
      raise errors.OpExecError("Instance %s is not running." % instance.name)
6539

    
6540
    logging.debug("Connecting to console of %s on %s", instance.name, node)
6541

    
6542
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
6543
    cluster = self.cfg.GetClusterInfo()
6544
    # beparams and hvparams are passed separately, to avoid editing the
6545
    # instance and then saving the defaults in the instance itself.
6546
    hvparams = cluster.FillHV(instance)
6547
    beparams = cluster.FillBE(instance)
6548
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
6549

    
6550
    # build ssh cmdline
6551
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
6552

    
6553

    
6554
class LUReplaceDisks(LogicalUnit):
6555
  """Replace the disks of an instance.
6556

6557
  """
6558
  HPATH = "mirrors-replace"
6559
  HTYPE = constants.HTYPE_INSTANCE
6560
  _OP_REQP = ["instance_name", "mode", "disks"]
6561
  REQ_BGL = False
6562

    
6563
  def CheckArguments(self):
6564
    if not hasattr(self.op, "remote_node"):
6565
      self.op.remote_node = None
6566
    if not hasattr(self.op, "iallocator"):
6567
      self.op.iallocator = None
6568
    if not hasattr(self.op, "early_release"):
6569
      self.op.early_release = False
6570

    
6571
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
6572
                                  self.op.iallocator)
6573

    
6574
  def ExpandNames(self):
6575
    self._ExpandAndLockInstance()
6576

    
6577
    if self.op.iallocator is not None:
6578
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6579

    
6580
    elif self.op.remote_node is not None:
6581
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
6582
      self.op.remote_node = remote_node
6583

    
6584
      # Warning: do not remove the locking of the new secondary here
6585
      # unless DRBD8.AddChildren is changed to work in parallel;
6586
      # currently it doesn't since parallel invocations of
6587
      # FindUnusedMinor will conflict
6588
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
6589
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6590

    
6591
    else:
6592
      self.needed_locks[locking.LEVEL_NODE] = []
6593
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6594

    
6595
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
6596
                                   self.op.iallocator, self.op.remote_node,
6597
                                   self.op.disks, False, self.op.early_release)
6598

    
6599
    self.tasklets = [self.replacer]
6600

    
6601
  def DeclareLocks(self, level):
6602
    # If we're not already locking all nodes in the set we have to declare the
6603
    # instance's primary/secondary nodes.
6604
    if (level == locking.LEVEL_NODE and
6605
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6606
      self._LockInstancesNodes()
6607

    
6608
  def BuildHooksEnv(self):
6609
    """Build hooks env.
6610

6611
    This runs on the master, the primary and all the secondaries.
6612

6613
    """
6614
    instance = self.replacer.instance
6615
    env = {
6616
      "MODE": self.op.mode,
6617
      "NEW_SECONDARY": self.op.remote_node,
6618
      "OLD_SECONDARY": instance.secondary_nodes[0],
6619
      }
6620
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6621
    nl = [
6622
      self.cfg.GetMasterNode(),
6623
      instance.primary_node,
6624
      ]
6625
    if self.op.remote_node is not None:
6626
      nl.append(self.op.remote_node)
6627
    return env, nl, nl
6628

    
6629

    
6630
class LUEvacuateNode(LogicalUnit):
6631
  """Relocate the secondary instances from a node.
6632

6633
  """
6634
  HPATH = "node-evacuate"
6635
  HTYPE = constants.HTYPE_NODE
6636
  _OP_REQP = ["node_name"]
6637
  REQ_BGL = False
6638

    
6639
  def CheckArguments(self):
6640
    if not hasattr(self.op, "remote_node"):
6641
      self.op.remote_node = None
6642
    if not hasattr(self.op, "iallocator"):
6643
      self.op.iallocator = None
6644
    if not hasattr(self.op, "early_release"):
6645
      self.op.early_release = False
6646

    
6647
    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
6648
                                  self.op.remote_node,
6649
                                  self.op.iallocator)
6650

    
6651
  def ExpandNames(self):
6652
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6653

    
6654
    self.needed_locks = {}
6655

    
6656
    # Declare node locks
6657
    if self.op.iallocator is not None:
6658
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6659

    
6660
    elif self.op.remote_node is not None:
6661
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
6662

    
6663
      # Warning: do not remove the locking of the new secondary here
6664
      # unless DRBD8.AddChildren is changed to work in parallel;
6665
      # currently it doesn't since parallel invocations of
6666
      # FindUnusedMinor will conflict
6667
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
6668
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6669

    
6670
    else:
6671
      raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)
6672

    
6673
    # Create tasklets for replacing disks for all secondary instances on this
6674
    # node
6675
    names = []
6676
    tasklets = []
6677

    
6678
    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
6679
      logging.debug("Replacing disks for instance %s", inst.name)
6680
      names.append(inst.name)
6681

    
6682
      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
6683
                                self.op.iallocator, self.op.remote_node, [],
6684
                                True, self.op.early_release)
6685
      tasklets.append(replacer)
6686

    
6687
    self.tasklets = tasklets
6688
    self.instance_names = names
6689

    
6690
    # Declare instance locks
6691
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
6692

    
6693
  def DeclareLocks(self, level):
6694
    # If we're not already locking all nodes in the set we have to declare the
6695
    # instance's primary/secondary nodes.
6696
    if (level == locking.LEVEL_NODE and
6697
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6698
      self._LockInstancesNodes()
6699

    
6700
  def BuildHooksEnv(self):
6701
    """Build hooks env.
6702

6703
    This runs on the master, the primary and all the secondaries.
6704

6705
    """
6706
    env = {
6707
      "NODE_NAME": self.op.node_name,
6708
      }
6709

    
6710
    nl = [self.cfg.GetMasterNode()]
6711

    
6712
    if self.op.remote_node is not None:
6713
      env["NEW_SECONDARY"] = self.op.remote_node
6714
      nl.append(self.op.remote_node)
6715

    
6716
    return (env, nl, nl)
6717

    
6718

    
6719
class TLReplaceDisks(Tasklet):
6720
  """Replaces disks for an instance.
6721

6722
  Note: Locking is not within the scope of this class.
6723

6724
  """
6725
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
6726
               disks, delay_iallocator, early_release):
6727
    """Initializes this class.
6728

6729
    """
6730
    Tasklet.__init__(self, lu)
6731

    
6732
    # Parameters
6733
    self.instance_name = instance_name
6734
    self.mode = mode
6735
    self.iallocator_name = iallocator_name
6736
    self.remote_node = remote_node
6737
    self.disks = disks
6738
    self.delay_iallocator = delay_iallocator
6739
    self.early_release = early_release
6740

    
6741
    # Runtime data
6742
    self.instance = None
6743
    self.new_node = None
6744
    self.target_node = None
6745
    self.other_node = None
6746
    self.remote_node_info = None
6747
    self.node_secondary_ip = None
6748

    
6749
  @staticmethod
6750
  def CheckArguments(mode, remote_node, iallocator):
6751
    """Helper function for users of this class.
6752

6753
    """
6754
    # check for valid parameter combination
6755
    if mode == constants.REPLACE_DISK_CHG:
6756
      if remote_node is None and iallocator is None:
6757
        raise errors.OpPrereqError("When changing the secondary either an"
6758
                                   " iallocator script must be used or the"
6759
                                   " new node given", errors.ECODE_INVAL)
6760

    
6761
      if remote_node is not None and iallocator is not None:
6762
        raise errors.OpPrereqError("Give either the iallocator or the new"
6763
                                   " secondary, not both", errors.ECODE_INVAL)
6764

    
6765
    elif remote_node is not None or iallocator is not None:
6766
      # Not replacing the secondary
6767
      raise errors.OpPrereqError("The iallocator and new node options can"
6768
                                 " only be used when changing the"
6769
                                 " secondary node", errors.ECODE_INVAL)
6770
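    # Illustrative summary of CheckArguments above (not part of the original
    # code): for mode == constants.REPLACE_DISK_CHG exactly one of
    # remote_node and iallocator must be given; for any other mode both must
    # be left unset, otherwise an OpPrereqError is raised.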

    
6771
  @staticmethod
6772
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
6773
    """Compute a new secondary node using an IAllocator.
6774

6775
    """
6776
    ial = IAllocator(lu.cfg, lu.rpc,
6777
                     mode=constants.IALLOCATOR_MODE_RELOC,
6778
                     name=instance_name,
6779
                     relocate_from=relocate_from)
6780

    
6781
    ial.Run(iallocator_name)
6782

    
6783
    if not ial.success:
6784
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
6785
                                 " %s" % (iallocator_name, ial.info),
6786
                                 errors.ECODE_NORES)
6787

    
6788
    if len(ial.result) != ial.required_nodes:
6789
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6790
                                 " of nodes (%s), required %s" %
6791
                                 (iallocator_name,
6792
                                  len(ial.result), ial.required_nodes),
6793
                                 errors.ECODE_FAULT)
6794

    
6795
    remote_node_name = ial.result[0]
6796

    
6797
    lu.LogInfo("Selected new secondary for instance '%s': %s",
6798
               instance_name, remote_node_name)
6799

    
6800
    return remote_node_name
6801

    
6802
  def _FindFaultyDisks(self, node_name):
6803
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
6804
                                    node_name, True)
6805

    
6806
  def CheckPrereq(self):
6807
    """Check prerequisites.
6808

6809
    This checks that the instance is in the cluster.
6810

6811
    """
6812
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
6813
    assert instance is not None, \
6814
      "Cannot retrieve locked instance %s" % self.instance_name
6815

    
6816
    if instance.disk_template != constants.DT_DRBD8:
6817
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
6818
                                 " instances", errors.ECODE_INVAL)
6819

    
6820
    if len(instance.secondary_nodes) != 1:
6821
      raise errors.OpPrereqError("The instance has a strange layout,"
6822
                                 " expected one secondary but found %d" %
6823
                                 len(instance.secondary_nodes),
6824
                                 errors.ECODE_FAULT)
6825

    
6826
    if not self.delay_iallocator:
6827
      self._CheckPrereq2()
6828

    
6829
  def _CheckPrereq2(self):
6830
    """Check prerequisites, second part.
6831

6832
    This function should always be part of CheckPrereq. It was separated and
    is now called from Exec because, during node evacuation, the iallocator
    would otherwise only be called with an unmodified cluster model, not
    taking planned changes into account.
6836

6837
    """
6838
    instance = self.instance
6839
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    node_2nd_ip = {}

    for node_name in [self.target_node, self.other_node, self.new_node]:
      if node_name is not None:
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = node_2nd_ip

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      return fn(feedback_fn)

    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    vgname = self.cfg.GetVGName()
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

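      # The replacement storage for each disk is a data LV of the original
      # size plus a small DRBD metadata LV, both created in the cluster's
      # volume group.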
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vgname, names[0]))
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vgname, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = dev.children
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names

  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ReleaseNodeLock(self, node_name):
    """Releases the lock for a given node (or list of node names)."""
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)

  def _ExecDrbd8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

      dev.children = new_lvs

      self.cfg.Update(self.instance, feedback_fn)

    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release both node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.target_node, self.other_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)

  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
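    # The new secondary will be resynced from the primary, so the primary's
    # local disk state (ldisk=True) must be fully consistent.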
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setups changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
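    # Ask the configuration for one free DRBD minor per instance disk, all
    # of them on the new secondary node (hence the repeated node name).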
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
                                               self.node_secondary_ip,
                                               self.instance.disks)\
                                              [self.instance.primary_node]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))
    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release all node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.instance.primary_node,
                             self.target_node,
                             self.new_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)


class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if not inst.admin_up:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUNodeEvacuationStrategy(NoHooksLU):
  """Computes the node evacuation strategy.

  """
  _OP_REQP = ["nodes"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None
    if self.op.remote_node is not None and self.op.iallocator is not None:
      raise errors.OpPrereqError("Give either the iallocator or the new"
                                 " secondary, not both", errors.ECODE_INVAL)

  def ExpandNames(self):
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks = locks = {}
    if self.op.remote_node is None:
      locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]

  def CheckPrereq(self):
    pass

  def Exec(self, feedback_fn):
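    # With an explicitly given target node, every secondary instance on the
    # evacuated nodes is simply paired with that node; otherwise the
    # iallocator is asked to compute the relocation.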
    if self.op.remote_node is not None:
      instances = []
      for node in self.op.nodes:
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
      result = []
      for i in instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)
        result.append([i.name, self.op.remote_node])
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      if not ial.success:
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
                                 errors.ECODE_NORES)
      result = ial.result
    return result


class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)


    self.instance = instance

    if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    _CheckNodesFreeDisk(self, nodenames, self.op.amount)

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk
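    # The grow request is sent to every node holding the disk; for DRBD this
    # means the backing LVs on both the primary and the secondary are resized.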
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
      result.Raise("Grow request failed to node %s" % node)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance)
      if disk_abort:
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                             " status.\nPlease check the instance.")


class LUQueryInstanceData(NoHooksLU):
  """Query runtime instance data.

  """
  _OP_REQP = ["instances", "static"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
    else:
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]
    return

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
                      for child in dev.children]
    else:
      dev_children = []

    data = {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

    return data

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    for instance in self.wanted_instances:
      if not self.op.static:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
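        # a non-empty answer from instance_info means the hypervisor
        # actually reports the instance as running on its primary node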
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"
      else:
        remote_state = None
      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = [self._ComputeDiskStatus(instance, None, device)
               for device in instance.disks]

      idict = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

      result[instance.name] = idict

    return result


class LUSetInstanceParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, 'nics'):
      self.op.nics = []
    if not hasattr(self.op, 'disks'):
      self.op.disks = []
    if not hasattr(self.op, 'beparams'):
      self.op.beparams = {}
    if not hasattr(self.op, 'hvparams'):
      self.op.hvparams = {}
    self.op.force = getattr(self.op, "force", False)
    if not (self.op.nics or self.op.disks or
            self.op.hvparams or self.op.beparams):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    # Disk validation
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
        if not isinstance(disk_dict, dict):
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                     errors.ECODE_INVAL)
        size = disk_dict.get('size', None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing",
                                     errors.ECODE_INVAL)
        try:
          size = int(size)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err), errors.ECODE_INVAL)
        disk_dict['size'] = size
      else:
        # modification of disk
        if 'size' in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk", errors.ECODE_INVAL)

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

    # NIC validation
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
        if not isinstance(nic_dict, dict):
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      # nic_dict should be a dict
      nic_ip = nic_dict.get('ip', None)
      if nic_ip is not None:
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict['ip'] = None
        else:
          if not utils.IsValidIP(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
                                       errors.ECODE_INVAL)

      nic_bridge = nic_dict.get('bridge', None)
      nic_link = nic_dict.get('link', None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict['bridge'] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict['link'] = None

      if nic_op == constants.DDM_ADD:
        nic_mac = nic_dict.get('mac', None)
        if nic_mac is None:
          nic_dict['mac'] = constants.VALUE_AUTO

      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)

        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic",
                                     errors.ECODE_INVAL)

    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
    if constants.BE_MEMORY in self.be_new:
      args['memory'] = self.be_new[constants.BE_MEMORY]
    if constants.BE_VCPUS in self.be_new:
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      args['nics'] = []
      nic_override = dict(self.op.nics)
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
      for idx, nic in enumerate(self.instance.nics):
        if idx in nic_override:
          this_nic_override = nic_override[idx]
        else:
          this_nic_override = {}
        if 'ip' in this_nic_override:
          ip = this_nic_override['ip']
        else:
          ip = nic.ip
        if 'mac' in this_nic_override:
          mac = this_nic_override['mac']
        else:
          mac = nic.mac
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      if constants.DDM_ADD in nic_override:
        ip = nic_override[constants.DDM_ADD].get('ip', None)
        mac = nic_override[constants.DDM_ADD]['mac']
        nicparams = self.nic_pnew[constants.DDM_ADD]
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      elif constants.DDM_REMOVE in nic_override:
        del args['nics'][-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  @staticmethod
  def _GetUpdatedParams(old_params, update_dict,
                        default_values, parameter_types):
    """Return the new params dict for the given params.

    @type old_params: dict
    @param old_params: old parameters
    @type update_dict: dict
    @param update_dict: dict containing new parameter values,
                        or constants.VALUE_DEFAULT to reset the
                        parameter to its default value
    @type default_values: dict
    @param default_values: default values for the filled parameters
    @type parameter_types: dict
    @param parameter_types: dict mapping target dict keys to types
                            in constants.ENFORCEABLE_TYPES
    @rtype: (dict, dict)
    @return: (new_parameters, filled_parameters)

    """
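    # Illustrative example (not from the original code): with
    # old_params={"memory": 512},
    # update_dict={"memory": constants.VALUE_DEFAULT, "vcpus": 4} and
    # default_values={"memory": 128, "vcpus": 1}, this returns
    # ({"vcpus": 4}, {"memory": 128, "vcpus": 4}).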
    params_copy = copy.deepcopy(old_params)
    for key, val in update_dict.iteritems():
      if val == constants.VALUE_DEFAULT:
        try:
          del params_copy[key]
        except KeyError:
          pass
      else:
        params_copy[key] = val
    utils.ForceDictType(params_copy, parameter_types)
    params_filled = objects.FillDict(default_values, params_copy)
    return (params_copy, params_filled)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    self.force = self.op.force

    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    # hvparams processing
    if self.op.hvparams:
      i_hvdict, hv_new = self._GetUpdatedParams(
                             instance.hvparams, self.op.hvparams,
                             cluster.hvparams[instance.hypervisor],
                             constants.HVS_PARAMETER_TYPES)
      # local check
      hypervisor.GetHypervisor(
        instance.hypervisor).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict, be_new = self._GetUpdatedParams(
                             instance.beparams, self.op.beparams,
                             cluster.beparams[constants.PP_DEFAULT],
                             constants.BES_PARAMETER_TYPES)
      self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}

    self.warn = []

    if constants.BE_MEMORY in self.op.beparams and not self.force:
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode,  msg))
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                        instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload['memory'])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
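        # the memory not already covered by the instance's current usage must
        # still fit into the primary node's reported free memory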
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
                    pninfo.payload['memory_free'])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem,
                                     errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          msg = nres.fail_msg
          if msg:
            self.warn.append("Can't get info from secondary node %s: %s" %
                             (node, msg))
          elif not isinstance(nres.payload.get('memory_free', None), int):
            self.warn.append("Secondary node %s didn't return free"
                             " memory information" % node)
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
            self.warn.append("Not enough memory to failover instance to"
                             " secondary node %s" % node)

    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if not instance.nics:
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                                     " no NICs" % nic_op,
                                     errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      if 'bridge' in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']

      new_nic_params, new_filled_nic_params = \
          self._GetUpdatedParams(old_nic_params, update_params_dict,
                                 cluster.nicparams[constants.PP_DEFAULT],
                                 constants.NICS_PARAMETER_TYPES)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
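        # in bridged mode the link parameter names the bridge, which has to
        # exist on the primary node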
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if 'ip' in nic_dict:
          nic_ip = nic_dict['ip']
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError('Cannot set the nic ip to None'
                                     ' on a routed nic', errors.ECODE_INVAL)
      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac is None:
          raise errors.OpPrereqError('Cannot set the nic mac to None',
                                     errors.ECODE_INVAL)
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
        else:
          # or validate/reserve the current one
          try:
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac,
                                       errors.ECODE_NOTUNIQUE)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
    for disk_op, _ in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance",
                                     errors.ECODE_INVAL)
        ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
        ins_l = ins_l[pnode]
        msg = ins_l.fail_msg
        if msg:
          raise errors.OpPrereqError("Can't contact node %s: %s" %
                                     (pnode, msg), errors.ECODE_ENVIRON)
        if instance.name in ins_l.payload:
          raise errors.OpPrereqError("Instance is running, can't remove"
                                     " disks.", errors.ECODE_STATE)

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks)),
                                     errors.ECODE_INVAL)

    return

  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
    instance = self.instance
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template == constants.DT_FILE:
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict['mode']
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict['mac']
        ip = nic_dict.get('ip', None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        for key in 'mac', 'ip':
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    self.cfg.Update(instance, feedback_fn)

    return result


class LUQueryExports(NoHooksLU):
  """Query the exports list

  """
  _OP_REQP = ['nodes']
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
8289
      self.needed_locks[locking.LEVEL_NODE] = \
8290
        _GetWantedNodes(self, self.op.nodes)
8291

    
8292
  def CheckPrereq(self):
8293
    """Check prerequisites.
8294

8295
    """
8296
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
8297

    
8298
  def Exec(self, feedback_fn):
8299
    """Compute the list of all the exported system images.
8300

8301
    @rtype: dict
8302
    @return: a dictionary with the structure node->(export-list)
8303
        where export-list is a list of the instances exported on
8304
        that node.
8305

8306
    """
8307
    rpcresult = self.rpc.call_export_list(self.nodes)
8308
    result = {}
8309
    for node in rpcresult:
8310
      if rpcresult[node].fail_msg:
8311
        result[node] = False
8312
      else:
8313
        result[node] = rpcresult[node].payload
8314

    
8315
    return result
8316

    
8317

    
8318
class LUExportInstance(LogicalUnit):
8319
  """Export an instance to an image in the cluster.
8320

8321
  """
8322
  HPATH = "instance-export"
8323
  HTYPE = constants.HTYPE_INSTANCE
8324
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
8325
  REQ_BGL = False
8326

    
8327
  def CheckArguments(self):
8328
    """Check the arguments.
8329

8330
    """
8331
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
8332
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
8333

    
8334
  def ExpandNames(self):
8335
    self._ExpandAndLockInstance()
8336
    # FIXME: lock only instance primary and destination node
8337
    #
8338
    # Sad but true, for now we have do lock all nodes, as we don't know where
8339
    # the previous export might be, and and in this LU we search for it and
8340
    # remove it from its current node. In the future we could fix this by:
8341
    #  - making a tasklet to search (share-lock all), then create the new one,
8342
    #    then one to remove, after
8343
    #  - removing the removal operation altogether
8344
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8345

    
8346
  def DeclareLocks(self, level):
8347
    """Last minute lock declaration."""
8348
    # All nodes are locked anyway, so nothing to do here.
8349

    
8350
  def BuildHooksEnv(self):
8351
    """Build hooks env.
8352

8353
    This will run on the master, primary node and target node.
8354

8355
    """
8356
    env = {
8357
      "EXPORT_NODE": self.op.target_node,
8358
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
8359
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
8360
      }
8361
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8362
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
8363
          self.op.target_node]
8364
    return env, nl, nl
8365

    
8366
  def CheckPrereq(self):
8367
    """Check prerequisites.
8368

8369
    This checks that the instance and node names are valid.
8370

8371
    """
8372
    instance_name = self.op.instance_name
8373
    self.instance = self.cfg.GetInstanceInfo(instance_name)
8374
    assert self.instance is not None, \
8375
          "Cannot retrieve locked instance %s" % self.op.instance_name
8376
    _CheckNodeOnline(self, self.instance.primary_node)
8377

    
8378
    self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8379
    self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
8380
    assert self.dst_node is not None
8381

    
8382
    _CheckNodeOnline(self, self.dst_node.name)
8383
    _CheckNodeNotDrained(self, self.dst_node.name)
8384

    
8385
    # instance disk type verification
8386
    for disk in self.instance.disks:
8387
      if disk.dev_type == constants.LD_FILE:
8388
        raise errors.OpPrereqError("Export not supported for instances with"
8389
                                   " file-based disks", errors.ECODE_INVAL)
8390

    
8391
  def Exec(self, feedback_fn):
8392
    """Export an instance to an image in the cluster.
8393

8394
    """
8395
    instance = self.instance
8396
    dst_node = self.dst_node
8397
    src_node = instance.primary_node
8398

    
8399
    if self.op.shutdown:
8400
      # shutdown the instance, but not the disks
8401
      feedback_fn("Shutting down instance %s" % instance.name)
8402
      result = self.rpc.call_instance_shutdown(src_node, instance,
8403
                                               self.shutdown_timeout)
8404
      result.Raise("Could not shutdown instance %s on"
8405
                   " node %s" % (instance.name, src_node))
8406

    
8407
    vgname = self.cfg.GetVGName()
8408

    
8409
    snap_disks = []
8410

    
8411
    # set the disks ID correctly since call_instance_start needs the
8412
    # correct drbd minor to create the symlinks
8413
    for disk in instance.disks:
8414
      self.cfg.SetDiskID(disk, src_node)
8415

    
8416
    activate_disks = (not instance.admin_up)
8417

    
8418
    if activate_disks:
8419
      # Activate the instance disks if we'exporting a stopped instance
8420
      feedback_fn("Activating disks for %s" % instance.name)
8421
      _StartInstanceDisks(self, instance, None)
8422

    
8423
    try:
8424
      # per-disk results
8425
      dresults = []
8426
      try:
8427
        for idx, disk in enumerate(instance.disks):
8428
          feedback_fn("Creating a snapshot of disk/%s on node %s" %
8429
                      (idx, src_node))
8430

    
8431
          # result.payload will be a snapshot of an lvm leaf of the one we
8432
          # passed
8433
          result = self.rpc.call_blockdev_snapshot(src_node, disk)
8434
          msg = result.fail_msg
8435
          if msg:
8436
            self.LogWarning("Could not snapshot disk/%s on node %s: %s",
8437
                            idx, src_node, msg)
8438
            snap_disks.append(False)
8439
          else:
8440
            disk_id = (vgname, result.payload)
8441
            new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
8442
                                   logical_id=disk_id, physical_id=disk_id,
8443
                                   iv_name=disk.iv_name)
8444
            snap_disks.append(new_dev)
8445

    
8446
      finally:
8447
        if self.op.shutdown and instance.admin_up:
8448
          feedback_fn("Starting instance %s" % instance.name)
8449
          result = self.rpc.call_instance_start(src_node, instance, None, None)
8450
          msg = result.fail_msg
8451
          if msg:
8452
            _ShutdownInstanceDisks(self, instance)
8453
            raise errors.OpExecError("Could not start instance: %s" % msg)
8454

    
8455
      # TODO: check for size
8456

    
8457
      cluster_name = self.cfg.GetClusterName()
8458
      for idx, dev in enumerate(snap_disks):
8459
        feedback_fn("Exporting snapshot %s from %s to %s" %
8460
                    (idx, src_node, dst_node.name))
8461
        if dev:
8462
          # FIXME: pass debug from opcode to backend
8463
          result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
8464
                                                 instance, cluster_name,
8465
                                                 idx, self.op.debug_level)
8466
          msg = result.fail_msg
8467
          if msg:
8468
            self.LogWarning("Could not export disk/%s from node %s to"
8469
                            " node %s: %s", idx, src_node, dst_node.name, msg)
8470
            dresults.append(False)
8471
          else:
8472
            dresults.append(True)
8473
          msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
8474
          if msg:
8475
            self.LogWarning("Could not remove snapshot for disk/%d from node"
8476
                            " %s: %s", idx, src_node, msg)
8477
        else:
8478
          dresults.append(False)
8479

    
8480
      feedback_fn("Finalizing export on %s" % dst_node.name)
8481
      result = self.rpc.call_finalize_export(dst_node.name, instance,
8482
                                             snap_disks)
8483
      fin_resu = True
8484
      msg = result.fail_msg
8485
      if msg:
8486
        self.LogWarning("Could not finalize export for instance %s"
8487
                        " on node %s: %s", instance.name, dst_node.name, msg)
8488
        fin_resu = False
8489

    
8490
    finally:
8491
      if activate_disks:
8492
        feedback_fn("Deactivating disks for %s" % instance.name)
8493
        _ShutdownInstanceDisks(self, instance)
8494

    
8495
    nodelist = self.cfg.GetNodeList()
8496
    nodelist.remove(dst_node.name)
8497

    
8498
    # on one-node clusters nodelist will be empty after the removal
8499
    # if we proceed the backup would be removed because OpQueryExports
8500
    # substitutes an empty list with the full cluster node list.
8501
    iname = instance.name
8502
    if nodelist:
8503
      feedback_fn("Removing old exports for instance %s" % iname)
8504
      exportlist = self.rpc.call_export_list(nodelist)
8505
      for node in exportlist:
8506
        if exportlist[node].fail_msg:
8507
          continue
8508
        if iname in exportlist[node].payload:
8509
          msg = self.rpc.call_export_remove(node, iname).fail_msg
8510
          if msg:
8511
            self.LogWarning("Could not remove older export for instance %s"
8512
                            " on node %s: %s", iname, node, msg)
8513
    return fin_resu, dresults
8514

    
8515

    
8516
class LURemoveExport(NoHooksLU):
8517
  """Remove exports related to the named instance.
8518

8519
  """
8520
  _OP_REQP = ["instance_name"]
8521
  REQ_BGL = False
8522

    
8523
  def ExpandNames(self):
8524
    self.needed_locks = {}
8525
    # We need all nodes to be locked in order for RemoveExport to work, but we
8526
    # don't need to lock the instance itself, as nothing will happen to it (and
8527
    # we can remove exports also for a removed instance)
8528
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8529

    
8530
  def CheckPrereq(self):
8531
    """Check prerequisites.
8532
    """
8533
    pass
8534

    
8535
  def Exec(self, feedback_fn):
8536
    """Remove any export.
8537

8538
    """
8539
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
8540
    # If the instance was not found we'll try with the name that was passed in.
8541
    # This will only work if it was an FQDN, though.
8542
    fqdn_warn = False
8543
    if not instance_name:
8544
      fqdn_warn = True
8545
      instance_name = self.op.instance_name
8546

    
8547
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
8548
    exportlist = self.rpc.call_export_list(locked_nodes)
8549
    found = False
8550
    for node in exportlist:
8551
      msg = exportlist[node].fail_msg
8552
      if msg:
8553
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
8554
        continue
8555
      if instance_name in exportlist[node].payload:
8556
        found = True
8557
        result = self.rpc.call_export_remove(node, instance_name)
8558
        msg = result.fail_msg
8559
        if msg:
8560
          logging.error("Could not remove export for instance %s"
8561
                        " on node %s: %s", instance_name, node, msg)
8562

    
8563
    if fqdn_warn and not found:
8564
      feedback_fn("Export not found. If trying to remove an export belonging"
8565
                  " to a deleted instance please use its Fully Qualified"
8566
                  " Domain Name.")
8567

    
8568

    
8569
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
8570
  """Generic tags LU.
8571

8572
  This is an abstract class which is the parent of all the other tags LUs.
8573

8574
  """
8575

    
8576
  def ExpandNames(self):
8577
    self.needed_locks = {}
8578
    if self.op.kind == constants.TAG_NODE:
8579
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
8580
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
8581
    elif self.op.kind == constants.TAG_INSTANCE:
8582
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
8583
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
8584

    
8585
  def CheckPrereq(self):
8586
    """Check prerequisites.
8587

8588
    """
8589
    if self.op.kind == constants.TAG_CLUSTER:
8590
      self.target = self.cfg.GetClusterInfo()
8591
    elif self.op.kind == constants.TAG_NODE:
8592
      self.target = self.cfg.GetNodeInfo(self.op.name)
8593
    elif self.op.kind == constants.TAG_INSTANCE:
8594
      self.target = self.cfg.GetInstanceInfo(self.op.name)
8595
    else:
8596
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
8597
                                 str(self.op.kind), errors.ECODE_INVAL)
8598

    
8599

    
8600
class LUGetTags(TagsLU):
8601
  """Returns the tags of a given object.
8602

8603
  """
8604
  _OP_REQP = ["kind", "name"]
8605
  REQ_BGL = False
8606

    
8607
  def Exec(self, feedback_fn):
8608
    """Returns the tag list.
8609

8610
    """
8611
    return list(self.target.GetTags())
8612

    
8613

    
8614
class LUSearchTags(NoHooksLU):
8615
  """Searches the tags for a given pattern.
8616

8617
  """
8618
  _OP_REQP = ["pattern"]
8619
  REQ_BGL = False
8620

    
8621
  def ExpandNames(self):
8622
    self.needed_locks = {}
8623

    
8624
  def CheckPrereq(self):
8625
    """Check prerequisites.
8626

8627
    This checks the pattern passed for validity by compiling it.
8628

8629
    """
8630
    try:
8631
      self.re = re.compile(self.op.pattern)
8632
    except re.error, err:
8633
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
8634
                                 (self.op.pattern, err), errors.ECODE_INVAL)
8635

    
8636
  def Exec(self, feedback_fn):
8637
    """Returns the tag list.
8638

8639
    """
8640
    cfg = self.cfg
8641
    tgts = [("/cluster", cfg.GetClusterInfo())]
8642
    ilist = cfg.GetAllInstancesInfo().values()
8643
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
8644
    nlist = cfg.GetAllNodesInfo().values()
8645
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
8646
    results = []
8647
    for path, target in tgts:
8648
      for tag in target.GetTags():
8649
        if self.re.search(tag):
8650
          results.append((path, tag))
8651
    return results
8652

    
8653

    
8654
class LUAddTags(TagsLU):
8655
  """Sets a tag on a given object.
8656

8657
  """
8658
  _OP_REQP = ["kind", "name", "tags"]
8659
  REQ_BGL = False
8660

    
8661
  def CheckPrereq(self):
8662
    """Check prerequisites.
8663

8664
    This checks the type and length of the tag name and value.
8665

8666
    """
8667
    TagsLU.CheckPrereq(self)
8668
    for tag in self.op.tags:
8669
      objects.TaggableObject.ValidateTag(tag)
8670

    
8671
  def Exec(self, feedback_fn):
8672
    """Sets the tag.
8673

8674
    """
8675
    try:
8676
      for tag in self.op.tags:
8677
        self.target.AddTag(tag)
8678
    except errors.TagError, err:
8679
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
8680
    self.cfg.Update(self.target, feedback_fn)
8681

    
8682

    
8683
class LUDelTags(TagsLU):
8684
  """Delete a list of tags from a given object.
8685

8686
  """
8687
  _OP_REQP = ["kind", "name", "tags"]
8688
  REQ_BGL = False
8689

    
8690
  def CheckPrereq(self):
8691
    """Check prerequisites.
8692

8693
    This checks that we have the given tag.
8694

8695
    """
8696
    TagsLU.CheckPrereq(self)
8697
    for tag in self.op.tags:
8698
      objects.TaggableObject.ValidateTag(tag)
8699
    del_tags = frozenset(self.op.tags)
8700
    cur_tags = self.target.GetTags()
8701
    if not del_tags <= cur_tags:
8702
      diff_tags = del_tags - cur_tags
8703
      diff_names = ["'%s'" % tag for tag in diff_tags]
8704
      diff_names.sort()
8705
      raise errors.OpPrereqError("Tag(s) %s not found" %
8706
                                 (",".join(diff_names)), errors.ECODE_NOENT)
8707

    
8708
  def Exec(self, feedback_fn):
8709
    """Remove the tag from the object.
8710

8711
    """
8712
    for tag in self.op.tags:
8713
      self.target.RemoveTag(tag)
8714
    self.cfg.Update(self.target, feedback_fn)
8715

    
8716

    
8717
class LUTestDelay(NoHooksLU):
8718
  """Sleep for a specified amount of time.
8719

8720
  This LU sleeps on the master and/or nodes for a specified amount of
8721
  time.
8722

8723
  """
8724
  _OP_REQP = ["duration", "on_master", "on_nodes"]
8725
  REQ_BGL = False
8726

    
8727
  def ExpandNames(self):
8728
    """Expand names and set required locks.
8729

8730
    This expands the node list, if any.
8731

8732
    """
8733
    self.needed_locks = {}
8734
    if self.op.on_nodes:
8735
      # _GetWantedNodes can be used here, but is not always appropriate to use
8736
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
8737
      # more information.
8738
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
8739
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
8740

    
8741
  def CheckPrereq(self):
8742
    """Check prerequisites.
8743

8744
    """
8745

    
8746
  def Exec(self, feedback_fn):
8747
    """Do the actual sleep.
8748

8749
    """
8750
    if self.op.on_master:
8751
      if not utils.TestDelay(self.op.duration):
8752
        raise errors.OpExecError("Error during master delay test")
8753
    if self.op.on_nodes:
8754
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
8755
      for node, node_result in result.items():
8756
        node_result.Raise("Failure during rpc call to node %s" % node)
8757

    
8758

    
8759
class IAllocator(object):
8760
  """IAllocator framework.
8761

8762
  An IAllocator instance has three sets of attributes:
8763
    - cfg that is needed to query the cluster
8764
    - input data (all members of the _KEYS class attribute are required)
8765
    - four buffer attributes (in|out_data|text), that represent the
8766
      input (to the external script) in text and data structure format,
8767
      and the output from it, again in two formats
8768
    - the result variables from the script (success, info, nodes) for
8769
      easy usage
8770

8771
  """
8772
  # pylint: disable-msg=R0902
8773
  # lots of instance attributes
8774
  _ALLO_KEYS = [
8775
    "name", "mem_size", "disks", "disk_template",
8776
    "os", "tags", "nics", "vcpus", "hypervisor",
8777
    ]
8778
  _RELO_KEYS = [
8779
    "name", "relocate_from",
8780
    ]
8781
  _EVAC_KEYS = [
8782
    "evac_nodes",
8783
    ]
8784

    
8785
  def __init__(self, cfg, rpc, mode, **kwargs):
8786
    self.cfg = cfg
8787
    self.rpc = rpc
8788
    # init buffer variables
8789
    self.in_text = self.out_text = self.in_data = self.out_data = None
8790
    # init all input fields so that pylint is happy
8791
    self.mode = mode
8792
    self.mem_size = self.disks = self.disk_template = None
8793
    self.os = self.tags = self.nics = self.vcpus = None
8794
    self.hypervisor = None
8795
    self.relocate_from = None
8796
    self.name = None
8797
    self.evac_nodes = None
8798
    # computed fields
8799
    self.required_nodes = None
8800
    # init result fields
8801
    self.success = self.info = self.result = None
8802
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8803
      keyset = self._ALLO_KEYS
8804
      fn = self._AddNewInstance
8805
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
8806
      keyset = self._RELO_KEYS
8807
      fn = self._AddRelocateInstance
8808
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
8809
      keyset = self._EVAC_KEYS
8810
      fn = self._AddEvacuateNodes
8811
    else:
8812
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
8813
                                   " IAllocator" % self.mode)
8814
    for key in kwargs:
8815
      if key not in keyset:
8816
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
8817
                                     " IAllocator" % key)
8818
      setattr(self, key, kwargs[key])
8819

    
8820
    for key in keyset:
8821
      if key not in kwargs:
8822
        raise errors.ProgrammerError("Missing input parameter '%s' to"
8823
                                     " IAllocator" % key)
8824
    self._BuildInputData(fn)
8825

    
8826
  def _ComputeClusterData(self):
8827
    """Compute the generic allocator input data.
8828

8829
    This is the data that is independent of the actual operation.
8830

8831
    """
8832
    cfg = self.cfg
8833
    cluster_info = cfg.GetClusterInfo()
8834
    # cluster data
8835
    data = {
8836
      "version": constants.IALLOCATOR_VERSION,
8837
      "cluster_name": cfg.GetClusterName(),
8838
      "cluster_tags": list(cluster_info.GetTags()),
8839
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
8840
      # we don't have job IDs
8841
      }
8842
    iinfo = cfg.GetAllInstancesInfo().values()
8843
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
8844

    
8845
    # node data
8846
    node_results = {}
8847
    node_list = cfg.GetNodeList()
8848

    
8849
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8850
      hypervisor_name = self.hypervisor
8851
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
8852
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
8853
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
8854
      hypervisor_name = cluster_info.enabled_hypervisors[0]
8855

    
8856
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
8857
                                        hypervisor_name)
8858
    node_iinfo = \
8859
      self.rpc.call_all_instances_info(node_list,
8860
                                       cluster_info.enabled_hypervisors)
8861
    for nname, nresult in node_data.items():
8862
      # first fill in static (config-based) values
8863
      ninfo = cfg.GetNodeInfo(nname)
8864
      pnr = {
8865
        "tags": list(ninfo.GetTags()),
8866
        "primary_ip": ninfo.primary_ip,
8867
        "secondary_ip": ninfo.secondary_ip,
8868
        "offline": ninfo.offline,
8869
        "drained": ninfo.drained,
8870
        "master_candidate": ninfo.master_candidate,
8871
        }
8872

    
8873
      if not (ninfo.offline or ninfo.drained):
8874
        nresult.Raise("Can't get data for node %s" % nname)
8875
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
8876
                                nname)
8877
        remote_info = nresult.payload
8878

    
8879
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
8880
                     'vg_size', 'vg_free', 'cpu_total']:
8881
          if attr not in remote_info:
8882
            raise errors.OpExecError("Node '%s' didn't return attribute"
8883
                                     " '%s'" % (nname, attr))
8884
          if not isinstance(remote_info[attr], int):
8885
            raise errors.OpExecError("Node '%s' returned invalid value"
8886
                                     " for '%s': %s" %
8887
                                     (nname, attr, remote_info[attr]))
8888
        # compute memory used by primary instances
8889
        i_p_mem = i_p_up_mem = 0
8890
        for iinfo, beinfo in i_list:
8891
          if iinfo.primary_node == nname:
8892
            i_p_mem += beinfo[constants.BE_MEMORY]
8893
            if iinfo.name not in node_iinfo[nname].payload:
8894
              i_used_mem = 0
8895
            else:
8896
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
8897
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
8898
            remote_info['memory_free'] -= max(0, i_mem_diff)
8899

    
8900
            if iinfo.admin_up:
8901
              i_p_up_mem += beinfo[constants.BE_MEMORY]
8902

    
8903
        # compute memory used by instances
8904
        pnr_dyn = {
8905
          "total_memory": remote_info['memory_total'],
8906
          "reserved_memory": remote_info['memory_dom0'],
8907
          "free_memory": remote_info['memory_free'],
8908
          "total_disk": remote_info['vg_size'],
8909
          "free_disk": remote_info['vg_free'],
8910
          "total_cpus": remote_info['cpu_total'],
8911
          "i_pri_memory": i_p_mem,
8912
          "i_pri_up_memory": i_p_up_mem,
8913
          }
8914
        pnr.update(pnr_dyn)
8915

    
8916
      node_results[nname] = pnr
8917
    data["nodes"] = node_results
8918

    
8919
    # instance data
8920
    instance_data = {}
8921
    for iinfo, beinfo in i_list:
8922
      nic_data = []
8923
      for nic in iinfo.nics:
8924
        filled_params = objects.FillDict(
8925
            cluster_info.nicparams[constants.PP_DEFAULT],
8926
            nic.nicparams)
8927
        nic_dict = {"mac": nic.mac,
8928
                    "ip": nic.ip,
8929
                    "mode": filled_params[constants.NIC_MODE],
8930
                    "link": filled_params[constants.NIC_LINK],
8931
                   }
8932
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
8933
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
8934
        nic_data.append(nic_dict)
8935
      pir = {
8936
        "tags": list(iinfo.GetTags()),
8937
        "admin_up": iinfo.admin_up,
8938
        "vcpus": beinfo[constants.BE_VCPUS],
8939
        "memory": beinfo[constants.BE_MEMORY],
8940
        "os": iinfo.os,
8941
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
8942
        "nics": nic_data,
8943
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
8944
        "disk_template": iinfo.disk_template,
8945
        "hypervisor": iinfo.hypervisor,
8946
        }
8947
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
8948
                                                 pir["disks"])
8949
      instance_data[iinfo.name] = pir
8950

    
8951
    data["instances"] = instance_data
8952

    
8953
    self.in_data = data
8954

    
8955
  def _AddNewInstance(self):
8956
    """Add new instance data to allocator structure.
8957

8958
    This in combination with _AllocatorGetClusterData will create the
8959
    correct structure needed as input for the allocator.
8960

8961
    The checks for the completeness of the opcode must have already been
8962
    done.
8963

8964
    """
8965
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
8966

    
8967
    if self.disk_template in constants.DTS_NET_MIRROR:
8968
      self.required_nodes = 2
8969
    else:
8970
      self.required_nodes = 1
8971
    request = {
8972
      "name": self.name,
8973
      "disk_template": self.disk_template,
8974
      "tags": self.tags,
8975
      "os": self.os,
8976
      "vcpus": self.vcpus,
8977
      "memory": self.mem_size,
8978
      "disks": self.disks,
8979
      "disk_space_total": disk_space,
8980
      "nics": self.nics,
8981
      "required_nodes": self.required_nodes,
8982
      }
8983
    return request
8984

    
8985
  def _AddRelocateInstance(self):
8986
    """Add relocate instance data to allocator structure.
8987

8988
    This in combination with _IAllocatorGetClusterData will create the
8989
    correct structure needed as input for the allocator.
8990

8991
    The checks for the completeness of the opcode must have already been
8992
    done.
8993

8994
    """
8995
    instance = self.cfg.GetInstanceInfo(self.name)
8996
    if instance is None:
8997
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
8998
                                   " IAllocator" % self.name)
8999

    
9000
    if instance.disk_template not in constants.DTS_NET_MIRROR:
9001
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
9002
                                 errors.ECODE_INVAL)
9003

    
9004
    if len(instance.secondary_nodes) != 1:
9005
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
9006
                                 errors.ECODE_STATE)
9007

    
9008
    self.required_nodes = 1
9009
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
9010
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
9011

    
9012
    request = {
9013
      "name": self.name,
9014
      "disk_space_total": disk_space,
9015
      "required_nodes": self.required_nodes,
9016
      "relocate_from": self.relocate_from,
9017
      }
9018
    return request
9019

    
9020
  def _AddEvacuateNodes(self):
9021
    """Add evacuate nodes data to allocator structure.
9022

9023
    """
9024
    request = {
9025
      "evac_nodes": self.evac_nodes
9026
      }
9027
    return request
9028

    
9029
  def _BuildInputData(self, fn):
9030
    """Build input data structures.
9031

9032
    """
9033
    self._ComputeClusterData()
9034

    
9035
    request = fn()
9036
    request["type"] = self.mode
9037
    self.in_data["request"] = request
9038

    
9039
    self.in_text = serializer.Dump(self.in_data)
9040

    
9041
  def Run(self, name, validate=True, call_fn=None):
9042
    """Run an instance allocator and return the results.
9043

9044
    """
9045
    if call_fn is None:
9046
      call_fn = self.rpc.call_iallocator_runner
9047

    
9048
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
9049
    result.Raise("Failure while running the iallocator script")
9050

    
9051
    self.out_text = result.payload
9052
    if validate:
9053
      self._ValidateResult()
9054

    
9055
  def _ValidateResult(self):
9056
    """Process the allocator results.
9057

9058
    This will process and if successful save the result in
9059
    self.out_data and the other parameters.
9060

9061
    """
9062
    try:
9063
      rdict = serializer.Load(self.out_text)
9064
    except Exception, err:
9065
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
9066

    
9067
    if not isinstance(rdict, dict):
9068
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
9069

    
9070
    # TODO: remove backwards compatiblity in later versions
9071
    if "nodes" in rdict and "result" not in rdict:
9072
      rdict["result"] = rdict["nodes"]
9073
      del rdict["nodes"]
9074

    
9075
    for key in "success", "info", "result":
9076
      if key not in rdict:
9077
        raise errors.OpExecError("Can't parse iallocator results:"
9078
                                 " missing key '%s'" % key)
9079
      setattr(self, key, rdict[key])
9080

    
9081
    if not isinstance(rdict["result"], list):
9082
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
9083
                               " is not a list")
9084
    self.out_data = rdict
9085

    
9086

    
9087
class LUTestAllocator(NoHooksLU):
9088
  """Run allocator tests.
9089

9090
  This LU runs the allocator tests
9091

9092
  """
9093
  _OP_REQP = ["direction", "mode", "name"]
9094

    
9095
  def CheckPrereq(self):
9096
    """Check prerequisites.
9097

9098
    This checks the opcode parameters depending on the director and mode test.
9099

9100
    """
9101
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
9102
      for attr in ["name", "mem_size", "disks", "disk_template",
9103
                   "os", "tags", "nics", "vcpus"]:
9104
        if not hasattr(self.op, attr):
9105
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
9106
                                     attr, errors.ECODE_INVAL)
9107
      iname = self.cfg.ExpandInstanceName(self.op.name)
9108
      if iname is not None:
9109
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
9110
                                   iname, errors.ECODE_EXISTS)
9111
      if not isinstance(self.op.nics, list):
9112
        raise errors.OpPrereqError("Invalid parameter 'nics'",
9113
                                   errors.ECODE_INVAL)
9114
      for row in self.op.nics:
9115
        if (not isinstance(row, dict) or
9116
            "mac" not in row or
9117
            "ip" not in row or
9118
            "bridge" not in row):
9119
          raise errors.OpPrereqError("Invalid contents of the 'nics'"
9120
                                     " parameter", errors.ECODE_INVAL)
9121
      if not isinstance(self.op.disks, list):
9122
        raise errors.OpPrereqError("Invalid parameter 'disks'",
9123
                                   errors.ECODE_INVAL)
9124
      for row in self.op.disks:
9125
        if (not isinstance(row, dict) or
9126
            "size" not in row or
9127
            not isinstance(row["size"], int) or
9128
            "mode" not in row or
9129
            row["mode"] not in ['r', 'w']):
9130
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
9131
                                     " parameter", errors.ECODE_INVAL)
9132
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
9133
        self.op.hypervisor = self.cfg.GetHypervisorType()
9134
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
9135
      if not hasattr(self.op, "name"):
9136
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
9137
                                   errors.ECODE_INVAL)
9138
      fname = _ExpandInstanceName(self.cfg, self.op.name)
9139
      self.op.name = fname
9140
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
9141
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
9142
      if not hasattr(self.op, "evac_nodes"):
9143
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
9144
                                   " opcode input", errors.ECODE_INVAL)
9145
    else:
9146
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
9147
                                 self.op.mode, errors.ECODE_INVAL)
9148

    
9149
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
9150
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
9151
        raise errors.OpPrereqError("Missing allocator name",
9152
                                   errors.ECODE_INVAL)
9153
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
9154
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
9155
                                 self.op.direction, errors.ECODE_INVAL)
9156

    
9157
  def Exec(self, feedback_fn):
9158
    """Run the allocator test.
9159

9160
    """
9161
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
9162
      ial = IAllocator(self.cfg, self.rpc,
9163
                       mode=self.op.mode,
9164
                       name=self.op.name,
9165
                       mem_size=self.op.mem_size,
9166
                       disks=self.op.disks,
9167
                       disk_template=self.op.disk_template,
9168
                       os=self.op.os,
9169
                       tags=self.op.tags,
9170
                       nics=self.op.nics,
9171
                       vcpus=self.op.vcpus,
9172
                       hypervisor=self.op.hypervisor,
9173
                       )
9174
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
9175
      ial = IAllocator(self.cfg, self.rpc,
9176
                       mode=self.op.mode,
9177
                       name=self.op.name,
9178
                       relocate_from=list(self.relocate_from),
9179
                       )
9180
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
9181
      ial = IAllocator(self.cfg, self.rpc,
9182
                       mode=self.op.mode,
9183
                       evac_nodes=self.op.evac_nodes)
9184
    else:
9185
      raise errors.ProgrammerError("Uncatched mode %s in"
9186
                                   " LUTestAllocator.Exec", self.op.mode)
9187

    
9188
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
9189
      result = ial.in_text
9190
    else:
9191
      ial.Run(self.op.allocator, validate=False)
9192
      result = ial.out_text
9193
    return result