# Copyright (C) 2006, 2007 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0613,W0201
import os
import os.path
import sha
import socket
import time
import tempfile
import re
import platform

from ganeti import rpc
from ganeti import ssh
from ganeti import logger
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import config
from ganeti import constants
from ganeti import objects
from ganeti import opcodes
from ganeti import ssconf


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement CheckPrereq which also fills in the opcode instance
      with all the fields (even if as None)
    - implement Exec
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements (REQ_CLUSTER,
      REQ_MASTER); note that all commands require root permissions

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_CLUSTER = True
  REQ_MASTER = True

  def __init__(self, processor, op, cfg, sstore):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.processor = processor
    self.op = op
    self.cfg = cfg
    self.sstore = sstore
    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError, ("Required parameter '%s' missing" %
                                     attr_name)
    if self.REQ_CLUSTER:
      if not cfg.IsCluster():
        raise errors.OpPrereqError, ("Cluster not initialized yet,"
                                     " use 'gnt-cluster init' first.")
      if self.REQ_MASTER:
        master = sstore.GetMasterNode()
        if master != socket.gethostname():
          raise errors.OpPrereqError, ("Commands must be run on the master"
                                       " node %s" % master)

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form; e.g. a short node name must be fully
    expanded after this method has successfully completed (so that
    hooks, logging, etc. work correctly).

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have 'GANETI_' prefixed as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    As for the node lists, the master should not be included in
    them, as it will be added by the hooks runner in case this LU
    requires a cluster to run on (otherwise we don't have a node
    list). An empty node list should be returned as an empty list (and
    not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError
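
# A minimal sketch of a concrete LU following the rules above (hypothetical
# opcode and fields, shown for illustration only; it is not part of the real
# opcode list):
#
#   class LUExampleNoop(LogicalUnit):
#     HPATH = "example-noop"
#     HTYPE = constants.HTYPE_CLUSTER
#     _OP_REQP = ["node_name"]
#
#     def CheckPrereq(self):
#       # canonicalize the node name, as the base class contract requires
#       self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name)
#
#     def BuildHooksEnv(self):
#       env = {"NODE_NAME": self.op.node_name}  # runner adds GANETI_ prefix
#       return env, [], [self.op.node_name]
#
#     def Exec(self, feedback_fn):
#       feedback_fn("noop on %s" % self.op.node_name)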


class NoHooksLU(LogicalUnit):
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Build hooks env.

    This is a no-op, since we don't run hooks.

    """
    return {}, [], []


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded nodes.

  Args:
    nodes: List of nodes (strings) or None for all

  """
  if nodes is not None and not isinstance(nodes, list):
    raise errors.OpPrereqError, "Invalid argument type 'nodes'"

  if nodes:
    wanted_nodes = []
    for name in nodes:
      node = lu.cfg.GetNodeInfo(lu.cfg.ExpandNodeName(name))
      if node is None:
        raise errors.OpPrereqError, ("No such node name '%s'" % name)
      wanted_nodes.append(node)

    return wanted_nodes
  else:
    return [lu.cfg.GetNodeInfo(name) for name in lu.cfg.GetNodeList()]


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  Args:
    static: Static fields
    dynamic: Dynamic fields

  """
  static_fields = frozenset(static)
  dynamic_fields = frozenset(dynamic)

  all_fields = static_fields | dynamic_fields

  if not all_fields.issuperset(selected):
    raise errors.OpPrereqError, ("Unknown output fields selected: %s"
                                 % ",".join(frozenset(selected).
                                            difference(all_fields)))
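
# Example (hypothetical field names, for illustration): with static=["name"]
# and dynamic=["mfree"], selecting ["name", "bogus"] raises OpPrereqError
# naming the unknown field "bogus".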


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics):
  """Builds instance related env variables for hooks from single variables.

  Args:
    secondary_nodes: List of secondary nodes as strings

  """
  env = {
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    }

  if nics:
    nic_count = len(nics)
    for idx, (ip, bridge) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_BRIDGE" % idx] = bridge
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  return env
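
# For a hypothetical instance "inst1.example.com" with one NIC on bridge
# "xen-br0" this returns e.g.:
#   {"INSTANCE_NAME": "inst1.example.com", ...,
#    "INSTANCE_NIC0_BRIDGE": "xen-br0", "INSTANCE_NIC_COUNT": 1}
# (the hooks runner later prefixes every key with "GANETI_").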


def _BuildInstanceHookEnvByObject(instance, override=None):
  """Builds instance related env variables for hooks from an object.

  Args:
    instance: objects.Instance object of instance
    override: dict of values to override

  """
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.status,
    'memory': instance.memory,
    'vcpus': instance.vcpus,
    'nics': [(nic.ip, nic.bridge) for nic in instance.nics],
    }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args)


def _UpdateEtcHosts(fullnode, ip):
  """Ensure a node has a correct entry in /etc/hosts.

  Args:
    fullnode - Fully qualified domain name of host. (str)
    ip       - IPv4 address of host (str)

  """
  node = fullnode.split(".", 1)[0]

  f = open('/etc/hosts', 'r+')

  inthere = False
  save_lines = []
  add_lines = []
  removed = False

  while True:
    rawline = f.readline()

    if not rawline:
      # End of file
      break

    line = rawline.split('\n')[0]

    # Strip off comments
    line = line.split('#')[0]

    if not line:
      # Entire line was comment, skip
      save_lines.append(rawline)
      continue

    fields = line.split()

    haveall = True
    havesome = False
    for spec in [ ip, fullnode, node ]:
      if spec not in fields:
        haveall = False
      if spec in fields:
        havesome = True

    if haveall:
      inthere = True
      save_lines.append(rawline)
      continue

    if havesome and not haveall:
      # Line (old, or manual?) which is missing some. Remove.
      removed = True
      continue

    save_lines.append(rawline)

  if not inthere:
    add_lines.append('%s\t%s %s\n' % (ip, fullnode, node))

  if add_lines:
    save_lines = save_lines + add_lines

  if removed:
    # We removed a line, write a new file and replace old.
    fd, tmpname = tempfile.mkstemp('tmp', 'hosts_', '/etc')
    newfile = os.fdopen(fd, 'w')
    newfile.write(''.join(save_lines))
    newfile.close()
    os.rename(tmpname, '/etc/hosts')
  elif add_lines:
    # Simply appending a new line will do the trick.
    f.seek(0, 2)
    for add in add_lines:
      f.write(add)

  f.close()
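
# After a successful run /etc/hosts contains exactly one line of the form
#   <ip>\t<fqdn> <shortname>
# e.g. "192.0.2.10\tnode1.example.com node1" (example values).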


def _UpdateKnownHosts(fullnode, ip, pubkey):
  """Ensure a node has a correct known_hosts entry.

  Args:
    fullnode - Fully qualified domain name of host. (str)
    ip       - IPv4 address of host (str)
    pubkey   - the public key of the cluster

  """
  if os.path.exists('/etc/ssh/ssh_known_hosts'):
    f = open('/etc/ssh/ssh_known_hosts', 'r+')
  else:
    f = open('/etc/ssh/ssh_known_hosts', 'w+')

  inthere = False
  save_lines = []
  add_lines = []
  removed = False

  while True:
    rawline = f.readline()
    logger.Debug('read %s' % (repr(rawline),))

    if not rawline:
      # End of file
      break

    line = rawline.split('\n')[0]

    parts = line.split(' ')
    fields = parts[0].split(',')
    key = parts[2]

    haveall = True
    havesome = False
    for spec in [ ip, fullnode ]:
      if spec not in fields:
        haveall = False
      if spec in fields:
        havesome = True

    logger.Debug("key, pubkey = %s." % (repr((key, pubkey)),))
    if haveall and key == pubkey:
      inthere = True
      save_lines.append(rawline)
      logger.Debug("Keeping known_hosts '%s'." % (repr(rawline),))
      continue

    if havesome and (not haveall or key != pubkey):
      removed = True
      logger.Debug("Discarding known_hosts '%s'." % (repr(rawline),))
      continue

    save_lines.append(rawline)

  if not inthere:
    add_lines.append('%s,%s ssh-rsa %s\n' % (fullnode, ip, pubkey))
    logger.Debug("Adding known_hosts '%s'." % (repr(add_lines[-1]),))

  if add_lines:
    save_lines = save_lines + add_lines

  if removed:
    # Write a new file and replace old.
    fd, tmpname = tempfile.mkstemp('tmp', 'ssh_known_hosts_', '/etc/ssh')
    newfile = os.fdopen(fd, 'w')
    newfile.write(''.join(save_lines))
    newfile.close()
    logger.Debug("Wrote new known_hosts.")
    os.rename(tmpname, '/etc/ssh/ssh_known_hosts')
  elif add_lines:
    # Simply appending a new line will do the trick.
    f.seek(0, 2)
    for add in add_lines:
      f.write(add)

  f.close()
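
# The entry written here has the standard known_hosts form
#   <fqdn>,<ip> ssh-rsa <pubkey>
# so ssh connections by either name or address verify against the single
# cluster-wide host key.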


def _HasValidVG(vglist, vgname):
  """Checks if the volume group list is valid.

  A non-None return value means there's an error, and the return value
  is the error message.

  """
  vgsize = vglist.get(vgname, None)
  if vgsize is None:
    return "volume group '%s' missing" % vgname
  elif vgsize < 20480:
    return ("volume group '%s' too small (20480MiB required, %dMiB found)" %
            (vgname, vgsize))
  return None
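
# Typical use (see LUInitCluster.CheckPrereq below):
#   vgstatus = _HasValidVG(utils.ListVolumeGroups(), vg_name)
# where a None result means the volume group exists and is at least 20 GiB.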


def _InitSSHSetup(node):
  """Setup the SSH configuration for the cluster.

  This generates a dsa keypair for root, adds the pub key to the
  permitted hosts and adds the hostkey to its own known hosts.

  Args:
    node: the name of this host as a fqdn

  """
  utils.RemoveFile('/root/.ssh/known_hosts')

  if os.path.exists('/root/.ssh/id_dsa'):
    utils.CreateBackup('/root/.ssh/id_dsa')
  if os.path.exists('/root/.ssh/id_dsa.pub'):
    utils.CreateBackup('/root/.ssh/id_dsa.pub')

  utils.RemoveFile('/root/.ssh/id_dsa')
  utils.RemoveFile('/root/.ssh/id_dsa.pub')

  result = utils.RunCmd(["ssh-keygen", "-t", "dsa",
                         "-f", "/root/.ssh/id_dsa",
                         "-q", "-N", ""])
  if result.failed:
    raise errors.OpExecError, ("could not generate ssh keypair, error %s" %
                               result.output)

  f = open('/root/.ssh/id_dsa.pub', 'r')
  try:
    utils.AddAuthorizedKey('/root/.ssh/authorized_keys', f.read(8192))
  finally:
    f.close()


def _InitGanetiServerSetup(ss):
  """Setup the necessary configuration for the initial node daemon.

  This creates the nodepass file containing the shared password for
  the cluster and also generates the SSL certificate.

  """
  # Create pseudo random password
  randpass = sha.new(os.urandom(64)).hexdigest()
  # and write it into sstore
  ss.SetKey(ss.SS_NODED_PASS, randpass)

  result = utils.RunCmd(["openssl", "req", "-new", "-newkey", "rsa:1024",
                         "-days", str(365*5), "-nodes", "-x509",
                         "-keyout", constants.SSL_CERT_FILE,
                         "-out", constants.SSL_CERT_FILE, "-batch"])
  if result.failed:
    raise errors.OpExecError, ("could not generate server ssl cert, command"
                               " %s had exitcode %s and error message %s" %
                               (result.cmd, result.exit_code, result.output))

  os.chmod(constants.SSL_CERT_FILE, 0400)

  result = utils.RunCmd([constants.NODE_INITD_SCRIPT, "restart"])

  if result.failed:
    raise errors.OpExecError, ("could not start the node daemon, command %s"
                               " had exitcode %s and error %s" %
                               (result.cmd, result.exit_code, result.output))
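
# Note that "-keyout" and "-out" above point at the same path, so after this
# runs constants.SSL_CERT_FILE holds both the private key and the self-signed
# certificate, which the node daemon restarted below picks up.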


class LUInitCluster(LogicalUnit):
  """Initialise the cluster.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["cluster_name", "hypervisor_type", "vg_name", "mac_prefix",
              "def_bridge", "master_netdev"]
  REQ_CLUSTER = False

  def BuildHooksEnv(self):
    """Build hooks env.

    Notes: Since we don't require a cluster, we must manually add
    ourselves in the post-run node list.

    """
    env = {
      "CLUSTER": self.op.cluster_name,
      "MASTER": self.hostname['hostname_full'],
      }
    return env, [], [self.hostname['hostname_full']]

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    if config.ConfigWriter.IsCluster():
      raise errors.OpPrereqError, ("Cluster is already initialised")

    hostname_local = socket.gethostname()
    self.hostname = hostname = utils.LookupHostname(hostname_local)
    if not hostname:
      raise errors.OpPrereqError, ("Cannot resolve my own hostname ('%s')" %
                                   hostname_local)

    self.clustername = clustername = utils.LookupHostname(self.op.cluster_name)
    if not clustername:
      raise errors.OpPrereqError, ("Cannot resolve given cluster name ('%s')"
                                   % self.op.cluster_name)

    result = utils.RunCmd(["fping", "-S127.0.0.1", "-q", hostname['ip']])
    if result.failed:
      raise errors.OpPrereqError, ("Inconsistency: this host's name resolves"
                                   " to %s,\nbut this ip address does not"
                                   " belong to this host."
                                   " Aborting." % hostname['ip'])

    secondary_ip = getattr(self.op, "secondary_ip", None)
    if secondary_ip and not utils.IsValidIP(secondary_ip):
      raise errors.OpPrereqError, ("Invalid secondary ip given")
    if secondary_ip and secondary_ip != hostname['ip']:
      result = utils.RunCmd(["fping", "-S127.0.0.1", "-q", secondary_ip])
      if result.failed:
        raise errors.OpPrereqError, ("You gave %s as secondary IP,\n"
                                     "but it does not belong to this host." %
                                     secondary_ip)
    self.secondary_ip = secondary_ip

    # checks presence of the volume group given
    vgstatus = _HasValidVG(utils.ListVolumeGroups(), self.op.vg_name)
    if vgstatus:
      raise errors.OpPrereqError, ("Error: %s" % vgstatus)

    if not re.match("^[0-9a-z]{2}:[0-9a-z]{2}:[0-9a-z]{2}$",
                    self.op.mac_prefix):
      raise errors.OpPrereqError, ("Invalid mac prefix given '%s'" %
                                   self.op.mac_prefix)

    if self.op.hypervisor_type not in hypervisor.VALID_HTYPES:
      raise errors.OpPrereqError, ("Invalid hypervisor type given '%s'" %
                                   self.op.hypervisor_type)

    result = utils.RunCmd(["ip", "link", "show", "dev", self.op.master_netdev])
    if result.failed:
      raise errors.OpPrereqError, ("Invalid master netdev given (%s): '%s'" %
                                   (self.op.master_netdev, result.output))

  def Exec(self, feedback_fn):
    """Initialize the cluster.

    """
    clustername = self.clustername
    hostname = self.hostname

    # set up the simple store
    ss = ssconf.SimpleStore()
    ss.SetKey(ss.SS_HYPERVISOR, self.op.hypervisor_type)
    ss.SetKey(ss.SS_MASTER_NODE, hostname['hostname_full'])
    ss.SetKey(ss.SS_MASTER_IP, clustername['ip'])
    ss.SetKey(ss.SS_MASTER_NETDEV, self.op.master_netdev)
    ss.SetKey(ss.SS_CLUSTER_NAME, clustername['hostname'])

    # set up the inter-node password and certificate
    _InitGanetiServerSetup(ss)

    # start the master ip
    rpc.call_node_start_master(hostname['hostname_full'])

    # set up ssh config and /etc/hosts
    f = open('/etc/ssh/ssh_host_rsa_key.pub', 'r')
    try:
      sshline = f.read()
    finally:
      f.close()
    sshkey = sshline.split(" ")[1]

    _UpdateEtcHosts(hostname['hostname_full'],
                    hostname['ip'])

    _UpdateKnownHosts(hostname['hostname_full'],
                      hostname['ip'],
                      sshkey)

    _InitSSHSetup(hostname['hostname'])

    # init of cluster config file
    cfgw = config.ConfigWriter()
    cfgw.InitConfig(hostname['hostname'], hostname['ip'], self.secondary_ip,
                    sshkey, self.op.mac_prefix,
                    self.op.vg_name, self.op.def_bridge)


class LUDestroyCluster(NoHooksLU):
  """Logical unit for destroying the cluster.

  """
  _OP_REQP = []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signalled by raising errors.OpPrereqError.

    """
    master = self.sstore.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError, ("There are still %d node(s) in "
                                   "this cluster." % (len(nodelist) - 1))
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError, ("There are still %d instance(s) in "
                                   "this cluster." % len(instancelist))

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    utils.CreateBackup('/root/.ssh/id_dsa')
    utils.CreateBackup('/root/.ssh/id_dsa.pub')
    rpc.call_node_leave_cluster(self.sstore.GetMasterNode())


class LUVerifyCluster(NoHooksLU):
  """Verifies the cluster status.

  """
  _OP_REQP = []

  def _VerifyNode(self, node, file_list, local_cksum, vglist, node_result,
                  remote_version, feedback_fn):
    """Run multiple tests against a node.

    Test list:
      - compares ganeti version
      - checks vg existence and size > 20G
      - checks config file checksum
      - checks ssh to other nodes

    Args:
      node: name of the node to check
      file_list: required list of files
      local_cksum: dictionary of local files and their checksums

    """
    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    if not remote_version:
      feedback_fn(" - ERROR: connection to %s failed" % (node))
      return True

    if local_version != remote_version:
      feedback_fn(" - ERROR: sw version mismatch: master %s, node(%s) %s" %
                  (local_version, node, remote_version))
      return True

    # checks vg existence and size > 20G
    bad = False
    if not vglist:
      feedback_fn(" - ERROR: unable to check volume groups on node %s." %
                  (node,))
      bad = True
    else:
      vgstatus = _HasValidVG(vglist, self.cfg.GetVGName())
      if vgstatus:
        feedback_fn(" - ERROR: %s on node %s" % (vgstatus, node))
        bad = True

    # checks config file checksum

    if 'filelist' not in node_result:
      bad = True
      feedback_fn(" - ERROR: node hasn't returned file checksum data")
    else:
      remote_cksum = node_result['filelist']
      for file_name in file_list:
        if file_name not in remote_cksum:
          bad = True
          feedback_fn(" - ERROR: file '%s' missing" % file_name)
        elif remote_cksum[file_name] != local_cksum[file_name]:
          bad = True
          feedback_fn(" - ERROR: file '%s' has wrong checksum" % file_name)

    if 'nodelist' not in node_result:
      bad = True
      feedback_fn(" - ERROR: node hasn't returned node connectivity data")
    else:
      if node_result['nodelist']:
        bad = True
        for node in node_result['nodelist']:
          feedback_fn(" - ERROR: communication with node '%s': %s" %
                      (node, node_result['nodelist'][node]))
    hyp_result = node_result.get('hypervisor', None)
    if hyp_result is not None:
      feedback_fn(" - ERROR: hypervisor verify failure: '%s'" % hyp_result)
    return bad

  def _VerifyInstance(self, instance, node_vol_is, node_instance, feedback_fn):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    bad = False

    instancelist = self.cfg.GetInstanceList()
    if not instance in instancelist:
      feedback_fn(" - ERROR: instance %s not in instance list %s" %
                  (instance, instancelist))
      bad = True

    instanceconfig = self.cfg.GetInstanceInfo(instance)
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      for volume in node_vol_should[node]:
        if node not in node_vol_is or volume not in node_vol_is[node]:
          feedback_fn(" - ERROR: volume %s missing on node %s" %
                      (volume, node))
          bad = True

    if not instanceconfig.status == 'down':
      if not instance in node_instance[node_current]:
        feedback_fn(" - ERROR: instance %s not running on node %s" %
                    (instance, node_current))
        bad = True

    for node in node_instance:
      if (not node == node_current):
        if instance in node_instance[node]:
          feedback_fn(" - ERROR: instance %s should not run on node %s" %
                      (instance, node))
          bad = True

    return bad

  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is, feedback_fn):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    """
    bad = False

    for node in node_vol_is:
      for volume in node_vol_is[node]:
        if node not in node_vol_should or volume not in node_vol_should[node]:
          feedback_fn(" - ERROR: volume %s on node %s should not exist" %
                      (volume, node))
          bad = True

    return bad

  def _VerifyOrphanInstances(self, instancelist, node_instance, feedback_fn):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    bad = False
    for node in node_instance:
      for runninginstance in node_instance[node]:
        if runninginstance not in instancelist:
          feedback_fn(" - ERROR: instance %s on node %s should not exist" %
                      (runninginstance, node))
          bad = True

    return bad

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    bad = False
    feedback_fn("* Verifying global settings")
    self.cfg.VerifyConfig()

    master = self.sstore.GetMasterNode()
    vg_name = self.cfg.GetVGName()
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    node_volume = {}
    node_instance = {}

    # FIXME: verify OS list
    # do local checksums
    file_names = list(self.sstore.GetFileList())
    file_names.append(constants.SSL_CERT_FILE)
    file_names.append(constants.CLUSTER_CONF_FILE)
    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    all_volumeinfo = rpc.call_volume_list(nodelist, vg_name)
    all_instanceinfo = rpc.call_instance_list(nodelist)
    all_vglist = rpc.call_vg_list(nodelist)
    node_verify_param = {
      'filelist': file_names,
      'nodelist': nodelist,
      'hypervisor': None,
      }
    all_nvinfo = rpc.call_node_verify(nodelist, node_verify_param)
    all_rversion = rpc.call_version(nodelist)

    for node in nodelist:
      feedback_fn("* Verifying node %s" % node)
      result = self._VerifyNode(node, file_names, local_checksums,
                                all_vglist[node], all_nvinfo[node],
                                all_rversion[node], feedback_fn)
      bad = bad or result

      # node_volume
      volumeinfo = all_volumeinfo[node]

      if type(volumeinfo) != dict:
        feedback_fn(" - ERROR: connection to %s failed" % (node,))
        bad = True
        continue

      node_volume[node] = volumeinfo

      # node_instance
      nodeinstance = all_instanceinfo[node]
      if type(nodeinstance) != list:
        feedback_fn(" - ERROR: connection to %s failed" % (node,))
        bad = True
        continue

      node_instance[node] = nodeinstance

    node_vol_should = {}

    for instance in instancelist:
      feedback_fn("* Verifying instance %s" % instance)
      result = self._VerifyInstance(instance, node_volume, node_instance,
                                    feedback_fn)
      bad = bad or result

      inst_config = self.cfg.GetInstanceInfo(instance)

      inst_config.MapLVsByNode(node_vol_should)

    feedback_fn("* Verifying orphan volumes")
    result = self._VerifyOrphanVolumes(node_vol_should, node_volume,
                                       feedback_fn)
    bad = bad or result

    feedback_fn("* Verifying remaining instances")
    result = self._VerifyOrphanInstances(instancelist, node_instance,
                                         feedback_fn)
    bad = bad or result

    return int(bad)


def _WaitForSync(cfgw, instance, oneshot=False, unlock=False):
  """Sleep and poll for an instance's disks to sync.

  """
  if not instance.disks:
    return True

  if not oneshot:
    logger.ToStdout("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in instance.disks:
    cfgw.SetDiskID(dev, node)

  retries = 0
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = rpc.call_blockdev_getmirrorstatus(node, instance.disks)
    if not rstats:
      logger.ToStderr("Can't get any data from node %s" % node)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError, ("Can't contact node %s for mirror data,"
                                   " aborting." % node)
      time.sleep(6)
      continue
    retries = 0
    for i in range(len(rstats)):
      mstat = rstats[i]
      if mstat is None:
        logger.ToStderr("Can't compute data for node %s/%s" %
                        (node, instance.disks[i].iv_name))
        continue
      perc_done, est_time, is_degraded = mstat
      cumul_degraded = cumul_degraded or (is_degraded and perc_done is None)
      if perc_done is not None:
        done = False
        if est_time is not None:
          rem_time = "%d estimated seconds remaining" % est_time
          max_time = est_time
        else:
          rem_time = "no time estimate"
        logger.ToStdout("- device %s: %5.2f%% done, %s" %
                        (instance.disks[i].iv_name, perc_done, rem_time))
    if done or oneshot:
      break

    if unlock:
      utils.Unlock('cmd')
    time.sleep(min(60, max_time))
    if unlock:
      utils.Lock('cmd')

  if not oneshot:
    logger.ToStdout("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
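
# The return value is True when no device ended up degraded; a device that
# reports is_degraded while no resync is running (perc_done is None) makes
# this function return False even though the wait loop itself has finished.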


def _CheckDiskConsistency(cfgw, dev, node, on_primary):
  """Check that mirrors are not degraded.

  """
  cfgw.SetDiskID(dev, node)

  result = True
  if on_primary or dev.AssembleOnSecondary():
    rstats = rpc.call_blockdev_find(node, dev)
    if not rstats:
      logger.ToStderr("Can't get any data from node %s" % node)
      result = False
    else:
      # the sixth field of the remote status is the is_degraded flag
      result = result and (not rstats[5])
  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(cfgw, child, node, on_primary)

  return result
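
# Used e.g. by LUFailoverInstance with on_primary=False to make sure the
# mirrors on the target (secondary) node are clean before failing over.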


class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_REQP = []

  def CheckPrereq(self):
    """Check prerequisites.

    This always succeeds, since this is a pure query LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    """
    node_list = self.cfg.GetNodeList()
    node_data = rpc.call_os_diagnose(node_list)
    if node_data == False:
      raise errors.OpExecError, "Can't gather the list of OSes"

    return node_data


class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would not allow itself to run.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }
    all_nodes = self.cfg.GetNodeList()
    all_nodes.remove(self.op.node_name)
    return env, all_nodes, all_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
    - the node exists in the configuration
    - it does not have primary or secondary instances
    - it's not the master

    Any errors are signalled by raising errors.OpPrereqError.

    """
    node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
    if node is None:
      logger.Error("Error: Node '%s' is unknown." % self.op.node_name)
      raise errors.OpPrereqError, ("Node '%s' is unknown" %
                                   self.op.node_name)

    instance_list = self.cfg.GetInstanceList()

    masternode = self.sstore.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError, ("Node is the master node,"
                                   " you need to failover first.")

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name == instance.primary_node:
        raise errors.OpPrereqError, ("Instance %s still running on the node,"
                                     " please remove first." % instance_name)
      if node.name in instance.secondary_nodes:
        raise errors.OpPrereqError, ("Instance %s has node as a secondary,"
                                     " please remove first." % instance_name)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logger.Info("stopping the node daemon and removing configs from node %s" %
                node.name)

    rpc.call_node_leave_cluster(node.name)

    ssh.SSHCall(node.name, 'root', "%s stop" % constants.NODE_INITD_SCRIPT)

    logger.Info("Removing node %s from config" % node.name)

    self.cfg.RemoveNode(node.name)


class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  _OP_REQP = ["output_fields"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the fields required are valid output fields.

    """
    self.dynamic_fields = frozenset(["dtotal", "dfree",
                                     "mtotal", "mnode", "mfree"])

    _CheckOutputFields(static=["name", "pinst", "sinst", "pip", "sip"],
                       dynamic=self.dynamic_fields,
                       selected=self.op.output_fields)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = utils.NiceSort(self.cfg.GetNodeList())
    nodelist = [self.cfg.GetNodeInfo(name) for name in nodenames]

    # begin data gathering

    if self.dynamic_fields.intersection(self.op.output_fields):
      live_data = {}
      node_data = rpc.call_node_info(nodenames, self.cfg.GetVGName())
      for name in nodenames:
        nodeinfo = node_data.get(name, None)
        if nodeinfo:
          live_data[name] = {
            "mtotal": utils.TryConvert(int, nodeinfo['memory_total']),
            "mnode": utils.TryConvert(int, nodeinfo['memory_dom0']),
            "mfree": utils.TryConvert(int, nodeinfo['memory_free']),
            "dtotal": utils.TryConvert(int, nodeinfo['vg_size']),
            "dfree": utils.TryConvert(int, nodeinfo['vg_free']),
            }
        else:
          live_data[name] = {}
    else:
      live_data = dict.fromkeys(nodenames, {})

    node_to_primary = dict.fromkeys(nodenames, 0)
    node_to_secondary = dict.fromkeys(nodenames, 0)

    if "pinst" in self.op.output_fields or "sinst" in self.op.output_fields:
      instancelist = self.cfg.GetInstanceList()

      for instance in instancelist:
        instanceinfo = self.cfg.GetInstanceInfo(instance)
        node_to_primary[instanceinfo.primary_node] += 1
        for secnode in instanceinfo.secondary_nodes:
          node_to_secondary[secnode] += 1

    # end data gathering

    output = []
    for node in nodelist:
      node_output = []
      for field in self.op.output_fields:
        if field == "name":
          val = node.name
        elif field == "pinst":
          val = node_to_primary[node.name]
        elif field == "sinst":
          val = node_to_secondary[node.name]
        elif field == "pip":
          val = node.primary_ip
        elif field == "sip":
          val = node.secondary_ip
        elif field in self.dynamic_fields:
          val = live_data[node.name].get(field, "?")
        else:
          raise errors.ParameterError, field

        node_output.append(val)
      output.append(node_output)

    return output
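
# For output_fields=["name", "mfree"] on a two-node cluster the result is
# one row per node, e.g. [["node1", 2048], ["node2", "?"]] -- "?" standing
# for a node that returned no live data (example values).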


class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_REQP = ["nodes", "output_fields"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the fields required are valid output fields.

    """
    self.nodes = _GetWantedNodes(self, self.op.nodes)

    _CheckOutputFields(static=["node"],
                       dynamic=["phys", "vg", "name", "size", "instance"],
                       selected=self.op.output_fields)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = utils.NiceSort([node.name for node in self.nodes])
    volumes = rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    output = []
    for node in nodenames:
      node_vols = volumes[node][:]
      node_vols.sort(key=lambda vol: vol['dev'])

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                break
            else:
              inst = None
            val = inst and inst.name or '-'
          else:
            raise errors.ParameterError, field
          node_output.append(str(val))

        output.append(node_output)

    return output


class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      }
    nodes_0 = self.cfg.GetNodeList()
    nodes_1 = nodes_0 + [self.op.node_name, ]
    return env, nodes_0, nodes_1

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
    - the new node is not already in the config
    - it is resolvable
    - its parameters (single/dual homed) match the cluster

    Any errors are signalled by raising errors.OpPrereqError.

    """
    node_name = self.op.node_name
    cfg = self.cfg

    dns_data = utils.LookupHostname(node_name)
    if not dns_data:
      raise errors.OpPrereqError, ("Node %s is not resolvable" % node_name)

    node = dns_data['hostname']
    primary_ip = self.op.primary_ip = dns_data['ip']
    secondary_ip = getattr(self.op, "secondary_ip", None)
    if secondary_ip is None:
      secondary_ip = primary_ip
    if not utils.IsValidIP(secondary_ip):
      raise errors.OpPrereqError, ("Invalid secondary IP given")
    self.op.secondary_ip = secondary_ip
    node_list = cfg.GetNodeList()
    if node in node_list:
      raise errors.OpPrereqError, ("Node %s is already in the configuration"
                                   % node)

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)
      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError, ("New node ip address(es) conflict with"
                                     " existing node %s" % existing_node.name)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.sstore.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError, ("The master has no private ip but the"
                                     " new node has one")
      else:
        raise errors.OpPrereqError, ("The master has a private ip but the"
                                     " new node doesn't have one")

    # checks reachability
    command = ["fping", "-q", primary_ip]
    result = utils.RunCmd(command)
    if result.failed:
      raise errors.OpPrereqError, ("Node not reachable by ping")

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      command = ["fping", "-S%s" % myself.secondary_ip, "-q", secondary_ip]
      result = utils.RunCmd(command)
      if result.failed:
        raise errors.OpPrereqError, ("Node secondary ip not reachable by ping")

    self.new_node = objects.Node(name=node,
                                 primary_ip=primary_ip,
                                 secondary_ip=secondary_ip)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # set up inter-node password and certificate and restarts the node daemon
    gntpass = self.sstore.GetNodeDaemonPassword()
    if not re.match('^[a-zA-Z0-9.]{1,64}$', gntpass):
      raise errors.OpExecError, ("ganeti password corruption detected")
    f = open(constants.SSL_CERT_FILE)
    try:
      gntpem = f.read(8192)
    finally:
      f.close()
    # in the base64 pem encoding, neither '!' nor '.' are valid chars,
    # so we use this to detect an invalid certificate; as long as the
    # cert doesn't contain this, the here-document will be correctly
    # parsed by the shell sequence below
    if re.search('^!EOF\.', gntpem, re.MULTILINE):
      raise errors.OpExecError, ("invalid PEM encoding in the SSL certificate")
    if not gntpem.endswith("\n"):
      raise errors.OpExecError, ("PEM must end with newline")
    logger.Info("copy cluster pass to %s and starting the node daemon" % node)

    # remove first the root's known_hosts file
    utils.RemoveFile("/root/.ssh/known_hosts")
    # and then connect with ssh to set password and start ganeti-noded
    # note that all the below variables are sanitized at this point,
    # either by being constants or by the checks above
    ss = self.sstore
    mycommand = ("umask 077 && "
                 "echo '%s' > '%s' && "
                 "cat > '%s' << '!EOF.' && \n"
                 "%s!EOF.\n%s restart" %
                 (gntpass, ss.KeyToFilename(ss.SS_NODED_PASS),
                  constants.SSL_CERT_FILE, gntpem,
                  constants.NODE_INITD_SCRIPT))
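
    # When rendered, mycommand is a single shell snippet of the form
    #   umask 077 && echo '<pass>' > '<nodepass file>' && \
    #   cat > '<cert file>' << '!EOF.' &&
    #   <PEM data>
    #   !EOF.
    #   <init script> restart
    # i.e. the certificate travels as a here-document delimited by '!EOF.'.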

    result = ssh.SSHCall(node, 'root', mycommand, batch=False, ask_key=True)
    if result.failed:
      raise errors.OpExecError, ("Remote command on node %s, error: %s,"
                                 " output: %s" %
                                 (node, result.fail_reason, result.output))

    # check connectivity
    time.sleep(4)

    result = rpc.call_version([node])[node]
    if result:
      if constants.PROTOCOL_VERSION == result:
        logger.Info("communication to node %s fine, sw version %s match" %
                    (node, result))
      else:
        raise errors.OpExecError, ("Version mismatch master version %s,"
                                   " node version %s" %
                                   (constants.PROTOCOL_VERSION, result))
    else:
      raise errors.OpExecError, ("Cannot get version from the new node")

    # setup ssh on node
    logger.Info("copy ssh key to node %s" % node)
    keyarray = []
    keyfiles = ["/etc/ssh/ssh_host_dsa_key", "/etc/ssh/ssh_host_dsa_key.pub",
                "/etc/ssh/ssh_host_rsa_key", "/etc/ssh/ssh_host_rsa_key.pub",
                "/root/.ssh/id_dsa", "/root/.ssh/id_dsa.pub"]

    for i in keyfiles:
      f = open(i, 'r')
      try:
        keyarray.append(f.read())
      finally:
        f.close()

    result = rpc.call_node_add(node, keyarray[0], keyarray[1], keyarray[2],
                               keyarray[3], keyarray[4], keyarray[5])

    if not result:
      raise errors.OpExecError, ("Cannot transfer ssh keys to the new node")

    # Add node to our /etc/hosts, and add key to known_hosts
    _UpdateEtcHosts(new_node.name, new_node.primary_ip)
    _UpdateKnownHosts(new_node.name, new_node.primary_ip,
                      self.cfg.GetHostKey())

    if new_node.secondary_ip != new_node.primary_ip:
      result = ssh.SSHCall(node, "root",
                           "fping -S 127.0.0.1 -q %s" % new_node.secondary_ip)
      if result.failed:
        raise errors.OpExecError, ("Node claims it doesn't have the"
                                   " secondary ip you gave (%s).\n"
                                   "Please fix and re-run this command." %
                                   new_node.secondary_ip)

    # Distribute updated /etc/hosts and known_hosts to all nodes,
    # including the node just added
    myself = self.cfg.GetNodeInfo(self.sstore.GetMasterNode())
    dist_nodes = self.cfg.GetNodeList() + [node]
    if myself.name in dist_nodes:
      dist_nodes.remove(myself.name)

    logger.Debug("Copying hosts and known_hosts to all nodes")
    for fname in ("/etc/hosts", "/etc/ssh/ssh_known_hosts"):
      result = rpc.call_upload_file(dist_nodes, fname)
      for to_node in dist_nodes:
        if not result[to_node]:
          logger.Error("copy of file %s to node %s failed" %
                       (fname, to_node))

    to_copy = ss.GetFileList()
    for fname in to_copy:
      if not ssh.CopyFileToNode(node, fname):
        logger.Error("could not copy file %s to node %s" % (fname, node))

    logger.Info("adding node %s to cluster.conf" % node)
    self.cfg.AddNode(new_node)


class LUMasterFailover(LogicalUnit):
  """Failover the master node to the current node.

  This is a special LU in that it must run on a non-master node.

  """
  HPATH = "master-failover"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_MASTER = False

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the new master only in the pre phase, and on all
    the nodes in the post phase.

    """
    env = {
      "NEW_MASTER": self.new_master,
      "OLD_MASTER": self.old_master,
      }
    return env, [self.new_master], self.cfg.GetNodeList()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we are not already the master.

    """
    self.new_master = socket.gethostname()

    self.old_master = self.sstore.GetMasterNode()

    if self.old_master == self.new_master:
      raise errors.OpPrereqError, ("This command must be run on the node"
                                   " where you want the new master to be.\n"
                                   "%s is already the master" %
                                   self.old_master)

  def Exec(self, feedback_fn):
    """Failover the master node.

    This command, when run on a non-master node, will cause the current
    master to cease being master, and the non-master to become new
    master.

    """
    #TODO: do not rely on gethostname returning the FQDN
    logger.Info("setting master to %s, old master: %s" %
                (self.new_master, self.old_master))

    if not rpc.call_node_stop_master(self.old_master):
      logger.Error("could not disable the master role on the old master"
                   " %s, please disable manually" % self.old_master)

    ss = self.sstore
    ss.SetKey(ss.SS_MASTER_NODE, self.new_master)
    if not rpc.call_upload_file(self.cfg.GetNodeList(),
                                ss.KeyToFilename(ss.SS_MASTER_NODE)):
      logger.Error("could not distribute the new simple store master file"
                   " to the other nodes, please check.")

    if not rpc.call_node_start_master(self.new_master):
      logger.Error("could not start the master role on the new master"
                   " %s, please check" % self.new_master)
      feedback_fn("Error in activating the master IP on the new master,\n"
                  "please fix manually.")


class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  _OP_REQP = []
  REQ_MASTER = False

  def CheckPrereq(self):
    """No prerequisites needed for this LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    result = {
      "name": self.sstore.GetClusterName(),
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": constants.OS_API_VERSION,
      "export_version": constants.EXPORT_VERSION,
      "master": self.sstore.GetMasterNode(),
      "architecture": (platform.architecture()[0], platform.machine()),
      }

    return result


class LUClusterCopyFile(NoHooksLU):
  """Copy file to cluster.

  """
  _OP_REQP = ["nodes", "filename"]

  def CheckPrereq(self):
    """Check prerequisites.

    It should check that the named file exists and that the given list
    of nodes is valid.

    """
    if not os.path.exists(self.op.filename):
      raise errors.OpPrereqError("No such filename '%s'" % self.op.filename)

    self.nodes = _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Copy a file from master to some nodes.

    The target nodes come from self.op.nodes (all nodes when the list is
    empty); the master itself is skipped.

    """
    filename = self.op.filename

    myname = socket.gethostname()

    for node in self.nodes:
      if node.name == myname:
        continue
      if not ssh.CopyFileToNode(node.name, filename):
        logger.Error("Copy of file %s to node %s failed" %
                     (filename, node.name))


class LUDumpClusterConfig(NoHooksLU):
  """Return a text-representation of the cluster-config.

  """
  _OP_REQP = []

  def CheckPrereq(self):
    """No prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    return self.cfg.DumpConfig()


class LURunClusterCommand(NoHooksLU):
  """Run a command on some nodes.

  """
  _OP_REQP = ["command", "nodes"]

  def CheckPrereq(self):
    """Check prerequisites.

    It checks that the given list of nodes is valid.

    """
    self.nodes = _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Run a command on some nodes.

    """
    data = []
    for node in self.nodes:
      result = utils.RunCmd(["ssh", node.name, self.op.command])
      data.append((node.name, result.cmd, result.output, result.exit_code))

    return data


class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_REQP = ["instance_name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError, ("Instance '%s' not known" %
                                   self.op.instance_name)
    self.instance = instance

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = _AssembleInstanceDisks(self.instance, self.cfg)
    if not disks_ok:
      raise errors.OpExecError, ("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(instance, cfg, ignore_secondaries=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  Args:
    instance: a ganeti.objects.Instance object
    ignore_secondaries: if true, errors on secondary nodes won't result
                        in an error return from the function

  Returns:
    a tuple (disks_ok, device_info): disks_ok is false if the operation
    failed; device_info is a list of (host, instance_visible_name,
    node_visible_name) tuples with the mapping from node devices to
    instance devices, filled when the operation succeeded

  """
  device_info = []
  disks_ok = True
  for inst_disk in instance.disks:
    master_result = None
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      cfg.SetDiskID(node_disk, node)
      is_primary = node == instance.primary_node
      result = rpc.call_blockdev_assemble(node, node_disk, is_primary)
      if not result:
        logger.Error("could not prepare block device %s on node %s (is_pri"
                     "mary=%s)" % (inst_disk.iv_name, node, is_primary))
        if is_primary or not ignore_secondaries:
          disks_ok = False
      if is_primary:
        master_result = result
    device_info.append((instance.primary_node, inst_disk.iv_name,
                        master_result))

  return disks_ok, device_info


def _StartInstanceDisks(cfg, instance, force):
  """Start the disks of an instance, aborting on consistency errors.

  """
  disks_ok, dummy = _AssembleInstanceDisks(instance, cfg,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(instance, cfg)
    if force is not None and not force:
      logger.Error("If the message above refers to a secondary node,"
                   " you can retry the operation using '--force'.")
    raise errors.OpExecError, ("Disk consistency error")


class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_REQP = ["instance_name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError, ("Instance '%s' not known" %
                                   self.op.instance_name)
    self.instance = instance

  def Exec(self, feedback_fn):
    """Deactivate the disks.

    """
    instance = self.instance
    ins_l = rpc.call_instance_list([instance.primary_node])
    ins_l = ins_l[instance.primary_node]
    if not type(ins_l) is list:
      raise errors.OpExecError, ("Can't contact node '%s'" %
                                 instance.primary_node)

    if self.instance.name in ins_l:
      raise errors.OpExecError, ("Instance is running, can't shutdown"
                                 " block devices.")

    _ShutdownInstanceDisks(instance, self.cfg)


def _ShutdownInstanceDisks(instance, cfg, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is true, errors on the primary node are
  ignored.

  """
  result = True
  for disk in instance.disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      cfg.SetDiskID(top_disk, node)
      if not rpc.call_blockdev_shutdown(node, top_disk):
        logger.Error("could not shutdown block device %s on node %s" %
                     (disk.iv_name, node))
        if not ignore_primary or node != instance.primary_node:
          result = False
  return result


class LUStartupInstance(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "force"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }
    env.update(_BuildInstanceHookEnvByObject(self.instance))
    nl = ([self.sstore.GetMasterNode(), self.instance.primary_node] +
          list(self.instance.secondary_nodes))
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError, ("Instance '%s' not known" %
                                   self.op.instance_name)

    # check bridges existence
    brlist = [nic.bridge for nic in instance.nics]
    if not rpc.call_bridges_exist(instance.primary_node, brlist):
      raise errors.OpPrereqError, ("one or more target bridges %s do not"
                                   " exist on destination node '%s'" %
                                   (brlist, instance.primary_node))

    self.instance = instance
    self.op.instance_name = instance.name

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force
    extra_args = getattr(self.op, "extra_args", "")

    node_current = instance.primary_node

    nodeinfo = rpc.call_node_info([node_current], self.cfg.GetVGName())
    if not nodeinfo:
      raise errors.OpExecError, ("Could not contact node %s for info" %
                                 (node_current))

    freememory = nodeinfo[node_current]['memory_free']
    memory = instance.memory
    if memory > freememory:
      raise errors.OpExecError, ("Not enough memory to start instance"
                                 " %s on node %s,"
                                 " needed %s MiB, available %s MiB" %
                                 (instance.name, node_current, memory,
                                  freememory))

    _StartInstanceDisks(self.cfg, instance, force)

    if not rpc.call_instance_start(node_current, instance, extra_args):
      _ShutdownInstanceDisks(instance, self.cfg)
      raise errors.OpExecError, ("Could not start instance")

    self.cfg.MarkInstanceUp(instance.name)


class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self.instance)
    nl = ([self.sstore.GetMasterNode(), self.instance.primary_node] +
          list(self.instance.secondary_nodes))
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError, ("Instance '%s' not known" %
                                   self.op.instance_name)
    self.instance = instance

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    if not rpc.call_instance_shutdown(node_current, instance):
      logger.Error("could not shutdown instance")

    self.cfg.MarkInstanceDown(instance.name)
    _ShutdownInstanceDisks(instance, self.cfg)


class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self.instance)
    nl = ([self.sstore.GetMasterNode(), self.instance.primary_node] +
          list(self.instance.secondary_nodes))
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError, ("Instance '%s' not known" %
                                   self.op.instance_name)
    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError, ("Instance '%s' has no disks" %
                                   self.op.instance_name)
    if instance.status != "down":
      raise errors.OpPrereqError, ("Instance '%s' is marked to be up" %
                                   self.op.instance_name)
    remote_info = rpc.call_instance_info(instance.primary_node, instance.name)
    if remote_info:
      raise errors.OpPrereqError, ("Instance '%s' is running on the node %s" %
                                   (self.op.instance_name,
                                    instance.primary_node))

    self.op.os_type = getattr(self.op, "os_type", None)
    if self.op.os_type is not None:
      # OS verification
      pnode = self.cfg.GetNodeInfo(
        self.cfg.ExpandNodeName(instance.primary_node))
      if pnode is None:
        raise errors.OpPrereqError, ("Primary node '%s' is unknown" %
                                     instance.primary_node)
      os_obj = rpc.call_os_get([pnode.name], self.op.os_type)[pnode.name]
      if not isinstance(os_obj, objects.OS):
        raise errors.OpPrereqError, ("OS '%s' not in supported OS list for"
                                     " primary node" % self.op.os_type)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      self.cfg.AddInstance(inst)

    _StartInstanceDisks(self.cfg, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      if not rpc.call_instance_os_add(inst.primary_node, inst, "sda", "sdb"):
        raise errors.OpExecError, ("Could not install OS for instance %s "
                                   "on node %s" %
                                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(inst, self.cfg)


class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self.instance)
    nl = ([self.sstore.GetMasterNode(), self.instance.primary_node] +
          list(self.instance.secondary_nodes))
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError, ("Instance '%s' not known" %
                                   self.op.instance_name)
    self.instance = instance

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logger.Info("shutting down instance %s on node %s" %
                (instance.name, instance.primary_node))

    if not rpc.call_instance_shutdown(instance.primary_node, instance):
      raise errors.OpExecError, ("Could not shutdown instance %s on node %s" %
                                 (instance.name, instance.primary_node))

    logger.Info("removing block devices for instance %s" % instance.name)

    _RemoveDisks(instance, self.cfg)

    logger.Info("removing instance %s out of cluster config" % instance.name)

    self.cfg.RemoveInstance(instance.name)


class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  """
  _OP_REQP = ["output_fields"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the fields required are valid output fields.

    """
    self.dynamic_fields = frozenset(["oper_state", "oper_ram"])
    _CheckOutputFields(static=["name", "os", "pnode", "snodes",
                               "admin_state", "admin_ram",
                               "disk_template", "ip", "mac", "bridge"],
                       dynamic=self.dynamic_fields,
                       selected=self.op.output_fields)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    instance_names = utils.NiceSort(self.cfg.GetInstanceList())
    instance_list = [self.cfg.GetInstanceInfo(iname) for iname
                     in instance_names]

    # begin data gathering

    nodes = frozenset([inst.primary_node for inst in instance_list])

    bad_nodes = []
    if self.dynamic_fields.intersection(self.op.output_fields):
      live_data = {}
      node_data = rpc.call_all_instances_info(nodes)
      for name in nodes:
        result = node_data[name]
        if result:
          live_data.update(result)
        elif result == False:
          bad_nodes.append(name)
        # else no instance is alive
    else:
      live_data = dict([(name, {}) for name in instance_names])

    # end data gathering

    output = []
    for instance in instance_list:
      iout = []
      for field in self.op.output_fields:
        if field == "name":
          val = instance.name
        elif field == "os":
          val = instance.os
        elif field == "pnode":
          val = instance.primary_node
        elif field == "snodes":
          val = ",".join(instance.secondary_nodes) or "-"
        elif field == "admin_state":
          if instance.status == "down":
            val = "no"
          else:
            val = "yes"
        elif field == "oper_state":
          if instance.primary_node in bad_nodes:
            val = "(node down)"
          else:
            if live_data.get(instance.name):
              val = "running"
            else:
              val = "stopped"
        elif field == "admin_ram":
          val = instance.memory
        elif field == "oper_ram":
          if instance.primary_node in bad_nodes:
            val = "(node down)"
          elif instance.name in live_data:
            val = live_data[instance.name].get("memory", "?")
          else:
            val = "-"
        elif field == "disk_template":
          val = instance.disk_template
        elif field == "ip":
          val = instance.nics[0].ip
        elif field == "bridge":
          val = instance.nics[0].bridge
        elif field == "mac":
          val = instance.nics[0].mac
        else:
          raise errors.ParameterError, field
        iout.append(val)
      output.append(iout)

    return output


class LUFailoverInstance(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_consistency"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      }
    env.update(_BuildInstanceHookEnvByObject(self.instance))
    nl = [self.sstore.GetMasterNode()] + list(self.instance.secondary_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError, ("Instance '%s' not known" %
                                   self.op.instance_name)

    # check memory requirements on the secondary node
    target_node = instance.secondary_nodes[0]
    nodeinfo = rpc.call_node_info([target_node], self.cfg.GetVGName())
    info = nodeinfo.get(target_node, None)
    if not info:
      raise errors.OpPrereqError, ("Cannot get current information"
                                   " from node '%s'" % target_node)
    if instance.memory > info['memory_free']:
      raise errors.OpPrereqError, ("Not enough memory on target node %s."
                                   " %d MB available, %d MB required" %
                                   (target_node, info['memory_free'],
                                    instance.memory))

    # check bridge existence
    brlist = [nic.bridge for nic in instance.nics]
    if not rpc.call_bridges_exist(instance.primary_node, brlist):
      raise errors.OpPrereqError, ("one or more target bridges %s do not"
                                   " exist on destination node '%s'" %
                                   (brlist, instance.primary_node))

    self.instance = instance
2187 def Exec(self, feedback_fn):
2188 """Failover an instance.
2190 The failover is done by shutting it down on its present node and
2191 starting it on the secondary.
2194 instance = self.instance
2196 source_node = instance.primary_node
2197 target_node = instance.secondary_nodes[0]
2199 feedback_fn("* checking disk consistency between source and target")
2200 for dev in instance.disks:
2201 # for remote_raid1, these are md over drbd
2202 if not _CheckDiskConsistency(self.cfg, dev, target_node, False):
2203 if not self.op.ignore_consistency:
2204 raise errors.OpExecError, ("Disk %s is degraded on target node,"
2205 " aborting failover." % dev.iv_name)
2207 feedback_fn("* checking target node resource availability")
2208 nodeinfo = rpc.call_node_info([target_node], self.cfg.GetVGName())
2211 raise errors.OpExecError, ("Could not contact target node %s." %
2214 free_memory = int(nodeinfo[target_node]['memory_free'])
2215 memory = instance.memory
2216 if memory > free_memory:
2217 raise errors.OpExecError, ("Not enough memory to create instance %s on"
2218 " node %s. needed %s MiB, available %s MiB" %
2219 (instance.name, target_node, memory,
2222 feedback_fn("* shutting down instance on source node")
2223 logger.Info("Shutting down instance %s on node %s" %
2224 (instance.name, source_node))
2226 if not rpc.call_instance_shutdown(source_node, instance):
2227 logger.Error("Could not shutdown instance %s on node %s. Proceeding"
2228 " anyway. Please make sure node %s is down" %
2229 (instance.name, source_node, source_node))
2231 feedback_fn("* deactivating the instance's disks on source node")
2232 if not _ShutdownInstanceDisks(instance, self.cfg, ignore_primary=True):
2233 raise errors.OpExecError, ("Can't shut down the instance's disks.")
2235 instance.primary_node = target_node
2236 # distribute new instance config to the other nodes
2237 self.cfg.AddInstance(instance)
2239 feedback_fn("* activating the instance's disks on target node")
2240 logger.Info("Starting instance %s on node %s" %
2241 (instance.name, target_node))
2243 disks_ok, dummy = _AssembleInstanceDisks(instance, self.cfg,
2244 ignore_secondaries=True)
2245 if not disks_ok:
2246 _ShutdownInstanceDisks(instance, self.cfg)
2247 raise errors.OpExecError, ("Can't activate the instance's disks")
2249 feedback_fn("* starting the instance on the target node")
2250 if not rpc.call_instance_start(target_node, instance, None):
2251 _ShutdownInstanceDisks(instance, self.cfg)
2252 raise errors.OpExecError("Could not start instance %s on node %s." %
2253 (instance.name, target_node))
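# Illustrative only (not part of the original module): a failover is driven by
# an opcode carrying the two _OP_REQP fields above; assuming the opcode class
# is named OpFailoverInstance, a caller would build something like
#   op = opcodes.OpFailoverInstance(instance_name="inst1.example.com",
#                                   ignore_consistency=False)
# and submit it through the processor, which runs CheckPrereq(), the hooks
# and then Exec().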
2256 def _CreateBlockDevOnPrimary(cfg, node, device):
2257 """Create a tree of block devices on the primary node.
2259 This always creates all devices.
2261 """
2262 if device.children:
2263 for child in device.children:
2264 if not _CreateBlockDevOnPrimary(cfg, node, child):
2265 return False
2267 cfg.SetDiskID(device, node)
2268 new_id = rpc.call_blockdev_create(node, device, device.size, True)
2269 if not new_id:
2270 return False
2271 if device.physical_id is None:
2272 device.physical_id = new_id
2273 return True
2276 def _CreateBlockDevOnSecondary(cfg, node, device, force):
2277 """Create a tree of block devices on a secondary node.
2279 If this device type has to be created on secondaries, create it and
2280 all its children.
2282 If not, just recurse to children keeping the same 'force' value.
2284 """
2285 if device.CreateOnSecondary():
2286 force = True
2287 if device.children:
2288 for child in device.children:
2289 if not _CreateBlockDevOnSecondary(cfg, node, child, force):
2290 return False
2292 if not force:
2293 return True
2294 cfg.SetDiskID(device, node)
2295 new_id = rpc.call_blockdev_create(node, device, device.size, False)
2296 if not new_id:
2297 return False
2298 if device.physical_id is None:
2299 device.physical_id = new_id
2300 return True
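# Sketch of the recursion above (hypothetical remote_raid1 tree): for a disk
# laid out as md_raid1 -> drbd -> [lvm data, lvm meta], only the drbd device
# answers CreateOnSecondary() with True, so on the secondary node the md layer
# is skipped while 'force' becomes True for the drbd device and its lvm
# children, which are then physically created there.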
2303 def _GenerateMDDRBDBranch(cfg, vgname, primary, secondary, size, base):
2304 """Generate a drbd device complete with its children.
2307 port = cfg.AllocatePort()
2308 base = "%s_%s" % (base, port)
2309 dev_data = objects.Disk(dev_type="lvm", size=size,
2310 logical_id=(vgname, "%s.data" % base))
2311 dev_meta = objects.Disk(dev_type="lvm", size=128,
2312 logical_id=(vgname, "%s.meta" % base))
2313 drbd_dev = objects.Disk(dev_type="drbd", size=size,
2314 logical_id = (primary, secondary, port),
2315 children = [dev_data, dev_meta])
2316 return drbd_dev
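# Example with hypothetical values: _GenerateMDDRBDBranch(cfg, "xenvg",
# "node1", "node2", 1024, "inst1.example.com-sda") allocates one DRBD port,
# builds a 1024 MB "<base>_<port>.data" LV and a 128 MB "<base>_<port>.meta"
# LV, and returns a drbd Disk wired to (node1, node2, port) with those two
# LVs as children.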
2319 def _GenerateDiskTemplate(cfg, vgname, template_name,
2320 instance_name, primary_node,
2321 secondary_nodes, disk_sz, swap_sz):
2322 """Generate the entire disk layout for a given template type.
2325 #TODO: compute space requirements
2327 if template_name == "diskless":
2328 disks = []
2329 elif template_name == "plain":
2330 if len(secondary_nodes) != 0:
2331 raise errors.ProgrammerError("Wrong template configuration")
2332 sda_dev = objects.Disk(dev_type="lvm", size=disk_sz,
2333 logical_id=(vgname, "%s.os" % instance_name),
2334 iv_name = "sda")
2335 sdb_dev = objects.Disk(dev_type="lvm", size=swap_sz,
2336 logical_id=(vgname, "%s.swap" % instance_name),
2337 iv_name = "sdb")
2338 disks = [sda_dev, sdb_dev]
2339 elif template_name == "local_raid1":
2340 if len(secondary_nodes) != 0:
2341 raise errors.ProgrammerError("Wrong template configuration")
2342 sda_dev_m1 = objects.Disk(dev_type="lvm", size=disk_sz,
2343 logical_id=(vgname, "%s.os_m1" % instance_name))
2344 sda_dev_m2 = objects.Disk(dev_type="lvm", size=disk_sz,
2345 logical_id=(vgname, "%s.os_m2" % instance_name))
2346 md_sda_dev = objects.Disk(dev_type="md_raid1", iv_name = "sda",
2347 size=disk_sz,
2348 children = [sda_dev_m1, sda_dev_m2])
2349 sdb_dev_m1 = objects.Disk(dev_type="lvm", size=swap_sz,
2350 logical_id=(vgname, "%s.swap_m1" %
2351 instance_name))
2352 sdb_dev_m2 = objects.Disk(dev_type="lvm", size=swap_sz,
2353 logical_id=(vgname, "%s.swap_m2" %
2354 instance_name))
2355 md_sdb_dev = objects.Disk(dev_type="md_raid1", iv_name = "sdb",
2356 size=swap_sz,
2357 children = [sdb_dev_m1, sdb_dev_m2])
2358 disks = [md_sda_dev, md_sdb_dev]
2359 elif template_name == "remote_raid1":
2360 if len(secondary_nodes) != 1:
2361 raise errors.ProgrammerError("Wrong template configuration")
2362 remote_node = secondary_nodes[0]
2363 drbd_sda_dev = _GenerateMDDRBDBranch(cfg, vgname,
2364 primary_node, remote_node, disk_sz,
2365 "%s-sda" % instance_name)
2366 md_sda_dev = objects.Disk(dev_type="md_raid1", iv_name="sda",
2367 children = [drbd_sda_dev], size=disk_sz)
2368 drbd_sdb_dev = _GenerateMDDRBDBranch(cfg, vgname,
2369 primary_node, remote_node, swap_sz,
2370 "%s-sdb" % instance_name)
2371 md_sdb_dev = objects.Disk(dev_type="md_raid1", iv_name="sdb",
2372 children = [drbd_sdb_dev], size=swap_sz)
2373 disks = [md_sda_dev, md_sdb_dev]
2374 else:
2375 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
2377 return disks
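# Illustrative call (hypothetical names): for the "remote_raid1" template,
#   _GenerateDiskTemplate(cfg, "xenvg", "remote_raid1", "inst1.example.com",
#                         "node1", ["node2"], 1024, 512)
# yields [sda, sdb] as md_raid1 devices mirrored over drbd between node1 and
# node2; "plain" yields two bare lvm disks and "diskless" an empty list.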
2379 def _CreateDisks(cfg, instance):
2380 """Create all disks for an instance.
2382 This abstracts away some work from AddInstance.
2384 Args:
2385 instance: the instance object
2387 Returns:
2388 True or False showing the success of the creation process
2391 for device in instance.disks:
2392 logger.Info("creating volume %s for instance %s" %
2393 (device.iv_name, instance.name))
2395 for secondary_node in instance.secondary_nodes:
2396 if not _CreateBlockDevOnSecondary(cfg, secondary_node, device, False):
2397 logger.Error("failed to create volume %s (%s) on secondary node %s!" %
2398 (device.iv_name, device, secondary_node))
2399 return False
2401 if not _CreateBlockDevOnPrimary(cfg, instance.primary_node, device):
2402 logger.Error("failed to create volume %s on primary!" %
2403 device.iv_name)
2404 return False
2406 return True
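# Note the pairing with _RemoveDisks() below: LUCreateInstance calls
# _CreateDisks() and, on failure, rolls the partial creation back, roughly:
#   if not _CreateDisks(cfg, instance):
#     _RemoveDisks(instance, cfg)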
2408 def _RemoveDisks(instance, cfg):
2409 """Remove all disks for an instance.
2411 This abstracts away some work from `AddInstance()` and
2412 `RemoveInstance()`. Note that in case some of the devices couldn't
2413 be removed, the removal will continue with the other ones (compare
2414 with `_CreateDisks()`).
2416 Args:
2417 instance: the instance object
2419 Returns:
2420 True or False showing the success of the removal process
2423 logger.Info("removing block devices for instance %s" % instance.name)
2425 result = True
2426 for device in instance.disks:
2427 for node, disk in device.ComputeNodeTree(instance.primary_node):
2428 cfg.SetDiskID(disk, node)
2429 if not rpc.call_blockdev_remove(node, disk):
2430 logger.Error("could not remove block device %s on node %s,"
2431 " continuing anyway" %
2432 (device.iv_name, node))
2433 result = False
2435 return result
2437 class LUCreateInstance(LogicalUnit):
2438 """Create an instance.
2441 HPATH = "instance-add"
2442 HTYPE = constants.HTYPE_INSTANCE
2443 _OP_REQP = ["instance_name", "mem_size", "disk_size", "pnode",
2444 "disk_template", "swap_size", "mode", "start", "vcpus",
2447 def BuildHooksEnv(self):
2448 """Build hooks env.
2450 This runs on master, primary and secondary nodes of the instance.
2452 """
2453 env = {
2454 "INSTANCE_DISK_TEMPLATE": self.op.disk_template,
2455 "INSTANCE_DISK_SIZE": self.op.disk_size,
2456 "INSTANCE_SWAP_SIZE": self.op.swap_size,
2457 "INSTANCE_ADD_MODE": self.op.mode,
2458 }
2459 if self.op.mode == constants.INSTANCE_IMPORT:
2460 env["INSTANCE_SRC_NODE"] = self.op.src_node
2461 env["INSTANCE_SRC_PATH"] = self.op.src_path
2462 env["INSTANCE_SRC_IMAGE"] = self.src_image
2464 env.update(_BuildInstanceHookEnv(name=self.op.instance_name,
2465 primary_node=self.op.pnode,
2466 secondary_nodes=self.secondaries,
2467 status=self.instance_status,
2468 os_type=self.op.os_type,
2469 memory=self.op.mem_size,
2470 vcpus=self.op.vcpus,
2471 nics=[(self.inst_ip, self.op.bridge)],
2472 ))
2474 nl = ([self.sstore.GetMasterNode(), self.op.pnode] +
2475 self.secondaries)
2477 return env, nl, nl
2479 def CheckPrereq(self):
2480 """Check prerequisites.
2483 if self.op.mode not in (constants.INSTANCE_CREATE,
2484 constants.INSTANCE_IMPORT):
2485 raise errors.OpPrereqError, ("Invalid instance creation mode '%s'" %
2486 self.op.mode)
2488 if self.op.mode == constants.INSTANCE_IMPORT:
2489 src_node = getattr(self.op, "src_node", None)
2490 src_path = getattr(self.op, "src_path", None)
2491 if src_node is None or src_path is None:
2492 raise errors.OpPrereqError, ("Importing an instance requires source"
2493 " node and path options")
2494 src_node_full = self.cfg.ExpandNodeName(src_node)
2495 if src_node_full is None:
2496 raise errors.OpPrereqError, ("Unknown source node '%s'" % src_node)
2497 self.op.src_node = src_node = src_node_full
2499 if not os.path.isabs(src_path):
2500 raise errors.OpPrereqError, ("The source path must be absolute")
2502 export_info = rpc.call_export_info(src_node, src_path)
2504 if not export_info:
2505 raise errors.OpPrereqError, ("No export found in dir %s" % src_path)
2507 if not export_info.has_section(constants.INISECT_EXP):
2508 raise errors.ProgrammerError, ("Corrupted export config")
2510 ei_version = export_info.get(constants.INISECT_EXP, 'version')
2511 if (int(ei_version) != constants.EXPORT_VERSION):
2512 raise errors.OpPrereqError, ("Wrong export version %s (wanted %d)" %
2513 (ei_version, constants.EXPORT_VERSION))
2515 if int(export_info.get(constants.INISECT_INS, 'disk_count')) > 1:
2516 raise errors.OpPrereqError, ("Can't import instance with more than"
2517 " one disk")
2519 # FIXME: are the old os-es, disk sizes, etc. useful?
2520 self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
2521 diskimage = os.path.join(src_path, export_info.get(constants.INISECT_INS,
2523 self.src_image = diskimage
2524 else: # INSTANCE_CREATE
2525 if getattr(self.op, "os_type", None) is None:
2526 raise errors.OpPrereqError, ("No guest OS specified")
2528 # check primary node
2529 pnode = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.pnode))
2530 if pnode is None:
2531 raise errors.OpPrereqError, ("Primary node '%s' is unknown" %
2532 self.op.pnode)
2533 self.op.pnode = pnode.name
2534 self.pnode = pnode
2535 self.secondaries = []
2536 # disk template and mirror node verification
2537 if self.op.disk_template not in constants.DISK_TEMPLATES:
2538 raise errors.OpPrereqError, ("Invalid disk template name")
2540 if self.op.disk_template == constants.DT_REMOTE_RAID1:
2541 if getattr(self.op, "snode", None) is None:
2542 raise errors.OpPrereqError, ("The 'remote_raid1' disk template needs"
2543 " a mirror node")
2545 snode_name = self.cfg.ExpandNodeName(self.op.snode)
2546 if snode_name is None:
2547 raise errors.OpPrereqError, ("Unknown secondary node '%s'" %
2548 self.op.snode)
2549 elif snode_name == pnode.name:
2550 raise errors.OpPrereqError, ("The secondary node cannot be"
2551 " the primary node.")
2552 self.secondaries.append(snode_name)
2554 # Check lv size requirements
2555 nodenames = [pnode.name] + self.secondaries
2556 nodeinfo = rpc.call_node_info(nodenames, self.cfg.GetVGName())
2558 # Required free disk space as a function of disk and swap space
2559 req_size_dict = {
2560 constants.DT_DISKLESS: 0,
2561 constants.DT_PLAIN: self.op.disk_size + self.op.swap_size,
2562 constants.DT_LOCAL_RAID1: (self.op.disk_size + self.op.swap_size) * 2,
2563 # 256 MB are added for drbd metadata, 128MB for each drbd device
2564 constants.DT_REMOTE_RAID1: self.op.disk_size + self.op.swap_size + 256,
2565 }
2567 if self.op.disk_template not in req_size_dict:
2568 raise errors.ProgrammerError, ("Disk template '%s' size requirement"
2569 " is unknown" % self.op.disk_template)
2571 req_size = req_size_dict[self.op.disk_template]
2573 for node in nodenames:
2574 info = nodeinfo.get(node, None)
2575 if not info:
2576 raise errors.OpPrereqError, ("Cannot get current information"
2577 " from node '%s'" % node)
2578 if req_size > info['vg_free']:
2579 raise errors.OpPrereqError, ("Not enough disk space on target node %s."
2580 " %d MB available, %d MB required" %
2581 (node, info['vg_free'], req_size))
2584 os_obj = rpc.call_os_get([pnode.name], self.op.os_type)[pnode.name]
2585 if not isinstance(os_obj, objects.OS):
2586 raise errors.OpPrereqError, ("OS '%s' not in supported os list for"
2587 " primary node" % self.op.os_type)
2589 # instance verification
2590 hostname1 = utils.LookupHostname(self.op.instance_name)
2591 if not hostname1:
2592 raise errors.OpPrereqError, ("Instance name '%s' not found in dns" %
2593 self.op.instance_name)
2595 self.op.instance_name = instance_name = hostname1['hostname']
2596 instance_list = self.cfg.GetInstanceList()
2597 if instance_name in instance_list:
2598 raise errors.OpPrereqError, ("Instance '%s' is already in the cluster" %
2599 instance_name)
2601 ip = getattr(self.op, "ip", None)
2602 if ip is None or ip.lower() == "none":
2603 inst_ip = None
2604 elif ip.lower() == "auto":
2605 inst_ip = hostname1['ip']
2606 else:
2607 if not utils.IsValidIP(ip):
2608 raise errors.OpPrereqError, ("given IP address '%s' doesn't look"
2609 " like a valid IP" % ip)
2610 inst_ip = ip
2611 self.inst_ip = inst_ip
2613 command = ["fping", "-q", hostname1['ip']]
2614 result = utils.RunCmd(command)
2615 if not result.failed:
2616 raise errors.OpPrereqError, ("IP %s of instance %s already in use" %
2617 (hostname1['ip'], instance_name))
2619 # bridge verification
2620 bridge = getattr(self.op, "bridge", None)
2621 if bridge is None:
2622 self.op.bridge = self.cfg.GetDefBridge()
2623 else:
2624 self.op.bridge = bridge
2626 if not rpc.call_bridges_exist(self.pnode.name, [self.op.bridge]):
2627 raise errors.OpPrereqError, ("target bridge '%s' does not exist on"
2628 " destination node '%s'" %
2629 (self.op.bridge, pnode.name))
2631 if self.op.start:
2632 self.instance_status = 'up'
2633 else:
2634 self.instance_status = 'down'
2636 def Exec(self, feedback_fn):
2637 """Create and add the instance to the cluster.
2640 instance = self.op.instance_name
2641 pnode_name = self.pnode.name
2643 nic = objects.NIC(bridge=self.op.bridge, mac=self.cfg.GenerateMAC())
2644 if self.inst_ip is not None:
2645 nic.ip = self.inst_ip
2647 disks = _GenerateDiskTemplate(self.cfg, self.cfg.GetVGName(),
2648 self.op.disk_template,
2649 instance, pnode_name,
2650 self.secondaries, self.op.disk_size,
2651 self.op.swap_size)
2653 iobj = objects.Instance(name=instance, os=self.op.os_type,
2654 primary_node=pnode_name,
2655 memory=self.op.mem_size,
2656 vcpus=self.op.vcpus,
2657 nics=[nic], disks=disks,
2658 disk_template=self.op.disk_template,
2659 status=self.instance_status,
2660 )
2662 feedback_fn("* creating instance disks...")
2663 if not _CreateDisks(self.cfg, iobj):
2664 _RemoveDisks(iobj, self.cfg)
2665 raise errors.OpExecError, ("Device creation failed, reverting...")
2667 feedback_fn("adding instance %s to cluster config" % instance)
2669 self.cfg.AddInstance(iobj)
2671 if self.op.wait_for_sync:
2672 disk_abort = not _WaitForSync(self.cfg, iobj)
2673 elif iobj.disk_template == "remote_raid1":
2674 # make sure the disks are not degraded (still sync-ing is ok)
2676 feedback_fn("* checking mirrors status")
2677 disk_abort = not _WaitForSync(self.cfg, iobj, oneshot=True)
2678 else:
2679 disk_abort = False
2681 if disk_abort:
2682 _RemoveDisks(iobj, self.cfg)
2683 self.cfg.RemoveInstance(iobj.name)
2684 raise errors.OpExecError, ("There are some degraded disks for"
2685 " this instance")
2687 feedback_fn("creating os for instance %s on node %s" %
2688 (instance, pnode_name))
2690 if iobj.disk_template != constants.DT_DISKLESS:
2691 if self.op.mode == constants.INSTANCE_CREATE:
2692 feedback_fn("* running the instance OS create scripts...")
2693 if not rpc.call_instance_os_add(pnode_name, iobj, "sda", "sdb"):
2694 raise errors.OpExecError, ("could not add os for instance %s"
2695 " on node %s" %
2696 (instance, pnode_name))
2698 elif self.op.mode == constants.INSTANCE_IMPORT:
2699 feedback_fn("* running the instance OS import scripts...")
2700 src_node = self.op.src_node
2701 src_image = self.src_image
2702 if not rpc.call_instance_os_import(pnode_name, iobj, "sda", "sdb",
2703 src_node, src_image):
2704 raise errors.OpExecError, ("Could not import os for instance"
2705 " %s on node %s" %
2706 (instance, pnode_name))
2707 else:
2708 # also checked in the prereq part
2709 raise errors.ProgrammerError, ("Unknown OS initialization mode '%s'"
2710 % self.op.mode)
2712 if self.op.start:
2713 logger.Info("starting instance %s on node %s" % (instance, pnode_name))
2714 feedback_fn("* starting instance...")
2715 if not rpc.call_instance_start(pnode_name, iobj, None):
2716 raise errors.OpExecError, ("Could not start instance")
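# Illustrative only: a creation request must provide every _OP_REQP field of
# LUCreateInstance; assuming the opcode class is named OpCreateInstance, a
# minimal "plain"-template request might look like (values hypothetical):
#   op = opcodes.OpCreateInstance(instance_name="inst1.example.com",
#                                 mem_size=512, disk_size=1024, swap_size=512,
#                                 pnode="node1", disk_template="plain",
#                                 mode=constants.INSTANCE_CREATE,
#                                 os_type="debian-etch", start=True, vcpus=1,
#                                 wait_for_sync=True)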
2719 class LUConnectConsole(NoHooksLU):
2720 """Connect to an instance's console.
2722 This is somewhat special in that it returns the command line that
2723 you need to run on the master node in order to connect to the
2724 console.
2726 """
2727 _OP_REQP = ["instance_name"]
2729 def CheckPrereq(self):
2730 """Check prerequisites.
2732 This checks that the instance is in the cluster.
2735 instance = self.cfg.GetInstanceInfo(
2736 self.cfg.ExpandInstanceName(self.op.instance_name))
2737 if instance is None:
2738 raise errors.OpPrereqError, ("Instance '%s' not known" %
2739 self.op.instance_name)
2740 self.instance = instance
2742 def Exec(self, feedback_fn):
2743 """Connect to the console of an instance
2746 instance = self.instance
2747 node = instance.primary_node
2749 node_insts = rpc.call_instance_list([node])[node]
2750 if node_insts is False:
2751 raise errors.OpExecError, ("Can't connect to node %s." % node)
2753 if instance.name not in node_insts:
2754 raise errors.OpExecError, ("Instance %s is not running." % instance.name)
2756 logger.Debug("connecting to console of %s on %s" % (instance.name, node))
2758 hyper = hypervisor.GetHypervisor()
2759 console_cmd = hyper.GetShellCommandForConsole(instance.name)
2760 return node, console_cmd
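# The caller receives the (node, console_cmd) pair returned above and is
# expected to open a session to that node and run the command there; for the
# xen hypervisor the shell command would be along the lines of
# "xm console <instance>" (the exact form comes from the hypervisor class).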
2763 class LUAddMDDRBDComponent(LogicalUnit):
2764 """Adda new mirror member to an instance's disk.
2767 HPATH = "mirror-add"
2768 HTYPE = constants.HTYPE_INSTANCE
2769 _OP_REQP = ["instance_name", "remote_node", "disk_name"]
2771 def BuildHooksEnv(self):
2772 """Build hooks env.
2774 This runs on the master, the primary and all the secondaries.
2776 """
2777 env = {
2778 "NEW_SECONDARY": self.op.remote_node,
2779 "DISK_NAME": self.op.disk_name,
2780 }
2781 env.update(_BuildInstanceHookEnvByObject(self.instance))
2782 nl = [self.sstore.GetMasterNode(), self.instance.primary_node,
2783 self.op.remote_node,] + list(self.instance.secondary_nodes)
2784 return env, nl, nl
2786 def CheckPrereq(self):
2787 """Check prerequisites.
2789 This checks that the instance is in the cluster.
2792 instance = self.cfg.GetInstanceInfo(
2793 self.cfg.ExpandInstanceName(self.op.instance_name))
2794 if instance is None:
2795 raise errors.OpPrereqError, ("Instance '%s' not known" %
2796 self.op.instance_name)
2797 self.instance = instance
2799 remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
2800 if remote_node is None:
2801 raise errors.OpPrereqError, ("Node '%s' not known" % self.op.remote_node)
2802 self.remote_node = remote_node
2804 if remote_node == instance.primary_node:
2805 raise errors.OpPrereqError, ("The specified node is the primary node of"
2806 " the instance.")
2808 if instance.disk_template != constants.DT_REMOTE_RAID1:
2809 raise errors.OpPrereqError, ("Instance's disk layout is not"
2810 " remote_raid1.")
2811 for disk in instance.disks:
2812 if disk.iv_name == self.op.disk_name:
2813 break
2814 else:
2815 raise errors.OpPrereqError, ("Can't find this device ('%s') in the"
2816 " instance." % self.op.disk_name)
2817 if len(disk.children) > 1:
2818 raise errors.OpPrereqError, ("The device already has two slave"
2819 " devices.\n"
2820 "This would create a 3-disk raid1"
2821 " which we don't allow.")
2822 self.disk = disk
2824 def Exec(self, feedback_fn):
2825 """Add the mirror component
2829 instance = self.instance
2830 disk = self.disk
2831 remote_node = self.remote_node
2832 new_drbd = _GenerateMDDRBDBranch(self.cfg, self.cfg.GetVGName(),
2833 instance.primary_node, remote_node,
2834 disk.size, "%s-%s" %
2835 (instance.name, self.op.disk_name))
2837 logger.Info("adding new mirror component on secondary")
2839 if not _CreateBlockDevOnSecondary(self.cfg, remote_node, new_drbd, False):
2840 raise errors.OpExecError, ("Failed to create new component on secondary"
2841 " node %s" % remote_node)
2843 logger.Info("adding new mirror component on primary")
2845 if not _CreateBlockDevOnPrimary(self.cfg, instance.primary_node, new_drbd):
2846 # remove secondary dev
2847 self.cfg.SetDiskID(new_drbd, remote_node)
2848 rpc.call_blockdev_remove(remote_node, new_drbd)
2849 raise errors.OpExecError, ("Failed to create volume on primary")
2851 # the device exists now
2852 # call the primary node to add the mirror to md
2853 logger.Info("adding new mirror component to md")
2854 if not rpc.call_blockdev_addchild(instance.primary_node,
2855 disk, new_drbd):
2856 logger.Error("Can't add mirror component to md!")
2857 self.cfg.SetDiskID(new_drbd, remote_node)
2858 if not rpc.call_blockdev_remove(remote_node, new_drbd):
2859 logger.Error("Can't rollback on secondary")
2860 self.cfg.SetDiskID(new_drbd, instance.primary_node)
2861 if not rpc.call_blockdev_remove(instance.primary_node, new_drbd):
2862 logger.Error("Can't rollback on primary")
2863 raise errors.OpExecError, "Can't add mirror component to md array"
2865 disk.children.append(new_drbd)
2867 self.cfg.AddInstance(instance)
2869 _WaitForSync(self.cfg, instance)
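# Illustrative only (assumed opcode name OpAddMDDRBDComponent): growing the
# "sda" mirror of an instance onto a third node would be requested as
#   op = opcodes.OpAddMDDRBDComponent(instance_name="inst1.example.com",
#                                     remote_node="node3", disk_name="sda")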
2874 class LURemoveMDDRBDComponent(LogicalUnit):
2875 """Remove a component from a remote_raid1 disk.
2878 HPATH = "mirror-remove"
2879 HTYPE = constants.HTYPE_INSTANCE
2880 _OP_REQP = ["instance_name", "disk_name", "disk_id"]
2882 def BuildHooksEnv(self):
2883 """Build hooks env.
2885 This runs on the master, the primary and all the secondaries.
2887 """
2888 env = {
2889 "DISK_NAME": self.op.disk_name,
2890 "DISK_ID": self.op.disk_id,
2891 "OLD_SECONDARY": self.old_secondary,
2892 }
2893 env.update(_BuildInstanceHookEnvByObject(self.instance))
2894 nl = [self.sstore.GetMasterNode(),
2895 self.instance.primary_node] + list(self.instance.secondary_nodes)
2896 return env, nl, nl
2898 def CheckPrereq(self):
2899 """Check prerequisites.
2901 This checks that the instance is in the cluster.
2904 instance = self.cfg.GetInstanceInfo(
2905 self.cfg.ExpandInstanceName(self.op.instance_name))
2906 if instance is None:
2907 raise errors.OpPrereqError, ("Instance '%s' not known" %
2908 self.op.instance_name)
2909 self.instance = instance
2911 if instance.disk_template != constants.DT_REMOTE_RAID1:
2912 raise errors.OpPrereqError, ("Instance's disk layout is not"
2913 " remote_raid1.")
2914 for disk in instance.disks:
2915 if disk.iv_name == self.op.disk_name:
2916 break
2917 else:
2918 raise errors.OpPrereqError, ("Can't find this device ('%s') in the"
2919 " instance." % self.op.disk_name)
2920 for child in disk.children:
2921 if child.dev_type == "drbd" and child.logical_id[2] == self.op.disk_id:
2922 break
2923 else:
2924 raise errors.OpPrereqError, ("Can't find the device with this port.")
2926 if len(disk.children) < 2:
2927 raise errors.OpPrereqError, ("Cannot remove the last component from"
2928 " a mirror.")
2929 self.disk = disk
2930 self.child = child
2931 if self.child.logical_id[0] == instance.primary_node:
2932 oid = 1
2933 else:
2934 oid = 0
2935 self.old_secondary = self.child.logical_id[oid]
2937 def Exec(self, feedback_fn):
2938 """Remove the mirror component
2941 instance = self.instance
2942 disk = self.disk
2943 child = self.child
2944 logger.Info("remove mirror component")
2945 self.cfg.SetDiskID(disk, instance.primary_node)
2946 if not rpc.call_blockdev_removechild(instance.primary_node,
2947 disk, child):
2948 raise errors.OpExecError, ("Can't remove child from mirror.")
2950 for node in child.logical_id[:2]:
2951 self.cfg.SetDiskID(child, node)
2952 if not rpc.call_blockdev_remove(node, child):
2953 logger.Error("Warning: failed to remove device from node %s,"
2954 " continuing operation." % node)
2956 disk.children.remove(child)
2957 self.cfg.AddInstance(instance)
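# Note: the disk_id parameter is matched against child.logical_id[2] above,
# i.e. the DRBD port that cfg.AllocatePort() assigned when the mirror branch
# was generated, not a device name.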
2960 class LUReplaceDisks(LogicalUnit):
2961 """Replace the disks of an instance.
2964 HPATH = "mirrors-replace"
2965 HTYPE = constants.HTYPE_INSTANCE
2966 _OP_REQP = ["instance_name"]
2968 def BuildHooksEnv(self):
2969 """Build hooks env.
2971 This runs on the master, the primary and all the secondaries.
2973 """
2974 env = {
2975 "NEW_SECONDARY": self.op.remote_node,
2976 "OLD_SECONDARY": self.instance.secondary_nodes[0],
2977 }
2978 env.update(_BuildInstanceHookEnvByObject(self.instance))
2979 nl = [self.sstore.GetMasterNode(),
2980 self.instance.primary_node] + list(self.instance.secondary_nodes)
2981 return env, nl, nl
2983 def CheckPrereq(self):
2984 """Check prerequisites.
2986 This checks that the instance is in the cluster.
2989 instance = self.cfg.GetInstanceInfo(
2990 self.cfg.ExpandInstanceName(self.op.instance_name))
2991 if instance is None:
2992 raise errors.OpPrereqError, ("Instance '%s' not known" %
2993 self.op.instance_name)
2994 self.instance = instance
2996 if instance.disk_template != constants.DT_REMOTE_RAID1:
2997 raise errors.OpPrereqError, ("Instance's disk layout is not"
2998 " remote_raid1.")
3000 if len(instance.secondary_nodes) != 1:
3001 raise errors.OpPrereqError, ("The instance has a strange layout,"
3002 " expected one secondary but found %d" %
3003 len(instance.secondary_nodes))
3005 remote_node = getattr(self.op, "remote_node", None)
3006 if remote_node is None:
3007 remote_node = instance.secondary_nodes[0]
3008 else:
3009 remote_node = self.cfg.ExpandNodeName(remote_node)
3010 if remote_node is None:
3011 raise errors.OpPrereqError, ("Node '%s' not known" %
3012 self.op.remote_node)
3013 if remote_node == instance.primary_node:
3014 raise errors.OpPrereqError, ("The specified node is the primary node of"
3015 " the instance.")
3016 self.op.remote_node = remote_node
3018 def Exec(self, feedback_fn):
3019 """Replace the disks of an instance.
3022 instance = self.instance
3023 iv_names = {}
3025 remote_node = self.op.remote_node
3026 cfg = self.cfg
3027 vgname = cfg.GetVGName()
3028 for dev in instance.disks:
3029 size = dev.size
3030 new_drbd = _GenerateMDDRBDBranch(cfg, vgname, instance.primary_node,
3031 remote_node, size,
3032 "%s-%s" % (instance.name, dev.iv_name))
3033 iv_names[dev.iv_name] = (dev, dev.children[0], new_drbd)
3034 logger.Info("adding new mirror component on secondary for %s" %
3035 dev.iv_name)
3037 if not _CreateBlockDevOnSecondary(cfg, remote_node, new_drbd, False):
3038 raise errors.OpExecError, ("Failed to create new component on"
3039 " secondary node %s\n"
3040 "Full abort, cleanup manually!" %
3043 logger.Info("adding new mirror component on primary")
3045 if not _CreateBlockDevOnPrimary(cfg, instance.primary_node, new_drbd):
3046 # remove secondary dev
3047 cfg.SetDiskID(new_drbd, remote_node)
3048 rpc.call_blockdev_remove(remote_node, new_drbd)
3049 raise errors.OpExecError("Failed to create volume on primary!\n"
3050 "Full abort, cleanup manually!!")
3052 # the device exists now
3053 # call the primary node to add the mirror to md
3054 logger.Info("adding new mirror component to md")
3055 if not rpc.call_blockdev_addchild(instance.primary_node, dev,
3056 new_drbd):
3057 logger.Error("Can't add mirror component to md!")
3058 cfg.SetDiskID(new_drbd, remote_node)
3059 if not rpc.call_blockdev_remove(remote_node, new_drbd):
3060 logger.Error("Can't rollback on secondary")
3061 cfg.SetDiskID(new_drbd, instance.primary_node)
3062 if not rpc.call_blockdev_remove(instance.primary_node, new_drbd):
3063 logger.Error("Can't rollback on primary")
3064 raise errors.OpExecError, ("Full abort, cleanup manually!!")
3066 dev.children.append(new_drbd)
3067 cfg.AddInstance(instance)
3069 # this can fail as the old devices are degraded and _WaitForSync
3070 # does a combined result over all disks, so we don't check its
3071 # return value
3072 _WaitForSync(cfg, instance, unlock=True)
3074 # so check manually all the devices
3075 for name in iv_names:
3076 dev, child, new_drbd = iv_names[name]
3077 cfg.SetDiskID(dev, instance.primary_node)
3078 is_degr = rpc.call_blockdev_find(instance.primary_node, dev)[5]
3079 if is_degr:
3080 raise errors.OpExecError, ("MD device %s is degraded!" % name)
3081 cfg.SetDiskID(new_drbd, instance.primary_node)
3082 is_degr = rpc.call_blockdev_find(instance.primary_node, new_drbd)[5]
3083 if is_degr:
3084 raise errors.OpExecError, ("New drbd device %s is degraded!" % name)
3086 for name in iv_names:
3087 dev, child, new_drbd = iv_names[name]
3088 logger.Info("remove mirror %s component" % name)
3089 cfg.SetDiskID(dev, instance.primary_node)
3090 if not rpc.call_blockdev_removechild(instance.primary_node,
3091 dev, child):
3092 logger.Error("Can't remove child from mirror, aborting"
3093 " *this device cleanup*.\nYou need to cleanup manually!!")
3094 continue
3096 for node in child.logical_id[:2]:
3097 logger.Info("remove child device on %s" % node)
3098 cfg.SetDiskID(child, node)
3099 if not rpc.call_blockdev_remove(node, child):
3100 logger.Error("Warning: failed to remove device from node %s,"
3101 " continuing operation." % node)
3103 dev.children.remove(child)
3105 cfg.AddInstance(instance)
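# The replacement strategy above is add-then-remove: a fresh drbd branch is
# attached to every md array, the mirrors are given time to sync, both the md
# and the new drbd device are verified non-degraded, and only then is the old
# child detached and deleted. A failure before the detach step therefore
# leaves the instance on its original, still-consistent disks.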
3108 class LUQueryInstanceData(NoHooksLU):
3109 """Query runtime instance data.
3112 _OP_REQP = ["instances"]
3114 def CheckPrereq(self):
3115 """Check prerequisites.
3117 This only checks the optional instance list against the existing names.
3120 if not isinstance(self.op.instances, list):
3121 raise errors.OpPrereqError, "Invalid argument type 'instances'"
3122 if self.op.instances:
3123 self.wanted_instances = []
3124 names = self.op.instances
3125 for name in names:
3126 instance = self.cfg.GetInstanceInfo(self.cfg.ExpandInstanceName(name))
3127 if instance is None:
3128 raise errors.OpPrereqError, ("No such instance name '%s'" % name)
3129 self.wanted_instances.append(instance)
3130 else:
3131 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
3132 in self.cfg.GetInstanceList()]
3136 def _ComputeDiskStatus(self, instance, snode, dev):
3137 """Compute block device status.
3140 self.cfg.SetDiskID(dev, instance.primary_node)
3141 dev_pstatus = rpc.call_blockdev_find(instance.primary_node, dev)
3142 if dev.dev_type == "drbd":
3143 # we change the snode then (otherwise we use the one passed in)
3144 if dev.logical_id[0] == instance.primary_node:
3145 snode = dev.logical_id[1]
3146 else:
3147 snode = dev.logical_id[0]
3149 if snode:
3150 self.cfg.SetDiskID(dev, snode)
3151 dev_sstatus = rpc.call_blockdev_find(snode, dev)
3152 else:
3153 dev_sstatus = None
3155 if dev.children:
3156 dev_children = [self._ComputeDiskStatus(instance, snode, child)
3157 for child in dev.children]
3158 else:
3159 dev_children = []
3161 data = {
3162 "iv_name": dev.iv_name,
3163 "dev_type": dev.dev_type,
3164 "logical_id": dev.logical_id,
3165 "physical_id": dev.physical_id,
3166 "pstatus": dev_pstatus,
3167 "sstatus": dev_sstatus,
3168 "children": dev_children,
3173 def Exec(self, feedback_fn):
3174 """Gather and return data"""
3176 for instance in self.wanted_instances:
3177 remote_info = rpc.call_instance_info(instance.primary_node,
3178 instance.name)
3179 if remote_info and "state" in remote_info:
3180 remote_state = "up"
3181 else:
3182 remote_state = "down"
3183 if instance.status == "down":
3184 config_state = "down"
3185 else:
3186 config_state = "up"
3188 disks = [self._ComputeDiskStatus(instance, None, device)
3189 for device in instance.disks]
3191 idict = {
3192 "name": instance.name,
3193 "config_state": config_state,
3194 "run_state": remote_state,
3195 "pnode": instance.primary_node,
3196 "snodes": instance.secondary_nodes,
3198 "memory": instance.memory,
3199 "nics": [(nic.mac, nic.ip, nic.bridge) for nic in instance.nics],
3203 result[instance.name] = idict
3205 return result
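# Shape of the returned mapping (illustrative):
#   {"inst1.example.com": {"name": ..., "config_state": "up" or "down",
#    "run_state": ..., "pnode": ..., "snodes": [...], "os": ...,
#    "memory": ..., "nics": [(mac, ip, bridge), ...], "disks": [...]}}
# where each disks entry is the recursive dict built by _ComputeDiskStatus.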
3208 class LUQueryNodeData(NoHooksLU):
3209 """Logical unit for querying node data.
3212 _OP_REQP = ["nodes"]
3214 def CheckPrereq(self):
3215 """Check prerequisites.
3217 This only checks the optional node list against the existing names.
3220 self.wanted_nodes = _GetWantedNodes(self, self.op.nodes)
3222 def Exec(self, feedback_fn):
3223 """Compute and return the list of nodes.
3226 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3227 in self.cfg.GetInstanceList()]
3228 result = []
3229 for node in self.wanted_nodes:
3230 result.append((node.name, node.primary_ip, node.secondary_ip,
3231 [inst.name for inst in ilist
3232 if inst.primary_node == node.name],
3233 [inst.name for inst in ilist
3234 if node.name in inst.secondary_nodes],
3235 ))
3237 return result
3239 class LUSetInstanceParms(LogicalUnit):
3240 """Modifies an instances's parameters.
3243 HPATH = "instance-modify"
3244 HTYPE = constants.HTYPE_INSTANCE
3245 _OP_REQP = ["instance_name"]
3247 def BuildHooksEnv(self):
3248 """Build hooks env.
3250 This runs on the master, primary and secondaries.
3252 """
3253 args = dict()
3254 if self.mem:
3255 args['memory'] = self.mem
3256 if self.vcpus:
3257 args['vcpus'] = self.vcpus
3258 if self.do_ip or self.do_bridge:
3259 if self.do_ip:
3260 ip = self.ip
3261 else:
3262 ip = self.instance.nics[0].ip
3263 if self.do_bridge:
3264 bridge = self.bridge
3265 else:
3266 bridge = self.instance.nics[0].bridge
3267 args['nics'] = [(ip, bridge)]
3268 env = _BuildInstanceHookEnvByObject(self.instance, override=args)
3269 nl = [self.sstore.GetMasterNode(),
3270 self.instance.primary_node] + list(self.instance.secondary_nodes)
3271 return env, nl, nl
3273 def CheckPrereq(self):
3274 """Check prerequisites.
3276 This only checks the instance list against the existing names.
3279 self.mem = getattr(self.op, "mem", None)
3280 self.vcpus = getattr(self.op, "vcpus", None)
3281 self.ip = getattr(self.op, "ip", None)
3282 self.bridge = getattr(self.op, "bridge", None)
3283 if [self.mem, self.vcpus, self.ip, self.bridge].count(None) == 4:
3284 raise errors.OpPrereqError, ("No changes submitted")
3285 if self.mem is not None:
3286 try:
3287 self.mem = int(self.mem)
3288 except ValueError, err:
3289 raise errors.OpPrereqError, ("Invalid memory size: %s" % str(err))
3290 if self.vcpus is not None:
3291 try:
3292 self.vcpus = int(self.vcpus)
3293 except ValueError, err:
3294 raise errors.OpPrereqError, ("Invalid vcpus number: %s" % str(err))
3295 if self.ip is not None:
3296 self.do_ip = True
3297 if self.ip.lower() == "none":
3298 self.ip = None
3299 else:
3300 if not utils.IsValidIP(self.ip):
3301 raise errors.OpPrereqError, ("Invalid IP address '%s'." % self.ip)
3302 else:
3303 self.do_ip = False
3304 self.do_bridge = (self.bridge is not None)
3306 instance = self.cfg.GetInstanceInfo(
3307 self.cfg.ExpandInstanceName(self.op.instance_name))
3308 if instance is None:
3309 raise errors.OpPrereqError, ("No such instance name '%s'" %
3310 self.op.instance_name)
3311 self.op.instance_name = instance.name
3312 self.instance = instance
3315 def Exec(self, feedback_fn):
3316 """Modifies an instance.
3318 All parameters take effect only at the next restart of the instance.
3319 """
3320 result = []
3321 instance = self.instance
3322 if self.mem:
3323 instance.memory = self.mem
3324 result.append(("mem", self.mem))
3325 if self.vcpus:
3326 instance.vcpus = self.vcpus
3327 result.append(("vcpus", self.vcpus))
3328 if self.do_ip:
3329 instance.nics[0].ip = self.ip
3330 result.append(("ip", self.ip))
3331 if self.do_bridge:
3332 instance.nics[0].bridge = self.bridge
3333 result.append(("bridge", self.bridge))
3335 self.cfg.AddInstance(instance)
3337 return result
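# Example (hypothetical): submitting only a new memory size returns
# [("mem", 1024)]; each (parameter, new_value) pair records one applied
# change, which takes effect at the next restart as noted above.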
3340 class LUQueryExports(NoHooksLU):
3341 """Query the exports list
3346 def CheckPrereq(self):
3347 """Check that the nodelist contains only existing nodes.
3350 self.nodes = _GetWantedNodes(self, getattr(self.op, "nodes", None))
3352 def Exec(self, feedback_fn):
3353 """Compute the list of all the exported system images.
3355 Returns:
3356 a dictionary with the structure node->(export-list)
3357 where export-list is a list of the instances exported on
3358 that node.
3361 return rpc.call_export_list([node.name for node in self.nodes])
3364 class LUExportInstance(LogicalUnit):
3365 """Export an instance to an image in the cluster.
3368 HPATH = "instance-export"
3369 HTYPE = constants.HTYPE_INSTANCE
3370 _OP_REQP = ["instance_name", "target_node", "shutdown"]
3372 def BuildHooksEnv(self):
3373 """Build hooks env.
3375 This will run on the master, primary node and target node.
3377 """
3378 env = {
3379 "EXPORT_NODE": self.op.target_node,
3380 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
3381 }
3382 env.update(_BuildInstanceHookEnvByObject(self.instance))
3383 nl = [self.sstore.GetMasterNode(), self.instance.primary_node,
3384 self.op.target_node]
3385 return env, nl, nl
3387 def CheckPrereq(self):
3388 """Check prerequisites.
3390 This checks that the instance name is a valid one.
3393 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
3394 self.instance = self.cfg.GetInstanceInfo(instance_name)
3395 if self.instance is None:
3396 raise errors.OpPrereqError, ("Instance '%s' not found" %
3397 self.op.instance_name)
3400 dst_node_short = self.cfg.ExpandNodeName(self.op.target_node)
3401 self.dst_node = self.cfg.GetNodeInfo(dst_node_short)
3403 if self.dst_node is None:
3404 raise errors.OpPrereqError, ("Destination node '%s' is unknown." %
3405 self.op.target_node)
3406 self.op.target_node = self.dst_node.name
3408 def Exec(self, feedback_fn):
3409 """Export an instance to an image in the cluster.
3412 instance = self.instance
3413 dst_node = self.dst_node
3414 src_node = instance.primary_node
3415 # shutdown the instance, unless requested not to do so
3416 if self.op.shutdown:
3417 op = opcodes.OpShutdownInstance(instance_name=instance.name)
3418 self.processor.ChainOpCode(op, feedback_fn)
3420 vgname = self.cfg.GetVGName()
3422 snap_disks = []
3424 try:
3425 for disk in instance.disks:
3426 if disk.iv_name == "sda":
3427 # new_dev_name will be a snapshot of an lvm leaf of the one we passed
3428 new_dev_name = rpc.call_blockdev_snapshot(src_node, disk)
3430 if not new_dev_name:
3431 logger.Error("could not snapshot block device %s on node %s" %
3432 (disk.logical_id[1], src_node))
3433 else:
3434 new_dev = objects.Disk(dev_type="lvm", size=disk.size,
3435 logical_id=(vgname, new_dev_name),
3436 physical_id=(vgname, new_dev_name),
3437 iv_name=disk.iv_name)
3438 snap_disks.append(new_dev)
3440 finally:
3441 if self.op.shutdown:
3442 op = opcodes.OpStartupInstance(instance_name=instance.name,
3443 force=False)
3444 self.processor.ChainOpCode(op, feedback_fn)
3446 # TODO: check for size
3448 for dev in snap_disks:
3449 if not rpc.call_snapshot_export(src_node, dev, dst_node.name,
3450 instance):
3451 logger.Error("could not export block device %s from node"
3452 " %s to node %s" %
3453 (dev.logical_id[1], src_node, dst_node.name))
3454 if not rpc.call_blockdev_remove(src_node, dev):
3455 logger.Error("could not remove snapshot block device %s from"
3456 " node %s" % (dev.logical_id[1], src_node))
3458 if not rpc.call_finalize_export(dst_node.name, instance, snap_disks):
3459 logger.Error("could not finalize export for instance %s on node %s" %
3460 (instance.name, dst_node.name))
3462 nodelist = self.cfg.GetNodeList()
3463 nodelist.remove(dst_node.name)
3465 # on one-node clusters nodelist will be empty after the removal
3466 # if we proceed the backup would be removed because OpQueryExports
3467 # substitutes an empty list with the full cluster node list.
3468 if nodelist:
3469 op = opcodes.OpQueryExports(nodes=nodelist)
3470 exportlist = self.processor.ChainOpCode(op, feedback_fn)
3471 for node in exportlist:
3472 if instance.name in exportlist[node]:
3473 if not rpc.call_export_remove(node, instance.name):
3474 logger.Error("could not remove older export for instance %s"
3475 " on node %s" % (instance.name, node))