# Copyright (C) 2006, 2007 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.

"""Module implementing the master-side code."""

# pylint: disable-msg=W0613,W0201

import os
import sha
import re
import platform
import time
import tempfile

from ganeti import rpc
from ganeti import ssh
from ganeti import logger
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import config
from ganeti import constants
from ganeti import objects
from ganeti import opcodes
from ganeti import ssconf


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement CheckPrereq which also fills in the opcode instance
      with all the fields (even if as None)
    - implement Exec
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements (REQ_CLUSTER,
      REQ_MASTER); note that all commands require root permissions

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_CLUSTER = True
  REQ_MASTER = True
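
  # A minimal sketch of a conforming subclass, for illustration only;
  # the class name and its opcode parameter are hypothetical and not
  # part of the actual opcode list:
  #
  #   class LUExampleNoop(LogicalUnit):
  #     HPATH = "example-noop"
  #     HTYPE = constants.HTYPE_CLUSTER
  #     _OP_REQP = ["message"]
  #
  #     def BuildHooksEnv(self):
  #       # no extra nodes; the hooks runner adds the master itself
  #       return {"OP_TARGET": self.op.message}, [], []
  #
  #     def CheckPrereq(self):
  #       if not self.op.message:
  #         raise errors.OpPrereqError("Parameter 'message' is empty")
  #
  #     def Exec(self, feedback_fn):
  #       feedback_fn(self.op.message)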

  def __init__(self, processor, op, cfg, sstore):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.processor = processor
    self.op = op
    self.cfg = cfg
    self.sstore = sstore
    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name)
    if self.REQ_CLUSTER:
      if not cfg.IsCluster():
        raise errors.OpPrereqError("Cluster not initialized yet,"
                                   " use 'gnt-cluster init' first.")
      if self.REQ_MASTER:
        master = sstore.GetMasterNode()
        if master != utils.HostInfo().name:
          raise errors.OpPrereqError("Commands must be run on the master"
                                     " node %s" % master)

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form; e.g. a short node name must be fully
    expanded after this method has successfully completed (so that
    hooks, logging, etc. work correctly).

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have 'GANETI_' prefixed as this will
    be handled in the hooks runner. Also note that additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    As for the node lists, the master should not be included in
    them, as it will be added by the hooks runner in case this LU
    requires a cluster to run on (otherwise we don't have a node
    list). If there are no nodes, an empty list (and not None) should
    be returned.

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError


class NoHooksLU(LogicalUnit):
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Build hooks env.

    This is a no-op, since we don't run hooks.

    """
    return {}, [], []


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  Args:
    nodes: list of node names (strings); an empty list means all nodes

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'")

  if nodes:
    wanted = []

    for name in nodes:
      node = lu.cfg.ExpandNodeName(name)
      if node is None:
        raise errors.OpPrereqError("No such node name '%s'" % name)
      wanted.append(node)
  else:
    wanted = lu.cfg.GetNodeList()
  return utils.NiceSort(wanted)
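
# Typical use (see LUQueryNodes below): a query LU resolves the
# user-supplied names once in CheckPrereq, so later phases only ever
# see fully-expanded node names:
#
#   self.wanted = _GetWantedNodes(self, self.op.names)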


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  Args:
    instances: list of instance names (strings); an empty list means all
               instances

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'")

  if instances:
    wanted = []

    for name in instances:
      instance = lu.cfg.ExpandInstanceName(name)
      if instance is None:
        raise errors.OpPrereqError("No such instance name '%s'" % name)
      wanted.append(instance)
  else:
    wanted = lu.cfg.GetInstanceList()
  return utils.NiceSort(wanted)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  Args:
    static: Static fields
    dynamic: Dynamic fields

  """
  static_fields = frozenset(static)
  dynamic_fields = frozenset(dynamic)

  all_fields = static_fields | dynamic_fields

  if not all_fields.issuperset(selected):
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(frozenset(selected).
                                          difference(all_fields)))
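
# Example (grounded in LUQueryNodeVolumes below): validating the
# user-selected output fields against what the LU can compute:
#
#   _CheckOutputFields(static=["node"],
#                      dynamic=["phys", "vg", "name", "size", "instance"],
#                      selected=self.op.output_fields)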


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics):
  """Builds instance related env variables for hooks from single variables.

  Args:
    secondary_nodes: List of secondary nodes as strings

  """
  env = {
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    }

  if nics:
    nic_count = len(nics)
    for idx, (ip, bridge) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_BRIDGE" % idx] = bridge
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  return env
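
# For an instance "inst1" on "node1" with one NIC the resulting dict
# would contain entries like the following (values hypothetical; the
# hooks runner later prefixes every key with 'GANETI_'):
#
#   INSTANCE_NAME=inst1          INSTANCE_PRIMARY=node1
#   INSTANCE_SECONDARIES=node2   INSTANCE_NIC_COUNT=1
#   INSTANCE_NIC0_IP=...         INSTANCE_NIC0_BRIDGE=xen-br0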


def _BuildInstanceHookEnvByObject(instance, override=None):
  """Builds instance related env variables for hooks from an object.

  Args:
    instance: objects.Instance object of instance
    override: dict of values to override

  """
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.status,
    'memory': instance.memory,
    'vcpus': instance.vcpus,
    'nics': [(nic.ip, nic.bridge) for nic in instance.nics],
    }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args)
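
# Hooks env for an instance LU can thus be built directly from the
# instance object, optionally adding extra keys, e.g. (see
# LUStartupInstance below):
#
#   env = {"FORCE": self.op.force}
#   env.update(_BuildInstanceHookEnvByObject(self.instance))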


def _UpdateEtcHosts(fullnode, ip):
  """Ensure a node has a correct entry in /etc/hosts.

  Args:
    fullnode - Fully qualified domain name of host. (str)
    ip       - IPv4 address of host (str)

  """
  node = fullnode.split(".", 1)[0]

  f = open('/etc/hosts', 'r+')

  inthere = False
  save_lines = []
  add_lines = []
  removed = False

  while True:
    rawline = f.readline()

    if not rawline:
      # End of file
      break

    line = rawline.split('\n')[0]

    # Strip off comments
    line = line.split('#')[0]

    if not line:
      # Entire line was comment, skip
      save_lines.append(rawline)
      continue

    fields = line.split()

    haveall = True
    havesome = False
    for spec in [ ip, fullnode, node ]:
      if spec not in fields:
        haveall = False
      if spec in fields:
        havesome = True

    if haveall:
      inthere = True
      save_lines.append(rawline)
      continue

    if havesome and not haveall:
      # Line (old, or manual?) which is missing some.  Remove.
      removed = True
      continue

    save_lines.append(rawline)

  if not inthere:
    add_lines.append('%s\t%s %s\n' % (ip, fullnode, node))

  if removed:
    save_lines = save_lines + add_lines

    # We removed a line, write a new file and replace old.
    fd, tmpname = tempfile.mkstemp('tmp', 'hosts_', '/etc')
    newfile = os.fdopen(fd, 'w')
    newfile.write(''.join(save_lines))
    newfile.close()
    os.rename(tmpname, '/etc/hosts')
  elif add_lines:
    # Simply appending a new line will do the trick.
    f.seek(0, 2)
    for add in add_lines:
      f.write(add)

  f.close()


def _UpdateKnownHosts(fullnode, ip, pubkey):
  """Ensure a node has a correct known_hosts entry.

  Args:
    fullnode - Fully qualified domain name of host. (str)
    ip       - IPv4 address of host (str)
    pubkey   - the public key of the cluster

  """
  if os.path.exists(constants.SSH_KNOWN_HOSTS_FILE):
    f = open(constants.SSH_KNOWN_HOSTS_FILE, 'r+')
  else:
    f = open(constants.SSH_KNOWN_HOSTS_FILE, 'w+')

  inthere = False
  save_lines = []
  add_lines = []
  removed = False

  while True:
    rawline = f.readline()
    logger.Debug('read %s' % (repr(rawline),))

    if not rawline:
      # End of file
      break

    parts = rawline.rstrip('\r\n').split()

    # Ignore unwanted lines
    if len(parts) >= 3 and not rawline.lstrip()[0] == '#':
      fields = parts[0].split(',')
      key = parts[2]

      haveall = True
      havesome = False
      for spec in [ ip, fullnode ]:
        if spec not in fields:
          haveall = False
        if spec in fields:
          havesome = True

      logger.Debug("key, pubkey = %s." % (repr((key, pubkey)),))
      if haveall and key == pubkey:
        inthere = True
        save_lines.append(rawline)
        logger.Debug("Keeping known_hosts '%s'." % (repr(rawline),))
        continue

      if havesome and (not haveall or key != pubkey):
        removed = True
        logger.Debug("Discarding known_hosts '%s'." % (repr(rawline),))
        continue

    save_lines.append(rawline)

  if not inthere:
    add_lines.append('%s,%s ssh-rsa %s\n' % (fullnode, ip, pubkey))
    logger.Debug("Adding known_hosts '%s'." % (repr(add_lines[-1]),))

  if removed:
    save_lines = save_lines + add_lines

    # Write a new file and replace old.
    fd, tmpname = tempfile.mkstemp('.tmp', 'known_hosts.',
                                   constants.DATA_DIR)
    newfile = os.fdopen(fd, 'w')
    newfile.write(''.join(save_lines))
    newfile.close()
    logger.Debug("Wrote new known_hosts.")
    os.rename(tmpname, constants.SSH_KNOWN_HOSTS_FILE)
  elif add_lines:
    # Simply appending a new line will do the trick.
    f.seek(0, 2)
    for add in add_lines:
      f.write(add)

  f.close()


def _HasValidVG(vglist, vgname):
  """Checks if the volume group list is valid.

  A non-None return value means there's an error, and the return value
  is the error message.

  """
  vgsize = vglist.get(vgname, None)
  if vgsize is None:
    return "volume group '%s' missing" % vgname
  elif vgsize < 20480:
    return ("volume group '%s' too small (20480MiB required, %dMiB found)" %
            (vgname, vgsize))
  return None
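
# Example: with vglist = {'xenvg': 409600}, _HasValidVG(vglist, 'xenvg')
# returns None (valid), while _HasValidVG(vglist, 'othervg') returns
# "volume group 'othervg' missing" (names and sizes hypothetical).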


def _InitSSHSetup(node):
  """Setup the SSH configuration for the cluster.

  This generates a dsa keypair for root, adds the pub key to the
  permitted hosts and adds the hostkey to its own known hosts.

  Args:
    node: the name of this host as a fqdn

  """
  priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)

  for name in priv_key, pub_key:
    if os.path.exists(name):
      utils.CreateBackup(name)
    utils.RemoveFile(name)

  result = utils.RunCmd(["ssh-keygen", "-t", "dsa",
                         "-f", priv_key,
                         "-q", "-N", ""])
  if result.failed:
    raise errors.OpExecError("Could not generate ssh keypair, error %s" %
                             result.output)

  f = open(pub_key, 'r')
  try:
    utils.AddAuthorizedKey(auth_keys, f.read(8192))
  finally:
    f.close()


def _InitGanetiServerSetup(ss):
  """Setup the necessary configuration for the initial node daemon.

  This creates the nodepass file containing the shared password for
  the cluster and also generates the SSL certificate.

  """
  # Create pseudo random password
  randpass = sha.new(os.urandom(64)).hexdigest()
  # and write it into sstore
  ss.SetKey(ss.SS_NODED_PASS, randpass)

  result = utils.RunCmd(["openssl", "req", "-new", "-newkey", "rsa:1024",
                         "-days", str(365*5), "-nodes", "-x509",
                         "-keyout", constants.SSL_CERT_FILE,
                         "-out", constants.SSL_CERT_FILE, "-batch"])
  if result.failed:
    raise errors.OpExecError("could not generate server ssl cert, command"
                             " %s had exitcode %s and error message %s" %
                             (result.cmd, result.exit_code, result.output))

  os.chmod(constants.SSL_CERT_FILE, 0400)

  result = utils.RunCmd([constants.NODE_INITD_SCRIPT, "restart"])

  if result.failed:
    raise errors.OpExecError("Could not start the node daemon, command %s"
                             " had exitcode %s and error %s" %
                             (result.cmd, result.exit_code, result.output))


def _CheckInstanceBridgesExist(instance):
  """Check that the bridges needed by an instance exist.

  """
  # check bridges existence
  brlist = [nic.bridge for nic in instance.nics]
  if not rpc.call_bridges_exist(instance.primary_node, brlist):
    raise errors.OpPrereqError("one or more target bridges %s do not"
                               " exist on destination node '%s'" %
                               (brlist, instance.primary_node))
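
# This helper is reused by the startup and reboot LUs below; a typical
# call from a CheckPrereq method (see LUStartupInstance) is simply:
#
#   _CheckInstanceBridgesExist(instance)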


class LUInitCluster(LogicalUnit):
  """Initialise the cluster.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["cluster_name", "hypervisor_type", "vg_name", "mac_prefix",
              "def_bridge", "master_netdev"]
  REQ_CLUSTER = False

  def BuildHooksEnv(self):
    """Build hooks env.

    Notes: Since we don't require a cluster, we must manually add
    ourselves in the post-run node list.

    """
    env = {"OP_TARGET": self.op.cluster_name}
    return env, [], [self.hostname.name]

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    if config.ConfigWriter.IsCluster():
      raise errors.OpPrereqError("Cluster is already initialised")

    self.hostname = hostname = utils.HostInfo()

    if hostname.ip.startswith("127."):
      raise errors.OpPrereqError("This host's IP resolves to the private"
                                 " range (%s). Please fix DNS or /etc/hosts." %
                                 (hostname.ip,))

    self.clustername = clustername = utils.HostInfo(self.op.cluster_name)

    if not utils.TcpPing(constants.LOCALHOST_IP_ADDRESS, hostname.ip,
                         constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Inconsistency: this host's name resolves"
                                 " to %s,\nbut this ip address does not"
                                 " belong to this host."
                                 " Aborting." % hostname.ip)

    secondary_ip = getattr(self.op, "secondary_ip", None)
    if secondary_ip and not utils.IsValidIP(secondary_ip):
      raise errors.OpPrereqError("Invalid secondary ip given")
    if (secondary_ip and
        secondary_ip != hostname.ip and
        (not utils.TcpPing(constants.LOCALHOST_IP_ADDRESS, secondary_ip,
                           constants.DEFAULT_NODED_PORT))):
      raise errors.OpPrereqError("You gave %s as secondary IP,\n"
                                 "but it does not belong to this host." %
                                 secondary_ip)
    self.secondary_ip = secondary_ip

    # checks presence of the volume group given
    vgstatus = _HasValidVG(utils.ListVolumeGroups(), self.op.vg_name)

    if vgstatus:
      raise errors.OpPrereqError("Error: %s" % vgstatus)

    if not re.match("^[0-9a-z]{2}:[0-9a-z]{2}:[0-9a-z]{2}$",
                    self.op.mac_prefix):
      raise errors.OpPrereqError("Invalid mac prefix given '%s'" %
                                 self.op.mac_prefix)

    if self.op.hypervisor_type not in hypervisor.VALID_HTYPES:
      raise errors.OpPrereqError("Invalid hypervisor type given '%s'" %
                                 self.op.hypervisor_type)

    result = utils.RunCmd(["ip", "link", "show", "dev", self.op.master_netdev])
    if result.failed:
      raise errors.OpPrereqError("Invalid master netdev given (%s): '%s'" %
                                 (self.op.master_netdev,
                                  result.output.strip()))

  def Exec(self, feedback_fn):
    """Initialize the cluster.

    """
    clustername = self.clustername
    hostname = self.hostname

    # set up the simple store
    self.sstore = ss = ssconf.SimpleStore()
    ss.SetKey(ss.SS_HYPERVISOR, self.op.hypervisor_type)
    ss.SetKey(ss.SS_MASTER_NODE, hostname.name)
    ss.SetKey(ss.SS_MASTER_IP, clustername.ip)
    ss.SetKey(ss.SS_MASTER_NETDEV, self.op.master_netdev)
    ss.SetKey(ss.SS_CLUSTER_NAME, clustername.name)

    # set up the inter-node password and certificate
    _InitGanetiServerSetup(ss)

    # start the master ip
    rpc.call_node_start_master(hostname.name)

    # set up ssh config and /etc/hosts
    f = open(constants.SSH_HOST_RSA_PUB, 'r')
    try:
      sshline = f.read()
    finally:
      f.close()
    sshkey = sshline.split(" ")[1]

    _UpdateEtcHosts(hostname.name, hostname.ip)

    _UpdateKnownHosts(hostname.name, hostname.ip, sshkey)

    _InitSSHSetup(hostname.name)

    # init of cluster config file
    self.cfg = cfgw = config.ConfigWriter()
    cfgw.InitConfig(hostname.name, hostname.ip, self.secondary_ip,
                    sshkey, self.op.mac_prefix,
                    self.op.vg_name, self.op.def_bridge)


class LUDestroyCluster(NoHooksLU):
  """Logical unit for destroying the cluster.

  """
  _OP_REQP = []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signalled by raising errors.OpPrereqError.

    """
    master = self.sstore.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1))
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist))

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
    utils.CreateBackup(priv_key)
    utils.CreateBackup(pub_key)
    rpc.call_node_leave_cluster(self.sstore.GetMasterNode())


class LUVerifyCluster(NoHooksLU):
  """Verifies the cluster status.

  """
  _OP_REQP = []

  def _VerifyNode(self, node, file_list, local_cksum, vglist, node_result,
                  remote_version, feedback_fn):
    """Run multiple tests against a node.

    Test list:
      - compares ganeti version
      - checks vg existence and size > 20G
      - checks config file checksum
      - checks ssh to other nodes

    Args:
      node: name of the node to check
      file_list: required list of files
      local_cksum: dictionary of local files and their checksums

    """
    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    if not remote_version:
      feedback_fn(" - ERROR: connection to %s failed" % (node))
      return True

    if local_version != remote_version:
      feedback_fn(" - ERROR: sw version mismatch: master %s, node(%s) %s" %
                  (local_version, node, remote_version))
      return True

    # checks vg existence and size > 20G
    bad = False
    if not vglist:
      feedback_fn(" - ERROR: unable to check volume groups on node %s." %
                  (node,))
      bad = True
    else:
      vgstatus = _HasValidVG(vglist, self.cfg.GetVGName())
      if vgstatus:
        feedback_fn(" - ERROR: %s on node %s" % (vgstatus, node))
        bad = True

    # checks config file checksum

    if 'filelist' not in node_result:
      bad = True
      feedback_fn(" - ERROR: node hasn't returned file checksum data")
    else:
      remote_cksum = node_result['filelist']
      for file_name in file_list:
        if file_name not in remote_cksum:
          bad = True
          feedback_fn(" - ERROR: file '%s' missing" % file_name)
        elif remote_cksum[file_name] != local_cksum[file_name]:
          bad = True
          feedback_fn(" - ERROR: file '%s' has wrong checksum" % file_name)

    if 'nodelist' not in node_result:
      bad = True
      feedback_fn(" - ERROR: node hasn't returned node connectivity data")
    else:
      if node_result['nodelist']:
        bad = True
        for node in node_result['nodelist']:
          feedback_fn(" - ERROR: communication with node '%s': %s" %
                      (node, node_result['nodelist'][node]))
    hyp_result = node_result.get('hypervisor', None)
    if hyp_result is not None:
      feedback_fn(" - ERROR: hypervisor verify failure: '%s'" % hyp_result)
      bad = True
    return bad

  def _VerifyInstance(self, instance, node_vol_is, node_instance, feedback_fn):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    bad = False

    instancelist = self.cfg.GetInstanceList()
    if not instance in instancelist:
      feedback_fn(" - ERROR: instance %s not in instance list %s" %
                  (instance, instancelist))
      bad = True

    instanceconfig = self.cfg.GetInstanceInfo(instance)
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      for volume in node_vol_should[node]:
        if node not in node_vol_is or volume not in node_vol_is[node]:
          feedback_fn(" - ERROR: volume %s missing on node %s" %
                      (volume, node))
          bad = True

    if not instanceconfig.status == 'down':
      if not instance in node_instance[node_current]:
        feedback_fn(" - ERROR: instance %s not running on node %s" %
                    (instance, node_current))
        bad = True

    for node in node_instance:
      if (not node == node_current):
        if instance in node_instance[node]:
          feedback_fn(" - ERROR: instance %s should not run on node %s" %
                      (instance, node))
          bad = True

    return bad

  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is, feedback_fn):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    """
    bad = False

    for node in node_vol_is:
      for volume in node_vol_is[node]:
        if node not in node_vol_should or volume not in node_vol_should[node]:
          feedback_fn(" - ERROR: volume %s on node %s should not exist" %
                      (volume, node))
          bad = True
    return bad

  def _VerifyOrphanInstances(self, instancelist, node_instance, feedback_fn):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    bad = False
    for node in node_instance:
      for runninginstance in node_instance[node]:
        if runninginstance not in instancelist:
          feedback_fn(" - ERROR: instance %s on node %s should not exist" %
                      (runninginstance, node))
          bad = True
    return bad

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various test on nodes.

    """
    bad = False
    feedback_fn("* Verifying global settings")
    self.cfg.VerifyConfig()

    vg_name = self.cfg.GetVGName()
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    node_volume = {}
    node_instance = {}

    # FIXME: verify OS list
    # do local checksums
    file_names = list(self.sstore.GetFileList())
    file_names.append(constants.SSL_CERT_FILE)
    file_names.append(constants.CLUSTER_CONF_FILE)
    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    all_volumeinfo = rpc.call_volume_list(nodelist, vg_name)
    all_instanceinfo = rpc.call_instance_list(nodelist)
    all_vglist = rpc.call_vg_list(nodelist)
    node_verify_param = {
      'filelist': file_names,
      'nodelist': nodelist,
      }
    all_nvinfo = rpc.call_node_verify(nodelist, node_verify_param)
    all_rversion = rpc.call_version(nodelist)

    for node in nodelist:
      feedback_fn("* Verifying node %s" % node)
      result = self._VerifyNode(node, file_names, local_checksums,
                                all_vglist[node], all_nvinfo[node],
                                all_rversion[node], feedback_fn)
      bad = bad or result

      # node_volume
      volumeinfo = all_volumeinfo[node]

      if type(volumeinfo) != dict:
        feedback_fn(" - ERROR: connection to %s failed" % (node,))
        bad = True
        continue

      node_volume[node] = volumeinfo

      # node_instance
      nodeinstance = all_instanceinfo[node]
      if type(nodeinstance) != list:
        feedback_fn(" - ERROR: connection to %s failed" % (node,))
        bad = True
        continue

      node_instance[node] = nodeinstance

    node_vol_should = {}

    for instance in instancelist:
      feedback_fn("* Verifying instance %s" % instance)
      result = self._VerifyInstance(instance, node_volume, node_instance,
                                    feedback_fn)
      bad = bad or result

      inst_config = self.cfg.GetInstanceInfo(instance)

      inst_config.MapLVsByNode(node_vol_should)

    feedback_fn("* Verifying orphan volumes")
    result = self._VerifyOrphanVolumes(node_vol_should, node_volume,
                                       feedback_fn)
    bad = bad or result

    feedback_fn("* Verifying remaining instances")
    result = self._VerifyOrphanInstances(instancelist, node_instance,
                                         feedback_fn)
    bad = bad or result

    return int(bad)


class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.sstore.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    mn = self.sstore.GetMasterNode()
    return env, [mn], [mn]

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = utils.HostInfo(self.op.name)

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.sstore.GetClusterName()
    old_ip = self.sstore.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed")
    if new_ip != old_ip:
      result = utils.RunCmd(["fping", "-q", new_ip])
      if not result.failed:
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network. Aborting." %
                                   new_ip)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip
    ss = self.sstore

    # shutdown the master IP
    master = ss.GetMasterNode()
    if not rpc.call_node_stop_master(master):
      raise errors.OpExecError("Could not disable the master role")

    try:
      # modify the sstore
      ss.SetKey(ss.SS_MASTER_IP, ip)
      ss.SetKey(ss.SS_CLUSTER_NAME, clustername)

      # Distribute updated ss config to all nodes
      myself = self.cfg.GetNodeInfo(master)
      dist_nodes = self.cfg.GetNodeList()
      if myself.name in dist_nodes:
        dist_nodes.remove(myself.name)

      logger.Debug("Copying updated ssconf data to all nodes")
      for keyname in [ss.SS_CLUSTER_NAME, ss.SS_MASTER_IP]:
        fname = ss.KeyToFilename(keyname)
        result = rpc.call_upload_file(dist_nodes, fname)
        for to_node in dist_nodes:
          if not result[to_node]:
            logger.Error("copy of file %s to node %s failed" %
                         (fname, to_node))
    finally:
      if not rpc.call_node_start_master(master):
        logger.Error("Could not re-enable the master role on the master,\n"
                     "please restart manually.")


def _WaitForSync(cfgw, instance, oneshot=False, unlock=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks:
    return True

  if not oneshot:
    logger.ToStdout("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in instance.disks:
    cfgw.SetDiskID(dev, node)

  retries = 0
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = rpc.call_blockdev_getmirrorstatus(node, instance.disks)
    if not rstats:
      logger.ToStderr("Can't get any data from node %s" % node)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    retries = 0
    for i in range(len(rstats)):
      mstat = rstats[i]
      if mstat is None:
        logger.ToStderr("Can't compute data for node %s/%s" %
                        (node, instance.disks[i].iv_name))
        continue
      perc_done, est_time, is_degraded = mstat
      cumul_degraded = cumul_degraded or (is_degraded and perc_done is None)
      if perc_done is not None:
        done = False
        if est_time is not None:
          rem_time = "%d estimated seconds remaining" % est_time
          max_time = est_time
        else:
          rem_time = "no time estimate"
        logger.ToStdout("- device %s: %5.2f%% done, %s" %
                        (instance.disks[i].iv_name, perc_done, rem_time))
    if done or oneshot:
      break

    if unlock:
      utils.Unlock('cmd')
    try:
      time.sleep(min(60, max_time))
    finally:
      if unlock:
        utils.Lock('cmd')

  if done:
    logger.ToStdout("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
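
# _WaitForSync is typically invoked right after a mirror has been set
# up or changed, e.g. (hypothetical call site, return value treated as
# "not degraded"):
#
#   if not _WaitForSync(self.cfg, instance):
#     raise errors.OpExecError("Disk sync-out failed")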


def _CheckDiskConsistency(cfgw, dev, node, on_primary):
  """Check that mirrors are not degraded.

  """
  cfgw.SetDiskID(dev, node)

  result = True
  if on_primary or dev.AssembleOnSecondary():
    rstats = rpc.call_blockdev_find(node, dev)
    if not rstats:
      logger.ToStderr("Can't get any data from node %s" % node)
      result = False
    else:
      result = result and (not rstats[5])
  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(cfgw, child, node, on_primary)

  return result


class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_REQP = []

  def CheckPrereq(self):
    """Check prerequisites.

    This always succeeds, since this is a pure query LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    """
    node_list = self.cfg.GetNodeList()
    node_data = rpc.call_os_diagnose(node_list)
    if node_data == False:
      raise errors.OpExecError("Can't gather the list of OSes")
    return node_data


class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would not allow itself to run.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
    all_nodes = self.cfg.GetNodeList()
    all_nodes.remove(self.op.node_name)
    return env, all_nodes, all_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
    - the node exists in the configuration
    - it does not have primary or secondary instances
    - it's not the master

    Any errors are signalled by raising errors.OpPrereqError.

    """
    node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
    if node is None:
      raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name)

    instance_list = self.cfg.GetInstanceList()

    masternode = self.sstore.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.")

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name == instance.primary_node:
        raise errors.OpPrereqError("Instance %s still running on the node,"
                                   " please remove first." % instance_name)
      if node.name in instance.secondary_nodes:
        raise errors.OpPrereqError("Instance %s has node as a secondary,"
                                   " please remove first." % instance_name)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logger.Info("stopping the node daemon and removing configs from node %s" %
                node.name)

    rpc.call_node_leave_cluster(node.name)

    ssh.SSHCall(node.name, 'root', "%s stop" % constants.NODE_INITD_SCRIPT)

    logger.Info("Removing node %s from config" % node.name)

    self.cfg.RemoveNode(node.name)


class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  _OP_REQP = ["output_fields", "names"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the fields required are valid output fields.

    """
    self.dynamic_fields = frozenset(["dtotal", "dfree",
                                     "mtotal", "mnode", "mfree",
                                     "bootid"])

    _CheckOutputFields(static=["name", "pinst_cnt", "sinst_cnt",
                               "pinst_list", "sinst_list",
                               "pip", "sip"],
                       dynamic=self.dynamic_fields,
                       selected=self.op.output_fields)

    self.wanted = _GetWantedNodes(self, self.op.names)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.wanted
    nodelist = [self.cfg.GetNodeInfo(name) for name in nodenames]

    # begin data gathering

    if self.dynamic_fields.intersection(self.op.output_fields):
      live_data = {}
      node_data = rpc.call_node_info(nodenames, self.cfg.GetVGName())
      for name in nodenames:
        nodeinfo = node_data.get(name, None)
        if nodeinfo:
          live_data[name] = {
            "mtotal": utils.TryConvert(int, nodeinfo['memory_total']),
            "mnode": utils.TryConvert(int, nodeinfo['memory_dom0']),
            "mfree": utils.TryConvert(int, nodeinfo['memory_free']),
            "dtotal": utils.TryConvert(int, nodeinfo['vg_size']),
            "dfree": utils.TryConvert(int, nodeinfo['vg_free']),
            "bootid": nodeinfo['bootid'],
            }
        else:
          live_data[name] = {}
    else:
      live_data = dict.fromkeys(nodenames, {})

    node_to_primary = dict([(name, set()) for name in nodenames])
    node_to_secondary = dict([(name, set()) for name in nodenames])

    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    if inst_fields & frozenset(self.op.output_fields):
      instancelist = self.cfg.GetInstanceList()

      for instance_name in instancelist:
        inst = self.cfg.GetInstanceInfo(instance_name)
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    # end data gathering

    output = []
    for node in nodelist:
      node_output = []
      for field in self.op.output_fields:
        if field == "name":
          val = node.name
        elif field == "pinst_list":
          val = list(node_to_primary[node.name])
        elif field == "sinst_list":
          val = list(node_to_secondary[node.name])
        elif field == "pinst_cnt":
          val = len(node_to_primary[node.name])
        elif field == "sinst_cnt":
          val = len(node_to_secondary[node.name])
        elif field == "pip":
          val = node.primary_ip
        elif field == "sip":
          val = node.secondary_ip
        elif field in self.dynamic_fields:
          val = live_data[node.name].get(field, None)
        else:
          raise errors.ParameterError(field)
        node_output.append(val)
      output.append(node_output)

    return output


class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_REQP = ["nodes", "output_fields"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the fields required are valid output fields.

    """
    self.nodes = _GetWantedNodes(self, self.op.nodes)

    _CheckOutputFields(static=["node"],
                       dynamic=["phys", "vg", "name", "size", "instance"],
                       selected=self.op.output_fields)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.nodes
    volumes = rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    output = []
    for node in nodenames:
      if node not in volumes or not volumes[node]:
        continue

      node_vols = volumes[node][:]
      node_vols.sort(key=lambda vol: vol['dev'])

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                break
            else:
              inst = None
            val = inst and inst.name
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      }
    nodes_0 = self.cfg.GetNodeList()
    nodes_1 = nodes_0 + [self.op.node_name, ]
    return env, nodes_0, nodes_1

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
    - the new node is not already in the config
    - it is resolvable
    - its parameters (single/dual homed) match the cluster

    Any errors are signalled by raising errors.OpPrereqError.

    """
    node_name = self.op.node_name
    cfg = self.cfg

    dns_data = utils.HostInfo(node_name)

    node = dns_data.name
    primary_ip = self.op.primary_ip = dns_data.ip
    secondary_ip = getattr(self.op, "secondary_ip", None)
    if secondary_ip is None:
      secondary_ip = primary_ip
    if not utils.IsValidIP(secondary_ip):
      raise errors.OpPrereqError("Invalid secondary IP given")
    self.op.secondary_ip = secondary_ip
    node_list = cfg.GetNodeList()
    if node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration"
                                 % node)

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)
      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.sstore.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no private ip but the"
                                   " new node has one")
      else:
        raise errors.OpPrereqError("The master has a private ip but the"
                                   " new node doesn't have one")

    # checks reachability
    if not utils.TcpPing(utils.HostInfo().name,
                         primary_ip,
                         constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping")

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not utils.TcpPing(myself.secondary_ip,
                           secondary_ip,
                           constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError(
          "Node secondary ip not reachable by TCP based ping to noded port")

    self.new_node = objects.Node(name=node,
                                 primary_ip=primary_ip,
                                 secondary_ip=secondary_ip)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # set up inter-node password and certificate and restarts the node daemon
    gntpass = self.sstore.GetNodeDaemonPassword()
    if not re.match('^[a-zA-Z0-9.]{1,64}$', gntpass):
      raise errors.OpExecError("ganeti password corruption detected")
    f = open(constants.SSL_CERT_FILE)
    try:
      gntpem = f.read(8192)
    finally:
      f.close()
    # in the base64 pem encoding, neither '!' nor '.' are valid chars,
    # so we use this to detect an invalid certificate; as long as the
    # cert doesn't contain this, the here-document will be correctly
    # parsed by the shell sequence below
    if re.search('^!EOF\.', gntpem, re.MULTILINE):
      raise errors.OpExecError("invalid PEM encoding in the SSL certificate")
    if not gntpem.endswith("\n"):
      raise errors.OpExecError("PEM must end with newline")
    logger.Info("copy cluster pass to %s and starting the node daemon" % node)

    # and then connect with ssh to set password and start ganeti-noded
    # note that all the below variables are sanitized at this point,
    # either by being constants or by the checks above
    ss = self.sstore
    mycommand = ("umask 077 && "
                 "echo '%s' > '%s' && "
                 "cat > '%s' << '!EOF.' && \n"
                 "%s!EOF.\n%s restart" %
                 (gntpass, ss.KeyToFilename(ss.SS_NODED_PASS),
                  constants.SSL_CERT_FILE, gntpem,
                  constants.NODE_INITD_SCRIPT))

    result = ssh.SSHCall(node, 'root', mycommand, batch=False, ask_key=True)
    if result.failed:
      raise errors.OpExecError("Remote command on node %s, error: %s,"
                               " output: %s" %
                               (node, result.fail_reason, result.output))

    # check connectivity
    time.sleep(4)

    result = rpc.call_version([node])[node]
    if result:
      if constants.PROTOCOL_VERSION == result:
        logger.Info("communication to node %s fine, sw version %s match" %
                    (node, result))
      else:
        raise errors.OpExecError("Version mismatch master version %s,"
                                 " node version %s" %
                                 (constants.PROTOCOL_VERSION, result))
    else:
      raise errors.OpExecError("Cannot get version from the new node")

    # setup ssh on node
    logger.Info("copy ssh key to node %s" % node)
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
    keyarray = []
    keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
                constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
                priv_key, pub_key]

    for i in keyfiles:
      f = open(i, 'r')
      try:
        keyarray.append(f.read())
      finally:
        f.close()

    result = rpc.call_node_add(node, keyarray[0], keyarray[1], keyarray[2],
                               keyarray[3], keyarray[4], keyarray[5])

    if not result:
      raise errors.OpExecError("Cannot transfer ssh keys to the new node")

    # Add node to our /etc/hosts, and add key to known_hosts
    _UpdateEtcHosts(new_node.name, new_node.primary_ip)
    _UpdateKnownHosts(new_node.name, new_node.primary_ip,
                      self.cfg.GetHostKey())

    if new_node.secondary_ip != new_node.primary_ip:
      if not rpc.call_node_tcp_ping(new_node.name,
                                    constants.LOCALHOST_IP_ADDRESS,
                                    new_node.secondary_ip,
                                    constants.DEFAULT_NODED_PORT,
                                    10, False):
        raise errors.OpExecError("Node claims it doesn't have the"
                                 " secondary ip you gave (%s).\n"
                                 "Please fix and re-run this command." %
                                 new_node.secondary_ip)

    success, msg = ssh.VerifyNodeHostname(node)
    if not success:
      raise errors.OpExecError("Node '%s' claims it has a different hostname"
                               " than the one the resolver gives: %s.\n"
                               "Please fix and re-run this command." %
                               (node, msg))

    # Distribute updated /etc/hosts and known_hosts to all nodes,
    # including the node just added
    myself = self.cfg.GetNodeInfo(self.sstore.GetMasterNode())
    dist_nodes = self.cfg.GetNodeList() + [node]
    if myself.name in dist_nodes:
      dist_nodes.remove(myself.name)

    logger.Debug("Copying hosts and known_hosts to all nodes")
    for fname in ("/etc/hosts", constants.SSH_KNOWN_HOSTS_FILE):
      result = rpc.call_upload_file(dist_nodes, fname)
      for to_node in dist_nodes:
        if not result[to_node]:
          logger.Error("copy of file %s to node %s failed" %
                       (fname, to_node))

    to_copy = ss.GetFileList()
    for fname in to_copy:
      if not ssh.CopyFileToNode(node, fname):
        logger.Error("could not copy file %s to node %s" % (fname, node))

    logger.Info("adding node %s to cluster.conf" % node)
    self.cfg.AddNode(new_node)


class LUMasterFailover(LogicalUnit):
  """Failover the master node to the current node.

  This is a special LU in that it must run on a non-master node.

  """
  HPATH = "master-failover"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_MASTER = False

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the new master only in the pre phase, and on all
    the nodes in the post phase.

    """
    env = {
      "OP_TARGET": self.new_master,
      "NEW_MASTER": self.new_master,
      "OLD_MASTER": self.old_master,
      }
    return env, [self.new_master], self.cfg.GetNodeList()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we are not already the master.

    """
    self.new_master = utils.HostInfo().name
    self.old_master = self.sstore.GetMasterNode()

    if self.old_master == self.new_master:
      raise errors.OpPrereqError("This command must be run on the node"
                                 " where you want the new master to be.\n"
                                 "%s is already the master" %
                                 self.old_master)

  def Exec(self, feedback_fn):
    """Failover the master node.

    This command, when run on a non-master node, will cause the current
    master to cease being master, and the non-master to become new
    master.

    """
    #TODO: do not rely on gethostname returning the FQDN
    logger.Info("setting master to %s, old master: %s" %
                (self.new_master, self.old_master))

    if not rpc.call_node_stop_master(self.old_master):
      logger.Error("could not disable the master role on the old master"
                   " %s, please disable manually" % self.old_master)

    ss = self.sstore
    ss.SetKey(ss.SS_MASTER_NODE, self.new_master)
    if not rpc.call_upload_file(self.cfg.GetNodeList(),
                                ss.KeyToFilename(ss.SS_MASTER_NODE)):
      logger.Error("could not distribute the new simple store master file"
                   " to the other nodes, please check.")

    if not rpc.call_node_start_master(self.new_master):
      logger.Error("could not start the master role on the new master"
                   " %s, please check" % self.new_master)
      feedback_fn("Error in activating the master IP on the new master,\n"
                  "please fix manually.")


class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  _OP_REQP = []
  REQ_MASTER = False

  def CheckPrereq(self):
    """No prerequisites needed for this LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    result = {
      "name": self.sstore.GetClusterName(),
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": constants.OS_API_VERSION,
      "export_version": constants.EXPORT_VERSION,
      "master": self.sstore.GetMasterNode(),
      "architecture": (platform.architecture()[0], platform.machine()),
      }

    return result


class LUClusterCopyFile(NoHooksLU):
  """Copy file to cluster.

  """
  _OP_REQP = ["nodes", "filename"]

  def CheckPrereq(self):
    """Check prerequisites.

    It should check that the named file exists and that the given list
    of nodes is valid.

    """
    if not os.path.exists(self.op.filename):
      raise errors.OpPrereqError("No such filename '%s'" % self.op.filename)

    self.nodes = _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Copy a file from the master to some nodes.

    The source file is given by self.op.filename and the target nodes
    by self.op.nodes (an empty list means all nodes).

    """
    filename = self.op.filename

    myname = utils.HostInfo().name

    for node in self.nodes:
      if node == myname:
        continue
      if not ssh.CopyFileToNode(node, filename):
        logger.Error("Copy of file %s to node %s failed" % (filename, node))


class LUDumpClusterConfig(NoHooksLU):
  """Return a text-representation of the cluster-config.

  """
  _OP_REQP = []

  def CheckPrereq(self):
    """No prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    return self.cfg.DumpConfig()


class LURunClusterCommand(NoHooksLU):
  """Run a command on some nodes.

  """
  _OP_REQP = ["command", "nodes"]

  def CheckPrereq(self):
    """Check prerequisites.

    It checks that the given list of nodes is valid.

    """
    self.nodes = _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Run a command on some nodes.

    """
    data = []
    for node in self.nodes:
      result = ssh.SSHCall(node, "root", self.op.command)
      data.append((node, result.output, result.exit_code))

    return data


class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_REQP = ["instance_name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)
    self.instance = instance

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = _AssembleInstanceDisks(self.instance, self.cfg)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(instance, cfg, ignore_secondaries=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  Args:
    instance: a ganeti.objects.Instance object
    ignore_secondaries: if true, errors on secondary nodes won't result
                        in an error return from the function

  Returns:
    false if the operation failed
    list of (host, instance_visible_name, node_visible_name) if the operation
    succeeded with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  for inst_disk in instance.disks:
    master_result = None
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      cfg.SetDiskID(node_disk, node)
      is_primary = node == instance.primary_node
      result = rpc.call_blockdev_assemble(node, node_disk,
                                          instance.name, is_primary)
      if not result:
        logger.Error("could not prepare block device %s on node %s (is_pri"
                     "mary=%s)" % (inst_disk.iv_name, node, is_primary))
        if is_primary or not ignore_secondaries:
          disks_ok = False
      if is_primary:
        master_result = result
    device_info.append((instance.primary_node, inst_disk.iv_name,
                        master_result))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in instance.disks:
    cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
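
# Callers are expected to check disks_ok before trusting device_info,
# e.g. (sketch mirroring LUActivateInstanceDisks above):
#
#   disks_ok, disks_info = _AssembleInstanceDisks(instance, cfg)
#   if not disks_ok:
#     _ShutdownInstanceDisks(instance, cfg)
#     raise errors.OpExecError("Cannot activate block devices")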


def _StartInstanceDisks(cfg, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, dummy = _AssembleInstanceDisks(instance, cfg,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(instance, cfg)
    if force is not None and not force:
      logger.Error("If the message above refers to a secondary node,"
                   " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_REQP = ["instance_name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)
    self.instance = instance

  def Exec(self, feedback_fn):
    """Deactivate the disks.

    """
    instance = self.instance
    ins_l = rpc.call_instance_list([instance.primary_node])
    ins_l = ins_l[instance.primary_node]
    if not type(ins_l) is list:
      raise errors.OpExecError("Can't contact node '%s'" %
                               instance.primary_node)

    if self.instance.name in ins_l:
      raise errors.OpExecError("Instance is running, can't shutdown"
                               " block devices.")

    _ShutdownInstanceDisks(instance, self.cfg)


def _ShutdownInstanceDisks(instance, cfg, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is false, errors on the primary node are not
  ignored.

  """
  result = True
  for disk in instance.disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      cfg.SetDiskID(top_disk, node)
      if not rpc.call_blockdev_shutdown(node, top_disk):
        logger.Error("could not shutdown block device %s on node %s" %
                     (disk.iv_name, node))
        if not ignore_primary or node != instance.primary_node:
          result = False
  return result
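
# _ShutdownInstanceDisks is the counterpart of _AssembleInstanceDisks;
# the pair brackets operations that only need the disks up temporarily,
# e.g. (sketch based on LUReinstallInstance below):
#
#   _StartInstanceDisks(self.cfg, inst, None)
#   try:
#     ...  # work on the instance's disks
#   finally:
#     _ShutdownInstanceDisks(inst, self.cfg)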


class LUStartupInstance(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "force"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }
    env.update(_BuildInstanceHookEnvByObject(self.instance))
    nl = ([self.sstore.GetMasterNode(), self.instance.primary_node] +
          list(self.instance.secondary_nodes))
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)

    # check bridges existence
    _CheckInstanceBridgesExist(instance)

    self.instance = instance
    self.op.instance_name = instance.name

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force
    extra_args = getattr(self.op, "extra_args", "")

    node_current = instance.primary_node

    nodeinfo = rpc.call_node_info([node_current], self.cfg.GetVGName())
    if not nodeinfo:
      raise errors.OpExecError("Could not contact node %s for infos" %
                               (node_current))

    freememory = nodeinfo[node_current]['memory_free']
    memory = instance.memory
    if memory > freememory:
      raise errors.OpExecError("Not enough memory to start instance"
                               " %s on node %s"
                               " needed %s MiB, available %s MiB" %
                               (instance.name, node_current, memory,
                                freememory))

    _StartInstanceDisks(self.cfg, instance, force)

    if not rpc.call_instance_start(node_current, instance, extra_args):
      _ShutdownInstanceDisks(instance, self.cfg)
      raise errors.OpExecError("Could not start instance")

    self.cfg.MarkInstanceUp(instance.name)


class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      }
    env.update(_BuildInstanceHookEnvByObject(self.instance))
    nl = ([self.sstore.GetMasterNode(), self.instance.primary_node] +
          list(self.instance.secondary_nodes))
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)

    # check bridges existence
    _CheckInstanceBridgesExist(instance)

    self.instance = instance
    self.op.instance_name = instance.name

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type
    extra_args = getattr(self.op, "extra_args", "")

    node_current = instance.primary_node

    if reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
                           constants.INSTANCE_REBOOT_HARD,
                           constants.INSTANCE_REBOOT_FULL]:
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
                                  (constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL))

    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                       constants.INSTANCE_REBOOT_HARD]:
      if not rpc.call_instance_reboot(node_current, instance,
                                      reboot_type, extra_args):
        raise errors.OpExecError("Could not reboot instance")
    else:
      if not rpc.call_instance_shutdown(node_current, instance):
        raise errors.OpExecError("could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(instance, self.cfg)
      _StartInstanceDisks(self.cfg, instance, ignore_secondaries)
      if not rpc.call_instance_start(node_current, instance, extra_args):
        _ShutdownInstanceDisks(instance, self.cfg)
        raise errors.OpExecError("Could not start instance for full reboot")

    self.cfg.MarkInstanceUp(instance.name)


class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self.instance)
    nl = ([self.sstore.GetMasterNode(), self.instance.primary_node] +
          list(self.instance.secondary_nodes))
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)
    self.instance = instance

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    if not rpc.call_instance_shutdown(node_current, instance):
      logger.Error("could not shutdown instance")

    self.cfg.MarkInstanceDown(instance.name)
    _ShutdownInstanceDisks(instance, self.cfg)


class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self.instance)
    nl = ([self.sstore.GetMasterNode(), self.instance.primary_node] +
          list(self.instance.secondary_nodes))
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)
    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name)
    if instance.status != "down":
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name)
    remote_info = rpc.call_instance_info(instance.primary_node, instance.name)
    if remote_info:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name,
                                  instance.primary_node))

    self.op.os_type = getattr(self.op, "os_type", None)
    if self.op.os_type is not None:
      # OS verification
      pnode = self.cfg.GetNodeInfo(
        self.cfg.ExpandNodeName(instance.primary_node))
      if pnode is None:
        raise errors.OpPrereqError("Primary node '%s' is unknown" %
                                   instance.primary_node)
      os_obj = rpc.call_os_get(pnode.name, self.op.os_type)
      if not os_obj:
        raise errors.OpPrereqError("OS '%s' not in supported OS list for"
                                   " primary node" % self.op.os_type)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      self.cfg.AddInstance(inst)

    _StartInstanceDisks(self.cfg, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      if not rpc.call_instance_os_add(inst.primary_node, inst, "sda", "sdb"):
        raise errors.OpExecError("Could not install OS for instance %s"
                                 " on node %s" %
                                 (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(inst, self.cfg)


class LURenameInstance(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "new_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = ([self.sstore.GetMasterNode(), self.instance.primary_node] +
          list(self.instance.secondary_nodes))
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)
    if instance.status != "down":
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name)
    remote_info = rpc.call_instance_info(instance.primary_node, instance.name)
    if remote_info:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name,
                                  instance.primary_node))
    self.instance = instance

    # new name verification
    name_info = utils.HostInfo(self.op.new_name)

    self.op.new_name = new_name = name_info.name
    if not getattr(self.op, "ignore_ip", False):
      command = ["fping", "-q", name_info.ip]
      result = utils.RunCmd(command)
      if not result.failed:
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (name_info.ip, new_name))

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    self.cfg.RenameInstance(inst.name, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    _StartInstanceDisks(self.cfg, inst, None)
    try:
      if not rpc.call_instance_run_rename(inst.primary_node, inst, old_name,
                                          "sda", "sdb"):
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti)" %
               (inst.name, inst.primary_node))
        logger.Error(msg)
    finally:
      _ShutdownInstanceDisks(inst, self.cfg)


class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_failures"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs only on the master node, since the instance's other nodes
    may be unreachable while it is being removed.

    """
    env = _BuildInstanceHookEnvByObject(self.instance)
    nl = [self.sstore.GetMasterNode()]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)
    self.instance = instance

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logger.Info("shutting down instance %s on node %s" %
                (instance.name, instance.primary_node))

    if not rpc.call_instance_shutdown(instance.primary_node, instance):
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance")
      else:
        raise errors.OpExecError("Could not shutdown instance %s on node %s" %
                                 (instance.name, instance.primary_node))

    logger.Info("removing block devices for instance %s" % instance.name)

    if not _RemoveDisks(instance, self.cfg):
      if self.op.ignore_failures:
        feedback_fn("Warning: can't remove instance's disks")
      else:
        raise errors.OpExecError("Can't remove instance's disks")

    logger.Info("removing instance %s out of cluster config" % instance.name)

    self.cfg.RemoveInstance(instance.name)


class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  """
  _OP_REQP = ["output_fields", "names"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the fields required are valid output fields.

    """
    self.dynamic_fields = frozenset(["oper_state", "oper_ram"])
    _CheckOutputFields(static=["name", "os", "pnode", "snodes",
                               "admin_state", "admin_ram",
                               "disk_template", "ip", "mac", "bridge",
                               "sda_size", "sdb_size"],
                       dynamic=self.dynamic_fields,
                       selected=self.op.output_fields)

    self.wanted = _GetWantedInstances(self, self.op.names)

  def Exec(self, feedback_fn):
    """Computes the list of instances and their attributes.

    """
    instance_names = self.wanted
    instance_list = [self.cfg.GetInstanceInfo(iname) for iname
                     in instance_names]

    # begin data gathering

    nodes = frozenset([inst.primary_node for inst in instance_list])

    bad_nodes = []
    if self.dynamic_fields.intersection(self.op.output_fields):
      live_data = {}
      node_data = rpc.call_all_instances_info(nodes)
      for name in nodes:
        result = node_data[name]
        if result:
          live_data.update(result)
        elif result == False:
          bad_nodes.append(name)
        # else no instance is alive
    else:
      live_data = dict([(name, {}) for name in instance_names])

    # end data gathering

    output = []
    for instance in instance_list:
      iout = []
      for field in self.op.output_fields:
        if field == "name":
          val = instance.name
        elif field == "os":
          val = instance.os
        elif field == "pnode":
          val = instance.primary_node
        elif field == "snodes":
          val = list(instance.secondary_nodes)
        elif field == "admin_state":
          val = (instance.status != "down")
        elif field == "oper_state":
          if instance.primary_node in bad_nodes:
            val = None
          else:
            val = bool(live_data.get(instance.name))
        elif field == "admin_ram":
          val = instance.memory
        elif field == "oper_ram":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("memory", "?")
          else:
            val = "-"
        elif field == "disk_template":
          val = instance.disk_template
        elif field == "ip":
          val = instance.nics[0].ip
        elif field == "bridge":
          val = instance.nics[0].bridge
        elif field == "mac":
          val = instance.nics[0].mac
        elif field == "sda_size" or field == "sdb_size":
          disk = instance.FindDisk(field[:3])
          if disk is None:
            val = None
          else:
            val = disk.size
        else:
          raise errors.ParameterError(field)
        iout.append(val)
      output.append(iout)

    return output
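
# Illustrative note on LUQueryInstances.Exec above (values invented): with
#   output_fields = ["name", "pnode", "admin_state", "oper_ram"]
# it returns one row per instance, values in field order, e.g.:
#   [["inst1.example.com", "node1.example.com", True, 512],
#    ["inst2.example.com", "node2.example.com", False, "-"]]
# "oper_ram" is a dynamic field: "-" when the instance is not running and
# None when its primary node could not be contacted.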


class LUFailoverInstance(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_consistency"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      }
    env.update(_BuildInstanceHookEnvByObject(self.instance))
    nl = [self.sstore.GetMasterNode()] + list(self.instance.secondary_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.")

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "DT_REMOTE_RAID1 template")

    # check memory requirements on the secondary node
    target_node = secondary_nodes[0]
    nodeinfo = rpc.call_node_info([target_node], self.cfg.GetVGName())
    info = nodeinfo.get(target_node, None)
    if not info:
      raise errors.OpPrereqError("Cannot get current information"
                                 " from node '%s'" % target_node)
    if instance.memory > info['memory_free']:
      raise errors.OpPrereqError("Not enough memory on target node %s."
                                 " %d MB available, %d MB required" %
                                 (target_node, info['memory_free'],
                                  instance.memory))

    # check bridge existence
    brlist = [nic.bridge for nic in instance.nics]
    if not rpc.call_bridges_exist(target_node, brlist):
      raise errors.OpPrereqError("One or more target bridges %s does not"
                                 " exist on destination node '%s'" %
                                 (brlist, target_node))

    self.instance = instance

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]

    feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      # for remote_raid1, these are md over drbd
      if not _CheckDiskConsistency(self.cfg, dev, target_node, False):
        if not self.op.ignore_consistency:
          raise errors.OpExecError("Disk %s is degraded on target node,"
                                   " aborting failover." % dev.iv_name)

    feedback_fn("* checking target node resource availability")
    nodeinfo = rpc.call_node_info([target_node], self.cfg.GetVGName())

    if not nodeinfo:
      raise errors.OpExecError("Could not contact target node %s." %
                               target_node)

    free_memory = int(nodeinfo[target_node]['memory_free'])
    memory = instance.memory
    if memory > free_memory:
      raise errors.OpExecError("Not enough memory to create instance %s on"
                               " node %s. needed %s MiB, available %s MiB" %
                               (instance.name, target_node, memory,
                                free_memory))

    feedback_fn("* shutting down instance on source node")
    logger.Info("Shutting down instance %s on node %s" %
                (instance.name, source_node))

    if not rpc.call_instance_shutdown(source_node, instance):
      if self.op.ignore_consistency:
        logger.Error("Could not shutdown instance %s on node %s. Proceeding"
                     " anyway. Please make sure node %s is down" %
                     (instance.name, source_node, source_node))
      else:
        raise errors.OpExecError("Could not shutdown instance %s on node %s" %
                                 (instance.name, source_node))

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(instance, self.cfg, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.AddInstance(instance)

    feedback_fn("* activating the instance's disks on target node")
    logger.Info("Starting instance %s on node %s" %
                (instance.name, target_node))

    disks_ok, dummy = _AssembleInstanceDisks(instance, self.cfg,
                                             ignore_secondaries=True)
    if not disks_ok:
      _ShutdownInstanceDisks(instance, self.cfg)
      raise errors.OpExecError("Can't activate the instance's disks")

    feedback_fn("* starting the instance on the target node")
    if not rpc.call_instance_start(target_node, instance, None):
      _ShutdownInstanceDisks(instance, self.cfg)
      raise errors.OpExecError("Could not start instance %s on node %s." %
                               (instance.name, target_node))
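
# A minimal sketch of how a failover is driven from client code; the field
# names follow opcodes.OpFailoverInstance but the values are illustrative:
#   op = opcodes.OpFailoverInstance(instance_name="inst1.example.com",
#                                   ignore_consistency=False)
#   processor.ExecOpCode(op)
# The LU shuts the instance down on the primary, flips primary and
# secondary in the configuration, and restarts it on the new primary.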


def _CreateBlockDevOnPrimary(cfg, node, instance, device, info):
  """Create a tree of block devices on the primary node.

  This always creates all devices.

  """
  if device.children:
    for child in device.children:
      if not _CreateBlockDevOnPrimary(cfg, node, instance, child, info):
        return False

  cfg.SetDiskID(device, node)
  new_id = rpc.call_blockdev_create(node, device, device.size,
                                    instance.name, True, info)
  if not new_id:
    return False
  if device.physical_id is None:
    device.physical_id = new_id
  return True


def _CreateBlockDevOnSecondary(cfg, node, instance, device, force, info):
  """Create a tree of block devices on a secondary node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  """
  if device.CreateOnSecondary():
    force = True
  if device.children:
    for child in device.children:
      if not _CreateBlockDevOnSecondary(cfg, node, instance,
                                        child, force, info):
        return False

  if not force:
    return True
  cfg.SetDiskID(device, node)
  new_id = rpc.call_blockdev_create(node, device, device.size,
                                    instance.name, False, info)
  if not new_id:
    return False
  if device.physical_id is None:
    device.physical_id = new_id
  return True
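
# Example of the 'force' semantics above (illustrative): a DRBD device
# answers True to CreateOnSecondary(), so when the recursion reaches it,
# 'force' flips to True and its backing LVs are physically created on the
# secondary node too; plain LVs reached while force is still False are
# only recursed into, not created there.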


def _GenerateUniqueNames(cfg, exts):
  """Generate a suitable LV name for each of the given suffixes.

  This will generate one logical volume name per entry in 'exts', each
  prefixed with a freshly generated unique ID.

  """
  results = []
  for val in exts:
    new_id = cfg.GenerateUniqueID()
    results.append("%s%s" % (new_id, val))
  return results
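
# Illustrative example (the unique IDs below are invented):
#   _GenerateUniqueNames(cfg, [".sda", ".sdb"])
#   # -> ["0e3c9acc-....sda", "4f1d82aa-....sdb"]
# i.e. one LV name per suffix, each prefixed by a fresh unique ID.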


def _GenerateMDDRBDBranch(cfg, primary, secondary, size, names):
  """Generate a drbd device complete with its children.

  """
  port = cfg.AllocatePort()
  vgname = cfg.GetVGName()
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD7, size=size,
                          logical_id=(primary, secondary, port),
                          children=[dev_data, dev_meta])
  return drbd_dev


def _GenerateDRBD8Branch(cfg, primary, secondary, size, names, iv_name):
  """Generate a drbd8 device complete with its children.

  """
  port = cfg.AllocatePort()
  vgname = cfg.GetVGName()
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev
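
# Illustrative device tree built by the two branch generators above
# (sizes in MiB, names invented):
#
#   DRBD (primary, secondary, port)
#   +-- LV (vg, "<uuid>.sda_data")  size=<requested size>
#   +-- LV (vg, "<uuid>.sda_meta")  size=128
#
# i.e. a network mirror whose local storage on each node is a data LV of
# the requested size plus a fixed 128 MiB metadata LV.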


def _GenerateDiskTemplate(cfg, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_sz, swap_sz):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = cfg.GetVGName()
  if template_name == "diskless":
    disks = []
  elif template_name == "plain":
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(cfg, [".sda", ".sdb"])
    sda_dev = objects.Disk(dev_type=constants.LD_LV, size=disk_sz,
                           logical_id=(vgname, names[0]),
                           iv_name="sda")
    sdb_dev = objects.Disk(dev_type=constants.LD_LV, size=swap_sz,
                           logical_id=(vgname, names[1]),
                           iv_name="sdb")
    disks = [sda_dev, sdb_dev]
  elif template_name == "local_raid1":
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(cfg, [".sda_m1", ".sda_m2",
                                       ".sdb_m1", ".sdb_m2"])
    sda_dev_m1 = objects.Disk(dev_type=constants.LD_LV, size=disk_sz,
                              logical_id=(vgname, names[0]))
    sda_dev_m2 = objects.Disk(dev_type=constants.LD_LV, size=disk_sz,
                              logical_id=(vgname, names[1]))
    md_sda_dev = objects.Disk(dev_type=constants.LD_MD_R1, iv_name="sda",
                              size=disk_sz,
                              children=[sda_dev_m1, sda_dev_m2])
    sdb_dev_m1 = objects.Disk(dev_type=constants.LD_LV, size=swap_sz,
                              logical_id=(vgname, names[2]))
    sdb_dev_m2 = objects.Disk(dev_type=constants.LD_LV, size=swap_sz,
                              logical_id=(vgname, names[3]))
    md_sdb_dev = objects.Disk(dev_type=constants.LD_MD_R1, iv_name="sdb",
                              size=swap_sz,
                              children=[sdb_dev_m1, sdb_dev_m2])
    disks = [md_sda_dev, md_sdb_dev]
  elif template_name == constants.DT_REMOTE_RAID1:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    names = _GenerateUniqueNames(cfg, [".sda_data", ".sda_meta",
                                       ".sdb_data", ".sdb_meta"])
    drbd_sda_dev = _GenerateMDDRBDBranch(cfg, primary_node, remote_node,
                                         disk_sz, names[0:2])
    md_sda_dev = objects.Disk(dev_type=constants.LD_MD_R1, iv_name="sda",
                              children=[drbd_sda_dev], size=disk_sz)
    drbd_sdb_dev = _GenerateMDDRBDBranch(cfg, primary_node, remote_node,
                                         swap_sz, names[2:4])
    md_sdb_dev = objects.Disk(dev_type=constants.LD_MD_R1, iv_name="sdb",
                              children=[drbd_sdb_dev], size=swap_sz)
    disks = [md_sda_dev, md_sdb_dev]
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    names = _GenerateUniqueNames(cfg, [".sda_data", ".sda_meta",
                                       ".sdb_data", ".sdb_meta"])
    drbd_sda_dev = _GenerateDRBD8Branch(cfg, primary_node, remote_node,
                                        disk_sz, names[0:2], "sda")
    drbd_sdb_dev = _GenerateDRBD8Branch(cfg, primary_node, remote_node,
                                        swap_sz, names[2:4], "sdb")
    disks = [drbd_sda_dev, drbd_sdb_dev]
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks
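
# Illustrative example: _GenerateDiskTemplate(cfg, constants.DT_DRBD8,
# "inst1", "node1", ["node2"], 10240, 4096) returns two DRBD8 devices
# ("sda" and "sdb"), each backed by a data LV (10240 resp. 4096 MiB) plus
# a 128 MiB metadata LV; "plain" yields two bare LVs and "diskless" an
# empty list.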


def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CreateDisks(cfg, instance):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  Args:
    instance: the instance object

  Returns:
    True or False showing the success of the creation process

  """
  info = _GetInstanceInfoText(instance)

  for device in instance.disks:
    logger.Info("creating volume %s for instance %s" %
                (device.iv_name, instance.name))
    #HARDCODE
    for secondary_node in instance.secondary_nodes:
      if not _CreateBlockDevOnSecondary(cfg, secondary_node, instance,
                                        device, False, info):
        logger.Error("failed to create volume %s (%s) on secondary node %s!" %
                     (device.iv_name, device, secondary_node))
        return False
    #HARDCODE
    if not _CreateBlockDevOnPrimary(cfg, instance.primary_node,
                                    instance, device, info):
      logger.Error("failed to create volume %s on primary!" %
                   device.iv_name)
      return False
  return True


def _RemoveDisks(instance, cfg):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  Args:
    instance: the instance object

  Returns:
    True or False showing the success of the removal process

  """
  logger.Info("removing block devices for instance %s" % instance.name)

  result = True
  for device in instance.disks:
    for node, disk in device.ComputeNodeTree(instance.primary_node):
      cfg.SetDiskID(disk, node)
      if not rpc.call_blockdev_remove(node, disk):
        logger.Error("could not remove block device %s on node %s,"
                     " continuing anyway" %
                     (device.iv_name, node))
        result = False
  return result


class LUCreateInstance(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "mem_size", "disk_size", "pnode",
              "disk_template", "swap_size", "mode", "start", "vcpus",
              "wait_for_sync", "ip_check"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "INSTANCE_DISK_TEMPLATE": self.op.disk_template,
      "INSTANCE_DISK_SIZE": self.op.disk_size,
      "INSTANCE_SWAP_SIZE": self.op.swap_size,
      "INSTANCE_ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["INSTANCE_SRC_NODE"] = self.op.src_node
      env["INSTANCE_SRC_PATH"] = self.op.src_path
      env["INSTANCE_SRC_IMAGE"] = self.src_image

    env.update(_BuildInstanceHookEnv(name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.instance_status,
      os_type=self.op.os_type,
      memory=self.op.mem_size,
      vcpus=self.op.vcpus,
      nics=[(self.inst_ip, self.op.bridge)],
    ))

    nl = ([self.sstore.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.mode not in (constants.INSTANCE_CREATE,
                            constants.INSTANCE_IMPORT):
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
                                 self.op.mode)

    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = getattr(self.op, "src_node", None)
      src_path = getattr(self.op, "src_path", None)
      if src_node is None or src_path is None:
        raise errors.OpPrereqError("Importing an instance requires source"
                                   " node and path options")
      src_node_full = self.cfg.ExpandNodeName(src_node)
      if src_node_full is None:
        raise errors.OpPrereqError("Unknown source node '%s'" % src_node)
      self.op.src_node = src_node = src_node_full

      if not os.path.isabs(src_path):
        raise errors.OpPrereqError("The source path must be absolute")

      export_info = rpc.call_export_info(src_node, src_path)

      if not export_info:
        raise errors.OpPrereqError("No export found in dir %s" % src_path)

      if not export_info.has_section(constants.INISECT_EXP):
        raise errors.ProgrammerError("Corrupted export config")

      ei_version = export_info.get(constants.INISECT_EXP, 'version')
      if (int(ei_version) != constants.EXPORT_VERSION):
        raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                   (ei_version, constants.EXPORT_VERSION))

      if int(export_info.get(constants.INISECT_INS, 'disk_count')) > 1:
        raise errors.OpPrereqError("Can't import instance with more than"
                                   " one data disk")

      # FIXME: are the old os-es, disk sizes, etc. useful?
      self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
      diskimage = os.path.join(src_path, export_info.get(constants.INISECT_INS,
                                                         'disk0_dump'))
      self.src_image = diskimage
    else: # INSTANCE_CREATE
      if getattr(self.op, "os_type", None) is None:
        raise errors.OpPrereqError("No guest OS specified")

    # check primary node
    pnode = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.pnode))
    if pnode is None:
      raise errors.OpPrereqError("Primary node '%s' is unknown" %
                                 self.op.pnode)
    self.op.pnode = pnode.name
    self.pnode = pnode
    self.secondaries = []
    # disk template and mirror node verification
    if self.op.disk_template not in constants.DISK_TEMPLATES:
      raise errors.OpPrereqError("Invalid disk template name")

    if self.op.disk_template in constants.DTS_NET_MIRROR:
      if getattr(self.op, "snode", None) is None:
        raise errors.OpPrereqError("The networked disk templates need"
                                   " a mirror node")

      snode_name = self.cfg.ExpandNodeName(self.op.snode)
      if snode_name is None:
        raise errors.OpPrereqError("Unknown secondary node '%s'" %
                                   self.op.snode)
      elif snode_name == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be"
                                   " the primary node.")
      self.secondaries.append(snode_name)

    # Check lv size requirements
    nodenames = [pnode.name] + self.secondaries
    nodeinfo = rpc.call_node_info(nodenames, self.cfg.GetVGName())

    # Required free disk space as a function of disk and swap space
    req_size_dict = {
      constants.DT_DISKLESS: 0,
      constants.DT_PLAIN: self.op.disk_size + self.op.swap_size,
      constants.DT_LOCAL_RAID1: (self.op.disk_size + self.op.swap_size) * 2,
      # 256 MB are added for drbd metadata, 128MB for each drbd device
      constants.DT_REMOTE_RAID1: self.op.disk_size + self.op.swap_size + 256,
      constants.DT_DRBD8: self.op.disk_size + self.op.swap_size + 256,
      }
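
    # Worked example for the table above: a 10240 MB disk plus 4096 MB
    # swap needs 14336 MB of free VG space with DT_PLAIN, twice that
    # (28672 MB) with DT_LOCAL_RAID1, and 14336 + 256 = 14592 MB with
    # DT_REMOTE_RAID1 or DT_DRBD8 (the 256 MB covering two 128 MB DRBD
    # metadata volumes).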

    if self.op.disk_template not in req_size_dict:
      raise errors.ProgrammerError("Disk template '%s' size requirement"
                                   " is unknown" % self.op.disk_template)

    req_size = req_size_dict[self.op.disk_template]

    for node in nodenames:
      info = nodeinfo.get(node, None)
      if not info:
        raise errors.OpPrereqError("Cannot get current information"
                                   " from node '%s'" % node)
      if req_size > info['vg_free']:
        raise errors.OpPrereqError("Not enough disk space on target node %s."
                                   " %d MB available, %d MB required" %
                                   (node, info['vg_free'], req_size))

    # os verification
    os_obj = rpc.call_os_get(pnode.name, self.op.os_type)
    if not os_obj:
      raise errors.OpPrereqError("OS '%s' not in supported os list for"
                                 " primary node" % self.op.os_type)

    # instance verification
    hostname1 = utils.HostInfo(self.op.instance_name)

    self.op.instance_name = instance_name = hostname1.name
    instance_list = self.cfg.GetInstanceList()
    if instance_name in instance_list:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name)

    ip = getattr(self.op, "ip", None)
    if ip is None or ip.lower() == "none":
      inst_ip = None
    elif ip.lower() == "auto":
      inst_ip = hostname1.ip
    else:
      if not utils.IsValidIP(ip):
        raise errors.OpPrereqError("given IP address '%s' doesn't look"
                                   " like a valid IP" % ip)
      inst_ip = ip
    self.inst_ip = inst_ip

    if self.op.start and not self.op.ip_check:
      raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
                                 " adding an instance in start mode")

    if self.op.ip_check:
      if utils.TcpPing(utils.HostInfo().name, hostname1.ip,
                       constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname1.ip, instance_name))

    # bridge verification
    bridge = getattr(self.op, "bridge", None)
    if bridge is None:
      self.op.bridge = self.cfg.GetDefBridge()
    else:
      self.op.bridge = bridge

    if not rpc.call_bridges_exist(self.pnode.name, [self.op.bridge]):
      raise errors.OpPrereqError("target bridge '%s' does not exist on"
                                 " destination node '%s'" %
                                 (self.op.bridge, pnode.name))

    if self.op.start:
      self.instance_status = 'up'
    else:
      self.instance_status = 'down'

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    nic = objects.NIC(bridge=self.op.bridge, mac=self.cfg.GenerateMAC())
    if self.inst_ip is not None:
      nic.ip = self.inst_ip

    disks = _GenerateDiskTemplate(self.cfg,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries, self.op.disk_size,
                                  self.op.swap_size)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            memory=self.op.mem_size,
                            vcpus=self.op.vcpus,
                            nics=[nic], disks=disks,
                            disk_template=self.op.disk_template,
                            status=self.instance_status,
                            )

    feedback_fn("* creating instance disks...")
    if not _CreateDisks(self.cfg, iobj):
      _RemoveDisks(iobj, self.cfg)
      raise errors.OpExecError("Device creation failed, reverting...")

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj)

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self.cfg, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self.cfg, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(iobj, self.cfg)
      self.cfg.RemoveInstance(iobj.name)
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    feedback_fn("creating os for instance %s on node %s" %
                (instance, pnode_name))

    if iobj.disk_template != constants.DT_DISKLESS:
      if self.op.mode == constants.INSTANCE_CREATE:
        feedback_fn("* running the instance OS create scripts...")
        if not rpc.call_instance_os_add(pnode_name, iobj, "sda", "sdb"):
          raise errors.OpExecError("could not add os for instance %s"
                                   " on node %s" %
                                   (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")
        src_node = self.op.src_node
        src_image = self.src_image
        if not rpc.call_instance_os_import(pnode_name, iobj, "sda", "sdb",
                                           src_node, src_image):
          raise errors.OpExecError("Could not import os for instance"
                                   " %s on node %s" %
                                   (instance, pnode_name))
      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      logger.Info("starting instance %s on node %s" % (instance, pnode_name))
      feedback_fn("* starting instance...")
      if not rpc.call_instance_start(pnode_name, iobj, None):
        raise errors.OpExecError("Could not start instance")


class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_REQP = ["instance_name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)
    self.instance = instance

  def Exec(self, feedback_fn):
    """Connect to the console of an instance.

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = rpc.call_instance_list([node])[node]
    if node_insts is False:
      raise errors.OpExecError("Can't connect to node %s." % node)

    if instance.name not in node_insts:
      raise errors.OpExecError("Instance %s is not running." % instance.name)

    logger.Debug("connecting to console of %s on %s" % (instance.name, node))

    hyper = hypervisor.GetHypervisor()
    console_cmd = hyper.GetShellCommandForConsole(instance.name)
    # build ssh cmdline
    argv = ["ssh", "-q", "-t"]
    argv.extend(ssh.KNOWN_HOSTS_OPTS)
    argv.extend(ssh.BATCH_MODE_OPTS)
    argv.append(node)
    argv.append(console_cmd)
    return "ssh", argv
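
# Sketch of how the value returned by LUConnectConsole.Exec is meant to
# be consumed on the client side (illustrative, error handling omitted):
#   cmd, argv = processor.ExecOpCode(
#     opcodes.OpConnectConsole(instance_name="inst1.example.com"))
#   os.execvp(cmd, argv)
# i.e. the master-side caller simply execs the returned ssh command line.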


class LUAddMDDRBDComponent(LogicalUnit):
  """Add a new mirror member to an instance's disk.

  """
  HPATH = "mirror-add"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "remote_node", "disk_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NEW_SECONDARY": self.op.remote_node,
      "DISK_NAME": self.op.disk_name,
      }
    env.update(_BuildInstanceHookEnvByObject(self.instance))
    nl = [self.sstore.GetMasterNode(), self.instance.primary_node,
          self.op.remote_node,] + list(self.instance.secondary_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)
    self.instance = instance

    remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
    if remote_node is None:
      raise errors.OpPrereqError("Node '%s' not known" % self.op.remote_node)
    self.remote_node = remote_node

    if remote_node == instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.")

    if instance.disk_template != constants.DT_REMOTE_RAID1:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " remote_raid1.")
    for disk in instance.disks:
      if disk.iv_name == self.op.disk_name:
        break
    else:
      raise errors.OpPrereqError("Can't find this device ('%s') in the"
                                 " instance." % self.op.disk_name)
    if len(disk.children) > 1:
      raise errors.OpPrereqError("The device already has two slave"
                                 " devices.\n"
                                 "This would create a 3-disk raid1"
                                 " which we don't allow.")
    self.disk = disk

  def Exec(self, feedback_fn):
    """Add the mirror component.

    """
    disk = self.disk
    instance = self.instance

    remote_node = self.remote_node
    lv_names = [".%s_%s" % (disk.iv_name, suf) for suf in ["data", "meta"]]
    names = _GenerateUniqueNames(self.cfg, lv_names)
    new_drbd = _GenerateMDDRBDBranch(self.cfg, instance.primary_node,
                                     remote_node, disk.size, names)

    logger.Info("adding new mirror component on secondary")
    #HARDCODE
    if not _CreateBlockDevOnSecondary(self.cfg, remote_node, instance,
                                      new_drbd, False,
                                      _GetInstanceInfoText(instance)):
      raise errors.OpExecError("Failed to create new component on secondary"
                               " node %s" % remote_node)

    logger.Info("adding new mirror component on primary")
    #HARDCODE
    if not _CreateBlockDevOnPrimary(self.cfg, instance.primary_node,
                                    instance, new_drbd,
                                    _GetInstanceInfoText(instance)):
      # remove secondary dev
      self.cfg.SetDiskID(new_drbd, remote_node)
      rpc.call_blockdev_remove(remote_node, new_drbd)
      raise errors.OpExecError("Failed to create volume on primary")

    # the device exists now
    # call the primary node to add the mirror to md
    logger.Info("adding new mirror component to md")
    if not rpc.call_blockdev_addchildren(instance.primary_node,
                                         disk, [new_drbd]):
      logger.Error("Can't add mirror component to md!")
      self.cfg.SetDiskID(new_drbd, remote_node)
      if not rpc.call_blockdev_remove(remote_node, new_drbd):
        logger.Error("Can't rollback on secondary")
      self.cfg.SetDiskID(new_drbd, instance.primary_node)
      if not rpc.call_blockdev_remove(instance.primary_node, new_drbd):
        logger.Error("Can't rollback on primary")
      raise errors.OpExecError("Can't add mirror component to md array")

    disk.children.append(new_drbd)

    self.cfg.AddInstance(instance)

    _WaitForSync(self.cfg, instance)

    return 0


class LURemoveMDDRBDComponent(LogicalUnit):
  """Remove a component from a remote_raid1 disk.

  """
  HPATH = "mirror-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disk_name", "disk_id"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK_NAME": self.op.disk_name,
      "DISK_ID": self.op.disk_id,
      "OLD_SECONDARY": self.old_secondary,
      }
    env.update(_BuildInstanceHookEnvByObject(self.instance))
    nl = [self.sstore.GetMasterNode(),
          self.instance.primary_node] + list(self.instance.secondary_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)
    self.instance = instance

    if instance.disk_template != constants.DT_REMOTE_RAID1:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " remote_raid1.")
    for disk in instance.disks:
      if disk.iv_name == self.op.disk_name:
        break
    else:
      raise errors.OpPrereqError("Can't find this device ('%s') in the"
                                 " instance." % self.op.disk_name)
    for child in disk.children:
      if (child.dev_type == constants.LD_DRBD7 and
          child.logical_id[2] == self.op.disk_id):
        break
    else:
      raise errors.OpPrereqError("Can't find the device with this port.")

    if len(disk.children) < 2:
      raise errors.OpPrereqError("Cannot remove the last component from"
                                 " a mirror.")
    self.disk = disk
    self.child = child
    if self.child.logical_id[0] == instance.primary_node:
      oid = 1
    else:
      oid = 0
    self.old_secondary = self.child.logical_id[oid]

  def Exec(self, feedback_fn):
    """Remove the mirror component.

    """
    instance = self.instance
    disk = self.disk
    child = self.child
    logger.Info("remove mirror component")
    self.cfg.SetDiskID(disk, instance.primary_node)
    if not rpc.call_blockdev_removechildren(instance.primary_node,
                                            disk, [child]):
      raise errors.OpExecError("Can't remove child from mirror.")

    for node in child.logical_id[:2]:
      self.cfg.SetDiskID(child, node)
      if not rpc.call_blockdev_remove(node, child):
        logger.Error("Warning: failed to remove device from node %s,"
                     " continuing operation." % node)

    disk.children.remove(child)
    self.cfg.AddInstance(instance)


class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "mode", "disks"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": self.instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self.instance))
    nl = [self.sstore.GetMasterNode(),
          self.instance.primary_node] + list(self.instance.secondary_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)
    self.instance = instance

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored.")

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes))

    self.sec_node = instance.secondary_nodes[0]

    remote_node = getattr(self.op, "remote_node", None)
    if remote_node is not None:
      remote_node = self.cfg.ExpandNodeName(remote_node)
      if remote_node is None:
        raise errors.OpPrereqError("Node '%s' not known" %
                                   self.op.remote_node)
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
    else:
      self.remote_node_info = None
    if remote_node == instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.")
    elif remote_node == self.sec_node:
      # the user gave the current secondary, switch to
      # 'no-replace-secondary' mode
      remote_node = None
    if (instance.disk_template == constants.DT_REMOTE_RAID1 and
        self.op.mode != constants.REPLACE_DISK_ALL):
      raise errors.OpPrereqError("Template 'remote_raid1' only allows all"
                                 " disks replacement, not individual ones")
    if instance.disk_template == constants.DT_DRBD8:
      if self.op.mode == constants.REPLACE_DISK_ALL:
        raise errors.OpPrereqError("Template 'drbd8' only allows primary or"
                                   " secondary disk replacement, not"
                                   " both at once")
      elif self.op.mode == constants.REPLACE_DISK_PRI:
        if remote_node is not None:
          raise errors.OpPrereqError("Template 'drbd8' does not allow changing"
                                     " the secondary while doing a primary"
                                     " node disk replacement")
        self.tgt_node = instance.primary_node
      elif self.op.mode == constants.REPLACE_DISK_SEC:
        self.new_node = remote_node # this can be None, in which case
                                    # we don't change the secondary
        self.tgt_node = instance.secondary_nodes[0]
      else:
        raise errors.ProgrammerError("Unhandled disk replace mode")

    for name in self.op.disks:
      if instance.FindDisk(name) is None:
        raise errors.OpPrereqError("Disk '%s' not found for instance '%s'" %
                                   (name, instance.name))
    self.op.remote_node = remote_node

  def _ExecRR1(self, feedback_fn):
    """Replace the disks of a remote_raid1 instance.

    """
    instance = self.instance
    iv_names = {}
    # start of work
    if self.op.remote_node is None:
      remote_node = self.sec_node
    else:
      remote_node = self.op.remote_node
    cfg = self.cfg
    for dev in instance.disks:
      size = dev.size
      lv_names = [".%s_%s" % (dev.iv_name, suf) for suf in ["data", "meta"]]
      names = _GenerateUniqueNames(cfg, lv_names)
      new_drbd = _GenerateMDDRBDBranch(cfg, instance.primary_node,
                                       remote_node, size, names)
      iv_names[dev.iv_name] = (dev, dev.children[0], new_drbd)
      logger.Info("adding new mirror component on secondary for %s" %
                  dev.iv_name)
      #HARDCODE
      if not _CreateBlockDevOnSecondary(cfg, remote_node, instance,
                                        new_drbd, False,
                                        _GetInstanceInfoText(instance)):
        raise errors.OpExecError("Failed to create new component on"
                                 " secondary node %s\n"
                                 "Full abort, cleanup manually!" %
                                 remote_node)

      logger.Info("adding new mirror component on primary")
      #HARDCODE
      if not _CreateBlockDevOnPrimary(cfg, instance.primary_node,
                                      instance, new_drbd,
                                      _GetInstanceInfoText(instance)):
        # remove secondary dev
        cfg.SetDiskID(new_drbd, remote_node)
        rpc.call_blockdev_remove(remote_node, new_drbd)
        raise errors.OpExecError("Failed to create volume on primary!\n"
                                 "Full abort, cleanup manually!!")

      # the device exists now
      # call the primary node to add the mirror to md
      logger.Info("adding new mirror component to md")
      if not rpc.call_blockdev_addchildren(instance.primary_node, dev,
                                           [new_drbd]):
        logger.Error("Can't add mirror component to md!")
        cfg.SetDiskID(new_drbd, remote_node)
        if not rpc.call_blockdev_remove(remote_node, new_drbd):
          logger.Error("Can't rollback on secondary")
        cfg.SetDiskID(new_drbd, instance.primary_node)
        if not rpc.call_blockdev_remove(instance.primary_node, new_drbd):
          logger.Error("Can't rollback on primary")
        raise errors.OpExecError("Full abort, cleanup manually!!")

      dev.children.append(new_drbd)
      cfg.AddInstance(instance)

    # this can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its
    # return value
    _WaitForSync(cfg, instance, unlock=True)

    # so check manually all the devices
    for name in iv_names:
      dev, child, new_drbd = iv_names[name]
      cfg.SetDiskID(dev, instance.primary_node)
      is_degr = rpc.call_blockdev_find(instance.primary_node, dev)[5]
      if is_degr:
        raise errors.OpExecError("MD device %s is degraded!" % name)
      cfg.SetDiskID(new_drbd, instance.primary_node)
      is_degr = rpc.call_blockdev_find(instance.primary_node, new_drbd)[5]
      if is_degr:
        raise errors.OpExecError("New drbd device %s is degraded!" % name)

    for name in iv_names:
      dev, child, new_drbd = iv_names[name]
      logger.Info("remove mirror %s component" % name)
      cfg.SetDiskID(dev, instance.primary_node)
      if not rpc.call_blockdev_removechildren(instance.primary_node,
                                              dev, [child]):
        logger.Error("Can't remove child from mirror, aborting"
                     " *this device cleanup*.\nYou need to cleanup manually!!")
        continue

      for node in child.logical_id[:2]:
        logger.Info("remove child device on %s" % node)
        cfg.SetDiskID(child, node)
        if not rpc.call_blockdev_remove(node, child):
          logger.Error("Warning: failed to remove device from node %s,"
                       " continuing operation." % node)

      dev.children.remove(child)

      cfg.AddInstance(instance)

  def _ExecD8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for drbd8.

    The algorithm for replace is quite complicated:
      - for each disk to be replaced:
        - create new LVs on the target node with unique names
        - detach old LVs from the drbd device
        - rename old LVs to <name>_replaced-<time_t>
        - rename new LVs to the old LV names
        - attach the new LVs (with the old names now) to the drbd device
      - wait for sync across all devices
      - for each modified disk:
        - remove old LVs (which have the name <name>_replaced-<time_t>)

    Failures are not very well handled; a schematic of the rename
    sequence follows this method.

    """
    instance = self.instance
    iv_names = {}
    vgname = self.cfg.GetVGName()
    # start of work
    cfg = self.cfg
    tgt_node = self.tgt_node
    for dev in instance.disks:
      if not dev.iv_name in self.op.disks:
        continue
      size = dev.size
      cfg.SetDiskID(dev, tgt_node)
      lv_names = [".%s_%s" % (dev.iv_name, suf) for suf in ["data", "meta"]]
      names = _GenerateUniqueNames(cfg, lv_names)
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                             logical_id=(vgname, names[0]))
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vgname, names[1]))
      new_lvs = [lv_data, lv_meta]
      old_lvs = dev.children
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
      logger.Info("adding new local storage on %s for %s" %
                  (tgt_node, dev.iv_name))
      # since we *always* want to create this LV, we use the
      # _Create...OnPrimary (which forces the creation), even if we
      # are talking about the secondary node
      for new_lv in new_lvs:
        if not _CreateBlockDevOnPrimary(cfg, tgt_node, instance, new_lv,
                                        _GetInstanceInfoText(instance)):
          raise errors.OpExecError("Failed to create new LV named '%s' on"
                                   " node '%s'" %
                                   (new_lv.logical_id[1], tgt_node))

      if not rpc.call_blockdev_removechildren(tgt_node, dev, old_lvs):
        raise errors.OpExecError("Can't detach drbd from local storage on node"
                                 " %s for device %s" % (tgt_node, dev.iv_name))
      dev.children = []
      cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to <name>_replaced-<time_t>, and new_lv to old_lv; note
      # that we rename LVs using the assumption that logical_id ==
      # physical_id (which in turn is the unique_id on that node)
      temp_suffix = int(time.time())
      logger.Info("renaming the old LVs on the target node")
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)
      rlist = [(disk, ren_fn(disk, temp_suffix)) for disk in old_lvs]
      if not rpc.call_blockdev_rename(tgt_node, rlist):
        logger.Error("Can't rename old LVs on node %s" % tgt_node)
        do_change_old = False
      else:
        do_change_old = True
      # now we rename the new LVs to the old LVs
      logger.Info("renaming the new LVs on the target node")
      rlist = [(new, old.physical_id) for old, new in zip(old_lvs, new_lvs)]
      if not rpc.call_blockdev_rename(tgt_node, rlist):
        logger.Error("Can't rename new LVs on node %s" % tgt_node)
      else:
        for old, new in zip(old_lvs, new_lvs):
          new.logical_id = old.logical_id
          cfg.SetDiskID(new, tgt_node)

      if do_change_old:
        for disk in old_lvs:
          disk.logical_id = ren_fn(disk, temp_suffix)
          cfg.SetDiskID(disk, tgt_node)

      # now that the new lvs have the old name, we can add them to the device
      logger.Info("adding new mirror component on %s" % tgt_node)
      if not rpc.call_blockdev_addchildren(tgt_node, dev, new_lvs):
        logger.Error("Can't add local storage to drbd!")
        for new_lv in new_lvs:
          if not rpc.call_blockdev_remove(tgt_node, new_lv):
            logger.Error("Can't rollback device %s" % new_lv.logical_id[1])
        return

      dev.children = new_lvs
      cfg.Update(instance)

    # this can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its
    # return value
    logger.Info("Done changing drbd configs, waiting for sync")
    _WaitForSync(cfg, instance, unlock=True)

    # so check manually all the devices
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
      cfg.SetDiskID(dev, instance.primary_node)
      is_degr = rpc.call_blockdev_find(instance.primary_node, dev)[5]
      if is_degr:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
      logger.Info("remove logical volumes for %s" % name)
      for lv in old_lvs:
        cfg.SetDiskID(lv, tgt_node)
        if not rpc.call_blockdev_remove(tgt_node, lv):
          logger.Error("Can't cleanup child device, skipping. You need to"
                       " fix manually!")
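
  # Schematic of the LV rename sequence in _ExecD8DiskOnly (names invented),
  # for iv_name "sda" and temp_suffix 1179700000:
  #   xenvg/<uuid>.sda_data -> xenvg/<uuid>.sda_data_replaced-1179700000
  #   xenvg/<new_uuid>.sda_data -> xenvg/<uuid>.sda_data (the old name)
  # so the drbd device re-attaches its local storage under unchanged names,
  # and the "_replaced-<time_t>" LVs are deleted only after the manual
  # degradation check succeeds.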

  def _ExecD8Secondary(self, feedback_fn):
    """Replace the secondary node for drbd8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled; a sketch of the network
    re-attach trick follows this method.

    """
    instance = self.instance
    iv_names = {}
    vgname = self.cfg.GetVGName()
    # start of work
    cfg = self.cfg
    old_node = self.tgt_node
    new_node = self.new_node
    pri_node = instance.primary_node
    for dev in instance.disks:
      size = dev.size
      logger.Info("adding new local storage on %s for %s" %
                  (new_node, dev.iv_name))
      # since we *always* want to create this LV, we use the
      # _Create...OnPrimary (which forces the creation), even if we
      # are talking about the secondary node
      for new_lv in dev.children:
        if not _CreateBlockDevOnPrimary(cfg, new_node, instance, new_lv,
                                        _GetInstanceInfoText(instance)):
          raise errors.OpExecError("Failed to create new LV named '%s' on"
                                   " node '%s'" %
                                   (new_lv.logical_id[1], new_node))

      # create new devices on new_node
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              size=size,
                              logical_id=(pri_node, new_node,
                                          dev.logical_id[2]),
                              children=dev.children)
      if not _CreateBlockDevOnSecondary(cfg, new_node, instance,
                                        new_drbd, False,
                                        _GetInstanceInfoText(instance)):
        raise errors.OpExecError("Failed to create new DRBD on"
                                 " node '%s'" % new_node)

      # we have new devices, shutdown the drbd on the old secondary
      cfg.SetDiskID(dev, old_node)
      if not rpc.call_blockdev_shutdown(old_node, dev):
        raise errors.OpExecError("Failed to shutdown DRBD on old node")

      # we have new storage, we 'rename' the network on the primary
      cfg.SetDiskID(dev, pri_node)
      # rename to the ip of the new node
      new_uid = list(dev.physical_id)
      new_uid[2] = self.remote_node_info.secondary_ip
      rlist = [(dev, tuple(new_uid))]
      if not rpc.call_blockdev_rename(pri_node, rlist):
        raise errors.OpExecError("Can't detach and re-attach drbd %s on node"
                                 " %s from %s to %s" %
                                 (dev.iv_name, pri_node, old_node, new_node))
      dev.logical_id = (pri_node, new_node, dev.logical_id[2])
      cfg.SetDiskID(dev, pri_node)
      cfg.Update(instance)

      iv_names[dev.iv_name] = (dev, dev.children)

    # this can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its
    # return value
    logger.Info("Done changing drbd configs, waiting for sync")
    _WaitForSync(cfg, instance, unlock=True)

    # so check manually all the devices
    for name, (dev, old_lvs) in iv_names.iteritems():
      cfg.SetDiskID(dev, pri_node)
      is_degr = rpc.call_blockdev_find(pri_node, dev)[5]
      if is_degr:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

    for name, (dev, old_lvs) in iv_names.iteritems():
      logger.Info("remove logical volumes for %s" % name)
      for lv in old_lvs:
        cfg.SetDiskID(lv, old_node)
        if not rpc.call_blockdev_remove(old_node, lv):
          logger.Error("Can't cleanup child device, skipping. You need to"
                       " fix manually!")
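
  # Sketch of the network 're-attach' trick in _ExecD8Secondary: the third
  # element of a drbd physical_id is the secondary node's IP, so (values
  # invented):
  #   new_uid = list(dev.physical_id)
  #   new_uid[2] = "192.0.2.20"   # secondary_ip of the new node
  #   rpc.call_blockdev_rename(pri_node, [(dev, tuple(new_uid))])
  # makes the primary's drbd device connect to the new secondary without
  # being torn down; the subsequent sync then repopulates the new node.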

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    instance = self.instance
    if instance.disk_template == constants.DT_REMOTE_RAID1:
      fn = self._ExecRR1
    elif instance.disk_template == constants.DT_DRBD8:
      if self.op.remote_node is None:
        fn = self._ExecD8DiskOnly
      else:
        fn = self._ExecD8Secondary
    else:
      raise errors.ProgrammerError("Unhandled disk replacement case")
    return fn(feedback_fn)


class LUQueryInstanceData(NoHooksLU):
  """Query runtime instance data.

  """
  _OP_REQP = ["instances"]

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'")
    if self.op.instances:
      self.wanted_instances = []
      names = self.op.instances
      for name in names:
        instance = self.cfg.GetInstanceInfo(self.cfg.ExpandInstanceName(name))
        if instance is None:
          raise errors.OpPrereqError("No such instance name '%s'" % name)
        self.wanted_instances.append(instance)
    else:
      self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                               in self.cfg.GetInstanceList()]

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    self.cfg.SetDiskID(dev, instance.primary_node)
    dev_pstatus = rpc.call_blockdev_find(instance.primary_node, dev)
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    if snode:
      self.cfg.SetDiskID(dev, snode)
      dev_sstatus = rpc.call_blockdev_find(snode, dev)
    else:
      dev_sstatus = None

    if dev.children:
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
                      for child in dev.children]
    else:
      dev_children = []

    data = {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      }

    return data

  def Exec(self, feedback_fn):
    """Gather and return instance data.

    """
    result = {}
    for instance in self.wanted_instances:
      remote_info = rpc.call_instance_info(instance.primary_node,
                                           instance.name)
      if remote_info and "state" in remote_info:
        remote_state = "up"
      else:
        remote_state = "down"
      if instance.status == "down":
        config_state = "down"
      else:
        config_state = "up"

      disks = [self._ComputeDiskStatus(instance, None, device)
               for device in instance.disks]

      idict = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        "memory": instance.memory,
        "nics": [(nic.mac, nic.ip, nic.bridge) for nic in instance.nics],
        "disks": disks,
        "vcpus": instance.vcpus,
        }

      result[instance.name] = idict

    return result


class LUSetInstanceParms(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
    if self.mem:
      args['memory'] = self.mem
    if self.vcpus:
      args['vcpus'] = self.vcpus
    if self.do_ip or self.do_bridge:
      if self.do_ip:
        ip = self.ip
      else:
        ip = self.instance.nics[0].ip
      if self.bridge:
        bridge = self.bridge
      else:
        bridge = self.instance.nics[0].bridge
      args['nics'] = [(ip, bridge)]
    env = _BuildInstanceHookEnvByObject(self.instance, override=args)
    nl = [self.sstore.GetMasterNode(),
          self.instance.primary_node] + list(self.instance.secondary_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    self.mem = getattr(self.op, "mem", None)
    self.vcpus = getattr(self.op, "vcpus", None)
    self.ip = getattr(self.op, "ip", None)
    self.bridge = getattr(self.op, "bridge", None)
    if [self.mem, self.vcpus, self.ip, self.bridge].count(None) == 4:
      raise errors.OpPrereqError("No changes submitted")
    if self.mem is not None:
      try:
        self.mem = int(self.mem)
      except ValueError, err:
        raise errors.OpPrereqError("Invalid memory size: %s" % str(err))
    if self.vcpus is not None:
      try:
        self.vcpus = int(self.vcpus)
      except ValueError, err:
        raise errors.OpPrereqError("Invalid vcpus number: %s" % str(err))
    if self.ip is not None:
      self.do_ip = True
      if self.ip.lower() == "none":
        self.ip = None
      else:
        if not utils.IsValidIP(self.ip):
          raise errors.OpPrereqError("Invalid IP address '%s'." % self.ip)
    else:
      self.do_ip = False
    self.do_bridge = (self.bridge is not None)

    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError("No such instance name '%s'" %
                                 self.op.instance_name)
    self.op.instance_name = instance.name
    self.instance = instance

  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    result = []
    instance = self.instance
    if self.mem:
      instance.memory = self.mem
      result.append(("mem", self.mem))
    if self.vcpus:
      instance.vcpus = self.vcpus
      result.append(("vcpus", self.vcpus))
    if self.do_ip:
      instance.nics[0].ip = self.ip
      result.append(("ip", self.ip))
    if self.bridge:
      instance.nics[0].bridge = self.bridge
      result.append(("bridge", self.bridge))

    self.cfg.AddInstance(instance)

    return result


class LUQueryExports(NoHooksLU):
  """Query the exports list.

  """
  _OP_REQP = []

  def CheckPrereq(self):
    """Check that the nodelist contains only existing nodes.

    """
    self.nodes = _GetWantedNodes(self, getattr(self.op, "nodes", None))

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    Returns:
      a dictionary with the structure node->(export-list)
      where export-list is a list of the instances exported on
      that node.

    """
    return rpc.call_export_list(self.nodes)
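
# Illustrative return value of LUQueryExports.Exec (names invented):
#   {"node1.example.com": ["inst1.example.com"],
#    "node2.example.com": []}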


class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node", "shutdown"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      }
    env.update(_BuildInstanceHookEnvByObject(self.instance))
    nl = [self.sstore.GetMasterNode(), self.instance.primary_node,
          self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    self.instance = self.cfg.GetInstanceInfo(instance_name)
    if self.instance is None:
      raise errors.OpPrereqError("Instance '%s' not found" %
                                 self.op.instance_name)

    # node verification
    dst_node_short = self.cfg.ExpandNodeName(self.op.target_node)
    self.dst_node = self.cfg.GetNodeInfo(dst_node_short)

    if self.dst_node is None:
      raise errors.OpPrereqError("Destination node '%s' is unknown." %
                                 self.op.target_node)
    self.op.target_node = self.dst_node.name

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    instance = self.instance
    dst_node = self.dst_node
    src_node = instance.primary_node
    # shutdown the instance, unless requested not to do so
    if self.op.shutdown:
      op = opcodes.OpShutdownInstance(instance_name=instance.name)
      self.processor.ChainOpCode(op)

    vgname = self.cfg.GetVGName()

    snap_disks = []

    try:
      for disk in instance.disks:
        if disk.iv_name == "sda":
          # new_dev_name will be a snapshot of an lvm leaf of the one we passed
          new_dev_name = rpc.call_blockdev_snapshot(src_node, disk)

          if not new_dev_name:
            logger.Error("could not snapshot block device %s on node %s" %
                         (disk.logical_id[1], src_node))
          else:
            new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
                                   logical_id=(vgname, new_dev_name),
                                   physical_id=(vgname, new_dev_name),
                                   iv_name=disk.iv_name)
            snap_disks.append(new_dev)

    finally:
      if self.op.shutdown:
        op = opcodes.OpStartupInstance(instance_name=instance.name,
                                       force=False)
        self.processor.ChainOpCode(op)

    # TODO: check for size

    for dev in snap_disks:
      if not rpc.call_snapshot_export(src_node, dev, dst_node.name,
                                      instance):
        logger.Error("could not export block device %s from node"
                     " %s to node %s" %
                     (dev.logical_id[1], src_node, dst_node.name))
      if not rpc.call_blockdev_remove(src_node, dev):
        logger.Error("could not remove snapshot block device %s from"
                     " node %s" % (dev.logical_id[1], src_node))

    if not rpc.call_finalize_export(dst_node.name, instance, snap_disks):
      logger.Error("could not finalize export for instance %s on node %s" %
                   (instance.name, dst_node.name))

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    if nodelist:
      op = opcodes.OpQueryExports(nodes=nodelist)
      exportlist = self.processor.ChainOpCode(op)
      for node in exportlist:
        if instance.name in exportlist[node]:
          if not rpc.call_export_remove(node, instance.name):
            logger.Error("could not remove older export for instance %s"
                         " on node %s" % (instance.name, node))
class TagsLU(NoHooksLU):
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This resolves the target object of the tag operation (cluster, node
    or instance) and stores it in self.target.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      name = self.cfg.ExpandNodeName(self.op.name)
      if name is None:
        raise errors.OpPrereqError("Invalid node name (%s)" %
                                   (self.op.name,))
      self.op.name = name
      self.target = self.cfg.GetNodeInfo(name)
    elif self.op.kind == constants.TAG_INSTANCE:
      name = self.cfg.ExpandInstanceName(self.op.name)
      if name is None:
        raise errors.OpPrereqError("Invalid instance name (%s)" %
                                   (self.op.name,))
      self.op.name = name
      self.target = self.cfg.GetInstanceInfo(name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind))
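
# Illustrative sketch of the kind/name dispatch above (names invented):
#
#   kind=constants.TAG_CLUSTER                 -> cluster config object
#                                                 (self.op.name unused)
#   kind=constants.TAG_NODE,     name="node1"  -> that node's config object
#   kind=constants.TAG_INSTANCE, name="inst1"  -> that instance's object
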
class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_REQP = ["kind", "name"]

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return self.target.GetTags()
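
# Illustrative sketch: querying tags via the corresponding opcode; the
# OpGetTags opcode name and the processor.ExecOpCode entry point are
# assumptions here, and the instance name is invented:
#
#   op = opcodes.OpGetTags(kind=constants.TAG_INSTANCE,
#                          name="instance1.example.com")
#   tags = processor.ExecOpCode(op)
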
class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  _OP_REQP = ["pattern"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err))

  def Exec(self, feedback_fn):
    """Returns the (path, tag) pairs matching the search pattern.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = [cfg.GetInstanceInfo(name) for name in cfg.GetInstanceList()]
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = [cfg.GetNodeInfo(name) for name in cfg.GetNodeList()]
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results
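
# Illustrative sketch: the result is a list of (path, tag) pairs, one per
# matching tag, using the /cluster, /instances/<name> and /nodes/<name>
# paths built above; the tag values are invented, e.g. for pattern "^web":
#
#   [("/cluster", "webfarm"),
#    ("/instances/instance1.example.com", "webserver")]
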
class LUAddTags(TagsLU):
  """Sets one or more tags on a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tags.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    try:
      self.cfg.Update(self.target)
    except errors.ConfigurationError:
      raise errors.OpRetryError("There has been a modification to the"
                                " config file and the operation has been"
                                " aborted. Please retry.")
class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that all the given tags are currently set on the object.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      diff_names = ["'%s'" % tag for tag in diff_tags]
      diff_names.sort()
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)))
  def Exec(self, feedback_fn):
    """Remove the tags from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    try:
      self.cfg.Update(self.target)
    except errors.ConfigurationError:
      raise errors.OpRetryError("There has been a modification to the"
                                " config file and the operation has been"
                                " aborted. Please retry.")