-#!/usr/bin/python
+#
#
# Copyright (C) 2006, 2007 Google Inc.
This is a no-op, since we don't run hooks.
"""
- return
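+ # standard hooks result: (env dict, pre-hook node list, post-hook node list), all empty here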
+ return {}, [], []
def _GetWantedNodes(lu, nodes):
secondary_nodes: List of secondary nodes as strings
"""
env = {
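+ # OP_TARGET is the generic key naming the object the operation acts on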
+ "OP_TARGET": name,
"INSTANCE_NAME": name,
"INSTANCE_PRIMARY": primary_node,
"INSTANCE_SECONDARIES": " ".join(secondary_nodes),
node: the name of this host as an FQDN
"""
- if os.path.exists('/root/.ssh/id_dsa'):
- utils.CreateBackup('/root/.ssh/id_dsa')
- if os.path.exists('/root/.ssh/id_dsa.pub'):
- utils.CreateBackup('/root/.ssh/id_dsa.pub')
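+ # look up the key paths for the user Ganeti runs as, rather than hardcoding root's files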
+ priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)
- utils.RemoveFile('/root/.ssh/id_dsa')
- utils.RemoveFile('/root/.ssh/id_dsa.pub')
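+ # back up and remove any existing keypair before generating a fresh one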
+ for name in priv_key, pub_key:
+ if os.path.exists(name):
+ utils.CreateBackup(name)
+ utils.RemoveFile(name)
result = utils.RunCmd(["ssh-keygen", "-t", "dsa",
- "-f", "/root/.ssh/id_dsa",
+ "-f", priv_key,
"-q", "-N", ""])
if result.failed:
raise errors.OpExecError("Could not generate ssh keypair, error %s" %
result.output)
- f = open('/root/.ssh/id_dsa.pub', 'r')
+ f = open(pub_key, 'r')
try:
- utils.AddAuthorizedKey('/root/.ssh/authorized_keys', f.read(8192))
+ utils.AddAuthorizedKey(auth_keys, f.read(8192))
finally:
f.close()
(result.cmd, result.exit_code, result.output))
+def _CheckInstanceBridgesExist(instance):
+ """Check that the brigdes needed by an instance exist.
+
+ """
+ # check bridges existence
+ brlist = [nic.bridge for nic in instance.nics]
+ if not rpc.call_bridges_exist(instance.primary_node, brlist):
+ raise errors.OpPrereqError("one or more target bridges %s does not"
+ " exist on destination node '%s'" %
+ (brlist, instance.primary_node))
+
+
class LUInitCluster(LogicalUnit):
"""Initialise the cluster.
ourselves in the post-run node list.
"""
- env = {
- "CLUSTER": self.op.cluster_name,
- "MASTER": self.hostname.name,
- }
+ env = {"OP_TARGET": self.op.cluster_name}
return env, [], [self.hostname.name]
def CheckPrereq(self):
self.clustername = clustername = utils.HostInfo(self.op.cluster_name)
- result = utils.RunCmd(["fping", "-S127.0.0.1", "-q", hostname.ip])
- if result.failed:
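+ # a TCP "ping" against the node daemon port replaces the external fping dependency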
+ if not utils.TcpPing(constants.LOCALHOST_IP_ADDRESS, hostname.ip,
+ constants.DEFAULT_NODED_PORT):
raise errors.OpPrereqError("Inconsistency: this host's name resolves"
" to %s,\nbut this ip address does not"
" belong to this host."
secondary_ip = getattr(self.op, "secondary_ip", None)
if secondary_ip and not utils.IsValidIP(secondary_ip):
raise errors.OpPrereqError("Invalid secondary ip given")
- if secondary_ip and secondary_ip != hostname.ip:
- result = utils.RunCmd(["fping", "-S127.0.0.1", "-q", secondary_ip])
- if result.failed:
- raise errors.OpPrereqError("You gave %s as secondary IP,\n"
- "but it does not belong to this host." %
- secondary_ip)
+ if (secondary_ip and
+ secondary_ip != hostname.ip and
+ (not utils.TcpPing(constants.LOCALHOST_IP_ADDRESS, secondary_ip,
+ constants.DEFAULT_NODED_PORT))):
+ raise errors.OpPrereqError("You gave %s as secondary IP,\n"
+ "but it does not belong to this host." %
+ secondary_ip)
self.secondary_ip = secondary_ip
# checks presence of the volume group given
hostname = self.hostname
# set up the simple store
- ss = ssconf.SimpleStore()
+ self.sstore = ss = ssconf.SimpleStore()
ss.SetKey(ss.SS_HYPERVISOR, self.op.hypervisor_type)
ss.SetKey(ss.SS_MASTER_NODE, hostname.name)
ss.SetKey(ss.SS_MASTER_IP, clustername.ip)
rpc.call_node_start_master(hostname.name)
# set up ssh config and /etc/hosts
- f = open('/etc/ssh/ssh_host_rsa_key.pub', 'r')
+ f = open(constants.SSH_HOST_RSA_PUB, 'r')
try:
sshline = f.read()
finally:
_InitSSHSetup(hostname.name)
# init of cluster config file
- cfgw = config.ConfigWriter()
+ self.cfg = cfgw = config.ConfigWriter()
cfgw.InitConfig(hostname.name, hostname.ip, self.secondary_ip,
sshkey, self.op.mac_prefix,
self.op.vg_name, self.op.def_bridge)
"""Destroys the cluster.
"""
- utils.CreateBackup('/root/.ssh/id_dsa')
- utils.CreateBackup('/root/.ssh/id_dsa.pub')
+ priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
+ utils.CreateBackup(priv_key)
+ utils.CreateBackup(pub_key)
rpc.call_node_leave_cluster(self.sstore.GetMasterNode())
(instance, node))
bad = True
- return not bad
+ return bad
def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is, feedback_fn):
"""Verify if there are any unknown volumes in the cluster.
"""
env = {
+ "OP_TARGET": self.op.sstore.GetClusterName(),
"NEW_NAME": self.op.name,
}
mn = self.sstore.GetMasterNode()
"""
env = {
+ "OP_TARGET": self.op.node_name,
"NODE_NAME": self.op.node_name,
}
all_nodes = self.cfg.GetNodeList()
"""
env = {
+ "OP_TARGET": self.op.node_name,
"NODE_NAME": self.op.node_name,
"NODE_PIP": self.op.primary_ip,
"NODE_SIP": self.op.secondary_ip,
" new node doesn't have one")
# check reachability
- command = ["fping", "-q", primary_ip]
- result = utils.RunCmd(command)
- if result.failed:
+ if not utils.TcpPing(utils.HostInfo().name,
+ primary_ip,
+ constants.DEFAULT_NODED_PORT):
raise errors.OpPrereqError("Node not reachable by ping")
if not newbie_singlehomed:
# check reachability from my secondary ip to newbie's secondary ip
- command = ["fping", "-S%s" % myself.secondary_ip, "-q", secondary_ip]
- result = utils.RunCmd(command)
- if result.failed:
- raise errors.OpPrereqError("Node secondary ip not reachable by ping")
+ if not utils.TcpPing(myself.secondary_ip,
+ secondary_ip,
+ constants.DEFAULT_NODED_PORT):
+ raise errors.OpPrereqError(
+ "Node secondary ip not reachable by TCP based ping to noded port")
self.new_node = objects.Node(name=node,
primary_ip=primary_ip,
# setup ssh on node
logger.Info("copy ssh key to node %s" % node)
+ priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
keyarray = []
- keyfiles = ["/etc/ssh/ssh_host_dsa_key", "/etc/ssh/ssh_host_dsa_key.pub",
- "/etc/ssh/ssh_host_rsa_key", "/etc/ssh/ssh_host_rsa_key.pub",
- "/root/.ssh/id_dsa", "/root/.ssh/id_dsa.pub"]
+ keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
+ constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
+ priv_key, pub_key]
for i in keyfiles:
f = open(i, 'r')
self.cfg.GetHostKey())
if new_node.secondary_ip != new_node.primary_ip:
- result = ssh.SSHCall(node, "root",
- "fping -S 127.0.0.1 -q %s" % new_node.secondary_ip)
- if result.failed:
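+ # have the new node itself verify it owns the secondary ip, via a TCP ping from its localhost address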
+ if not rpc.call_node_tcp_ping(new_node.name,
+ constants.LOCALHOST_IP_ADDRESS,
+ new_node.secondary_ip,
+ constants.DEFAULT_NODED_PORT,
+ 10, False):
raise errors.OpExecError("Node claims it doesn't have the"
" secondary ip you gave (%s).\n"
"Please fix and re-run this command." %
"""
env = {
+ "OP_TARGET": self.new_master,
"NEW_MASTER": self.new_master,
"OLD_MASTER": self.old_master,
}
device_info.append((instance.primary_node, inst_disk.iv_name,
master_result))
+ # leave the disks configured for the primary node
+ # this is a workaround that would be fixed better by
+ # improving the logical/physical id handling
+ for disk in instance.disks:
+ cfg.SetDiskID(disk, instance.primary_node)
+
return disks_ok, device_info
self.op.instance_name)
# check bridges existance
- brlist = [nic.bridge for nic in instance.nics]
- if not rpc.call_bridges_exist(instance.primary_node, brlist):
- raise errors.OpPrereqError("one or more target bridges %s does not"
- " exist on destination node '%s'" %
- (brlist, instance.primary_node))
+ _CheckInstanceBridgesExist(instance)
self.instance = instance
self.op.instance_name = instance.name
self.cfg.MarkInstanceUp(instance.name)
+class LURebootInstance(LogicalUnit):
+ """Reboot an instance.
+
+ """
+ HPATH = "instance-reboot"
+ HTYPE = constants.HTYPE_INSTANCE
+ _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
+
+ def BuildHooksEnv(self):
+ """Build hooks env.
+
+ This runs on master, primary and secondary nodes of the instance.
+
+ """
+ env = {
+ "IGNORE_SECONDARIES": self.op.ignore_secondaries,
+ }
+ env.update(_BuildInstanceHookEnvByObject(self.instance))
+ nl = ([self.sstore.GetMasterNode(), self.instance.primary_node] +
+ list(self.instance.secondary_nodes))
+ return env, nl, nl
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+
+ This checks that the instance is in the cluster.
+
+ """
+ instance = self.cfg.GetInstanceInfo(
+ self.cfg.ExpandInstanceName(self.op.instance_name))
+ if instance is None:
+ raise errors.OpPrereqError("Instance '%s' not known" %
+ self.op.instance_name)
+
+ # check bridges existance
+ _CheckInstanceBridgesExist(instance)
+
+ self.instance = instance
+ self.op.instance_name = instance.name
+
+ def Exec(self, feedback_fn):
+ """Reboot the instance.
+
+ """
+ instance = self.instance
+ ignore_secondaries = self.op.ignore_secondaries
+ reboot_type = self.op.reboot_type
+ extra_args = getattr(self.op, "extra_args", "")
+
+ node_current = instance.primary_node
+
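+ # soft and hard reboots are delegated to the hypervisor on the node; a full reboot is emulated below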
+ if reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
+ constants.INSTANCE_REBOOT_HARD,
+ constants.INSTANCE_REBOOT_FULL]:
+ raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
+ (constants.INSTANCE_REBOOT_SOFT,
+ constants.INSTANCE_REBOOT_HARD,
+ constants.INSTANCE_REBOOT_FULL))
+
+ if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
+ constants.INSTANCE_REBOOT_HARD]:
+ if not rpc.call_instance_reboot(node_current, instance,
+ reboot_type, extra_args):
+ raise errors.OpExecError("Could not reboot instance")
+ else:
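+ # full reboot: stop the instance, recycle its disks, then cold-start it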
+ if not rpc.call_instance_shutdown(node_current, instance):
+ raise errors.OpExecError("could not shutdown instance for full reboot")
+ _ShutdownInstanceDisks(instance, self.cfg)
+ _StartInstanceDisks(self.cfg, instance, ignore_secondaries)
+ if not rpc.call_instance_start(node_current, instance, extra_args):
+ _ShutdownInstanceDisks(instance, self.cfg)
+ raise errors.OpExecError("Could not start instance for full reboot")
+
+ self.cfg.MarkInstanceUp(instance.name)
+
+
class LUShutdownInstance(LogicalUnit):
"""Shutdown an instance.
"""
env = _BuildInstanceHookEnvByObject(self.instance)
- nl = ([self.sstore.GetMasterNode(), self.instance.primary_node] +
- list(self.instance.secondary_nodes))
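+ # notify only the master node; the instance's own nodes may no longer be reachable at this point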
+ nl = [self.sstore.GetMasterNode()]
return env, nl, nl
def CheckPrereq(self):
(instance.name, instance.primary_node))
if not rpc.call_instance_shutdown(instance.primary_node, instance):
- raise errors.OpExecError("Could not shutdown instance %s on node %s" %
- (instance.name, instance.primary_node))
+ if self.op.ignore_failures:
+ feedback_fn("Warning: can't shutdown instance")
+ else:
+ raise errors.OpExecError("Could not shutdown instance %s on node %s" %
+ (instance.name, instance.primary_node))
logger.Info("removing block devices for instance %s" % instance.name)
- _RemoveDisks(instance, self.cfg)
+ if not _RemoveDisks(instance, self.cfg):
+ if self.op.ignore_failures:
+ feedback_fn("Warning: can't remove instance's disks")
+ else:
+ raise errors.OpExecError("Can't remove instance's disks")
logger.Info("removing instance %s out of cluster config" % instance.name)
raise errors.OpPrereqError("Instance '%s' not known" %
self.op.instance_name)
- if instance.disk_template != constants.DT_REMOTE_RAID1:
+ if instance.disk_template not in constants.DTS_NET_MIRROR:
raise errors.OpPrereqError("Instance's disk layout is not"
- " remote_raid1.")
+ " network mirrored, cannot failover.")
secondary_nodes = instance.secondary_nodes
if not secondary_nodes:
"""
port = cfg.AllocatePort()
vgname = cfg.GetVGName()
- dev_data = objects.Disk(dev_type="lvm", size=size,
+ dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
logical_id=(vgname, names[0]))
- dev_meta = objects.Disk(dev_type="lvm", size=128,
+ dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
logical_id=(vgname, names[1]))
- drbd_dev = objects.Disk(dev_type="drbd", size=size,
+ drbd_dev = objects.Disk(dev_type=constants.LD_DRBD7, size=size,
logical_id = (primary, secondary, port),
children = [dev_data, dev_meta])
return drbd_dev
+def _GenerateDRBD8Branch(cfg, primary, secondary, size, names, iv_name):
+ """Generate a drbd8 device complete with its children.
+
+ """
+ port = cfg.AllocatePort()
+ vgname = cfg.GetVGName()
+ dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
+ logical_id=(vgname, names[0]))
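+ # the second, 128 MB volume holds the drbd metadata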
+ dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
+ logical_id=(vgname, names[1]))
+ drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
+ logical_id = (primary, secondary, port),
+ children = [dev_data, dev_meta],
+ iv_name=iv_name)
+ return drbd_dev
+
def _GenerateDiskTemplate(cfg, template_name,
instance_name, primary_node,
secondary_nodes, disk_sz, swap_sz):
raise errors.ProgrammerError("Wrong template configuration")
names = _GenerateUniqueNames(cfg, [".sda", ".sdb"])
- sda_dev = objects.Disk(dev_type="lvm", size=disk_sz,
+ sda_dev = objects.Disk(dev_type=constants.LD_LV, size=disk_sz,
logical_id=(vgname, names[0]),
iv_name = "sda")
- sdb_dev = objects.Disk(dev_type="lvm", size=swap_sz,
+ sdb_dev = objects.Disk(dev_type=constants.LD_LV, size=swap_sz,
logical_id=(vgname, names[1]),
iv_name = "sdb")
disks = [sda_dev, sdb_dev]
names = _GenerateUniqueNames(cfg, [".sda_m1", ".sda_m2",
".sdb_m1", ".sdb_m2"])
- sda_dev_m1 = objects.Disk(dev_type="lvm", size=disk_sz,
+ sda_dev_m1 = objects.Disk(dev_type=constants.LD_LV, size=disk_sz,
logical_id=(vgname, names[0]))
- sda_dev_m2 = objects.Disk(dev_type="lvm", size=disk_sz,
+ sda_dev_m2 = objects.Disk(dev_type=constants.LD_LV, size=disk_sz,
logical_id=(vgname, names[1]))
- md_sda_dev = objects.Disk(dev_type="md_raid1", iv_name = "sda",
+ md_sda_dev = objects.Disk(dev_type=constants.LD_MD_R1, iv_name = "sda",
size=disk_sz,
children = [sda_dev_m1, sda_dev_m2])
- sdb_dev_m1 = objects.Disk(dev_type="lvm", size=swap_sz,
+ sdb_dev_m1 = objects.Disk(dev_type=constants.LD_LV, size=swap_sz,
logical_id=(vgname, names[2]))
- sdb_dev_m2 = objects.Disk(dev_type="lvm", size=swap_sz,
+ sdb_dev_m2 = objects.Disk(dev_type=constants.LD_LV, size=swap_sz,
logical_id=(vgname, names[3]))
- md_sdb_dev = objects.Disk(dev_type="md_raid1", iv_name = "sdb",
+ md_sdb_dev = objects.Disk(dev_type=constants.LD_MD_R1, iv_name = "sdb",
size=swap_sz,
children = [sdb_dev_m1, sdb_dev_m2])
disks = [md_sda_dev, md_sdb_dev]
".sdb_data", ".sdb_meta"])
drbd_sda_dev = _GenerateMDDRBDBranch(cfg, primary_node, remote_node,
disk_sz, names[0:2])
- md_sda_dev = objects.Disk(dev_type="md_raid1", iv_name="sda",
+ md_sda_dev = objects.Disk(dev_type=constants.LD_MD_R1, iv_name="sda",
children = [drbd_sda_dev], size=disk_sz)
drbd_sdb_dev = _GenerateMDDRBDBranch(cfg, primary_node, remote_node,
swap_sz, names[2:4])
- md_sdb_dev = objects.Disk(dev_type="md_raid1", iv_name="sdb",
+ md_sdb_dev = objects.Disk(dev_type=constants.LD_MD_R1, iv_name="sdb",
children = [drbd_sdb_dev], size=swap_sz)
disks = [md_sda_dev, md_sdb_dev]
+ elif template_name == constants.DT_DRBD8:
+ if len(secondary_nodes) != 1:
+ raise errors.ProgrammerError("Wrong template configuration")
+ remote_node = secondary_nodes[0]
+ names = _GenerateUniqueNames(cfg, [".sda_data", ".sda_meta",
+ ".sdb_data", ".sdb_meta"])
+ drbd_sda_dev = _GenerateDRBD8Branch(cfg, primary_node, remote_node,
+ disk_sz, names[0:2], "sda")
+ drbd_sdb_dev = _GenerateDRBD8Branch(cfg, primary_node, remote_node,
+ swap_sz, names[2:4], "sdb")
+ disks = [drbd_sda_dev, drbd_sdb_dev]
else:
raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
return disks
This abstracts away some work from `AddInstance()` and
`RemoveInstance()`. Note that in case some of the devices couldn't
- be remove, the removal will continue with the other ones (compare
+ be removed, the removal will continue with the other ones (compare
with `_CreateDisks()`).
Args:
if self.op.disk_template not in constants.DISK_TEMPLATES:
raise errors.OpPrereqError("Invalid disk template name")
- if self.op.disk_template == constants.DT_REMOTE_RAID1:
+ if self.op.disk_template in constants.DTS_NET_MIRROR:
if getattr(self.op, "snode", None) is None:
- raise errors.OpPrereqError("The 'remote_raid1' disk template needs"
+ raise errors.OpPrereqError("The networked disk templates need"
" a mirror node")
snode_name = self.cfg.ExpandNodeName(self.op.snode)
constants.DT_LOCAL_RAID1: (self.op.disk_size + self.op.swap_size) * 2,
# 256 MB are added for drbd metadata, 128MB for each drbd device
constants.DT_REMOTE_RAID1: self.op.disk_size + self.op.swap_size + 256,
+ constants.DT_DRBD8: self.op.disk_size + self.op.swap_size + 256,
}
if self.op.disk_template not in req_size_dict:
" adding an instance in start mode")
if self.op.ip_check:
- command = ["fping", "-q", hostname1.ip]
- result = utils.RunCmd(command)
- if not result.failed:
- raise errors.OpPrereqError("IP address %s of instance %s already"
- " in use" % (hostname1.ip, instance_name))
+ if utils.TcpPing(utils.HostInfo().name, hostname1.ip,
+ constants.DEFAULT_NODED_PORT):
+ raise errors.OpPrereqError("IP %s of instance %s already in use" %
+ (hostname1.ip, instance_name))
# bridge verification
bridge = getattr(self.op, "bridge", None)
if self.op.wait_for_sync:
disk_abort = not _WaitForSync(self.cfg, iobj)
- elif iobj.disk_template == constants.DT_REMOTE_RAID1:
+ elif iobj.disk_template in constants.DTS_NET_MIRROR:
# make sure the disks are not degraded (still sync-ing is ok)
time.sleep(15)
feedback_fn("* checking mirrors status")
# the device exists now
# call the primary node to add the mirror to md
logger.Info("adding new mirror component to md")
- if not rpc.call_blockdev_addchild(instance.primary_node,
- disk, new_drbd):
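+ # the renamed addchildren/removechildren calls operate on lists of children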
+ if not rpc.call_blockdev_addchildren(instance.primary_node,
+ disk, [new_drbd]):
logger.Error("Can't add mirror compoment to md!")
self.cfg.SetDiskID(new_drbd, remote_node)
if not rpc.call_blockdev_remove(remote_node, new_drbd):
raise errors.OpPrereqError("Can't find this device ('%s') in the"
" instance." % self.op.disk_name)
for child in disk.children:
- if child.dev_type == "drbd" and child.logical_id[2] == self.op.disk_id:
+ if (child.dev_type == constants.LD_DRBD7 and
+ child.logical_id[2] == self.op.disk_id):
break
else:
raise errors.OpPrereqError("Can't find the device with this port.")
child = self.child
logger.Info("remove mirror component")
self.cfg.SetDiskID(disk, instance.primary_node)
- if not rpc.call_blockdev_removechild(instance.primary_node,
- disk, child):
+ if not rpc.call_blockdev_removechildren(instance.primary_node,
+ disk, [child]):
raise errors.OpExecError("Can't remove child from mirror.")
for node in child.logical_id[:2]:
# the device exists now
# call the primary node to add the mirror to md
logger.Info("adding new mirror component to md")
- if not rpc.call_blockdev_addchild(instance.primary_node, dev,
- new_drbd):
+ if not rpc.call_blockdev_addchildren(instance.primary_node, dev,
+ [new_drbd]):
logger.Error("Can't add mirror compoment to md!")
cfg.SetDiskID(new_drbd, remote_node)
if not rpc.call_blockdev_remove(remote_node, new_drbd):
dev, child, new_drbd = iv_names[name]
logger.Info("remove mirror %s component" % name)
cfg.SetDiskID(dev, instance.primary_node)
- if not rpc.call_blockdev_removechild(instance.primary_node,
- dev, child):
+ if not rpc.call_blockdev_removechildren(instance.primary_node,
+ dev, [child]):
logger.Error("Can't remove child from mirror, aborting"
" *this device cleanup*.\nYou need to cleanup manually!!")
continue
"""
self.cfg.SetDiskID(dev, instance.primary_node)
dev_pstatus = rpc.call_blockdev_find(instance.primary_node, dev)
- if dev.dev_type == "drbd":
+ if dev.dev_type in constants.LDS_DRBD:
# we change the snode then (otherwise we use the one passed in)
if dev.logical_id[0] == instance.primary_node:
snode = dev.logical_id[1]
"memory": instance.memory,
"nics": [(nic.mac, nic.ip, nic.bridge) for nic in instance.nics],
"disks": disks,
+ "vcpus": instance.vcpus,
}
result[instance.name] = idict
logger.Error("could not snapshot block device %s on node %s" %
(disk.logical_id[1], src_node))
else:
- new_dev = objects.Disk(dev_type="lvm", size=disk.size,
+ new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
logical_id=(vgname, new_dev_name),
physical_id=(vgname, new_dev_name),
iv_name=disk.iv_name)