from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
+from ganeti import locking
from ganeti import config
from ganeti import constants
from ganeti import objects
"""Logical Unit base class.
Subclasses must follow these rules:
- - implement CheckPrereq which also fills in the opcode instance
- with all the fields (even if as None)
+ - implement ExpandNames
+ - implement CheckPrereq
- implement Exec
- implement BuildHooksEnv
- redefine HPATH and HTYPE
- - optionally redefine their run requirements (REQ_CLUSTER,
- REQ_MASTER); note that all commands require root permissions
+ - optionally redefine their run requirements:
+ REQ_MASTER: the LU needs to run on the master node
+ REQ_WSSTORE: the LU needs a writable SimpleStore
+ REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
+
+ Note that all commands require root permissions.
"""
HPATH = None
HTYPE = None
_OP_REQP = []
- REQ_CLUSTER = True
REQ_MASTER = True
+ REQ_WSSTORE = False
+ REQ_BGL = True
- def __init__(self, processor, op, cfg, sstore):
+ def __init__(self, processor, op, context, sstore):
"""Constructor for LogicalUnit.
This needs to be overriden in derived classes in order to check op
"""
self.proc = processor
self.op = op
- self.cfg = cfg
+ self.cfg = context.cfg
self.sstore = sstore
+ self.context = context
+ self.needed_locks = None
self.__ssh = None
for attr_name in self._OP_REQP:
if attr_val is None:
raise errors.OpPrereqError("Required parameter '%s' missing" %
attr_name)
- if self.REQ_CLUSTER:
- if not cfg.IsCluster():
- raise errors.OpPrereqError("Cluster not initialized yet,"
- " use 'gnt-cluster init' first.")
- if self.REQ_MASTER:
- master = sstore.GetMasterNode()
- if master != utils.HostInfo().name:
- raise errors.OpPrereqError("Commands must be run on the master"
- " node %s" % master)
+
+ if not self.cfg.IsCluster():
+ raise errors.OpPrereqError("Cluster not initialized yet,"
+ " use 'gnt-cluster init' first.")
+ if self.REQ_MASTER:
+ master = sstore.GetMasterNode()
+ if master != utils.HostInfo().name:
+ raise errors.OpPrereqError("Commands must be run on the master"
+ " node %s" % master)
def __GetSSH(self):
"""Returns the SshRunner object
ssh = property(fget=__GetSSH)
+ def ExpandNames(self):
+ """Expand names for this LU.
+
+ This method is called before starting to execute the opcode, and it should
+ update all the parameters of the opcode to their canonical form (e.g. a
+ short node name must be fully expanded after this method has successfully
+ completed). This way locking, hooks, logging, etc. can work correctly.
+
+ LUs which implement this method must also populate the self.needed_locks
+ member, as a dict with lock levels as keys, and a list of needed lock names
+ as values. Rules:
+ - Use an empty dict if you don't need any lock
+ - If you don't need any lock at a particular level omit that level
+ - Don't put anything for the BGL level
+ - If you want all locks at a level use None as a value
+ (this reflects what LockSet does, and will be replaced before
+ CheckPrereq with the full list of nodes that have been locked)
+
+ Examples:
+ # Acquire all nodes and one instance
+ self.needed_locks = {
+ locking.LEVEL_NODE: None,
+ locking.LEVEL_INSTANCE: ['instance1.example.tld'],
+ }
+ # Acquire just two nodes
+ self.needed_locks = {
+ locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
+ }
+ # Acquire no locks
+ self.needed_locks = {} # No, you can't leave it to the default value None
+
+ """
+ # The implementation of this method is mandatory only if the new LU is
+ # concurrent, so that old LUs don't need to be changed all at the same
+ # time.
+ if self.REQ_BGL:
+ self.needed_locks = {} # Exclusive LUs don't need locks.
+ else:
+ raise NotImplementedError
+
def CheckPrereq(self):
"""Check prerequisites for this LU.
not fulfilled. Its return value is ignored.
This method should also update all the parameters of the opcode to
- their canonical form; e.g. a short node name must be fully
- expanded after this method has successfully completed (so that
- hooks, logging, etc. work correctly).
+ their canonical form if it hasn't been done by ExpandNames before.
"""
raise NotImplementedError
HTYPE = None
-def _AddHostToEtcHosts(hostname):
- """Wrapper around utils.SetEtcHostsEntry.
-
- """
- hi = utils.HostInfo(name=hostname)
- utils.SetEtcHostsEntry(constants.ETC_HOSTS, hi.ip, hi.name, [hi.ShortName()])
-
-
-def _RemoveHostFromEtcHosts(hostname):
- """Wrapper around utils.RemoveEtcHostsEntry.
-
- """
- hi = utils.HostInfo(name=hostname)
- utils.RemoveEtcHostsEntry(constants.ETC_HOSTS, hi.name)
- utils.RemoveEtcHostsEntry(constants.ETC_HOSTS, hi.ShortName())
-
-
def _GetWantedNodes(lu, nodes):
"""Returns list of checked and expanded node names.
return _BuildInstanceHookEnv(**args)
-def _HasValidVG(vglist, vgname):
- """Checks if the volume group list is valid.
-
- A non-None return value means there's an error, and the return value
- is the error message.
-
- """
- vgsize = vglist.get(vgname, None)
- if vgsize is None:
- return "volume group '%s' missing" % vgname
- elif vgsize < 20480:
- return ("volume group '%s' too small (20480MiB required, %dMib found)" %
- (vgname, vgsize))
- return None
-
-
-def _InitSSHSetup(node):
- """Setup the SSH configuration for the cluster.
-
-
- This generates a dsa keypair for root, adds the pub key to the
- permitted hosts and adds the hostkey to its own known hosts.
-
- Args:
- node: the name of this host as a fqdn
-
- """
- priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)
-
- for name in priv_key, pub_key:
- if os.path.exists(name):
- utils.CreateBackup(name)
- utils.RemoveFile(name)
-
- result = utils.RunCmd(["ssh-keygen", "-t", "dsa",
- "-f", priv_key,
- "-q", "-N", ""])
- if result.failed:
- raise errors.OpExecError("Could not generate ssh keypair, error %s" %
- result.output)
-
- f = open(pub_key, 'r')
- try:
- utils.AddAuthorizedKey(auth_keys, f.read(8192))
- finally:
- f.close()
-
-
-def _InitGanetiServerSetup(ss):
- """Setup the necessary configuration for the initial node daemon.
-
- This creates the nodepass file containing the shared password for
- the cluster and also generates the SSL certificate.
-
- """
- # Create pseudo random password
- randpass = sha.new(os.urandom(64)).hexdigest()
- # and write it into sstore
- ss.SetKey(ss.SS_NODED_PASS, randpass)
-
- result = utils.RunCmd(["openssl", "req", "-new", "-newkey", "rsa:1024",
- "-days", str(365*5), "-nodes", "-x509",
- "-keyout", constants.SSL_CERT_FILE,
- "-out", constants.SSL_CERT_FILE, "-batch"])
- if result.failed:
- raise errors.OpExecError("could not generate server ssl cert, command"
- " %s had exitcode %s and error message %s" %
- (result.cmd, result.exit_code, result.output))
-
- os.chmod(constants.SSL_CERT_FILE, 0400)
-
- result = utils.RunCmd([constants.NODE_INITD_SCRIPT, "restart"])
-
- if result.failed:
- raise errors.OpExecError("Could not start the node daemon, command %s"
- " had exitcode %s and error %s" %
- (result.cmd, result.exit_code, result.output))
-
-
def _CheckInstanceBridgesExist(instance):
"""Check that the brigdes needed by an instance exist.
(brlist, instance.primary_node))
-class LUInitCluster(LogicalUnit):
- """Initialise the cluster.
-
- """
- HPATH = "cluster-init"
- HTYPE = constants.HTYPE_CLUSTER
- _OP_REQP = ["cluster_name", "hypervisor_type", "mac_prefix",
- "def_bridge", "master_netdev", "file_storage_dir"]
- REQ_CLUSTER = False
-
- def BuildHooksEnv(self):
- """Build hooks env.
-
- Notes: Since we don't require a cluster, we must manually add
- ourselves in the post-run node list.
-
- """
- env = {"OP_TARGET": self.op.cluster_name}
- return env, [], [self.hostname.name]
-
- def CheckPrereq(self):
- """Verify that the passed name is a valid one.
-
- """
- if config.ConfigWriter.IsCluster():
- raise errors.OpPrereqError("Cluster is already initialised")
-
- if self.op.hypervisor_type == constants.HT_XEN_HVM31:
- if not os.path.exists(constants.VNC_PASSWORD_FILE):
- raise errors.OpPrereqError("Please prepare the cluster VNC"
- "password file %s" %
- constants.VNC_PASSWORD_FILE)
-
- self.hostname = hostname = utils.HostInfo()
-
- if hostname.ip.startswith("127."):
- raise errors.OpPrereqError("This host's IP resolves to the private"
- " range (%s). Please fix DNS or %s." %
- (hostname.ip, constants.ETC_HOSTS))
-
- if not utils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT,
- source=constants.LOCALHOST_IP_ADDRESS):
- raise errors.OpPrereqError("Inconsistency: this host's name resolves"
- " to %s,\nbut this ip address does not"
- " belong to this host."
- " Aborting." % hostname.ip)
-
- self.clustername = clustername = utils.HostInfo(self.op.cluster_name)
-
- if utils.TcpPing(clustername.ip, constants.DEFAULT_NODED_PORT,
- timeout=5):
- raise errors.OpPrereqError("Cluster IP already active. Aborting.")
-
- secondary_ip = getattr(self.op, "secondary_ip", None)
- if secondary_ip and not utils.IsValidIP(secondary_ip):
- raise errors.OpPrereqError("Invalid secondary ip given")
- if (secondary_ip and
- secondary_ip != hostname.ip and
- (not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
- source=constants.LOCALHOST_IP_ADDRESS))):
- raise errors.OpPrereqError("You gave %s as secondary IP,"
- " but it does not belong to this host." %
- secondary_ip)
- self.secondary_ip = secondary_ip
-
- if not hasattr(self.op, "vg_name"):
- self.op.vg_name = None
- # if vg_name not None, checks if volume group is valid
- if self.op.vg_name:
- vgstatus = _HasValidVG(utils.ListVolumeGroups(), self.op.vg_name)
- if vgstatus:
- raise errors.OpPrereqError("Error: %s\nspecify --no-lvm-storage if"
- " you are not using lvm" % vgstatus)
-
- self.op.file_storage_dir = os.path.normpath(self.op.file_storage_dir)
-
- if not os.path.isabs(self.op.file_storage_dir):
- raise errors.OpPrereqError("The file storage directory you have is"
- " not an absolute path.")
-
- if not os.path.exists(self.op.file_storage_dir):
- try:
- os.makedirs(self.op.file_storage_dir, 0750)
- except OSError, err:
- raise errors.OpPrereqError("Cannot create file storage directory"
- " '%s': %s" %
- (self.op.file_storage_dir, err))
-
- if not os.path.isdir(self.op.file_storage_dir):
- raise errors.OpPrereqError("The file storage directory '%s' is not"
- " a directory." % self.op.file_storage_dir)
-
- if not re.match("^[0-9a-z]{2}:[0-9a-z]{2}:[0-9a-z]{2}$",
- self.op.mac_prefix):
- raise errors.OpPrereqError("Invalid mac prefix given '%s'" %
- self.op.mac_prefix)
-
- if self.op.hypervisor_type not in constants.HYPER_TYPES:
- raise errors.OpPrereqError("Invalid hypervisor type given '%s'" %
- self.op.hypervisor_type)
-
- result = utils.RunCmd(["ip", "link", "show", "dev", self.op.master_netdev])
- if result.failed:
- raise errors.OpPrereqError("Invalid master netdev given (%s): '%s'" %
- (self.op.master_netdev,
- result.output.strip()))
-
- if not (os.path.isfile(constants.NODE_INITD_SCRIPT) and
- os.access(constants.NODE_INITD_SCRIPT, os.X_OK)):
- raise errors.OpPrereqError("Init.d script '%s' missing or not"
- " executable." % constants.NODE_INITD_SCRIPT)
-
- def Exec(self, feedback_fn):
- """Initialize the cluster.
-
- """
- clustername = self.clustername
- hostname = self.hostname
-
- # set up the simple store
- self.sstore = ss = ssconf.SimpleStore()
- ss.SetKey(ss.SS_HYPERVISOR, self.op.hypervisor_type)
- ss.SetKey(ss.SS_MASTER_NODE, hostname.name)
- ss.SetKey(ss.SS_MASTER_IP, clustername.ip)
- ss.SetKey(ss.SS_MASTER_NETDEV, self.op.master_netdev)
- ss.SetKey(ss.SS_CLUSTER_NAME, clustername.name)
- ss.SetKey(ss.SS_FILE_STORAGE_DIR, self.op.file_storage_dir)
- ss.SetKey(ss.SS_CONFIG_VERSION, constants.CONFIG_VERSION)
-
- # set up the inter-node password and certificate
- _InitGanetiServerSetup(ss)
-
- # start the master ip
- rpc.call_node_start_master(hostname.name)
-
- # set up ssh config and /etc/hosts
- f = open(constants.SSH_HOST_RSA_PUB, 'r')
- try:
- sshline = f.read()
- finally:
- f.close()
- sshkey = sshline.split(" ")[1]
-
- _AddHostToEtcHosts(hostname.name)
- _InitSSHSetup(hostname.name)
-
- # init of cluster config file
- self.cfg = cfgw = config.ConfigWriter()
- cfgw.InitConfig(hostname.name, hostname.ip, self.secondary_ip,
- sshkey, self.op.mac_prefix,
- self.op.vg_name, self.op.def_bridge)
-
- ssh.WriteKnownHostsFile(cfgw, ss, constants.SSH_KNOWN_HOSTS_FILE)
-
-
class LUDestroyCluster(NoHooksLU):
"""Logical unit for destroying the cluster.
(node,))
bad = True
else:
- vgstatus = _HasValidVG(vglist, self.cfg.GetVGName())
+ vgstatus = utils.CheckVolumeGroupSize(vglist, self.cfg.GetVGName(),
+ constants.MIN_VG_SIZE)
if vgstatus:
feedback_fn(" - ERROR: %s on node %s" % (vgstatus, node))
bad = True
HPATH = "cluster-rename"
HTYPE = constants.HTYPE_CLUSTER
_OP_REQP = ["name"]
+ REQ_WSSTORE = True
def BuildHooksEnv(self):
"""Build hooks env.
raise errors.OpPrereqError("Neither the name nor the IP address of the"
" cluster has changed")
if new_ip != old_ip:
- result = utils.RunCmd(["fping", "-q", new_ip])
- if not result.failed:
+ if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
raise errors.OpPrereqError("The given cluster IP address (%s) is"
" reachable on the network. Aborting." %
new_ip)
node_list = self.cfg.GetNodeList()
vglist = rpc.call_vg_list(node_list)
for node in node_list:
- vgstatus = _HasValidVG(vglist[node], self.op.vg_name)
+ vgstatus = utils.CheckVolumeGroupSize(vglist[node], self.op.vg_name,
+ constants.MIN_VG_SIZE)
if vgstatus:
raise errors.OpPrereqError("Error on node '%s': %s" %
(node, vgstatus))
if done or oneshot:
break
- if unlock:
- #utils.Unlock('cmd')
- pass
- try:
- time.sleep(min(60, max_time))
- finally:
- if unlock:
- #utils.Lock('cmd')
- pass
+ time.sleep(min(60, max_time))
if done:
proc.LogInfo("Instance %s's disks are in sync." % instance.name)
"""Build hooks env.
This doesn't run on the target node in the pre phase as a failed
- node would not allows itself to run.
+ node would then be impossible to remove.
"""
env = {
rpc.call_node_leave_cluster(node.name)
- self.ssh.Run(node.name, 'root', "%s stop" % constants.NODE_INITD_SCRIPT)
-
logger.Info("Removing node %s from config" % node.name)
self.cfg.RemoveNode(node.name)
+ # Remove the node from the Ganeti Lock Manager
+ self.context.glm.remove(locking.LEVEL_NODE, node.name)
- _RemoveHostFromEtcHosts(node.name)
+ utils.RemoveHostFromEtcHosts(node.name)
class LUQueryNodes(NoHooksLU):
_CheckOutputFields(static=["name", "pinst_cnt", "sinst_cnt",
"pinst_list", "sinst_list",
- "pip", "sip"],
+ "pip", "sip", "tags"],
dynamic=self.dynamic_fields,
selected=self.op.output_fields)
val = node.primary_ip
elif field == "sip":
val = node.secondary_ip
+ elif field == "tags":
+ val = list(node.GetTags())
elif field in self.dynamic_fields:
val = live_data[node.name].get(field, None)
else:
primary_ip=primary_ip,
secondary_ip=secondary_ip)
- if self.sstore.GetHypervisorType() == constants.HT_XEN_HVM31:
- if not os.path.exists(constants.VNC_PASSWORD_FILE):
- raise errors.OpPrereqError("Cluster VNC password file %s missing" %
- constants.VNC_PASSWORD_FILE)
-
def Exec(self, feedback_fn):
"""Adds the new node to the cluster.
new_node = self.new_node
node = new_node.name
- # set up inter-node password and certificate and restarts the node daemon
- gntpass = self.sstore.GetNodeDaemonPassword()
- if not re.match('^[a-zA-Z0-9.]{1,64}$', gntpass):
- raise errors.OpExecError("ganeti password corruption detected")
- f = open(constants.SSL_CERT_FILE)
- try:
- gntpem = f.read(8192)
- finally:
- f.close()
- # in the base64 pem encoding, neither '!' nor '.' are valid chars,
- # so we use this to detect an invalid certificate; as long as the
- # cert doesn't contain this, the here-document will be correctly
- # parsed by the shell sequence below
- if re.search('^!EOF\.', gntpem, re.MULTILINE):
- raise errors.OpExecError("invalid PEM encoding in the SSL certificate")
- if not gntpem.endswith("\n"):
- raise errors.OpExecError("PEM must end with newline")
- logger.Info("copy cluster pass to %s and starting the node daemon" % node)
-
- # and then connect with ssh to set password and start ganeti-noded
- # note that all the below variables are sanitized at this point,
- # either by being constants or by the checks above
- ss = self.sstore
- mycommand = ("umask 077 && "
- "echo '%s' > '%s' && "
- "cat > '%s' << '!EOF.' && \n"
- "%s!EOF.\n%s restart" %
- (gntpass, ss.KeyToFilename(ss.SS_NODED_PASS),
- constants.SSL_CERT_FILE, gntpem,
- constants.NODE_INITD_SCRIPT))
-
- result = self.ssh.Run(node, 'root', mycommand, batch=False, ask_key=True)
- if result.failed:
- raise errors.OpExecError("Remote command on node %s, error: %s,"
- " output: %s" %
- (node, result.fail_reason, result.output))
-
# check connectivity
- time.sleep(4)
-
result = rpc.call_version([node])[node]
if result:
if constants.PROTOCOL_VERSION == result:
raise errors.OpExecError("Cannot transfer ssh keys to the new node")
# Add node to our /etc/hosts, and add key to known_hosts
- _AddHostToEtcHosts(new_node.name)
+ utils.AddHostToEtcHosts(new_node.name)
if new_node.secondary_ip != new_node.primary_ip:
if not rpc.call_node_tcp_ping(new_node.name,
" you gave (%s). Please fix and re-run this"
" command." % new_node.secondary_ip)
- success, msg = self.ssh.VerifyNodeHostname(node)
- if not success:
- raise errors.OpExecError("Node '%s' claims it has a different hostname"
- " than the one the resolver gives: %s."
- " Please fix and re-run this command." %
- (node, msg))
+ node_verify_list = [self.sstore.GetMasterNode()]
+ node_verify_param = {
+ 'nodelist': [node],
+ # TODO: do a node-net-test as well?
+ }
+
+ result = rpc.call_node_verify(node_verify_list, node_verify_param)
+ for verifier in node_verify_list:
+ if not result[verifier]:
+ raise errors.OpExecError("Cannot communicate with %s's node daemon"
+ " for remote verification" % verifier)
+ if result[verifier]['nodelist']:
+ for failed in result[verifier]['nodelist']:
+ feedback_fn("ssh/hostname verification failed %s -> %s" %
+ (verifier, result[verifier]['nodelist'][failed]))
+ raise errors.OpExecError("ssh/hostname verification failed.")
# Distribute updated /etc/hosts and known_hosts to all nodes,
# including the node just added
logger.Error("copy of file %s to node %s failed" %
(fname, to_node))
- to_copy = ss.GetFileList()
+ to_copy = self.sstore.GetFileList()
if self.sstore.GetHypervisorType() == constants.HT_XEN_HVM31:
to_copy.append(constants.VNC_PASSWORD_FILE)
for fname in to_copy:
- if not self.ssh.CopyFileToNode(node, fname):
+ result = rpc.call_upload_file([node], fname)
+ if not result[node]:
logger.Error("could not copy file %s to node %s" % (fname, node))
if not self.op.readd:
logger.Info("adding node %s to cluster.conf" % node)
self.cfg.AddNode(new_node)
+ # Add the new node to the Ganeti Lock Manager
+ self.context.glm.add(locking.LEVEL_NODE, node)
class LUMasterFailover(LogicalUnit):
HPATH = "master-failover"
HTYPE = constants.HTYPE_CLUSTER
REQ_MASTER = False
+ REQ_WSSTORE = True
_OP_REQP = []
def BuildHooksEnv(self):
"export_version": constants.EXPORT_VERSION,
"master": self.sstore.GetMasterNode(),
"architecture": (platform.architecture()[0], platform.machine()),
+ "hypervisor_type": self.sstore.GetHypervisorType(),
}
return result
-class LUClusterCopyFile(NoHooksLU):
- """Copy file to cluster.
-
- """
- _OP_REQP = ["nodes", "filename"]
-
- def CheckPrereq(self):
- """Check prerequisites.
-
- It should check that the named file exists and that the given list
- of nodes is valid.
-
- """
- if not os.path.exists(self.op.filename):
- raise errors.OpPrereqError("No such filename '%s'" % self.op.filename)
-
- self.nodes = _GetWantedNodes(self, self.op.nodes)
-
- def Exec(self, feedback_fn):
- """Copy a file from master to some nodes.
-
- Args:
- opts - class with options as members
- args - list containing a single element, the file name
- Opts used:
- nodes - list containing the name of target nodes; if empty, all nodes
-
- """
- filename = self.op.filename
-
- myname = utils.HostInfo().name
-
- for node in self.nodes:
- if node == myname:
- continue
- if not self.ssh.CopyFileToNode(node, filename):
- logger.Error("Copy of file %s to node %s failed" % (filename, node))
-
-
class LUDumpClusterConfig(NoHooksLU):
"""Return a text-representation of the cluster-config.
return self.cfg.DumpConfig()
-class LURunClusterCommand(NoHooksLU):
- """Run a command on some nodes.
-
- """
- _OP_REQP = ["command", "nodes"]
-
- def CheckPrereq(self):
- """Check prerequisites.
-
- It checks that the given list of nodes is valid.
-
- """
- self.nodes = _GetWantedNodes(self, self.op.nodes)
-
- def Exec(self, feedback_fn):
- """Run a command on some nodes.
-
- """
- # put the master at the end of the nodes list
- master_node = self.sstore.GetMasterNode()
- if master_node in self.nodes:
- self.nodes.remove(master_node)
- self.nodes.append(master_node)
-
- data = []
- for node in self.nodes:
- result = self.ssh.Run(node, "root", self.op.command)
- data.append((node, result.output, result.exit_code))
-
- return data
-
-
class LUActivateInstanceDisks(NoHooksLU):
"""Bring up an instance's disks.
new_name)
if not getattr(self.op, "ignore_ip", False):
- command = ["fping", "-q", name_info.ip]
- result = utils.RunCmd(command)
- if not result.failed:
+ if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
raise errors.OpPrereqError("IP %s of instance %s already in use" %
(name_info.ip, new_name))
logger.Info("removing instance %s out of cluster config" % instance.name)
self.cfg.RemoveInstance(instance.name)
+ # Remove the instance from the Ganeti Lock Manager
+ self.context.glm.remove(locking.LEVEL_INSTANCE, instance.name)
class LUQueryInstances(NoHooksLU):
_CheckOutputFields(static=["name", "os", "pnode", "snodes",
"admin_state", "admin_ram",
"disk_template", "ip", "mac", "bridge",
- "sda_size", "sdb_size", "vcpus"],
+ "sda_size", "sdb_size", "vcpus", "tags"],
dynamic=self.dynamic_fields,
selected=self.op.output_fields)
val = disk.size
elif field == "vcpus":
val = instance.vcpus
+ elif field == "tags":
+ val = list(instance.GetTags())
else:
raise errors.ParameterError(field)
iout.append(val)
secondary_nodes = instance.secondary_nodes
if not secondary_nodes:
raise errors.ProgrammerError("no secondary node but using "
- "DT_REMOTE_RAID1 template")
+ "a mirrored disk template")
target_node = secondary_nodes[0]
# check memory requirements on the secondary node
feedback_fn("* checking disk consistency between source and target")
for dev in instance.disks:
- # for remote_raid1, these are md over drbd
+ # for drbd, these are drbd over lvm
if not _CheckDiskConsistency(self.cfg, dev, target_node, False):
if instance.status == "up" and not self.op.ignore_consistency:
raise errors.OpExecError("Disk %s is degraded on target node,"
instance.primary_node = target_node
# distribute new instance config to the other nodes
- self.cfg.AddInstance(instance)
+ self.cfg.Update(instance)
# Only start the instance if it's marked as up
if instance.status == "up":
return results
-def _GenerateMDDRBDBranch(cfg, primary, secondary, size, names):
- """Generate a drbd device complete with its children.
-
- """
- port = cfg.AllocatePort()
- vgname = cfg.GetVGName()
- dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
- logical_id=(vgname, names[0]))
- dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
- logical_id=(vgname, names[1]))
- drbd_dev = objects.Disk(dev_type=constants.LD_DRBD7, size=size,
- logical_id = (primary, secondary, port),
- children = [dev_data, dev_meta])
- return drbd_dev
-
-
def _GenerateDRBD8Branch(cfg, primary, secondary, size, names, iv_name):
"""Generate a drbd8 device complete with its children.
"""
# set optional parameters to none if they don't exist
for attr in ["kernel_path", "initrd_path", "hvm_boot_order", "pnode",
- "iallocator"]:
+ "iallocator", "hvm_acpi", "hvm_pae", "hvm_cdrom_image_path",
+ "vnc_bind_address"]:
if not hasattr(self.op, attr):
setattr(self.op, attr, None)
info = nodeinfo.get(node, None)
if not info:
raise errors.OpPrereqError("Cannot get current information"
- " from node '%s'" % nodeinfo)
+ " from node '%s'" % node)
vg_free = info.get('vg_free', None)
if not isinstance(vg_free, int):
raise errors.OpPrereqError("Can't compute free disk space on"
" destination node '%s'" %
(self.op.bridge, pnode.name))
+ # memory check on primary node
+ if self.op.start:
+ _CheckNodeFreeMemory(self.cfg, self.pnode.name,
+ "creating instance %s" % self.op.instance_name,
+ self.op.mem_size)
+
+ # hvm_cdrom_image_path verification
+ if self.op.hvm_cdrom_image_path is not None:
+ if not os.path.isabs(self.op.hvm_cdrom_image_path):
+ raise errors.OpPrereqError("The path to the HVM CDROM image must"
+ " be an absolute path or None, not %s" %
+ self.op.hvm_cdrom_image_path)
+ if not os.path.isfile(self.op.hvm_cdrom_image_path):
+ raise errors.OpPrereqError("The HVM CDROM image must either be a"
+ " regular file or a symlink pointing to"
+ " an existing regular file, not %s" %
+ self.op.hvm_cdrom_image_path)
+
+ # vnc_bind_address verification
+ if self.op.vnc_bind_address is not None:
+ if not utils.IsValidIP(self.op.vnc_bind_address):
+ raise errors.OpPrereqError("given VNC bind address '%s' doesn't look"
+ " like a valid IP address" %
+ self.op.vnc_bind_address)
+
if self.op.start:
self.instance_status = 'up'
else:
else:
network_port = None
+ if self.op.vnc_bind_address is None:
+ self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS
+
# this is needed because os.path.join does not accept None arguments
if self.op.file_storage_dir is None:
string_file_storage_dir = ""
kernel_path=self.op.kernel_path,
initrd_path=self.op.initrd_path,
hvm_boot_order=self.op.hvm_boot_order,
+ hvm_acpi=self.op.hvm_acpi,
+ hvm_pae=self.op.hvm_pae,
+ hvm_cdrom_image_path=self.op.hvm_cdrom_image_path,
+ vnc_bind_address=self.op.vnc_bind_address,
)
feedback_fn("* creating instance disks...")
feedback_fn("adding instance %s to cluster config" % instance)
self.cfg.AddInstance(iobj)
+ # Add the new instance to the Ganeti Lock Manager
+ self.context.glm.add(locking.LEVEL_INSTANCE, instance)
if self.op.wait_for_sync:
disk_abort = not _WaitForSync(self.cfg, iobj, self.proc)
if disk_abort:
_RemoveDisks(iobj, self.cfg)
self.cfg.RemoveInstance(iobj.name)
+ # Remove the new instance from the Ganeti Lock Manager
+ self.context.glm.remove(locking.LEVEL_INSTANCE, iobj.name)
raise errors.OpExecError("There are some degraded disks for"
" this instance")
# replacement as for drbd7 (no different port allocated)
raise errors.OpPrereqError("Same secondary given, cannot execute"
" replacement")
- # the user gave the current secondary, switch to
- # 'no-replace-secondary' mode for drbd7
- remote_node = None
- if (instance.disk_template == constants.DT_REMOTE_RAID1 and
- self.op.mode != constants.REPLACE_DISK_ALL):
- raise errors.OpPrereqError("Template 'remote_raid1' only allows all"
- " disks replacement, not individual ones")
if instance.disk_template == constants.DT_DRBD8:
if (self.op.mode == constants.REPLACE_DISK_ALL and
remote_node is not None):
(name, instance.name))
self.op.remote_node = remote_node
- def _ExecRR1(self, feedback_fn):
- """Replace the disks of an instance.
-
- """
- instance = self.instance
- iv_names = {}
- # start of work
- if self.op.remote_node is None:
- remote_node = self.sec_node
- else:
- remote_node = self.op.remote_node
- cfg = self.cfg
- for dev in instance.disks:
- size = dev.size
- lv_names = [".%s_%s" % (dev.iv_name, suf) for suf in ["data", "meta"]]
- names = _GenerateUniqueNames(cfg, lv_names)
- new_drbd = _GenerateMDDRBDBranch(cfg, instance.primary_node,
- remote_node, size, names)
- iv_names[dev.iv_name] = (dev, dev.children[0], new_drbd)
- logger.Info("adding new mirror component on secondary for %s" %
- dev.iv_name)
- #HARDCODE
- if not _CreateBlockDevOnSecondary(cfg, remote_node, instance,
- new_drbd, False,
- _GetInstanceInfoText(instance)):
- raise errors.OpExecError("Failed to create new component on secondary"
- " node %s. Full abort, cleanup manually!" %
- remote_node)
-
- logger.Info("adding new mirror component on primary")
- #HARDCODE
- if not _CreateBlockDevOnPrimary(cfg, instance.primary_node,
- instance, new_drbd,
- _GetInstanceInfoText(instance)):
- # remove secondary dev
- cfg.SetDiskID(new_drbd, remote_node)
- rpc.call_blockdev_remove(remote_node, new_drbd)
- raise errors.OpExecError("Failed to create volume on primary!"
- " Full abort, cleanup manually!!")
-
- # the device exists now
- # call the primary node to add the mirror to md
- logger.Info("adding new mirror component to md")
- if not rpc.call_blockdev_addchildren(instance.primary_node, dev,
- [new_drbd]):
- logger.Error("Can't add mirror compoment to md!")
- cfg.SetDiskID(new_drbd, remote_node)
- if not rpc.call_blockdev_remove(remote_node, new_drbd):
- logger.Error("Can't rollback on secondary")
- cfg.SetDiskID(new_drbd, instance.primary_node)
- if not rpc.call_blockdev_remove(instance.primary_node, new_drbd):
- logger.Error("Can't rollback on primary")
- raise errors.OpExecError("Full abort, cleanup manually!!")
-
- dev.children.append(new_drbd)
- cfg.AddInstance(instance)
-
- # this can fail as the old devices are degraded and _WaitForSync
- # does a combined result over all disks, so we don't check its
- # return value
- _WaitForSync(cfg, instance, self.proc, unlock=True)
-
- # so check manually all the devices
- for name in iv_names:
- dev, child, new_drbd = iv_names[name]
- cfg.SetDiskID(dev, instance.primary_node)
- is_degr = rpc.call_blockdev_find(instance.primary_node, dev)[5]
- if is_degr:
- raise errors.OpExecError("MD device %s is degraded!" % name)
- cfg.SetDiskID(new_drbd, instance.primary_node)
- is_degr = rpc.call_blockdev_find(instance.primary_node, new_drbd)[5]
- if is_degr:
- raise errors.OpExecError("New drbd device %s is degraded!" % name)
-
- for name in iv_names:
- dev, child, new_drbd = iv_names[name]
- logger.Info("remove mirror %s component" % name)
- cfg.SetDiskID(dev, instance.primary_node)
- if not rpc.call_blockdev_removechildren(instance.primary_node,
- dev, [child]):
- logger.Error("Can't remove child from mirror, aborting"
- " *this device cleanup*.\nYou need to cleanup manually!!")
- continue
-
- for node in child.logical_id[:2]:
- logger.Info("remove child device on %s" % node)
- cfg.SetDiskID(child, node)
- if not rpc.call_blockdev_remove(node, child):
- logger.Error("Warning: failed to remove device from node %s,"
- " continuing operation." % node)
-
- dev.children.remove(child)
-
- cfg.AddInstance(instance)
-
def _ExecD8DiskOnly(self, feedback_fn):
"""Replace a disk on the primary or secondary for dbrd8.
"""
instance = self.instance
- if instance.disk_template == constants.DT_REMOTE_RAID1:
- fn = self._ExecRR1
- elif instance.disk_template == constants.DT_DRBD8:
+
+ # Activate the instance disks if we're replacing them on a down instance
+ if instance.status == "down":
+ op = opcodes.OpActivateInstanceDisks(instance_name=instance.name)
+ self.proc.ChainOpCode(op)
+
+ if instance.disk_template == constants.DT_DRBD8:
if self.op.remote_node is None:
fn = self._ExecD8DiskOnly
else:
fn = self._ExecD8Secondary
else:
raise errors.ProgrammerError("Unhandled disk replacement case")
- return fn(feedback_fn)
+
+ ret = fn(feedback_fn)
+
+ # Deactivate the instance disks if we're replacing them on a down instance
+ if instance.status == "down":
+ op = opcodes.OpDeactivateInstanceDisks(instance_name=instance.name)
+ self.proc.ChainOpCode(op)
+
+ return ret
+
+
+class LUGrowDisk(LogicalUnit):
+ """Grow a disk of an instance.
+
+ """
+ HPATH = "disk-grow"
+ HTYPE = constants.HTYPE_INSTANCE
+ _OP_REQP = ["instance_name", "disk", "amount"]
+
+ def BuildHooksEnv(self):
+ """Build hooks env.
+
+ This runs on the master and on the instance's primary node.
+
+ """
+ env = {
+ "DISK": self.op.disk,
+ "AMOUNT": self.op.amount,
+ }
+ env.update(_BuildInstanceHookEnvByObject(self.instance))
+ nl = [
+ self.sstore.GetMasterNode(),
+ self.instance.primary_node,
+ ]
+ return env, nl, nl
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+
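+ This checks that the instance is in the cluster, that its disk
+ template supports growing, that the requested disk exists and that
+ every node of the instance has enough free space in the volume group.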
+
+ """
+ instance = self.cfg.GetInstanceInfo(
+ self.cfg.ExpandInstanceName(self.op.instance_name))
+ if instance is None:
+ raise errors.OpPrereqError("Instance '%s' not known" %
+ self.op.instance_name)
+ self.instance = instance
+ self.op.instance_name = instance.name
+
+ if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
+ raise errors.OpPrereqError("Instance's disk layout does not support"
+ " growing.")
+
+ if instance.FindDisk(self.op.disk) is None:
+ raise errors.OpPrereqError("Disk '%s' not found for instance '%s'" %
+ (self.op.disk, instance.name))
+
+ nodenames = [instance.primary_node] + list(instance.secondary_nodes)
+ nodeinfo = rpc.call_node_info(nodenames, self.cfg.GetVGName())
+ for node in nodenames:
+ info = nodeinfo.get(node, None)
+ if not info:
+ raise errors.OpPrereqError("Cannot get current information"
+ " from node '%s'" % node)
+ vg_free = info.get('vg_free', None)
+ if not isinstance(vg_free, int):
+ raise errors.OpPrereqError("Can't compute free disk space on"
+ " node %s" % node)
+ if self.op.amount > info['vg_free']:
+ raise errors.OpPrereqError("Not enough disk space on target node %s:"
+ " %d MiB available, %d MiB required" %
+ (node, info['vg_free'], self.op.amount))
+
+ def Exec(self, feedback_fn):
+ """Execute disk grow.
+
+ """
+ instance = self.instance
+ disk = instance.FindDisk(self.op.disk)
+ for node in (instance.secondary_nodes + (instance.primary_node,)):
+ self.cfg.SetDiskID(disk, node)
+ result = rpc.call_blockdev_grow(node, disk, self.op.amount)
+ if not result or not isinstance(result, tuple) or len(result) != 2:
+ raise errors.OpExecError("grow request failed to node %s" % node)
+ elif not result[0]:
+ raise errors.OpExecError("grow request failed to node %s: %s" %
+ (node, result[1]))
+ disk.RecordGrow(self.op.amount)
+ self.cfg.Update(instance)
+ return
class LUQueryInstanceData(NoHooksLU):
"memory": instance.memory,
"nics": [(nic.mac, nic.ip, nic.bridge) for nic in instance.nics],
"disks": disks,
- "network_port": instance.network_port,
"vcpus": instance.vcpus,
- "kernel_path": instance.kernel_path,
- "initrd_path": instance.initrd_path,
- "hvm_boot_order": instance.hvm_boot_order,
}
+ htkind = self.sstore.GetHypervisorType()
+ if htkind == constants.HT_XEN_PVM30:
+ idict["kernel_path"] = instance.kernel_path
+ idict["initrd_path"] = instance.initrd_path
+
+ if htkind == constants.HT_XEN_HVM31:
+ idict["hvm_boot_order"] = instance.hvm_boot_order
+ idict["hvm_acpi"] = instance.hvm_acpi
+ idict["hvm_pae"] = instance.hvm_pae
+ idict["hvm_cdrom_image_path"] = instance.hvm_cdrom_image_path
+
+ if htkind in constants.HTS_REQ_PORT:
+ idict["vnc_bind_address"] = instance.vnc_bind_address
+ idict["network_port"] = instance.network_port
+
result[instance.name] = idict
return result
self.kernel_path = getattr(self.op, "kernel_path", None)
self.initrd_path = getattr(self.op, "initrd_path", None)
self.hvm_boot_order = getattr(self.op, "hvm_boot_order", None)
- all_params = [self.mem, self.vcpus, self.ip, self.bridge, self.mac,
- self.kernel_path, self.initrd_path, self.hvm_boot_order]
- if all_params.count(None) == len(all_params):
+ self.hvm_acpi = getattr(self.op, "hvm_acpi", None)
+ self.hvm_pae = getattr(self.op, "hvm_pae", None)
+ self.hvm_cdrom_image_path = getattr(self.op, "hvm_cdrom_image_path", None)
+ self.vnc_bind_address = getattr(self.op, "vnc_bind_address", None)
+ all_parms = [self.mem, self.vcpus, self.ip, self.bridge, self.mac,
+ self.kernel_path, self.initrd_path, self.hvm_boot_order,
+ self.hvm_acpi, self.hvm_pae, self.hvm_cdrom_image_path,
+ self.vnc_bind_address]
+ if all_parms.count(None) == len(all_parms):
raise errors.OpPrereqError("No changes submitted")
if self.mem is not None:
try:
" must be one or more of [acdn]"
" or 'default'")
+ # hvm_cdrom_image_path verification
+ if self.op.hvm_cdrom_image_path is not None:
+ if not os.path.isabs(self.op.hvm_cdrom_image_path):
+ raise errors.OpPrereqError("The path to the HVM CDROM image must"
+ " be an absolute path or None, not %s" %
+ self.op.hvm_cdrom_image_path)
+ if not os.path.isfile(self.op.hvm_cdrom_image_path):
+ raise errors.OpPrereqError("The HVM CDROM image must either be a"
+ " regular file or a symlink pointing to"
+ " an existing regular file, not %s" %
+ self.op.hvm_cdrom_image_path)
+
+ # vnc_bind_address verification
+ if self.op.vnc_bind_address is not None:
+ if not utils.IsValidIP(self.op.vnc_bind_address):
+ raise errors.OpPrereqError("given VNC bind address '%s' doesn't look"
+ " like a valid IP address" %
+ self.op.vnc_bind_address)
+
instance = self.cfg.GetInstanceInfo(
self.cfg.ExpandInstanceName(self.op.instance_name))
if instance is None:
else:
instance.hvm_boot_order = self.hvm_boot_order
result.append(("hvm_boot_order", self.hvm_boot_order))
+ if self.hvm_acpi:
+ instance.hvm_acpi = self.hvm_acpi
+ result.append(("hvm_acpi", self.hvm_acpi))
+ if self.hvm_pae:
+ instance.hvm_pae = self.hvm_pae
+ result.append(("hvm_pae", self.hvm_pae))
+ if self.hvm_cdrom_image_path:
+ instance.hvm_cdrom_image_path = self.hvm_cdrom_image_path
+ result.append(("hvm_cdrom_image_path", self.hvm_cdrom_image_path))
+ if self.vnc_bind_address:
+ instance.vnc_bind_address = self.vnc_bind_address
+ result.append(("vnc_bind_address", self.vnc_bind_address))
self.cfg.AddInstance(instance)
class LUTestDelay(NoHooksLU):
"""Sleep for a specified amount of time.
- This LU sleeps on the master and/or nodes for a specified amoutn of
+ This LU sleeps on the master and/or nodes for a specified amount of
time.
"""
_OP_REQP = ["duration", "on_master", "on_nodes"]
+ REQ_BGL = False
- def CheckPrereq(self):
- """Check prerequisites.
+ def ExpandNames(self):
+ """Expand names and set required locks.
- This checks that we have a good list of nodes and/or the duration
- is valid.
+ This expands the node list, if any.
"""
-
+ self.needed_locks = {}
if self.op.on_nodes:
+ # _GetWantedNodes can be used here, but is not always appropriate to use
+ # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
+ # more information.
self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
+ self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+
+ """
def Exec(self, feedback_fn):
"""Do the actual sleep.