from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
-from ganeti import config
+from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import opcodes
-from ganeti import ssconf
from ganeti import serializer
"""Logical Unit base class.
Subclasses must follow these rules:
- - implement CheckPrereq which also fills in the opcode instance
- with all the fields (even if as None)
+ - implement ExpandNames
+ - implement CheckPrereq
- implement Exec
- implement BuildHooksEnv
- redefine HPATH and HTYPE
- - optionally redefine their run requirements (REQ_CLUSTER,
- REQ_MASTER); note that all commands require root permissions
+ - optionally redefine their run requirements:
+ REQ_MASTER: the LU needs to run on the master node
+ REQ_WSSTORE: the LU needs a writable SimpleStore
+ REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
+
+ Note that all commands require root permissions.
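+
+  A minimal concurrent LU following these rules might look like the sketch
+  below (the class name is illustrative only; NoHooksLU is defined further
+  down in this module):
+
+    class LUExampleNoop(NoHooksLU):
+      _OP_REQP = []
+      REQ_BGL = False
+
+      def ExpandNames(self):
+        self.needed_locks = {}
+
+      def CheckPrereq(self):
+        pass
+
+      def Exec(self, feedback_fn):
+        return True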
"""
HPATH = None
HTYPE = None
_OP_REQP = []
- REQ_CLUSTER = True
REQ_MASTER = True
+ REQ_WSSTORE = False
+ REQ_BGL = True
- def __init__(self, processor, op, cfg, sstore):
+ def __init__(self, processor, op, context, sstore):
"""Constructor for LogicalUnit.
    This needs to be overridden in derived classes in order to check op
"""
self.proc = processor
self.op = op
- self.cfg = cfg
+ self.cfg = context.cfg
self.sstore = sstore
+ self.context = context
+ self.needed_locks = None
+ self.acquired_locks = {}
+    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
+ # Used to force good behavior when calling helper functions
+ self.recalculate_locks = {}
self.__ssh = None
for attr_name in self._OP_REQP:
if attr_val is None:
raise errors.OpPrereqError("Required parameter '%s' missing" %
attr_name)
- if self.REQ_CLUSTER:
- if not cfg.IsCluster():
- raise errors.OpPrereqError("Cluster not initialized yet,"
- " use 'gnt-cluster init' first.")
- if self.REQ_MASTER:
- master = sstore.GetMasterNode()
- if master != utils.HostInfo().name:
- raise errors.OpPrereqError("Commands must be run on the master"
- " node %s" % master)
+
+ if not self.cfg.IsCluster():
+ raise errors.OpPrereqError("Cluster not initialized yet,"
+ " use 'gnt-cluster init' first.")
+ if self.REQ_MASTER:
+ master = sstore.GetMasterNode()
+ if master != utils.HostInfo().name:
+ raise errors.OpPrereqError("Commands must be run on the master"
+ " node %s" % master)
def __GetSSH(self):
"""Returns the SshRunner object
ssh = property(fget=__GetSSH)
+ def ExpandNames(self):
+ """Expand names for this LU.
+
+ This method is called before starting to execute the opcode, and it should
+ update all the parameters of the opcode to their canonical form (e.g. a
+ short node name must be fully expanded after this method has successfully
+    completed). This way locking, hooks, logging, etc. can work correctly.
+
+ LUs which implement this method must also populate the self.needed_locks
+ member, as a dict with lock levels as keys, and a list of needed lock names
+ as values. Rules:
+ - Use an empty dict if you don't need any lock
+ - If you don't need any lock at a particular level omit that level
+ - Don't put anything for the BGL level
+ - If you want all locks at a level use None as a value
+ (this reflects what LockSet does, and will be replaced before
+ CheckPrereq with the full list of nodes that have been locked)
+
+ If you need to share locks (rather than acquire them exclusively) at one
+ level you can modify self.share_locks, setting a true value (usually 1) for
+ that level. By default locks are not shared.
+
+ Examples:
+ # Acquire all nodes and one instance
+ self.needed_locks = {
+ locking.LEVEL_NODE: None,
+      locking.LEVEL_INSTANCE: ['instance1.example.tld'],
+ }
+ # Acquire just two nodes
+ self.needed_locks = {
+ locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
+ }
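+    # Acquire all node locks, in shared mode (a sketch mirroring
+    # LUQueryNodes below; self.share_locks must be set before locking starts)
+    self.share_locks[locking.LEVEL_NODE] = 1
+    self.needed_locks = {
+      locking.LEVEL_NODE: None,
+    }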
+ # Acquire no locks
+ self.needed_locks = {} # No, you can't leave it to the default value None
+
+ """
+ # The implementation of this method is mandatory only if the new LU is
+ # concurrent, so that old LUs don't need to be changed all at the same
+ # time.
+ if self.REQ_BGL:
+ self.needed_locks = {} # Exclusive LUs don't need locks.
+ else:
+ raise NotImplementedError
+
+ def DeclareLocks(self, level):
+ """Declare LU locking needs for a level
+
+ While most LUs can just declare their locking needs at ExpandNames time,
+ sometimes there's the need to calculate some locks after having acquired
+ the ones before. This function is called just before acquiring locks at a
+ particular level, but after acquiring the ones at lower levels, and permits
+ such calculations. It can be used to modify self.needed_locks, and by
+ default it does nothing.
+
+ This function is only called if you have something already set in
+ self.needed_locks for the level.
+
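+    A typical override (a sketch; the same pattern is documented at
+    _LockInstancesNodes below) recomputes node locks once the instance
+    locks are held:
+
+      if level == locking.LEVEL_NODE:
+        self._LockInstancesNodes()
+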
+ @param level: Locking level which is going to be locked
+ @type level: member of ganeti.locking.LEVELS
+
+ """
+
def CheckPrereq(self):
"""Check prerequisites for this LU.
not fulfilled. Its return value is ignored.
This method should also update all the parameters of the opcode to
- their canonical form; e.g. a short node name must be fully
- expanded after this method has successfully completed (so that
- hooks, logging, etc. work correctly).
+ their canonical form if it hasn't been done by ExpandNames before.
"""
raise NotImplementedError
added by the hooks runner. If the LU doesn't define any
environment, an empty dict (and not None) should be returned.
- As for the node lists, the master should not be included in the
- them, as it will be added by the hooks runner in case this LU
- requires a cluster to run on (otherwise we don't have a node
- list). No nodes should be returned as an empty list (and not
- None).
+    If a LU has no nodes, an empty list (and not None) should be returned.
Note that if the HPATH for a LU class is None, this function will
not be called.
"""
raise NotImplementedError
+  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
+ """Notify the LU about the results of its hooks.
-class NoHooksLU(LogicalUnit):
- """Simple LU which runs no hooks.
+ This method is called every time a hooks phase is executed, and notifies
+ the Logical Unit about the hooks' result. The LU can then use it to alter
+ its result based on the hooks. By default the method does nothing and the
+    previous result is passed back unchanged, but any LU can override it if
+    it wants to use the local cluster hook-scripts somehow.
- This LU is intended as a parent for other LogicalUnits which will
- run no hooks, in order to reduce duplicate code.
+ Args:
+ phase: the hooks phase that has just been run
+ hooks_results: the results of the multi-node hooks rpc call
+ feedback_fn: function to send feedback back to the caller
+ lu_result: the previous result this LU had, or None in the PRE phase.
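+
+    A minimal override (a sketch only; LUVerifyCluster below has a real
+    implementation) could report POST-phase results without altering them:
+
+      def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
+        if phase == constants.HOOKS_PHASE_POST:
+          feedback_fn("* hooks ran on: %s" % ", ".join(hooks_results))
+        return lu_result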
- """
- HPATH = None
- HTYPE = None
+ """
+ return lu_result
- def BuildHooksEnv(self):
- """Build hooks env.
+ def _ExpandAndLockInstance(self):
+ """Helper function to expand and lock an instance.
- This is a no-op, since we don't run hooks.
+ Many LUs that work on an instance take its name in self.op.instance_name
+ and need to expand it and then declare the expanded name for locking. This
+ function does it, and then updates self.op.instance_name to the expanded
+ name. It also initializes needed_locks as a dict, if this hasn't been done
+ before.
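+
+    A typical caller (this exact pattern appears in several LUs below):
+
+      def ExpandNames(self):
+        self._ExpandAndLockInstance()
+        self.needed_locks[locking.LEVEL_NODE] = []
+        self.recalculate_locks[locking.LEVEL_NODE] = 'replace'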
"""
- return {}, [], []
+ if self.needed_locks is None:
+ self.needed_locks = {}
+ else:
+ assert locking.LEVEL_INSTANCE not in self.needed_locks, \
+ "_ExpandAndLockInstance called with instance-level locks set"
+ expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
+ if expanded_name is None:
+ raise errors.OpPrereqError("Instance '%s' not known" %
+ self.op.instance_name)
+ self.needed_locks[locking.LEVEL_INSTANCE] = expanded_name
+ self.op.instance_name = expanded_name
+ def _LockInstancesNodes(self):
+ """Helper function to declare instances' nodes for locking.
-def _AddHostToEtcHosts(hostname):
- """Wrapper around utils.SetEtcHostsEntry.
+ This function should be called after locking one or more instances to lock
+ their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
+ with all primary or secondary nodes for instances already locked and
+ present in self.needed_locks[locking.LEVEL_INSTANCE].
- """
- hi = utils.HostInfo(name=hostname)
- utils.SetEtcHostsEntry(constants.ETC_HOSTS, hi.ip, hi.name, [hi.ShortName()])
+ It should be called from DeclareLocks, and for safety only works if
+ self.recalculate_locks[locking.LEVEL_NODE] is set.
+
+    In the future it may grow parameters to just lock some instances' nodes,
+    or to just lock primary or secondary nodes, if needed.
+
+    A typical DeclareLocks implementation invokes it in a way similar to:
+
+ if level == locking.LEVEL_NODE:
+ self._LockInstancesNodes()
+
+ """
+ assert locking.LEVEL_NODE in self.recalculate_locks, \
+ "_LockInstancesNodes helper function called with no nodes to recalculate"
-def _RemoveHostFromEtcHosts(hostname):
- """Wrapper around utils.RemoveEtcHostsEntry.
+    # TODO: check if we've really been called with the instance locks held
+
+ # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
+ # future we might want to have different behaviors depending on the value
+ # of self.recalculate_locks[locking.LEVEL_NODE]
+ wanted_nodes = []
+ for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
+ instance = self.context.cfg.GetInstanceInfo(instance_name)
+ wanted_nodes.append(instance.primary_node)
+ wanted_nodes.extend(instance.secondary_nodes)
+ self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
+
+ del self.recalculate_locks[locking.LEVEL_NODE]
+
+
+class NoHooksLU(LogicalUnit):
+ """Simple LU which runs no hooks.
+
+ This LU is intended as a parent for other LogicalUnits which will
+ run no hooks, in order to reduce duplicate code.
"""
- hi = utils.HostInfo(name=hostname)
- utils.RemoveEtcHostsEntry(constants.ETC_HOSTS, hi.name)
- utils.RemoveEtcHostsEntry(constants.ETC_HOSTS, hi.ShortName())
+ HPATH = None
+ HTYPE = None
def _GetWantedNodes(lu, nodes):
return _BuildInstanceHookEnv(**args)
-def _HasValidVG(vglist, vgname):
- """Checks if the volume group list is valid.
-
- A non-None return value means there's an error, and the return value
- is the error message.
-
- """
- vgsize = vglist.get(vgname, None)
- if vgsize is None:
- return "volume group '%s' missing" % vgname
- elif vgsize < 20480:
- return ("volume group '%s' too small (20480MiB required, %dMib found)" %
- (vgname, vgsize))
- return None
-
-
-def _InitSSHSetup(node):
- """Setup the SSH configuration for the cluster.
-
-
- This generates a dsa keypair for root, adds the pub key to the
- permitted hosts and adds the hostkey to its own known hosts.
-
- Args:
- node: the name of this host as a fqdn
-
- """
- priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)
-
- for name in priv_key, pub_key:
- if os.path.exists(name):
- utils.CreateBackup(name)
- utils.RemoveFile(name)
-
- result = utils.RunCmd(["ssh-keygen", "-t", "dsa",
- "-f", priv_key,
- "-q", "-N", ""])
- if result.failed:
- raise errors.OpExecError("Could not generate ssh keypair, error %s" %
- result.output)
-
- f = open(pub_key, 'r')
- try:
- utils.AddAuthorizedKey(auth_keys, f.read(8192))
- finally:
- f.close()
-
-
-def _InitGanetiServerSetup(ss):
- """Setup the necessary configuration for the initial node daemon.
-
- This creates the nodepass file containing the shared password for
- the cluster and also generates the SSL certificate.
-
- """
- # Create pseudo random password
- randpass = sha.new(os.urandom(64)).hexdigest()
- # and write it into sstore
- ss.SetKey(ss.SS_NODED_PASS, randpass)
-
- result = utils.RunCmd(["openssl", "req", "-new", "-newkey", "rsa:1024",
- "-days", str(365*5), "-nodes", "-x509",
- "-keyout", constants.SSL_CERT_FILE,
- "-out", constants.SSL_CERT_FILE, "-batch"])
- if result.failed:
- raise errors.OpExecError("could not generate server ssl cert, command"
- " %s had exitcode %s and error message %s" %
- (result.cmd, result.exit_code, result.output))
-
- os.chmod(constants.SSL_CERT_FILE, 0400)
-
- result = utils.RunCmd([constants.NODE_INITD_SCRIPT, "restart"])
-
- if result.failed:
- raise errors.OpExecError("Could not start the node daemon, command %s"
- " had exitcode %s and error %s" %
- (result.cmd, result.exit_code, result.output))
-
-
def _CheckInstanceBridgesExist(instance):
"""Check that the brigdes needed by an instance exist.
(brlist, instance.primary_node))
-class LUInitCluster(LogicalUnit):
- """Initialise the cluster.
-
- """
- HPATH = "cluster-init"
- HTYPE = constants.HTYPE_CLUSTER
- _OP_REQP = ["cluster_name", "hypervisor_type", "mac_prefix",
- "def_bridge", "master_netdev", "file_storage_dir"]
- REQ_CLUSTER = False
-
- def BuildHooksEnv(self):
- """Build hooks env.
-
- Notes: Since we don't require a cluster, we must manually add
- ourselves in the post-run node list.
-
- """
- env = {"OP_TARGET": self.op.cluster_name}
- return env, [], [self.hostname.name]
-
- def CheckPrereq(self):
- """Verify that the passed name is a valid one.
-
- """
- if config.ConfigWriter.IsCluster():
- raise errors.OpPrereqError("Cluster is already initialised")
-
- if self.op.hypervisor_type == constants.HT_XEN_HVM31:
- if not os.path.exists(constants.VNC_PASSWORD_FILE):
- raise errors.OpPrereqError("Please prepare the cluster VNC"
- "password file %s" %
- constants.VNC_PASSWORD_FILE)
-
- self.hostname = hostname = utils.HostInfo()
-
- if hostname.ip.startswith("127."):
- raise errors.OpPrereqError("This host's IP resolves to the private"
- " range (%s). Please fix DNS or %s." %
- (hostname.ip, constants.ETC_HOSTS))
-
- if not utils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT,
- source=constants.LOCALHOST_IP_ADDRESS):
- raise errors.OpPrereqError("Inconsistency: this host's name resolves"
- " to %s,\nbut this ip address does not"
- " belong to this host."
- " Aborting." % hostname.ip)
-
- self.clustername = clustername = utils.HostInfo(self.op.cluster_name)
-
- if utils.TcpPing(clustername.ip, constants.DEFAULT_NODED_PORT,
- timeout=5):
- raise errors.OpPrereqError("Cluster IP already active. Aborting.")
-
- secondary_ip = getattr(self.op, "secondary_ip", None)
- if secondary_ip and not utils.IsValidIP(secondary_ip):
- raise errors.OpPrereqError("Invalid secondary ip given")
- if (secondary_ip and
- secondary_ip != hostname.ip and
- (not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
- source=constants.LOCALHOST_IP_ADDRESS))):
- raise errors.OpPrereqError("You gave %s as secondary IP,"
- " but it does not belong to this host." %
- secondary_ip)
- self.secondary_ip = secondary_ip
-
- if not hasattr(self.op, "vg_name"):
- self.op.vg_name = None
- # if vg_name not None, checks if volume group is valid
- if self.op.vg_name:
- vgstatus = _HasValidVG(utils.ListVolumeGroups(), self.op.vg_name)
- if vgstatus:
- raise errors.OpPrereqError("Error: %s\nspecify --no-lvm-storage if"
- " you are not using lvm" % vgstatus)
-
- self.op.file_storage_dir = os.path.normpath(self.op.file_storage_dir)
-
- if not os.path.isabs(self.op.file_storage_dir):
- raise errors.OpPrereqError("The file storage directory you have is"
- " not an absolute path.")
-
- if not os.path.exists(self.op.file_storage_dir):
- try:
- os.makedirs(self.op.file_storage_dir, 0750)
- except OSError, err:
- raise errors.OpPrereqError("Cannot create file storage directory"
- " '%s': %s" %
- (self.op.file_storage_dir, err))
-
- if not os.path.isdir(self.op.file_storage_dir):
- raise errors.OpPrereqError("The file storage directory '%s' is not"
- " a directory." % self.op.file_storage_dir)
-
- if not re.match("^[0-9a-z]{2}:[0-9a-z]{2}:[0-9a-z]{2}$",
- self.op.mac_prefix):
- raise errors.OpPrereqError("Invalid mac prefix given '%s'" %
- self.op.mac_prefix)
-
- if self.op.hypervisor_type not in constants.HYPER_TYPES:
- raise errors.OpPrereqError("Invalid hypervisor type given '%s'" %
- self.op.hypervisor_type)
-
- result = utils.RunCmd(["ip", "link", "show", "dev", self.op.master_netdev])
- if result.failed:
- raise errors.OpPrereqError("Invalid master netdev given (%s): '%s'" %
- (self.op.master_netdev,
- result.output.strip()))
-
- if not (os.path.isfile(constants.NODE_INITD_SCRIPT) and
- os.access(constants.NODE_INITD_SCRIPT, os.X_OK)):
- raise errors.OpPrereqError("Init.d script '%s' missing or not"
- " executable." % constants.NODE_INITD_SCRIPT)
-
- def Exec(self, feedback_fn):
- """Initialize the cluster.
-
- """
- clustername = self.clustername
- hostname = self.hostname
-
- # set up the simple store
- self.sstore = ss = ssconf.SimpleStore()
- ss.SetKey(ss.SS_HYPERVISOR, self.op.hypervisor_type)
- ss.SetKey(ss.SS_MASTER_NODE, hostname.name)
- ss.SetKey(ss.SS_MASTER_IP, clustername.ip)
- ss.SetKey(ss.SS_MASTER_NETDEV, self.op.master_netdev)
- ss.SetKey(ss.SS_CLUSTER_NAME, clustername.name)
- ss.SetKey(ss.SS_FILE_STORAGE_DIR, self.op.file_storage_dir)
-
- # set up the inter-node password and certificate
- _InitGanetiServerSetup(ss)
-
- # start the master ip
- rpc.call_node_start_master(hostname.name)
-
- # set up ssh config and /etc/hosts
- f = open(constants.SSH_HOST_RSA_PUB, 'r')
- try:
- sshline = f.read()
- finally:
- f.close()
- sshkey = sshline.split(" ")[1]
-
- _AddHostToEtcHosts(hostname.name)
- _InitSSHSetup(hostname.name)
-
- # init of cluster config file
- self.cfg = cfgw = config.ConfigWriter()
- cfgw.InitConfig(hostname.name, hostname.ip, self.secondary_ip,
- sshkey, self.op.mac_prefix,
- self.op.vg_name, self.op.def_bridge)
-
- ssh.WriteKnownHostsFile(cfgw, ss, constants.SSH_KNOWN_HOSTS_FILE)
-
-
class LUDestroyCluster(NoHooksLU):
"""Logical unit for destroying the cluster.
"""
master = self.sstore.GetMasterNode()
- if not rpc.call_node_stop_master(master):
+ if not rpc.call_node_stop_master(master, False):
raise errors.OpExecError("Could not disable the master role")
priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
utils.CreateBackup(priv_key)
utils.CreateBackup(pub_key)
- rpc.call_node_leave_cluster(master)
+ return master
-class LUVerifyCluster(NoHooksLU):
+class LUVerifyCluster(LogicalUnit):
"""Verifies the cluster status.
"""
+ HPATH = "cluster-verify"
+ HTYPE = constants.HTYPE_CLUSTER
_OP_REQP = ["skip_checks"]
def _VerifyNode(self, node, file_list, local_cksum, vglist, node_result,
(node,))
bad = True
else:
- vgstatus = _HasValidVG(vglist, self.cfg.GetVGName())
+ vgstatus = utils.CheckVolumeGroupSize(vglist, self.cfg.GetVGName(),
+ constants.MIN_VG_SIZE)
if vgstatus:
feedback_fn(" - ERROR: %s on node %s" % (vgstatus, node))
bad = True
if 'nodelist' not in node_result:
bad = True
- feedback_fn(" - ERROR: node hasn't returned node connectivity data")
+ feedback_fn(" - ERROR: node hasn't returned node ssh connectivity data")
else:
if node_result['nodelist']:
bad = True
for node in node_result['nodelist']:
- feedback_fn(" - ERROR: communication with node '%s': %s" %
+ feedback_fn(" - ERROR: ssh communication with node '%s': %s" %
(node, node_result['nodelist'][node]))
+ if 'node-net-test' not in node_result:
+ bad = True
+ feedback_fn(" - ERROR: node hasn't returned node tcp connectivity data")
+ else:
+ if node_result['node-net-test']:
+ bad = True
+ nlist = utils.NiceSort(node_result['node-net-test'].keys())
+ for node in nlist:
+ feedback_fn(" - ERROR: tcp communication with node '%s': %s" %
+ (node, node_result['node-net-test'][node]))
+
hyp_result = node_result.get('hypervisor', None)
if hyp_result is not None:
feedback_fn(" - ERROR: hypervisor verify failure: '%s'" % hyp_result)
if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
raise errors.OpPrereqError("Invalid checks to be skipped specified")
+ def BuildHooksEnv(self):
+ """Build hooks env.
+
+    Cluster-Verify hooks just run in the post phase; their failure gets
+    logged in the verify output and makes the verification fail.
+
+ """
+ all_nodes = self.cfg.GetNodeList()
+ # TODO: populate the environment with useful information for verify hooks
+ env = {}
+ return env, [], all_nodes
+
def Exec(self, feedback_fn):
"""Verify integrity of cluster, performing various test on nodes.
vg_name = self.cfg.GetVGName()
nodelist = utils.NiceSort(self.cfg.GetNodeList())
+ nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
instancelist = utils.NiceSort(self.cfg.GetInstanceList())
i_non_redundant = [] # Non redundant instances
node_volume = {}
'filelist': file_names,
'nodelist': nodelist,
'hypervisor': None,
+ 'node-net-test': [(node.name, node.primary_ip, node.secondary_ip)
+ for node in nodeinfo]
}
all_nvinfo = rpc.call_node_verify(nodelist, node_verify_param)
all_rversion = rpc.call_version(nodelist)
feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
% len(i_non_redundant))
- return int(bad)
+ return not bad
+
+ def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
+    """Analyze the post-hooks' result, handle it, and send some
+    nicely-formatted feedback back to the user.
+
+ Args:
+ phase: the hooks phase that has just been run
+ hooks_results: the results of the multi-node hooks rpc call
+ feedback_fn: function to send feedback back to the caller
+ lu_result: previous Exec result
+
+ """
+ # We only really run POST phase hooks, and are only interested in
+ # their results
+ if phase == constants.HOOKS_PHASE_POST:
+ # Used to change hooks' output to proper indentation
+ indent_re = re.compile('^', re.M)
+ feedback_fn("* Hooks Results")
+ if not hooks_results:
+ feedback_fn(" - ERROR: general communication failure")
+        lu_result = False
+ else:
+ for node_name in hooks_results:
+ show_node_header = True
+ res = hooks_results[node_name]
+ if res is False or not isinstance(res, list):
+ feedback_fn(" Communication failure")
+            lu_result = False
+ continue
+ for script, hkr, output in res:
+ if hkr == constants.HKR_FAIL:
+ # The node header is only shown once, if there are
+ # failing hooks on that node
+ if show_node_header:
+ feedback_fn(" Node %s:" % node_name)
+ show_node_header = False
+ feedback_fn(" ERROR: Script %s failed, output:" % script)
+ output = indent_re.sub(' ', output)
+ feedback_fn("%s" % output)
+              lu_result = False
+
+ return lu_result
class LUVerifyDisks(NoHooksLU):
HPATH = "cluster-rename"
HTYPE = constants.HTYPE_CLUSTER
_OP_REQP = ["name"]
+ REQ_WSSTORE = True
def BuildHooksEnv(self):
"""Build hooks env.
raise errors.OpPrereqError("Neither the name nor the IP address of the"
" cluster has changed")
if new_ip != old_ip:
- result = utils.RunCmd(["fping", "-q", new_ip])
- if not result.failed:
+ if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
raise errors.OpPrereqError("The given cluster IP address (%s) is"
" reachable on the network. Aborting." %
new_ip)
# shutdown the master IP
master = ss.GetMasterNode()
- if not rpc.call_node_stop_master(master):
+ if not rpc.call_node_stop_master(master, False):
raise errors.OpExecError("Could not disable the master role")
try:
logger.Error("copy of file %s to node %s failed" %
(fname, to_node))
finally:
- if not rpc.call_node_start_master(master):
+ if not rpc.call_node_start_master(master, False):
logger.Error("Could not re-enable the master role on the master,"
" please restart manually.")
node_list = self.cfg.GetNodeList()
vglist = rpc.call_vg_list(node_list)
for node in node_list:
- vgstatus = _HasValidVG(vglist[node], self.op.vg_name)
+ vgstatus = utils.CheckVolumeGroupSize(vglist[node], self.op.vg_name,
+ constants.MIN_VG_SIZE)
if vgstatus:
raise errors.OpPrereqError("Error on node '%s': %s" %
(node, vgstatus))
if done or oneshot:
break
- if unlock:
- utils.Unlock('cmd')
- try:
- time.sleep(min(60, max_time))
- finally:
- if unlock:
- utils.Lock('cmd')
+ time.sleep(min(60, max_time))
if done:
proc.LogInfo("Instance %s's disks are in sync." % instance.name)
for node_name, nr in rlist.iteritems():
if not nr:
continue
- for os in nr:
- if os.name not in all_os:
+ for os_obj in nr:
+ if os_obj.name not in all_os:
# build a list of nodes for this os containing empty lists
# for each node in node_list
- all_os[os.name] = {}
+ all_os[os_obj.name] = {}
for nname in node_list:
- all_os[os.name][nname] = []
- all_os[os.name][node_name].append(os)
+ all_os[os_obj.name][nname] = []
+ all_os[os_obj.name][node_name].append(os_obj)
return all_os
def Exec(self, feedback_fn):
"""Build hooks env.
This doesn't run on the target node in the pre phase as a failed
- node would not allows itself to run.
+ node would then be impossible to remove.
"""
env = {
logger.Info("stopping the node daemon and removing configs from node %s" %
node.name)
- rpc.call_node_leave_cluster(node.name)
-
- self.ssh.Run(node.name, 'root', "%s stop" % constants.NODE_INITD_SCRIPT)
-
- logger.Info("Removing node %s from config" % node.name)
-
- self.cfg.RemoveNode(node.name)
+ self.context.RemoveNode(node.name)
- _RemoveHostFromEtcHosts(node.name)
+ rpc.call_node_leave_cluster(node.name)
class LUQueryNodes(NoHooksLU):
"""
_OP_REQP = ["output_fields", "names"]
+ REQ_BGL = False
- def CheckPrereq(self):
- """Check prerequisites.
-
- This checks that the fields required are valid output fields.
-
- """
- self.dynamic_fields = frozenset(["dtotal", "dfree",
- "mtotal", "mnode", "mfree",
- "bootid"])
+ def ExpandNames(self):
+ self.dynamic_fields = frozenset([
+ "dtotal", "dfree",
+ "mtotal", "mnode", "mfree",
+ "bootid",
+ "ctotal",
+ ])
_CheckOutputFields(static=["name", "pinst_cnt", "sinst_cnt",
"pinst_list", "sinst_list",
- "pip", "sip"],
+ "pip", "sip", "tags"],
dynamic=self.dynamic_fields,
selected=self.op.output_fields)
- self.wanted = _GetWantedNodes(self, self.op.names)
+ self.needed_locks = {}
+ self.share_locks[locking.LEVEL_NODE] = 1
+ # TODO: we could lock nodes only if the user asked for dynamic fields. For
+ # that we need atomic ways to get info for a group of nodes from the
+ # config, though.
+ if not self.op.names:
+ self.needed_locks[locking.LEVEL_NODE] = None
+ else:
+ self.needed_locks[locking.LEVEL_NODE] = \
+ _GetWantedNodes(self, self.op.names)
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+
+ """
+ # This of course is valid only if we locked the nodes
+ self.wanted = self.acquired_locks[locking.LEVEL_NODE]
def Exec(self, feedback_fn):
"""Computes the list of nodes and their attributes.
"mfree": utils.TryConvert(int, nodeinfo['memory_free']),
"dtotal": utils.TryConvert(int, nodeinfo['vg_size']),
"dfree": utils.TryConvert(int, nodeinfo['vg_free']),
+ "ctotal": utils.TryConvert(int, nodeinfo['cpu_total']),
"bootid": nodeinfo['bootid'],
}
else:
val = node.primary_ip
elif field == "sip":
val = node.secondary_ip
+ elif field == "tags":
+ val = list(node.GetTags())
elif field in self.dynamic_fields:
val = live_data[node.name].get(field, None)
else:
primary_ip=primary_ip,
secondary_ip=secondary_ip)
- if self.sstore.GetHypervisorType() == constants.HT_XEN_HVM31:
- if not os.path.exists(constants.VNC_PASSWORD_FILE):
- raise errors.OpPrereqError("Cluster VNC password file %s missing" %
- constants.VNC_PASSWORD_FILE)
-
def Exec(self, feedback_fn):
"""Adds the new node to the cluster.
new_node = self.new_node
node = new_node.name
- # set up inter-node password and certificate and restarts the node daemon
- gntpass = self.sstore.GetNodeDaemonPassword()
- if not re.match('^[a-zA-Z0-9.]{1,64}$', gntpass):
- raise errors.OpExecError("ganeti password corruption detected")
- f = open(constants.SSL_CERT_FILE)
- try:
- gntpem = f.read(8192)
- finally:
- f.close()
- # in the base64 pem encoding, neither '!' nor '.' are valid chars,
- # so we use this to detect an invalid certificate; as long as the
- # cert doesn't contain this, the here-document will be correctly
- # parsed by the shell sequence below
- if re.search('^!EOF\.', gntpem, re.MULTILINE):
- raise errors.OpExecError("invalid PEM encoding in the SSL certificate")
- if not gntpem.endswith("\n"):
- raise errors.OpExecError("PEM must end with newline")
- logger.Info("copy cluster pass to %s and starting the node daemon" % node)
-
- # and then connect with ssh to set password and start ganeti-noded
- # note that all the below variables are sanitized at this point,
- # either by being constants or by the checks above
- ss = self.sstore
- mycommand = ("umask 077 && "
- "echo '%s' > '%s' && "
- "cat > '%s' << '!EOF.' && \n"
- "%s!EOF.\n%s restart" %
- (gntpass, ss.KeyToFilename(ss.SS_NODED_PASS),
- constants.SSL_CERT_FILE, gntpem,
- constants.NODE_INITD_SCRIPT))
-
- result = self.ssh.Run(node, 'root', mycommand, batch=False, ask_key=True)
- if result.failed:
- raise errors.OpExecError("Remote command on node %s, error: %s,"
- " output: %s" %
- (node, result.fail_reason, result.output))
-
# check connectivity
- time.sleep(4)
-
result = rpc.call_version([node])[node]
if result:
if constants.PROTOCOL_VERSION == result:
raise errors.OpExecError("Cannot transfer ssh keys to the new node")
# Add node to our /etc/hosts, and add key to known_hosts
- _AddHostToEtcHosts(new_node.name)
+ utils.AddHostToEtcHosts(new_node.name)
if new_node.secondary_ip != new_node.primary_ip:
if not rpc.call_node_tcp_ping(new_node.name,
" you gave (%s). Please fix and re-run this"
" command." % new_node.secondary_ip)
- success, msg = self.ssh.VerifyNodeHostname(node)
- if not success:
- raise errors.OpExecError("Node '%s' claims it has a different hostname"
- " than the one the resolver gives: %s."
- " Please fix and re-run this command." %
- (node, msg))
+ node_verify_list = [self.sstore.GetMasterNode()]
+ node_verify_param = {
+ 'nodelist': [node],
+ # TODO: do a node-net-test as well?
+ }
+
+ result = rpc.call_node_verify(node_verify_list, node_verify_param)
+ for verifier in node_verify_list:
+ if not result[verifier]:
+ raise errors.OpExecError("Cannot communicate with %s's node daemon"
+ " for remote verification" % verifier)
+ if result[verifier]['nodelist']:
+ for failed in result[verifier]['nodelist']:
+ feedback_fn("ssh/hostname verification failed %s -> %s" %
+ (verifier, result[verifier]['nodelist'][failed]))
+ raise errors.OpExecError("ssh/hostname verification failed.")
# Distribute updated /etc/hosts and known_hosts to all nodes,
# including the node just added
myself = self.cfg.GetNodeInfo(self.sstore.GetMasterNode())
- dist_nodes = self.cfg.GetNodeList() + [node]
+ dist_nodes = self.cfg.GetNodeList()
+ if not self.op.readd:
+ dist_nodes.append(node)
if myself.name in dist_nodes:
dist_nodes.remove(myself.name)
logger.Error("copy of file %s to node %s failed" %
(fname, to_node))
- to_copy = ss.GetFileList()
+ to_copy = self.sstore.GetFileList()
if self.sstore.GetHypervisorType() == constants.HT_XEN_HVM31:
to_copy.append(constants.VNC_PASSWORD_FILE)
for fname in to_copy:
- if not self.ssh.CopyFileToNode(node, fname):
+ result = rpc.call_upload_file([node], fname)
+ if not result[node]:
logger.Error("could not copy file %s to node %s" % (fname, node))
- if not self.op.readd:
- logger.Info("adding node %s to cluster.conf" % node)
- self.cfg.AddNode(new_node)
-
-
-class LUMasterFailover(LogicalUnit):
- """Failover the master node to the current node.
-
- This is a special LU in that it must run on a non-master node.
-
- """
- HPATH = "master-failover"
- HTYPE = constants.HTYPE_CLUSTER
- REQ_MASTER = False
- _OP_REQP = []
-
- def BuildHooksEnv(self):
- """Build hooks env.
-
- This will run on the new master only in the pre phase, and on all
- the nodes in the post phase.
-
- """
- env = {
- "OP_TARGET": self.new_master,
- "NEW_MASTER": self.new_master,
- "OLD_MASTER": self.old_master,
- }
- return env, [self.new_master], self.cfg.GetNodeList()
-
- def CheckPrereq(self):
- """Check prerequisites.
-
- This checks that we are not already the master.
-
- """
- self.new_master = utils.HostInfo().name
- self.old_master = self.sstore.GetMasterNode()
-
- if self.old_master == self.new_master:
- raise errors.OpPrereqError("This commands must be run on the node"
- " where you want the new master to be."
- " %s is already the master" %
- self.old_master)
-
- def Exec(self, feedback_fn):
- """Failover the master node.
-
- This command, when run on a non-master node, will cause the current
- master to cease being master, and the non-master to become new
- master.
-
- """
- #TODO: do not rely on gethostname returning the FQDN
- logger.Info("setting master to %s, old master: %s" %
- (self.new_master, self.old_master))
-
- if not rpc.call_node_stop_master(self.old_master):
- logger.Error("could disable the master role on the old master"
- " %s, please disable manually" % self.old_master)
-
- ss = self.sstore
- ss.SetKey(ss.SS_MASTER_NODE, self.new_master)
- if not rpc.call_upload_file(self.cfg.GetNodeList(),
- ss.KeyToFilename(ss.SS_MASTER_NODE)):
- logger.Error("could not distribute the new simple store master file"
- " to the other nodes, please check.")
-
- if not rpc.call_node_start_master(self.new_master):
- logger.Error("could not start the master role on the new master"
- " %s, please check" % self.new_master)
- feedback_fn("Error in activating the master IP on the new master,"
- " please fix manually.")
-
+ if self.op.readd:
+ self.context.ReaddNode(new_node)
+ else:
+ self.context.AddNode(new_node)
class LUQueryClusterInfo(NoHooksLU):
"""
_OP_REQP = []
REQ_MASTER = False
+ REQ_BGL = False
+
+ def ExpandNames(self):
+ self.needed_locks = {}
def CheckPrereq(self):
"""No prerequsites needed for this LU.
"export_version": constants.EXPORT_VERSION,
"master": self.sstore.GetMasterNode(),
"architecture": (platform.architecture()[0], platform.machine()),
+ "hypervisor_type": self.sstore.GetHypervisorType(),
}
return result
-class LUClusterCopyFile(NoHooksLU):
- """Copy file to cluster.
-
- """
- _OP_REQP = ["nodes", "filename"]
-
- def CheckPrereq(self):
- """Check prerequisites.
-
- It should check that the named file exists and that the given list
- of nodes is valid.
-
- """
- if not os.path.exists(self.op.filename):
- raise errors.OpPrereqError("No such filename '%s'" % self.op.filename)
-
- self.nodes = _GetWantedNodes(self, self.op.nodes)
-
- def Exec(self, feedback_fn):
- """Copy a file from master to some nodes.
-
- Args:
- opts - class with options as members
- args - list containing a single element, the file name
- Opts used:
- nodes - list containing the name of target nodes; if empty, all nodes
-
- """
- filename = self.op.filename
-
- myname = utils.HostInfo().name
-
- for node in self.nodes:
- if node == myname:
- continue
- if not self.ssh.CopyFileToNode(node, filename):
- logger.Error("Copy of file %s to node %s failed" % (filename, node))
-
-
class LUDumpClusterConfig(NoHooksLU):
"""Return a text-representation of the cluster-config.
"""
_OP_REQP = []
+ REQ_BGL = False
+
+ def ExpandNames(self):
+ self.needed_locks = {}
def CheckPrereq(self):
"""No prerequisites.
return self.cfg.DumpConfig()
-class LURunClusterCommand(NoHooksLU):
- """Run a command on some nodes.
-
- """
- _OP_REQP = ["command", "nodes"]
-
- def CheckPrereq(self):
- """Check prerequisites.
-
- It checks that the given list of nodes is valid.
-
- """
- self.nodes = _GetWantedNodes(self, self.op.nodes)
-
- def Exec(self, feedback_fn):
- """Run a command on some nodes.
-
- """
- # put the master at the end of the nodes list
- master_node = self.sstore.GetMasterNode()
- if master_node in self.nodes:
- self.nodes.remove(master_node)
- self.nodes.append(master_node)
-
- data = []
- for node in self.nodes:
- result = self.ssh.Run(node, "root", self.op.command)
- data.append((node, result.output, result.exit_code))
-
- return data
-
-
class LUActivateInstanceDisks(NoHooksLU):
"""Bring up an instance's disks.
HPATH = "instance-start"
HTYPE = constants.HTYPE_INSTANCE
_OP_REQP = ["instance_name", "force"]
+ REQ_BGL = False
+
+ def ExpandNames(self):
+ self._ExpandAndLockInstance()
+ self.needed_locks[locking.LEVEL_NODE] = []
+ self.recalculate_locks[locking.LEVEL_NODE] = 'replace'
+
+ def DeclareLocks(self, level):
+ if level == locking.LEVEL_NODE:
+ self._LockInstancesNodes()
def BuildHooksEnv(self):
"""Build hooks env.
This checks that the instance is in the cluster.
"""
- instance = self.cfg.GetInstanceInfo(
- self.cfg.ExpandInstanceName(self.op.instance_name))
- if instance is None:
- raise errors.OpPrereqError("Instance '%s' not known" %
- self.op.instance_name)
+ self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
+ assert self.instance is not None, \
+ "Cannot retrieve locked instance %s" % self.op.instance_name
# check bridges existance
_CheckInstanceBridgesExist(instance)
"starting instance %s" % instance.name,
instance.memory)
- self.instance = instance
- self.op.instance_name = instance.name
-
def Exec(self, feedback_fn):
"""Start the instance.
HPATH = "instance-reboot"
HTYPE = constants.HTYPE_INSTANCE
_OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
+ REQ_BGL = False
+
+ def ExpandNames(self):
+ if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
+ constants.INSTANCE_REBOOT_HARD,
+ constants.INSTANCE_REBOOT_FULL]:
+ raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
+ (constants.INSTANCE_REBOOT_SOFT,
+ constants.INSTANCE_REBOOT_HARD,
+ constants.INSTANCE_REBOOT_FULL))
+ self._ExpandAndLockInstance()
+ self.needed_locks[locking.LEVEL_NODE] = []
+ self.recalculate_locks[locking.LEVEL_NODE] = 'replace'
+
+ def DeclareLocks(self, level):
+ if level == locking.LEVEL_NODE:
+ # FIXME: lock only primary on (not constants.INSTANCE_REBOOT_FULL)
+ self._LockInstancesNodes()
def BuildHooksEnv(self):
"""Build hooks env.
This checks that the instance is in the cluster.
"""
- instance = self.cfg.GetInstanceInfo(
- self.cfg.ExpandInstanceName(self.op.instance_name))
- if instance is None:
- raise errors.OpPrereqError("Instance '%s' not known" %
- self.op.instance_name)
+ self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
+ assert self.instance is not None, \
+ "Cannot retrieve locked instance %s" % self.op.instance_name
# check bridges existance
_CheckInstanceBridgesExist(instance)
- self.instance = instance
- self.op.instance_name = instance.name
-
def Exec(self, feedback_fn):
"""Reboot the instance.
node_current = instance.primary_node
- if reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
- constants.INSTANCE_REBOOT_HARD,
- constants.INSTANCE_REBOOT_FULL]:
- raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
- (constants.INSTANCE_REBOOT_SOFT,
- constants.INSTANCE_REBOOT_HARD,
- constants.INSTANCE_REBOOT_FULL))
-
if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
constants.INSTANCE_REBOOT_HARD]:
if not rpc.call_instance_reboot(node_current, instance,
HPATH = "instance-stop"
HTYPE = constants.HTYPE_INSTANCE
_OP_REQP = ["instance_name"]
+ REQ_BGL = False
+
+ def ExpandNames(self):
+ self._ExpandAndLockInstance()
+ self.needed_locks[locking.LEVEL_NODE] = []
+ self.recalculate_locks[locking.LEVEL_NODE] = 'replace'
+
+ def DeclareLocks(self, level):
+ if level == locking.LEVEL_NODE:
+ self._LockInstancesNodes()
def BuildHooksEnv(self):
"""Build hooks env.
This checks that the instance is in the cluster.
"""
- instance = self.cfg.GetInstanceInfo(
- self.cfg.ExpandInstanceName(self.op.instance_name))
- if instance is None:
- raise errors.OpPrereqError("Instance '%s' not known" %
- self.op.instance_name)
- self.instance = instance
+ self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
+ assert self.instance is not None, \
+ "Cannot retrieve locked instance %s" % self.op.instance_name
def Exec(self, feedback_fn):
"""Shutdown the instance.
HPATH = "instance-reinstall"
HTYPE = constants.HTYPE_INSTANCE
_OP_REQP = ["instance_name"]
+ REQ_BGL = False
+
+ def ExpandNames(self):
+ self._ExpandAndLockInstance()
+ self.needed_locks[locking.LEVEL_NODE] = []
+ self.recalculate_locks[locking.LEVEL_NODE] = 'replace'
+
+ def DeclareLocks(self, level):
+ if level == locking.LEVEL_NODE:
+ self._LockInstancesNodes()
def BuildHooksEnv(self):
"""Build hooks env.
This checks that the instance is in the cluster and is not running.
"""
- instance = self.cfg.GetInstanceInfo(
- self.cfg.ExpandInstanceName(self.op.instance_name))
- if instance is None:
- raise errors.OpPrereqError("Instance '%s' not known" %
- self.op.instance_name)
+ instance = self.cfg.GetInstanceInfo(self.op.instance_name)
+ assert instance is not None, \
+ "Cannot retrieve locked instance %s" % self.op.instance_name
+
if instance.disk_template == constants.DT_DISKLESS:
raise errors.OpPrereqError("Instance '%s' has no disks" %
self.op.instance_name)
new_name)
if not getattr(self.op, "ignore_ip", False):
- command = ["fping", "-q", name_info.ip]
- result = utils.RunCmd(command)
- if not result.failed:
+ if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
raise errors.OpPrereqError("IP %s of instance %s already in use" %
(name_info.ip, new_name))
old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
self.cfg.RenameInstance(inst.name, self.op.new_name)
+ # Change the instance lock. This is definitely safe while we hold the BGL
+ self.context.glm.remove(locking.LEVEL_INSTANCE, inst.name)
+ self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
# re-read the instance from the configuration after rename
inst = self.cfg.GetInstanceInfo(self.op.new_name)
try:
if not rpc.call_instance_run_rename(inst.primary_node, inst, old_name,
"sda", "sdb"):
- msg = ("Could run OS rename script for instance %s on node %s (but the"
- " instance has been renamed in Ganeti)" %
+ msg = ("Could not run OS rename script for instance %s on node %s"
+ " (but the instance has been renamed in Ganeti)" %
(inst.name, inst.primary_node))
logger.Error(msg)
finally:
"""
HPATH = "instance-remove"
HTYPE = constants.HTYPE_INSTANCE
- _OP_REQP = ["instance_name"]
+ _OP_REQP = ["instance_name", "ignore_failures"]
def BuildHooksEnv(self):
"""Build hooks env.
logger.Info("removing instance %s out of cluster config" % instance.name)
self.cfg.RemoveInstance(instance.name)
+ # Remove the new instance from the Ganeti Lock Manager
+ self.context.glm.remove(locking.LEVEL_INSTANCE, instance.name)
class LUQueryInstances(NoHooksLU):
"""
_OP_REQP = ["output_fields", "names"]
+ REQ_BGL = False
- def CheckPrereq(self):
- """Check prerequisites.
-
- This checks that the fields required are valid output fields.
-
- """
+ def ExpandNames(self):
self.dynamic_fields = frozenset(["oper_state", "oper_ram", "status"])
_CheckOutputFields(static=["name", "os", "pnode", "snodes",
"admin_state", "admin_ram",
"disk_template", "ip", "mac", "bridge",
- "sda_size", "sdb_size", "vcpus"],
+ "sda_size", "sdb_size", "vcpus", "tags",
+ "auto_balance",
+ "network_port", "kernel_path", "initrd_path",
+ "hvm_boot_order", "hvm_acpi", "hvm_pae",
+ "hvm_cdrom_image_path", "hvm_nic_type",
+ "hvm_disk_type", "vnc_bind_address"],
dynamic=self.dynamic_fields,
selected=self.op.output_fields)
- self.wanted = _GetWantedInstances(self, self.op.names)
+ self.needed_locks = {}
+ self.share_locks[locking.LEVEL_INSTANCE] = 1
+ self.share_locks[locking.LEVEL_NODE] = 1
+
+ # TODO: we could lock instances (and nodes) only if the user asked for
+ # dynamic fields. For that we need atomic ways to get info for a group of
+ # instances from the config, though.
+ if not self.op.names:
+ self.needed_locks[locking.LEVEL_INSTANCE] = None # Acquire all
+ else:
+ self.needed_locks[locking.LEVEL_INSTANCE] = \
+ _GetWantedInstances(self, self.op.names)
+
+ self.needed_locks[locking.LEVEL_NODE] = []
+ self.recalculate_locks[locking.LEVEL_NODE] = 'replace'
+
+ def DeclareLocks(self, level):
+ # TODO: locking of nodes could be avoided when not querying them
+ if level == locking.LEVEL_NODE:
+ self._LockInstancesNodes()
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+
+ """
+ # This of course is valid only if we locked the instances
+ self.wanted = self.acquired_locks[locking.LEVEL_INSTANCE]
def Exec(self, feedback_fn):
"""Computes the list of nodes and their attributes.
val = disk.size
elif field == "vcpus":
val = instance.vcpus
+ elif field == "tags":
+ val = list(instance.GetTags())
+ elif field in ("network_port", "kernel_path", "initrd_path",
+ "hvm_boot_order", "hvm_acpi", "hvm_pae",
+ "hvm_cdrom_image_path", "hvm_nic_type",
+ "hvm_disk_type", "vnc_bind_address"):
+ val = getattr(instance, field, None)
+ if val is not None:
+ pass
+ elif field in ("hvm_nic_type", "hvm_disk_type",
+ "kernel_path", "initrd_path"):
+ val = "default"
+ else:
+ val = "-"
else:
raise errors.ParameterError(field)
iout.append(val)
HPATH = "instance-failover"
HTYPE = constants.HTYPE_INSTANCE
_OP_REQP = ["instance_name", "ignore_consistency"]
+ REQ_BGL = False
+
+ def ExpandNames(self):
+ self._ExpandAndLockInstance()
+ self.needed_locks[locking.LEVEL_NODE] = []
+ self.recalculate_locks[locking.LEVEL_NODE] = 'replace'
+
+ def DeclareLocks(self, level):
+ if level == locking.LEVEL_NODE:
+ self._LockInstancesNodes()
def BuildHooksEnv(self):
"""Build hooks env.
This checks that the instance is in the cluster.
"""
- instance = self.cfg.GetInstanceInfo(
- self.cfg.ExpandInstanceName(self.op.instance_name))
- if instance is None:
- raise errors.OpPrereqError("Instance '%s' not known" %
- self.op.instance_name)
+ self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
+ assert self.instance is not None, \
+ "Cannot retrieve locked instance %s" % self.op.instance_name
if instance.disk_template not in constants.DTS_NET_MIRROR:
raise errors.OpPrereqError("Instance's disk layout is not"
secondary_nodes = instance.secondary_nodes
if not secondary_nodes:
raise errors.ProgrammerError("no secondary node but using "
- "DT_REMOTE_RAID1 template")
+ "a mirrored disk template")
target_node = secondary_nodes[0]
# check memory requirements on the secondary node
" exist on destination node '%s'" %
(brlist, target_node))
- self.instance = instance
-
def Exec(self, feedback_fn):
"""Failover an instance.
feedback_fn("* checking disk consistency between source and target")
for dev in instance.disks:
- # for remote_raid1, these are md over drbd
+ # for drbd, these are drbd over lvm
if not _CheckDiskConsistency(self.cfg, dev, target_node, False):
if instance.status == "up" and not self.op.ignore_consistency:
raise errors.OpExecError("Disk %s is degraded on target node,"
instance.primary_node = target_node
# distribute new instance config to the other nodes
- self.cfg.AddInstance(instance)
+ self.cfg.Update(instance)
# Only start the instance if it's marked as up
if instance.status == "up":
return results
-def _GenerateMDDRBDBranch(cfg, primary, secondary, size, names):
- """Generate a drbd device complete with its children.
-
- """
- port = cfg.AllocatePort()
- vgname = cfg.GetVGName()
- dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
- logical_id=(vgname, names[0]))
- dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
- logical_id=(vgname, names[1]))
- drbd_dev = objects.Disk(dev_type=constants.LD_DRBD7, size=size,
- logical_id = (primary, secondary, port),
- children = [dev_data, dev_meta])
- return drbd_dev
-
-
def _GenerateDRBD8Branch(cfg, primary, secondary, size, names, iv_name):
"""Generate a drbd8 device complete with its children.
nics = [{"mac": self.op.mac, "ip": getattr(self.op, "ip", None),
"bridge": self.op.bridge}]
ial = IAllocator(self.cfg, self.sstore,
+ mode=constants.IALLOCATOR_MODE_ALLOC,
name=self.op.instance_name,
disk_template=self.op.disk_template,
tags=[],
mem_size=self.op.mem_size,
disks=disks,
nics=nics,
- mode=constants.IALLOCATOR_MODE_ALLOC)
+ )
ial.Run(self.op.iallocator)
"""
# set optional parameters to none if they don't exist
for attr in ["kernel_path", "initrd_path", "hvm_boot_order", "pnode",
- "iallocator"]:
+ "iallocator", "hvm_acpi", "hvm_pae", "hvm_cdrom_image_path",
+ "hvm_nic_type", "hvm_disk_type", "vnc_bind_address"]:
if not hasattr(self.op, attr):
setattr(self.op, attr, None)
self.op.file_driver)
if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
- raise errors.OpPrereqError("File storage directory not a relative"
- " path")
+ raise errors.OpPrereqError("File storage directory not a relative"
+ " path")
#### allocator run
if [self.op.iallocator, self.op.pnode].count(None) != 1:
info = nodeinfo.get(node, None)
if not info:
raise errors.OpPrereqError("Cannot get current information"
- " from node '%s'" % nodeinfo)
+ " from node '%s'" % node)
vg_free = info.get('vg_free', None)
if not isinstance(vg_free, int):
raise errors.OpPrereqError("Can't compute free disk space on"
" destination node '%s'" %
(self.op.bridge, pnode.name))
+ # memory check on primary node
+ if self.op.start:
+ _CheckNodeFreeMemory(self.cfg, self.pnode.name,
+ "creating instance %s" % self.op.instance_name,
+ self.op.mem_size)
+
+ # hvm_cdrom_image_path verification
+ if self.op.hvm_cdrom_image_path is not None:
+ if not os.path.isabs(self.op.hvm_cdrom_image_path):
+ raise errors.OpPrereqError("The path to the HVM CDROM image must"
+ " be an absolute path or None, not %s" %
+ self.op.hvm_cdrom_image_path)
+ if not os.path.isfile(self.op.hvm_cdrom_image_path):
+ raise errors.OpPrereqError("The HVM CDROM image must either be a"
+ " regular file or a symlink pointing to"
+ " an existing regular file, not %s" %
+ self.op.hvm_cdrom_image_path)
+
+ # vnc_bind_address verification
+ if self.op.vnc_bind_address is not None:
+ if not utils.IsValidIP(self.op.vnc_bind_address):
+ raise errors.OpPrereqError("given VNC bind address '%s' doesn't look"
+ " like a valid IP address" %
+ self.op.vnc_bind_address)
+
+ # Xen HVM device type checks
+ if self.sstore.GetHypervisorType() == constants.HT_XEN_HVM31:
+ if self.op.hvm_nic_type not in constants.HT_HVM_VALID_NIC_TYPES:
+ raise errors.OpPrereqError("Invalid NIC type %s specified for Xen HVM"
+ " hypervisor" % self.op.hvm_nic_type)
+ if self.op.hvm_disk_type not in constants.HT_HVM_VALID_DISK_TYPES:
+ raise errors.OpPrereqError("Invalid disk type %s specified for Xen HVM"
+ " hypervisor" % self.op.hvm_disk_type)
+
if self.op.start:
self.instance_status = 'up'
else:
else:
network_port = None
+ if self.op.vnc_bind_address is None:
+ self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS
+
# this is needed because os.path.join does not accept None arguments
if self.op.file_storage_dir is None:
string_file_storage_dir = ""
kernel_path=self.op.kernel_path,
initrd_path=self.op.initrd_path,
hvm_boot_order=self.op.hvm_boot_order,
+ hvm_acpi=self.op.hvm_acpi,
+ hvm_pae=self.op.hvm_pae,
+ hvm_cdrom_image_path=self.op.hvm_cdrom_image_path,
+ vnc_bind_address=self.op.vnc_bind_address,
+ hvm_nic_type=self.op.hvm_nic_type,
+ hvm_disk_type=self.op.hvm_disk_type,
)
feedback_fn("* creating instance disks...")
feedback_fn("adding instance %s to cluster config" % instance)
self.cfg.AddInstance(iobj)
+ # Add the new instance to the Ganeti Lock Manager
+ self.context.glm.add(locking.LEVEL_INSTANCE, instance)
if self.op.wait_for_sync:
disk_abort = not _WaitForSync(self.cfg, iobj, self.proc)
if disk_abort:
_RemoveDisks(iobj, self.cfg)
self.cfg.RemoveInstance(iobj.name)
+ # Remove the new instance from the Ganeti Lock Manager
+ self.context.glm.remove(locking.LEVEL_INSTANCE, iobj.name)
raise errors.OpExecError("There are some degraded disks for"
" this instance")
"""
_OP_REQP = ["instance_name"]
+ REQ_BGL = False
+
+ def ExpandNames(self):
+ self._ExpandAndLockInstance()
def CheckPrereq(self):
"""Check prerequisites.
This checks that the instance is in the cluster.
"""
- instance = self.cfg.GetInstanceInfo(
- self.cfg.ExpandInstanceName(self.op.instance_name))
- if instance is None:
- raise errors.OpPrereqError("Instance '%s' not known" %
- self.op.instance_name)
- self.instance = instance
+ self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
+ assert self.instance is not None, \
+ "Cannot retrieve locked instance %s" % self.op.instance_name
def Exec(self, feedback_fn):
"""Connect to the console of an instance
HTYPE = constants.HTYPE_INSTANCE
_OP_REQP = ["instance_name", "mode", "disks"]
+ def _RunAllocator(self):
+ """Compute a new secondary node using an IAllocator.
+
+ """
+ ial = IAllocator(self.cfg, self.sstore,
+ mode=constants.IALLOCATOR_MODE_RELOC,
+ name=self.op.instance_name,
+ relocate_from=[self.sec_node])
+
+ ial.Run(self.op.iallocator)
+
+ if not ial.success:
+ raise errors.OpPrereqError("Can't compute nodes using"
+ " iallocator '%s': %s" % (self.op.iallocator,
+ ial.info))
+ if len(ial.nodes) != ial.required_nodes:
+ raise errors.OpPrereqError("iallocator '%s' returned invalid number"
+ " of nodes (%s), required %s" %
+                                 (self.op.iallocator, len(ial.nodes),
+                                  ial.required_nodes))
+ self.op.remote_node = ial.nodes[0]
+ logger.ToStdout("Selected new secondary for the instance: %s" %
+ self.op.remote_node)
+
def BuildHooksEnv(self):
"""Build hooks env.
This checks that the instance is in the cluster.
"""
+ if not hasattr(self.op, "remote_node"):
+ self.op.remote_node = None
+
instance = self.cfg.GetInstanceInfo(
self.cfg.ExpandInstanceName(self.op.instance_name))
if instance is None:
self.sec_node = instance.secondary_nodes[0]
- remote_node = getattr(self.op, "remote_node", None)
+ ia_name = getattr(self.op, "iallocator", None)
+ if ia_name is not None:
+ if self.op.remote_node is not None:
+ raise errors.OpPrereqError("Give either the iallocator or the new"
+ " secondary, not both")
+ self.op.remote_node = self._RunAllocator()
+
+ remote_node = self.op.remote_node
if remote_node is not None:
remote_node = self.cfg.ExpandNodeName(remote_node)
if remote_node is None:
# replacement as for drbd7 (no different port allocated)
raise errors.OpPrereqError("Same secondary given, cannot execute"
" replacement")
- # the user gave the current secondary, switch to
- # 'no-replace-secondary' mode for drbd7
- remote_node = None
- if (instance.disk_template == constants.DT_REMOTE_RAID1 and
- self.op.mode != constants.REPLACE_DISK_ALL):
- raise errors.OpPrereqError("Template 'remote_raid1' only allows all"
- " disks replacement, not individual ones")
if instance.disk_template == constants.DT_DRBD8:
if (self.op.mode == constants.REPLACE_DISK_ALL and
remote_node is not None):
(name, instance.name))
self.op.remote_node = remote_node
- def _ExecRR1(self, feedback_fn):
- """Replace the disks of an instance.
-
- """
- instance = self.instance
- iv_names = {}
- # start of work
- if self.op.remote_node is None:
- remote_node = self.sec_node
- else:
- remote_node = self.op.remote_node
- cfg = self.cfg
- for dev in instance.disks:
- size = dev.size
- lv_names = [".%s_%s" % (dev.iv_name, suf) for suf in ["data", "meta"]]
- names = _GenerateUniqueNames(cfg, lv_names)
- new_drbd = _GenerateMDDRBDBranch(cfg, instance.primary_node,
- remote_node, size, names)
- iv_names[dev.iv_name] = (dev, dev.children[0], new_drbd)
- logger.Info("adding new mirror component on secondary for %s" %
- dev.iv_name)
- #HARDCODE
- if not _CreateBlockDevOnSecondary(cfg, remote_node, instance,
- new_drbd, False,
- _GetInstanceInfoText(instance)):
- raise errors.OpExecError("Failed to create new component on secondary"
- " node %s. Full abort, cleanup manually!" %
- remote_node)
-
- logger.Info("adding new mirror component on primary")
- #HARDCODE
- if not _CreateBlockDevOnPrimary(cfg, instance.primary_node,
- instance, new_drbd,
- _GetInstanceInfoText(instance)):
- # remove secondary dev
- cfg.SetDiskID(new_drbd, remote_node)
- rpc.call_blockdev_remove(remote_node, new_drbd)
- raise errors.OpExecError("Failed to create volume on primary!"
- " Full abort, cleanup manually!!")
-
- # the device exists now
- # call the primary node to add the mirror to md
- logger.Info("adding new mirror component to md")
- if not rpc.call_blockdev_addchildren(instance.primary_node, dev,
- [new_drbd]):
- logger.Error("Can't add mirror compoment to md!")
- cfg.SetDiskID(new_drbd, remote_node)
- if not rpc.call_blockdev_remove(remote_node, new_drbd):
- logger.Error("Can't rollback on secondary")
- cfg.SetDiskID(new_drbd, instance.primary_node)
- if not rpc.call_blockdev_remove(instance.primary_node, new_drbd):
- logger.Error("Can't rollback on primary")
- raise errors.OpExecError("Full abort, cleanup manually!!")
-
- dev.children.append(new_drbd)
- cfg.AddInstance(instance)
-
- # this can fail as the old devices are degraded and _WaitForSync
- # does a combined result over all disks, so we don't check its
- # return value
- _WaitForSync(cfg, instance, self.proc, unlock=True)
-
- # so check manually all the devices
- for name in iv_names:
- dev, child, new_drbd = iv_names[name]
- cfg.SetDiskID(dev, instance.primary_node)
- is_degr = rpc.call_blockdev_find(instance.primary_node, dev)[5]
- if is_degr:
- raise errors.OpExecError("MD device %s is degraded!" % name)
- cfg.SetDiskID(new_drbd, instance.primary_node)
- is_degr = rpc.call_blockdev_find(instance.primary_node, new_drbd)[5]
- if is_degr:
- raise errors.OpExecError("New drbd device %s is degraded!" % name)
-
- for name in iv_names:
- dev, child, new_drbd = iv_names[name]
- logger.Info("remove mirror %s component" % name)
- cfg.SetDiskID(dev, instance.primary_node)
- if not rpc.call_blockdev_removechildren(instance.primary_node,
- dev, [child]):
- logger.Error("Can't remove child from mirror, aborting"
- " *this device cleanup*.\nYou need to cleanup manually!!")
- continue
-
- for node in child.logical_id[:2]:
- logger.Info("remove child device on %s" % node)
- cfg.SetDiskID(child, node)
- if not rpc.call_blockdev_remove(node, child):
- logger.Error("Warning: failed to remove device from node %s,"
- " continuing operation." % node)
-
- dev.children.remove(child)
-
- cfg.AddInstance(instance)
-
def _ExecD8DiskOnly(self, feedback_fn):
"""Replace a disk on the primary or secondary for dbrd8.
"""
instance = self.instance
- if instance.disk_template == constants.DT_REMOTE_RAID1:
- fn = self._ExecRR1
- elif instance.disk_template == constants.DT_DRBD8:
+
+ # Activate the instance disks if we're replacing them on a down instance
+ if instance.status == "down":
+ op = opcodes.OpActivateInstanceDisks(instance_name=instance.name)
+ self.proc.ChainOpCode(op)
+
+ if instance.disk_template == constants.DT_DRBD8:
if self.op.remote_node is None:
fn = self._ExecD8DiskOnly
else:
fn = self._ExecD8Secondary
else:
raise errors.ProgrammerError("Unhandled disk replacement case")
- return fn(feedback_fn)
+
+ ret = fn(feedback_fn)
+
+ # Deactivate the instance disks if we're replacing them on a down instance
+ if instance.status == "down":
+ op = opcodes.OpDeactivateInstanceDisks(instance_name=instance.name)
+ self.proc.ChainOpCode(op)
+
+ return ret
+
+
+class LUGrowDisk(LogicalUnit):
+ """Grow a disk of an instance.
+
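+  "disk" is the iv_name of the disk to grow (e.g. "sda") and "amount" is
+  the space to add, in MiB. An illustrative invocation (opcode and field
+  names assumed here) would be:
+
+    opcodes.OpGrowDisk(instance_name="inst1.example.com",
+                       disk="sda", amount=1024)
+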
+ """
+ HPATH = "disk-grow"
+ HTYPE = constants.HTYPE_INSTANCE
+ _OP_REQP = ["instance_name", "disk", "amount"]
+
+ def BuildHooksEnv(self):
+ """Build hooks env.
+
+    This runs on the master and the instance's primary node.
+
+ """
+ env = {
+ "DISK": self.op.disk,
+ "AMOUNT": self.op.amount,
+ }
+ env.update(_BuildInstanceHookEnvByObject(self.instance))
+ nl = [
+ self.sstore.GetMasterNode(),
+ self.instance.primary_node,
+ ]
+ return env, nl, nl
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+
+ This checks that the instance is in the cluster.
+
+ """
+ instance = self.cfg.GetInstanceInfo(
+ self.cfg.ExpandInstanceName(self.op.instance_name))
+ if instance is None:
+ raise errors.OpPrereqError("Instance '%s' not known" %
+ self.op.instance_name)
+ self.instance = instance
+ self.op.instance_name = instance.name
+
+ if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
+ raise errors.OpPrereqError("Instance's disk layout does not support"
+ " growing.")
+
+ if instance.FindDisk(self.op.disk) is None:
+ raise errors.OpPrereqError("Disk '%s' not found for instance '%s'" %
+ (self.op.disk, instance.name))
+
+ nodenames = [instance.primary_node] + list(instance.secondary_nodes)
+ nodeinfo = rpc.call_node_info(nodenames, self.cfg.GetVGName())
+ for node in nodenames:
+ info = nodeinfo.get(node, None)
+ if not info:
+ raise errors.OpPrereqError("Cannot get current information"
+ " from node '%s'" % node)
+      vg_free = info.get('vg_free', None)
+      if not isinstance(vg_free, (int, long)):
+        raise errors.OpPrereqError("Can't compute free disk space on"
+                                   " node %s" % node)
+      if self.op.amount > vg_free:
+        raise errors.OpPrereqError("Not enough disk space on target node %s:"
+                                   " %d MiB available, %d MiB required" %
+                                   (node, vg_free, self.op.amount))
+
+ def Exec(self, feedback_fn):
+ """Execute disk grow.
+
+ """
+ instance = self.instance
+ disk = instance.FindDisk(self.op.disk)
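+    # Grow the backing device on every node (secondaries first, then the
+    # primary); the new size is recorded in the configuration only after
+    # all nodes have succeeded.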
+ for node in (instance.secondary_nodes + (instance.primary_node,)):
+ self.cfg.SetDiskID(disk, node)
+ result = rpc.call_blockdev_grow(node, disk, self.op.amount)
+      if not result or not isinstance(result, tuple) or len(result) != 2:
+        raise errors.OpExecError("Grow request failed on node %s" % node)
+      elif not result[0]:
+        raise errors.OpExecError("Grow request failed on node %s: %s" %
+                                 (node, result[1]))
+ disk.RecordGrow(self.op.amount)
+ self.cfg.Update(instance)
+ return
class LUQueryInstanceData(NoHooksLU):
"memory": instance.memory,
"nics": [(nic.mac, nic.ip, nic.bridge) for nic in instance.nics],
"disks": disks,
- "network_port": instance.network_port,
"vcpus": instance.vcpus,
- "kernel_path": instance.kernel_path,
- "initrd_path": instance.initrd_path,
- "hvm_boot_order": instance.hvm_boot_order,
}
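+    # expose only the per-hypervisor fields that apply to this cluster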
+ htkind = self.sstore.GetHypervisorType()
+ if htkind == constants.HT_XEN_PVM30:
+ idict["kernel_path"] = instance.kernel_path
+ idict["initrd_path"] = instance.initrd_path
+
+ if htkind == constants.HT_XEN_HVM31:
+ idict["hvm_boot_order"] = instance.hvm_boot_order
+ idict["hvm_acpi"] = instance.hvm_acpi
+ idict["hvm_pae"] = instance.hvm_pae
+ idict["hvm_cdrom_image_path"] = instance.hvm_cdrom_image_path
+ idict["hvm_nic_type"] = instance.hvm_nic_type
+ idict["hvm_disk_type"] = instance.hvm_disk_type
+
+ if htkind in constants.HTS_REQ_PORT:
+ if instance.vnc_bind_address is None:
+ vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS
+ else:
+ vnc_bind_address = instance.vnc_bind_address
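+      # the resulting vnc_console_port looks like (values illustrative):
+      #   "node1.example.com:11004"                   when bound globally
+      #   "127.0.0.1:11004 on node node1.example.com" when bound to localhost
+      #   "192.0.2.5:11004"                           for any other address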
+ if instance.network_port is None:
+ vnc_console_port = None
+ elif vnc_bind_address == constants.BIND_ADDRESS_GLOBAL:
+ vnc_console_port = "%s:%s" % (instance.primary_node,
+ instance.network_port)
+ elif vnc_bind_address == constants.LOCALHOST_IP_ADDRESS:
+ vnc_console_port = "%s:%s on node %s" % (vnc_bind_address,
+ instance.network_port,
+ instance.primary_node)
+ else:
+        vnc_console_port = "%s:%s" % (vnc_bind_address,
+                                      instance.network_port)
+ idict["vnc_console_port"] = vnc_console_port
+ idict["vnc_bind_address"] = vnc_bind_address
+ idict["network_port"] = instance.network_port
+
result[instance.name] = idict
return result
HPATH = "instance-modify"
HTYPE = constants.HTYPE_INSTANCE
_OP_REQP = ["instance_name"]
+ REQ_BGL = False
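+  # REQ_BGL is unset, so this LU must declare its own locking needs in
+  # ExpandNames below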
+
+ def ExpandNames(self):
+ self._ExpandAndLockInstance()
def BuildHooksEnv(self):
"""Build hooks env.
This only checks the instance list against the existing names.
"""
+    # FIXME: all these parameters could be validated earlier, in ExpandNames
+    # or in a separate CheckArguments method (if we ever implement one), so
+    # that an invalid operation can be aborted before any locks are taken
self.mem = getattr(self.op, "mem", None)
self.vcpus = getattr(self.op, "vcpus", None)
self.ip = getattr(self.op, "ip", None)
self.kernel_path = getattr(self.op, "kernel_path", None)
self.initrd_path = getattr(self.op, "initrd_path", None)
self.hvm_boot_order = getattr(self.op, "hvm_boot_order", None)
- all_params = [self.mem, self.vcpus, self.ip, self.bridge, self.mac,
- self.kernel_path, self.initrd_path, self.hvm_boot_order]
- if all_params.count(None) == len(all_params):
+ self.hvm_acpi = getattr(self.op, "hvm_acpi", None)
+ self.hvm_pae = getattr(self.op, "hvm_pae", None)
+ self.hvm_nic_type = getattr(self.op, "hvm_nic_type", None)
+ self.hvm_disk_type = getattr(self.op, "hvm_disk_type", None)
+ self.hvm_cdrom_image_path = getattr(self.op, "hvm_cdrom_image_path", None)
+ self.vnc_bind_address = getattr(self.op, "vnc_bind_address", None)
+ self.force = getattr(self.op, "force", None)
+ all_parms = [self.mem, self.vcpus, self.ip, self.bridge, self.mac,
+ self.kernel_path, self.initrd_path, self.hvm_boot_order,
+ self.hvm_acpi, self.hvm_pae, self.hvm_cdrom_image_path,
+ self.vnc_bind_address, self.hvm_nic_type, self.hvm_disk_type]
+ if all_parms.count(None) == len(all_parms):
raise errors.OpPrereqError("No changes submitted")
if self.mem is not None:
try:
" must be one or more of [acdn]"
" or 'default'")
- instance = self.cfg.GetInstanceInfo(
- self.cfg.ExpandInstanceName(self.op.instance_name))
- if instance is None:
- raise errors.OpPrereqError("No such instance name '%s'" %
- self.op.instance_name)
- self.op.instance_name = instance.name
- self.instance = instance
+ # hvm_cdrom_image_path verification
+ if self.op.hvm_cdrom_image_path is not None:
+ if not (os.path.isabs(self.op.hvm_cdrom_image_path) or
+ self.op.hvm_cdrom_image_path.lower() == "none"):
+ raise errors.OpPrereqError("The path to the HVM CDROM image must"
+ " be an absolute path or None, not %s" %
+ self.op.hvm_cdrom_image_path)
+ if not (os.path.isfile(self.op.hvm_cdrom_image_path) or
+ self.op.hvm_cdrom_image_path.lower() == "none"):
+ raise errors.OpPrereqError("The HVM CDROM image must either be a"
+ " regular file or a symlink pointing to"
+ " an existing regular file, not %s" %
+ self.op.hvm_cdrom_image_path)
+
+ # vnc_bind_address verification
+ if self.op.vnc_bind_address is not None:
+ if not utils.IsValidIP(self.op.vnc_bind_address):
+ raise errors.OpPrereqError("given VNC bind address '%s' doesn't look"
+ " like a valid IP address" %
+ self.op.vnc_bind_address)
+
+ instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
+ assert self.instance is not None, \
+ "Cannot retrieve locked instance %s" % self.op.instance_name
+ self.warn = []
+ if self.mem is not None and not self.force:
+ pnode = self.instance.primary_node
+ nodelist = [pnode]
+ nodelist.extend(instance.secondary_nodes)
+ instance_info = rpc.call_instance_info(pnode, instance.name)
+ nodeinfo = rpc.call_node_info(nodelist, self.cfg.GetVGName())
+
+ if pnode not in nodeinfo or not isinstance(nodeinfo[pnode], dict):
+ # Assume the primary node is unreachable and go ahead
+ self.warn.append("Can't get info from primary node %s" % pnode)
+ else:
+ if instance_info:
+ current_mem = instance_info['memory']
+ else:
+ # Assume instance not running
+ # (there is a slight race condition here, but it's not very probable,
+ # and we have no other way to check)
+ current_mem = 0
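+        # illustrative numbers: raising mem to 2048 with current_mem == 512
+        # and 1024 MB free on the primary gives miss_mem == 512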
+ miss_mem = self.mem - current_mem - nodeinfo[pnode]['memory_free']
+ if miss_mem > 0:
+ raise errors.OpPrereqError("This change will prevent the instance"
+ " from starting, due to %d MB of memory"
+ " missing on its primary node" % miss_mem)
+
+ for node in instance.secondary_nodes:
+ if node not in nodeinfo or not isinstance(nodeinfo[node], dict):
+ self.warn.append("Can't get info from secondary node %s" % node)
+      elif self.mem > nodeinfo[node]['memory_free']:
+        self.warn.append("Not enough memory to fail over the instance to"
+                         " secondary node %s" % node)
+
+ # Xen HVM device type checks
+ if self.sstore.GetHypervisorType() == constants.HT_XEN_HVM31:
+ if self.op.hvm_nic_type is not None:
+ if self.op.hvm_nic_type not in constants.HT_HVM_VALID_NIC_TYPES:
+ raise errors.OpPrereqError("Invalid NIC type %s specified for Xen"
+ " HVM hypervisor" % self.op.hvm_nic_type)
+ if self.op.hvm_disk_type is not None:
+ if self.op.hvm_disk_type not in constants.HT_HVM_VALID_DISK_TYPES:
+ raise errors.OpPrereqError("Invalid disk type %s specified for Xen"
+ " HVM hypervisor" % self.op.hvm_disk_type)
+
return
def Exec(self, feedback_fn):
All parameters take effect only at the next restart of the instance.
"""
+ # Process here the warnings from CheckPrereq, as we don't have a
+ # feedback_fn there.
+ for warn in self.warn:
+ feedback_fn("WARNING: %s" % warn)
+
result = []
instance = self.instance
if self.mem:
else:
instance.hvm_boot_order = self.hvm_boot_order
result.append(("hvm_boot_order", self.hvm_boot_order))
+ if self.hvm_acpi is not None:
+ instance.hvm_acpi = self.hvm_acpi
+ result.append(("hvm_acpi", self.hvm_acpi))
+ if self.hvm_pae is not None:
+ instance.hvm_pae = self.hvm_pae
+ result.append(("hvm_pae", self.hvm_pae))
+ if self.hvm_nic_type is not None:
+ instance.hvm_nic_type = self.hvm_nic_type
+ result.append(("hvm_nic_type", self.hvm_nic_type))
+ if self.hvm_disk_type is not None:
+ instance.hvm_disk_type = self.hvm_disk_type
+ result.append(("hvm_disk_type", self.hvm_disk_type))
+ if self.hvm_cdrom_image_path:
+ if self.hvm_cdrom_image_path == constants.VALUE_NONE:
+ instance.hvm_cdrom_image_path = None
+ else:
+ instance.hvm_cdrom_image_path = self.hvm_cdrom_image_path
+ result.append(("hvm_cdrom_image_path", self.hvm_cdrom_image_path))
+ if self.vnc_bind_address:
+ instance.vnc_bind_address = self.vnc_bind_address
+ result.append(("vnc_bind_address", self.vnc_bind_address))
- self.cfg.AddInstance(instance)
+ self.cfg.Update(instance)
return result
def CheckPrereq(self):
"""Check prerequisites.
- This checks that the instance name is a valid one.
+ This checks that the instance and node names are valid.
"""
instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
self.op.target_node)
self.op.target_node = self.dst_node.name
+ # instance disk type verification
+ for disk in self.instance.disks:
+ if disk.dev_type == constants.LD_FILE:
+ raise errors.OpPrereqError("Export not supported for instances with"
+ " file-based disks")
+
def Exec(self, feedback_fn):
"""Export an instance to an image in the cluster.
if self.op.shutdown:
# shutdown the instance, but not the disks
if not rpc.call_instance_shutdown(src_node, instance):
- raise errors.OpExecError("Could not shutdown instance %s on node %s" %
+      raise errors.OpExecError("Could not shut down instance %s on node %s" %
(instance.name, src_node))
vgname = self.cfg.GetVGName()
" on node %s" % (instance.name, node))
+class LURemoveExport(NoHooksLU):
+ """Remove exports related to the named instance.
+
+ """
+ _OP_REQP = ["instance_name"]
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+ """
+ pass
+
+ def Exec(self, feedback_fn):
+ """Remove any export.
+
+ """
+ instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
+ # If the instance was not found we'll try with the name that was passed in.
+ # This will only work if it was an FQDN, though.
+ fqdn_warn = False
+ if not instance_name:
+ fqdn_warn = True
+ instance_name = self.op.instance_name
+
+ op = opcodes.OpQueryExports(nodes=[])
+ exportlist = self.proc.ChainOpCode(op)
+ found = False
+ for node in exportlist:
+ if instance_name in exportlist[node]:
+ found = True
+ if not rpc.call_export_remove(node, instance_name):
+ logger.Error("could not remove export for instance %s"
+ " on node %s" % (instance_name, node))
+
+ if fqdn_warn and not found:
+ feedback_fn("Export not found. If trying to remove an export belonging"
+ " to a deleted instance please use its Fully Qualified"
+ " Domain Name.")
+
+
class TagsLU(NoHooksLU):
"""Generic tags LU.
"""Returns the tag list.
"""
- return self.target.GetTags()
+ return list(self.target.GetTags())
class LUSearchTags(NoHooksLU):
" config file and the operation has been"
" aborted. Please retry.")
+
class LUTestDelay(NoHooksLU):
"""Sleep for a specified amount of time.
- This LU sleeps on the master and/or nodes for a specified amoutn of
+ This LU sleeps on the master and/or nodes for a specified amount of
time.
"""
_OP_REQP = ["duration", "on_master", "on_nodes"]
+ REQ_BGL = False
- def CheckPrereq(self):
- """Check prerequisites.
+ def ExpandNames(self):
+ """Expand names and set required locks.
- This checks that we have a good list of nodes and/or the duration
- is valid.
+ This expands the node list, if any.
"""
-
+ self.needed_locks = {}
if self.op.on_nodes:
+      # _GetWantedNodes can be used here, but calling it this way is not
+      # always appropriate in ExpandNames; see the LogicalUnit.ExpandNames
+      # docstring for details.
+      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
+ self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
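+      # e.g. self.needed_locks ends up as
+      #   {locking.LEVEL_NODE: ["node1.example.com", "node2.example.com"]}
+      # (node names illustrative; _GetWantedNodes returns expanded names)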
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+
+ """
def Exec(self, feedback_fn):
"""Do the actual sleep.
easy usage
"""
- _KEYS = [
- "mode", "name",
+ _ALLO_KEYS = [
"mem_size", "disks", "disk_template",
"os", "tags", "nics", "vcpus",
]
+ _RELO_KEYS = [
+ "relocate_from",
+ ]
- def __init__(self, cfg, sstore, **kwargs):
+ def __init__(self, cfg, sstore, mode, name, **kwargs):
self.cfg = cfg
self.sstore = sstore
# init buffer variables
self.in_text = self.out_text = self.in_data = self.out_data = None
# init all input fields so that pylint is happy
- self.mode = self.name = None
+ self.mode = mode
+ self.name = name
self.mem_size = self.disks = self.disk_template = None
self.os = self.tags = self.nics = self.vcpus = None
+ self.relocate_from = None
# computed fields
self.required_nodes = None
# init result fields
self.success = self.info = self.nodes = None
+ if self.mode == constants.IALLOCATOR_MODE_ALLOC:
+ keyset = self._ALLO_KEYS
+ elif self.mode == constants.IALLOCATOR_MODE_RELOC:
+ keyset = self._RELO_KEYS
+ else:
+ raise errors.ProgrammerError("Unknown mode '%s' passed to the"
+ " IAllocator" % self.mode)
for key in kwargs:
- if key not in self._KEYS:
+ if key not in keyset:
raise errors.ProgrammerError("Invalid input parameter '%s' to"
" IAllocator" % key)
setattr(self, key, kwargs[key])
- for key in self._KEYS:
+ for key in keyset:
if key not in kwargs:
raise errors.ProgrammerError("Missing input parameter '%s' to"
" IAllocator" % key)
"version": 1,
"cluster_name": self.sstore.GetClusterName(),
"cluster_tags": list(cfg.GetClusterInfo().GetTags()),
+ "hypervisor_type": self.sstore.GetHypervisorType(),
# we don't have job IDs
}
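+
+    # fetch all instance objects once; they are used both for the per-node
+    # memory accounting and for the instance data below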
+ i_list = [cfg.GetInstanceInfo(iname) for iname in cfg.GetInstanceList()]
+
# node data
node_results = {}
node_list = cfg.GetNodeList()
if nname not in node_data or not isinstance(node_data[nname], dict):
raise errors.OpExecError("Can't get data for node %s" % nname)
remote_info = node_data[nname]
- for attr in ['memory_total', 'memory_free',
- 'vg_size', 'vg_free']:
+ for attr in ['memory_total', 'memory_free', 'memory_dom0',
+ 'vg_size', 'vg_free', 'cpu_total']:
if attr not in remote_info:
raise errors.OpExecError("Node '%s' didn't return attribute '%s'" %
(nname, attr))
try:
- int(remote_info[attr])
+ remote_info[attr] = int(remote_info[attr])
except ValueError, err:
raise errors.OpExecError("Node '%s' returned invalid value for '%s':"
" %s" % (nname, attr, str(err)))
+ # compute memory used by primary instances
+ i_p_mem = i_p_up_mem = 0
+ for iinfo in i_list:
+ if iinfo.primary_node == nname:
+ i_p_mem += iinfo.memory
+ if iinfo.status == "up":
+ i_p_up_mem += iinfo.memory
+
+      # build the node result dict
pnr = {
"tags": list(ninfo.GetTags()),
- "total_memory": utils.TryConvert(int, remote_info['memory_total']),
- "free_memory": utils.TryConvert(int, remote_info['memory_free']),
- "total_disk": utils.TryConvert(int, remote_info['vg_size']),
- "free_disk": utils.TryConvert(int, remote_info['vg_free']),
+ "total_memory": remote_info['memory_total'],
+ "reserved_memory": remote_info['memory_dom0'],
+ "free_memory": remote_info['memory_free'],
+ "i_pri_memory": i_p_mem,
+ "i_pri_up_memory": i_p_up_mem,
+ "total_disk": remote_info['vg_size'],
+ "free_disk": remote_info['vg_free'],
"primary_ip": ninfo.primary_ip,
"secondary_ip": ninfo.secondary_ip,
+ "total_cpus": remote_info['cpu_total'],
}
node_results[nname] = pnr
data["nodes"] = node_results
# instance data
instance_data = {}
- i_list = cfg.GetInstanceList()
- for iname in i_list:
- iinfo = cfg.GetInstanceInfo(iname)
+ for iinfo in i_list:
nic_data = [{"mac": n.mac, "ip": n.ip, "bridge": n.bridge}
for n in iinfo.nics]
pir = {
"disks": [{"size": dsk.size, "mode": "w"} for dsk in iinfo.disks],
"disk_template": iinfo.disk_template,
}
- instance_data[iname] = pir
+ instance_data[iinfo.name] = pir
data["instances"] = instance_data
"name": self.name,
"disk_space_total": disk_space,
"required_nodes": self.required_nodes,
- "nodes": list(instance.secondary_nodes),
+ "relocate_from": self.relocate_from,
}
self.in_data["request"] = request
self.in_text = serializer.Dump(self.in_data)
- def Run(self, name, validate=True):
+ def Run(self, name, validate=True, call_fn=rpc.call_iallocator_runner):
"""Run an instance allocator and return the results.
"""
data = self.in_text
- alloc_script = utils.FindFile(name, constants.IALLOCATOR_SEARCH_PATH,
- os.path.isfile)
- if alloc_script is None:
- raise errors.OpExecError("Can't find allocator '%s'" % name)
+ result = call_fn(self.sstore.GetMasterNode(), name, self.in_text)
- fd, fin_name = tempfile.mkstemp(prefix="ganeti-iallocator.")
- try:
- os.write(fd, data)
- os.close(fd)
- result = utils.RunCmd([alloc_script, fin_name])
- if result.failed:
- raise errors.OpExecError("Instance allocator call failed: %s,"
- " output: %s" %
- (result.fail_reason, result.output))
- finally:
- os.unlink(fin_name)
- self.out_text = result.stdout
+ if not isinstance(result, tuple) or len(result) != 4:
+ raise errors.OpExecError("Invalid result from master iallocator runner")
+
+ rcode, stdout, stderr, fail = result
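+    # rcode is one of the constants.IARUN_* status codes; stdout/stderr hold
+    # the allocator's output, and fail carries the failure reason, if any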
+
+ if rcode == constants.IARUN_NOTFOUND:
+ raise errors.OpExecError("Can't find allocator '%s'" % name)
+ elif rcode == constants.IARUN_FAILURE:
+      raise errors.OpExecError("Instance allocator call failed: %s,"
+                               " output: %s" % (fail, stdout + stderr))
+ self.out_text = stdout
if validate:
self._ValidateResult()
raise errors.OpPrereqError("Instance '%s' not found for relocation" %
self.op.name)
self.op.name = fname
+ self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
else:
raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
self.op.mode)
"""Run the allocator test.
"""
- ial = IAllocator(self.cfg, self.sstore,
- mode=self.op.mode,
- name=self.op.name,
- mem_size=self.op.mem_size,
- disks=self.op.disks,
- disk_template=self.op.disk_template,
- os=self.op.os,
- tags=self.op.tags,
- nics=self.op.nics,
- vcpus=self.op.vcpus,
- )
+ if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
+ ial = IAllocator(self.cfg, self.sstore,
+ mode=self.op.mode,
+ name=self.op.name,
+ mem_size=self.op.mem_size,
+ disks=self.op.disks,
+ disk_template=self.op.disk_template,
+ os=self.op.os,
+ tags=self.op.tags,
+ nics=self.op.nics,
+ vcpus=self.op.vcpus,
+ )
+ else:
+ ial = IAllocator(self.cfg, self.sstore,
+ mode=self.op.mode,
+ name=self.op.name,
+ relocate_from=list(self.relocate_from),
+ )
if self.op.direction == constants.IALLOCATOR_DIR_IN:
result = ial.in_text