X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/b5602d15cb500f8e5adc82563428fbea739e01e3..a2fd9afc7fa2c48e0e659a814054de75e06fd819:/lib/cmdlib.py?ds=sidebyside diff --git a/lib/cmdlib.py b/lib/cmdlib.py index e3c1a86..c339933 100644 --- a/lib/cmdlib.py +++ b/lib/cmdlib.py @@ -57,6 +57,7 @@ class LogicalUnit(object): - optionally redefine their run requirements: REQ_MASTER: the LU needs to run on the master node REQ_WSSTORE: the LU needs a writable SimpleStore + REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively Note that all commands require root permissions. @@ -66,8 +67,9 @@ class LogicalUnit(object): _OP_REQP = [] REQ_MASTER = True REQ_WSSTORE = False + REQ_BGL = True - def __init__(self, processor, op, cfg, sstore): + def __init__(self, processor, op, context, sstore): """Constructor for LogicalUnit. This needs to be overriden in derived classes in order to check op @@ -76,8 +78,9 @@ class LogicalUnit(object): """ self.proc = processor self.op = op - self.cfg = cfg + self.cfg = context.cfg self.sstore = sstore + self.context = context self.__ssh = None for attr_name in self._OP_REQP: @@ -1243,6 +1246,8 @@ class LURemoveNode(LogicalUnit): logger.Info("Removing node %s from config" % node.name) self.cfg.RemoveNode(node.name) + # Remove the node from the Ganeti Lock Manager + self.context.glm.remove(locking.LEVEL_NODE, node.name) utils.RemoveHostFromEtcHosts(node.name) @@ -1529,46 +1534,7 @@ class LUAddNode(LogicalUnit): new_node = self.new_node node = new_node.name - # set up inter-node password and certificate and restarts the node daemon - gntpass = self.sstore.GetNodeDaemonPassword() - if not re.match('^[a-zA-Z0-9.]{1,64}$', gntpass): - raise errors.OpExecError("ganeti password corruption detected") - f = open(constants.SSL_CERT_FILE) - try: - gntpem = f.read(8192) - finally: - f.close() - # in the base64 pem encoding, neither '!' nor '.' are valid chars, - # so we use this to detect an invalid certificate; as long as the - # cert doesn't contain this, the here-document will be correctly - # parsed by the shell sequence below - if re.search('^!EOF\.', gntpem, re.MULTILINE): - raise errors.OpExecError("invalid PEM encoding in the SSL certificate") - if not gntpem.endswith("\n"): - raise errors.OpExecError("PEM must end with newline") - logger.Info("copy cluster pass to %s and starting the node daemon" % node) - - # and then connect with ssh to set password and start ganeti-noded - # note that all the below variables are sanitized at this point, - # either by being constants or by the checks above - ss = self.sstore - mycommand = ("umask 077 && " - "echo '%s' > '%s' && " - "cat > '%s' << '!EOF.' && \n" - "%s!EOF.\n%s restart" % - (gntpass, ss.KeyToFilename(ss.SS_NODED_PASS), - constants.SSL_CERT_FILE, gntpem, - constants.NODE_INITD_SCRIPT)) - - result = self.ssh.Run(node, 'root', mycommand, batch=False, ask_key=True) - if result.failed: - raise errors.OpExecError("Remote command on node %s, error: %s," - " output: %s" % - (node, result.fail_reason, result.output)) - # check connectivity - time.sleep(4) - result = rpc.call_version([node])[node] if result: if constants.PROTOCOL_VERSION == result: @@ -1615,12 +1581,22 @@ class LUAddNode(LogicalUnit): " you gave (%s). Please fix and re-run this" " command." % new_node.secondary_ip) - success, msg = self.ssh.VerifyNodeHostname(node) - if not success: - raise errors.OpExecError("Node '%s' claims it has a different hostname" - " than the one the resolver gives: %s." - " Please fix and re-run this command." % - (node, msg)) + node_verify_list = [self.sstore.GetMasterNode()] + node_verify_param = { + 'nodelist': [node], + # TODO: do a node-net-test as well? + } + + result = rpc.call_node_verify(node_verify_list, node_verify_param) + for verifier in node_verify_list: + if not result[verifier]: + raise errors.OpExecError("Cannot communicate with %s's node daemon" + " for remote verification" % verifier) + if result[verifier]['nodelist']: + for failed in result[verifier]['nodelist']: + feedback_fn("ssh/hostname verification failed %s -> %s" % + (verifier, result[verifier]['nodelist'][failed])) + raise errors.OpExecError("ssh/hostname verification failed.") # Distribute updated /etc/hosts and known_hosts to all nodes, # including the node just added @@ -1639,7 +1615,7 @@ class LUAddNode(LogicalUnit): logger.Error("copy of file %s to node %s failed" % (fname, to_node)) - to_copy = ss.GetFileList() + to_copy = self.sstore.GetFileList() if self.sstore.GetHypervisorType() == constants.HT_XEN_HVM31: to_copy.append(constants.VNC_PASSWORD_FILE) for fname in to_copy: @@ -1650,6 +1626,8 @@ class LUAddNode(LogicalUnit): if not self.op.readd: logger.Info("adding node %s to cluster.conf" % node) self.cfg.AddNode(new_node) + # Add the new node to the Ganeti Lock Manager + self.context.glm.add(locking.LEVEL_NODE, node) class LUMasterFailover(LogicalUnit): @@ -2392,6 +2370,8 @@ class LURemoveInstance(LogicalUnit): logger.Info("removing instance %s out of cluster config" % instance.name) self.cfg.RemoveInstance(instance.name) + # Remove the new instance from the Ganeti Lock Manager + self.context.glm.remove(locking.LEVEL_INSTANCE, instance.name) class LUQueryInstances(NoHooksLU): @@ -3250,6 +3230,8 @@ class LUCreateInstance(LogicalUnit): feedback_fn("adding instance %s to cluster config" % instance) self.cfg.AddInstance(iobj) + # Add the new instance to the Ganeti Lock Manager + self.context.glm.add(locking.LEVEL_INSTANCE, instance) if self.op.wait_for_sync: disk_abort = not _WaitForSync(self.cfg, iobj, self.proc) @@ -3264,6 +3246,8 @@ class LUCreateInstance(LogicalUnit): if disk_abort: _RemoveDisks(iobj, self.cfg) self.cfg.RemoveInstance(iobj.name) + # Remove the new instance from the Ganeti Lock Manager + self.context.glm.remove(locking.LEVEL_INSTANCE, iobj.name) raise errors.OpExecError("There are some degraded disks for" " this instance") @@ -4577,7 +4561,7 @@ class LUDelTags(TagsLU): class LUTestDelay(NoHooksLU): """Sleep for a specified amount of time. - This LU sleeps on the master and/or nodes for a specified amoutn of + This LU sleeps on the master and/or nodes for a specified amount of time. """