4 # Copyright (C) 2006, 2007, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Master daemon program.
24 Some classes deviate from the standard style guide since the
25 inheritance from parent classes requires it.
29 # pylint: disable=C0103
30 # C0103: Invalid name ganeti-masterd
41 from optparse import OptionParser
43 from ganeti import config
44 from ganeti import constants
45 from ganeti import daemon
46 from ganeti import mcpu
47 from ganeti import opcodes
48 from ganeti import jqueue
49 from ganeti import locking
50 from ganeti import luxi
51 from ganeti import utils
52 from ganeti import errors
53 from ganeti import ssconf
54 from ganeti import workerpool
55 from ganeti import rpc
56 from ganeti import bootstrap
57 from ganeti import netutils
58 from ganeti import objects
59 from ganeti import query
60 from ganeti import runtime
61 from ganeti import pathutils
64 from ganeti.utils import version
# Number of workers handed to the "ClientReq" workerpool.WorkerPool that
# MasterServer.setup_queue creates for serving client requests.
67 CLIENT_REQUEST_WORKERS = 16
# Module-level aliases for the exit codes defined in ganeti.constants.
69 EXIT_NOTMASTER = constants.EXIT_NOTMASTER
70 EXIT_NODESETUP_ERROR = constants.EXIT_NODESETUP_ERROR
# _LogNewJob: write an info-level log entry describing a job submission.
#
#   status - submission outcome; callers pass True for single submissions and
#            the per-job status returned by SubmitManyJobs for batches
#   info   - logged as the job id in the "New job" message and as the failure
#            reason in the "Failed to submit job" message
#   ops    - iterable of opcodes; every op.Summary() is joined with
#            utils.CommaJoin to form the logged summary
#
# NOTE(review): the lines choosing between the two logging calls (presumably
# an if/else on `status`) and the tail of the second call are not present in
# this listing -- confirm against the complete file.
73 def _LogNewJob(status, info, ops):
74 """Log information about a recently submitted job.
77 op_summary = utils.CommaJoin(op.Summary() for op in ops)
80 logging.info("New job with id %s, summary: %s", info, op_summary)
82 logging.info("Failed to submit job, reason: '%s', summary: %s",
# ClientRequestWorker: workerpool.BaseWorker subclass that serves one client
# request per task.
#
# RunTask(server, message, client):
#   * creates a ClientOps bound to `server` and parses `message` with
#     luxi.ParseRequest into (method, args, ver); a luxi.ProtocolError is
#     logged as "Protocol Error: ..."
#   * if the request carries a version and it differs from
#     constants.LUXI_VERSION, raises errors.LuxiError
#   * dispatches the call through client_ops.handle_request(method, args)
#   * an errors.GenericError is logged and encoded into the result with
#     errors.EncodeException; a further failure path logs the exception and
#     reports "Caught exception: ..." built from err[1]
#   * the reply is built by luxi.FormatResponse(success, result), sent with
#     client.send_message, and server.awaker.signal() then wakes the main
#     thread so it can write the data out; anything raised while sending is
#     logged as "Send error" and not propagated further by this handler
#
# pylint W0221 (arguments-differ) is disabled for RunTask, presumably because
# its task-specific parameters differ from the base-class signature.
#
# NOTE(review): the `try:` lines, the handling after a protocol error, the
# assignments to `success` and the origin of `err` in the generic failure path
# are not visible in this listing -- confirm against the complete file.
86 class ClientRequestWorker(workerpool.BaseWorker):
87 # pylint: disable=W0221
88 def RunTask(self, server, message, client):
89 """Process the request.
92 client_ops = ClientOps(server)
95 (method, args, ver) = luxi.ParseRequest(message)
96 except luxi.ProtocolError, err:
97 logging.error("Protocol Error: %s", err)
103 # Verify client's version if there was one in the request
104 if ver is not None and ver != constants.LUXI_VERSION:
105 raise errors.LuxiError("LUXI version mismatch, server %s, request %s" %
106 (constants.LUXI_VERSION, ver))
108 result = client_ops.handle_request(method, args)
110 except errors.GenericError, err:
111 logging.exception("Unexpected exception")
113 result = errors.EncodeException(err)
115 logging.exception("Unexpected exception")
117 result = "Caught exception: %s" % str(err[1])
120 reply = luxi.FormatResponse(success, result)
121 client.send_message(reply)
122 # awake the main thread so that it can write out the data.
123 server.awaker.signal()
124 except: # pylint: disable=W0702
125 logging.exception("Send error")
# MasterClientHandler: daemon.AsyncTerminatedMessageStream subclass that
# handles one connected peer of the master daemon.
#
#   __init__(server, connected_socket, client_address, family)
#       forwards the connected socket, the address family and
#       self._MAX_UNHANDLED to the base-class constructor
#   handle_message(message, _)
#       queues the tuple (self.server, message, self) on the server's
#       request_workers pool; the second argument is ignored
#
# NOTE(review): the definition of _MAX_UNHANDLED, the remaining base
# constructor arguments and the assignment of self.server are not visible in
# this listing -- confirm against the complete file.
129 class MasterClientHandler(daemon.AsyncTerminatedMessageStream):
130 """Handler for master peers.
135 def __init__(self, server, connected_socket, client_address, family):
136 daemon.AsyncTerminatedMessageStream.__init__(self, connected_socket,
139 family, self._MAX_UNHANDLED)
142 def handle_message(self, message, _):
143 self.server.request_workers.AddTask((self.server, message, self))
# _MasterShutdownCheck: callable object deciding whether the master daemon
# may shut down yet.
#
# Class attributes:
#   _CHECK_INTERVAL  - value (5.0 seconds) returned while the job queue is
#                      still busy, i.e. the delay until the next check
#   _SHUTDOWN_LINGER - duration (5.0 seconds) of the utils.RunningTimeout
#                      started once the queue has become idle
# Instance state:
#   _had_active_jobs - initially None; set to True the first time a check
#                      finds the queue busy
#   _linger_timeout  - initially None; created lazily on the first idle check
#                      that follows earlier activity
#
# Calling the instance with the result of jqueue.JobQueue.PrepareShutdown:
#   * truthy result: logs that the queue is still busy, records that active
#     jobs were seen and returns _CHECK_INTERVAL
#   * falsy result and no active jobs ever seen: the daemon can shut down
#     (the docstring specifies None for "ready")
#   * otherwise: starts or consults the linger timeout and logs the remaining
#     time, giving clients a short window to collect job information
#
# NOTE(review): the `def __init__` line and the return statements of the last
# two cases are not visible in this listing -- confirm against the complete
# file.
146 class _MasterShutdownCheck:
147 """Logic for master daemon shutdown.
150 #: How long to wait between checks
151 _CHECK_INTERVAL = 5.0
153 #: How long to wait after all jobs are done (e.g. to give clients time to
154 #: retrieve the job status)
155 _SHUTDOWN_LINGER = 5.0
158 """Initializes this class.
161 self._had_active_jobs = None
162 self._linger_timeout = None
164 def __call__(self, jq_prepare_result):
165 """Determines if master daemon is ready for shutdown.
167 @param jq_prepare_result: Result of L{jqueue.JobQueue.PrepareShutdown}
168 @rtype: None or number
169 @return: None if master daemon is ready, timeout if the check must be
173 if jq_prepare_result:
174 # Check again shortly
175 logging.info("Job queue has been notified for shutdown but is still"
176 " busy; next check in %s seconds", self._CHECK_INTERVAL)
177 self._had_active_jobs = True
178 return self._CHECK_INTERVAL
180 if not self._had_active_jobs:
181 # Can shut down as there were no active jobs on the first check
184 # No jobs are running anymore, but maybe some clients want to collect some
185 # information. Give them a short amount of time.
186 if self._linger_timeout is None:
187 self._linger_timeout = utils.RunningTimeout(self._SHUTDOWN_LINGER, True)
189 remaining = self._linger_timeout.Remaining()
191 logging.info("Job queue no longer busy; shutting down master daemon"
192 " in %s seconds", remaining)
194 # TODO: Should the master daemon socket be closed at this point? Doing so
195 # wouldn't affect existing connections.
# MasterServer: daemon.AsyncStreamServer for the master daemon, listening on
# a UNIX socket (family = socket.AF_UNIX).
#
#   __init__(address, uid, gid)
#       Binds the base server to a temporary name in the directory of
#       `address`, sets mode 0770 and the given uid/gid on that path and only
#       then renames it to `address`, so the final path appears with
#       permissions and ownership already applied. Also creates the
#       daemon.AsyncAwaker and leaves request_workers and _shutdown_check as
#       None; worker threads are only started after forking.
#   handle_connection(connected_socket, client_address)
#       Wraps every accepted connection in a MasterClientHandler.
#   setup_queue()
#       Creates the GanetiContext and the "ClientReq" worker pool sized by
#       CLIENT_REQUEST_WORKERS.
#   WaitForShutdown()
#       Lazily creates a _MasterShutdownCheck and returns its verdict for
#       self.context.jobqueue.PrepareShutdown().
#   server_cleanup()
#       Terminates the request workers (if any were created) and shuts down
#       the job queue.
#
# NOTE(review): tempfile.mktemp only generates a name and Python's
# documentation warns that it is race-prone; whether that matters for a path
# that is immediately bound as a socket should be confirmed.
# NOTE(review): the last WorkerPool argument and any try/finally structure in
# server_cleanup are not visible in this listing.
203 class MasterServer(daemon.AsyncStreamServer):
206 This is the main asynchronous master server. It handles connections to the
210 family = socket.AF_UNIX
212 def __init__(self, address, uid, gid):
213 """MasterServer constructor
215 @param address: the unix socket address to bind the MasterServer to
216 @param uid: The uid of the owner of the socket
217 @param gid: The gid of the owner of the socket
220 temp_name = tempfile.mktemp(dir=os.path.dirname(address))
221 daemon.AsyncStreamServer.__init__(self, self.family, temp_name)
222 os.chmod(temp_name, 0770)
223 os.chown(temp_name, uid, gid)
224 os.rename(temp_name, address)
226 self.awaker = daemon.AsyncAwaker()
228 # We'll only start threads once we've forked.
230 self.request_workers = None
232 self._shutdown_check = None
234 def handle_connection(self, connected_socket, client_address):
235 # TODO: add connection count and limit the number of open connections to a
236 # maximum number to avoid breaking for lack of file descriptors or memory.
237 MasterClientHandler(self, connected_socket, client_address, self.family)
239 def setup_queue(self):
240 self.context = GanetiContext()
241 self.request_workers = workerpool.WorkerPool("ClientReq",
242 CLIENT_REQUEST_WORKERS,
245 def WaitForShutdown(self):
246 """Prepares server for shutdown.
249 if self._shutdown_check is None:
250 self._shutdown_check = _MasterShutdownCheck()
252 return self._shutdown_check(self.context.jobqueue.PrepareShutdown())
254 def server_cleanup(self):
255 """Cleanup the server.
257 This involves shutting down the processor threads and the master
264 if self.request_workers:
265 self.request_workers.TerminateWorkers()
267 self.context.jobqueue.Shutdown()
271 """Class holding high-level client operations."""
# Methods of the client-operations class that ClientRequestWorker.RunTask
# instantiates as ClientOps(server).
#
# handle_request(method, args)
#   Validation: `args` must be a tuple or list and `method` must be listed in
#   luxi.REQ_ALL; otherwise the problem is logged and ValueError is raised.
#   Dispatch on `method`:
#     * job submission (REQ_SUBMIT_JOB, REQ_SUBMIT_JOB_TO_DRAINED_QUEUE,
#       REQ_SUBMIT_MANY_JOBS): opcodes are rebuilt from their serialized state
#       with opcodes.OpCode.LoadOpCode, handed to the job queue and the
#       outcome is logged through _LogNewJob
#     * job management (cancel, change priority, archive, auto-archive, wait
#       for change, old-style job query, drain flag): forwarded to the
#       matching method of self.server.context.jobqueue
#     * REQ_QUERY: routed by resource type -- constants.QR_VIA_OP through an
#       opcodes.OpQuery, QR_LOCK to context.glm.QueryLocks (a filter is
#       rejected with errors.OpPrereqError), QR_JOB to queue.QueryJobs;
#       QR_VIA_LUXI raises NotImplementedError and other types are rejected
#       as unknown
#     * REQ_QUERY_FIELDS: looks the field definitions up in query.ALL_FIELDS
#       and returns query.QueryFields for them
#     * the remaining REQ_QUERY_* requests build the corresponding opcode and
#       run it through _Query
#     * REQ_SET_WATCHER_PAUSE: delegates to _SetWatcherPause
#   A method that is in luxi.REQ_ALL but matches no branch is logged as
#   critical and raises errors.ProgrammerError.
#
# _Query(op)
#   Executes `op` with an mcpu.Processor created without a job id and with
#   enable_locks=False, and returns the result of proc.ExecOpCode(op, None).
#
# NOTE(review): the class statement, the body of __init__, several lines that
# unpack `args`, some return statements and the conditions guarding the
# "Sync queries are not allowed" errors are not visible in this listing --
# confirm against the complete file.
272 def __init__(self, server):
275 def handle_request(self, method, args): # pylint: disable=R0911
276 context = self.server.context
277 queue = context.jobqueue
279 # TODO: Parameter validation
280 if not isinstance(args, (tuple, list)):
281 logging.info("Received invalid arguments of type '%s'", type(args))
282 raise ValueError("Invalid arguments type '%s'" % type(args))
284 if method not in luxi.REQ_ALL:
285 logging.info("Received invalid request '%s'", method)
286 raise ValueError("Invalid operation '%s'" % method)
288 # TODO: Rewrite to not exit in each 'if/elif' branch
290 if method == luxi.REQ_SUBMIT_JOB:
291 logging.info("Receiving new job")
293 ops = [opcodes.OpCode.LoadOpCode(state) for state in job_def]
294 job_id = queue.SubmitJob(ops)
295 _LogNewJob(True, job_id, ops)
298 elif method == luxi.REQ_SUBMIT_JOB_TO_DRAINED_QUEUE:
299 logging.info("Forcefully receiving new job")
301 ops = [opcodes.OpCode.LoadOpCode(state) for state in job_def]
302 job_id = queue.SubmitJobToDrainedQueue(ops)
303 _LogNewJob(True, job_id, ops)
306 elif method == luxi.REQ_SUBMIT_MANY_JOBS:
307 logging.info("Receiving multiple jobs")
311 jobs.append([opcodes.OpCode.LoadOpCode(state) for state in ops])
312 job_ids = queue.SubmitManyJobs(jobs)
313 for ((status, job_id), ops) in zip(job_ids, jobs):
314 _LogNewJob(status, job_id, ops)
317 elif method == luxi.REQ_CANCEL_JOB:
319 logging.info("Received job cancel request for %s", job_id)
320 return queue.CancelJob(job_id)
322 elif method == luxi.REQ_CHANGE_JOB_PRIORITY:
323 (job_id, priority) = args
324 logging.info("Received request to change priority for job %s to %s",
326 return queue.ChangeJobPriority(job_id, priority)
328 elif method == luxi.REQ_ARCHIVE_JOB:
330 logging.info("Received job archive request for %s", job_id)
331 return queue.ArchiveJob(job_id)
333 elif method == luxi.REQ_AUTO_ARCHIVE_JOBS:
334 (age, timeout) = args
335 logging.info("Received job autoarchive request for age %s, timeout %s",
337 return queue.AutoArchiveJobs(age, timeout)
339 elif method == luxi.REQ_WAIT_FOR_JOB_CHANGE:
340 (job_id, fields, prev_job_info, prev_log_serial, timeout) = args
341 logging.info("Received job poll request for %s", job_id)
342 return queue.WaitForJobChanges(job_id, fields, prev_job_info,
343 prev_log_serial, timeout)
345 elif method == luxi.REQ_QUERY:
346 (what, fields, qfilter) = args
348 if what in constants.QR_VIA_OP:
349 result = self._Query(opcodes.OpQuery(what=what, fields=fields,
351 elif what == constants.QR_LOCK:
352 if qfilter is not None:
353 raise errors.OpPrereqError("Lock queries can't be filtered",
355 return context.glm.QueryLocks(fields)
356 elif what == constants.QR_JOB:
357 return queue.QueryJobs(fields, qfilter)
358 elif what in constants.QR_VIA_LUXI:
359 raise NotImplementedError
361 raise errors.OpPrereqError("Resource type '%s' unknown" % what,
366 elif method == luxi.REQ_QUERY_FIELDS:
367 (what, fields) = args
368 req = objects.QueryFieldsRequest(what=what, fields=fields)
371 fielddefs = query.ALL_FIELDS[req.what]
373 raise errors.OpPrereqError("Resource type '%s' unknown" % req.what,
376 return query.QueryFields(fielddefs, req.fields)
378 elif method == luxi.REQ_QUERY_JOBS:
379 (job_ids, fields) = args
380 if isinstance(job_ids, (tuple, list)) and job_ids:
381 msg = utils.CommaJoin(job_ids)
384 logging.info("Received job query request for %s", msg)
385 return queue.OldStyleQueryJobs(job_ids, fields)
387 elif method == luxi.REQ_QUERY_INSTANCES:
388 (names, fields, use_locking) = args
389 logging.info("Received instance query request for %s", names)
391 raise errors.OpPrereqError("Sync queries are not allowed",
393 op = opcodes.OpInstanceQuery(names=names, output_fields=fields,
394 use_locking=use_locking)
395 return self._Query(op)
397 elif method == luxi.REQ_QUERY_NODES:
398 (names, fields, use_locking) = args
399 logging.info("Received node query request for %s", names)
401 raise errors.OpPrereqError("Sync queries are not allowed",
403 op = opcodes.OpNodeQuery(names=names, output_fields=fields,
404 use_locking=use_locking)
405 return self._Query(op)
407 elif method == luxi.REQ_QUERY_GROUPS:
408 (names, fields, use_locking) = args
409 logging.info("Received group query request for %s", names)
411 raise errors.OpPrereqError("Sync queries are not allowed",
413 op = opcodes.OpGroupQuery(names=names, output_fields=fields)
414 return self._Query(op)
416 elif method == luxi.REQ_QUERY_NETWORKS:
417 (names, fields, use_locking) = args
418 logging.info("Received network query request for %s", names)
420 raise errors.OpPrereqError("Sync queries are not allowed",
422 op = opcodes.OpNetworkQuery(names=names, output_fields=fields)
423 return self._Query(op)
425 elif method == luxi.REQ_QUERY_EXPORTS:
426 (nodes, use_locking) = args
428 raise errors.OpPrereqError("Sync queries are not allowed",
430 logging.info("Received exports query request")
431 op = opcodes.OpBackupQuery(nodes=nodes, use_locking=use_locking)
432 return self._Query(op)
434 elif method == luxi.REQ_QUERY_CONFIG_VALUES:
436 logging.info("Received config values query request for %s", fields)
437 op = opcodes.OpClusterConfigQuery(output_fields=fields)
438 return self._Query(op)
440 elif method == luxi.REQ_QUERY_CLUSTER_INFO:
441 logging.info("Received cluster info query request")
442 op = opcodes.OpClusterQuery()
443 return self._Query(op)
445 elif method == luxi.REQ_QUERY_TAGS:
447 logging.info("Received tags query request")
448 op = opcodes.OpTagsGet(kind=kind, name=name, use_locking=False)
449 return self._Query(op)
451 elif method == luxi.REQ_SET_DRAIN_FLAG:
452 (drain_flag, ) = args
453 logging.info("Received queue drain flag change request to %s",
455 return queue.SetDrainFlag(drain_flag)
457 elif method == luxi.REQ_SET_WATCHER_PAUSE:
460 return _SetWatcherPause(context, until)
463 logging.critical("Request '%s' in luxi.REQ_ALL, but not known", method)
464 raise errors.ProgrammerError("Operation '%s' in luxi.REQ_ALL,"
465 " but not implemented" % method)
467 def _Query(self, op):
468 """Runs the specified opcode and returns the result.
471 # Queries don't have a job id
472 proc = mcpu.Processor(self.server.context, None, enable_locks=False)
474 # TODO: Executing an opcode using locks will acquire them in blocking mode.
475 # Consider using a timeout for retries.
476 return proc.ExecOpCode(op, None)
# GanetiContext: container for the objects shared by all daemon threads.
#
# Attributes created by the constructor, in this order:
#   cfg      - config.ConfigWriter, the global configuration object
#   glm      - locking.GanetiLockManager seeded with the node list, node group
#              list, instance names and network list taken from cfg
#   rpc      - rpc.RpcRunner built from cfg and glm.AddToLockMonitor
#   jobqueue - jqueue.JobQueue created with this context
# cfg.SetContext(self) is called once the lock manager exists.
#
# Singleton and immutability: the constructor asserts that the class-level
# _instance is still None and stores `self` there as its last step.
# __setattr__ asserts the same condition, so after construction any attribute
# assignment trips the assertion. Being assert statements, these checks are
# not executed when Python runs with optimizations (-O).
#
# Node bookkeeping helpers:
#   AddNode(node, ec_id) - adds the node to the configuration, then to the job
#                          queue, then registers node.uuid at the LEVEL_NODE
#                          and LEVEL_NODE_RES lock levels
#   ReaddNode(node)      - only re-synchronizes the job queue for the node
#   RemoveNode(node)     - removes node.uuid from the configuration, node.name
#                          from the job queue, and node.uuid from both lock
#                          levels
#
# NOTE(review): the `_instance = None` class attribute and the
# `def __init__` line are not visible in this listing -- confirm against the
# complete file.
479 class GanetiContext(object):
480 """Context common to all ganeti threads.
482 This class creates and holds common objects shared by all threads.
485 # pylint: disable=W0212
486 # we do want to ensure a singleton here
490 """Constructs a new GanetiContext object.
492 There should be only a GanetiContext object at any time, so this
493 function raises an error if this is not the case.
496 assert self.__class__._instance is None, "double GanetiContext instance"
498 # Create global configuration object
499 self.cfg = config.ConfigWriter()
502 self.glm = locking.GanetiLockManager(
503 self.cfg.GetNodeList(),
504 self.cfg.GetNodeGroupList(),
505 [inst.name for inst in self.cfg.GetAllInstancesInfo().values()],
506 self.cfg.GetNetworkList())
508 self.cfg.SetContext(self)
511 self.rpc = rpc.RpcRunner(self.cfg, self.glm.AddToLockMonitor)
514 self.jobqueue = jqueue.JobQueue(self)
516 # setting this also locks the class against attribute modifications
517 self.__class__._instance = self
519 def __setattr__(self, name, value):
520 """Setting GanetiContext attributes is forbidden after initialization.
523 assert self.__class__._instance is None, "Attempt to modify Ganeti Context"
524 object.__setattr__(self, name, value)
526 def AddNode(self, node, ec_id):
527 """Adds a node to the configuration and lock manager.
530 # Add it to the configuration
531 self.cfg.AddNode(node, ec_id)
533 # If preseeding fails it'll not be added
534 self.jobqueue.AddNode(node)
536 # Add the new node to the Ganeti Lock Manager
537 self.glm.add(locking.LEVEL_NODE, node.uuid)
538 self.glm.add(locking.LEVEL_NODE_RES, node.uuid)
540 def ReaddNode(self, node):
541 """Updates a node that's already in the configuration
544 # Synchronize the queue again
545 self.jobqueue.AddNode(node)
547 def RemoveNode(self, node):
548 """Removes a node from the configuration and lock manager.
551 # Remove node from configuration
552 self.cfg.RemoveNode(node.uuid)
555 self.jobqueue.RemoveNode(node.name)
557 # Remove the node from the Ganeti Lock Manager
558 self.glm.remove(locking.LEVEL_NODE, node.uuid)
559 self.glm.remove(locking.LEVEL_NODE_RES, node.uuid)
# _SetWatcherPause: push a watcher pause request to the cluster nodes.
#
#   context - GanetiContext; context.cfg supplies the node list and
#             context.rpc performs call_set_watcher_pause
#   until   - None or a Unix timestamp (per the docstring). A timestamp is
#             validated with ht.TNumber (TypeError "Duration must be numeric"
#             otherwise) and must not be earlier than time.time()
#             (errors.GenericError otherwise).
#
# After the RPC, every node result that carries a fail_msg and is not marked
# offline is collected into a single comma-joined message, which is used for
# the errors.OpExecError reporting a partial failure.
#
# NOTE(review): the conditions on `until` and on the collected error message,
# as well as the function's return value, are not visible in this listing --
# confirm against the complete file.
562 def _SetWatcherPause(context, until):
563 """Creates or removes the watcher pause file.
565 @type context: L{GanetiContext}
566 @param context: Global Ganeti context
567 @type until: None or int
568 @param until: Unix timestamp saying until when the watcher shouldn't run
571 node_names = context.cfg.GetNodeList()
574 logging.info("Received request to no longer pause watcher")
576 if not ht.TNumber(until):
577 raise TypeError("Duration must be numeric")
579 if until < time.time():
580 raise errors.GenericError("Unable to set pause end time in the past")
582 logging.info("Received request to pause watcher until %s", until)
584 result = context.rpc.call_set_watcher_pause(node_names, until)
586 errmsg = utils.CommaJoin("%s (%s)" % (node_name, nres.fail_msg)
587 for (node_name, nres) in result.items()
588 if nres.fail_msg and not nres.offline)
590 raise errors.OpExecError("Watcher pause was set where possible, but failed"
591 " on the following node(s): %s" % errmsg)
# CheckAgreement: decide from the votes of the known nodes whether this node
# is the cluster master.
#
# Steps visible in the code:
#   * determines the local host name via netutils.Hostname.GetSysName()
#   * reads the node names from a temporary config.ConfigWriter, which is
#     created only for that purpose
#   * collects votes with bootstrap.GatherMasterVotes(node_names); the result
#     is used as a list of (node, vote count) pairs with the top-voted entry
#     first, and a top entry whose node is None is checked for separately
#     (the comments describe it as not being a real answer)
#   * when no real answer is obtained after the retries, logs two critical
#     messages, the second pointing at the --no-voting option
#   * otherwise compares the top-voted node with the local host name and the
#     top vote count with the sum of all remaining votes; either mismatch is
#     logged as critical ("It seems we are not the master")
#
# NOTE(review): the retry loop, the handling of an empty vote list and all
# return statements are not visible in this listing -- confirm against the
# complete file. RunInSeparateProcess(CheckAgreement) in CheckMasterd treats a
# falsy outcome as failure.
597 def CheckAgreement():
598 """Check the agreement on who is the master.
600 The function uses a very simple algorithm: we must get more positive
601 than negative answers. Since in most of the cases we are the master,
602 we'll use our own config file for getting the node list. In the
603 future we could collect the current node list from our (possibly
604 obsolete) known nodes.
606 In order to account for cold-start of all nodes, we retry for up to
607 a minute until we get a real answer as the top-voted one. If the
608 nodes are more out-of-sync, for now manual startup of the master
611 Note that for a even number of nodes cluster, we need at least half
612 of the nodes (beside ourselves) to vote for us. This creates a
613 problem on two-node clusters, since in this case we require the
614 other node to be up too to confirm our status.
617 myself = netutils.Hostname.GetSysName()
618 #temp instantiation of a config writer, used only to get the node list
619 cfg = config.ConfigWriter()
620 node_names = cfg.GetNodeNames(cfg.GetNodeList())
624 votes = bootstrap.GatherMasterVotes(node_names)
626 # empty node list, this is a one node cluster
628 if votes[0][0] is None:
634 logging.critical("Cluster inconsistent, most of the nodes didn't answer"
635 " after multiple retries. Aborting startup")
636 logging.critical("Use the --no-voting option if you understand what"
637 " effects it has on the cluster state")
639 # here a real node is at the top of the list
640 all_votes = sum(item[1] for item in votes)
641 top_node, top_votes = votes[0]
644 if top_node != myself:
645 logging.critical("It seems we are not the master (top-voted node"
646 " is %s with %d out of %d votes)", top_node, top_votes,
648 elif top_votes < all_votes - top_votes:
649 logging.critical("It seems we are not the master (%d votes for,"
650 " %d votes against)", top_votes, all_votes - top_votes)
# ActivateMasterIP: ask the master node to activate the master IP address.
#
# Loads the master network parameters and the "use external master IP script"
# setting from a fresh config.ConfigWriter, then issues
# call_node_activate_master_ip through an rpc.BootstrapRunner. The target node
# is addressed by the name looked up from master_params.uuid. A failure
# message in the RPC result is logged as an error.
#
# NOTE(review): the condition guarding the error log (presumably a check on
# `msg`) is not visible in this listing -- confirm against the complete file.
658 def ActivateMasterIP():
660 cfg = config.ConfigWriter()
661 master_params = cfg.GetMasterNetworkParameters()
662 ems = cfg.GetUseExternalMipScript()
663 runner = rpc.BootstrapRunner()
664 # we use the node name, as the configuration is only available here yet
665 result = runner.call_node_activate_master_ip(
666 cfg.GetNodeName(master_params.uuid), master_params, ems)
668 msg = result.fail_msg
670 logging.error("Can't activate master IP address: %s", msg)
# CheckMasterd(options, args): checks run before the master daemon starts.
# Each failing check prints to stderr and exits with constants.EXIT_FAILURE.
#
#   1. Positional arguments are rejected with a usage message.
#   2. ssconf.CheckMaster(options.debug) is invoked.
#   3. options.uid and options.gid are resolved from constants.MASTERD_USER
#      and constants.DAEMONS_GROUP through pwd/grp; a missing user or group
#      is reported.
#   4. runtime.InitArchInfo() determines static architecture information.
#   5. config.ConfigWriter() is instantiated purely as a sanity check:
#      errors.ConfigVersionMismatch yields a message naming the two versions
#      taken from err.args[0] and err.args[1], and errors.ConfigurationError
#      yields a message about an unreadable configuration.
#   6. With options.no_voting set and options.yes_do_it unset, the operator
#      must type uppercase "YES" on stdin, otherwise startup is aborted.
#      CheckAgreement is run in a separate process because it uses RPC and
#      threads and must not run in the process that later daemonizes; a
#      falsy outcome aborts startup.
#   7. ActivateMasterIP is likewise run in a separate process; its outcome is
#      ignored (see the TODO).
#
# NOTE(review): the `try:` lines, the exception clause for the user/group
# lookup, the version arguments of the mismatch message and the `else:` that
# presumably limits CheckAgreement to the voting case are not visible in this
# listing -- confirm against the complete file.
673 def CheckMasterd(options, args):
674 """Initial checks whether to run or exit with a failure.
677 if args: # masterd doesn't take any arguments
678 print >> sys.stderr, ("Usage: %s [-f] [-d]" % sys.argv[0])
679 sys.exit(constants.EXIT_FAILURE)
681 ssconf.CheckMaster(options.debug)
684 options.uid = pwd.getpwnam(constants.MASTERD_USER).pw_uid
685 options.gid = grp.getgrnam(constants.DAEMONS_GROUP).gr_gid
687 print >> sys.stderr, ("User or group not existing on system: %s:%s" %
688 (constants.MASTERD_USER, constants.DAEMONS_GROUP))
689 sys.exit(constants.EXIT_FAILURE)
691 # Determine static runtime architecture information
692 runtime.InitArchInfo()
694 # Check the configuration is sane before anything else
696 config.ConfigWriter()
697 except errors.ConfigVersionMismatch, err:
698 v1 = "%s.%s.%s" % version.SplitVersion(err.args[0])
699 v2 = "%s.%s.%s" % version.SplitVersion(err.args[1])
700 print >> sys.stderr, \
701 ("Configuration version mismatch. The current Ganeti software"
702 " expects version %s, but the on-disk configuration file has"
703 " version %s. This is likely the result of upgrading the"
704 " software without running the upgrade procedure. Please contact"
705 " your cluster administrator or complete the upgrade using the"
706 " cfgupgrade utility, after reading the upgrade notes." %
708 sys.exit(constants.EXIT_FAILURE)
709 except errors.ConfigurationError, err:
710 print >> sys.stderr, \
711 ("Configuration error while opening the configuration file: %s\n"
712 "This might be caused by an incomplete software upgrade or"
713 " by a corrupted configuration file. Until the problem is fixed"
714 " the master daemon cannot start." % str(err))
715 sys.exit(constants.EXIT_FAILURE)
717 # If CheckMaster didn't fail we believe we are the master, but we have to
718 # confirm with the other nodes.
719 if options.no_voting:
720 if not options.yes_do_it:
721 sys.stdout.write("The 'no voting' option has been selected.\n")
722 sys.stdout.write("This is dangerous, please confirm by"
723 " typing uppercase 'yes': ")
726 confirmation = sys.stdin.readline().strip()
727 if confirmation != "YES":
728 print >> sys.stderr, "Aborting."
729 sys.exit(constants.EXIT_FAILURE)
732 # CheckAgreement uses RPC and threads, hence it needs to be run in
733 # a separate process before we call utils.Daemonize in the current
735 if not utils.RunInSeparateProcess(CheckAgreement):
736 sys.exit(constants.EXIT_FAILURE)
738 # ActivateMasterIP also uses RPC/threads, so we run it again via a
741 # TODO: decide whether failure to activate the master IP is a fatal error
742 utils.RunInSeparateProcess(ActivateMasterIP)
# PrepMasterd(options, _): preparation step run while the PID file is held.
#
# Removes any existing file at pathutils.MASTER_SOCKET (safe because the PID
# file rules out a concurrently running daemon), creates the daemon.Mainloop
# and a MasterServer bound to that socket path with options.uid/options.gid
# as owner, and returns the pair (mainloop, master). The second positional
# argument is ignored.
745 def PrepMasterd(options, _):
746 """Prep master daemon function, executed with the PID file held.
749 # This is safe to do as the pid file guarantees against
750 # concurrent execution.
751 utils.RemoveFile(pathutils.MASTER_SOCKET)
753 mainloop = daemon.Mainloop()
754 master = MasterServer(pathutils.MASTER_SOCKET, options.uid, options.gid)
755 return (mainloop, master)
# ExecMasterd(options, args, prep_data): main function of the master daemon,
# run while the PID file is held.
#
#   prep_data - the (mainloop, master) pair produced by PrepMasterd
#   options, args - not referenced in the visible lines; pylint W0613
#                   (unused-argument) is disabled on the def line
#
# Runs the main loop with master.WaitForShutdown as its shutdown_wait_fn,
# then cleans up the server, removes the master socket file and logs a clean
# shutdown.
#
# NOTE(review): the lines between these statements (presumably the
# try/finally structure and further setup) are not visible in this listing --
# confirm against the complete file.
758 def ExecMasterd(options, args, prep_data): # pylint: disable=W0613
759 """Main master daemon function, executed with the PID file held.
762 (mainloop, master) = prep_data
768 mainloop.Run(shutdown_wait_fn=master.WaitForShutdown)
770 master.server_cleanup()
774 utils.RemoveFile(pathutils.MASTER_SOCKET)
776 logging.info("Clean master daemon shutdown")
# Entry-point body: builds the command-line parser and starts the daemon.
#
# The OptionParser reports "%prog (ganeti) <constants.RELEASE_VERSION>" as its
# version string and defines two boolean flags, both defaulting to False:
#   --no-voting  (options.no_voting) - start without checking that the nodes
#                                      agree on this node being the master
#   --yes-do-it  (options.yes_do_it) - skip the interactive confirmation that
#                                      --no-voting otherwise requires
# Control is then handed to daemon.GenericMain for constants.MASTERD, passing
# CheckMasterd, PrepMasterd and ExecMasterd, with multithreaded=True.
#
# NOTE(review): the enclosing function definition is not visible in this
# listing -- confirm against the complete file.
781 parser = OptionParser(description="Ganeti master daemon",
782 usage="%prog [-f] [-d]",
783 version="%%prog (ganeti) %s" %
784 constants.RELEASE_VERSION)
785 parser.add_option("--no-voting", dest="no_voting",
786 help="Do not check that the nodes agree on this node"
787 " being the master and start the daemon unconditionally",
788 default=False, action="store_true")
789 parser.add_option("--yes-do-it", dest="yes_do_it",
790 help="Override interactive check for --no-voting",
791 default=False, action="store_true")
792 daemon.GenericMain(constants.MASTERD, parser, CheckMasterd, PrepMasterd,
793 ExecMasterd, multithreaded=True)