4 # Copyright (C) 2006, 2007, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Master daemon program.
24 Some classes deviate from the standard style guide since the
25 inheritance from parent classes requires it.
29 # pylint: disable=C0103
30 # C0103: Invalid name ganeti-masterd
41 from optparse import OptionParser
43 from ganeti import config
44 from ganeti import constants
45 from ganeti import daemon
46 from ganeti import mcpu
47 from ganeti import opcodes
48 from ganeti import jqueue
49 from ganeti import locking
50 from ganeti import luxi
51 from ganeti import utils
52 from ganeti import errors
53 from ganeti import ssconf
54 from ganeti import workerpool
55 from ganeti import rpc
56 from ganeti import bootstrap
57 from ganeti import netutils
58 from ganeti import objects
59 from ganeti import query
60 from ganeti import runtime
61 from ganeti import pathutils
64 CLIENT_REQUEST_WORKERS = 16
66 EXIT_NOTMASTER = constants.EXIT_NOTMASTER
67 EXIT_NODESETUP_ERROR = constants.EXIT_NODESETUP_ERROR
def _LogNewJob(status, info, ops):
  """Log information about a recently submitted job.

  @param status: Whether the job submission succeeded
  @param info: Job ID if C{status} is true, otherwise the failure reason
  @param ops: Opcodes making up the job; C{Summary()} is called on each

  """
  op_summary = utils.CommaJoin(op.Summary() for op in ops)

  if status:
    logging.info("New job with id %s, summary: %s", info, op_summary)
  else:
    logging.info("Failed to submit job, reason: '%s', summary: %s",
                 info, op_summary)
83 class ClientRequestWorker(workerpool.BaseWorker):
84 # pylint: disable=W0221
85 def RunTask(self, server, message, client):
86 """Process the request.
89 client_ops = ClientOps(server)
92 (method, args, version) = luxi.ParseRequest(message)
93 except luxi.ProtocolError, err:
94 logging.error("Protocol Error: %s", err)
100 # Verify client's version if there was one in the request
101 if version is not None and version != constants.LUXI_VERSION:
102 raise errors.LuxiError("LUXI version mismatch, server %s, request %s" %
103 (constants.LUXI_VERSION, version))
105 result = client_ops.handle_request(method, args)
107 except errors.GenericError, err:
108 logging.exception("Unexpected exception")
110 result = errors.EncodeException(err)
112 logging.exception("Unexpected exception")
114 result = "Caught exception: %s" % str(err[1])
117 reply = luxi.FormatResponse(success, result)
118 client.send_message(reply)
119 # awake the main thread so that it can write out the data.
120 server.awaker.signal()
121 except: # pylint: disable=W0702
122 logging.exception("Send error")
126 class MasterClientHandler(daemon.AsyncTerminatedMessageStream):
127 """Handler for master peers.
132 def __init__(self, server, connected_socket, client_address, family):
133 daemon.AsyncTerminatedMessageStream.__init__(self, connected_socket,
136 family, self._MAX_UNHANDLED)
139 def handle_message(self, message, _):
140 self.server.request_workers.AddTask((self.server, message, self))
143 class _MasterShutdownCheck:
144 """Logic for master daemon shutdown.
147 #: How long to wait between checks
148 _CHECK_INTERVAL = 5.0
150 #: How long to wait after all jobs are done (e.g. to give clients time to
151 #: retrieve the job status)
152 _SHUTDOWN_LINGER = 5.0
155 """Initializes this class.
158 self._had_active_jobs = None
159 self._linger_timeout = None
161 def __call__(self, jq_prepare_result):
162 """Determines if master daemon is ready for shutdown.
164 @param jq_prepare_result: Result of L{jqueue.JobQueue.PrepareShutdown}
165 @rtype: None or number
166 @return: None if master daemon is ready, timeout if the check must be
170 if jq_prepare_result:
171 # Check again shortly
172 logging.info("Job queue has been notified for shutdown but is still"
173 " busy; next check in %s seconds", self._CHECK_INTERVAL)
174 self._had_active_jobs = True
175 return self._CHECK_INTERVAL
177 if not self._had_active_jobs:
178 # Can shut down as there were no active jobs on the first check
181 # No jobs are running anymore, but maybe some clients want to collect some
182 # information. Give them a short amount of time.
183 if self._linger_timeout is None:
184 self._linger_timeout = utils.RunningTimeout(self._SHUTDOWN_LINGER, True)
186 remaining = self._linger_timeout.Remaining()
188 logging.info("Job queue no longer busy; shutting down master daemon"
189 " in %s seconds", remaining)
191 # TODO: Should the master daemon socket be closed at this point? Doing so
192 # wouldn't affect existing connections.
200 class MasterServer(daemon.AsyncStreamServer):
203 This is the main asynchronous master server. It handles connections to the
207 family = socket.AF_UNIX
209 def __init__(self, address, uid, gid):
210 """MasterServer constructor
212 @param address: the unix socket address to bind the MasterServer to
213 @param uid: The uid of the owner of the socket
214 @param gid: The gid of the owner of the socket
217 temp_name = tempfile.mktemp(dir=os.path.dirname(address))
218 daemon.AsyncStreamServer.__init__(self, self.family, temp_name)
219 os.chmod(temp_name, 0770)
220 os.chown(temp_name, uid, gid)
221 os.rename(temp_name, address)
223 self.awaker = daemon.AsyncAwaker()
225 # We'll only start threads once we've forked.
227 self.request_workers = None
229 self._shutdown_check = None
231 def handle_connection(self, connected_socket, client_address):
232 # TODO: add connection count and limit the number of open connections to a
233 # maximum number to avoid breaking for lack of file descriptors or memory.
234 MasterClientHandler(self, connected_socket, client_address, self.family)
236 def setup_queue(self):
237 self.context = GanetiContext()
238 self.request_workers = workerpool.WorkerPool("ClientReq",
239 CLIENT_REQUEST_WORKERS,
242 def WaitForShutdown(self):
243 """Prepares server for shutdown.
246 if self._shutdown_check is None:
247 self._shutdown_check = _MasterShutdownCheck()
249 return self._shutdown_check(self.context.jobqueue.PrepareShutdown())
251 def server_cleanup(self):
252 """Cleanup the server.
254 This involves shutting down the processor threads and the master
261 if self.request_workers:
262 self.request_workers.TerminateWorkers()
264 self.context.jobqueue.Shutdown()
268 """Class holding high-level client operations."""
269 def __init__(self, server):
272 def handle_request(self, method, args): # pylint: disable=R0911
273 context = self.server.context
274 queue = context.jobqueue
276 # TODO: Parameter validation
277 if not isinstance(args, (tuple, list)):
278 logging.info("Received invalid arguments of type '%s'", type(args))
279 raise ValueError("Invalid arguments type '%s'" % type(args))
281 # TODO: Rewrite to not exit in each 'if/elif' branch
283 if method == luxi.REQ_SUBMIT_JOB:
284 logging.info("Receiving new job")
286 ops = [opcodes.OpCode.LoadOpCode(state) for state in job_def]
287 job_id = queue.SubmitJob(ops)
288 _LogNewJob(True, job_id, ops)
291 elif method == luxi.REQ_SUBMIT_MANY_JOBS:
292 logging.info("Receiving multiple jobs")
296 jobs.append([opcodes.OpCode.LoadOpCode(state) for state in ops])
297 job_ids = queue.SubmitManyJobs(jobs)
298 for ((status, job_id), ops) in zip(job_ids, jobs):
299 _LogNewJob(status, job_id, ops)
302 elif method == luxi.REQ_CANCEL_JOB:
304 logging.info("Received job cancel request for %s", job_id)
305 return queue.CancelJob(job_id)
307 elif method == luxi.REQ_CHANGE_JOB_PRIORITY:
308 (job_id, priority) = args
309 logging.info("Received request to change priority for job %s to %s",
311 return queue.ChangeJobPriority(job_id, priority)
313 elif method == luxi.REQ_ARCHIVE_JOB:
315 logging.info("Received job archive request for %s", job_id)
316 return queue.ArchiveJob(job_id)
318 elif method == luxi.REQ_AUTO_ARCHIVE_JOBS:
319 (age, timeout) = args
320 logging.info("Received job autoarchive request for age %s, timeout %s",
322 return queue.AutoArchiveJobs(age, timeout)
324 elif method == luxi.REQ_WAIT_FOR_JOB_CHANGE:
325 (job_id, fields, prev_job_info, prev_log_serial, timeout) = args
326 logging.info("Received job poll request for %s", job_id)
327 return queue.WaitForJobChanges(job_id, fields, prev_job_info,
328 prev_log_serial, timeout)
330 elif method == luxi.REQ_QUERY:
331 (what, fields, qfilter) = args
333 if what in constants.QR_VIA_OP:
334 result = self._Query(opcodes.OpQuery(what=what, fields=fields,
336 elif what == constants.QR_LOCK:
337 if qfilter is not None:
338 raise errors.OpPrereqError("Lock queries can't be filtered",
340 return context.glm.QueryLocks(fields)
341 elif what == constants.QR_JOB:
342 return queue.QueryJobs(fields, qfilter)
343 elif what in constants.QR_VIA_LUXI:
344 raise NotImplementedError
346 raise errors.OpPrereqError("Resource type '%s' unknown" % what,
351 elif method == luxi.REQ_QUERY_FIELDS:
352 (what, fields) = args
353 req = objects.QueryFieldsRequest(what=what, fields=fields)
356 fielddefs = query.ALL_FIELDS[req.what]
358 raise errors.OpPrereqError("Resource type '%s' unknown" % req.what,
361 return query.QueryFields(fielddefs, req.fields)
363 elif method == luxi.REQ_QUERY_JOBS:
364 (job_ids, fields) = args
365 if isinstance(job_ids, (tuple, list)) and job_ids:
366 msg = utils.CommaJoin(job_ids)
369 logging.info("Received job query request for %s", msg)
370 return queue.OldStyleQueryJobs(job_ids, fields)
372 elif method == luxi.REQ_QUERY_INSTANCES:
373 (names, fields, use_locking) = args
374 logging.info("Received instance query request for %s", names)
376 raise errors.OpPrereqError("Sync queries are not allowed",
378 op = opcodes.OpInstanceQuery(names=names, output_fields=fields,
379 use_locking=use_locking)
380 return self._Query(op)
382 elif method == luxi.REQ_QUERY_NODES:
383 (names, fields, use_locking) = args
384 logging.info("Received node query request for %s", names)
386 raise errors.OpPrereqError("Sync queries are not allowed",
388 op = opcodes.OpNodeQuery(names=names, output_fields=fields,
389 use_locking=use_locking)
390 return self._Query(op)
392 elif method == luxi.REQ_QUERY_GROUPS:
393 (names, fields, use_locking) = args
394 logging.info("Received group query request for %s", names)
396 raise errors.OpPrereqError("Sync queries are not allowed",
398 op = opcodes.OpGroupQuery(names=names, output_fields=fields)
399 return self._Query(op)
401 elif method == luxi.REQ_QUERY_NETWORKS:
402 (names, fields, use_locking) = args
403 logging.info("Received network query request for %s", names)
405 raise errors.OpPrereqError("Sync queries are not allowed",
407 op = opcodes.OpNetworkQuery(names=names, output_fields=fields)
408 return self._Query(op)
410 elif method == luxi.REQ_QUERY_EXPORTS:
411 (nodes, use_locking) = args
413 raise errors.OpPrereqError("Sync queries are not allowed",
415 logging.info("Received exports query request")
416 op = opcodes.OpBackupQuery(nodes=nodes, use_locking=use_locking)
417 return self._Query(op)
419 elif method == luxi.REQ_QUERY_CONFIG_VALUES:
421 logging.info("Received config values query request for %s", fields)
422 op = opcodes.OpClusterConfigQuery(output_fields=fields)
423 return self._Query(op)
425 elif method == luxi.REQ_QUERY_CLUSTER_INFO:
426 logging.info("Received cluster info query request")
427 op = opcodes.OpClusterQuery()
428 return self._Query(op)
430 elif method == luxi.REQ_QUERY_TAGS:
432 logging.info("Received tags query request")
433 op = opcodes.OpTagsGet(kind=kind, name=name, use_locking=False)
434 return self._Query(op)
436 elif method == luxi.REQ_SET_DRAIN_FLAG:
437 (drain_flag, ) = args
438 logging.info("Received queue drain flag change request to %s",
440 return queue.SetDrainFlag(drain_flag)
442 elif method == luxi.REQ_SET_WATCHER_PAUSE:
446 logging.info("Received request to no longer pause the watcher")
448 if not isinstance(until, (int, float)):
449 raise TypeError("Duration must be an integer or float")
451 if until < time.time():
452 raise errors.GenericError("Unable to set pause end time in the past")
454 logging.info("Received request to pause the watcher until %s", until)
456 return _SetWatcherPause(until)
459 logging.info("Received invalid request '%s'", method)
460 raise ValueError("Invalid operation '%s'" % method)
462 def _Query(self, op):
463 """Runs the specified opcode and returns the result.
466 # Queries don't have a job id
467 proc = mcpu.Processor(self.server.context, None, enable_locks=False)
469 # TODO: Executing an opcode using locks will acquire them in blocking mode.
470 # Consider using a timeout for retries.
471 return proc.ExecOpCode(op, None)
class GanetiContext(object):
  """Context common to all ganeti threads.

  This class creates and holds common objects shared by all threads: the
  configuration (C{cfg}), the lock manager (C{glm}), the RPC runner (C{rpc})
  and the job queue (C{jobqueue}).

  """
  # pylint: disable=W0212
  # we do want to ensure a singleton here
  _instance = None

  def __init__(self):
    """Constructs a new GanetiContext object.

    There should be only a GanetiContext object at any time, so this
    function raises an error if this is not the case.

    """
    assert self.__class__._instance is None, "double GanetiContext instance"

    # Create global configuration object
    self.cfg = config.ConfigWriter()

    # Lock manager, seeded with the names currently known to the configuration
    self.glm = locking.GanetiLockManager(
      self.cfg.GetNodeList(),
      self.cfg.GetNodeGroupList(),
      self.cfg.GetInstanceList(),
      self.cfg.GetNetworkList())

    self.cfg.SetContext(self)

    # RPC runner
    self.rpc = rpc.RpcRunner(self.cfg, self.glm.AddToLockMonitor)

    # Job queue
    self.jobqueue = jqueue.JobQueue(self)

    # setting this also locks the class against attribute modifications
    self.__class__._instance = self

  def __setattr__(self, name, value):
    """Setting GanetiContext attributes is forbidden after initialization.

    Assignment is only allowed while no instance has been registered in
    C{_instance}, i.e. while the constructor is still running.

    """
    assert self.__class__._instance is None, "Attempt to modify Ganeti Context"
    object.__setattr__(self, name, value)

  def AddNode(self, node, ec_id):
    """Adds a node to the configuration and lock manager.

    @param node: Node object; its C{name} is used for the new locks
    @param ec_id: Passed through to the configuration's C{AddNode}

    """
    # Add it to the configuration
    self.cfg.AddNode(node, ec_id)

    # If preseeding fails it'll not be added
    self.jobqueue.AddNode(node)

    # Add the new node to the Ganeti Lock Manager
    self.glm.add(locking.LEVEL_NODE, node.name)
    self.glm.add(locking.LEVEL_NODE_RES, node.name)

  def ReaddNode(self, node):
    """Updates a node that's already in the configuration

    @param node: Node object handed to the job queue

    """
    # Synchronize the queue again
    self.jobqueue.AddNode(node)

  def RemoveNode(self, name):
    """Removes a node from the configuration and lock manager.

    @param name: Name of the node to remove

    """
    # Remove node from configuration
    self.cfg.RemoveNode(name)

    # Notify job queue
    self.jobqueue.RemoveNode(name)

    # Remove the node from the Ganeti Lock Manager
    self.glm.remove(locking.LEVEL_NODE, name)
    self.glm.remove(locking.LEVEL_NODE_RES, name)
def _SetWatcherPause(until):
  """Creates or removes the watcher pause file.

  @type until: None or int
  @param until: Unix timestamp saying until when the watcher shouldn't run;
    C{None} removes the pause file
  @return: The value of C{until}, unchanged

  """
  if until is None:
    utils.RemoveFile(pathutils.WATCHER_PAUSEFILE)
  else:
    # The pause file holds the timestamp as a single decimal line
    utils.WriteFile(pathutils.WATCHER_PAUSEFILE,
                    data="%d\n" % (until, ))

  return until
574 def CheckAgreement():
575 """Check the agreement on who is the master.
577 The function uses a very simple algorithm: we must get more positive
578 than negative answers. Since in most of the cases we are the master,
579 we'll use our own config file for getting the node list. In the
580 future we could collect the current node list from our (possibly
581 obsolete) known nodes.
583 In order to account for cold-start of all nodes, we retry for up to
584 a minute until we get a real answer as the top-voted one. If the
585 nodes are more out-of-sync, for now manual startup of the master
588 Note that for a cluster with an even number of nodes, we need at least half
589 of the nodes (beside ourselves) to vote for us. This creates a
590 problem on two-node clusters, since in this case we require the
591 other node to be up too to confirm our status.
594 myself = netutils.Hostname.GetSysName()
595 #temp instantiation of a config writer, used only to get the node list
596 cfg = config.ConfigWriter()
597 node_list = cfg.GetNodeList()
601 votes = bootstrap.GatherMasterVotes(node_list)
603 # empty node list, this is a one node cluster
605 if votes[0][0] is None:
611 logging.critical("Cluster inconsistent, most of the nodes didn't answer"
612 " after multiple retries. Aborting startup")
613 logging.critical("Use the --no-voting option if you understand what"
614 " effects it has on the cluster state")
616 # here a real node is at the top of the list
617 all_votes = sum(item[1] for item in votes)
618 top_node, top_votes = votes[0]
621 if top_node != myself:
622 logging.critical("It seems we are not the master (top-voted node"
623 " is %s with %d out of %d votes)", top_node, top_votes,
625 elif top_votes < all_votes - top_votes:
626 logging.critical("It seems we are not the master (%d votes for,"
627 " %d votes against)", top_votes, all_votes - top_votes)
def ActivateMasterIP():
  """Activate the master IP address via a bootstrap RPC call.

  The master network parameters and the "use external master IP script"
  setting are read from a freshly opened configuration. A failure reported
  in the RPC result is only logged.

  """
  cfg = config.ConfigWriter()
  master_params = cfg.GetMasterNetworkParameters()
  ems = cfg.GetUseExternalMipScript()
  runner = rpc.BootstrapRunner()
  result = runner.call_node_activate_master_ip(master_params.name,
                                               master_params, ems)

  msg = result.fail_msg
  if msg:
    logging.error("Can't activate master IP address: %s", msg)
649 def CheckMasterd(options, args):
650 """Initial checks whether to run or exit with a failure.
653 if args: # masterd doesn't take any arguments
654 print >> sys.stderr, ("Usage: %s [-f] [-d]" % sys.argv[0])
655 sys.exit(constants.EXIT_FAILURE)
657 ssconf.CheckMaster(options.debug)
660 options.uid = pwd.getpwnam(constants.MASTERD_USER).pw_uid
661 options.gid = grp.getgrnam(constants.DAEMONS_GROUP).gr_gid
663 print >> sys.stderr, ("User or group not existing on system: %s:%s" %
664 (constants.MASTERD_USER, constants.DAEMONS_GROUP))
665 sys.exit(constants.EXIT_FAILURE)
667 # Determine static runtime architecture information
668 runtime.InitArchInfo()
670 # Check the configuration is sane before anything else
672 config.ConfigWriter()
673 except errors.ConfigVersionMismatch, err:
674 v1 = "%s.%s.%s" % constants.SplitVersion(err.args[0])
675 v2 = "%s.%s.%s" % constants.SplitVersion(err.args[1])
676 print >> sys.stderr, \
677 ("Configuration version mismatch. The current Ganeti software"
678 " expects version %s, but the on-disk configuration file has"
679 " version %s. This is likely the result of upgrading the"
680 " software without running the upgrade procedure. Please contact"
681 " your cluster administrator or complete the upgrade using the"
682 " cfgupgrade utility, after reading the upgrade notes." %
684 sys.exit(constants.EXIT_FAILURE)
685 except errors.ConfigurationError, err:
686 print >> sys.stderr, \
687 ("Configuration error while opening the configuration file: %s\n"
688 "This might be caused by an incomplete software upgrade or"
689 " by a corrupted configuration file. Until the problem is fixed"
690 " the master daemon cannot start." % str(err))
691 sys.exit(constants.EXIT_FAILURE)
693 # If CheckMaster didn't fail we believe we are the master, but we have to
694 # confirm with the other nodes.
695 if options.no_voting:
696 if not options.yes_do_it:
697 sys.stdout.write("The 'no voting' option has been selected.\n")
698 sys.stdout.write("This is dangerous, please confirm by"
699 " typing uppercase 'yes': ")
702 confirmation = sys.stdin.readline().strip()
703 if confirmation != "YES":
704 print >> sys.stderr, "Aborting."
705 sys.exit(constants.EXIT_FAILURE)
708 # CheckAgreement uses RPC and threads, hence it needs to be run in
709 # a separate process before we call utils.Daemonize in the current
711 if not utils.RunInSeparateProcess(CheckAgreement):
712 sys.exit(constants.EXIT_FAILURE)
714 # ActivateMasterIP also uses RPC/threads, so we run it again via a
717 # TODO: decide whether failure to activate the master IP is a fatal error
718 utils.RunInSeparateProcess(ActivateMasterIP)
def PrepMasterd(options, _):
  """Prep master daemon function, executed with the PID file held.

  @param options: Parsed options; C{uid} and C{gid} are passed to
    L{MasterServer} as owner of the master socket
  @return: Tuple of (mainloop, master server)

  """
  # This is safe to do as the pid file guarantees against
  # concurrent execution.
  utils.RemoveFile(pathutils.MASTER_SOCKET)

  mainloop = daemon.Mainloop()
  master = MasterServer(pathutils.MASTER_SOCKET, options.uid, options.gid)
  return (mainloop, master)
734 def ExecMasterd(options, args, prep_data): # pylint: disable=W0613
735 """Main master daemon function, executed with the PID file held.
738 (mainloop, master) = prep_data
744 mainloop.Run(shutdown_wait_fn=master.WaitForShutdown)
746 master.server_cleanup()
750 utils.RemoveFile(pathutils.MASTER_SOCKET)
752 logging.info("Clean master daemon shutdown")
757 parser = OptionParser(description="Ganeti master daemon",
758 usage="%prog [-f] [-d]",
759 version="%%prog (ganeti) %s" %
760 constants.RELEASE_VERSION)
761 parser.add_option("--no-voting", dest="no_voting",
762 help="Do not check that the nodes agree on this node"
763 " being the master and start the daemon unconditionally",
764 default=False, action="store_true")
765 parser.add_option("--yes-do-it", dest="yes_do_it",
766 help="Override interactive check for --no-voting",
767 default=False, action="store_true")
768 daemon.GenericMain(constants.MASTERD, parser, CheckMasterd, PrepMasterd,
769 ExecMasterd, multithreaded=True)