Statistics
| Branch: | Tag: | Revision:

root / daemons / ganeti-masterd @ 82d9caef

History | View | Annotate | Download (12.9 kB)

1 685ee993 Iustin Pop
#!/usr/bin/python -u
2 ffeffa1d Iustin Pop
#
3 ffeffa1d Iustin Pop
4 ffeffa1d Iustin Pop
# Copyright (C) 2006, 2007 Google Inc.
5 ffeffa1d Iustin Pop
#
6 ffeffa1d Iustin Pop
# This program is free software; you can redistribute it and/or modify
7 ffeffa1d Iustin Pop
# it under the terms of the GNU General Public License as published by
8 ffeffa1d Iustin Pop
# the Free Software Foundation; either version 2 of the License, or
9 ffeffa1d Iustin Pop
# (at your option) any later version.
10 ffeffa1d Iustin Pop
#
11 ffeffa1d Iustin Pop
# This program is distributed in the hope that it will be useful, but
12 ffeffa1d Iustin Pop
# WITHOUT ANY WARRANTY; without even the implied warranty of
13 ffeffa1d Iustin Pop
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 ffeffa1d Iustin Pop
# General Public License for more details.
15 ffeffa1d Iustin Pop
#
16 ffeffa1d Iustin Pop
# You should have received a copy of the GNU General Public License
17 ffeffa1d Iustin Pop
# along with this program; if not, write to the Free Software
18 ffeffa1d Iustin Pop
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 ffeffa1d Iustin Pop
# 02110-1301, USA.
20 ffeffa1d Iustin Pop
21 ffeffa1d Iustin Pop
22 ffeffa1d Iustin Pop
"""Master daemon program.
23 ffeffa1d Iustin Pop
24 ffeffa1d Iustin Pop
Some classes deviate from the standard style guide since the
25 ffeffa1d Iustin Pop
inheritance from parent classes requires it.
26 ffeffa1d Iustin Pop
27 ffeffa1d Iustin Pop
"""
28 ffeffa1d Iustin Pop
29 ffeffa1d Iustin Pop
30 c1f2901b Iustin Pop
import sys
31 ffeffa1d Iustin Pop
import SocketServer
32 ffeffa1d Iustin Pop
import time
33 ffeffa1d Iustin Pop
import collections
34 ffeffa1d Iustin Pop
import Queue
35 ffeffa1d Iustin Pop
import random
36 ffeffa1d Iustin Pop
import signal
37 ffeffa1d Iustin Pop
import simplejson
38 96cb3986 Michael Hanselmann
import logging
39 ffeffa1d Iustin Pop
40 ffeffa1d Iustin Pop
from cStringIO import StringIO
41 c1f2901b Iustin Pop
from optparse import OptionParser
42 ffeffa1d Iustin Pop
43 39dcf2ef Guido Trotter
from ganeti import config
44 ffeffa1d Iustin Pop
from ganeti import constants
45 ffeffa1d Iustin Pop
from ganeti import mcpu
46 ffeffa1d Iustin Pop
from ganeti import opcodes
47 ffeffa1d Iustin Pop
from ganeti import jqueue
48 39dcf2ef Guido Trotter
from ganeti import locking
49 ffeffa1d Iustin Pop
from ganeti import luxi
50 ffeffa1d Iustin Pop
from ganeti import utils
51 c1f2901b Iustin Pop
from ganeti import errors
52 c1f2901b Iustin Pop
from ganeti import ssconf
53 23e50d39 Michael Hanselmann
from ganeti import workerpool
54 b1b6ea87 Iustin Pop
from ganeti import rpc
55 d7cdb55d Iustin Pop
from ganeti import bootstrap
56 c1f2901b Iustin Pop
57 c1f2901b Iustin Pop
58 23e50d39 Michael Hanselmann
# Passed as the first argument to workerpool.WorkerPool in
# IOServer.setup_queue (presumably the number of worker threads serving
# client requests -- confirm against workerpool.WorkerPool).
CLIENT_REQUEST_WORKERS = 16
59 23e50d39 Michael Hanselmann
60 c1f2901b Iustin Pop
# Module-level alias for constants.EXIT_NOTMASTER; it is not referenced
# anywhere else in the visible part of this file.
EXIT_NOTMASTER = constants.EXIT_NOTMASTER
61 c1f2901b Iustin Pop
# Module-level alias for constants.EXIT_NODESETUP_ERROR; it is not
# referenced anywhere else in the visible part of this file.
EXIT_NODESETUP_ERROR = constants.EXIT_NODESETUP_ERROR
62 ffeffa1d Iustin Pop
63 ffeffa1d Iustin Pop
64 23e50d39 Michael Hanselmann
class ClientRequestWorker(workerpool.BaseWorker):
  """Worker that serves a single client connection per task.

  """
  def RunTask(self, server, request, client_address):
    """Serve one client request on behalf of the server.

    The logic mirrors what SocketServer.ThreadingMixIn does in its
    per-request thread.

    Args:
      server: the server object owning the request; must provide
              finish_request, handle_error and close_request
      request: the request (connection) object to process
      client_address: the address of the client

    """
    try:
      server.finish_request(request, client_address)
      server.close_request(request)
      return
    except:
      # Catch-all on purpose: any failure is handed to the server's own
      # error hook, after which the request is closed below.
      server.handle_error(request, client_address)
    server.close_request(request)
77 23e50d39 Michael Hanselmann
78 23e50d39 Michael Hanselmann
79 ffeffa1d Iustin Pop
class IOServer(SocketServer.UnixStreamServer):
  """IO thread class.

  This class takes care of initializing the other threads, setting
  signal handlers (which are processed only in this thread), and doing
  cleanup at shutdown.

  """
  def __init__(self, address, rqhandler):
    """IOServer constructor

    Args:
      address: the address to bind this IOServer to
      rqhandler: RequestHandler type object

    """
    SocketServer.UnixStreamServer.__init__(self, address, rqhandler)

    # We'll only start threads once we've forked.
    self.context = None
    self.request_workers = None

  def setup_queue(self):
    """Create the shared context and the client request worker pool.

    This is kept out of the constructor because, as noted there, threads
    are only to be started once the daemon has forked.

    """
    self.context = GanetiContext()
    self.request_workers = workerpool.WorkerPool(CLIENT_REQUEST_WORKERS,
                                                 ClientRequestWorker)

  def process_request(self, request, client_address):
    """Add task to workerpool to process request.

    setup_queue must have been called before; otherwise there is no
    worker pool to hand the request to.

    """
    self.request_workers.AddTask(self, request, client_address)

  def serve_forever(self):
    """Handle one request at a time until told to quit.

    A handler for SIGINT and SIGTERM is installed for the duration of
    the loop; its flag is checked between calls to handle_request. The
    handler is always reset on the way out.

    """
    sighandler = utils.SignalHandler([signal.SIGINT, signal.SIGTERM])
    try:
      while not sighandler.called:
        self.handle_request()
    finally:
      sighandler.Reset()

  def server_cleanup(self):
    """Cleanup the server.

    This involves shutting down the processor threads and the master
    socket. Every step is attempted even if an earlier one raised, so a
    failure while closing the socket or terminating the workers does not
    leave the job queue running; the error still propagates to the
    caller afterwards.

    """
    try:
      self.server_close()
    finally:
      try:
        # Both attributes stay None until setup_queue has been called.
        if self.request_workers:
          self.request_workers.TerminateWorkers()
      finally:
        if self.context:
          self.context.jobqueue.Shutdown()
135 ffeffa1d Iustin Pop
136 ffeffa1d Iustin Pop
137 ffeffa1d Iustin Pop
class ClientRqHandler(SocketServer.BaseRequestHandler):
  """Client handler

  Reads EOM-terminated JSON messages from the client socket, passes
  each one to ClientOps.handle_request and writes back a JSON response.

  """
  # Byte that terminates every message on the wire
  EOM = '\3'
  # Maximum number of bytes requested from the socket per recv call
  READ_SIZE = 4096

  def setup(self):
    """Initialize the per-connection state.

    """
    # Data received so far that does not yet form a complete message
    self._buffer = ""
    # Complete messages received but not yet handed out
    self._msgs = collections.deque()
    self._ops = ClientOps(self.server)

  def handle(self):
    """Serve requests until the client disconnects or sends garbage.

    Each message must decode to a dict holding luxi.KEY_METHOD and
    luxi.KEY_ARGS. Anything else (including data that is not valid
    JSON) is logged and ends the conversation. For a well-formed
    request, a dict with luxi.KEY_SUCCESS and luxi.KEY_RESULT is sent
    back; failures of the operation itself are reported to the client
    through that response instead of closing the connection.

    """
    while True:
      msg = self.read_message()
      if msg is None:
        logging.info("client closed connection")
        break

      try:
        request = simplejson.loads(msg)
      except ValueError:
        # Undecodable input is handled like any other malformed request
        # instead of escaping as an unhandled exception.
        logging.error("malformed request received: %s", msg)
        break

      logging.debug("request: %s", request)
      if not isinstance(request, dict):
        logging.error("wrong request received: %s", msg)
        break

      method = request.get(luxi.KEY_METHOD, None)
      args = request.get(luxi.KEY_ARGS, None)
      if method is None or args is None:
        logging.error("no method or args in request")
        break

      success = False
      try:
        result = self._ops.handle_request(method, args)
        success = True
      except errors.GenericError:
        # Ganeti errors are passed to the client as (class name, args)
        err = sys.exc_info()[1]
        result = (err.__class__.__name__, err.args)
      except:
        # Catch-all on purpose: whatever went wrong is logged and
        # reported back to the client as a failed request.
        logging.error("Unexpected exception", exc_info=True)
        result = "Caught exception: %s" % str(sys.exc_info()[1])

      response = {
        luxi.KEY_SUCCESS: success,
        luxi.KEY_RESULT: result,
        }
      logging.debug("response: %s", response)
      self.send_message(simplejson.dumps(response))

  def read_message(self):
    """Return the next complete message from the client.

    Data is read from the socket until at least one EOM-terminated
    message is available; a trailing partial message is kept in the
    buffer for the next call.

    Returns:
      the message without its EOM terminator, or None if the client
      closed the connection before a complete message arrived

    """
    while not self._msgs:
      data = self.request.recv(self.READ_SIZE)
      if not data:
        return None
      new_msgs = (self._buffer + data).split(self.EOM)
      # The last element is whatever follows the final EOM (possibly the
      # empty string), i.e. a not yet complete message.
      self._buffer = new_msgs.pop()
      self._msgs.extend(new_msgs)
    return self._msgs.popleft()

  def send_message(self, msg):
    """Send one message, followed by the EOM terminator, to the client.

    """
    self.request.sendall(msg + self.EOM)
198 ffeffa1d Iustin Pop
199 ffeffa1d Iustin Pop
200 ffeffa1d Iustin Pop
class ClientOps:
  """Dispatcher for the high-level operations a client can request."""
  def __init__(self, server):
    """Remember the server whose context is used to run operations.

    """
    self.server = server

  def handle_request(self, method, args):
    """Execute a single client request and return its result.

    Args:
      method: one of the luxi.REQ_* request identifiers
      args: the request arguments; their expected shape depends on the
            method and is not validated here

    Returns:
      whatever the job queue method or query opcode returns

    Raises:
      ValueError: if the method is not one of the known requests

    """
    jobq = self.server.context.jobqueue

    # TODO: Parameter validation

    # Job queue operations
    if method == luxi.REQ_SUBMIT_JOB:
      job_ops = [opcodes.OpCode.LoadOpCode(state) for state in args]
      return jobq.SubmitJob(job_ops)

    if method == luxi.REQ_CANCEL_JOB:
      return jobq.CancelJob(args)

    if method == luxi.REQ_ARCHIVE_JOB:
      return jobq.ArchiveJob(args)

    if method == luxi.REQ_AUTOARCHIVE_JOBS:
      return jobq.AutoArchiveJobs(args)

    if method == luxi.REQ_WAIT_FOR_JOB_CHANGE:
      (job_id, fields, prev_job_info, prev_log_serial, timeout) = args
      return jobq.WaitForJobChanges(job_id, fields, prev_job_info,
                                    prev_log_serial, timeout)

    if method == luxi.REQ_QUERY_JOBS:
      (job_ids, fields) = args
      return jobq.QueryJobs(job_ids, fields)

    # Queries, executed directly as opcodes
    if method == luxi.REQ_QUERY_INSTANCES:
      (names, fields) = args
      query = opcodes.OpQueryInstances(names=names, output_fields=fields)
      return self._Query(query)

    if method == luxi.REQ_QUERY_NODES:
      (names, fields) = args
      query = opcodes.OpQueryNodes(names=names, output_fields=fields)
      return self._Query(query)

    if method == luxi.REQ_QUERY_EXPORTS:
      query = opcodes.OpQueryExports(nodes=args)
      return self._Query(query)

    if method == luxi.REQ_QUERY_CONFIG_VALUES:
      query = opcodes.OpQueryConfigValues(output_fields=args)
      return self._Query(query)

    if method == luxi.REQ_QUEUE_SET_DRAIN_FLAG:
      return jobq.SetDrainFlag(args)

    raise ValueError("Invalid operation")

  def _DummyLog(self, *args):
    """Log callback that discards everything it is given.

    """
    pass

  def _Query(self, op):
    """Execute the given opcode with a new processor and return its result.

    """
    processor = mcpu.Processor(self.server.context)
    # TODO: Where should log messages go?
    return processor.ExecOpCode(op, self._DummyLog, None)
272 ee6c7b94 Michael Hanselmann
273 ffeffa1d Iustin Pop
274 39dcf2ef Guido Trotter
class GanetiContext(object):
  """Holder of the objects shared by all ganeti threads.

  The configuration, the lock manager and the job queue are created
  once, here, and are reachable through this object afterwards.

  """
  # The one live instance, or None while none has been fully constructed
  _instance = None

  def __init__(self):
    """Builds the shared objects and registers the instance.

    Only one GanetiContext may exist at any time; an assertion fails
    if a second one is constructed.

    """
    cls = self.__class__
    assert cls._instance is None, "double GanetiContext instance"

    # Global configuration
    self.cfg = config.ConfigWriter()

    # Lock manager, seeded with the nodes and instances currently known
    node_names = self.cfg.GetNodeList()
    instance_names = self.cfg.GetInstanceList()
    self.glm = locking.GanetiLockManager(node_names, instance_names)

    # Job queue
    self.jobqueue = jqueue.JobQueue(self)

    # From here on __setattr__ refuses any further attribute changes
    cls._instance = self

  def __setattr__(self, name, value):
    """Sets an attribute, which is only allowed during initialization.

    """
    registered = self.__class__._instance
    assert registered is None, "Attempt to modify Ganeti Context"
    object.__setattr__(self, name, value)

  def AddNode(self, node):
    """Registers a new node with the configuration, queue and locks.

    """
    # Configuration first
    self.cfg.AddNode(node)

    # If preseeding fails it'll not be added
    self.jobqueue.AddNode(node.name)

    # Finally make the node known to the Ganeti Lock Manager
    self.glm.add(locking.LEVEL_NODE, node.name)

  def ReaddNode(self, node):
    """Handles the re-adding of a node that is already configured.

    """
    # Only the job queue has to be synchronized again
    self.jobqueue.AddNode(node.name)

  def RemoveNode(self, name):
    """Drops a node from the configuration, queue and lock manager.

    """
    # Configuration first
    self.cfg.RemoveNode(name)

    # Then tell the job queue
    self.jobqueue.RemoveNode(name)

    # Finally forget the node in the Ganeti Lock Manager
    self.glm.remove(locking.LEVEL_NODE, name)
344 d8470559 Michael Hanselmann
345 39dcf2ef Guido Trotter
346 c1f2901b Iustin Pop
def ParseOptions():
  """Parse the command line of the master daemon.

  Two flags are understood: -f/--foreground, which clears options.fork
  (default True), and -d/--debug, which sets options.debug (default
  False).

  Returns:
    (options, args) as from OptionParser.parse_args()

  """
  version_text = "%%prog (ganeti) %s" % constants.RELEASE_VERSION
  parser = OptionParser(description="Ganeti master daemon",
                        usage="%prog [-f] [-d]",
                        version=version_text)

  parser.add_option("-f", "--foreground",
                    dest="fork", action="store_false", default=True,
                    help="Don't detach from the current terminal")
  parser.add_option("-d", "--debug",
                    dest="debug", action="store_true", default=False,
                    help="Enable some debug messages")

  return parser.parse_args()
366 c1f2901b Iustin Pop
367 c1f2901b Iustin Pop
368 36205981 Iustin Pop
def CheckAgreement():
  """Ask the other nodes whether they agree that we are the master.

  The algorithm is deliberately simple: our own configuration supplies
  the node list, the votes of those nodes are gathered, and we accept
  the role only if we are the top-voted node and the votes for us are
  not outnumbered by the votes for anything else.

  To cope with a cold start of the whole cluster, the vote is repeated
  (six rounds, sleeping ten seconds after each inconclusive one) for
  as long as the top-voted entry is None rather than a real node. If
  the nodes are more out-of-sync than that, the master has to be
  started manually.

  Note that on a cluster with an even number of nodes at least half of
  the nodes (besides ourselves) must vote for us; on a two-node
  cluster this means the other node has to be up as well to confirm
  our status.

  Returns:
    True if we may act as master (including the case where no votes
    were returned at all, which is treated as a one node cluster),
    False otherwise

  """
  myself = utils.HostInfo().name

  # The config writer is needed only briefly, to obtain the node list
  tmp_cfg = config.ConfigWriter()
  node_list = tmp_cfg.GetNodeList()
  del tmp_cfg

  rounds_left = 6
  while rounds_left > 0:
    votes = bootstrap.GatherMasterVotes(node_list)
    if not votes:
      # empty node list, this is a one node cluster
      return True
    if votes[0][0] is not None:
      # a real node is at the top of the list
      break
    rounds_left -= 1
    time.sleep(10)

  if rounds_left == 0:
    logging.critical("Cluster inconsistent, most of the nodes didn't answer"
                     " after multiple retries. Aborting startup")
    return False

  total = sum(entry[1] for entry in votes)
  top_node, top_votes = votes[0]
  against = total - top_votes

  if top_node != myself:
    logging.critical("It seems we are not the master (top-voted node"
                     " is %s)", top_node)
    return False

  if top_votes < against:
    logging.critical("It seems we are not the master (%d votes for,"
                     " %d votes against)", top_votes, against)
    return False

  return True
422 36205981 Iustin Pop
423 36205981 Iustin Pop
424 ffeffa1d Iustin Pop
def main():
  """Main function

  Verifies that this node should be the master, binds the master
  socket, optionally daemonizes, and then serves client requests until
  the server loop ends. Everything done after the PID file has been
  written is covered by the cleanup, so a failure during startup (for
  example while setting up logging or the job queue) no longer leaves a
  stale PID file or a half-initialized server behind.

  """
  options, _args = ParseOptions()
  utils.debug = options.debug
  utils.no_fork = True

  ssconf.CheckMaster(options.debug)

  # we believe we are the master, let's ask the other nodes...
  # NOTE(review): a failed agreement makes main() return normally, so
  # the process exit status is presumably 0 here -- confirm whether
  # callers expect a non-zero status.
  if not CheckAgreement():
    return

  master = IOServer(constants.MASTER_SOCKET, ClientRqHandler)

  # become a daemon
  if options.fork:
    utils.Daemonize(logfile=constants.LOG_MASTERDAEMON,
                    noclose_fds=[master.fileno()])

  # Written outside the try block on purpose: if writing fails, the
  # file is not ours and must not be removed by the cleanup below.
  utils.WritePidFile(constants.MASTERD_PID)
  try:
    utils.SetupLogging(constants.LOG_MASTERDAEMON, debug=options.debug,
                       stderr_logging=not options.fork)

    logging.info("ganeti master daemon startup")

    # activate ip
    master_node = ssconf.SimpleConfigReader().GetMasterNode()
    if not rpc.RpcRunner.call_node_start_master(master_node, False):
      # Not fatal: the daemon keeps starting without the master IP
      logging.error("Can't activate master IP address")

    master.setup_queue()
    master.serve_forever()
  finally:
    try:
      # Safe even if setup_queue never ran; server_cleanup checks for
      # the missing worker pool and context itself.
      master.server_cleanup()
    finally:
      utils.RemovePidFile(constants.MASTERD_PID)


if __name__ == "__main__":
  main()