Statistics
| Branch: | Tag: | Revision:

root / daemons / ganeti-masterd @ bbe19c17

History | View | Annotate | Download (13.8 kB)

1 685ee993 Iustin Pop
#!/usr/bin/python -u
2 ffeffa1d Iustin Pop
#
3 ffeffa1d Iustin Pop
4 ffeffa1d Iustin Pop
# Copyright (C) 2006, 2007 Google Inc.
5 ffeffa1d Iustin Pop
#
6 ffeffa1d Iustin Pop
# This program is free software; you can redistribute it and/or modify
7 ffeffa1d Iustin Pop
# it under the terms of the GNU General Public License as published by
8 ffeffa1d Iustin Pop
# the Free Software Foundation; either version 2 of the License, or
9 ffeffa1d Iustin Pop
# (at your option) any later version.
10 ffeffa1d Iustin Pop
#
11 ffeffa1d Iustin Pop
# This program is distributed in the hope that it will be useful, but
12 ffeffa1d Iustin Pop
# WITHOUT ANY WARRANTY; without even the implied warranty of
13 ffeffa1d Iustin Pop
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 ffeffa1d Iustin Pop
# General Public License for more details.
15 ffeffa1d Iustin Pop
#
16 ffeffa1d Iustin Pop
# You should have received a copy of the GNU General Public License
17 ffeffa1d Iustin Pop
# along with this program; if not, write to the Free Software
18 ffeffa1d Iustin Pop
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 ffeffa1d Iustin Pop
# 02110-1301, USA.
20 ffeffa1d Iustin Pop
21 ffeffa1d Iustin Pop
22 ffeffa1d Iustin Pop
"""Master daemon program.
23 ffeffa1d Iustin Pop
24 ffeffa1d Iustin Pop
Some classes deviate from the standard style guide since the
25 ffeffa1d Iustin Pop
inheritance from parent classes requires it.
26 ffeffa1d Iustin Pop
27 ffeffa1d Iustin Pop
"""
28 ffeffa1d Iustin Pop
29 ffeffa1d Iustin Pop
30 d823660a Guido Trotter
import os
31 d823660a Guido Trotter
import errno
32 c1f2901b Iustin Pop
import sys
33 ffeffa1d Iustin Pop
import SocketServer
34 ffeffa1d Iustin Pop
import time
35 ffeffa1d Iustin Pop
import collections
36 ffeffa1d Iustin Pop
import Queue
37 ffeffa1d Iustin Pop
import random
38 ffeffa1d Iustin Pop
import signal
39 ffeffa1d Iustin Pop
import simplejson
40 96cb3986 Michael Hanselmann
import logging
41 ffeffa1d Iustin Pop
42 ffeffa1d Iustin Pop
from cStringIO import StringIO
43 c1f2901b Iustin Pop
from optparse import OptionParser
44 ffeffa1d Iustin Pop
45 39dcf2ef Guido Trotter
from ganeti import config
46 ffeffa1d Iustin Pop
from ganeti import constants
47 ffeffa1d Iustin Pop
from ganeti import mcpu
48 ffeffa1d Iustin Pop
from ganeti import opcodes
49 ffeffa1d Iustin Pop
from ganeti import jqueue
50 39dcf2ef Guido Trotter
from ganeti import locking
51 ffeffa1d Iustin Pop
from ganeti import luxi
52 ffeffa1d Iustin Pop
from ganeti import utils
53 c1f2901b Iustin Pop
from ganeti import errors
54 c1f2901b Iustin Pop
from ganeti import ssconf
55 23e50d39 Michael Hanselmann
from ganeti import workerpool
56 b1b6ea87 Iustin Pop
from ganeti import rpc
57 d7cdb55d Iustin Pop
from ganeti import bootstrap
58 c1f2901b Iustin Pop
59 c1f2901b Iustin Pop
60 23e50d39 Michael Hanselmann
# Passed as the first argument to workerpool.WorkerPool when the client
# request pool is created -- presumably the number of workers (TODO confirm
# against workerpool).
CLIENT_REQUEST_WORKERS = 16
61 23e50d39 Michael Hanselmann
62 c1f2901b Iustin Pop
# Module-level alias for the exit code defined in ganeti.constants.
EXIT_NOTMASTER = constants.EXIT_NOTMASTER
63 c1f2901b Iustin Pop
# Module-level alias for the exit code defined in ganeti.constants.
EXIT_NODESETUP_ERROR = constants.EXIT_NODESETUP_ERROR
64 ffeffa1d Iustin Pop
65 ffeffa1d Iustin Pop
66 23e50d39 Michael Hanselmann
class ClientRequestWorker(workerpool.BaseWorker):
  """Pool worker which serves one accepted client connection per task."""

  def RunTask(self, server, request, client_address):
    """Process the request.

    This is modelled on the request processing code in ThreadingMixIn, but
    the request is closed in a "finally" clause so that it is released
    exactly once, even if the server's error hook itself fails.

    Args:
      server: the server object that accepted the connection; its
              finish_request, handle_error and close_request methods
              are used
      request: the request (connection) object to be served
      client_address: the client address, as handed over by the server

    """
    try:
      try:
        server.finish_request(request, client_address)
      except:
        # Deliberately broad, as in ThreadingMixIn: whatever the handler
        # raises is reported through the server's error hook.
        server.handle_error(request, client_address)
    finally:
      server.close_request(request)
79 23e50d39 Michael Hanselmann
80 23e50d39 Michael Hanselmann
81 ffeffa1d Iustin Pop
class IOServer(SocketServer.UnixStreamServer):
  """Unix stream server accepting the client connections of the daemon.

  On top of the plain socket server this class owns the GanetiContext and
  the pool of client request workers, installs the SIGINT/SIGTERM handler
  while serving and releases all of these again at shutdown.

  """
  def __init__(self, address, rqhandler):
    """IOServer constructor

    Args:
      address: the address to bind this IOServer to
      rqhandler: RequestHandler type object

    """
    SocketServer.UnixStreamServer.__init__(self, address, rqhandler)

    # Nothing is started here: both attributes are only filled in by
    # setup_queue(), since threads must be created after the fork.
    self.request_workers = None
    self.context = None

  def setup_queue(self):
    """Create the shared context and the client request worker pool.

    """
    self.context = GanetiContext()
    self.request_workers = workerpool.WorkerPool(CLIENT_REQUEST_WORKERS,
                                                 ClientRequestWorker)

  def process_request(self, request, client_address):
    """Hand the accepted request over to the worker pool.

    """
    self.request_workers.AddTask(self, request, client_address)

  def serve_forever(self):
    """Serve requests one by one until the signal handler is flagged.

    A handler for SIGINT and SIGTERM is installed for the duration of the
    loop and reset afterwards, whatever the reason for leaving the loop.

    """
    stop_handler = utils.SignalHandler([signal.SIGINT, signal.SIGTERM])
    try:
      while not stop_handler.called:
        self.handle_request()
    finally:
      stop_handler.Reset()

  def server_cleanup(self):
    """Cleanup the server.

    The master socket is closed first; afterwards the request workers are
    terminated and the job queue is shut down, if they were ever set up.

    """
    try:
      self.server_close()
    finally:
      # Either attribute is still None if setup_queue() was never reached
      if self.request_workers:
        self.request_workers.TerminateWorkers()
      if self.context:
        self.context.jobqueue.Shutdown()
137 ffeffa1d Iustin Pop
138 ffeffa1d Iustin Pop
139 ffeffa1d Iustin Pop
class ClientRqHandler(SocketServer.BaseRequestHandler):
  """Client handler

  Reads EOM-terminated, JSON-encoded requests from the connection, runs
  them through ClientOps and writes back one JSON response per request.

  """
  # Byte appended to every outgoing message and used to split incoming data
  EOM = '\3'
  # Maximum number of bytes requested from the socket per recv() call
  READ_SIZE = 4096

  def setup(self):
    """Initialise the receive buffer, message queue and operations object.

    """
    self._buffer = ""
    self._msgs = collections.deque()
    self._ops = ClientOps(self.server)

  def handle(self):
    """Serve requests until the client disconnects or sends garbage.

    Each request must be a JSON dictionary holding a method and its
    arguments. The response is a dictionary with a success flag and a
    result; for failed requests the result describes the error. Requests
    which cannot be decoded or lack the required keys are logged and end
    the conversation.

    """
    while True:
      msg = self.read_message()
      if msg is None:
        logging.info("client closed connection")
        break

      try:
        request = simplejson.loads(msg)
      except ValueError:
        # Undecodable input is treated like any other malformed request
        # instead of escaping as an unhandled exception.
        logging.error("malformed request received: %s", msg)
        break
      logging.debug("request: %s", request)
      if not isinstance(request, dict):
        logging.error("wrong request received: %s", msg)
        break

      method = request.get(luxi.KEY_METHOD, None)
      args = request.get(luxi.KEY_ARGS, None)
      if method is None or args is None:
        logging.error("no method or args in request")
        break

      success = False
      try:
        result = self._ops.handle_request(method, args)
        success = True
      except errors.GenericError:
        # Ganeti errors are passed to the client as (class name, args);
        # the instance is fetched the same way as in the branch below.
        err = sys.exc_info()[1]
        result = (err.__class__.__name__, err.args)
      except:
        logging.error("Unexpected exception", exc_info=True)
        err = sys.exc_info()
        result = "Caught exception: %s" % str(err[1])

      response = {
        luxi.KEY_SUCCESS: success,
        luxi.KEY_RESULT: result,
        }
      logging.debug("response: %s", response)
      self.send_message(simplejson.dumps(response))

  def read_message(self):
    """Return the next complete message, or None at end of stream.

    Data is read from the socket until at least one EOM-terminated message
    is available; an incomplete trailing message is kept in the buffer
    for the next call.

    """
    while not self._msgs:
      data = self.request.recv(self.READ_SIZE)
      if not data:
        return None
      new_msgs = (self._buffer + data).split(self.EOM)
      # the last element is the (possibly empty) unterminated remainder
      self._buffer = new_msgs.pop()
      self._msgs.extend(new_msgs)
    return self._msgs.popleft()

  def send_message(self, msg):
    """Send a message to the client, followed by the EOM marker.

    """
    self.request.sendall(msg + self.EOM)
200 ffeffa1d Iustin Pop
201 ffeffa1d Iustin Pop
202 ffeffa1d Iustin Pop
class ClientOps:
  """Dispatcher for the high-level operations requested by clients."""
  def __init__(self, server):
    """Remember the server whose context is used to run operations.

    """
    self.server = server

  def handle_request(self, method, args):
    """Execute one client request and return its result.

    Args:
      method: the request type, compared against the luxi.REQ_* constants
      args: the request arguments; their layout depends on the method

    Returns:
      whatever the job queue method or the executed query opcode returns

    Raises:
      ValueError: if the method is not one of the known request types

    """
    jobq = self.server.context.jobqueue

    # TODO: Parameter validation

    # Job queue operations
    if method == luxi.REQ_SUBMIT_JOB:
      return jobq.SubmitJob([opcodes.OpCode.LoadOpCode(state)
                             for state in args])

    if method == luxi.REQ_CANCEL_JOB:
      return jobq.CancelJob(args)

    if method == luxi.REQ_ARCHIVE_JOB:
      return jobq.ArchiveJob(args)

    if method == luxi.REQ_AUTOARCHIVE_JOBS:
      return jobq.AutoArchiveJobs(args)

    if method == luxi.REQ_WAIT_FOR_JOB_CHANGE:
      (job_id, fields, prev_job_info, prev_log_serial, timeout) = args
      return jobq.WaitForJobChanges(job_id, fields, prev_job_info,
                                    prev_log_serial, timeout)

    if method == luxi.REQ_QUERY_JOBS:
      (job_ids, fields) = args
      return jobq.QueryJobs(job_ids, fields)

    # Queries, executed directly as opcodes
    if method == luxi.REQ_QUERY_INSTANCES:
      (names, fields) = args
      return self._Query(opcodes.OpQueryInstances(names=names,
                                                  output_fields=fields))

    if method == luxi.REQ_QUERY_NODES:
      (names, fields) = args
      return self._Query(opcodes.OpQueryNodes(names=names,
                                              output_fields=fields))

    if method == luxi.REQ_QUERY_EXPORTS:
      return self._Query(opcodes.OpQueryExports(nodes=args))

    if method == luxi.REQ_QUERY_CONFIG_VALUES:
      return self._Query(opcodes.OpQueryConfigValues(output_fields=args))

    if method == luxi.REQ_QUEUE_SET_DRAIN_FLAG:
      return jobq.SetDrainFlag(args)

    raise ValueError("Invalid operation")

  def _DummyLog(self, *args):
    """Log callback which discards everything it is given.

    """
    pass

  def _Query(self, op):
    """Runs the specified opcode and returns the result.

    """
    # TODO: Where should log messages go?
    processor = mcpu.Processor(self.server.context)
    return processor.ExecOpCode(op, self._DummyLog, None)
274 ee6c7b94 Michael Hanselmann
275 ffeffa1d Iustin Pop
276 39dcf2ef Guido Trotter
class GanetiContext(object):
  """Holder of the objects shared by all ganeti threads.

  An instance bundles the configuration writer, the lock manager and the
  job queue. Once construction has finished, its attributes can no longer
  be modified.

  """
  # The one live instance, or None while no context has been built yet
  _instance = None

  def __init__(self):
    """Constructs a new GanetiContext object.

    Only one GanetiContext may exist at any time; creating a second one
    trips an assertion.

    """
    klass = type(self)
    assert klass._instance is None, "double GanetiContext instance"

    # Global configuration object
    self.cfg = config.ConfigWriter()

    # Locking manager, seeded with the currently known nodes and instances
    node_names = self.cfg.GetNodeList()
    instance_names = self.cfg.GetInstanceList()
    self.glm = locking.GanetiLockManager(node_names, instance_names)

    # Job queue
    self.jobqueue = jqueue.JobQueue(self)

    # Must come last: from here on __setattr__ rejects every assignment
    klass._instance = self

  def __setattr__(self, name, value):
    """Setting GanetiContext attributes is forbidden after initialization.

    """
    assert type(self)._instance is None, "Attempt to modify Ganeti Context"
    object.__setattr__(self, name, value)

  def AddNode(self, node):
    """Adds a node to the configuration and lock manager.

    """
    # Configuration first
    self.cfg.AddNode(node)

    # If preseeding fails it'll not be added
    self.jobqueue.AddNode(node)

    # Finally make the node known to the Ganeti Lock Manager
    self.glm.add(locking.LEVEL_NODE, node.name)

  def ReaddNode(self, node):
    """Updates a node that's already in the configuration

    """
    # Only the job queue needs to be synchronized again
    self.jobqueue.AddNode(node)

  def RemoveNode(self, name):
    """Removes a node from the configuration and lock manager.

    """
    # Configuration first
    self.cfg.RemoveNode(name)

    # Then notify the job queue
    self.jobqueue.RemoveNode(name)

    # Finally drop the node from the Ganeti Lock Manager
    self.glm.remove(locking.LEVEL_NODE, name)
346 d8470559 Michael Hanselmann
347 39dcf2ef Guido Trotter
348 c1f2901b Iustin Pop
def ParseOptions():
  """Parse the command line options.

  Two flags are understood: -f/--foreground (stored inverted in
  options.fork, which defaults to True) and -d/--debug (stored in
  options.debug, which defaults to False).

  Returns:
    (options, args) as from OptionParser.parse_args()

  """
  version_text = "%%prog (ganeti) %s" % constants.RELEASE_VERSION
  parser = OptionParser(description="Ganeti master daemon",
                        usage="%prog [-f] [-d]",
                        version=version_text)

  parser.add_option("-f", "--foreground", action="store_false",
                    dest="fork", default=True,
                    help="Don't detach from the current terminal")
  parser.add_option("-d", "--debug", action="store_true",
                    dest="debug", default=False,
                    help="Enable some debug messages")
  return parser.parse_args()
368 c1f2901b Iustin Pop
369 c1f2901b Iustin Pop
370 36205981 Iustin Pop
def CheckAgreement():
  """Check the agreement on who is the master.

  The votes of the nodes in our own configuration are gathered and this
  node is accepted as master if it is the top-voted node and the votes
  for it are not outnumbered by the votes for everybody else. In the
  future we could collect the current node list from our (possibly
  obsolete) known nodes.

  In order to account for cold-start of all nodes, the vote is repeated
  up to six times, ten seconds apart, until a real node (and not "no
  answer") is the top-voted one. If the nodes are more out-of-sync, for
  now manual startup of the master should be attempted.

  Note that for a cluster with an even number of nodes, we need at least
  half of the nodes (beside ourselves) to vote for us. This creates a
  problem on two-node clusters, since in this case we require the other
  node to be up too to confirm our status.

  Returns:
    True if this node may act as master, False otherwise

  """
  myself = utils.HostInfo().name
  # throw-away config writer, needed only to read the node list
  node_list = config.ConfigWriter().GetNodeList()

  for _attempt in range(6):
    votes = bootstrap.GatherMasterVotes(node_list)
    if not votes:
      # empty node list, this is a one node cluster
      return True
    if votes[0][0] is not None:
      # a real node is at the top of the list
      break
    time.sleep(10)
  else:
    logging.critical("Cluster inconsistent, most of the nodes didn't answer"
                     " after multiple retries. Aborting startup")
    return False

  all_votes = sum(item[1] for item in votes)
  top_node, top_votes = votes[0]

  if top_node != myself:
    logging.critical("It seems we are not the master (top-voted node"
                     " is %s with %d out of %d votes)", top_node, top_votes,
                     all_votes)
    return False

  votes_against = all_votes - top_votes
  if top_votes < votes_against:
    logging.critical("It seems we are not the master (%d votes for,"
                     " %d votes against)", top_votes, votes_against)
    return False

  return True
425 36205981 Iustin Pop
426 36205981 Iustin Pop
427 ffeffa1d Iustin Pop
def main():
  """Main function

  Startup happens in two phases. First, with RPC initialised, the node
  verifies that it is the master (locally and by asking the other nodes),
  creates the runtime directories and binds the master socket. Then the
  process optionally daemonizes, writes its pid file, activates the master
  IP and serves client requests until told to stop. The pid file and the
  master socket are removed on the way out.

  """
  (options, _) = ParseOptions()
  utils.debug = options.debug
  utils.no_fork = True

  rpc.Init()
  try:
    ssconf.CheckMaster(options.debug)

    # we believe we are the master, let's ask the other nodes...
    if not CheckAgreement():
      return

    dirs = [(constants.RUN_GANETI_DIR, constants.RUN_DIRS_MODE),
            (constants.SOCKET_DIR, constants.SOCKET_DIR_MODE),
           ]
    for dir_name, mode in dirs:
      try:
        os.mkdir(dir_name, mode)
      except EnvironmentError:
        err = sys.exc_info()[1]
        # an already existing path is fine, its type is verified below
        if err.errno != errno.EEXIST:
          # report the directory which actually failed
          raise errors.GenericError("Cannot create needed directory"
                                    " '%s': %s" % (dir_name, err))
      if not os.path.isdir(dir_name):
        raise errors.GenericError("%s is not a directory" % dir_name)

    # This is safe to do as the pid file guarantees against
    # concurrent execution.
    utils.RemoveFile(constants.MASTER_SOCKET)

    master = IOServer(constants.MASTER_SOCKET, ClientRqHandler)
  finally:
    rpc.Shutdown()

  # become a daemon, keeping the already bound server socket open
  if options.fork:
    utils.Daemonize(logfile=constants.LOG_MASTERDAEMON,
                    noclose_fds=[master.fileno()])

  utils.WritePidFile(constants.MASTERD_PID)
  try:
    utils.SetupLogging(constants.LOG_MASTERDAEMON, debug=options.debug,
                       stderr_logging=not options.fork)

    logging.info("Ganeti master daemon startup")

    # RPC was shut down before daemonizing, so it is initialised again
    rpc.Init()
    try:
      # activate ip
      master_node = ssconf.SimpleConfigReader().GetMasterNode()
      if not rpc.RpcRunner.call_node_start_master(master_node, False):
        logging.error("Can't activate master IP address")

      master.setup_queue()
      try:
        master.serve_forever()
      finally:
        master.server_cleanup()
    finally:
      rpc.Shutdown()
  finally:
    utils.RemovePidFile(constants.MASTERD_PID)
    utils.RemoveFile(constants.MASTER_SOCKET)
492 a4af651e Iustin Pop
493 ffeffa1d Iustin Pop
494 ffeffa1d Iustin Pop
# Start the daemon only when executed as a script, not when imported.
if __name__ == "__main__":
  main()