Statistics
| Branch: | Tag: | Revision:

root / daemons / ganeti-masterd @ e566ddbd

History | View | Annotate | Download (15.4 kB)

1 685ee993 Iustin Pop
#!/usr/bin/python -u
2 ffeffa1d Iustin Pop
#
3 ffeffa1d Iustin Pop
4 ffeffa1d Iustin Pop
# Copyright (C) 2006, 2007 Google Inc.
5 ffeffa1d Iustin Pop
#
6 ffeffa1d Iustin Pop
# This program is free software; you can redistribute it and/or modify
7 ffeffa1d Iustin Pop
# it under the terms of the GNU General Public License as published by
8 ffeffa1d Iustin Pop
# the Free Software Foundation; either version 2 of the License, or
9 ffeffa1d Iustin Pop
# (at your option) any later version.
10 ffeffa1d Iustin Pop
#
11 ffeffa1d Iustin Pop
# This program is distributed in the hope that it will be useful, but
12 ffeffa1d Iustin Pop
# WITHOUT ANY WARRANTY; without even the implied warranty of
13 ffeffa1d Iustin Pop
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 ffeffa1d Iustin Pop
# General Public License for more details.
15 ffeffa1d Iustin Pop
#
16 ffeffa1d Iustin Pop
# You should have received a copy of the GNU General Public License
17 ffeffa1d Iustin Pop
# along with this program; if not, write to the Free Software
18 ffeffa1d Iustin Pop
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 ffeffa1d Iustin Pop
# 02110-1301, USA.
20 ffeffa1d Iustin Pop
21 ffeffa1d Iustin Pop
22 ffeffa1d Iustin Pop
"""Master daemon program.
23 ffeffa1d Iustin Pop
24 ffeffa1d Iustin Pop
Some classes deviate from the standard style guide since the
25 ffeffa1d Iustin Pop
inheritance from parent classes requires it.
26 ffeffa1d Iustin Pop
27 ffeffa1d Iustin Pop
"""
28 ffeffa1d Iustin Pop
29 ffeffa1d Iustin Pop
30 d823660a Guido Trotter
import os
31 d823660a Guido Trotter
import errno
32 c1f2901b Iustin Pop
import sys
33 ffeffa1d Iustin Pop
import SocketServer
34 ffeffa1d Iustin Pop
import time
35 ffeffa1d Iustin Pop
import collections
36 ffeffa1d Iustin Pop
import Queue
37 ffeffa1d Iustin Pop
import random
38 ffeffa1d Iustin Pop
import signal
39 ffeffa1d Iustin Pop
import simplejson
40 96cb3986 Michael Hanselmann
import logging
41 ffeffa1d Iustin Pop
42 ffeffa1d Iustin Pop
from cStringIO import StringIO
43 c1f2901b Iustin Pop
from optparse import OptionParser
44 ffeffa1d Iustin Pop
45 39dcf2ef Guido Trotter
from ganeti import config
46 ffeffa1d Iustin Pop
from ganeti import constants
47 ffeffa1d Iustin Pop
from ganeti import mcpu
48 ffeffa1d Iustin Pop
from ganeti import opcodes
49 ffeffa1d Iustin Pop
from ganeti import jqueue
50 39dcf2ef Guido Trotter
from ganeti import locking
51 ffeffa1d Iustin Pop
from ganeti import luxi
52 ffeffa1d Iustin Pop
from ganeti import utils
53 c1f2901b Iustin Pop
from ganeti import errors
54 c1f2901b Iustin Pop
from ganeti import ssconf
55 23e50d39 Michael Hanselmann
from ganeti import workerpool
56 b1b6ea87 Iustin Pop
from ganeti import rpc
57 d7cdb55d Iustin Pop
from ganeti import bootstrap
58 c1f2901b Iustin Pop
59 c1f2901b Iustin Pop
60 23e50d39 Michael Hanselmann
# Size passed to workerpool.WorkerPool when IOServer.setup_queue() creates
# the pool of ClientRequestWorker instances.
CLIENT_REQUEST_WORKERS = 16
61 23e50d39 Michael Hanselmann
62 c1f2901b Iustin Pop
# Local aliases for the exit codes defined in ganeti.constants.
EXIT_NOTMASTER = constants.EXIT_NOTMASTER
63 c1f2901b Iustin Pop
EXIT_NODESETUP_ERROR = constants.EXIT_NODESETUP_ERROR
64 ffeffa1d Iustin Pop
65 ffeffa1d Iustin Pop
66 23e50d39 Michael Hanselmann
class ClientRequestWorker(workerpool.BaseWorker):
  """Worker running the client requests queued by IOServer.process_request.

  """
  def RunTask(self, server, request, client_address):
    """Serve one client connection and close it afterwards.

    The logic mirrors SocketServer.ThreadingMixIn: any exception raised
    while finishing (or closing) the request is reported through the
    server's handle_error() and the request is then closed.

    @param server: the server object the request was accepted on
    @param request: the request (connection) object to serve
    @param client_address: the address of the client

    """
    try:
      server.finish_request(request, client_address)
      server.close_request(request)
    except:
      # Deliberately catch everything, exactly like ThreadingMixIn does;
      # the server decides how the error is reported
      server.handle_error(request, client_address)
      server.close_request(request)
79 23e50d39 Michael Hanselmann
80 23e50d39 Michael Hanselmann
81 ffeffa1d Iustin Pop
class IOServer(SocketServer.UnixStreamServer):
  """Unix socket server driving the master daemon.

  Besides accepting connections, this class creates the shared context
  and the request worker pool, handles SIGINT/SIGTERM while serving and
  tears everything down again at shutdown.

  """
  def __init__(self, address, rqhandler):
    """Bind the server socket; nothing else is started yet.

    @param address: the address to bind this IOServer to
    @param rqhandler: RequestHandler type object

    """
    SocketServer.UnixStreamServer.__init__(self, address, rqhandler)

    # Both are created in setup_queue(), as threads must only be
    # started once we've forked
    self.context = None
    self.request_workers = None

  def setup_queue(self):
    """Create the Ganeti context and the client request worker pool.

    """
    self.context = GanetiContext()
    pool = workerpool.WorkerPool(CLIENT_REQUEST_WORKERS, ClientRequestWorker)
    self.request_workers = pool

  def process_request(self, request, client_address):
    """Queue the request on the worker pool instead of serving it inline.

    """
    pool = self.request_workers
    pool.AddTask(self, request, client_address)

  def serve_forever(self):
    """Handle one request at a time until SIGINT or SIGTERM was received.

    """
    sighandler = utils.SignalHandler([signal.SIGINT, signal.SIGTERM])
    try:
      while True:
        if sighandler.called:
          break
        self.handle_request()
    finally:
      sighandler.Reset()

  def server_cleanup(self):
    """Cleanup the server.

    Closes the master socket, then terminates the request workers and
    shuts down the job queue, if they were ever created.

    """
    try:
      self.server_close()
    finally:
      # Run even if closing the socket failed
      pool = self.request_workers
      if pool:
        pool.TerminateWorkers()
      context = self.context
      if context:
        context.jobqueue.Shutdown()
136 ffeffa1d Iustin Pop
137 ffeffa1d Iustin Pop
138 ffeffa1d Iustin Pop
class ClientRqHandler(SocketServer.BaseRequestHandler):
  """Handler for one client connection.

  Messages in both directions are JSON documents terminated by the EOM
  character.

  """
  EOM = '\3'
  READ_SIZE = 4096

  def setup(self):
    """Initialize the receive buffer, message queue and operations object.

    """
    self._buffer = ""
    self._msgs = collections.deque()
    self._ops = ClientOps(self.server)

  def handle(self):
    """Serve requests until the client disconnects or sends a bad request.

    Each request must be a JSON dictionary holding a method and its
    arguments; the reply is a JSON dictionary with a success flag and
    the result (or a description of the error).

    """
    while True:
      msg = self.read_message()
      if msg is None:
        logging.debug("client closed connection")
        return

      request = simplejson.loads(msg)
      logging.debug("request: %s", request)
      if not isinstance(request, dict):
        logging.error("wrong request received: %s", msg)
        return

      method = request.get(luxi.KEY_METHOD)
      args = request.get(luxi.KEY_ARGS)
      if method is None or args is None:
        logging.error("no method or args in request")
        return

      try:
        result = self._ops.handle_request(method, args)
        success = True
      except errors.GenericError:
        # Ganeti errors are passed back as (class name, arguments)
        err = sys.exc_info()[1]
        success = False
        result = (err.__class__.__name__, err.args)
      except:
        # Anything else is logged and reported to the client as text
        logging.error("Unexpected exception", exc_info=True)
        success = False
        result = "Caught exception: %s" % str(sys.exc_info()[1])

      response = {
        luxi.KEY_SUCCESS: success,
        luxi.KEY_RESULT: result,
        }
      logging.debug("response: %s", response)
      self.send_message(simplejson.dumps(response))

  def read_message(self):
    """Return the next complete message from the client.

    @return: the message without its terminator, or None if the peer
        closed the connection before a full message was available

    """
    pending = self._msgs
    while not pending:
      data = self.request.recv(self.READ_SIZE)
      if not data:
        return None
      parts = (self._buffer + data).split(self.EOM)
      # The last element is the (possibly empty) unterminated remainder
      self._buffer = parts.pop()
      pending.extend(parts)
    return pending.popleft()

  def send_message(self, msg):
    """Send a message to the client, followed by the terminator.

    """
    payload = msg + self.EOM
    self.request.sendall(payload)
199 ffeffa1d Iustin Pop
200 ffeffa1d Iustin Pop
201 ffeffa1d Iustin Pop
class ClientOps:
  """Class holding high-level client operations."""
  def __init__(self, server):
    """Remember the server whose context is used to run the operations.

    @param server: the server object; its C{context} attribute is used

    """
    self.server = server

  @staticmethod
  def _FormatJobIds(job_ids):
    """Format the job IDs of a query request for logging.

    Every ID is converted to a string before joining, so that IDs sent
    as integers do not make the log statement fail.

    @param job_ids: the job IDs as received from the client
    @return: comma-separated IDs for a non-empty list or tuple, otherwise
        the string representation of the value

    """
    if isinstance(job_ids, (tuple, list)) and job_ids:
      return ", ".join(["%s" % (job_id,) for job_id in job_ids])
    return str(job_ids)

  def handle_request(self, method, args):
    """Execute a client request.

    Job related requests are passed to the job queue, while queries are
    run directly through L{_Query}.

    @param method: the requested operation, one of the luxi.REQ_* values
    @param args: the arguments of the operation; their structure depends
        on the method
    @return: the result of the job queue call or of the query opcode
    @raise ValueError: if the method is not known

    """
    queue = self.server.context.jobqueue

    # TODO: Parameter validation

    if method == luxi.REQ_SUBMIT_JOB:
      logging.info("Received new job")
      ops = [opcodes.OpCode.LoadOpCode(state) for state in args]
      return queue.SubmitJob(ops)

    elif method == luxi.REQ_CANCEL_JOB:
      job_id = args
      logging.info("Received job cancel request for %s", job_id)
      return queue.CancelJob(job_id)

    elif method == luxi.REQ_ARCHIVE_JOB:
      job_id = args
      logging.info("Received job archive request for %s", job_id)
      return queue.ArchiveJob(job_id)

    elif method == luxi.REQ_AUTOARCHIVE_JOBS:
      (age, timeout) = args
      logging.info("Received job autoarchive request for age %s, timeout %s",
                   age, timeout)
      return queue.AutoArchiveJobs(age, timeout)

    elif method == luxi.REQ_WAIT_FOR_JOB_CHANGE:
      (job_id, fields, prev_job_info, prev_log_serial, timeout) = args
      logging.info("Received job poll request for %s", job_id)
      return queue.WaitForJobChanges(job_id, fields, prev_job_info,
                                     prev_log_serial, timeout)

    elif method == luxi.REQ_QUERY_JOBS:
      (job_ids, fields) = args
      # Logging must never make a valid query fail, hence the IDs are
      # stringified by the helper
      logging.info("Received job query request for %s",
                   self._FormatJobIds(job_ids))
      return queue.QueryJobs(job_ids, fields)

    elif method == luxi.REQ_QUERY_INSTANCES:
      (names, fields, use_locking) = args
      logging.info("Received instance query request for %s", names)
      op = opcodes.OpQueryInstances(names=names, output_fields=fields,
                                    use_locking=use_locking)
      return self._Query(op)

    elif method == luxi.REQ_QUERY_NODES:
      (names, fields, use_locking) = args
      logging.info("Received node query request for %s", names)
      op = opcodes.OpQueryNodes(names=names, output_fields=fields,
                                use_locking=use_locking)
      return self._Query(op)

    elif method == luxi.REQ_QUERY_EXPORTS:
      nodes, use_locking = args
      logging.info("Received exports query request")
      op = opcodes.OpQueryExports(nodes=nodes, use_locking=use_locking)
      return self._Query(op)

    elif method == luxi.REQ_QUERY_CONFIG_VALUES:
      fields = args
      logging.info("Received config values query request for %s", fields)
      op = opcodes.OpQueryConfigValues(output_fields=fields)
      return self._Query(op)

    elif method == luxi.REQ_QUERY_CLUSTER_INFO:
      logging.info("Received cluster info query request")
      op = opcodes.OpQueryClusterInfo()
      return self._Query(op)

    elif method == luxi.REQ_QUEUE_SET_DRAIN_FLAG:
      drain_flag = args
      logging.info("Received queue drain flag change request to %s",
                   drain_flag)
      return queue.SetDrainFlag(drain_flag)

    else:
      logging.info("Received invalid request '%s'", method)
      raise ValueError("Invalid operation '%s'" % method)

  def _DummyLog(self, *args):
    """Discard the log messages emitted while running a query opcode.

    """
    pass

  def _Query(self, op):
    """Runs the specified opcode and returns the result.

    @param op: the query opcode to execute
    @return: whatever the processor returns for the opcode

    """
    proc = mcpu.Processor(self.server.context)
    # TODO: Where should log messages go?
    return proc.ExecOpCode(op, self._DummyLog, None)
298 ee6c7b94 Michael Hanselmann
299 ffeffa1d Iustin Pop
300 39dcf2ef Guido Trotter
class GanetiContext(object):
  """Holder of the objects shared between all ganeti threads.

  The context owns the configuration, the lock manager and the job
  queue. Once constructed, its attributes can no longer be changed.

  """
  _instance = None

  def __init__(self):
    """Creates the shared objects and registers the single instance.

    Only one GanetiContext may exist at any time; creating a second one
    trips an assertion.

    """
    cls = self.__class__
    assert cls._instance is None, "double GanetiContext instance"

    # Global configuration object
    self.cfg = config.ConfigWriter()

    # Locking manager, seeded with the currently known nodes and instances
    node_names = self.cfg.GetNodeList()
    instance_names = self.cfg.GetInstanceList()
    self.glm = locking.GanetiLockManager(node_names, instance_names)

    # Job queue
    self.jobqueue = jqueue.JobQueue(self)

    # Must be last: from here on __setattr__ rejects any modification
    cls._instance = self

  def __setattr__(self, name, value):
    """Allows setting attributes only while no instance is registered.

    """
    cls = self.__class__
    assert cls._instance is None, "Attempt to modify Ganeti Context"
    object.__setattr__(self, name, value)

  def AddNode(self, node):
    """Registers a new node with configuration, job queue and locking.

    @param node: the node object to add

    """
    # Configuration first
    self.cfg.AddNode(node)

    # If preseeding fails it'll not be added
    self.jobqueue.AddNode(node)

    # Finally make the node known to the Ganeti Lock Manager
    self.glm.add(locking.LEVEL_NODE, node.name)

  def ReaddNode(self, node):
    """Handles the re-addition of a node that is already configured.

    @param node: the node object being re-added

    """
    # Only the job queue has to be synchronized again
    self.jobqueue.AddNode(node)

  def RemoveNode(self, name):
    """Drops a node from configuration, job queue and locking.

    @param name: the name of the node to remove

    """
    # Configuration first
    self.cfg.RemoveNode(name)

    # Notify job queue
    self.jobqueue.RemoveNode(name)

    # Finally forget the node in the Ganeti Lock Manager
    self.glm.remove(locking.LEVEL_NODE, name)
370 d8470559 Michael Hanselmann
371 39dcf2ef Guido Trotter
372 c1f2901b Iustin Pop
def ParseOptions():
  """Parse the command line of the master daemon.

  Recognized options are -f/--foreground, -d/--debug and --no-voting.

  @return: (options, args) as from OptionParser.parse_args()

  """
  version = "%%prog (ganeti) %s" % constants.RELEASE_VERSION
  parser = OptionParser(description="Ganeti master daemon",
                        usage="%prog [-f] [-d]",
                        version=version)

  # Note the inverted logic: passing -f clears the "fork" flag
  parser.add_option("-f", "--foreground", dest="fork",
                    action="store_false", default=True,
                    help="Don't detach from the current terminal")
  parser.add_option("-d", "--debug", dest="debug",
                    action="store_true", default=False,
                    help="Enable some debug messages")
  parser.add_option("--no-voting", dest="no_voting",
                    action="store_true", default=False,
                    help="Do not check that the nodes agree on this node"
                    " being the master and start the daemon unconditionally")

  return parser.parse_args()
395 c1f2901b Iustin Pop
396 c1f2901b Iustin Pop
397 36205981 Iustin Pop
def CheckAgreement():
  """Check whether the other nodes agree that we are the master.

  The node list is taken from our own configuration and the votes are
  gathered from those nodes. We accept being the master if we are the
  top-voted node and the votes for us are not fewer than the votes for
  everybody else.

  To cope with a cold start of the whole cluster, the vote is repeated
  up to six times, ten seconds apart, while the top-voted answer is not
  a real node. If the nodes are more out-of-sync than that, manual
  startup of the master should be attempted.

  Note that on a cluster with an even number of nodes at least half of
  the nodes (besides ourselves) must vote for us; on two-node clusters
  this means the other node has to be up to confirm our status.

  @return: True if this node should act as master, False otherwise

  """
  myself = utils.HostInfo().name
  # The config writer is only needed temporarily, to get the node list
  node_list = config.ConfigWriter().GetNodeList()

  for _ in range(6):
    votes = bootstrap.GatherMasterVotes(node_list)
    if not votes:
      # empty node list, this is a one node cluster
      return True
    if votes[0][0] is not None:
      # a real node is at the top of the list
      break
    time.sleep(10)
  else:
    logging.critical("Cluster inconsistent, most of the nodes didn't answer"
                     " after multiple retries. Aborting startup")
    return False

  all_votes = sum(item[1] for item in votes)
  top_node, top_votes = votes[0]

  if top_node != myself:
    logging.critical("It seems we are not the master (top-voted node"
                     " is %s with %d out of %d votes)", top_node, top_votes,
                     all_votes)
    return False

  votes_against = all_votes - top_votes
  if top_votes < votes_against:
    logging.critical("It seems we are not the master (%d votes for,"
                     " %d votes against)", top_votes, votes_against)
    return False

  return True
452 36205981 Iustin Pop
453 36205981 Iustin Pop
454 ffeffa1d Iustin Pop
def main():
  """Entry point of the master daemon.

  Verifies that this node is (and is agreed to be) the master, creates
  the master socket, optionally daemonizes and then serves client
  requests until told to stop, cleaning up afterwards.

  """
  options, _ = ParseOptions()
  utils.debug = options.debug
  utils.no_fork = True

  if options.fork:
    utils.CloseFDs()

  rpc.Init()
  try:
    ssconf.CheckMaster(options.debug)

    # we believe we are the master, let's ask the other nodes...
    if options.no_voting:
      sys.stdout.write("The 'no voting' option has been selected.\n")
      sys.stdout.write("This is dangerous, please confirm by"
                       " typing uppercase 'yes': ")
      sys.stdout.flush()
      answer = sys.stdin.readline().strip()
      if answer != "YES":
        sys.stdout.write("Aborting.\n")
        return
    elif not CheckAgreement():
      return

    utils.EnsureDirs([
      (constants.RUN_GANETI_DIR, constants.RUN_DIRS_MODE),
      (constants.SOCKET_DIR, constants.SOCKET_DIR_MODE),
      ])

    # This is safe to do as the pid file guarantees against
    # concurrent execution.
    utils.RemoveFile(constants.MASTER_SOCKET)

    server = IOServer(constants.MASTER_SOCKET, ClientRqHandler)
  finally:
    rpc.Shutdown()

  # become a daemon
  if options.fork:
    utils.Daemonize(logfile=constants.LOG_MASTERDAEMON)

  utils.WritePidFile(constants.MASTERD_PID)
  try:
    # Log to stderr as well when staying in the foreground
    foreground = not options.fork
    utils.SetupLogging(constants.LOG_MASTERDAEMON, debug=options.debug,
                       stderr_logging=foreground, multithreaded=True)

    logging.info("Ganeti master daemon startup")

    rpc.Init()
    try:
      # activate ip; a failure is logged but does not stop the startup
      master_node = ssconf.SimpleConfigReader().GetMasterNode()
      if not rpc.RpcRunner.call_node_start_master(master_node, False):
        logging.error("Can't activate master IP address")

      server.setup_queue()
      try:
        server.serve_forever()
      finally:
        server.server_cleanup()
    finally:
      rpc.Shutdown()
  finally:
    utils.RemovePidFile(constants.MASTERD_PID)
    utils.RemoveFile(constants.MASTER_SOCKET)
523 a4af651e Iustin Pop
524 ffeffa1d Iustin Pop
525 ffeffa1d Iustin Pop
# Run the daemon only when this file is executed as a script.
if __name__ == "__main__":
526 ffeffa1d Iustin Pop
  main()