| Branch: | Tag: | Revision:

root / daemons / ganeti-masterd @ 90b54c26

History | View | Annotate | Download (16 kB)

1 685ee993 Iustin Pop
#!/usr/bin/python -u
2 ffeffa1d Iustin Pop
3 ffeffa1d Iustin Pop
4 ffeffa1d Iustin Pop
# Copyright (C) 2006, 2007 Google Inc.
5 ffeffa1d Iustin Pop
6 ffeffa1d Iustin Pop
# This program is free software; you can redistribute it and/or modify
7 ffeffa1d Iustin Pop
# it under the terms of the GNU General Public License as published by
8 ffeffa1d Iustin Pop
# the Free Software Foundation; either version 2 of the License, or
9 ffeffa1d Iustin Pop
# (at your option) any later version.
10 ffeffa1d Iustin Pop
11 ffeffa1d Iustin Pop
# This program is distributed in the hope that it will be useful, but
12 ffeffa1d Iustin Pop
# WITHOUT ANY WARRANTY; without even the implied warranty of
13 ffeffa1d Iustin Pop
14 ffeffa1d Iustin Pop
# General Public License for more details.
15 ffeffa1d Iustin Pop
16 ffeffa1d Iustin Pop
# You should have received a copy of the GNU General Public License
17 ffeffa1d Iustin Pop
# along with this program; if not, write to the Free Software
18 ffeffa1d Iustin Pop
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 ffeffa1d Iustin Pop
# 02110-1301, USA.
20 ffeffa1d Iustin Pop
21 ffeffa1d Iustin Pop
22 ffeffa1d Iustin Pop
"""Master daemon program.
23 ffeffa1d Iustin Pop
24 ffeffa1d Iustin Pop
Some classes deviates from the standard style guide since the
25 ffeffa1d Iustin Pop
inheritance from parent classes requires it.
26 ffeffa1d Iustin Pop
27 ffeffa1d Iustin Pop
28 ffeffa1d Iustin Pop
29 ffeffa1d Iustin Pop
30 d823660a Guido Trotter
import os
31 d823660a Guido Trotter
import errno
32 c1f2901b Iustin Pop
import sys
33 ffeffa1d Iustin Pop
import SocketServer
34 ffeffa1d Iustin Pop
import time
35 ffeffa1d Iustin Pop
import collections
36 ffeffa1d Iustin Pop
import Queue
37 ffeffa1d Iustin Pop
import random
38 ffeffa1d Iustin Pop
import signal
39 96cb3986 Michael Hanselmann
import logging
40 ffeffa1d Iustin Pop
41 ffeffa1d Iustin Pop
from cStringIO import StringIO
42 c1f2901b Iustin Pop
from optparse import OptionParser
43 ffeffa1d Iustin Pop
44 39dcf2ef Guido Trotter
from ganeti import config
45 ffeffa1d Iustin Pop
from ganeti import constants
46 ffeffa1d Iustin Pop
from ganeti import mcpu
47 ffeffa1d Iustin Pop
from ganeti import opcodes
48 ffeffa1d Iustin Pop
from ganeti import jqueue
49 39dcf2ef Guido Trotter
from ganeti import locking
50 ffeffa1d Iustin Pop
from ganeti import luxi
51 ffeffa1d Iustin Pop
from ganeti import utils
52 c1f2901b Iustin Pop
from ganeti import errors
53 c1f2901b Iustin Pop
from ganeti import ssconf
54 23e50d39 Michael Hanselmann
from ganeti import workerpool
55 b1b6ea87 Iustin Pop
from ganeti import rpc
56 d7cdb55d Iustin Pop
from ganeti import bootstrap
57 dd36d829 Iustin Pop
from ganeti import serializer
58 c1f2901b Iustin Pop
59 c1f2901b Iustin Pop
60 23e50d39 Michael Hanselmann
61 23e50d39 Michael Hanselmann
62 c1f2901b Iustin Pop
63 c1f2901b Iustin Pop
64 ffeffa1d Iustin Pop
65 ffeffa1d Iustin Pop
66 23e50d39 Michael Hanselmann
class ClientRequestWorker(workerpool.BaseWorker):
67 23e50d39 Michael Hanselmann
  def RunTask(self, server, request, client_address):
68 23e50d39 Michael Hanselmann
    """Process the request.
69 23e50d39 Michael Hanselmann
70 23e50d39 Michael Hanselmann
    This is copied from the code in ThreadingMixIn.
71 23e50d39 Michael Hanselmann
72 23e50d39 Michael Hanselmann
73 23e50d39 Michael Hanselmann
74 23e50d39 Michael Hanselmann
      server.finish_request(request, client_address)
75 23e50d39 Michael Hanselmann
76 23e50d39 Michael Hanselmann
77 23e50d39 Michael Hanselmann
      server.handle_error(request, client_address)
78 23e50d39 Michael Hanselmann
79 23e50d39 Michael Hanselmann
80 23e50d39 Michael Hanselmann
81 ffeffa1d Iustin Pop
class IOServer(SocketServer.UnixStreamServer):
82 ffeffa1d Iustin Pop
  """IO thread class.
83 ffeffa1d Iustin Pop
84 ffeffa1d Iustin Pop
  This class takes care of initializing the other threads, setting
85 ffeffa1d Iustin Pop
  signal handlers (which are processed only in this thread), and doing
86 ffeffa1d Iustin Pop
  cleanup at shutdown.
87 ffeffa1d Iustin Pop
88 ffeffa1d Iustin Pop
89 9113300d Michael Hanselmann
  def __init__(self, address, rqhandler):
90 ce862cd5 Guido Trotter
    """IOServer constructor
91 ce862cd5 Guido Trotter
92 c41eea6e Iustin Pop
    @param address: the address to bind this IOServer to
93 c41eea6e Iustin Pop
    @param rqhandler: RequestHandler type object
94 ce862cd5 Guido Trotter
95 ce862cd5 Guido Trotter
96 ffeffa1d Iustin Pop
    SocketServer.UnixStreamServer.__init__(self, address, rqhandler)
97 50a3fbb2 Michael Hanselmann
98 50a3fbb2 Michael Hanselmann
    # We'll only start threads once we've forked.
99 9113300d Michael Hanselmann
    self.context = None
100 23e50d39 Michael Hanselmann
    self.request_workers = None
101 50a3fbb2 Michael Hanselmann
102 50a3fbb2 Michael Hanselmann
  def setup_queue(self):
103 9113300d Michael Hanselmann
    self.context = GanetiContext()
104 23e50d39 Michael Hanselmann
    self.request_workers = workerpool.WorkerPool(CLIENT_REQUEST_WORKERS,
105 23e50d39 Michael Hanselmann
106 ffeffa1d Iustin Pop
107 ffeffa1d Iustin Pop
  def process_request(self, request, client_address):
108 23e50d39 Michael Hanselmann
    """Add task to workerpool to process request.
109 ffeffa1d Iustin Pop
110 ffeffa1d Iustin Pop
111 23e50d39 Michael Hanselmann
    self.request_workers.AddTask(self, request, client_address)
112 ffeffa1d Iustin Pop
113 ffeffa1d Iustin Pop
  def serve_forever(self):
114 ffeffa1d Iustin Pop
    """Handle one request at a time until told to quit."""
115 610bc9ee Michael Hanselmann
    sighandler = utils.SignalHandler([signal.SIGINT, signal.SIGTERM])
116 610bc9ee Michael Hanselmann
117 610bc9ee Michael Hanselmann
      while not sighandler.called:
118 610bc9ee Michael Hanselmann
119 610bc9ee Michael Hanselmann
120 610bc9ee Michael Hanselmann
121 c1f2901b Iustin Pop
122 c1f2901b Iustin Pop
  def server_cleanup(self):
123 c1f2901b Iustin Pop
    """Cleanup the server.
124 c1f2901b Iustin Pop
125 c1f2901b Iustin Pop
    This involves shutting down the processor threads and the master
126 c1f2901b Iustin Pop
127 c1f2901b Iustin Pop
128 c1f2901b Iustin Pop
129 50a3fbb2 Michael Hanselmann
130 50a3fbb2 Michael Hanselmann
131 50a3fbb2 Michael Hanselmann
132 23e50d39 Michael Hanselmann
      if self.request_workers:
133 36088c4c Michael Hanselmann
134 9113300d Michael Hanselmann
      if self.context:
135 9113300d Michael Hanselmann
136 ffeffa1d Iustin Pop
137 ffeffa1d Iustin Pop
138 ffeffa1d Iustin Pop
class ClientRqHandler(SocketServer.BaseRequestHandler):
139 ffeffa1d Iustin Pop
  """Client handler"""
140 ffeffa1d Iustin Pop
  EOM = '\3'
141 ffeffa1d Iustin Pop
  READ_SIZE = 4096
142 ffeffa1d Iustin Pop
143 ffeffa1d Iustin Pop
  def setup(self):
144 ffeffa1d Iustin Pop
    self._buffer = ""
145 ffeffa1d Iustin Pop
    self._msgs = collections.deque()
146 ffeffa1d Iustin Pop
    self._ops = ClientOps(self.server)
147 ffeffa1d Iustin Pop
148 ffeffa1d Iustin Pop
  def handle(self):
149 ffeffa1d Iustin Pop
    while True:
150 ffeffa1d Iustin Pop
      msg = self.read_message()
151 ffeffa1d Iustin Pop
      if msg is None:
152 d21d09d6 Iustin Pop
        logging.debug("client closed connection")
153 ffeffa1d Iustin Pop
154 3d8548c4 Michael Hanselmann
155 dd36d829 Iustin Pop
      request = serializer.LoadJson(msg)
156 3d8548c4 Michael Hanselmann
      logging.debug("request: %s", request)
157 ffeffa1d Iustin Pop
      if not isinstance(request, dict):
158 3d8548c4 Michael Hanselmann
        logging.error("wrong request received: %s", msg)
159 ffeffa1d Iustin Pop
160 3d8548c4 Michael Hanselmann
161 3d8548c4 Michael Hanselmann
      method = request.get(luxi.KEY_METHOD, None)
162 3d8548c4 Michael Hanselmann
      args = request.get(luxi.KEY_ARGS, None)
163 3d8548c4 Michael Hanselmann
      if method is None or args is None:
164 3d8548c4 Michael Hanselmann
        logging.error("no method or args in request")
165 ffeffa1d Iustin Pop
166 3d8548c4 Michael Hanselmann
167 3d8548c4 Michael Hanselmann
      success = False
168 3d8548c4 Michael Hanselmann
169 3d8548c4 Michael Hanselmann
        result = self._ops.handle_request(method, args)
170 3d8548c4 Michael Hanselmann
        success = True
171 6797ec29 Iustin Pop
      except errors.GenericError, err:
172 6797ec29 Iustin Pop
        success = False
173 6797ec29 Iustin Pop
        result = (err.__class__.__name__, err.args)
174 3d8548c4 Michael Hanselmann
175 3d8548c4 Michael Hanselmann
        logging.error("Unexpected exception", exc_info=True)
176 3d8548c4 Michael Hanselmann
        err = sys.exc_info()
177 3d8548c4 Michael Hanselmann
        result = "Caught exception: %s" % str(err[1])
178 3d8548c4 Michael Hanselmann
179 3d8548c4 Michael Hanselmann
      response = {
180 3d8548c4 Michael Hanselmann
        luxi.KEY_SUCCESS: success,
181 3d8548c4 Michael Hanselmann
        luxi.KEY_RESULT: result,
182 3d8548c4 Michael Hanselmann
183 3d8548c4 Michael Hanselmann
      logging.debug("response: %s", response)
184 dd36d829 Iustin Pop
185 ffeffa1d Iustin Pop
186 ffeffa1d Iustin Pop
  def read_message(self):
187 ffeffa1d Iustin Pop
    while not self._msgs:
188 ffeffa1d Iustin Pop
      data = self.request.recv(self.READ_SIZE)
189 ffeffa1d Iustin Pop
      if not data:
190 ffeffa1d Iustin Pop
        return None
191 ffeffa1d Iustin Pop
      new_msgs = (self._buffer + data).split(self.EOM)
192 ffeffa1d Iustin Pop
      self._buffer = new_msgs.pop()
193 ffeffa1d Iustin Pop
194 ffeffa1d Iustin Pop
    return self._msgs.popleft()
195 ffeffa1d Iustin Pop
196 ffeffa1d Iustin Pop
  def send_message(self, msg):
197 ffeffa1d Iustin Pop
    #print "sending", msg
198 ffeffa1d Iustin Pop
    self.request.sendall(msg + self.EOM)
199 ffeffa1d Iustin Pop
200 ffeffa1d Iustin Pop
201 ffeffa1d Iustin Pop
class ClientOps:
202 ffeffa1d Iustin Pop
  """Class holding high-level client operations."""
203 ffeffa1d Iustin Pop
  def __init__(self, server):
204 ffeffa1d Iustin Pop
    self.server = server
205 ffeffa1d Iustin Pop
206 0bbe448c Michael Hanselmann
  def handle_request(self, method, args):
207 9113300d Michael Hanselmann
    queue = self.server.context.jobqueue
208 0bbe448c Michael Hanselmann
209 0bbe448c Michael Hanselmann
    # TODO: Parameter validation
210 0bbe448c Michael Hanselmann
211 0bbe448c Michael Hanselmann
    if method == luxi.REQ_SUBMIT_JOB:
212 e566ddbd Iustin Pop"Received new job")
213 0bbe448c Michael Hanselmann
      ops = [opcodes.OpCode.LoadOpCode(state) for state in args]
214 4c848b18 Michael Hanselmann
      return queue.SubmitJob(ops)
215 ffeffa1d Iustin Pop
216 2971c913 Iustin Pop
    if method == luxi.REQ_SUBMIT_MANY_JOBS:
217 2971c913 Iustin Pop"Received multiple jobs")
218 2971c913 Iustin Pop
      jobs = []
219 2971c913 Iustin Pop
      for ops in args:
220 2971c913 Iustin Pop
        jobs.append([opcodes.OpCode.LoadOpCode(state) for state in ops])
221 2971c913 Iustin Pop
      return queue.SubmitManyJobs(jobs)
222 2971c913 Iustin Pop
223 0bbe448c Michael Hanselmann
    elif method == luxi.REQ_CANCEL_JOB:
224 3a2c7775 Michael Hanselmann
      job_id = args
225 e566ddbd Iustin Pop"Received job cancel request for %s", job_id)
226 0bbe448c Michael Hanselmann
      return queue.CancelJob(job_id)
227 ffeffa1d Iustin Pop
228 0bbe448c Michael Hanselmann
    elif method == luxi.REQ_ARCHIVE_JOB:
229 3a2c7775 Michael Hanselmann
      job_id = args
230 e566ddbd Iustin Pop"Received job archive request for %s", job_id)
231 0bbe448c Michael Hanselmann
      return queue.ArchiveJob(job_id)
232 0bbe448c Michael Hanselmann
233 07cd723a Iustin Pop
    elif method == luxi.REQ_AUTOARCHIVE_JOBS:
234 f8ad5591 Michael Hanselmann
      (age, timeout) = args
235 e566ddbd Iustin Pop"Received job autoarchive request for age %s, timeout %s",
236 e566ddbd Iustin Pop
                   age, timeout)
237 f8ad5591 Michael Hanselmann
      return queue.AutoArchiveJobs(age, timeout)
238 07cd723a Iustin Pop
239 dfe57c22 Michael Hanselmann
    elif method == luxi.REQ_WAIT_FOR_JOB_CHANGE:
240 5c735209 Iustin Pop
      (job_id, fields, prev_job_info, prev_log_serial, timeout) = args
241 e566ddbd Iustin Pop"Received job poll request for %s", job_id)
242 6c5a7090 Michael Hanselmann
      return queue.WaitForJobChanges(job_id, fields, prev_job_info,
243 5c735209 Iustin Pop
                                     prev_log_serial, timeout)
244 dfe57c22 Michael Hanselmann
245 0bbe448c Michael Hanselmann
    elif method == luxi.REQ_QUERY_JOBS:
246 0bbe448c Michael Hanselmann
      (job_ids, fields) = args
247 e566ddbd Iustin Pop
      if isinstance(job_ids, (tuple, list)) and job_ids:
248 e566ddbd Iustin Pop
        msg = ", ".join(job_ids)
249 e566ddbd Iustin Pop
250 e566ddbd Iustin Pop
        msg = str(job_ids)
251 e566ddbd Iustin Pop"Received job query request for %s", msg)
252 0bbe448c Michael Hanselmann
      return queue.QueryJobs(job_ids, fields)
253 0bbe448c Michael Hanselmann
254 ee6c7b94 Michael Hanselmann
    elif method == luxi.REQ_QUERY_INSTANCES:
255 ec79568d Iustin Pop
      (names, fields, use_locking) = args
256 e566ddbd Iustin Pop"Received instance query request for %s", names)
257 77921a95 Iustin Pop
      if use_locking:
258 77921a95 Iustin Pop
        raise errors.OpPrereqError("Sync queries are not allowed")
259 ec79568d Iustin Pop
      op = opcodes.OpQueryInstances(names=names, output_fields=fields,
260 ec79568d Iustin Pop
261 ee6c7b94 Michael Hanselmann
      return self._Query(op)
262 ee6c7b94 Michael Hanselmann
263 02f7fe54 Michael Hanselmann
    elif method == luxi.REQ_QUERY_NODES:
264 ec79568d Iustin Pop
      (names, fields, use_locking) = args
265 e566ddbd Iustin Pop"Received node query request for %s", names)
266 77921a95 Iustin Pop
      if use_locking:
267 77921a95 Iustin Pop
        raise errors.OpPrereqError("Sync queries are not allowed")
268 ec79568d Iustin Pop
      op = opcodes.OpQueryNodes(names=names, output_fields=fields,
269 ec79568d Iustin Pop
270 02f7fe54 Michael Hanselmann
      return self._Query(op)
271 02f7fe54 Michael Hanselmann
272 32f93223 Michael Hanselmann
    elif method == luxi.REQ_QUERY_EXPORTS:
273 ec79568d Iustin Pop
      nodes, use_locking = args
274 77921a95 Iustin Pop
      if use_locking:
275 77921a95 Iustin Pop
        raise errors.OpPrereqError("Sync queries are not allowed")
276 e566ddbd Iustin Pop"Received exports query request")
277 ec79568d Iustin Pop
      op = opcodes.OpQueryExports(nodes=nodes, use_locking=use_locking)
278 32f93223 Michael Hanselmann
      return self._Query(op)
279 32f93223 Michael Hanselmann
280 ae5849b5 Michael Hanselmann
    elif method == luxi.REQ_QUERY_CONFIG_VALUES:
281 ae5849b5 Michael Hanselmann
      fields = args
282 e566ddbd Iustin Pop"Received config values query request for %s", fields)
283 ae5849b5 Michael Hanselmann
      op = opcodes.OpQueryConfigValues(output_fields=fields)
284 ae5849b5 Michael Hanselmann
      return self._Query(op)
285 ae5849b5 Michael Hanselmann
286 66baeccc Iustin Pop
    elif method == luxi.REQ_QUERY_CLUSTER_INFO:
287 e566ddbd Iustin Pop"Received cluster info query request")
288 66baeccc Iustin Pop
      op = opcodes.OpQueryClusterInfo()
289 66baeccc Iustin Pop
      return self._Query(op)
290 66baeccc Iustin Pop
291 3ccafd0e Iustin Pop
    elif method == luxi.REQ_QUEUE_SET_DRAIN_FLAG:
292 3ccafd0e Iustin Pop
      drain_flag = args
293 e566ddbd Iustin Pop"Received queue drain flag change request to %s",
294 e566ddbd Iustin Pop
295 3ccafd0e Iustin Pop
      return queue.SetDrainFlag(drain_flag)
296 3ccafd0e Iustin Pop
297 0bbe448c Michael Hanselmann
298 e566ddbd Iustin Pop"Received invalid request '%s'", method)
299 e566ddbd Iustin Pop
      raise ValueError("Invalid operation '%s'" % method)
300 ffeffa1d Iustin Pop
301 ee6c7b94 Michael Hanselmann
  def _DummyLog(self, *args):
302 ee6c7b94 Michael Hanselmann
303 ee6c7b94 Michael Hanselmann
304 ee6c7b94 Michael Hanselmann
  def _Query(self, op):
305 ee6c7b94 Michael Hanselmann
    """Runs the specified opcode and returns the result.
306 ee6c7b94 Michael Hanselmann
307 ee6c7b94 Michael Hanselmann
308 ee6c7b94 Michael Hanselmann
    proc = mcpu.Processor(self.server.context)
309 ee6c7b94 Michael Hanselmann
    # TODO: Where should log messages go?
310 e92376d7 Iustin Pop
    return proc.ExecOpCode(op, self._DummyLog, None)
311 ee6c7b94 Michael Hanselmann
312 ffeffa1d Iustin Pop
313 39dcf2ef Guido Trotter
class GanetiContext(object):
314 39dcf2ef Guido Trotter
  """Context common to all ganeti threads.
315 39dcf2ef Guido Trotter
316 39dcf2ef Guido Trotter
  This class creates and holds common objects shared by all threads.
317 39dcf2ef Guido Trotter
318 39dcf2ef Guido Trotter
319 39dcf2ef Guido Trotter
  _instance = None
320 39dcf2ef Guido Trotter
321 39dcf2ef Guido Trotter
  def __init__(self):
322 39dcf2ef Guido Trotter
    """Constructs a new GanetiContext object.
323 39dcf2ef Guido Trotter
324 39dcf2ef Guido Trotter
    There should be only a GanetiContext object at any time, so this
325 39dcf2ef Guido Trotter
    function raises an error if this is not the case.
326 39dcf2ef Guido Trotter
327 39dcf2ef Guido Trotter
328 39dcf2ef Guido Trotter
    assert self.__class__._instance is None, "double GanetiContext instance"
329 39dcf2ef Guido Trotter
330 9113300d Michael Hanselmann
    # Create global configuration object
331 39dcf2ef Guido Trotter
    self.cfg = config.ConfigWriter()
332 9113300d Michael Hanselmann
333 9113300d Michael Hanselmann
    # Locking manager
334 984f7c32 Guido Trotter
    self.glm = locking.GanetiLockManager(
335 39dcf2ef Guido Trotter
336 39dcf2ef Guido Trotter
337 39dcf2ef Guido Trotter
338 9113300d Michael Hanselmann
    # Job queue
339 9113300d Michael Hanselmann
    self.jobqueue = jqueue.JobQueue(self)
340 9113300d Michael Hanselmann
341 39dcf2ef Guido Trotter
    # setting this also locks the class against attribute modifications
342 39dcf2ef Guido Trotter
    self.__class__._instance = self
343 39dcf2ef Guido Trotter
344 39dcf2ef Guido Trotter
  def __setattr__(self, name, value):
345 39dcf2ef Guido Trotter
    """Setting GanetiContext attributes is forbidden after initialization.
346 39dcf2ef Guido Trotter
347 39dcf2ef Guido Trotter
348 39dcf2ef Guido Trotter
    assert self.__class__._instance is None, "Attempt to modify Ganeti Context"
349 39dcf2ef Guido Trotter
    object.__setattr__(self, name, value)
350 39dcf2ef Guido Trotter
351 d8470559 Michael Hanselmann
  def AddNode(self, node):
352 d8470559 Michael Hanselmann
    """Adds a node to the configuration and lock manager.
353 d8470559 Michael Hanselmann
354 d8470559 Michael Hanselmann
355 d8470559 Michael Hanselmann
    # Add it to the configuration
356 d8470559 Michael Hanselmann
357 d8470559 Michael Hanselmann
358 c36176cc Michael Hanselmann
    # If preseeding fails it'll not be added
359 99aabbed Iustin Pop
360 c36176cc Michael Hanselmann
361 d8470559 Michael Hanselmann
    # Add the new node to the Ganeti Lock Manager
362 d8470559 Michael Hanselmann
363 d8470559 Michael Hanselmann
364 d8470559 Michael Hanselmann
  def ReaddNode(self, node):
365 d8470559 Michael Hanselmann
    """Updates a node that's already in the configuration
366 d8470559 Michael Hanselmann
367 d8470559 Michael Hanselmann
368 c36176cc Michael Hanselmann
    # Synchronize the queue again
369 99aabbed Iustin Pop
370 d8470559 Michael Hanselmann
371 d8470559 Michael Hanselmann
  def RemoveNode(self, name):
372 d8470559 Michael Hanselmann
    """Removes a node from the configuration and lock manager.
373 d8470559 Michael Hanselmann
374 d8470559 Michael Hanselmann
375 d8470559 Michael Hanselmann
    # Remove node from configuration
376 d8470559 Michael Hanselmann
377 d8470559 Michael Hanselmann
378 c36176cc Michael Hanselmann
    # Notify job queue
379 c36176cc Michael Hanselmann
380 c36176cc Michael Hanselmann
381 d8470559 Michael Hanselmann
    # Remove the node from the Ganeti Lock Manager
382 d8470559 Michael Hanselmann
    self.glm.remove(locking.LEVEL_NODE, name)
383 d8470559 Michael Hanselmann
384 39dcf2ef Guido Trotter
385 c1f2901b Iustin Pop
def ParseOptions():
386 c1f2901b Iustin Pop
  """Parse the command line options.
387 c1f2901b Iustin Pop
388 c41eea6e Iustin Pop
  @return: (options, args) as from OptionParser.parse_args()
389 c1f2901b Iustin Pop
390 c1f2901b Iustin Pop
391 c1f2901b Iustin Pop
  parser = OptionParser(description="Ganeti master daemon",
392 c1f2901b Iustin Pop
                        usage="%prog [-f] [-d]",
393 c1f2901b Iustin Pop
                        version="%%prog (ganeti) %s" %
394 c1f2901b Iustin Pop
395 c1f2901b Iustin Pop
396 c1f2901b Iustin Pop
  parser.add_option("-f", "--foreground", dest="fork",
397 c1f2901b Iustin Pop
                    help="Don't detach from the current terminal",
398 c1f2901b Iustin Pop
                    default=True, action="store_false")
399 c1f2901b Iustin Pop
  parser.add_option("-d", "--debug", dest="debug",
400 c1f2901b Iustin Pop
                    help="Enable some debug messages",
401 c1f2901b Iustin Pop
                    default=False, action="store_true")
402 5de4474d Iustin Pop
  parser.add_option("--no-voting", dest="no_voting",
403 5de4474d Iustin Pop
                    help="Do not check that the nodes agree on this node"
404 5de4474d Iustin Pop
                    " being the master and start the daemon unconditionally",
405 5de4474d Iustin Pop
                    default=False, action="store_true")
406 c1f2901b Iustin Pop
  options, args = parser.parse_args()
407 c1f2901b Iustin Pop
  return options, args
408 c1f2901b Iustin Pop
409 c1f2901b Iustin Pop
410 36205981 Iustin Pop
def CheckAgreement():
411 36205981 Iustin Pop
  """Check the agreement on who is the master.
412 36205981 Iustin Pop
413 36205981 Iustin Pop
  The function uses a very simple algorithm: we must get more positive
414 36205981 Iustin Pop
  than negative answers. Since in most of the cases we are the master,
415 36205981 Iustin Pop
  we'll use our own config file for getting the node list. In the
416 36205981 Iustin Pop
  future we could collect the current node list from our (possibly
417 36205981 Iustin Pop
  obsolete) known nodes.
418 36205981 Iustin Pop
419 d7cdb55d Iustin Pop
  In order to account for cold-start of all nodes, we retry for up to
420 d7cdb55d Iustin Pop
  a minute until we get a real answer as the top-voted one. If the
421 d7cdb55d Iustin Pop
  nodes are more out-of-sync, for now manual startup of the master
422 d7cdb55d Iustin Pop
  should be attempted.
423 d7cdb55d Iustin Pop
424 d7cdb55d Iustin Pop
  Note that for a even number of nodes cluster, we need at least half
425 d7cdb55d Iustin Pop
  of the nodes (beside ourselves) to vote for us. This creates a
426 d7cdb55d Iustin Pop
  problem on two-node clusters, since in this case we require the
427 d7cdb55d Iustin Pop
  other node to be up too to confirm our status.
428 d7cdb55d Iustin Pop
429 36205981 Iustin Pop
430 36205981 Iustin Pop
  myself = utils.HostInfo().name
431 36205981 Iustin Pop
  #temp instantiation of a config writer, used only to get the node list
432 36205981 Iustin Pop
  cfg = config.ConfigWriter()
433 36205981 Iustin Pop
  node_list = cfg.GetNodeList()
434 36205981 Iustin Pop
  del cfg
435 d7cdb55d Iustin Pop
  retries = 6
436 d7cdb55d Iustin Pop
  while retries > 0:
437 d7cdb55d Iustin Pop
    votes = bootstrap.GatherMasterVotes(node_list)
438 d7cdb55d Iustin Pop
    if not votes:
439 d7cdb55d Iustin Pop
      # empty node list, this is a one node cluster
440 d7cdb55d Iustin Pop
      return True
441 d7cdb55d Iustin Pop
    if votes[0][0] is None:
442 d7cdb55d Iustin Pop
      retries -= 1
443 d7cdb55d Iustin Pop
444 36205981 Iustin Pop
445 d7cdb55d Iustin Pop
446 d7cdb55d Iustin Pop
  if retries == 0:
447 e09fdcfa Iustin Pop
    logging.critical("Cluster inconsistent, most of the nodes didn't answer"
448 e09fdcfa Iustin Pop
                     " after multiple retries. Aborting startup")
449 e09fdcfa Iustin Pop
    return False
450 d7cdb55d Iustin Pop
  # here a real node is at the top of the list
451 d7cdb55d Iustin Pop
  all_votes = sum(item[1] for item in votes)
452 d7cdb55d Iustin Pop
  top_node, top_votes = votes[0]
453 d7cdb55d Iustin Pop
  result = False
454 d7cdb55d Iustin Pop
  if top_node != myself:
455 d7cdb55d Iustin Pop
    logging.critical("It seems we are not the master (top-voted node"
456 bbe19c17 Iustin Pop
                     " is %s with %d out of %d votes)", top_node, top_votes,
457 bbe19c17 Iustin Pop
458 d7cdb55d Iustin Pop
  elif top_votes < all_votes - top_votes:
459 36205981 Iustin Pop
    logging.critical("It seems we are not the master (%d votes for,"
460 d7cdb55d Iustin Pop
                     " %d votes against)", top_votes, all_votes - top_votes)
461 d7cdb55d Iustin Pop
462 d7cdb55d Iustin Pop
    result = True
463 d7cdb55d Iustin Pop
464 d7cdb55d Iustin Pop
  return result
465 36205981 Iustin Pop
466 36205981 Iustin Pop
467 ffeffa1d Iustin Pop
def main():
468 ffeffa1d Iustin Pop
  """Main function"""
469 ffeffa1d Iustin Pop
470 c1f2901b Iustin Pop
  options, args = ParseOptions()
471 c1f2901b Iustin Pop
  utils.debug = options.debug
472 b74159ee Iustin Pop
  utils.no_fork = True
473 c1f2901b Iustin Pop
474 7d88772a Iustin Pop
  if options.fork:
475 7d88772a Iustin Pop
476 7d88772a Iustin Pop
477 4331f6cd Michael Hanselmann
478 4331f6cd Michael Hanselmann
479 4331f6cd Michael Hanselmann
480 c1f2901b Iustin Pop
481 4331f6cd Michael Hanselmann
    # we believe we are the master, let's ask the other nodes...
482 5de4474d Iustin Pop
    if options.no_voting:
483 5de4474d Iustin Pop
      sys.stdout.write("The 'no voting' option has been selected.\n")
484 5de4474d Iustin Pop
      sys.stdout.write("This is dangerous, please confirm by"
485 5de4474d Iustin Pop
                       " typing uppercase 'yes': ")
486 5de4474d Iustin Pop
487 5de4474d Iustin Pop
      confirmation = sys.stdin.readline().strip()
488 5de4474d Iustin Pop
      if confirmation != "YES":
489 5de4474d Iustin Pop
        print "Aborting."
490 5de4474d Iustin Pop
491 5de4474d Iustin Pop
492 5de4474d Iustin Pop
      if not CheckAgreement():
493 5de4474d Iustin Pop
494 36205981 Iustin Pop
495 1cb8d376 Guido Trotter
    dirs = [(constants.RUN_GANETI_DIR, constants.RUN_DIRS_MODE),
496 1cb8d376 Guido Trotter
            (constants.SOCKET_DIR, constants.SOCKET_DIR_MODE),
497 1cb8d376 Guido Trotter
498 9dae41ad Guido Trotter
499 d823660a Guido Trotter
500 227647ac Guido Trotter
    # This is safe to do as the pid file guarantees against
501 227647ac Guido Trotter
    # concurrent execution.
502 227647ac Guido Trotter
503 227647ac Guido Trotter
504 4331f6cd Michael Hanselmann
    master = IOServer(constants.MASTER_SOCKET, ClientRqHandler)
505 4331f6cd Michael Hanselmann
506 4331f6cd Michael Hanselmann
507 ffeffa1d Iustin Pop
508 c1f2901b Iustin Pop
  # become a daemon
509 c1f2901b Iustin Pop
  if options.fork:
510 7d88772a Iustin Pop
511 c1f2901b Iustin Pop
512 99e88451 Iustin Pop
513 4331f6cd Michael Hanselmann
514 15486fa7 Michael Hanselmann
    utils.SetupLogging(constants.LOG_MASTERDAEMON, debug=options.debug,
515 d21d09d6 Iustin Pop
                       stderr_logging=not options.fork, multithreaded=True)
516 3b316acb Iustin Pop
517 15486fa7 Michael Hanselmann"Ganeti master daemon startup")
518 b1b6ea87 Iustin Pop
519 15486fa7 Michael Hanselmann
520 4331f6cd Michael Hanselmann
521 15486fa7 Michael Hanselmann
      # activate ip
522 15486fa7 Michael Hanselmann
      master_node = ssconf.SimpleConfigReader().GetMasterNode()
523 b726aff0 Iustin Pop
      result = rpc.RpcRunner.call_node_start_master(master_node, False)
524 b726aff0 Iustin Pop
      msg = result.RemoteFailMsg()
525 b726aff0 Iustin Pop
      if msg:
526 b726aff0 Iustin Pop
        logging.error("Can't activate master IP address: %s", msg)
527 15486fa7 Michael Hanselmann
528 15486fa7 Michael Hanselmann
529 15486fa7 Michael Hanselmann
530 15486fa7 Michael Hanselmann
531 15486fa7 Michael Hanselmann
532 15486fa7 Michael Hanselmann
533 4331f6cd Michael Hanselmann
534 15486fa7 Michael Hanselmann
535 a4af651e Iustin Pop
536 15486fa7 Michael Hanselmann
537 227647ac Guido Trotter
538 a4af651e Iustin Pop
539 ffeffa1d Iustin Pop
540 ffeffa1d Iustin Pop
if __name__ == "__main__":
541 ffeffa1d Iustin Pop