Statistics
| Branch: | Tag: | Revision:

root / daemons / ganeti-masterd @ 158377f3

History | View | Annotate | Download (17.6 kB)

1 834f8b67 Iustin Pop
#!/usr/bin/python
2 ffeffa1d Iustin Pop
#
3 ffeffa1d Iustin Pop
4 ffeffa1d Iustin Pop
# Copyright (C) 2006, 2007 Google Inc.
5 ffeffa1d Iustin Pop
#
6 ffeffa1d Iustin Pop
# This program is free software; you can redistribute it and/or modify
7 ffeffa1d Iustin Pop
# it under the terms of the GNU General Public License as published by
8 ffeffa1d Iustin Pop
# the Free Software Foundation; either version 2 of the License, or
9 ffeffa1d Iustin Pop
# (at your option) any later version.
10 ffeffa1d Iustin Pop
#
11 ffeffa1d Iustin Pop
# This program is distributed in the hope that it will be useful, but
12 ffeffa1d Iustin Pop
# WITHOUT ANY WARRANTY; without even the implied warranty of
13 ffeffa1d Iustin Pop
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 ffeffa1d Iustin Pop
# General Public License for more details.
15 ffeffa1d Iustin Pop
#
16 ffeffa1d Iustin Pop
# You should have received a copy of the GNU General Public License
17 ffeffa1d Iustin Pop
# along with this program; if not, write to the Free Software
18 ffeffa1d Iustin Pop
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 ffeffa1d Iustin Pop
# 02110-1301, USA.
20 ffeffa1d Iustin Pop
21 ffeffa1d Iustin Pop
22 ffeffa1d Iustin Pop
"""Master daemon program.
23 ffeffa1d Iustin Pop
24 ffeffa1d Iustin Pop
Some classes deviate from the standard style guide since the
25 ffeffa1d Iustin Pop
inheritance from parent classes requires it.
26 ffeffa1d Iustin Pop
27 ffeffa1d Iustin Pop
"""
28 ffeffa1d Iustin Pop
29 7260cfbe Iustin Pop
# pylint: disable-msg=C0103
30 7260cfbe Iustin Pop
# C0103: Invalid name ganeti-masterd
31 ffeffa1d Iustin Pop
32 bbfd0568 René Nussbaumer
import grp
33 bbfd0568 René Nussbaumer
import os
34 bbfd0568 René Nussbaumer
import pwd
35 c1f2901b Iustin Pop
import sys
36 cdd7f900 Guido Trotter
import socket
37 ffeffa1d Iustin Pop
import SocketServer
38 ffeffa1d Iustin Pop
import time
39 bbfd0568 René Nussbaumer
import tempfile
40 ffeffa1d Iustin Pop
import collections
41 96cb3986 Michael Hanselmann
import logging
42 ffeffa1d Iustin Pop
43 c1f2901b Iustin Pop
from optparse import OptionParser
44 ffeffa1d Iustin Pop
45 39dcf2ef Guido Trotter
from ganeti import config
46 ffeffa1d Iustin Pop
from ganeti import constants
47 04ccf5e9 Guido Trotter
from ganeti import daemon
48 ffeffa1d Iustin Pop
from ganeti import mcpu
49 ffeffa1d Iustin Pop
from ganeti import opcodes
50 ffeffa1d Iustin Pop
from ganeti import jqueue
51 39dcf2ef Guido Trotter
from ganeti import locking
52 ffeffa1d Iustin Pop
from ganeti import luxi
53 ffeffa1d Iustin Pop
from ganeti import utils
54 c1f2901b Iustin Pop
from ganeti import errors
55 c1f2901b Iustin Pop
from ganeti import ssconf
56 23e50d39 Michael Hanselmann
from ganeti import workerpool
57 b1b6ea87 Iustin Pop
from ganeti import rpc
58 d7cdb55d Iustin Pop
from ganeti import bootstrap
59 c1f2901b Iustin Pop
60 c1f2901b Iustin Pop
61 23e50d39 Michael Hanselmann
# Size handed to the "ClientReq" worker pool that MasterServer.setup_queue
# creates (presumably the number of worker threads -- confirm against
# workerpool.WorkerPool)
CLIENT_REQUEST_WORKERS = 16
62 23e50d39 Michael Hanselmann
63 c1f2901b Iustin Pop
# Module-level alias for the exit code of the same name in ganeti.constants
EXIT_NOTMASTER = constants.EXIT_NOTMASTER
64 c1f2901b Iustin Pop
# Module-level alias for the exit code of the same name in ganeti.constants
EXIT_NODESETUP_ERROR = constants.EXIT_NODESETUP_ERROR
65 ffeffa1d Iustin Pop
66 ffeffa1d Iustin Pop
67 23e50d39 Michael Hanselmann
class ClientRequestWorker(workerpool.BaseWorker):
  """Worker serving one client connection per task.

  """
  # pylint: disable-msg=W0221
  def RunTask(self, server, request, client_address):
    """Handle a single client connection and close it afterwards.

    @param server: the server that accepted the connection; its
        C{request_handler_class} attribute is called to process the request
    @param request: the connected client socket
    @param client_address: address of the connected client

    """
    try:
      handler_cls = server.request_handler_class
      handler_cls(request, client_address, server)
    finally:
      # The connection is released even if the handler raised
      request.close()
77 23e50d39 Michael Hanselmann
78 23e50d39 Michael Hanselmann
79 cdd7f900 Guido Trotter
class MasterServer(daemon.AsyncStreamServer):
  """Master Server.

  This is the main asynchronous master server. It handles connections to the
  master socket and passes each of them on to a pool of request workers.

  """
  def __init__(self, mainloop, address, handler_class, uid, gid):
    """MasterServer constructor

    The server is first bound to a temporary name in the directory of
    C{address}; after mode and ownership have been set, the socket file is
    renamed to C{address}. Should one of these steps fail, the temporary
    socket file is removed again and the error is propagated.

    @type mainloop: ganeti.daemon.Mainloop
    @param mainloop: Mainloop used to poll for I/O events
    @param address: the unix socket address to bind the MasterServer to
    @param handler_class: handler class for the connections
    @param uid: The uid of the owner of the socket
    @param gid: The gid of the owner of the socket

    """
    # Octal 770: full access for owner and group, none for others. Built with
    # int() because the octal literal syntax differs between Python versions.
    socket_mode = int("770", 8)

    temp_name = tempfile.mktemp(dir=os.path.dirname(address))
    daemon.AsyncStreamServer.__init__(self, socket.AF_UNIX, temp_name)

    renamed = False
    try:
      os.chmod(temp_name, socket_mode)
      os.chown(temp_name, uid, gid)
      os.rename(temp_name, address)
      renamed = True
    finally:
      if not renamed:
        # Nothing else knows the temporary name, so nobody would ever clean
        # up the stale socket file if we left it behind
        utils.RemoveFile(temp_name)

    self.request_handler_class = handler_class
    self.mainloop = mainloop

    # We'll only start threads once we've forked.
    self.context = None
    self.request_workers = None

  def handle_connection(self, connected_socket, client_address):
    """Queue a freshly accepted connection for the request workers.

    L{setup_queue} must have been called before, as it creates the pool.

    """
    self.request_workers.AddTask(self, connected_socket, client_address)

  def setup_queue(self):
    """Create the Ganeti context and the pool of client request workers.

    """
    self.context = GanetiContext()
    self.request_workers = workerpool.WorkerPool("ClientReq",
                                                 CLIENT_REQUEST_WORKERS,
                                                 ClientRequestWorker)

  def server_cleanup(self):
    """Cleanup the server.

    This involves shutting down the processor threads and the master
    socket.

    """
    try:
      self.close()
    finally:
      # Both attributes stay None if setup_queue() was never reached
      if self.request_workers:
        self.request_workers.TerminateWorkers()
      if self.context:
        self.context.jobqueue.Shutdown()
133 ffeffa1d Iustin Pop
134 ffeffa1d Iustin Pop
135 ffeffa1d Iustin Pop
class ClientRqHandler(SocketServer.BaseRequestHandler):
  """Client handler

  Reads LUXI requests from the client socket, executes them through
  L{ClientOps} and sends one response per request.

  """
  READ_SIZE = 4096

  def setup(self):
    """Initialise the per-connection receive buffer, queue and operations.

    """
    # pylint: disable-msg=W0201
    # setup() is the api for initialising for this class
    self._buffer = ""
    self._msgs = collections.deque()
    self._ops = ClientOps(self.server)

  def handle(self):
    """Serve requests until the client closes the connection.

    """
    while True:
      request_msg = self.read_message()
      if request_msg is None:
        break

      (method, args) = luxi.ParseRequest(request_msg)

      try:
        reply = self._ops.handle_request(method, args)
      except errors.GenericError:
        err = sys.exc_info()[1]
        logging.exception("Unexpected exception")
        ok = False
        reply = errors.EncodeException(err)
      except:
        # Anything else is reported to the client as plain text
        logging.exception("Unexpected exception")
        ok = False
        reply = "Caught exception: %s" % str(sys.exc_info()[1])
      else:
        ok = True

      self.send_message(luxi.FormatResponse(ok, reply))

    logging.debug("client closed connection")

  def read_message(self):
    """Return the next complete message, or None at end of stream.

    Data is received in chunks of at most C{READ_SIZE} bytes and split on
    C{constants.LUXI_EOM}; an incomplete trailing part is kept in the buffer
    until more data arrives.

    """
    pending = self._msgs
    while not pending:
      chunk = self.request.recv(self.READ_SIZE)
      if not chunk:
        return None
      pieces = (self._buffer + chunk).split(constants.LUXI_EOM)
      # The last piece has not been terminated yet (it may be empty)
      self._buffer = pieces[-1]
      pending.extend(pieces[:-1])
    return pending.popleft()

  def send_message(self, msg):
    """Send a message, followed by the end-of-message marker, to the client.

    """
    # TODO: sendall is not guaranteed to send everything
    terminated = msg + constants.LUXI_EOM
    self.request.sendall(terminated)
181 ffeffa1d Iustin Pop
182 ffeffa1d Iustin Pop
183 ffeffa1d Iustin Pop
class ClientOps:
  """Class holding high-level client operations."""
  def __init__(self, server):
    """Remember the server whose context is used to execute requests.

    """
    self.server = server

  @staticmethod
  def _RejectLocking(use_locking):
    """Refuse queries which ask for locking.

    @raise errors.OpPrereqError: if C{use_locking} is true

    """
    if use_locking:
      raise errors.OpPrereqError("Sync queries are not allowed",
                                 errors.ECODE_INVAL)

  def handle_request(self, method, args): # pylint: disable-msg=R0911
    """Execute a single LUXI request and return its result.

    @param method: one of the C{luxi.REQ_*} request names
    @param args: the request arguments; their layout depends on C{method}
    @return: whatever the job queue call or query opcode returns
    @raise ValueError: if C{method} is not a known request

    """
    queue = self.server.context.jobqueue

    # TODO: Parameter validation

    # TODO: Rewrite to not exit in each 'if/elif' branch

    # Every branch below returns or raises, hence plain "if" is sufficient

    if method == luxi.REQ_SUBMIT_JOB:
      logging.info("Received new job")
      return queue.SubmitJob([opcodes.OpCode.LoadOpCode(state)
                              for state in args])

    if method == luxi.REQ_SUBMIT_MANY_JOBS:
      logging.info("Received multiple jobs")
      return queue.SubmitManyJobs([[opcodes.OpCode.LoadOpCode(state)
                                    for state in job_def]
                                   for job_def in args])

    if method == luxi.REQ_CANCEL_JOB:
      logging.info("Received job cancel request for %s", args)
      return queue.CancelJob(args)

    if method == luxi.REQ_ARCHIVE_JOB:
      logging.info("Received job archive request for %s", args)
      return queue.ArchiveJob(args)

    if method == luxi.REQ_AUTOARCHIVE_JOBS:
      (age, timeout) = args
      logging.info("Received job autoarchive request for age %s, timeout %s",
                   age, timeout)
      return queue.AutoArchiveJobs(age, timeout)

    if method == luxi.REQ_WAIT_FOR_JOB_CHANGE:
      (job_id, fields, prev_job_info, prev_log_serial, timeout) = args
      logging.info("Received job poll request for %s", job_id)
      return queue.WaitForJobChanges(job_id, fields, prev_job_info,
                                     prev_log_serial, timeout)

    if method == luxi.REQ_QUERY_JOBS:
      (job_ids, fields) = args
      # Only non-empty sequences are joined; anything else is logged as-is
      if not (isinstance(job_ids, (tuple, list)) and job_ids):
        id_text = str(job_ids)
      else:
        id_text = utils.CommaJoin(job_ids)
      logging.info("Received job query request for %s", id_text)
      return queue.QueryJobs(job_ids, fields)

    if method == luxi.REQ_QUERY_INSTANCES:
      (names, fields, use_locking) = args
      logging.info("Received instance query request for %s", names)
      self._RejectLocking(use_locking)
      return self._Query(opcodes.OpQueryInstances(names=names,
                                                  output_fields=fields,
                                                  use_locking=use_locking))

    if method == luxi.REQ_QUERY_NODES:
      (names, fields, use_locking) = args
      logging.info("Received node query request for %s", names)
      self._RejectLocking(use_locking)
      return self._Query(opcodes.OpQueryNodes(names=names,
                                              output_fields=fields,
                                              use_locking=use_locking))

    if method == luxi.REQ_QUERY_EXPORTS:
      (nodes, use_locking) = args
      self._RejectLocking(use_locking)
      logging.info("Received exports query request")
      return self._Query(opcodes.OpQueryExports(nodes=nodes,
                                                use_locking=use_locking))

    if method == luxi.REQ_QUERY_CONFIG_VALUES:
      logging.info("Received config values query request for %s", args)
      return self._Query(opcodes.OpQueryConfigValues(output_fields=args))

    if method == luxi.REQ_QUERY_CLUSTER_INFO:
      logging.info("Received cluster info query request")
      return self._Query(opcodes.OpQueryClusterInfo())

    if method == luxi.REQ_QUERY_TAGS:
      (kind, name) = args
      logging.info("Received tags query request")
      return self._Query(opcodes.OpGetTags(kind=kind, name=name))

    if method == luxi.REQ_QUEUE_SET_DRAIN_FLAG:
      logging.info("Received queue drain flag change request to %s",
                   args)
      return queue.SetDrainFlag(args)

    if method == luxi.REQ_SET_WATCHER_PAUSE:
      (until, ) = args

      if until is not None:
        if not isinstance(until, (int, float)):
          raise TypeError("Duration must be an integer or float")

        if until < time.time():
          raise errors.GenericError("Unable to set pause end time in the past")

        logging.info("Received request to pause the watcher until %s", until)
      else:
        logging.info("Received request to no longer pause the watcher")

      return _SetWatcherPause(until)

    logging.info("Received invalid request '%s'", method)
    raise ValueError("Invalid operation '%s'" % method)

  def _Query(self, op):
    """Runs the specified opcode and returns the result.

    """
    # Queries don't have a job id
    processor = mcpu.Processor(self.server.context, None)
    return processor.ExecOpCode(op, None)
317 ee6c7b94 Michael Hanselmann
318 ffeffa1d Iustin Pop
319 39dcf2ef Guido Trotter
class GanetiContext(object):
  """Context common to all ganeti threads.

  A single instance holds the configuration, the lock manager and the job
  queue which are shared by all threads.

  """
  # pylint: disable-msg=W0212
  # we do want to ensure a singleton here
  _instance = None

  def __init__(self):
    """Constructs a new GanetiContext object.

    Only one GanetiContext may exist at any time; creating a second one
    trips an assertion.

    """
    cls = self.__class__
    assert cls._instance is None, "double GanetiContext instance"

    # Create global configuration object
    self.cfg = config.ConfigWriter()

    # Locking manager, seeded with the currently configured nodes/instances
    self.glm = locking.GanetiLockManager(self.cfg.GetNodeList(),
                                         self.cfg.GetInstanceList())

    # Job queue
    self.jobqueue = jqueue.JobQueue(self)

    # setting this also locks the class against attribute modifications
    cls._instance = self

  def __setattr__(self, name, value):
    """Setting GanetiContext attributes is forbidden after initialization.

    """
    assert self.__class__._instance is None, "Attempt to modify Ganeti Context"
    object.__setattr__(self, name, value)

  def AddNode(self, node, ec_id):
    """Adds a node to the configuration, job queue and lock manager.

    """
    # Configuration comes first
    self.cfg.AddNode(node, ec_id)

    # If preseeding fails it'll not be added
    self.jobqueue.AddNode(node)

    # Finally register the node with the Ganeti Lock Manager
    self.glm.add(locking.LEVEL_NODE, node.name)

  def ReaddNode(self, node):
    """Updates a node that's already in the configuration

    """
    # Synchronize the queue again
    self.jobqueue.AddNode(node)

  def RemoveNode(self, name):
    """Removes a node from the configuration, job queue and lock manager.

    """
    # Configuration comes first
    self.cfg.RemoveNode(name)

    # Notify job queue
    self.jobqueue.RemoveNode(name)

    # Finally drop the node from the Ganeti Lock Manager
    self.glm.remove(locking.LEVEL_NODE, name)
391 d8470559 Michael Hanselmann
392 39dcf2ef Guido Trotter
393 05e50653 Michael Hanselmann
def _SetWatcherPause(until):
  """Creates or removes the watcher pause file.

  @type until: None or number
  @param until: Unix timestamp saying until when the watcher shouldn't run;
      it is written to the file as an integer. None removes the pause file.
  @return: the C{until} value, unchanged

  """
  pause_file = constants.WATCHER_PAUSEFILE

  if until is not None:
    utils.WriteFile(pause_file, data="%d\n" % (until, ))
  else:
    utils.RemoveFile(pause_file)

  return until
407 28b498cd Michael Hanselmann
408 05e50653 Michael Hanselmann
409 36205981 Iustin Pop
def CheckAgreement():
  """Check the agreement on who is the master.

  The algorithm is very simple: this node must be the top-voted one and
  must not get fewer votes for than against. Since in most cases we are the
  master, our own config file is used for getting the node list. In the
  future we could collect the current node list from our (possibly obsolete)
  known nodes.

  In order to account for a cold start of all nodes, the vote is repeated
  (six attempts, ten seconds apart) until a real answer is the top-voted
  one. If the nodes are more out-of-sync, for now manual startup of the
  master should be attempted.

  Note that on a cluster with an even number of nodes, we need at least half
  of the nodes (besides ourselves) to vote for us. This creates a problem on
  two-node clusters, since in this case we require the other node to be up
  too to confirm our status.

  @rtype: boolean
  @return: True if we may act as master, False otherwise

  """
  myself = utils.HostInfo().name
  # temporary config writer, needed only for reading the node list
  cfg = config.ConfigWriter()
  node_list = cfg.GetNodeList()
  del cfg

  for _ in range(6):
    votes = bootstrap.GatherMasterVotes(node_list)
    if not votes:
      # empty node list, this is a one node cluster
      return True
    if votes[0][0] is not None:
      # a real node is at the top of the list
      break
    time.sleep(10)
  else:
    # All attempts ended with "no answer" as the top-voted result
    logging.critical("Cluster inconsistent, most of the nodes didn't answer"
                     " after multiple retries. Aborting startup")
    logging.critical("Use the --no-voting option if you understand what"
                     " effects it has on the cluster state")
    return False

  top_node, top_votes = votes[0]
  all_votes = sum(entry[1] for entry in votes)

  if top_node != myself:
    logging.critical("It seems we are not the master (top-voted node"
                     " is %s with %d out of %d votes)", top_node, top_votes,
                     all_votes)
    return False

  votes_against = all_votes - top_votes
  if top_votes < votes_against:
    logging.critical("It seems we are not the master (%d votes for,"
                     " %d votes against)", top_votes, votes_against)
    return False

  return True
467 36205981 Iustin Pop
468 6c948699 Michael Hanselmann
469 ed0efaa5 Michael Hanselmann
def CheckAgreementWithRpc():
  """Run L{CheckAgreement} with the RPC layer initialised around it.

  @return: the result of L{CheckAgreement}

  """
  rpc.Init()
  try:
    agreed = CheckAgreement()
  finally:
    # The RPC layer is shut down even if the check raised
    rpc.Shutdown()
  return agreed
475 ffeffa1d Iustin Pop
476 c1f2901b Iustin Pop
477 ed0efaa5 Michael Hanselmann
def CheckMasterd(options, args):
  """Initial checks whether to run or exit with a failure.

  On success the uid/gid to be used for the master socket are stored in
  C{options.uid} and C{options.gid}; on any failure the process exits with
  C{constants.EXIT_FAILURE}.

  @param options: the parsed command line options
  @param args: the remaining command line arguments; must be empty

  """
  if args: # masterd doesn't take any arguments
    sys.stderr.write(("Usage: %s [-f] [-d]" % sys.argv[0]) + "\n")
    sys.exit(constants.EXIT_FAILURE)

  ssconf.CheckMaster(options.debug)

  try:
    options.uid = pwd.getpwnam(constants.MASTERD_USER).pw_uid
    options.gid = grp.getgrnam(constants.DAEMONS_GROUP).gr_gid
  except KeyError:
    sys.stderr.write(("User or group not existing on system: %s:%s" %
                      (constants.MASTERD_USER, constants.DAEMONS_GROUP)) +
                     "\n")
    sys.exit(constants.EXIT_FAILURE)

  # If CheckMaster didn't fail we believe we are the master, but we have to
  # confirm with the other nodes.
  if options.no_voting:
    if not options.yes_do_it:
      # Voting is skipped only after an explicit confirmation
      sys.stdout.write("The 'no voting' option has been selected.\n")
      sys.stdout.write("This is dangerous, please confirm by"
                       " typing uppercase 'yes': ")
      sys.stdout.flush()

      answer = sys.stdin.readline().strip()
      if answer != "YES":
        sys.stderr.write("Aborting." + "\n")
        sys.exit(constants.EXIT_FAILURE)

    return

  # CheckAgreement uses RPC and threads, hence it needs to be run in a separate
  # process before we call utils.Daemonize in the current process.
  agreed = utils.RunInSeparateProcess(CheckAgreementWithRpc)
  if not agreed:
    sys.exit(constants.EXIT_FAILURE)
518 ed0efaa5 Michael Hanselmann
519 ed0efaa5 Michael Hanselmann
520 2d54e29c Iustin Pop
def ExecMasterd(options, args): # pylint: disable-msg=W0613
  """Main master daemon function, executed with the PID file held.

  @param options: the parsed command line options; C{uid} and C{gid} are
      used as the owner of the master socket
  @param args: unused

  """
  socket_path = constants.MASTER_SOCKET

  # This is safe to do as the pid file guarantees against
  # concurrent execution.
  utils.RemoveFile(socket_path)

  mainloop = daemon.Mainloop()
  server = MasterServer(mainloop, socket_path, ClientRqHandler,
                        options.uid, options.gid)
  try:
    rpc.Init()
    try:
      # activate ip
      master_node = ssconf.SimpleStore().GetMasterNode()
      start_result = rpc.RpcRunner.call_node_start_master(master_node,
                                                          False, False)
      failure = start_result.fail_msg
      if failure:
        # Startup continues even without the master IP
        logging.error("Can't activate master IP address: %s", failure)

      server.setup_queue()
      try:
        mainloop.Run()
      finally:
        server.server_cleanup()
    finally:
      rpc.Shutdown()
  finally:
    utils.RemoveFile(socket_path)
550 a4af651e Iustin Pop
551 ffeffa1d Iustin Pop
552 04ccf5e9 Guido Trotter
def main():
  """Main function

  Builds the command line parser and passes control to the generic daemon
  startup code, with L{CheckMasterd} and L{ExecMasterd} as callbacks.

  """
  version_text = "%%prog (ganeti) %s" % constants.RELEASE_VERSION
  parser = OptionParser(description="Ganeti master daemon",
                        usage="%prog [-f] [-d]",
                        version=version_text)

  parser.add_option("--no-voting", action="store_true", default=False,
                    dest="no_voting",
                    help="Do not check that the nodes agree on this node"
                    " being the master and start the daemon unconditionally")
  parser.add_option("--yes-do-it", action="store_true", default=False,
                    dest="yes_do_it",
                    help="Override interactive check for --no-voting")

  # (directory, mode) pairs passed on to the generic daemon code
  dirs = [
    (constants.RUN_GANETI_DIR, constants.RUN_DIRS_MODE),
    (constants.SOCKET_DIR, constants.SOCKET_DIR_MODE),
    ]

  daemon.GenericMain(constants.MASTERD, parser, dirs,
                     CheckMasterd, ExecMasterd,
                     multithreaded=True)


# Run the daemon only when executed as a script
if __name__ == "__main__":
  main()