Statistics
| Branch: | Tag: | Revision:

root / lib / server / masterd.py @ 29d91329

History | View | Annotate | Download (19.5 kB)

1 834f8b67 Iustin Pop
#!/usr/bin/python
2 ffeffa1d Iustin Pop
#
3 ffeffa1d Iustin Pop
4 b705c7a6 Manuel Franceschini
# Copyright (C) 2006, 2007, 2010 Google Inc.
5 ffeffa1d Iustin Pop
#
6 ffeffa1d Iustin Pop
# This program is free software; you can redistribute it and/or modify
7 ffeffa1d Iustin Pop
# it under the terms of the GNU General Public License as published by
8 ffeffa1d Iustin Pop
# the Free Software Foundation; either version 2 of the License, or
9 ffeffa1d Iustin Pop
# (at your option) any later version.
10 ffeffa1d Iustin Pop
#
11 ffeffa1d Iustin Pop
# This program is distributed in the hope that it will be useful, but
12 ffeffa1d Iustin Pop
# WITHOUT ANY WARRANTY; without even the implied warranty of
13 ffeffa1d Iustin Pop
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 ffeffa1d Iustin Pop
# General Public License for more details.
15 ffeffa1d Iustin Pop
#
16 ffeffa1d Iustin Pop
# You should have received a copy of the GNU General Public License
17 ffeffa1d Iustin Pop
# along with this program; if not, write to the Free Software
18 ffeffa1d Iustin Pop
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 ffeffa1d Iustin Pop
# 02110-1301, USA.
20 ffeffa1d Iustin Pop
21 ffeffa1d Iustin Pop
22 ffeffa1d Iustin Pop
"""Master daemon program.
23 ffeffa1d Iustin Pop

24 ffeffa1d Iustin Pop
Some classes deviate from the standard style guide since the
25 ffeffa1d Iustin Pop
inheritance from parent classes requires it.
26 ffeffa1d Iustin Pop

27 ffeffa1d Iustin Pop
"""
28 ffeffa1d Iustin Pop
29 7260cfbe Iustin Pop
# pylint: disable-msg=C0103
30 7260cfbe Iustin Pop
# C0103: Invalid name ganeti-masterd
31 ffeffa1d Iustin Pop
32 bbfd0568 René Nussbaumer
import grp
33 bbfd0568 René Nussbaumer
import os
34 bbfd0568 René Nussbaumer
import pwd
35 c1f2901b Iustin Pop
import sys
36 cdd7f900 Guido Trotter
import socket
37 ffeffa1d Iustin Pop
import time
38 bbfd0568 René Nussbaumer
import tempfile
39 96cb3986 Michael Hanselmann
import logging
40 ffeffa1d Iustin Pop
41 c1f2901b Iustin Pop
from optparse import OptionParser
42 ffeffa1d Iustin Pop
43 39dcf2ef Guido Trotter
from ganeti import config
44 ffeffa1d Iustin Pop
from ganeti import constants
45 04ccf5e9 Guido Trotter
from ganeti import daemon
46 ffeffa1d Iustin Pop
from ganeti import mcpu
47 ffeffa1d Iustin Pop
from ganeti import opcodes
48 ffeffa1d Iustin Pop
from ganeti import jqueue
49 39dcf2ef Guido Trotter
from ganeti import locking
50 ffeffa1d Iustin Pop
from ganeti import luxi
51 ffeffa1d Iustin Pop
from ganeti import utils
52 c1f2901b Iustin Pop
from ganeti import errors
53 c1f2901b Iustin Pop
from ganeti import ssconf
54 23e50d39 Michael Hanselmann
from ganeti import workerpool
55 b1b6ea87 Iustin Pop
from ganeti import rpc
56 d7cdb55d Iustin Pop
from ganeti import bootstrap
57 a744b676 Manuel Franceschini
from ganeti import netutils
58 c1f2901b Iustin Pop
59 c1f2901b Iustin Pop
60 23e50d39 Michael Hanselmann
# Value handed to workerpool.WorkerPool when MasterServer.setup_queue creates
# the "ClientReq" pool (presumably the number of worker threads -- confirm
# against workerpool.WorkerPool).
CLIENT_REQUEST_WORKERS = 16
61 23e50d39 Michael Hanselmann
62 c1f2901b Iustin Pop
# Exit codes re-exported from ganeti.constants under module-level names
EXIT_NOTMASTER = constants.EXIT_NOTMASTER
63 c1f2901b Iustin Pop
EXIT_NODESETUP_ERROR = constants.EXIT_NODESETUP_ERROR
64 ffeffa1d Iustin Pop
65 ffeffa1d Iustin Pop
66 23e50d39 Michael Hanselmann
class ClientRequestWorker(workerpool.BaseWorker):
  """Worker that serves one client request taken from the worker pool.

  """
  # pylint: disable-msg=W0221
  def RunTask(self, server, message, client):
    """Parses, executes and answers a single client request.

    @param server: server object; its context is used to execute the
        request and its C{awaker} is signalled once a reply is queued
    @param message: the raw request as received from the client
    @param client: client handler; the reply is sent through
        C{send_message} and C{close_log} is called if the request cannot
        be parsed or the reply cannot be sent

    """
    try:
      (method, args, version) = luxi.ParseRequest(message)
    except luxi.ProtocolError:
      # Nothing sensible can be answered to an unparsable request
      logging.error("Protocol Error: %s", sys.exc_info()[1])
      client.close_log()
      return

    # Only flipped to True once the handler returned normally
    success = False
    try:
      # A version is optional in the request, but if present it must match
      version_ok = (version is None or version == constants.LUXI_VERSION)
      if not version_ok:
        raise errors.LuxiError("LUXI version mismatch, server %s, request %s" %
                               (constants.LUXI_VERSION, version))

      result = ClientOps(server).handle_request(method, args)
      success = True
    except errors.GenericError:
      # Ganeti errors are sent back to the client in encoded form
      err = sys.exc_info()[1]
      logging.exception("Unexpected exception")
      result = errors.EncodeException(err)
    except: # pylint: disable-msg=W0702
      # Anything else is reported to the client as plain text
      err = sys.exc_info()[1]
      logging.exception("Unexpected exception")
      result = "Caught exception: %s" % str(err)

    try:
      client.send_message(luxi.FormatResponse(success, result))
      # awake the main thread so that it can write out the data.
      server.awaker.signal()
    except: # pylint: disable-msg=W0702
      logging.exception("Send error")
      client.close_log()
107 7e5a6e86 Guido Trotter
108 7e5a6e86 Guido Trotter
109 7e5a6e86 Guido Trotter
class MasterClientHandler(daemon.AsyncTerminatedMessageStream):
  """Handler for master peers.

  Every complete message received from a peer is queued on the server's
  request worker pool.

  """
  # Passed as the last argument to the base class constructor
  _MAX_UNHANDLED = 1

  def __init__(self, server, connected_socket, client_address, family):
    """Initializes the message stream and remembers the owning server.

    @param server: the server this connection belongs to
    @param connected_socket: the already connected client socket
    @param client_address: address of the peer
    @param family: socket family of the connection

    """
    base = daemon.AsyncTerminatedMessageStream
    base.__init__(self, connected_socket, client_address,
                  constants.LUXI_EOM, family, self._MAX_UNHANDLED)
    self.server = server

  def handle_message(self, message, _):
    """Queues a received message for processing by a request worker.

    The second positional argument is ignored.

    """
    srv = self.server
    task = (srv, message, self)
    srv.request_workers.AddTask(task)
123 23e50d39 Michael Hanselmann
124 23e50d39 Michael Hanselmann
125 cdd7f900 Guido Trotter
class MasterServer(daemon.AsyncStreamServer):
126 cdd7f900 Guido Trotter
  """Master Server.
127 ffeffa1d Iustin Pop

128 cdd7f900 Guido Trotter
  This is the main asynchronous master server. It handles connections to the
129 cdd7f900 Guido Trotter
  master socket.
130 ffeffa1d Iustin Pop

131 ffeffa1d Iustin Pop
  """
132 7e5a6e86 Guido Trotter
  family = socket.AF_UNIX
133 7e5a6e86 Guido Trotter
134 7e5a6e86 Guido Trotter
  def __init__(self, mainloop, address, uid, gid):
135 cdd7f900 Guido Trotter
    """MasterServer constructor
136 ce862cd5 Guido Trotter

137 cdd7f900 Guido Trotter
    @type mainloop: ganeti.daemon.Mainloop
138 cdd7f900 Guido Trotter
    @param mainloop: Mainloop used to poll for I/O events
139 cdd7f900 Guido Trotter
    @param address: the unix socket address to bind the MasterServer to
140 bbfd0568 René Nussbaumer
    @param uid: The uid of the owner of the socket
141 bbfd0568 René Nussbaumer
    @param gid: The gid of the owner of the socket
142 ce862cd5 Guido Trotter

143 ce862cd5 Guido Trotter
    """
144 bbfd0568 René Nussbaumer
    temp_name = tempfile.mktemp(dir=os.path.dirname(address))
145 7e5a6e86 Guido Trotter
    daemon.AsyncStreamServer.__init__(self, self.family, temp_name)
146 bbfd0568 René Nussbaumer
    os.chmod(temp_name, 0770)
147 bbfd0568 René Nussbaumer
    os.chown(temp_name, uid, gid)
148 bbfd0568 René Nussbaumer
    os.rename(temp_name, address)
149 bbfd0568 René Nussbaumer
150 cdd7f900 Guido Trotter
    self.mainloop = mainloop
151 7e5a6e86 Guido Trotter
    self.awaker = daemon.AsyncAwaker()
152 50a3fbb2 Michael Hanselmann
153 50a3fbb2 Michael Hanselmann
    # We'll only start threads once we've forked.
154 9113300d Michael Hanselmann
    self.context = None
155 23e50d39 Michael Hanselmann
    self.request_workers = None
156 50a3fbb2 Michael Hanselmann
157 cdd7f900 Guido Trotter
  def handle_connection(self, connected_socket, client_address):
158 7e5a6e86 Guido Trotter
    # TODO: add connection count and limit the number of open connections to a
159 7e5a6e86 Guido Trotter
    # maximum number to avoid breaking for lack of file descriptors or memory.
160 7e5a6e86 Guido Trotter
    MasterClientHandler(self, connected_socket, client_address, self.family)
161 cdd7f900 Guido Trotter
162 50a3fbb2 Michael Hanselmann
  def setup_queue(self):
163 9113300d Michael Hanselmann
    self.context = GanetiContext()
164 89e2b4d2 Michael Hanselmann
    self.request_workers = workerpool.WorkerPool("ClientReq",
165 89e2b4d2 Michael Hanselmann
                                                 CLIENT_REQUEST_WORKERS,
166 23e50d39 Michael Hanselmann
                                                 ClientRequestWorker)
167 ffeffa1d Iustin Pop
168 c1f2901b Iustin Pop
  def server_cleanup(self):
169 c1f2901b Iustin Pop
    """Cleanup the server.
170 c1f2901b Iustin Pop

171 c1f2901b Iustin Pop
    This involves shutting down the processor threads and the master
172 c1f2901b Iustin Pop
    socket.
173 c1f2901b Iustin Pop

174 c1f2901b Iustin Pop
    """
175 50a3fbb2 Michael Hanselmann
    try:
176 cdd7f900 Guido Trotter
      self.close()
177 50a3fbb2 Michael Hanselmann
    finally:
178 23e50d39 Michael Hanselmann
      if self.request_workers:
179 36088c4c Michael Hanselmann
        self.request_workers.TerminateWorkers()
180 9113300d Michael Hanselmann
      if self.context:
181 9113300d Michael Hanselmann
        self.context.jobqueue.Shutdown()
182 ffeffa1d Iustin Pop
183 ffeffa1d Iustin Pop
184 ffeffa1d Iustin Pop
class ClientOps:
  """Class holding high-level client operations."""
  def __init__(self, server):
    """Remembers the server whose context is used to serve requests.

    """
    self.server = server

  @staticmethod
  def _RejectLockedQuery(use_locking):
    """Refuses queries which ask for locking.

    @raise errors.OpPrereqError: if C{use_locking} is true

    """
    if use_locking:
      raise errors.OpPrereqError("Sync queries are not allowed",
                                 errors.ECODE_INVAL)

  def handle_request(self, method, args):
    """Executes a single client request and returns its result.

    @param method: one of the C{luxi.REQ_*} request identifiers
    @param args: the request arguments; their expected layout depends on
        the method
    @return: whatever the job queue, lock manager, opcode or watcher-pause
        operation selected by C{method} returns
    @raise ValueError: if C{method} is not a known request
    @raise errors.OpPrereqError: if an instance, node or export query asks
        for locking
    @raise TypeError: if the watcher pause time is neither None, an
        integer nor a float
    @raise errors.GenericError: if the watcher pause time is in the past

    """
    context = self.server.context
    queue = context.jobqueue

    # TODO: Parameter validation

    if method == luxi.REQ_SUBMIT_JOB:
      logging.info("Received new job")
      job_ops = [opcodes.OpCode.LoadOpCode(state) for state in args]
      result = queue.SubmitJob(job_ops)

    elif method == luxi.REQ_SUBMIT_MANY_JOBS:
      logging.info("Received multiple jobs")
      all_jobs = [[opcodes.OpCode.LoadOpCode(state) for state in job_states]
                  for job_states in args]
      result = queue.SubmitManyJobs(all_jobs)

    elif method == luxi.REQ_CANCEL_JOB:
      job_id = args
      logging.info("Received job cancel request for %s", job_id)
      result = queue.CancelJob(job_id)

    elif method == luxi.REQ_ARCHIVE_JOB:
      job_id = args
      logging.info("Received job archive request for %s", job_id)
      result = queue.ArchiveJob(job_id)

    elif method == luxi.REQ_AUTOARCHIVE_JOBS:
      (age, timeout) = args
      logging.info("Received job autoarchive request for age %s, timeout %s",
                   age, timeout)
      result = queue.AutoArchiveJobs(age, timeout)

    elif method == luxi.REQ_WAIT_FOR_JOB_CHANGE:
      (job_id, fields, prev_job_info, prev_log_serial, timeout) = args
      logging.info("Received job poll request for %s", job_id)
      result = queue.WaitForJobChanges(job_id, fields, prev_job_info,
                                       prev_log_serial, timeout)

    elif method == luxi.REQ_QUERY_JOBS:
      (job_ids, fields) = args
      # Non-empty sequences are logged as a comma-separated list
      if isinstance(job_ids, (tuple, list)) and job_ids:
        description = utils.CommaJoin(job_ids)
      else:
        description = str(job_ids)
      logging.info("Received job query request for %s", description)
      result = queue.QueryJobs(job_ids, fields)

    elif method == luxi.REQ_QUERY_INSTANCES:
      (names, fields, use_locking) = args
      logging.info("Received instance query request for %s", names)
      self._RejectLockedQuery(use_locking)
      result = self._Query(opcodes.OpQueryInstances(names=names,
                                                    output_fields=fields,
                                                    use_locking=use_locking))

    elif method == luxi.REQ_QUERY_NODES:
      (names, fields, use_locking) = args
      logging.info("Received node query request for %s", names)
      self._RejectLockedQuery(use_locking)
      result = self._Query(opcodes.OpQueryNodes(names=names,
                                                output_fields=fields,
                                                use_locking=use_locking))

    elif method == luxi.REQ_QUERY_EXPORTS:
      (nodes, use_locking) = args
      self._RejectLockedQuery(use_locking)
      logging.info("Received exports query request")
      result = self._Query(opcodes.OpQueryExports(nodes=nodes,
                                                  use_locking=use_locking))

    elif method == luxi.REQ_QUERY_CONFIG_VALUES:
      fields = args
      logging.info("Received config values query request for %s", fields)
      result = self._Query(opcodes.OpQueryConfigValues(output_fields=fields))

    elif method == luxi.REQ_QUERY_CLUSTER_INFO:
      logging.info("Received cluster info query request")
      result = self._Query(opcodes.OpQueryClusterInfo())

    elif method == luxi.REQ_QUERY_TAGS:
      (kind, name) = args
      logging.info("Received tags query request")
      result = self._Query(opcodes.OpGetTags(kind=kind, name=name))

    elif method == luxi.REQ_QUERY_LOCKS:
      (fields, sync) = args
      logging.info("Received locks query request")
      result = context.glm.QueryLocks(fields, sync)

    elif method == luxi.REQ_QUEUE_SET_DRAIN_FLAG:
      drain_flag = args
      logging.info("Received queue drain flag change request to %s",
                   drain_flag)
      result = queue.SetDrainFlag(drain_flag)

    elif method == luxi.REQ_SET_WATCHER_PAUSE:
      (until, ) = args

      if until is not None:
        if not isinstance(until, (int, float)):
          raise TypeError("Duration must be an integer or float")

        if until < time.time():
          raise errors.GenericError("Unable to set pause end time in the past")

        logging.info("Received request to pause the watcher until %s", until)
      else:
        logging.info("Received request to no longer pause the watcher")

      result = _SetWatcherPause(until)

    else:
      logging.info("Received invalid request '%s'", method)
      raise ValueError("Invalid operation '%s'" % method)

    return result

  def _Query(self, op):
    """Runs the specified opcode and returns the result.

    @param op: the opcode to execute
    @return: the value returned by executing the opcode

    """
    # TODO: Executing an opcode using locks will acquire them in blocking mode.
    # Consider using a timeout for retries.

    # Queries don't have a job id
    return mcpu.Processor(self.server.context, None).ExecOpCode(op, None)
326 ee6c7b94 Michael Hanselmann
327 ffeffa1d Iustin Pop
328 39dcf2ef Guido Trotter
class GanetiContext(object):
  """Context common to all ganeti threads.

  This class creates and holds common objects shared by all threads: the
  configuration (C{cfg}), the lock manager (C{glm}) and the job queue
  (C{jobqueue}).

  """
  # pylint: disable-msg=W0212
  # we do want to ensure a singleton here
  _instance = None

  def __init__(self):
    """Constructs a new GanetiContext object.

    There should be only a GanetiContext object at any time, so this
    function raises an error if this is not the case.

    """
    cls = self.__class__
    assert cls._instance is None, "double GanetiContext instance"

    # Create global configuration object
    cfg = config.ConfigWriter()
    self.cfg = cfg

    # Locking manager
    node_names = cfg.GetNodeList()
    instance_names = cfg.GetInstanceList()
    self.glm = locking.GanetiLockManager(node_names, instance_names)

    # Job queue
    self.jobqueue = jqueue.JobQueue(self)

    # setting this also locks the class against attribute modifications
    cls._instance = self

  def __setattr__(self, name, value):
    """Setting GanetiContext attributes is forbidden after initialization.

    Once the class-level instance reference has been set, any further
    attribute assignment trips the assertion below.

    """
    initialized = self.__class__._instance is not None
    assert not initialized, "Attempt to modify Ganeti Context"
    object.__setattr__(self, name, value)

  def AddNode(self, node, ec_id):
    """Adds a node to the configuration and lock manager.

    @param node: the node object to add; its C{name} is registered with
        the lock manager
    @param ec_id: passed through to the configuration's C{AddNode}

    """
    # Configuration first
    self.cfg.AddNode(node, ec_id)

    # If preseeding fails it'll not be added
    self.jobqueue.AddNode(node)

    # Finally make the Ganeti Lock Manager aware of the new node
    self.glm.add(locking.LEVEL_NODE, node.name)

  def ReaddNode(self, node):
    """Updates a node that's already in the configuration

    @param node: the node object to hand to the job queue again

    """
    # Synchronize the queue again
    self.jobqueue.AddNode(node)

  def RemoveNode(self, name):
    """Removes a node from the configuration and lock manager.

    @param name: name of the node to remove

    """
    # Configuration first
    self.cfg.RemoveNode(name)

    # Notify job queue
    self.jobqueue.RemoveNode(name)

    # Finally drop the node from the Ganeti Lock Manager
    self.glm.remove(locking.LEVEL_NODE, name)
400 d8470559 Michael Hanselmann
401 39dcf2ef Guido Trotter
402 05e50653 Michael Hanselmann
def _SetWatcherPause(until):
  """Creates or removes the watcher pause file.

  @type until: None or number
  @param until: Unix timestamp saying until when the watcher shouldn't run;
      it is written using C{%d} formatting. C{None} removes the pause file.
  @return: the C{until} value, unchanged

  """
  pause_file = constants.WATCHER_PAUSEFILE

  if until is not None:
    utils.WriteFile(pause_file, data="%d\n" % (until, ))
  else:
    utils.RemoveFile(pause_file)

  return until
416 28b498cd Michael Hanselmann
417 05e50653 Michael Hanselmann
418 e0e916fe Iustin Pop
@rpc.RunWithRPC
def CheckAgreement():
  """Check the agreement on who is the master.

  The node list is taken from our own configuration and the votes of those
  nodes are gathered. We are considered the master if we are the top-voted
  node and the votes for us are not outnumbered by all other votes.

  In order to account for cold-start of all nodes, vote gathering is
  attempted up to six times, sleeping ten seconds after each attempt whose
  top-voted entry is not a real node. If the nodes are more out-of-sync,
  for now manual startup of the master should be attempted.

  Note that for a cluster with an even number of nodes, we need at least
  half of the nodes (beside ourselves) to vote for us. This creates a
  problem on two-node clusters, since in this case we require the other
  node to be up too to confirm our status.

  @rtype: boolean
  @return: True if we should run as master (also when no votes at all are
      returned, which is treated as a one node cluster), False otherwise

  """
  myself = netutils.Hostname.GetSysName()
  # temporary config writer, only needed to obtain the node list
  node_list = config.ConfigWriter().GetNodeList()

  for _ in range(6):
    votes = bootstrap.GatherMasterVotes(node_list)
    if not votes:
      # empty node list, this is a one node cluster
      return True
    if votes[0][0] is not None:
      # a real node is at the top of the list
      break
    time.sleep(10)
  else:
    # every attempt ended without a real node as the top-voted entry
    logging.critical("Cluster inconsistent, most of the nodes didn't answer"
                     " after multiple retries. Aborting startup")
    logging.critical("Use the --no-voting option if you understand what"
                     " effects it has on the cluster state")
    return False

  all_votes = sum(entry[1] for entry in votes)
  (top_node, top_votes) = votes[0]

  if top_node != myself:
    logging.critical("It seems we are not the master (top-voted node"
                     " is %s with %d out of %d votes)", top_node, top_votes,
                     all_votes)
    return False

  votes_against = all_votes - top_votes
  if top_votes < votes_against:
    logging.critical("It seems we are not the master (%d votes for,"
                     " %d votes against)", top_votes, votes_against)
    return False

  return True
477 36205981 Iustin Pop
478 6c948699 Michael Hanselmann
479 340f4757 Iustin Pop
@rpc.RunWithRPC
def ActivateMasterIP():
  """Asks the master node to start the master role, i.e. activate its IP.

  A failure is only logged; nothing is raised or returned.

  """
  master_node = ssconf.SimpleStore().GetMasterNode()
  # NOTE(review): the meaning of the two False arguments is defined by
  # call_node_start_master; confirm against the rpc module
  fail_msg = rpc.RpcRunner.call_node_start_master(master_node, False,
                                                  False).fail_msg
  if fail_msg:
    logging.error("Can't activate master IP address: %s", fail_msg)
487 340f4757 Iustin Pop
488 340f4757 Iustin Pop
489 ed0efaa5 Michael Hanselmann
def CheckMasterd(options, args):
  """Initial checks whether to run or exit with a failure.

  The checks are, in order: no positional arguments, C{ssconf.CheckMaster},
  existence of the daemon user and group (stored as C{options.uid} and
  C{options.gid}), a loadable configuration and finally agreement of the
  other nodes, unless voting was disabled and confirmed by the user.

  @param options: parsed command line options; C{debug}, C{no_voting} and
      C{yes_do_it} are read, C{uid} and C{gid} are set
  @param args: remaining command line arguments, must be empty

  """
  def _Abort(text):
    """Writes a message to stderr and exits with the failure code."""
    sys.stderr.write("%s\n" % text)
    sys.exit(constants.EXIT_FAILURE)

  if args: # masterd doesn't take any arguments
    _Abort("Usage: %s [-f] [-d]" % sys.argv[0])

  ssconf.CheckMaster(options.debug)

  try:
    options.uid = pwd.getpwnam(constants.MASTERD_USER).pw_uid
    options.gid = grp.getgrnam(constants.DAEMONS_GROUP).gr_gid
  except KeyError:
    _Abort("User or group not existing on system: %s:%s" %
           (constants.MASTERD_USER, constants.DAEMONS_GROUP))

  # Check the configuration is sane before anything else
  try:
    config.ConfigWriter()
  except errors.ConfigVersionMismatch:
    err = sys.exc_info()[1]
    expected = "%s.%s.%s" % constants.SplitVersion(err.args[0])
    on_disk = "%s.%s.%s" % constants.SplitVersion(err.args[1])
    _Abort("Configuration version mismatch. The current Ganeti software"
           " expects version %s, but the on-disk configuration file has"
           " version %s. This is likely the result of upgrading the"
           " software without running the upgrade procedure. Please contact"
           " your cluster administrator or complete the upgrade using the"
           " cfgupgrade utility, after reading the upgrade notes." %
           (expected, on_disk))
  except errors.ConfigurationError:
    err = sys.exc_info()[1]
    _Abort("Configuration error while opening the configuration file: %s\n"
           "This might be caused by an incomplete software upgrade or"
           " by a corrupted configuration file. Until the problem is fixed"
           " the master daemon cannot start." % str(err))

  # If CheckMaster didn't fail we believe we are the master, but we have to
  # confirm with the other nodes.
  if options.no_voting:
    if not options.yes_do_it:
      # Voting is skipped only after an explicit interactive confirmation
      sys.stdout.write("The 'no voting' option has been selected.\n")
      sys.stdout.write("This is dangerous, please confirm by"
                       " typing uppercase 'yes': ")
      sys.stdout.flush()

      answer = sys.stdin.readline().strip()
      if answer != "YES":
        _Abort("Aborting.")

    return

  # CheckAgreement uses RPC and threads, hence it needs to be run in a separate
  # process before we call utils.Daemonize in the current process.
  agreed = utils.RunInSeparateProcess(CheckAgreement)
  if not agreed:
    sys.exit(constants.EXIT_FAILURE)

  # ActivateMasterIP also uses RPC/threads, so we run it again via a
  # separate process.

  # TODO: decide whether failure to activate the master IP is a fatal error
  utils.RunInSeparateProcess(ActivateMasterIP)
558 340f4757 Iustin Pop
559 ed0efaa5 Michael Hanselmann
560 3ee53f1f Iustin Pop
def PrepMasterd(options, _):
  """Prepare the master daemon; executed with the PID file held.

  Removes the master socket file, then creates the main loop and the
  master server object that uses this socket.

  @param options: parsed command line options; the C{uid} and C{gid}
      attributes are passed on to the master server
  @param _: unused
  @return: tuple of (mainloop, master server), later handed to
      L{ExecMasterd} as its C{prep_data} argument

  """
  socket_path = constants.MASTER_SOCKET

  # Holding the PID file rules out a concurrently running instance, so
  # removing the socket file here is safe.
  utils.RemoveFile(socket_path)

  loop = daemon.Mainloop()
  server = MasterServer(loop, socket_path, options.uid, options.gid)
  return (loop, server)
572 3ee53f1f Iustin Pop
573 3ee53f1f Iustin Pop
574 3ee53f1f Iustin Pop
def ExecMasterd(options, args, prep_data): # pylint: disable-msg=W0613
  """Main master daemon function, executed with the PID file held.

  @param options: unused
  @param args: unused
  @param prep_data: the (mainloop, master server) tuple as returned by
      L{PrepMasterd}

  """
  (loop, server) = prep_data
  try:
    _RunWithRpc(loop, server)
  finally:
    # The socket file is removed no matter how the daemon stopped
    utils.RemoveFile(constants.MASTER_SOCKET)


def _RunWithRpc(loop, server):
  """Initializes RPC, serves requests and always shuts RPC down again.

  C{rpc.Shutdown} is only reached once C{rpc.Init} has returned.

  """
  rpc.Init()
  try:
    _RunWithQueue(loop, server)
  finally:
    rpc.Shutdown()


def _RunWithQueue(loop, server):
  """Sets up the server queue, runs the main loop and cleans up the server.

  C{server_cleanup} is only reached once C{setup_queue} has returned.

  """
  server.setup_queue()
  try:
    loop.Run()
  finally:
    server.server_cleanup()
591 a4af651e Iustin Pop
592 ffeffa1d Iustin Pop
593 29d91329 Michael Hanselmann
def Main():
  """Build the command line parser and start the master daemon.

  Registers the C{--no-voting} and C{--yes-do-it} flags and passes the
  parser, together with the check/prepare/execute callbacks, to
  C{daemon.GenericMain}.

  """
  version_text = "%%prog (ganeti) %s" % constants.RELEASE_VERSION
  parser = OptionParser(description="Ganeti master daemon",
                        usage="%prog [-f] [-d]",
                        version=version_text)

  # Both flags are plain booleans which default to False
  bool_flags = [
    ("--no-voting", "no_voting",
     "Do not check that the nodes agree on this node"
     " being the master and start the daemon unconditionally"),
    ("--yes-do-it", "yes_do_it",
     "Override interactive check for --no-voting"),
    ]
  for (flag, dest, help_text) in bool_flags:
    parser.add_option(flag, dest=dest, help=help_text,
                      default=False, action="store_true")

  daemon.GenericMain(constants.MASTERD, parser, CheckMasterd, PrepMasterd,
                     ExecMasterd, multithreaded=True)