Statistics
| Branch: | Tag: | Revision:

root / daemons / ganeti-masterd @ 4b63dc7a

History | View | Annotate | Download (19.3 kB)

1 834f8b67 Iustin Pop
#!/usr/bin/python
2 ffeffa1d Iustin Pop
#
3 ffeffa1d Iustin Pop
4 b705c7a6 Manuel Franceschini
# Copyright (C) 2006, 2007, 2010 Google Inc.
5 ffeffa1d Iustin Pop
#
6 ffeffa1d Iustin Pop
# This program is free software; you can redistribute it and/or modify
7 ffeffa1d Iustin Pop
# it under the terms of the GNU General Public License as published by
8 ffeffa1d Iustin Pop
# the Free Software Foundation; either version 2 of the License, or
9 ffeffa1d Iustin Pop
# (at your option) any later version.
10 ffeffa1d Iustin Pop
#
11 ffeffa1d Iustin Pop
# This program is distributed in the hope that it will be useful, but
12 ffeffa1d Iustin Pop
# WITHOUT ANY WARRANTY; without even the implied warranty of
13 ffeffa1d Iustin Pop
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 ffeffa1d Iustin Pop
# General Public License for more details.
15 ffeffa1d Iustin Pop
#
16 ffeffa1d Iustin Pop
# You should have received a copy of the GNU General Public License
17 ffeffa1d Iustin Pop
# along with this program; if not, write to the Free Software
18 ffeffa1d Iustin Pop
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 ffeffa1d Iustin Pop
# 02110-1301, USA.
20 ffeffa1d Iustin Pop
21 ffeffa1d Iustin Pop
22 ffeffa1d Iustin Pop
"""Master daemon program.
23 ffeffa1d Iustin Pop
24 ffeffa1d Iustin Pop
Some classes deviates from the standard style guide since the
25 ffeffa1d Iustin Pop
inheritance from parent classes requires it.
26 ffeffa1d Iustin Pop
27 ffeffa1d Iustin Pop
"""
28 ffeffa1d Iustin Pop
29 7260cfbe Iustin Pop
# pylint: disable-msg=C0103
30 7260cfbe Iustin Pop
# C0103: Invalid name ganeti-masterd
31 ffeffa1d Iustin Pop
32 bbfd0568 René Nussbaumer
import grp
33 bbfd0568 René Nussbaumer
import os
34 bbfd0568 René Nussbaumer
import pwd
35 c1f2901b Iustin Pop
import sys
36 cdd7f900 Guido Trotter
import socket
37 ffeffa1d Iustin Pop
import time
38 bbfd0568 René Nussbaumer
import tempfile
39 96cb3986 Michael Hanselmann
import logging
40 ffeffa1d Iustin Pop
41 c1f2901b Iustin Pop
from optparse import OptionParser
42 ffeffa1d Iustin Pop
43 39dcf2ef Guido Trotter
from ganeti import config
44 ffeffa1d Iustin Pop
from ganeti import constants
45 04ccf5e9 Guido Trotter
from ganeti import daemon
46 ffeffa1d Iustin Pop
from ganeti import mcpu
47 ffeffa1d Iustin Pop
from ganeti import opcodes
48 ffeffa1d Iustin Pop
from ganeti import jqueue
49 39dcf2ef Guido Trotter
from ganeti import locking
50 ffeffa1d Iustin Pop
from ganeti import luxi
51 ffeffa1d Iustin Pop
from ganeti import utils
52 c1f2901b Iustin Pop
from ganeti import errors
53 c1f2901b Iustin Pop
from ganeti import ssconf
54 23e50d39 Michael Hanselmann
from ganeti import workerpool
55 b1b6ea87 Iustin Pop
from ganeti import rpc
56 d7cdb55d Iustin Pop
from ganeti import bootstrap
57 a744b676 Manuel Franceschini
from ganeti import netutils
58 c1f2901b Iustin Pop
59 c1f2901b Iustin Pop
60 23e50d39 Michael Hanselmann
CLIENT_REQUEST_WORKERS = 16
61 23e50d39 Michael Hanselmann
62 c1f2901b Iustin Pop
EXIT_NOTMASTER = constants.EXIT_NOTMASTER
63 c1f2901b Iustin Pop
EXIT_NODESETUP_ERROR = constants.EXIT_NODESETUP_ERROR
64 ffeffa1d Iustin Pop
65 ffeffa1d Iustin Pop
66 23e50d39 Michael Hanselmann
class ClientRequestWorker(workerpool.BaseWorker):
67 e0dbb89b Guido Trotter
  # pylint: disable-msg=W0221
68 7e5a6e86 Guido Trotter
  def RunTask(self, server, message, client):
69 23e50d39 Michael Hanselmann
    """Process the request.
70 23e50d39 Michael Hanselmann
71 23e50d39 Michael Hanselmann
    """
72 7e5a6e86 Guido Trotter
    client_ops = ClientOps(server)
73 7e5a6e86 Guido Trotter
74 23e50d39 Michael Hanselmann
    try:
75 7e5a6e86 Guido Trotter
      (method, args) = luxi.ParseRequest(message)
76 7e5a6e86 Guido Trotter
    except luxi.ProtocolError, err:
77 7e5a6e86 Guido Trotter
      logging.error("Protocol Error: %s", err)
78 7e5a6e86 Guido Trotter
      client.close_log()
79 7e5a6e86 Guido Trotter
      return
80 7e5a6e86 Guido Trotter
81 7e5a6e86 Guido Trotter
    success = False
82 7e5a6e86 Guido Trotter
    try:
83 7e5a6e86 Guido Trotter
      result = client_ops.handle_request(method, args)
84 7e5a6e86 Guido Trotter
      success = True
85 7e5a6e86 Guido Trotter
    except errors.GenericError, err:
86 7e5a6e86 Guido Trotter
      logging.exception("Unexpected exception")
87 7e5a6e86 Guido Trotter
      success = False
88 7e5a6e86 Guido Trotter
      result = errors.EncodeException(err)
89 7e5a6e86 Guido Trotter
    except:
90 7e5a6e86 Guido Trotter
      logging.exception("Unexpected exception")
91 7e5a6e86 Guido Trotter
      err = sys.exc_info()
92 7e5a6e86 Guido Trotter
      result = "Caught exception: %s" % str(err[1])
93 7e5a6e86 Guido Trotter
94 7e5a6e86 Guido Trotter
    try:
95 7e5a6e86 Guido Trotter
      reply = luxi.FormatResponse(success, result)
96 7e5a6e86 Guido Trotter
      client.send_message(reply)
97 7e5a6e86 Guido Trotter
      # awake the main thread so that it can write out the data.
98 7e5a6e86 Guido Trotter
      server.awaker.signal()
99 e0dbb89b Guido Trotter
    except: # pylint: disable-msg=W0702
100 7e5a6e86 Guido Trotter
      logging.exception("Send error")
101 7e5a6e86 Guido Trotter
      client.close_log()
102 7e5a6e86 Guido Trotter
103 7e5a6e86 Guido Trotter
104 7e5a6e86 Guido Trotter
class MasterClientHandler(daemon.AsyncTerminatedMessageStream):
105 7e5a6e86 Guido Trotter
  """Handler for master peers.
106 7e5a6e86 Guido Trotter
107 7e5a6e86 Guido Trotter
  """
108 7e5a6e86 Guido Trotter
  _MAX_UNHANDLED = 1
109 7e5a6e86 Guido Trotter
  def __init__(self, server, connected_socket, client_address, family):
110 7e5a6e86 Guido Trotter
    daemon.AsyncTerminatedMessageStream.__init__(self, connected_socket,
111 7e5a6e86 Guido Trotter
                                                 client_address,
112 7e5a6e86 Guido Trotter
                                                 constants.LUXI_EOM,
113 7e5a6e86 Guido Trotter
                                                 family, self._MAX_UNHANDLED)
114 7e5a6e86 Guido Trotter
    self.server = server
115 7e5a6e86 Guido Trotter
116 7e5a6e86 Guido Trotter
  def handle_message(self, message, _):
117 b2e8a4d9 Michael Hanselmann
    self.server.request_workers.AddTask((self.server, message, self))
118 23e50d39 Michael Hanselmann
119 23e50d39 Michael Hanselmann
120 cdd7f900 Guido Trotter
class MasterServer(daemon.AsyncStreamServer):
121 cdd7f900 Guido Trotter
  """Master Server.
122 ffeffa1d Iustin Pop
123 cdd7f900 Guido Trotter
  This is the main asynchronous master server. It handles connections to the
124 cdd7f900 Guido Trotter
  master socket.
125 ffeffa1d Iustin Pop
126 ffeffa1d Iustin Pop
  """
127 7e5a6e86 Guido Trotter
  family = socket.AF_UNIX
128 7e5a6e86 Guido Trotter
129 7e5a6e86 Guido Trotter
  def __init__(self, mainloop, address, uid, gid):
130 cdd7f900 Guido Trotter
    """MasterServer constructor
131 ce862cd5 Guido Trotter
132 cdd7f900 Guido Trotter
    @type mainloop: ganeti.daemon.Mainloop
133 cdd7f900 Guido Trotter
    @param mainloop: Mainloop used to poll for I/O events
134 cdd7f900 Guido Trotter
    @param address: the unix socket address to bind the MasterServer to
135 bbfd0568 René Nussbaumer
    @param uid: The uid of the owner of the socket
136 bbfd0568 René Nussbaumer
    @param gid: The gid of the owner of the socket
137 ce862cd5 Guido Trotter
138 ce862cd5 Guido Trotter
    """
139 bbfd0568 René Nussbaumer
    temp_name = tempfile.mktemp(dir=os.path.dirname(address))
140 7e5a6e86 Guido Trotter
    daemon.AsyncStreamServer.__init__(self, self.family, temp_name)
141 bbfd0568 René Nussbaumer
    os.chmod(temp_name, 0770)
142 bbfd0568 René Nussbaumer
    os.chown(temp_name, uid, gid)
143 bbfd0568 René Nussbaumer
    os.rename(temp_name, address)
144 bbfd0568 René Nussbaumer
145 cdd7f900 Guido Trotter
    self.mainloop = mainloop
146 7e5a6e86 Guido Trotter
    self.awaker = daemon.AsyncAwaker()
147 50a3fbb2 Michael Hanselmann
148 50a3fbb2 Michael Hanselmann
    # We'll only start threads once we've forked.
149 9113300d Michael Hanselmann
    self.context = None
150 23e50d39 Michael Hanselmann
    self.request_workers = None
151 50a3fbb2 Michael Hanselmann
152 cdd7f900 Guido Trotter
  def handle_connection(self, connected_socket, client_address):
153 7e5a6e86 Guido Trotter
    # TODO: add connection count and limit the number of open connections to a
154 7e5a6e86 Guido Trotter
    # maximum number to avoid breaking for lack of file descriptors or memory.
155 7e5a6e86 Guido Trotter
    MasterClientHandler(self, connected_socket, client_address, self.family)
156 cdd7f900 Guido Trotter
157 50a3fbb2 Michael Hanselmann
  def setup_queue(self):
158 9113300d Michael Hanselmann
    self.context = GanetiContext()
159 89e2b4d2 Michael Hanselmann
    self.request_workers = workerpool.WorkerPool("ClientReq",
160 89e2b4d2 Michael Hanselmann
                                                 CLIENT_REQUEST_WORKERS,
161 23e50d39 Michael Hanselmann
                                                 ClientRequestWorker)
162 ffeffa1d Iustin Pop
163 c1f2901b Iustin Pop
  def server_cleanup(self):
164 c1f2901b Iustin Pop
    """Cleanup the server.
165 c1f2901b Iustin Pop
166 c1f2901b Iustin Pop
    This involves shutting down the processor threads and the master
167 c1f2901b Iustin Pop
    socket.
168 c1f2901b Iustin Pop
169 c1f2901b Iustin Pop
    """
170 50a3fbb2 Michael Hanselmann
    try:
171 cdd7f900 Guido Trotter
      self.close()
172 50a3fbb2 Michael Hanselmann
    finally:
173 23e50d39 Michael Hanselmann
      if self.request_workers:
174 36088c4c Michael Hanselmann
        self.request_workers.TerminateWorkers()
175 9113300d Michael Hanselmann
      if self.context:
176 9113300d Michael Hanselmann
        self.context.jobqueue.Shutdown()
177 ffeffa1d Iustin Pop
178 ffeffa1d Iustin Pop
179 ffeffa1d Iustin Pop
class ClientOps:
180 ffeffa1d Iustin Pop
  """Class holding high-level client operations."""
181 ffeffa1d Iustin Pop
  def __init__(self, server):
182 ffeffa1d Iustin Pop
    self.server = server
183 ffeffa1d Iustin Pop
184 7260cfbe Iustin Pop
  def handle_request(self, method, args): # pylint: disable-msg=R0911
185 9113300d Michael Hanselmann
    queue = self.server.context.jobqueue
186 0bbe448c Michael Hanselmann
187 0bbe448c Michael Hanselmann
    # TODO: Parameter validation
188 0bbe448c Michael Hanselmann
189 7260cfbe Iustin Pop
    # TODO: Rewrite to not exit in each 'if/elif' branch
190 7260cfbe Iustin Pop
191 0bbe448c Michael Hanselmann
    if method == luxi.REQ_SUBMIT_JOB:
192 e566ddbd Iustin Pop
      logging.info("Received new job")
193 0bbe448c Michael Hanselmann
      ops = [opcodes.OpCode.LoadOpCode(state) for state in args]
194 4c848b18 Michael Hanselmann
      return queue.SubmitJob(ops)
195 ffeffa1d Iustin Pop
196 2971c913 Iustin Pop
    if method == luxi.REQ_SUBMIT_MANY_JOBS:
197 2971c913 Iustin Pop
      logging.info("Received multiple jobs")
198 2971c913 Iustin Pop
      jobs = []
199 2971c913 Iustin Pop
      for ops in args:
200 2971c913 Iustin Pop
        jobs.append([opcodes.OpCode.LoadOpCode(state) for state in ops])
201 2971c913 Iustin Pop
      return queue.SubmitManyJobs(jobs)
202 2971c913 Iustin Pop
203 0bbe448c Michael Hanselmann
    elif method == luxi.REQ_CANCEL_JOB:
204 3a2c7775 Michael Hanselmann
      job_id = args
205 e566ddbd Iustin Pop
      logging.info("Received job cancel request for %s", job_id)
206 0bbe448c Michael Hanselmann
      return queue.CancelJob(job_id)
207 ffeffa1d Iustin Pop
208 0bbe448c Michael Hanselmann
    elif method == luxi.REQ_ARCHIVE_JOB:
209 3a2c7775 Michael Hanselmann
      job_id = args
210 e566ddbd Iustin Pop
      logging.info("Received job archive request for %s", job_id)
211 0bbe448c Michael Hanselmann
      return queue.ArchiveJob(job_id)
212 0bbe448c Michael Hanselmann
213 07cd723a Iustin Pop
    elif method == luxi.REQ_AUTOARCHIVE_JOBS:
214 f8ad5591 Michael Hanselmann
      (age, timeout) = args
215 e566ddbd Iustin Pop
      logging.info("Received job autoarchive request for age %s, timeout %s",
216 e566ddbd Iustin Pop
                   age, timeout)
217 f8ad5591 Michael Hanselmann
      return queue.AutoArchiveJobs(age, timeout)
218 07cd723a Iustin Pop
219 dfe57c22 Michael Hanselmann
    elif method == luxi.REQ_WAIT_FOR_JOB_CHANGE:
220 5c735209 Iustin Pop
      (job_id, fields, prev_job_info, prev_log_serial, timeout) = args
221 e566ddbd Iustin Pop
      logging.info("Received job poll request for %s", job_id)
222 6c5a7090 Michael Hanselmann
      return queue.WaitForJobChanges(job_id, fields, prev_job_info,
223 5c735209 Iustin Pop
                                     prev_log_serial, timeout)
224 dfe57c22 Michael Hanselmann
225 0bbe448c Michael Hanselmann
    elif method == luxi.REQ_QUERY_JOBS:
226 0bbe448c Michael Hanselmann
      (job_ids, fields) = args
227 e566ddbd Iustin Pop
      if isinstance(job_ids, (tuple, list)) and job_ids:
228 1f864b60 Iustin Pop
        msg = utils.CommaJoin(job_ids)
229 e566ddbd Iustin Pop
      else:
230 e566ddbd Iustin Pop
        msg = str(job_ids)
231 e566ddbd Iustin Pop
      logging.info("Received job query request for %s", msg)
232 0bbe448c Michael Hanselmann
      return queue.QueryJobs(job_ids, fields)
233 0bbe448c Michael Hanselmann
234 ee6c7b94 Michael Hanselmann
    elif method == luxi.REQ_QUERY_INSTANCES:
235 ec79568d Iustin Pop
      (names, fields, use_locking) = args
236 e566ddbd Iustin Pop
      logging.info("Received instance query request for %s", names)
237 77921a95 Iustin Pop
      if use_locking:
238 debac808 Iustin Pop
        raise errors.OpPrereqError("Sync queries are not allowed",
239 debac808 Iustin Pop
                                   errors.ECODE_INVAL)
240 ec79568d Iustin Pop
      op = opcodes.OpQueryInstances(names=names, output_fields=fields,
241 ec79568d Iustin Pop
                                    use_locking=use_locking)
242 ee6c7b94 Michael Hanselmann
      return self._Query(op)
243 ee6c7b94 Michael Hanselmann
244 02f7fe54 Michael Hanselmann
    elif method == luxi.REQ_QUERY_NODES:
245 ec79568d Iustin Pop
      (names, fields, use_locking) = args
246 e566ddbd Iustin Pop
      logging.info("Received node query request for %s", names)
247 77921a95 Iustin Pop
      if use_locking:
248 debac808 Iustin Pop
        raise errors.OpPrereqError("Sync queries are not allowed",
249 debac808 Iustin Pop
                                   errors.ECODE_INVAL)
250 ec79568d Iustin Pop
      op = opcodes.OpQueryNodes(names=names, output_fields=fields,
251 ec79568d Iustin Pop
                                use_locking=use_locking)
252 02f7fe54 Michael Hanselmann
      return self._Query(op)
253 02f7fe54 Michael Hanselmann
254 32f93223 Michael Hanselmann
    elif method == luxi.REQ_QUERY_EXPORTS:
255 ec79568d Iustin Pop
      nodes, use_locking = args
256 77921a95 Iustin Pop
      if use_locking:
257 debac808 Iustin Pop
        raise errors.OpPrereqError("Sync queries are not allowed",
258 debac808 Iustin Pop
                                   errors.ECODE_INVAL)
259 e566ddbd Iustin Pop
      logging.info("Received exports query request")
260 ec79568d Iustin Pop
      op = opcodes.OpQueryExports(nodes=nodes, use_locking=use_locking)
261 32f93223 Michael Hanselmann
      return self._Query(op)
262 32f93223 Michael Hanselmann
263 ae5849b5 Michael Hanselmann
    elif method == luxi.REQ_QUERY_CONFIG_VALUES:
264 ae5849b5 Michael Hanselmann
      fields = args
265 e566ddbd Iustin Pop
      logging.info("Received config values query request for %s", fields)
266 ae5849b5 Michael Hanselmann
      op = opcodes.OpQueryConfigValues(output_fields=fields)
267 ae5849b5 Michael Hanselmann
      return self._Query(op)
268 ae5849b5 Michael Hanselmann
269 66baeccc Iustin Pop
    elif method == luxi.REQ_QUERY_CLUSTER_INFO:
270 e566ddbd Iustin Pop
      logging.info("Received cluster info query request")
271 66baeccc Iustin Pop
      op = opcodes.OpQueryClusterInfo()
272 66baeccc Iustin Pop
      return self._Query(op)
273 66baeccc Iustin Pop
274 7699c3af Iustin Pop
    elif method == luxi.REQ_QUERY_TAGS:
275 7699c3af Iustin Pop
      kind, name = args
276 7699c3af Iustin Pop
      logging.info("Received tags query request")
277 7699c3af Iustin Pop
      op = opcodes.OpGetTags(kind=kind, name=name)
278 7699c3af Iustin Pop
      return self._Query(op)
279 7699c3af Iustin Pop
280 19b9ba9a Michael Hanselmann
    elif method == luxi.REQ_QUERY_LOCKS:
281 19b9ba9a Michael Hanselmann
      (fields, sync) = args
282 19b9ba9a Michael Hanselmann
      logging.info("Received locks query request")
283 19b9ba9a Michael Hanselmann
      return self.server.context.glm.QueryLocks(fields, sync)
284 19b9ba9a Michael Hanselmann
285 3ccafd0e Iustin Pop
    elif method == luxi.REQ_QUEUE_SET_DRAIN_FLAG:
286 3ccafd0e Iustin Pop
      drain_flag = args
287 e566ddbd Iustin Pop
      logging.info("Received queue drain flag change request to %s",
288 e566ddbd Iustin Pop
                   drain_flag)
289 3ccafd0e Iustin Pop
      return queue.SetDrainFlag(drain_flag)
290 3ccafd0e Iustin Pop
291 05e50653 Michael Hanselmann
    elif method == luxi.REQ_SET_WATCHER_PAUSE:
292 05e50653 Michael Hanselmann
      (until, ) = args
293 05e50653 Michael Hanselmann
294 05e50653 Michael Hanselmann
      if until is None:
295 05e50653 Michael Hanselmann
        logging.info("Received request to no longer pause the watcher")
296 05e50653 Michael Hanselmann
      else:
297 05e50653 Michael Hanselmann
        if not isinstance(until, (int, float)):
298 05e50653 Michael Hanselmann
          raise TypeError("Duration must be an integer or float")
299 05e50653 Michael Hanselmann
300 05e50653 Michael Hanselmann
        if until < time.time():
301 05e50653 Michael Hanselmann
          raise errors.GenericError("Unable to set pause end time in the past")
302 05e50653 Michael Hanselmann
303 05e50653 Michael Hanselmann
        logging.info("Received request to pause the watcher until %s", until)
304 05e50653 Michael Hanselmann
305 05e50653 Michael Hanselmann
      return _SetWatcherPause(until)
306 05e50653 Michael Hanselmann
307 0bbe448c Michael Hanselmann
    else:
308 e566ddbd Iustin Pop
      logging.info("Received invalid request '%s'", method)
309 e566ddbd Iustin Pop
      raise ValueError("Invalid operation '%s'" % method)
310 ffeffa1d Iustin Pop
311 ee6c7b94 Michael Hanselmann
  def _Query(self, op):
312 ee6c7b94 Michael Hanselmann
    """Runs the specified opcode and returns the result.
313 ee6c7b94 Michael Hanselmann
314 ee6c7b94 Michael Hanselmann
    """
315 adfa97e3 Guido Trotter
    # Queries don't have a job id
316 adfa97e3 Guido Trotter
    proc = mcpu.Processor(self.server.context, None)
317 26d3fd2f Michael Hanselmann
318 26d3fd2f Michael Hanselmann
    # TODO: Executing an opcode using locks will acquire them in blocking mode.
319 26d3fd2f Michael Hanselmann
    # Consider using a timeout for retries.
320 031a3e57 Michael Hanselmann
    return proc.ExecOpCode(op, None)
321 ee6c7b94 Michael Hanselmann
322 ffeffa1d Iustin Pop
323 39dcf2ef Guido Trotter
class GanetiContext(object):
324 39dcf2ef Guido Trotter
  """Context common to all ganeti threads.
325 39dcf2ef Guido Trotter
326 39dcf2ef Guido Trotter
  This class creates and holds common objects shared by all threads.
327 39dcf2ef Guido Trotter
328 39dcf2ef Guido Trotter
  """
329 7260cfbe Iustin Pop
  # pylint: disable-msg=W0212
330 7260cfbe Iustin Pop
  # we do want to ensure a singleton here
331 39dcf2ef Guido Trotter
  _instance = None
332 39dcf2ef Guido Trotter
333 39dcf2ef Guido Trotter
  def __init__(self):
334 39dcf2ef Guido Trotter
    """Constructs a new GanetiContext object.
335 39dcf2ef Guido Trotter
336 39dcf2ef Guido Trotter
    There should be only a GanetiContext object at any time, so this
337 39dcf2ef Guido Trotter
    function raises an error if this is not the case.
338 39dcf2ef Guido Trotter
339 39dcf2ef Guido Trotter
    """
340 39dcf2ef Guido Trotter
    assert self.__class__._instance is None, "double GanetiContext instance"
341 39dcf2ef Guido Trotter
342 9113300d Michael Hanselmann
    # Create global configuration object
343 39dcf2ef Guido Trotter
    self.cfg = config.ConfigWriter()
344 9113300d Michael Hanselmann
345 9113300d Michael Hanselmann
    # Locking manager
346 984f7c32 Guido Trotter
    self.glm = locking.GanetiLockManager(
347 39dcf2ef Guido Trotter
                self.cfg.GetNodeList(),
348 39dcf2ef Guido Trotter
                self.cfg.GetInstanceList())
349 39dcf2ef Guido Trotter
350 9113300d Michael Hanselmann
    # Job queue
351 9113300d Michael Hanselmann
    self.jobqueue = jqueue.JobQueue(self)
352 9113300d Michael Hanselmann
353 39dcf2ef Guido Trotter
    # setting this also locks the class against attribute modifications
354 39dcf2ef Guido Trotter
    self.__class__._instance = self
355 39dcf2ef Guido Trotter
356 39dcf2ef Guido Trotter
  def __setattr__(self, name, value):
357 39dcf2ef Guido Trotter
    """Setting GanetiContext attributes is forbidden after initialization.
358 39dcf2ef Guido Trotter
359 39dcf2ef Guido Trotter
    """
360 39dcf2ef Guido Trotter
    assert self.__class__._instance is None, "Attempt to modify Ganeti Context"
361 39dcf2ef Guido Trotter
    object.__setattr__(self, name, value)
362 39dcf2ef Guido Trotter
363 0debfb35 Guido Trotter
  def AddNode(self, node, ec_id):
364 d8470559 Michael Hanselmann
    """Adds a node to the configuration and lock manager.
365 d8470559 Michael Hanselmann
366 d8470559 Michael Hanselmann
    """
367 d8470559 Michael Hanselmann
    # Add it to the configuration
368 0debfb35 Guido Trotter
    self.cfg.AddNode(node, ec_id)
369 d8470559 Michael Hanselmann
370 c36176cc Michael Hanselmann
    # If preseeding fails it'll not be added
371 99aabbed Iustin Pop
    self.jobqueue.AddNode(node)
372 c36176cc Michael Hanselmann
373 d8470559 Michael Hanselmann
    # Add the new node to the Ganeti Lock Manager
374 d8470559 Michael Hanselmann
    self.glm.add(locking.LEVEL_NODE, node.name)
375 d8470559 Michael Hanselmann
376 d8470559 Michael Hanselmann
  def ReaddNode(self, node):
377 d8470559 Michael Hanselmann
    """Updates a node that's already in the configuration
378 d8470559 Michael Hanselmann
379 d8470559 Michael Hanselmann
    """
380 c36176cc Michael Hanselmann
    # Synchronize the queue again
381 99aabbed Iustin Pop
    self.jobqueue.AddNode(node)
382 d8470559 Michael Hanselmann
383 d8470559 Michael Hanselmann
  def RemoveNode(self, name):
384 d8470559 Michael Hanselmann
    """Removes a node from the configuration and lock manager.
385 d8470559 Michael Hanselmann
386 d8470559 Michael Hanselmann
    """
387 d8470559 Michael Hanselmann
    # Remove node from configuration
388 d8470559 Michael Hanselmann
    self.cfg.RemoveNode(name)
389 d8470559 Michael Hanselmann
390 c36176cc Michael Hanselmann
    # Notify job queue
391 c36176cc Michael Hanselmann
    self.jobqueue.RemoveNode(name)
392 c36176cc Michael Hanselmann
393 d8470559 Michael Hanselmann
    # Remove the node from the Ganeti Lock Manager
394 d8470559 Michael Hanselmann
    self.glm.remove(locking.LEVEL_NODE, name)
395 d8470559 Michael Hanselmann
396 39dcf2ef Guido Trotter
397 05e50653 Michael Hanselmann
def _SetWatcherPause(until):
398 05e50653 Michael Hanselmann
  """Creates or removes the watcher pause file.
399 05e50653 Michael Hanselmann
400 05e50653 Michael Hanselmann
  @type until: None or int
401 05e50653 Michael Hanselmann
  @param until: Unix timestamp saying until when the watcher shouldn't run
402 05e50653 Michael Hanselmann
403 05e50653 Michael Hanselmann
  """
404 05e50653 Michael Hanselmann
  if until is None:
405 05e50653 Michael Hanselmann
    utils.RemoveFile(constants.WATCHER_PAUSEFILE)
406 05e50653 Michael Hanselmann
  else:
407 05e50653 Michael Hanselmann
    utils.WriteFile(constants.WATCHER_PAUSEFILE,
408 05e50653 Michael Hanselmann
                    data="%d\n" % (until, ))
409 05e50653 Michael Hanselmann
410 28b498cd Michael Hanselmann
  return until
411 28b498cd Michael Hanselmann
412 05e50653 Michael Hanselmann
413 e0e916fe Iustin Pop
@rpc.RunWithRPC
414 36205981 Iustin Pop
def CheckAgreement():
415 36205981 Iustin Pop
  """Check the agreement on who is the master.
416 36205981 Iustin Pop
417 36205981 Iustin Pop
  The function uses a very simple algorithm: we must get more positive
418 36205981 Iustin Pop
  than negative answers. Since in most of the cases we are the master,
419 36205981 Iustin Pop
  we'll use our own config file for getting the node list. In the
420 36205981 Iustin Pop
  future we could collect the current node list from our (possibly
421 36205981 Iustin Pop
  obsolete) known nodes.
422 36205981 Iustin Pop
423 d7cdb55d Iustin Pop
  In order to account for cold-start of all nodes, we retry for up to
424 d7cdb55d Iustin Pop
  a minute until we get a real answer as the top-voted one. If the
425 d7cdb55d Iustin Pop
  nodes are more out-of-sync, for now manual startup of the master
426 d7cdb55d Iustin Pop
  should be attempted.
427 d7cdb55d Iustin Pop
428 d7cdb55d Iustin Pop
  Note that for a even number of nodes cluster, we need at least half
429 d7cdb55d Iustin Pop
  of the nodes (beside ourselves) to vote for us. This creates a
430 d7cdb55d Iustin Pop
  problem on two-node clusters, since in this case we require the
431 d7cdb55d Iustin Pop
  other node to be up too to confirm our status.
432 d7cdb55d Iustin Pop
433 36205981 Iustin Pop
  """
434 b705c7a6 Manuel Franceschini
  myself = netutils.Hostname.GetSysName()
435 36205981 Iustin Pop
  #temp instantiation of a config writer, used only to get the node list
436 36205981 Iustin Pop
  cfg = config.ConfigWriter()
437 36205981 Iustin Pop
  node_list = cfg.GetNodeList()
438 36205981 Iustin Pop
  del cfg
439 d7cdb55d Iustin Pop
  retries = 6
440 d7cdb55d Iustin Pop
  while retries > 0:
441 d7cdb55d Iustin Pop
    votes = bootstrap.GatherMasterVotes(node_list)
442 d7cdb55d Iustin Pop
    if not votes:
443 d7cdb55d Iustin Pop
      # empty node list, this is a one node cluster
444 d7cdb55d Iustin Pop
      return True
445 d7cdb55d Iustin Pop
    if votes[0][0] is None:
446 d7cdb55d Iustin Pop
      retries -= 1
447 d7cdb55d Iustin Pop
      time.sleep(10)
448 36205981 Iustin Pop
      continue
449 d7cdb55d Iustin Pop
    break
450 d7cdb55d Iustin Pop
  if retries == 0:
451 e09fdcfa Iustin Pop
    logging.critical("Cluster inconsistent, most of the nodes didn't answer"
452 e09fdcfa Iustin Pop
                     " after multiple retries. Aborting startup")
453 d8f5a37d Iustin Pop
    logging.critical("Use the --no-voting option if you understand what"
454 d8f5a37d Iustin Pop
                     " effects it has on the cluster state")
455 e09fdcfa Iustin Pop
    return False
456 d7cdb55d Iustin Pop
  # here a real node is at the top of the list
457 d7cdb55d Iustin Pop
  all_votes = sum(item[1] for item in votes)
458 d7cdb55d Iustin Pop
  top_node, top_votes = votes[0]
459 8a20c732 Michael Hanselmann
460 d7cdb55d Iustin Pop
  result = False
461 d7cdb55d Iustin Pop
  if top_node != myself:
462 d7cdb55d Iustin Pop
    logging.critical("It seems we are not the master (top-voted node"
463 bbe19c17 Iustin Pop
                     " is %s with %d out of %d votes)", top_node, top_votes,
464 bbe19c17 Iustin Pop
                     all_votes)
465 d7cdb55d Iustin Pop
  elif top_votes < all_votes - top_votes:
466 36205981 Iustin Pop
    logging.critical("It seems we are not the master (%d votes for,"
467 d7cdb55d Iustin Pop
                     " %d votes against)", top_votes, all_votes - top_votes)
468 d7cdb55d Iustin Pop
  else:
469 d7cdb55d Iustin Pop
    result = True
470 d7cdb55d Iustin Pop
471 d7cdb55d Iustin Pop
  return result
472 36205981 Iustin Pop
473 6c948699 Michael Hanselmann
474 340f4757 Iustin Pop
@rpc.RunWithRPC
475 340f4757 Iustin Pop
def ActivateMasterIP():
476 340f4757 Iustin Pop
  # activate ip
477 340f4757 Iustin Pop
  master_node = ssconf.SimpleStore().GetMasterNode()
478 340f4757 Iustin Pop
  result = rpc.RpcRunner.call_node_start_master(master_node, False, False)
479 340f4757 Iustin Pop
  msg = result.fail_msg
480 340f4757 Iustin Pop
  if msg:
481 340f4757 Iustin Pop
    logging.error("Can't activate master IP address: %s", msg)
482 340f4757 Iustin Pop
483 340f4757 Iustin Pop
484 ed0efaa5 Michael Hanselmann
def CheckMasterd(options, args):
485 ed0efaa5 Michael Hanselmann
  """Initial checks whether to run or exit with a failure.
486 ed0efaa5 Michael Hanselmann
487 ed0efaa5 Michael Hanselmann
  """
488 f93427cd Iustin Pop
  if args: # masterd doesn't take any arguments
489 f93427cd Iustin Pop
    print >> sys.stderr, ("Usage: %s [-f] [-d]" % sys.argv[0])
490 f93427cd Iustin Pop
    sys.exit(constants.EXIT_FAILURE)
491 f93427cd Iustin Pop
492 ed0efaa5 Michael Hanselmann
  ssconf.CheckMaster(options.debug)
493 ed0efaa5 Michael Hanselmann
494 bbfd0568 René Nussbaumer
  try:
495 bbfd0568 René Nussbaumer
    options.uid = pwd.getpwnam(constants.MASTERD_USER).pw_uid
496 bbfd0568 René Nussbaumer
    options.gid = grp.getgrnam(constants.DAEMONS_GROUP).gr_gid
497 bbfd0568 René Nussbaumer
  except KeyError:
498 bbfd0568 René Nussbaumer
    print >> sys.stderr, ("User or group not existing on system: %s:%s" %
499 bbfd0568 René Nussbaumer
                          (constants.MASTERD_USER, constants.DAEMONS_GROUP))
500 bbfd0568 René Nussbaumer
    sys.exit(constants.EXIT_FAILURE)
501 bbfd0568 René Nussbaumer
502 4b63dc7a Iustin Pop
  # Check the configuration is sane before anything else
503 4b63dc7a Iustin Pop
  try:
504 4b63dc7a Iustin Pop
    config.ConfigWriter()
505 4b63dc7a Iustin Pop
  except errors.ConfigVersionMismatch, err:
506 4b63dc7a Iustin Pop
    v1 = "%s.%s.%s" % constants.SplitVersion(err.args[0])
507 4b63dc7a Iustin Pop
    v2 = "%s.%s.%s" % constants.SplitVersion(err.args[1])
508 4b63dc7a Iustin Pop
    print >> sys.stderr,  \
509 4b63dc7a Iustin Pop
        ("Configuration version mismatch. The current Ganeti software"
510 4b63dc7a Iustin Pop
         " expects version %s, but the on-disk configuration file has"
511 4b63dc7a Iustin Pop
         " version %s. This is likely the result of upgrading the"
512 4b63dc7a Iustin Pop
         " software without running the upgrade procedure. Please contact"
513 4b63dc7a Iustin Pop
         " your cluster administrator or complete the upgrade using the"
514 4b63dc7a Iustin Pop
         " cfgupgrade utility, after reading the upgrade notes." %
515 4b63dc7a Iustin Pop
         (v1, v2))
516 4b63dc7a Iustin Pop
    sys.exit(constants.EXIT_FAILURE)
517 4b63dc7a Iustin Pop
  except errors.ConfigurationError, err:
518 4b63dc7a Iustin Pop
    print >> sys.stderr, \
519 4b63dc7a Iustin Pop
        ("Configuration error while opening the configuration file: %s\n"
520 4b63dc7a Iustin Pop
         "This might be caused by an incomplete software upgrade or"
521 4b63dc7a Iustin Pop
         " by a corrupted configuration file. Until the problem is fixed"
522 4b63dc7a Iustin Pop
         " the master daemon cannot start." % str(err))
523 4b63dc7a Iustin Pop
    sys.exit(constants.EXIT_FAILURE)
524 bbfd0568 René Nussbaumer
525 ed0efaa5 Michael Hanselmann
  # If CheckMaster didn't fail we believe we are the master, but we have to
526 ed0efaa5 Michael Hanselmann
  # confirm with the other nodes.
527 ed0efaa5 Michael Hanselmann
  if options.no_voting:
528 ed0efaa5 Michael Hanselmann
    if options.yes_do_it:
529 ed0efaa5 Michael Hanselmann
      return
530 ed0efaa5 Michael Hanselmann
531 ed0efaa5 Michael Hanselmann
    sys.stdout.write("The 'no voting' option has been selected.\n")
532 ed0efaa5 Michael Hanselmann
    sys.stdout.write("This is dangerous, please confirm by"
533 ed0efaa5 Michael Hanselmann
                     " typing uppercase 'yes': ")
534 ed0efaa5 Michael Hanselmann
    sys.stdout.flush()
535 ed0efaa5 Michael Hanselmann
536 ed0efaa5 Michael Hanselmann
    confirmation = sys.stdin.readline().strip()
537 ed0efaa5 Michael Hanselmann
    if confirmation != "YES":
538 7260cfbe Iustin Pop
      print >> sys.stderr, "Aborting."
539 ed0efaa5 Michael Hanselmann
      sys.exit(constants.EXIT_FAILURE)
540 ed0efaa5 Michael Hanselmann
541 ed0efaa5 Michael Hanselmann
    return
542 ed0efaa5 Michael Hanselmann
543 ed0efaa5 Michael Hanselmann
  # CheckAgreement uses RPC and threads, hence it needs to be run in a separate
544 ed0efaa5 Michael Hanselmann
  # process before we call utils.Daemonize in the current process.
545 e0e916fe Iustin Pop
  if not utils.RunInSeparateProcess(CheckAgreement):
546 ed0efaa5 Michael Hanselmann
    sys.exit(constants.EXIT_FAILURE)
547 ed0efaa5 Michael Hanselmann
548 340f4757 Iustin Pop
  # ActivateMasterIP also uses RPC/threads, so we run it again via a
549 340f4757 Iustin Pop
  # separate process.
550 340f4757 Iustin Pop
551 340f4757 Iustin Pop
  # TODO: decide whether failure to activate the master IP is a fatal error
552 340f4757 Iustin Pop
  utils.RunInSeparateProcess(ActivateMasterIP)
553 340f4757 Iustin Pop
554 ed0efaa5 Michael Hanselmann
555 3ee53f1f Iustin Pop
def PrepMasterd(options, _):
556 3ee53f1f Iustin Pop
  """Prep master daemon function, executed with the PID file held.
557 3b316acb Iustin Pop
558 04ccf5e9 Guido Trotter
  """
559 04ccf5e9 Guido Trotter
  # This is safe to do as the pid file guarantees against
560 04ccf5e9 Guido Trotter
  # concurrent execution.
561 04ccf5e9 Guido Trotter
  utils.RemoveFile(constants.MASTER_SOCKET)
562 b1b6ea87 Iustin Pop
563 cdd7f900 Guido Trotter
  mainloop = daemon.Mainloop()
564 7e5a6e86 Guido Trotter
  master = MasterServer(mainloop, constants.MASTER_SOCKET,
565 bbfd0568 René Nussbaumer
                        options.uid, options.gid)
566 3ee53f1f Iustin Pop
  return (mainloop, master)
567 3ee53f1f Iustin Pop
568 3ee53f1f Iustin Pop
569 3ee53f1f Iustin Pop
def ExecMasterd(options, args, prep_data): # pylint: disable-msg=W0613
570 3ee53f1f Iustin Pop
  """Main master daemon function, executed with the PID file held.
571 3ee53f1f Iustin Pop
572 3ee53f1f Iustin Pop
  """
573 3ee53f1f Iustin Pop
  (mainloop, master) = prep_data
574 04ccf5e9 Guido Trotter
  try:
575 15486fa7 Michael Hanselmann
    rpc.Init()
576 4331f6cd Michael Hanselmann
    try:
577 15486fa7 Michael Hanselmann
      master.setup_queue()
578 15486fa7 Michael Hanselmann
      try:
579 cdd7f900 Guido Trotter
        mainloop.Run()
580 15486fa7 Michael Hanselmann
      finally:
581 15486fa7 Michael Hanselmann
        master.server_cleanup()
582 4331f6cd Michael Hanselmann
    finally:
583 15486fa7 Michael Hanselmann
      rpc.Shutdown()
584 a4af651e Iustin Pop
  finally:
585 227647ac Guido Trotter
    utils.RemoveFile(constants.MASTER_SOCKET)
586 a4af651e Iustin Pop
587 ffeffa1d Iustin Pop
588 04ccf5e9 Guido Trotter
def main():
589 04ccf5e9 Guido Trotter
  """Main function"""
590 04ccf5e9 Guido Trotter
  parser = OptionParser(description="Ganeti master daemon",
591 04ccf5e9 Guido Trotter
                        usage="%prog [-f] [-d]",
592 04ccf5e9 Guido Trotter
                        version="%%prog (ganeti) %s" %
593 04ccf5e9 Guido Trotter
                        constants.RELEASE_VERSION)
594 04ccf5e9 Guido Trotter
  parser.add_option("--no-voting", dest="no_voting",
595 04ccf5e9 Guido Trotter
                    help="Do not check that the nodes agree on this node"
596 04ccf5e9 Guido Trotter
                    " being the master and start the daemon unconditionally",
597 04ccf5e9 Guido Trotter
                    default=False, action="store_true")
598 04ccf5e9 Guido Trotter
  parser.add_option("--yes-do-it", dest="yes_do_it",
599 04ccf5e9 Guido Trotter
                    help="Override interactive check for --no-voting",
600 04ccf5e9 Guido Trotter
                    default=False, action="store_true")
601 3ee53f1f Iustin Pop
  daemon.GenericMain(constants.MASTERD, parser, CheckMasterd, PrepMasterd,
602 b42ea9ed Iustin Pop
                     ExecMasterd, multithreaded=True)
603 6c948699 Michael Hanselmann
604 04ccf5e9 Guido Trotter
605 ffeffa1d Iustin Pop
if __name__ == "__main__":
606 ffeffa1d Iustin Pop
  main()