Statistics
| Branch: | Tag: | Revision:

root / lib / server / masterd.py @ fb62843c

History | View | Annotate | Download (25.5 kB)

1 69cf3abd Michael Hanselmann
#
2 ffeffa1d Iustin Pop
#
3 ffeffa1d Iustin Pop
4 83c046a2 Iustin Pop
# Copyright (C) 2006, 2007, 2010, 2011, 2012 Google Inc.
5 ffeffa1d Iustin Pop
#
6 ffeffa1d Iustin Pop
# This program is free software; you can redistribute it and/or modify
7 ffeffa1d Iustin Pop
# it under the terms of the GNU General Public License as published by
8 ffeffa1d Iustin Pop
# the Free Software Foundation; either version 2 of the License, or
9 ffeffa1d Iustin Pop
# (at your option) any later version.
10 ffeffa1d Iustin Pop
#
11 ffeffa1d Iustin Pop
# This program is distributed in the hope that it will be useful, but
12 ffeffa1d Iustin Pop
# WITHOUT ANY WARRANTY; without even the implied warranty of
13 ffeffa1d Iustin Pop
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 ffeffa1d Iustin Pop
# General Public License for more details.
15 ffeffa1d Iustin Pop
#
16 ffeffa1d Iustin Pop
# You should have received a copy of the GNU General Public License
17 ffeffa1d Iustin Pop
# along with this program; if not, write to the Free Software
18 ffeffa1d Iustin Pop
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 ffeffa1d Iustin Pop
# 02110-1301, USA.
20 ffeffa1d Iustin Pop
21 ffeffa1d Iustin Pop
22 ffeffa1d Iustin Pop
"""Master daemon program.
23 ffeffa1d Iustin Pop

24 ffeffa1d Iustin Pop
Some classes deviate from the standard style guide since the
25 ffeffa1d Iustin Pop
inheritance from parent classes requires it.
26 ffeffa1d Iustin Pop

27 ffeffa1d Iustin Pop
"""
28 ffeffa1d Iustin Pop
29 b459a848 Andrea Spadaccini
# pylint: disable=C0103
30 7260cfbe Iustin Pop
# C0103: Invalid name ganeti-masterd
31 ffeffa1d Iustin Pop
32 bbfd0568 René Nussbaumer
import grp
33 bbfd0568 René Nussbaumer
import os
34 bbfd0568 René Nussbaumer
import pwd
35 c1f2901b Iustin Pop
import sys
36 cdd7f900 Guido Trotter
import socket
37 ffeffa1d Iustin Pop
import time
38 bbfd0568 René Nussbaumer
import tempfile
39 96cb3986 Michael Hanselmann
import logging
40 ffeffa1d Iustin Pop
41 c1f2901b Iustin Pop
from optparse import OptionParser
42 ffeffa1d Iustin Pop
43 39dcf2ef Guido Trotter
from ganeti import config
44 ffeffa1d Iustin Pop
from ganeti import constants
45 04ccf5e9 Guido Trotter
from ganeti import daemon
46 ffeffa1d Iustin Pop
from ganeti import mcpu
47 ffeffa1d Iustin Pop
from ganeti import opcodes
48 ffeffa1d Iustin Pop
from ganeti import jqueue
49 39dcf2ef Guido Trotter
from ganeti import locking
50 ffeffa1d Iustin Pop
from ganeti import luxi
51 ffeffa1d Iustin Pop
from ganeti import utils
52 c1f2901b Iustin Pop
from ganeti import errors
53 c1f2901b Iustin Pop
from ganeti import ssconf
54 23e50d39 Michael Hanselmann
from ganeti import workerpool
55 b1b6ea87 Iustin Pop
from ganeti import rpc
56 d7cdb55d Iustin Pop
from ganeti import bootstrap
57 a744b676 Manuel Franceschini
from ganeti import netutils
58 28b71a76 Michael Hanselmann
from ganeti import objects
59 24d16f76 Michael Hanselmann
from ganeti import query
60 a20e4768 Michael Hanselmann
from ganeti import runtime
61 a5ce2ea2 Michael Hanselmann
from ganeti import pathutils
62 7c4bd156 Michael Hanselmann
from ganeti import ht
63 c1f2901b Iustin Pop
64 c1f2901b Iustin Pop
65 23e50d39 Michael Hanselmann
#: Number of workers in the "ClientReq" worker pool created by
#: MasterServer.setup_queue
CLIENT_REQUEST_WORKERS = 16
66 23e50d39 Michael Hanselmann
67 c1f2901b Iustin Pop
#: Local aliases for values defined in the constants module
EXIT_NOTMASTER = constants.EXIT_NOTMASTER
68 c1f2901b Iustin Pop
EXIT_NODESETUP_ERROR = constants.EXIT_NODESETUP_ERROR
69 ffeffa1d Iustin Pop
70 ffeffa1d Iustin Pop
71 4c91d2ad Iustin Pop
def _LogNewJob(status, info, ops):
  """Write an info-level log entry about a job submission attempt.

  @param status: truthy if the job was accepted, falsy otherwise
  @param info: logged as the job id on success and as the failure reason
      otherwise
  @param ops: iterable of opcodes; the result of each opcode's C{Summary()}
      call is included in the log entry

  """
  summaries = [opcode.Summary() for opcode in ops]
  summary_text = utils.CommaJoin(summaries)

  if not status:
    logging.info("Failed to submit job, reason: '%s', summary: %s",
                 info, summary_text)
    return

  logging.info("New job with id %s, summary: %s", info, summary_text)
82 4c91d2ad Iustin Pop
83 4c91d2ad Iustin Pop
84 23e50d39 Michael Hanselmann
class ClientRequestWorker(workerpool.BaseWorker):
  """Worker that serves one LUXI client request per task.

  """
  # pylint: disable=W0221
  def RunTask(self, server, message, client):
    """Parse a client message, run the request and send back the reply.

    A message that cannot be parsed is logged and the client connection is
    closed without sending a reply. Any exception raised while handling a
    parsed request is logged and reported to the client in an unsuccessful
    response.

    @param server: server object; handed to L{ClientOps} and used to wake
        the main thread once the reply has been queued
    @param message: request as received from the client
    @param client: connection object the reply is sent through

    """
    handler = ClientOps(server)

    try:
      (method, args, version) = luxi.ParseRequest(message)
    except luxi.ProtocolError:
      logging.error("Protocol Error: %s", sys.exc_info()[1])
      client.close_log()
      return

    success = False
    try:
      # Verify client's version if there was one in the request
      if version is not None and version != constants.LUXI_VERSION:
        raise errors.LuxiError("LUXI version mismatch, server %s, request %s" %
                               (constants.LUXI_VERSION, version))

      result = handler.handle_request(method, args)
      success = True
    except errors.GenericError:
      # Ganeti's own errors are sent back in encoded form
      failure = sys.exc_info()[1]
      logging.exception("Unexpected exception")
      success = False
      result = errors.EncodeException(failure)
    except: # pylint: disable=W0702
      # Anything else is reported as plain text
      failure = sys.exc_info()[1]
      logging.exception("Unexpected exception")
      result = "Caught exception: %s" % str(failure)

    try:
      client.send_message(luxi.FormatResponse(success, result))
      # awake the main thread so that it can write out the data.
      server.awaker.signal()
    except: # pylint: disable=W0702
      logging.exception("Send error")
      client.close_log()
125 7e5a6e86 Guido Trotter
126 7e5a6e86 Guido Trotter
127 7e5a6e86 Guido Trotter
class MasterClientHandler(daemon.AsyncTerminatedMessageStream):
  """Per-connection handler for clients of the master socket.

  The stream is set up with C{constants.LUXI_EOM} as message terminator;
  every received message is queued on the server's request worker pool.

  """
  #: Passed as last argument to the base class constructor
  _MAX_UNHANDLED = 1

  def __init__(self, server, connected_socket, client_address, family):
    """Initializes the handler and remembers the owning server.

    @param server: server object owning this connection
    @param connected_socket: socket of the accepted connection
    @param client_address: address of the connected client
    @param family: socket family, passed on to the base class

    """
    base_init = daemon.AsyncTerminatedMessageStream.__init__
    base_init(self, connected_socket, client_address, constants.LUXI_EOM,
              family, self._MAX_UNHANDLED)
    self.server = server

  def handle_message(self, message, _):
    """Queues a received message for processing by a request worker.

    The task consists of the server, the message and this handler, matching
    the arguments of L{ClientRequestWorker.RunTask}.

    """
    task = (self.server, message, self)
    self.server.request_workers.AddTask(task)
142 23e50d39 Michael Hanselmann
143 23e50d39 Michael Hanselmann
144 5483fd73 Michael Hanselmann
class _MasterShutdownCheck:
145 5483fd73 Michael Hanselmann
  """Logic for master daemon shutdown.
146 5483fd73 Michael Hanselmann

147 5483fd73 Michael Hanselmann
  """
148 5483fd73 Michael Hanselmann
  #: How long to wait between checks
149 5483fd73 Michael Hanselmann
  _CHECK_INTERVAL = 5.0
150 5483fd73 Michael Hanselmann
151 5483fd73 Michael Hanselmann
  #: How long to wait after all jobs are done (e.g. to give clients time to
152 5483fd73 Michael Hanselmann
  #: retrieve the job status)
153 5483fd73 Michael Hanselmann
  _SHUTDOWN_LINGER = 5.0
154 5483fd73 Michael Hanselmann
155 5483fd73 Michael Hanselmann
  def __init__(self):
156 5483fd73 Michael Hanselmann
    """Initializes this class.
157 5483fd73 Michael Hanselmann

158 5483fd73 Michael Hanselmann
    """
159 5483fd73 Michael Hanselmann
    self._had_active_jobs = None
160 5483fd73 Michael Hanselmann
    self._linger_timeout = None
161 5483fd73 Michael Hanselmann
162 5483fd73 Michael Hanselmann
  def __call__(self, jq_prepare_result):
163 5483fd73 Michael Hanselmann
    """Determines if master daemon is ready for shutdown.
164 5483fd73 Michael Hanselmann

165 5483fd73 Michael Hanselmann
    @param jq_prepare_result: Result of L{jqueue.JobQueue.PrepareShutdown}
166 5483fd73 Michael Hanselmann
    @rtype: None or number
167 5483fd73 Michael Hanselmann
    @return: None if master daemon is ready, timeout if the check must be
168 5483fd73 Michael Hanselmann
             repeated
169 5483fd73 Michael Hanselmann

170 5483fd73 Michael Hanselmann
    """
171 5483fd73 Michael Hanselmann
    if jq_prepare_result:
172 5483fd73 Michael Hanselmann
      # Check again shortly
173 5483fd73 Michael Hanselmann
      logging.info("Job queue has been notified for shutdown but is still"
174 5483fd73 Michael Hanselmann
                   " busy; next check in %s seconds", self._CHECK_INTERVAL)
175 5483fd73 Michael Hanselmann
      self._had_active_jobs = True
176 5483fd73 Michael Hanselmann
      return self._CHECK_INTERVAL
177 5483fd73 Michael Hanselmann
178 5483fd73 Michael Hanselmann
    if not self._had_active_jobs:
179 5483fd73 Michael Hanselmann
      # Can shut down as there were no active jobs on the first check
180 5483fd73 Michael Hanselmann
      return None
181 5483fd73 Michael Hanselmann
182 5483fd73 Michael Hanselmann
    # No jobs are running anymore, but maybe some clients want to collect some
183 5483fd73 Michael Hanselmann
    # information. Give them a short amount of time.
184 5483fd73 Michael Hanselmann
    if self._linger_timeout is None:
185 5483fd73 Michael Hanselmann
      self._linger_timeout = utils.RunningTimeout(self._SHUTDOWN_LINGER, True)
186 5483fd73 Michael Hanselmann
187 5483fd73 Michael Hanselmann
    remaining = self._linger_timeout.Remaining()
188 5483fd73 Michael Hanselmann
189 5483fd73 Michael Hanselmann
    logging.info("Job queue no longer busy; shutting down master daemon"
190 5483fd73 Michael Hanselmann
                 " in %s seconds", remaining)
191 5483fd73 Michael Hanselmann
192 5483fd73 Michael Hanselmann
    # TODO: Should the master daemon socket be closed at this point? Doing so
193 5483fd73 Michael Hanselmann
    # wouldn't affect existing connections.
194 5483fd73 Michael Hanselmann
195 5483fd73 Michael Hanselmann
    if remaining < 0:
196 5483fd73 Michael Hanselmann
      return None
197 5483fd73 Michael Hanselmann
    else:
198 5483fd73 Michael Hanselmann
      return remaining
199 5483fd73 Michael Hanselmann
200 5483fd73 Michael Hanselmann
201 cdd7f900 Guido Trotter
class MasterServer(daemon.AsyncStreamServer):
  """Master Server.

  This is the main asynchronous master server. It handles connections to the
  master socket.

  """
  family = socket.AF_UNIX

  #: Mode applied to the socket path (octal 770); spelled through int() so
  #: the value does not depend on an octal literal syntax
  _SOCKET_MODE = int("770", 8)

  def __init__(self, address, uid, gid):
    """MasterServer constructor

    The base class constructor is called with a temporary name located in
    the directory of C{address}; that path then gets its mode and ownership
    set and is finally renamed to C{address}.

    @param address: the unix socket address to bind the MasterServer to
    @param uid: The uid of the owner of the socket
    @param gid: The gid of the owner of the socket

    """
    socket_dir = os.path.dirname(address)
    temp_name = tempfile.mktemp(dir=socket_dir)
    daemon.AsyncStreamServer.__init__(self, self.family, temp_name)
    os.chmod(temp_name, self._SOCKET_MODE)
    os.chown(temp_name, uid, gid)
    os.rename(temp_name, address)

    self.awaker = daemon.AsyncAwaker()

    # We'll only start threads once we've forked.
    self.context = None
    self.request_workers = None

    self._shutdown_check = None

  def handle_connection(self, connected_socket, client_address):
    """Creates a L{MasterClientHandler} for a newly accepted connection.

    """
    # TODO: add connection count and limit the number of open connections to a
    # maximum number to avoid breaking for lack of file descriptors or memory.
    MasterClientHandler(self, connected_socket, client_address, self.family)

  def setup_queue(self):
    """Creates the Ganeti context and the pool of client request workers.

    """
    self.context = GanetiContext()
    pool = workerpool.WorkerPool("ClientReq", CLIENT_REQUEST_WORKERS,
                                 ClientRequestWorker)
    self.request_workers = pool

  def WaitForShutdown(self):
    """Prepares server for shutdown.

    The shutdown check object is created on first use and fed with the
    result of the job queue's C{PrepareShutdown}.

    @return: whatever L{_MasterShutdownCheck} returns for the current state
        of the job queue

    """
    if self._shutdown_check is None:
      self._shutdown_check = _MasterShutdownCheck()

    queue_state = self.context.jobqueue.PrepareShutdown()
    return self._shutdown_check(queue_state)

  def server_cleanup(self):
    """Cleanup the server.

    This involves shutting down the processor threads and the master
    socket.

    """
    try:
      self.close()
    finally:
      # Runs even if closing the socket failed
      workers = self.request_workers
      if workers:
        workers.TerminateWorkers()
      context = self.context
      if context:
        context.jobqueue.Shutdown()
266 ffeffa1d Iustin Pop
267 ffeffa1d Iustin Pop
268 ffeffa1d Iustin Pop
class ClientOps:
  """Class holding high-level client operations."""
  def __init__(self, server):
    """Remembers the server whose context is used to serve requests.

    """
    self.server = server

  def _RejectLocking(self, use_locking):
    """Raises L{errors.OpPrereqError} if a query asks for locking.

    """
    if use_locking:
      raise errors.OpPrereqError("Sync queries are not allowed",
                                 errors.ECODE_INVAL)

  def handle_request(self, method, args): # pylint: disable=R0911
    """Executes a single LUXI request and returns its result.

    @param method: request name; must be one of C{luxi.REQ_ALL}
    @param args: request arguments as tuple or list; the expected number of
        items depends on the method
    @return: result of the requested operation
    @raise ValueError: if C{args} is neither tuple nor list, or if the method
        is not in C{luxi.REQ_ALL}
    @raise errors.OpPrereqError: for queries requesting locking, filtered
        lock queries and unknown resource types
    @raise errors.ProgrammerError: if the method is in C{luxi.REQ_ALL} but
        not handled here

    """
    context = self.server.context
    queue = context.jobqueue

    # TODO: Parameter validation
    if not isinstance(args, (tuple, list)):
      logging.info("Received invalid arguments of type '%s'", type(args))
      raise ValueError("Invalid arguments type '%s'" % type(args))

    if method not in luxi.REQ_ALL:
      logging.info("Received invalid request '%s'", method)
      raise ValueError("Invalid operation '%s'" % method)

    # Every branch below either returns or raises, hence plain "if" is
    # sufficient

    if method == luxi.REQ_SUBMIT_JOB:
      logging.info("Receiving new job")
      (job_def, ) = args
      opcode_list = [opcodes.OpCode.LoadOpCode(state) for state in job_def]
      new_job_id = queue.SubmitJob(opcode_list)
      _LogNewJob(True, new_job_id, opcode_list)
      return new_job_id

    if method == luxi.REQ_SUBMIT_MANY_JOBS:
      logging.info("Receiving multiple jobs")
      (job_defs, ) = args
      jobs = [[opcodes.OpCode.LoadOpCode(state) for state in job_def]
              for job_def in job_defs]
      submit_results = queue.SubmitManyJobs(jobs)
      for ((status, info), opcode_list) in zip(submit_results, jobs):
        _LogNewJob(status, info, opcode_list)
      return submit_results

    if method == luxi.REQ_CANCEL_JOB:
      (job_id, ) = args
      logging.info("Received job cancel request for %s", job_id)
      return queue.CancelJob(job_id)

    if method == luxi.REQ_CHANGE_JOB_PRIORITY:
      (job_id, priority) = args
      logging.info("Received request to change priority for job %s to %s",
                   job_id, priority)
      return queue.ChangeJobPriority(job_id, priority)

    if method == luxi.REQ_ARCHIVE_JOB:
      (job_id, ) = args
      logging.info("Received job archive request for %s", job_id)
      return queue.ArchiveJob(job_id)

    if method == luxi.REQ_AUTO_ARCHIVE_JOBS:
      (age, timeout) = args
      logging.info("Received job autoarchive request for age %s, timeout %s",
                   age, timeout)
      return queue.AutoArchiveJobs(age, timeout)

    if method == luxi.REQ_WAIT_FOR_JOB_CHANGE:
      (job_id, fields, prev_job_info, prev_log_serial, timeout) = args
      logging.info("Received job poll request for %s", job_id)
      return queue.WaitForJobChanges(job_id, fields, prev_job_info,
                                     prev_log_serial, timeout)

    if method == luxi.REQ_QUERY:
      (what, fields, qfilter) = args

      if what in constants.QR_VIA_OP:
        query_op = opcodes.OpQuery(what=what, fields=fields, qfilter=qfilter)
        return self._Query(query_op)

      if what == constants.QR_LOCK:
        if qfilter is not None:
          raise errors.OpPrereqError("Lock queries can't be filtered",
                                     errors.ECODE_INVAL)
        return context.glm.QueryLocks(fields)

      if what == constants.QR_JOB:
        return queue.QueryJobs(fields, qfilter)

      if what in constants.QR_VIA_LUXI:
        raise NotImplementedError

      raise errors.OpPrereqError("Resource type '%s' unknown" % what,
                                 errors.ECODE_INVAL)

    if method == luxi.REQ_QUERY_FIELDS:
      (what, fields) = args
      req = objects.QueryFieldsRequest(what=what, fields=fields)

      try:
        fielddefs = query.ALL_FIELDS[req.what]
      except KeyError:
        raise errors.OpPrereqError("Resource type '%s' unknown" % req.what,
                                   errors.ECODE_INVAL)

      return query.QueryFields(fielddefs, req.fields)

    if method == luxi.REQ_QUERY_JOBS:
      (job_ids, fields) = args
      if isinstance(job_ids, (tuple, list)) and job_ids:
        id_text = utils.CommaJoin(job_ids)
      else:
        id_text = str(job_ids)
      logging.info("Received job query request for %s", id_text)
      return queue.OldStyleQueryJobs(job_ids, fields)

    if method == luxi.REQ_QUERY_INSTANCES:
      (names, fields, use_locking) = args
      logging.info("Received instance query request for %s", names)
      self._RejectLocking(use_locking)
      op = opcodes.OpInstanceQuery(names=names, output_fields=fields,
                                   use_locking=use_locking)
      return self._Query(op)

    if method == luxi.REQ_QUERY_NODES:
      (names, fields, use_locking) = args
      logging.info("Received node query request for %s", names)
      self._RejectLocking(use_locking)
      op = opcodes.OpNodeQuery(names=names, output_fields=fields,
                               use_locking=use_locking)
      return self._Query(op)

    if method == luxi.REQ_QUERY_GROUPS:
      (names, fields, use_locking) = args
      logging.info("Received group query request for %s", names)
      self._RejectLocking(use_locking)
      op = opcodes.OpGroupQuery(names=names, output_fields=fields)
      return self._Query(op)

    if method == luxi.REQ_QUERY_NETWORKS:
      (names, fields, use_locking) = args
      logging.info("Received network query request for %s", names)
      self._RejectLocking(use_locking)
      op = opcodes.OpNetworkQuery(names=names, output_fields=fields)
      return self._Query(op)

    if method == luxi.REQ_QUERY_EXPORTS:
      (nodes, use_locking) = args
      # Here the locking check comes before the log entry
      self._RejectLocking(use_locking)
      logging.info("Received exports query request")
      op = opcodes.OpBackupQuery(nodes=nodes, use_locking=use_locking)
      return self._Query(op)

    if method == luxi.REQ_QUERY_CONFIG_VALUES:
      (fields, ) = args
      logging.info("Received config values query request for %s", fields)
      op = opcodes.OpClusterConfigQuery(output_fields=fields)
      return self._Query(op)

    if method == luxi.REQ_QUERY_CLUSTER_INFO:
      logging.info("Received cluster info query request")
      op = opcodes.OpClusterQuery()
      return self._Query(op)

    if method == luxi.REQ_QUERY_TAGS:
      (kind, name) = args
      logging.info("Received tags query request")
      op = opcodes.OpTagsGet(kind=kind, name=name, use_locking=False)
      return self._Query(op)

    if method == luxi.REQ_SET_DRAIN_FLAG:
      (drain_flag, ) = args
      logging.info("Received queue drain flag change request to %s",
                   drain_flag)
      return queue.SetDrainFlag(drain_flag)

    if method == luxi.REQ_SET_WATCHER_PAUSE:
      (until, ) = args

      return _SetWatcherPause(context, until)

    # Listed in luxi.REQ_ALL, but no branch above handles it
    logging.critical("Request '%s' in luxi.REQ_ALL, but not known", method)
    raise errors.ProgrammerError("Operation '%s' in luxi.REQ_ALL,"
                                 " but not implemented" % method)

  def _Query(self, op):
    """Runs the specified opcode and returns the result.

    The opcode is executed by a processor created with locks disabled.

    """
    # Queries don't have a job id
    processor = mcpu.Processor(self.server.context, None, enable_locks=False)

    # TODO: Executing an opcode using locks will acquire them in blocking mode.
    # Consider using a timeout for retries.
    return processor.ExecOpCode(op, None)
467 ee6c7b94 Michael Hanselmann
468 ffeffa1d Iustin Pop
469 39dcf2ef Guido Trotter
class GanetiContext(object):
  """Context common to all ganeti threads.

  This class creates and holds common objects shared by all threads: the
  configuration (C{cfg}), the lock manager (C{glm}), the RPC runner (C{rpc})
  and the job queue (C{jobqueue}).

  """
  # pylint: disable=W0212
  # we do want to ensure a singleton here
  _instance = None

  def __init__(self):
    """Constructs a new GanetiContext object.

    There should be only a GanetiContext object at any time, so this
    function raises an error if this is not the case. The check is an
    assertion and therefore not active when assertions are disabled.

    """
    assert self.__class__._instance is None, "double GanetiContext instance"

    # Create global configuration object
    cfg = config.ConfigWriter()
    self.cfg = cfg

    # Locking manager, seeded from the current configuration
    node_names = cfg.GetNodeList()
    group_names = cfg.GetNodeGroupList()
    instance_names = [inst.name for inst in cfg.GetAllInstancesInfo().values()]
    network_names = cfg.GetNetworkList()
    self.glm = locking.GanetiLockManager(node_names, group_names,
                                         instance_names, network_names)

    cfg.SetContext(self)

    # RPC runner
    self.rpc = rpc.RpcRunner(cfg, self.glm.AddToLockMonitor)

    # Job queue
    self.jobqueue = jqueue.JobQueue(self)

    # setting this also locks the class against attribute modifications
    self.__class__._instance = self

  def __setattr__(self, name, value):
    """Setting GanetiContext attributes is forbidden after initialization.

    """
    assert self.__class__._instance is None, "Attempt to modify Ganeti Context"
    object.__setattr__(self, name, value)

  def AddNode(self, node, ec_id):
    """Adds a node to the configuration and lock manager.

    @param node: node object; its C{uuid} is used as lock name
    @param ec_id: passed on to the configuration's C{AddNode}

    """
    # Add it to the configuration
    self.cfg.AddNode(node, ec_id)

    # If preseeding fails it'll not be added
    self.jobqueue.AddNode(node)

    # Add the new node to the Ganeti Lock Manager
    for level in (locking.LEVEL_NODE, locking.LEVEL_NODE_RES):
      self.glm.add(level, node.uuid)

  def ReaddNode(self, node):
    """Updates a node that's already in the configuration

    """
    # Synchronize the queue again
    self.jobqueue.AddNode(node)

  def RemoveNode(self, node):
    """Removes a node from the configuration and lock manager.

    @param node: node object; the configuration and the lock manager are
        given its C{uuid}, the job queue its C{name}

    """
    # Remove node from configuration
    self.cfg.RemoveNode(node.uuid)

    # Notify job queue
    self.jobqueue.RemoveNode(node.name)

    # Remove the node from the Ganeti Lock Manager
    for level in (locking.LEVEL_NODE, locking.LEVEL_NODE_RES):
      self.glm.remove(level, node.uuid)
550 d8470559 Michael Hanselmann
551 39dcf2ef Guido Trotter
552 7c4bd156 Michael Hanselmann
def _SetWatcherPause(context, until):
  """Sets or clears the watcher pause via RPC.

  The request is sent to the nodes returned by the configuration's node
  list. Failures reported for nodes flagged as offline are ignored.

  @type context: L{GanetiContext}
  @param context: Global Ganeti context
  @type until: None or int
  @param until: Unix timestamp saying until when the watcher shouldn't run;
      C{None} requests that the watcher is no longer paused
  @return: the C{until} value that was passed in
  @raise TypeError: if C{until} is neither C{None} nor numeric
  @raise errors.GenericError: if C{until} lies in the past
  @raise errors.OpExecError: if the call failed on at least one node that
      is not flagged as offline

  """
  nodes = context.cfg.GetNodeList()

  if until is not None:
    if not ht.TNumber(until):
      raise TypeError("Duration must be numeric")

    if until < time.time():
      raise errors.GenericError("Unable to set pause end time in the past")

    logging.info("Received request to pause watcher until %s", until)
  else:
    logging.info("Received request to no longer pause watcher")

  rpc_results = context.rpc.call_set_watcher_pause(nodes, until)

  # Collect one "name (reason)" entry per node that failed while not
  # being flagged as offline
  failures = []
  for (name, node_result) in rpc_results.items():
    if node_result.fail_msg and not node_result.offline:
      failures.append("%s (%s)" % (name, node_result.fail_msg))

  errmsg = utils.CommaJoin(failures)
  if errmsg:
    raise errors.OpExecError("Watcher pause was set where possible, but failed"
                             " on the following node(s): %s" % errmsg)

  return until
584 28b498cd Michael Hanselmann
585 05e50653 Michael Hanselmann
586 e0e916fe Iustin Pop
@rpc.RunWithRPC
def CheckAgreement():
  """Verify that the cluster agrees on this node being the master.

  The algorithm is deliberately simple: we need at least as many votes
  for us as against us. Since we are the master in most cases, the node
  list is taken from our own configuration file. In the future the
  current node list could be collected from our (possibly obsolete)
  known nodes instead.

  To cope with a cold start of all nodes, the vote is repeated up to six
  times with a ten second pause after each inconclusive round, i.e. for
  about a minute, until a real node is the top-voted answer. If the
  nodes are more out-of-sync than that, a manual startup of the master
  should be attempted.

  Note that on a cluster with an even number of nodes at least half of
  the nodes (besides ourselves) have to vote for us. This is a problem
  on two-node clusters, because there the other node has to be up as
  well in order to confirm our status.

  @rtype: boolean
  @return: True if we are confirmed as master (or no votes could be
      gathered at all, which is treated as a one node cluster), False
      otherwise

  """
  myself = netutils.Hostname.GetSysName()
  # Short-lived config writer, only needed to read the node names
  cfg = config.ConfigWriter()
  node_names = cfg.GetNodeNames(cfg.GetNodeList())
  del cfg

  for _ in range(6):
    votes = bootstrap.GatherMasterVotes(node_names)
    if not votes:
      # empty node list, this is a one node cluster
      return True
    if votes[0][0] is not None:
      # a real node is at the top of the list, stop asking
      break
    # Inconclusive round, wait before asking again
    time.sleep(10)
  else:
    # All attempts were used up without a conclusive answer
    logging.critical("Cluster inconsistent, most of the nodes didn't answer"
                     " after multiple retries. Aborting startup")
    logging.critical("Use the --no-voting option if you understand what"
                     " effects it has on the cluster state")
    return False

  all_votes = sum(entry[1] for entry in votes)
  (top_node, top_votes) = votes[0]

  if top_node != myself:
    logging.critical("It seems we are not the master (top-voted node"
                     " is %s with %d out of %d votes)", top_node, top_votes,
                     all_votes)
    return False

  votes_against = all_votes - top_votes
  if top_votes < votes_against:
    logging.critical("It seems we are not the master (%d votes for,"
                     " %d votes against)", top_votes, votes_against)
    return False

  return True
645 36205981 Iustin Pop
646 6c948699 Michael Hanselmann
647 340f4757 Iustin Pop
@rpc.RunWithRPC
def ActivateMasterIP():
  """Requests activation of the master IP address via RPC.

  The master network parameters and the external master IP script
  setting are read from the configuration and sent to the node
  identified by the master network parameters. A failure reported by
  the call is only logged; it is neither raised nor returned.

  """
  cfg = config.ConfigWriter()
  master_params = cfg.GetMasterNetworkParameters()
  use_external_script = cfg.GetUseExternalMipScript()
  runner = rpc.BootstrapRunner()

  # we use the node name, as the configuration is only available here yet
  master_name = cfg.GetNodeName(master_params.uuid)
  result = runner.call_node_activate_master_ip(master_name, master_params,
                                               use_external_script)

  failure = result.fail_msg
  if failure:
    logging.error("Can't activate master IP address: %s", failure)
661 340f4757 Iustin Pop
662 340f4757 Iustin Pop
663 ed0efaa5 Michael Hanselmann
def CheckMasterd(options, args):
  """Runs the start-up checks and exits the process if one of them fails.

  In order, this rejects positional arguments, calls
  C{ssconf.CheckMaster}, resolves the daemon user and group, initializes
  the architecture information, makes sure the configuration can be
  loaded, confirms the master role (by voting, or interactively when
  voting is disabled) and finally triggers activation of the master IP.

  @param options: parsed command line options; C{debug}, C{no_voting}
      and C{yes_do_it} are read, C{uid} and C{gid} are set
  @param args: positional command line arguments, must be empty

  """
  def _Abort(message):
    """Writes C{message} to stderr and exits with a failure code.

    """
    sys.stderr.write(message + "\n")
    sys.exit(constants.EXIT_FAILURE)

  if args: # masterd doesn't take any arguments
    _Abort("Usage: %s [-f] [-d]" % sys.argv[0])

  ssconf.CheckMaster(options.debug)

  try:
    options.uid = pwd.getpwnam(constants.MASTERD_USER).pw_uid
    options.gid = grp.getgrnam(constants.DAEMONS_GROUP).gr_gid
  except KeyError:
    _Abort("User or group not existing on system: %s:%s" %
           (constants.MASTERD_USER, constants.DAEMONS_GROUP))

  # Determine static runtime architecture information
  runtime.InitArchInfo()

  # Check the configuration is sane before anything else. The exception
  # object is fetched through sys.exc_info() instead of being bound in
  # the except clause, so that the handlers parse on any Python version.
  try:
    config.ConfigWriter()
  except errors.ConfigVersionMismatch:
    err = sys.exc_info()[1]
    expected = "%s.%s.%s" % constants.SplitVersion(err.args[0])
    found = "%s.%s.%s" % constants.SplitVersion(err.args[1])
    _Abort("Configuration version mismatch. The current Ganeti software"
           " expects version %s, but the on-disk configuration file has"
           " version %s. This is likely the result of upgrading the"
           " software without running the upgrade procedure. Please contact"
           " your cluster administrator or complete the upgrade using the"
           " cfgupgrade utility, after reading the upgrade notes." %
           (expected, found))
  except errors.ConfigurationError:
    err = sys.exc_info()[1]
    _Abort("Configuration error while opening the configuration file: %s\n"
           "This might be caused by an incomplete software upgrade or"
           " by a corrupted configuration file. Until the problem is fixed"
           " the master daemon cannot start." % str(err))

  # If CheckMaster didn't fail we believe we are the master, but we have to
  # confirm with the other nodes.
  if not options.no_voting:
    # CheckAgreement uses RPC and threads, hence it needs to be run in
    # a separate process before we call utils.Daemonize in the current
    # process.
    if not utils.RunInSeparateProcess(CheckAgreement):
      sys.exit(constants.EXIT_FAILURE)
  elif not options.yes_do_it:
    # Voting was disabled without the override flag, ask for confirmation
    sys.stdout.write("The 'no voting' option has been selected.\n")
    sys.stdout.write("This is dangerous, please confirm by"
                     " typing uppercase 'yes': ")
    sys.stdout.flush()

    if sys.stdin.readline().strip() != "YES":
      _Abort("Aborting.")

  # ActivateMasterIP also uses RPC/threads, so we run it again via a
  # separate process.

  # TODO: decide whether failure to activate the master IP is a fatal error
  utils.RunInSeparateProcess(ActivateMasterIP)
733 340f4757 Iustin Pop
734 ed0efaa5 Michael Hanselmann
735 3ee53f1f Iustin Pop
def PrepMasterd(options, _):
  """Prepares the master daemon, executed with the PID file held.

  A left-over master socket file is removed, then the main loop and the
  master server are created.

  @param options: parsed command line options; C{uid} and C{gid} are
      passed on to the master server
  @return: tuple of (main loop, master server)

  """
  socket_path = pathutils.MASTER_SOCKET

  # This is safe to do as the pid file guarantees against
  # concurrent execution.
  utils.RemoveFile(socket_path)

  # The main loop is created before the server, as tuple items are
  # evaluated from left to right
  return (daemon.Mainloop(),
          MasterServer(socket_path, options.uid, options.gid))
746 3ee53f1f Iustin Pop
747 3ee53f1f Iustin Pop
748 b459a848 Andrea Spadaccini
def ExecMasterd(options, args, prep_data): # pylint: disable=W0613
  """Runs the master daemon, executed with the PID file held.

  Cleanup happens in reverse order of setup: the server is cleaned up
  once the main loop ends, RPC is shut down after that and the master
  socket file is removed last. Each step runs even if an earlier stage
  raised an exception.

  @param options: unused
  @param args: unused
  @param prep_data: the (main loop, master server) tuple built by
      L{PrepMasterd}

  """
  (loop, server) = prep_data

  try:
    rpc.Init()
    try:
      server.setup_queue()
      try:
        loop.Run(shutdown_wait_fn=server.WaitForShutdown)
      finally:
        # Runs once the queue was set up, however the main loop ended
        server.server_cleanup()
    finally:
      # Runs once rpc.Init succeeded, even if the queue setup failed
      rpc.Shutdown()
  finally:
    # The socket file is removed in every case, even if rpc.Init failed
    utils.RemoveFile(pathutils.MASTER_SOCKET)

  # Only reached if nothing above raised an exception
  logging.info("Clean master daemon shutdown")
767 5483fd73 Michael Hanselmann
768 ffeffa1d Iustin Pop
769 29d91329 Michael Hanselmann
def Main():
  """Builds the option parser and starts the generic daemon main function.

  """
  version_text = "%%prog (ganeti) %s" % constants.RELEASE_VERSION
  parser = OptionParser(description="Ganeti master daemon",
                        usage="%prog [-f] [-d]",
                        version=version_text)

  # All masterd-specific options are boolean flags defaulting to False
  flag_options = [
    ("--no-voting", "no_voting",
     "Do not check that the nodes agree on this node"
     " being the master and start the daemon unconditionally"),
    ("--yes-do-it", "yes_do_it",
     "Override interactive check for --no-voting"),
    ]
  for (flag, dest, help_text) in flag_options:
    parser.add_option(flag, dest=dest, help=help_text,
                      default=False, action="store_true")

  daemon.GenericMain(constants.MASTERD, parser, CheckMasterd, PrepMasterd,
                     ExecMasterd, multithreaded=True)