Statistics
| Branch: | Tag: | Revision:

root / daemons / ganeti-masterd @ 7e5a6e86

History | View | Annotate | Download (17.6 kB)

1 834f8b67 Iustin Pop
#!/usr/bin/python
2 ffeffa1d Iustin Pop
#
3 ffeffa1d Iustin Pop
4 ffeffa1d Iustin Pop
# Copyright (C) 2006, 2007 Google Inc.
5 ffeffa1d Iustin Pop
#
6 ffeffa1d Iustin Pop
# This program is free software; you can redistribute it and/or modify
7 ffeffa1d Iustin Pop
# it under the terms of the GNU General Public License as published by
8 ffeffa1d Iustin Pop
# the Free Software Foundation; either version 2 of the License, or
9 ffeffa1d Iustin Pop
# (at your option) any later version.
10 ffeffa1d Iustin Pop
#
11 ffeffa1d Iustin Pop
# This program is distributed in the hope that it will be useful, but
12 ffeffa1d Iustin Pop
# WITHOUT ANY WARRANTY; without even the implied warranty of
13 ffeffa1d Iustin Pop
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 ffeffa1d Iustin Pop
# General Public License for more details.
15 ffeffa1d Iustin Pop
#
16 ffeffa1d Iustin Pop
# You should have received a copy of the GNU General Public License
17 ffeffa1d Iustin Pop
# along with this program; if not, write to the Free Software
18 ffeffa1d Iustin Pop
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 ffeffa1d Iustin Pop
# 02110-1301, USA.
20 ffeffa1d Iustin Pop
21 ffeffa1d Iustin Pop
22 ffeffa1d Iustin Pop
"""Master daemon program.
23 ffeffa1d Iustin Pop
24 ffeffa1d Iustin Pop
Some classes deviate from the standard style guide since the
25 ffeffa1d Iustin Pop
inheritance from parent classes requires it.
26 ffeffa1d Iustin Pop
27 ffeffa1d Iustin Pop
"""
28 ffeffa1d Iustin Pop
29 7260cfbe Iustin Pop
# pylint: disable-msg=C0103
30 7260cfbe Iustin Pop
# C0103: Invalid name ganeti-masterd
31 ffeffa1d Iustin Pop
32 bbfd0568 René Nussbaumer
import grp
33 bbfd0568 René Nussbaumer
import os
34 bbfd0568 René Nussbaumer
import pwd
35 c1f2901b Iustin Pop
import sys
36 cdd7f900 Guido Trotter
import socket
37 ffeffa1d Iustin Pop
import time
38 bbfd0568 René Nussbaumer
import tempfile
39 96cb3986 Michael Hanselmann
import logging
40 ffeffa1d Iustin Pop
41 c1f2901b Iustin Pop
from optparse import OptionParser
42 ffeffa1d Iustin Pop
43 39dcf2ef Guido Trotter
from ganeti import config
44 ffeffa1d Iustin Pop
from ganeti import constants
45 04ccf5e9 Guido Trotter
from ganeti import daemon
46 ffeffa1d Iustin Pop
from ganeti import mcpu
47 ffeffa1d Iustin Pop
from ganeti import opcodes
48 ffeffa1d Iustin Pop
from ganeti import jqueue
49 39dcf2ef Guido Trotter
from ganeti import locking
50 ffeffa1d Iustin Pop
from ganeti import luxi
51 ffeffa1d Iustin Pop
from ganeti import utils
52 c1f2901b Iustin Pop
from ganeti import errors
53 c1f2901b Iustin Pop
from ganeti import ssconf
54 23e50d39 Michael Hanselmann
from ganeti import workerpool
55 b1b6ea87 Iustin Pop
from ganeti import rpc
56 d7cdb55d Iustin Pop
from ganeti import bootstrap
57 c1f2901b Iustin Pop
58 c1f2901b Iustin Pop
59 23e50d39 Michael Hanselmann
# Handed to workerpool.WorkerPool for the "ClientReq" pool created in
# MasterServer.setup_queue (presumably the number of worker threads).
CLIENT_REQUEST_WORKERS = 16
60 23e50d39 Michael Hanselmann
61 c1f2901b Iustin Pop
# Module-level alias for the exit code defined in ganeti.constants.
EXIT_NOTMASTER = constants.EXIT_NOTMASTER
62 c1f2901b Iustin Pop
# Module-level alias for the exit code defined in ganeti.constants.
EXIT_NODESETUP_ERROR = constants.EXIT_NODESETUP_ERROR
63 ffeffa1d Iustin Pop
64 ffeffa1d Iustin Pop
65 23e50d39 Michael Hanselmann
class ClientRequestWorker(workerpool.BaseWorker):
66 7260cfbe Iustin Pop
   # pylint: disable-msg=W0221
67 7e5a6e86 Guido Trotter
  def RunTask(self, server, message, client):
68 23e50d39 Michael Hanselmann
    """Process the request.
69 23e50d39 Michael Hanselmann
70 23e50d39 Michael Hanselmann
    """
71 7e5a6e86 Guido Trotter
    client_ops = ClientOps(server)
72 7e5a6e86 Guido Trotter
73 23e50d39 Michael Hanselmann
    try:
74 7e5a6e86 Guido Trotter
      (method, args) = luxi.ParseRequest(message)
75 7e5a6e86 Guido Trotter
    except luxi.ProtocolError, err:
76 7e5a6e86 Guido Trotter
      logging.error("Protocol Error: %s", err)
77 7e5a6e86 Guido Trotter
      client.close_log()
78 7e5a6e86 Guido Trotter
      return
79 7e5a6e86 Guido Trotter
80 7e5a6e86 Guido Trotter
    success = False
81 7e5a6e86 Guido Trotter
    try:
82 7e5a6e86 Guido Trotter
      result = client_ops.handle_request(method, args)
83 7e5a6e86 Guido Trotter
      success = True
84 7e5a6e86 Guido Trotter
    except errors.GenericError, err:
85 7e5a6e86 Guido Trotter
      logging.exception("Unexpected exception")
86 7e5a6e86 Guido Trotter
      success = False
87 7e5a6e86 Guido Trotter
      result = errors.EncodeException(err)
88 7e5a6e86 Guido Trotter
    except:
89 7e5a6e86 Guido Trotter
      logging.exception("Unexpected exception")
90 7e5a6e86 Guido Trotter
      err = sys.exc_info()
91 7e5a6e86 Guido Trotter
      result = "Caught exception: %s" % str(err[1])
92 7e5a6e86 Guido Trotter
93 7e5a6e86 Guido Trotter
    try:
94 7e5a6e86 Guido Trotter
      reply = luxi.FormatResponse(success, result)
95 7e5a6e86 Guido Trotter
      client.send_message(reply)
96 7e5a6e86 Guido Trotter
      # awake the main thread so that it can write out the data.
97 7e5a6e86 Guido Trotter
      server.awaker.signal()
98 7e5a6e86 Guido Trotter
    except:
99 7e5a6e86 Guido Trotter
      logging.exception("Send error")
100 7e5a6e86 Guido Trotter
      client.close_log()
101 7e5a6e86 Guido Trotter
102 7e5a6e86 Guido Trotter
103 7e5a6e86 Guido Trotter
class MasterClientHandler(daemon.AsyncTerminatedMessageStream):
  """Handler for master peers.

  Messages are terminated by C{constants.LUXI_EOM}; every received message
  is handed to the server's C{request_workers} pool.

  """
  # Passed to the base class constructor as its last argument
  _MAX_UNHANDLED = 1

  def __init__(self, server, connected_socket, client_address, family):
    """Initializes the handler for one client connection.

    @param server: the server owning this connection; stored as
        C{self.server}
    @param connected_socket: passed through to the base class
    @param client_address: passed through to the base class
    @param family: passed through to the base class

    """
    base = daemon.AsyncTerminatedMessageStream
    base.__init__(self, connected_socket, client_address,
                  constants.LUXI_EOM, family, self._MAX_UNHANDLED)
    self.server = server

  def handle_message(self, message, _):
    """Queues a received message as a task for the request workers.

    @param message: the message received from the client

    """
    workers = self.server.request_workers
    workers.AddTask(self.server, message, self)
117 23e50d39 Michael Hanselmann
118 23e50d39 Michael Hanselmann
119 cdd7f900 Guido Trotter
class MasterServer(daemon.AsyncStreamServer):
120 cdd7f900 Guido Trotter
  """Master Server.
121 ffeffa1d Iustin Pop
122 cdd7f900 Guido Trotter
  This is the main asynchronous master server. It handles connections to the
123 cdd7f900 Guido Trotter
  master socket.
124 ffeffa1d Iustin Pop
125 ffeffa1d Iustin Pop
  """
126 7e5a6e86 Guido Trotter
  family = socket.AF_UNIX
127 7e5a6e86 Guido Trotter
128 7e5a6e86 Guido Trotter
  def __init__(self, mainloop, address, uid, gid):
129 cdd7f900 Guido Trotter
    """MasterServer constructor
130 ce862cd5 Guido Trotter
131 cdd7f900 Guido Trotter
    @type mainloop: ganeti.daemon.Mainloop
132 cdd7f900 Guido Trotter
    @param mainloop: Mainloop used to poll for I/O events
133 cdd7f900 Guido Trotter
    @param address: the unix socket address to bind the MasterServer to
134 bbfd0568 René Nussbaumer
    @param uid: The uid of the owner of the socket
135 bbfd0568 René Nussbaumer
    @param gid: The gid of the owner of the socket
136 ce862cd5 Guido Trotter
137 ce862cd5 Guido Trotter
    """
138 bbfd0568 René Nussbaumer
    temp_name = tempfile.mktemp(dir=os.path.dirname(address))
139 7e5a6e86 Guido Trotter
    daemon.AsyncStreamServer.__init__(self, self.family, temp_name)
140 bbfd0568 René Nussbaumer
    os.chmod(temp_name, 0770)
141 bbfd0568 René Nussbaumer
    os.chown(temp_name, uid, gid)
142 bbfd0568 René Nussbaumer
    os.rename(temp_name, address)
143 bbfd0568 René Nussbaumer
144 cdd7f900 Guido Trotter
    self.mainloop = mainloop
145 7e5a6e86 Guido Trotter
    self.awaker = daemon.AsyncAwaker()
146 50a3fbb2 Michael Hanselmann
147 50a3fbb2 Michael Hanselmann
    # We'll only start threads once we've forked.
148 9113300d Michael Hanselmann
    self.context = None
149 23e50d39 Michael Hanselmann
    self.request_workers = None
150 50a3fbb2 Michael Hanselmann
151 cdd7f900 Guido Trotter
  def handle_connection(self, connected_socket, client_address):
152 7e5a6e86 Guido Trotter
    # TODO: add connection count and limit the number of open connections to a
153 7e5a6e86 Guido Trotter
    # maximum number to avoid breaking for lack of file descriptors or memory.
154 7e5a6e86 Guido Trotter
    MasterClientHandler(self, connected_socket, client_address, self.family)
155 cdd7f900 Guido Trotter
156 50a3fbb2 Michael Hanselmann
  def setup_queue(self):
157 9113300d Michael Hanselmann
    self.context = GanetiContext()
158 89e2b4d2 Michael Hanselmann
    self.request_workers = workerpool.WorkerPool("ClientReq",
159 89e2b4d2 Michael Hanselmann
                                                 CLIENT_REQUEST_WORKERS,
160 23e50d39 Michael Hanselmann
                                                 ClientRequestWorker)
161 ffeffa1d Iustin Pop
162 c1f2901b Iustin Pop
  def server_cleanup(self):
163 c1f2901b Iustin Pop
    """Cleanup the server.
164 c1f2901b Iustin Pop
165 c1f2901b Iustin Pop
    This involves shutting down the processor threads and the master
166 c1f2901b Iustin Pop
    socket.
167 c1f2901b Iustin Pop
168 c1f2901b Iustin Pop
    """
169 50a3fbb2 Michael Hanselmann
    try:
170 cdd7f900 Guido Trotter
      self.close()
171 50a3fbb2 Michael Hanselmann
    finally:
172 23e50d39 Michael Hanselmann
      if self.request_workers:
173 36088c4c Michael Hanselmann
        self.request_workers.TerminateWorkers()
174 9113300d Michael Hanselmann
      if self.context:
175 9113300d Michael Hanselmann
        self.context.jobqueue.Shutdown()
176 ffeffa1d Iustin Pop
177 ffeffa1d Iustin Pop
178 ffeffa1d Iustin Pop
class ClientOps:
  """Class holding high-level client operations.

  Requests are either forwarded to the job queue of the server's context
  or translated into a query opcode which is executed directly.

  """
  def __init__(self, server):
    """Stores the server whose context is used to serve requests.

    """
    self.server = server

  @staticmethod
  def _RejectLocking(use_locking):
    """Refuses queries which ask for locking.

    @param use_locking: the C{use_locking} value sent by the client
    @raise errors.OpPrereqError: if C{use_locking} is true

    """
    if use_locking:
      raise errors.OpPrereqError("Sync queries are not allowed",
                                 errors.ECODE_INVAL)

  def handle_request(self, method, args): # pylint: disable-msg=R0911
    """Executes a single client request.

    @param method: the request identifier, one of the C{luxi.REQ_*} values
    @param args: the method-specific arguments sent by the client
    @return: the result of the job queue call or of the executed opcode
    @raise errors.OpPrereqError: if a query asks for locking
    @raise TypeError: if the watcher pause end time is not a number
    @raise errors.GenericError: if the watcher pause end time is in the past
    @raise ValueError: if C{method} is not a known request

    """
    queue = self.server.context.jobqueue

    # TODO: Parameter validation

    # TODO: Rewrite to not exit in each 'if/elif' branch

    if method == luxi.REQ_SUBMIT_JOB:
      logging.info("Received new job")
      ops = [opcodes.OpCode.LoadOpCode(state) for state in args]
      return queue.SubmitJob(ops)

    elif method == luxi.REQ_SUBMIT_MANY_JOBS:
      logging.info("Received multiple jobs")
      jobs = []
      for ops in args:
        jobs.append([opcodes.OpCode.LoadOpCode(state) for state in ops])
      return queue.SubmitManyJobs(jobs)

    elif method == luxi.REQ_CANCEL_JOB:
      job_id = args
      logging.info("Received job cancel request for %s", job_id)
      return queue.CancelJob(job_id)

    elif method == luxi.REQ_ARCHIVE_JOB:
      job_id = args
      logging.info("Received job archive request for %s", job_id)
      return queue.ArchiveJob(job_id)

    elif method == luxi.REQ_AUTOARCHIVE_JOBS:
      (age, timeout) = args
      logging.info("Received job autoarchive request for age %s, timeout %s",
                   age, timeout)
      return queue.AutoArchiveJobs(age, timeout)

    elif method == luxi.REQ_WAIT_FOR_JOB_CHANGE:
      (job_id, fields, prev_job_info, prev_log_serial, timeout) = args
      logging.info("Received job poll request for %s", job_id)
      return queue.WaitForJobChanges(job_id, fields, prev_job_info,
                                     prev_log_serial, timeout)

    elif method == luxi.REQ_QUERY_JOBS:
      (job_ids, fields) = args
      # Only a non-empty sequence of IDs is joined for the log message
      if isinstance(job_ids, (tuple, list)) and job_ids:
        msg = utils.CommaJoin(job_ids)
      else:
        msg = str(job_ids)
      logging.info("Received job query request for %s", msg)
      return queue.QueryJobs(job_ids, fields)

    elif method == luxi.REQ_QUERY_INSTANCES:
      (names, fields, use_locking) = args
      logging.info("Received instance query request for %s", names)
      self._RejectLocking(use_locking)
      op = opcodes.OpQueryInstances(names=names, output_fields=fields,
                                    use_locking=use_locking)
      return self._Query(op)

    elif method == luxi.REQ_QUERY_NODES:
      (names, fields, use_locking) = args
      logging.info("Received node query request for %s", names)
      self._RejectLocking(use_locking)
      op = opcodes.OpQueryNodes(names=names, output_fields=fields,
                                use_locking=use_locking)
      return self._Query(op)

    elif method == luxi.REQ_QUERY_EXPORTS:
      nodes, use_locking = args
      # Logged before the locking check, like the other queries, so that
      # rejected requests show up in the log as well
      logging.info("Received exports query request")
      self._RejectLocking(use_locking)
      op = opcodes.OpQueryExports(nodes=nodes, use_locking=use_locking)
      return self._Query(op)

    elif method == luxi.REQ_QUERY_CONFIG_VALUES:
      fields = args
      logging.info("Received config values query request for %s", fields)
      op = opcodes.OpQueryConfigValues(output_fields=fields)
      return self._Query(op)

    elif method == luxi.REQ_QUERY_CLUSTER_INFO:
      logging.info("Received cluster info query request")
      op = opcodes.OpQueryClusterInfo()
      return self._Query(op)

    elif method == luxi.REQ_QUERY_TAGS:
      kind, name = args
      logging.info("Received tags query request")
      op = opcodes.OpGetTags(kind=kind, name=name)
      return self._Query(op)

    elif method == luxi.REQ_QUEUE_SET_DRAIN_FLAG:
      drain_flag = args
      logging.info("Received queue drain flag change request to %s",
                   drain_flag)
      return queue.SetDrainFlag(drain_flag)

    elif method == luxi.REQ_SET_WATCHER_PAUSE:
      (until, ) = args

      # None removes the pause, anything else must be a future timestamp
      if until is None:
        logging.info("Received request to no longer pause the watcher")
      else:
        if not isinstance(until, (int, float)):
          raise TypeError("Duration must be an integer or float")

        if until < time.time():
          raise errors.GenericError("Unable to set pause end time in the past")

        logging.info("Received request to pause the watcher until %s", until)

      return _SetWatcherPause(until)

    else:
      logging.info("Received invalid request '%s'", method)
      raise ValueError("Invalid operation '%s'" % method)

  def _Query(self, op):
    """Runs the specified opcode and returns the result.

    @param op: the opcode to execute
    @return: whatever C{mcpu.Processor.ExecOpCode} returns for the opcode

    """
    # Queries don't have a job id
    proc = mcpu.Processor(self.server.context, None)
    return proc.ExecOpCode(op, None)
312 ee6c7b94 Michael Hanselmann
313 ffeffa1d Iustin Pop
314 39dcf2ef Guido Trotter
class GanetiContext(object):
  """Context common to all ganeti threads.

  Holds the objects shared by all threads: the configuration (C{cfg}), the
  lock manager (C{glm}) and the job queue (C{jobqueue}).

  """
  # pylint: disable-msg=W0212
  # we do want to ensure a singleton here
  _instance = None

  def __init__(self):
    """Constructs a new GanetiContext object.

    Only one GanetiContext may exist at any time; an assertion fails if
    one has already been created.

    """
    cls = self.__class__
    assert cls._instance is None, "double GanetiContext instance"

    # Create global configuration object
    self.cfg = config.ConfigWriter()

    # Locking manager, seeded with the currently configured names
    node_names = self.cfg.GetNodeList()
    instance_names = self.cfg.GetInstanceList()
    self.glm = locking.GanetiLockManager(node_names, instance_names)

    # Job queue
    self.jobqueue = jqueue.JobQueue(self)

    # setting this also locks the class against attribute modifications
    cls._instance = self

  def __setattr__(self, name, value):
    """Setting GanetiContext attributes is forbidden after initialization.

    The assertion only passes while no instance has been registered, i.e.
    while L{__init__} is still running.

    """
    registered = self.__class__._instance
    assert registered is None, "Attempt to modify Ganeti Context"
    object.__setattr__(self, name, value)

  def AddNode(self, node, ec_id):
    """Adds a node to the configuration and lock manager.

    @param node: the node object to add; its C{name} is used for the lock
    @param ec_id: passed through to the configuration's C{AddNode}

    """
    # Add it to the configuration
    self.cfg.AddNode(node, ec_id)

    # If preseeding fails it'll not be added
    self.jobqueue.AddNode(node)

    # Add the new node to the Ganeti Lock Manager
    node_name = node.name
    self.glm.add(locking.LEVEL_NODE, node_name)

  def ReaddNode(self, node):
    """Updates a node that's already in the configuration

    @param node: the node object to hand to the job queue again

    """
    # Synchronize the queue again
    self.jobqueue.AddNode(node)

  def RemoveNode(self, name):
    """Removes a node from the configuration and lock manager.

    @param name: the name of the node to remove

    """
    # Remove node from configuration
    self.cfg.RemoveNode(name)

    # Notify job queue
    self.jobqueue.RemoveNode(name)

    # Remove the node from the Ganeti Lock Manager
    self.glm.remove(locking.LEVEL_NODE, name)
386 d8470559 Michael Hanselmann
387 39dcf2ef Guido Trotter
388 05e50653 Michael Hanselmann
def _SetWatcherPause(until):
  """Creates or removes the watcher pause file.

  @type until: None or number
  @param until: Unix timestamp saying until when the watcher shouldn't run;
      None removes the pause file
  @return: the C{until} value, unchanged

  """
  if until is None:
    utils.RemoveFile(constants.WATCHER_PAUSEFILE)
    return until

  # "%d" writes the timestamp as an integer followed by a newline
  pause_data = "%d\n" % (until, )
  utils.WriteFile(constants.WATCHER_PAUSEFILE, data=pause_data)
  return until
402 28b498cd Michael Hanselmann
403 05e50653 Michael Hanselmann
404 36205981 Iustin Pop
def CheckAgreement():
  """Check the agreement on who is the master.

  The node list is read from our own configuration and the votes are
  gathered via C{bootstrap.GatherMasterVotes}. We are accepted as master
  if we are the top-voted node and the votes for us are not fewer than
  the votes for all other entries together.

  In order to account for cold-start of all nodes, voting is attempted up
  to six times, sleeping ten seconds after each attempt whose top-voted
  entry is not a real answer. If the nodes are more out-of-sync, for now
  manual startup of the master should be attempted.

  Note that for a even number of nodes cluster, we need at least half
  of the nodes (beside ourselves) to vote for us. This creates a
  problem on two-node clusters, since in this case we require the
  other node to be up too to confirm our status.

  @rtype: boolean
  @return: True if we may run as master (also when no votes are returned,
      which is treated as a one node cluster), False otherwise

  """
  myself = utils.HostInfo().name
  # The config writer is temporary, it is used only to get the node list
  node_list = config.ConfigWriter().GetNodeList()

  for _ in range(6):
    votes = bootstrap.GatherMasterVotes(node_list)
    if not votes:
      # empty node list, this is a one node cluster
      return True
    if votes[0][0] is not None:
      # here a real node is at the top of the list
      break
    time.sleep(10)
  else:
    # No attempt produced a real top-voted node
    logging.critical("Cluster inconsistent, most of the nodes didn't answer"
                     " after multiple retries. Aborting startup")
    logging.critical("Use the --no-voting option if you understand what"
                     " effects it has on the cluster state")
    return False

  all_votes = sum([entry[1] for entry in votes])
  top_node, top_votes = votes[0]

  if top_node != myself:
    logging.critical("It seems we are not the master (top-voted node"
                     " is %s with %d out of %d votes)", top_node, top_votes,
                     all_votes)
    return False

  votes_against = all_votes - top_votes
  if top_votes < votes_against:
    logging.critical("It seems we are not the master (%d votes for,"
                     " %d votes against)", top_votes, votes_against)
    return False

  return True
462 36205981 Iustin Pop
463 6c948699 Michael Hanselmann
464 ed0efaa5 Michael Hanselmann
def CheckAgreementWithRpc():
  """Runs L{CheckAgreement} with the RPC layer initialized.

  The RPC layer is shut down again afterwards, even if the check raises.

  @return: the result of L{CheckAgreement}

  """
  rpc.Init()
  try:
    agreed = CheckAgreement()
  finally:
    rpc.Shutdown()
  return agreed
470 ffeffa1d Iustin Pop
471 c1f2901b Iustin Pop
472 ed0efaa5 Michael Hanselmann
def CheckMasterd(options, args):
  """Initial checks whether to run or exit with a failure.

  Besides checking, this stores the uid and gid to be used for the master
  socket in C{options.uid} and C{options.gid}.

  @param options: the parsed command line options
  @param args: the remaining command line arguments; must be empty

  """
  if args: # masterd doesn't take any arguments
    usage = "Usage: %s [-f] [-d]" % sys.argv[0]
    print >> sys.stderr, usage
    sys.exit(constants.EXIT_FAILURE)

  ssconf.CheckMaster(options.debug)

  try:
    options.uid = pwd.getpwnam(constants.MASTERD_USER).pw_uid
    options.gid = grp.getgrnam(constants.DAEMONS_GROUP).gr_gid
  except KeyError:
    # Either lookup raises KeyError for an unknown name
    names = (constants.MASTERD_USER, constants.DAEMONS_GROUP)
    print >> sys.stderr, ("User or group not existing on system: %s:%s" %
                          names)
    sys.exit(constants.EXIT_FAILURE)

  # If CheckMaster didn't fail we believe we are the master, but we have to
  # confirm with the other nodes.
  if not options.no_voting:
    # CheckAgreement uses RPC and threads, hence it needs to be run in a
    # separate process before we call utils.Daemonize in the current process.
    agreed = utils.RunInSeparateProcess(CheckAgreementWithRpc)
    if not agreed:
      sys.exit(constants.EXIT_FAILURE)
    return

  # Voting is disabled; --yes-do-it skips the interactive confirmation
  if options.yes_do_it:
    return

  sys.stdout.write("The 'no voting' option has been selected.\n")
  sys.stdout.write("This is dangerous, please confirm by"
                   " typing uppercase 'yes': ")
  sys.stdout.flush()

  answer = sys.stdin.readline().strip()
  if answer == "YES":
    return

  print >> sys.stderr, "Aborting."
  sys.exit(constants.EXIT_FAILURE)
513 ed0efaa5 Michael Hanselmann
514 ed0efaa5 Michael Hanselmann
515 2d54e29c Iustin Pop
def ExecMasterd(options, args): # pylint: disable-msg=W0613
  """Main master daemon function, executed with the PID file held.

  @param options: the parsed command line options; C{uid} and C{gid} are
      used for the ownership of the master socket
  @param args: unused

  """
  socket_path = constants.MASTER_SOCKET

  # This is safe to do as the pid file guarantees against
  # concurrent execution.
  utils.RemoveFile(socket_path)

  mainloop = daemon.Mainloop()
  server = MasterServer(mainloop, socket_path, options.uid, options.gid)
  try:
    rpc.Init()
    try:
      # activate ip
      master_node = ssconf.SimpleStore().GetMasterNode()
      result = rpc.RpcRunner.call_node_start_master(master_node, False, False)
      fail_msg = result.fail_msg
      if fail_msg:
        # A failure here is only logged, startup continues
        logging.error("Can't activate master IP address: %s", fail_msg)

      server.setup_queue()
      try:
        mainloop.Run()
      finally:
        server.server_cleanup()
    finally:
      rpc.Shutdown()
  finally:
    # The socket file is removed no matter how the daemon terminates
    utils.RemoveFile(socket_path)
545 a4af651e Iustin Pop
546 ffeffa1d Iustin Pop
547 04ccf5e9 Guido Trotter
def main():
  """Main function

  Builds the option parser and the list of required directories, then
  hands control to C{daemon.GenericMain} with L{CheckMasterd} and
  L{ExecMasterd} as callbacks.

  """
  version = "%%prog (ganeti) %s" % constants.RELEASE_VERSION
  parser = OptionParser(description="Ganeti master daemon",
                        usage="%prog [-f] [-d]",
                        version=version)

  parser.add_option("--no-voting", action="store_true", default=False,
                    dest="no_voting",
                    help="Do not check that the nodes agree on this node"
                    " being the master and start the daemon unconditionally")
  parser.add_option("--yes-do-it", action="store_true", default=False,
                    dest="yes_do_it",
                    help="Override interactive check for --no-voting")

  # (directory, mode) pairs passed on to GenericMain
  dirs = [
    (constants.RUN_GANETI_DIR, constants.RUN_DIRS_MODE),
    (constants.SOCKET_DIR, constants.SOCKET_DIR_MODE),
    ]

  daemon.GenericMain(constants.MASTERD, parser, dirs,
                     CheckMasterd, ExecMasterd,
                     multithreaded=True)
566 6c948699 Michael Hanselmann
567 04ccf5e9 Guido Trotter
568 ffeffa1d Iustin Pop
# Start the daemon only when executed as a script, not when imported
if __name__ == "__main__":
  main()