Statistics
| Branch: | Tag: | Revision:

root / daemons / ganeti-masterd @ 9894ece7

History | View | Annotate | Download (10.2 kB)

1
#!/usr/bin/python -u
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Master daemon program.
23

    
24
Some classes deviates from the standard style guide since the
25
inheritance from parent classes requires it.
26

    
27
"""
28

    
29

    
30
import sys
31
import SocketServer
32
import time
33
import collections
34
import Queue
35
import random
36
import signal
37
import simplejson
38
import logging
39

    
40
from cStringIO import StringIO
41
from optparse import OptionParser
42

    
43
from ganeti import config
44
from ganeti import constants
45
from ganeti import mcpu
46
from ganeti import opcodes
47
from ganeti import jqueue
48
from ganeti import locking
49
from ganeti import luxi
50
from ganeti import utils
51
from ganeti import errors
52
from ganeti import ssconf
53
from ganeti import logger
54
from ganeti import workerpool
55
from ganeti import rpc
56

    
57

    
58
CLIENT_REQUEST_WORKERS = 16
59

    
60
EXIT_NOTMASTER = constants.EXIT_NOTMASTER
61
EXIT_NODESETUP_ERROR = constants.EXIT_NODESETUP_ERROR
62

    
63

    
64
class ClientRequestWorker(workerpool.BaseWorker):
65
  def RunTask(self, server, request, client_address):
66
    """Process the request.
67

    
68
    This is copied from the code in ThreadingMixIn.
69

    
70
    """
71
    try:
72
      server.finish_request(request, client_address)
73
      server.close_request(request)
74
    except:
75
      server.handle_error(request, client_address)
76
      server.close_request(request)
77

    
78

    
79
class IOServer(SocketServer.UnixStreamServer):
80
  """IO thread class.
81

    
82
  This class takes care of initializing the other threads, setting
83
  signal handlers (which are processed only in this thread), and doing
84
  cleanup at shutdown.
85

    
86
  """
87
  def __init__(self, address, rqhandler):
88
    """IOServer constructor
89

    
90
    Args:
91
      address: the address to bind this IOServer to
92
      rqhandler: RequestHandler type object
93

    
94
    """
95
    SocketServer.UnixStreamServer.__init__(self, address, rqhandler)
96

    
97
    # We'll only start threads once we've forked.
98
    self.context = None
99
    self.request_workers = None
100

    
101
  def setup_queue(self):
102
    self.context = GanetiContext()
103
    self.request_workers = workerpool.WorkerPool(CLIENT_REQUEST_WORKERS,
104
                                                 ClientRequestWorker)
105

    
106
  def process_request(self, request, client_address):
107
    """Add task to workerpool to process request.
108

    
109
    """
110
    self.request_workers.AddTask(self, request, client_address)
111

    
112
  def serve_forever(self):
113
    """Handle one request at a time until told to quit."""
114
    sighandler = utils.SignalHandler([signal.SIGINT, signal.SIGTERM])
115
    try:
116
      while not sighandler.called:
117
        self.handle_request()
118
    finally:
119
      sighandler.Reset()
120

    
121
  def server_cleanup(self):
122
    """Cleanup the server.
123

    
124
    This involves shutting down the processor threads and the master
125
    socket.
126

    
127
    """
128
    try:
129
      self.server_close()
130
    finally:
131
      if self.request_workers:
132
        self.request_workers.TerminateWorkers()
133
      if self.context:
134
        self.context.jobqueue.Shutdown()
135

    
136

    
137
class ClientRqHandler(SocketServer.BaseRequestHandler):
138
  """Client handler"""
139
  EOM = '\3'
140
  READ_SIZE = 4096
141

    
142
  def setup(self):
143
    self._buffer = ""
144
    self._msgs = collections.deque()
145
    self._ops = ClientOps(self.server)
146

    
147
  def handle(self):
148
    while True:
149
      msg = self.read_message()
150
      if msg is None:
151
        logging.info("client closed connection")
152
        break
153

    
154
      request = simplejson.loads(msg)
155
      logging.debug("request: %s", request)
156
      if not isinstance(request, dict):
157
        logging.error("wrong request received: %s", msg)
158
        break
159

    
160
      method = request.get(luxi.KEY_METHOD, None)
161
      args = request.get(luxi.KEY_ARGS, None)
162
      if method is None or args is None:
163
        logging.error("no method or args in request")
164
        break
165

    
166
      success = False
167
      try:
168
        result = self._ops.handle_request(method, args)
169
        success = True
170
      except:
171
        logging.error("Unexpected exception", exc_info=True)
172
        err = sys.exc_info()
173
        result = "Caught exception: %s" % str(err[1])
174

    
175
      response = {
176
        luxi.KEY_SUCCESS: success,
177
        luxi.KEY_RESULT: result,
178
        }
179
      logging.debug("response: %s", response)
180
      self.send_message(simplejson.dumps(response))
181

    
182
  def read_message(self):
183
    while not self._msgs:
184
      data = self.request.recv(self.READ_SIZE)
185
      if not data:
186
        return None
187
      new_msgs = (self._buffer + data).split(self.EOM)
188
      self._buffer = new_msgs.pop()
189
      self._msgs.extend(new_msgs)
190
    return self._msgs.popleft()
191

    
192
  def send_message(self, msg):
193
    #print "sending", msg
194
    self.request.sendall(msg + self.EOM)
195

    
196

    
197
class ClientOps:
198
  """Class holding high-level client operations."""
199
  def __init__(self, server):
200
    self.server = server
201

    
202
  def handle_request(self, method, args):
203
    queue = self.server.context.jobqueue
204

    
205
    # TODO: Parameter validation
206

    
207
    if method == luxi.REQ_SUBMIT_JOB:
208
      ops = [opcodes.OpCode.LoadOpCode(state) for state in args]
209
      return queue.SubmitJob(ops)
210

    
211
    elif method == luxi.REQ_CANCEL_JOB:
212
      job_id = args
213
      return queue.CancelJob(job_id)
214

    
215
    elif method == luxi.REQ_ARCHIVE_JOB:
216
      job_id = args
217
      return queue.ArchiveJob(job_id)
218

    
219
    elif method == luxi.REQ_WAIT_FOR_JOB_CHANGE:
220
      (job_id, fields, previous) = args
221
      return queue.WaitForJobChanges(job_id, fields, previous)
222

    
223
    elif method == luxi.REQ_QUERY_JOBS:
224
      (job_ids, fields) = args
225
      return queue.QueryJobs(job_ids, fields)
226

    
227
    elif method == luxi.REQ_QUERY_INSTANCES:
228
      (names, fields) = args
229
      op = opcodes.OpQueryInstances(names=names, output_fields=fields)
230
      return self._Query(op)
231

    
232
    elif method == luxi.REQ_QUERY_NODES:
233
      (names, fields) = args
234
      op = opcodes.OpQueryNodes(names=names, output_fields=fields)
235
      return self._Query(op)
236

    
237
    elif method == luxi.REQ_QUERY_EXPORTS:
238
      nodes = args
239
      op = opcodes.OpQueryExports(nodes=nodes)
240
      return self._Query(op)
241

    
242
    else:
243
      raise ValueError("Invalid operation")
244

    
245
  def _DummyLog(self, *args):
246
    pass
247

    
248
  def _Query(self, op):
249
    """Runs the specified opcode and returns the result.
250

    
251
    """
252
    proc = mcpu.Processor(self.server.context)
253
    # TODO: Where should log messages go?
254
    return proc.ExecOpCode(op, self._DummyLog)
255

    
256

    
257
class GanetiContext(object):
258
  """Context common to all ganeti threads.
259

    
260
  This class creates and holds common objects shared by all threads.
261

    
262
  """
263
  _instance = None
264

    
265
  def __init__(self):
266
    """Constructs a new GanetiContext object.
267

    
268
    There should be only a GanetiContext object at any time, so this
269
    function raises an error if this is not the case.
270

    
271
    """
272
    assert self.__class__._instance is None, "double GanetiContext instance"
273

    
274
    # Create global configuration object
275
    self.cfg = config.ConfigWriter()
276

    
277
    # Locking manager
278
    self.glm = locking.GanetiLockManager(
279
                self.cfg.GetNodeList(),
280
                self.cfg.GetInstanceList())
281

    
282
    # Job queue
283
    self.jobqueue = jqueue.JobQueue(self)
284

    
285
    # setting this also locks the class against attribute modifications
286
    self.__class__._instance = self
287

    
288
  def __setattr__(self, name, value):
289
    """Setting GanetiContext attributes is forbidden after initialization.
290

    
291
    """
292
    assert self.__class__._instance is None, "Attempt to modify Ganeti Context"
293
    object.__setattr__(self, name, value)
294

    
295
  def AddNode(self, node):
296
    """Adds a node to the configuration and lock manager.
297

    
298
    """
299
    # Add it to the configuration
300
    self.cfg.AddNode(node)
301

    
302
    # If preseeding fails it'll not be added
303
    self.jobqueue.AddNode(node.name)
304

    
305
    # Add the new node to the Ganeti Lock Manager
306
    self.glm.add(locking.LEVEL_NODE, node.name)
307

    
308
  def ReaddNode(self, node):
309
    """Updates a node that's already in the configuration
310

    
311
    """
312
    # Synchronize the queue again
313
    self.jobqueue.AddNode(node.name)
314

    
315
  def RemoveNode(self, name):
316
    """Removes a node from the configuration and lock manager.
317

    
318
    """
319
    # Remove node from configuration
320
    self.cfg.RemoveNode(name)
321

    
322
    # Notify job queue
323
    self.jobqueue.RemoveNode(name)
324

    
325
    # Remove the node from the Ganeti Lock Manager
326
    self.glm.remove(locking.LEVEL_NODE, name)
327

    
328

    
329
def ParseOptions():
330
  """Parse the command line options.
331

    
332
  Returns:
333
    (options, args) as from OptionParser.parse_args()
334

    
335
  """
336
  parser = OptionParser(description="Ganeti master daemon",
337
                        usage="%prog [-f] [-d]",
338
                        version="%%prog (ganeti) %s" %
339
                        constants.RELEASE_VERSION)
340

    
341
  parser.add_option("-f", "--foreground", dest="fork",
342
                    help="Don't detach from the current terminal",
343
                    default=True, action="store_false")
344
  parser.add_option("-d", "--debug", dest="debug",
345
                    help="Enable some debug messages",
346
                    default=False, action="store_true")
347
  options, args = parser.parse_args()
348
  return options, args
349

    
350

    
351
def main():
352
  """Main function"""
353

    
354
  options, args = ParseOptions()
355
  utils.debug = options.debug
356
  utils.no_fork = True
357

    
358
  ssconf.CheckMaster(options.debug)
359

    
360
  master = IOServer(constants.MASTER_SOCKET, ClientRqHandler)
361

    
362
  # become a daemon
363
  if options.fork:
364
    utils.Daemonize(logfile=constants.LOG_MASTERDAEMON,
365
                    noclose_fds=[master.fileno()])
366

    
367
  utils.WritePidFile(constants.MASTERD_PID)
368

    
369
  logger.SetupLogging(constants.LOG_MASTERDAEMON, debug=options.debug,
370
                      stderr_logging=not options.fork)
371

    
372
  logging.info("ganeti master daemon startup")
373

    
374
  # activate ip
375
  master_node = ssconf.SimpleStore().GetMasterNode()
376
  if not rpc.call_node_start_master(master_node, False):
377
    logging.error("Can't activate master IP address")
378

    
379
  master.setup_queue()
380
  try:
381
    master.serve_forever()
382
  finally:
383
    master.server_cleanup()
384
    utils.RemovePidFile(constants.MASTERD_PID)
385

    
386

    
387
if __name__ == "__main__":
388
  main()