Statistics
| Branch: | Tag: | Revision:

root / lib / rpc.py @ 26d502d0

History | View | Annotate | Download (19.3 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Inter-node RPC library.
23

24
"""
25

    
26
# pylint: disable=C0103,R0201,R0904
27
# C0103: Invalid name, since call_ are not valid
28
# R0201: Method could be a function, we keep all rpcs instance methods
29
# as not to change them back and forth between static/instance methods
30
# if they need to start using instance attributes
31
# R0904: Too many public methods
32

    
33
import os
34
import logging
35
import zlib
36
import base64
37
import pycurl
38
import threading
39

    
40
from ganeti import utils
41
from ganeti import objects
42
from ganeti import http
43
from ganeti import serializer
44
from ganeti import constants
45
from ganeti import errors
46
from ganeti import netutils
47
from ganeti import ssconf
48
from ganeti import runtime
49
from ganeti import compat
50
from ganeti import rpc_defs
51

    
52
# Special module generated at build time
53
from ganeti import _generated_rpc
54

    
55
# pylint has a bug here, doesn't see this import
56
import ganeti.http.client  # pylint: disable=W0611
57

    
58

    
59
# Timeout for connecting to nodes (seconds)
60
_RPC_CONNECT_TIMEOUT = 5
61

    
62
_RPC_CLIENT_HEADERS = [
63
  "Content-type: %s" % http.HTTP_APP_JSON,
64
  "Expect:",
65
  ]
66

    
67
# Various time constants for the timeout table
68
_TMO_URGENT = 60 # one minute
69
_TMO_FAST = 5 * 60 # five minutes
70
_TMO_NORMAL = 15 * 60 # 15 minutes
71
_TMO_SLOW = 3600 # one hour
72
_TMO_4HRS = 4 * 3600
73
_TMO_1DAY = 86400
74

    
75
#: Special value to describe an offline host
76
_OFFLINE = object()
77

    
78

    
79
def Init():
80
  """Initializes the module-global HTTP client manager.
81

82
  Must be called before using any RPC function and while exactly one thread is
83
  running.
84

85
  """
86
  # curl_global_init(3) and curl_global_cleanup(3) must be called with only
87
  # one thread running. This check is just a safety measure -- it doesn't
88
  # cover all cases.
89
  assert threading.activeCount() == 1, \
90
         "Found more than one active thread when initializing pycURL"
91

    
92
  logging.info("Using PycURL %s", pycurl.version)
93

    
94
  pycurl.global_init(pycurl.GLOBAL_ALL)
95

    
96

    
97
def Shutdown():
98
  """Stops the module-global HTTP client manager.
99

100
  Must be called before quitting the program and while exactly one thread is
101
  running.
102

103
  """
104
  pycurl.global_cleanup()
105

    
106

    
107
def _ConfigRpcCurl(curl):
108
  noded_cert = str(constants.NODED_CERT_FILE)
109

    
110
  curl.setopt(pycurl.FOLLOWLOCATION, False)
111
  curl.setopt(pycurl.CAINFO, noded_cert)
112
  curl.setopt(pycurl.SSL_VERIFYHOST, 0)
113
  curl.setopt(pycurl.SSL_VERIFYPEER, True)
114
  curl.setopt(pycurl.SSLCERTTYPE, "PEM")
115
  curl.setopt(pycurl.SSLCERT, noded_cert)
116
  curl.setopt(pycurl.SSLKEYTYPE, "PEM")
117
  curl.setopt(pycurl.SSLKEY, noded_cert)
118
  curl.setopt(pycurl.CONNECTTIMEOUT, _RPC_CONNECT_TIMEOUT)
119

    
120

    
121
def RunWithRPC(fn):
122
  """RPC-wrapper decorator.
123

124
  When applied to a function, it runs it with the RPC system
125
  initialized, and it shutsdown the system afterwards. This means the
126
  function must be called without RPC being initialized.
127

128
  """
129
  def wrapper(*args, **kwargs):
130
    Init()
131
    try:
132
      return fn(*args, **kwargs)
133
    finally:
134
      Shutdown()
135
  return wrapper
136

    
137

    
138
def _Compress(data):
139
  """Compresses a string for transport over RPC.
140

141
  Small amounts of data are not compressed.
142

143
  @type data: str
144
  @param data: Data
145
  @rtype: tuple
146
  @return: Encoded data to send
147

148
  """
149
  # Small amounts of data are not compressed
150
  if len(data) < 512:
151
    return (constants.RPC_ENCODING_NONE, data)
152

    
153
  # Compress with zlib and encode in base64
154
  return (constants.RPC_ENCODING_ZLIB_BASE64,
155
          base64.b64encode(zlib.compress(data, 3)))
156

    
157

    
158
class RpcResult(object):
159
  """RPC Result class.
160

161
  This class holds an RPC result. It is needed since in multi-node
162
  calls we can't raise an exception just because one one out of many
163
  failed, and therefore we use this class to encapsulate the result.
164

165
  @ivar data: the data payload, for successful results, or None
166
  @ivar call: the name of the RPC call
167
  @ivar node: the name of the node to which we made the call
168
  @ivar offline: whether the operation failed because the node was
169
      offline, as opposed to actual failure; offline=True will always
170
      imply failed=True, in order to allow simpler checking if
171
      the user doesn't care about the exact failure mode
172
  @ivar fail_msg: the error message if the call failed
173

174
  """
175
  def __init__(self, data=None, failed=False, offline=False,
176
               call=None, node=None):
177
    self.offline = offline
178
    self.call = call
179
    self.node = node
180

    
181
    if offline:
182
      self.fail_msg = "Node is marked offline"
183
      self.data = self.payload = None
184
    elif failed:
185
      self.fail_msg = self._EnsureErr(data)
186
      self.data = self.payload = None
187
    else:
188
      self.data = data
189
      if not isinstance(self.data, (tuple, list)):
190
        self.fail_msg = ("RPC layer error: invalid result type (%s)" %
191
                         type(self.data))
192
        self.payload = None
193
      elif len(data) != 2:
194
        self.fail_msg = ("RPC layer error: invalid result length (%d), "
195
                         "expected 2" % len(self.data))
196
        self.payload = None
197
      elif not self.data[0]:
198
        self.fail_msg = self._EnsureErr(self.data[1])
199
        self.payload = None
200
      else:
201
        # finally success
202
        self.fail_msg = None
203
        self.payload = data[1]
204

    
205
    for attr_name in ["call", "data", "fail_msg",
206
                      "node", "offline", "payload"]:
207
      assert hasattr(self, attr_name), "Missing attribute %s" % attr_name
208

    
209
  @staticmethod
210
  def _EnsureErr(val):
211
    """Helper to ensure we return a 'True' value for error."""
212
    if val:
213
      return val
214
    else:
215
      return "No error information"
216

    
217
  def Raise(self, msg, prereq=False, ecode=None):
218
    """If the result has failed, raise an OpExecError.
219

220
    This is used so that LU code doesn't have to check for each
221
    result, but instead can call this function.
222

223
    """
224
    if not self.fail_msg:
225
      return
226

    
227
    if not msg: # one could pass None for default message
228
      msg = ("Call '%s' to node '%s' has failed: %s" %
229
             (self.call, self.node, self.fail_msg))
230
    else:
231
      msg = "%s: %s" % (msg, self.fail_msg)
232
    if prereq:
233
      ec = errors.OpPrereqError
234
    else:
235
      ec = errors.OpExecError
236
    if ecode is not None:
237
      args = (msg, ecode)
238
    else:
239
      args = (msg, )
240
    raise ec(*args) # pylint: disable=W0142
241

    
242

    
243
def _SsconfResolver(node_list,
244
                    ssc=ssconf.SimpleStore,
245
                    nslookup_fn=netutils.Hostname.GetIP):
246
  """Return addresses for given node names.
247

248
  @type node_list: list
249
  @param node_list: List of node names
250
  @type ssc: class
251
  @param ssc: SimpleStore class that is used to obtain node->ip mappings
252
  @type nslookup_fn: callable
253
  @param nslookup_fn: function use to do NS lookup
254
  @rtype: list of tuple; (string, string)
255
  @return: List of tuples containing node name and IP address
256

257
  """
258
  ss = ssc()
259
  iplist = ss.GetNodePrimaryIPList()
260
  family = ss.GetPrimaryIPFamily()
261
  ipmap = dict(entry.split() for entry in iplist)
262

    
263
  result = []
264
  for node in node_list:
265
    ip = ipmap.get(node)
266
    if ip is None:
267
      ip = nslookup_fn(node, family=family)
268
    result.append((node, ip))
269

    
270
  return result
271

    
272

    
273
class _StaticResolver:
274
  def __init__(self, addresses):
275
    """Initializes this class.
276

277
    """
278
    self._addresses = addresses
279

    
280
  def __call__(self, hosts):
281
    """Returns static addresses for hosts.
282

283
    """
284
    assert len(hosts) == len(self._addresses)
285
    return zip(hosts, self._addresses)
286

    
287

    
288
def _CheckConfigNode(name, node):
289
  """Checks if a node is online.
290

291
  @type name: string
292
  @param name: Node name
293
  @type node: L{objects.Node} or None
294
  @param node: Node object
295

296
  """
297
  if node is None:
298
    # Depend on DNS for name resolution
299
    ip = name
300
  elif node.offline:
301
    ip = _OFFLINE
302
  else:
303
    ip = node.primary_ip
304
  return (name, ip)
305

    
306

    
307
def _NodeConfigResolver(single_node_fn, all_nodes_fn, hosts):
308
  """Calculate node addresses using configuration.
309

310
  """
311
  # Special case for single-host lookups
312
  if len(hosts) == 1:
313
    (name, ) = hosts
314
    return [_CheckConfigNode(name, single_node_fn(name))]
315
  else:
316
    all_nodes = all_nodes_fn()
317
    return [_CheckConfigNode(name, all_nodes.get(name, None))
318
            for name in hosts]
319

    
320

    
321
class _RpcProcessor:
322
  def __init__(self, resolver, port, lock_monitor_cb=None):
323
    """Initializes this class.
324

325
    @param resolver: callable accepting a list of hostnames, returning a list
326
      of tuples containing name and IP address (IP address can be the name or
327
      the special value L{_OFFLINE} to mark offline machines)
328
    @type port: int
329
    @param port: TCP port
330
    @param lock_monitor_cb: Callable for registering with lock monitor
331

332
    """
333
    self._resolver = resolver
334
    self._port = port
335
    self._lock_monitor_cb = lock_monitor_cb
336

    
337
  @staticmethod
338
  def _PrepareRequests(hosts, port, procedure, body, read_timeout):
339
    """Prepares requests by sorting offline hosts into separate list.
340

341
    """
342
    results = {}
343
    requests = {}
344

    
345
    for (name, ip) in hosts:
346
      if ip is _OFFLINE:
347
        # Node is marked as offline
348
        results[name] = RpcResult(node=name, offline=True, call=procedure)
349
      else:
350
        requests[name] = \
351
          http.client.HttpClientRequest(str(ip), port,
352
                                        http.HTTP_PUT, str("/%s" % procedure),
353
                                        headers=_RPC_CLIENT_HEADERS,
354
                                        post_data=body,
355
                                        read_timeout=read_timeout,
356
                                        nicename="%s/%s" % (name, procedure),
357
                                        curl_config_fn=_ConfigRpcCurl)
358

    
359
    return (results, requests)
360

    
361
  @staticmethod
362
  def _CombineResults(results, requests, procedure):
363
    """Combines pre-computed results for offline hosts with actual call results.
364

365
    """
366
    for name, req in requests.items():
367
      if req.success and req.resp_status_code == http.HTTP_OK:
368
        host_result = RpcResult(data=serializer.LoadJson(req.resp_body),
369
                                node=name, call=procedure)
370
      else:
371
        # TODO: Better error reporting
372
        if req.error:
373
          msg = req.error
374
        else:
375
          msg = req.resp_body
376

    
377
        logging.error("RPC error in %s on node %s: %s", procedure, name, msg)
378
        host_result = RpcResult(data=msg, failed=True, node=name,
379
                                call=procedure)
380

    
381
      results[name] = host_result
382

    
383
    return results
384

    
385
  def __call__(self, hosts, procedure, body, read_timeout=None,
386
               _req_process_fn=http.client.ProcessRequests):
387
    """Makes an RPC request to a number of nodes.
388

389
    @type hosts: sequence
390
    @param hosts: Hostnames
391
    @type procedure: string
392
    @param procedure: Request path
393
    @type body: string
394
    @param body: Request body
395
    @type read_timeout: int or None
396
    @param read_timeout: Read timeout for request
397

398
    """
399
    assert read_timeout is not None, \
400
      "Missing RPC read timeout for procedure '%s'" % procedure
401

    
402
    (results, requests) = \
403
      self._PrepareRequests(self._resolver(hosts), self._port, procedure,
404
                            str(body), read_timeout)
405

    
406
    _req_process_fn(requests.values(), lock_monitor_cb=self._lock_monitor_cb)
407

    
408
    assert not frozenset(results).intersection(requests)
409

    
410
    return self._CombineResults(results, requests, procedure)
411

    
412

    
413
class _RpcClientBase:
414
  def __init__(self, resolver, encoder_fn, lock_monitor_cb=None):
415
    """Initializes this class.
416

417
    """
418
    self._proc = _RpcProcessor(resolver,
419
                               netutils.GetDaemonPort(constants.NODED),
420
                               lock_monitor_cb=lock_monitor_cb)
421
    self._encoder = compat.partial(self._EncodeArg, encoder_fn)
422

    
423
  @staticmethod
424
  def _EncodeArg(encoder_fn, (argkind, value)):
425
    """Encode argument.
426

427
    """
428
    if argkind is None:
429
      return value
430
    else:
431
      return encoder_fn(argkind)(value)
432

    
433
  def _Call(self, cdef, node_list, timeout, args):
434
    """Entry point for automatically generated RPC wrappers.
435

436
    """
437
    (procedure, _, _, argdefs, postproc_fn, _) = cdef
438

    
439
    body = serializer.DumpJson(map(self._encoder,
440
                                   zip(map(compat.snd, argdefs), args)),
441
                               indent=False)
442

    
443
    result = self._proc(node_list, procedure, body, read_timeout=timeout)
444

    
445
    if postproc_fn:
446
      return postproc_fn(result)
447
    else:
448
      return result
449

    
450

    
451
def _ObjectToDict(value):
452
  """Converts an object to a dictionary.
453

454
  @note: See L{objects}.
455

456
  """
457
  return value.ToDict()
458

    
459

    
460
def _ObjectListToDict(value):
461
  """Converts a list of L{objects} to dictionaries.
462

463
  """
464
  return map(_ObjectToDict, value)
465

    
466

    
467
def _EncodeNodeToDiskDict(value):
468
  """Encodes a dictionary with node name as key and disk objects as values.
469

470
  """
471
  return dict((name, _ObjectListToDict(disks))
472
              for name, disks in value.items())
473

    
474

    
475
def _PrepareFileUpload(filename):
476
  """Loads a file and prepares it for an upload to nodes.
477

478
  """
479
  data = _Compress(utils.ReadFile(filename))
480
  st = os.stat(filename)
481
  getents = runtime.GetEnts()
482
  return [filename, data, st.st_mode, getents.LookupUid(st.st_uid),
483
          getents.LookupGid(st.st_gid), st.st_atime, st.st_mtime]
484

    
485

    
486
def _PrepareFinalizeExportDisks(snap_disks):
487
  """Encodes disks for finalizing export.
488

489
  """
490
  flat_disks = []
491

    
492
  for disk in snap_disks:
493
    if isinstance(disk, bool):
494
      flat_disks.append(disk)
495
    else:
496
      flat_disks.append(disk.ToDict())
497

    
498
  return flat_disks
499

    
500

    
501
def _EncodeImportExportIO((ieio, ieioargs)):
502
  """Encodes import/export I/O information.
503

504
  """
505
  if ieio == constants.IEIO_RAW_DISK:
506
    assert len(ieioargs) == 1
507
    return (ieio, (ieioargs[0].ToDict(), ))
508

    
509
  if ieio == constants.IEIO_SCRIPT:
510
    assert len(ieioargs) == 2
511
    return (ieio, (ieioargs[0].ToDict(), ieioargs[1]))
512

    
513
  return (ieio, ieioargs)
514

    
515

    
516
def _EncodeBlockdevRename(value):
517
  """Encodes information for renaming block devices.
518

519
  """
520
  return [(d.ToDict(), uid) for d, uid in value]
521

    
522

    
523
#: Generic encoders
524
_ENCODERS = {
525
  rpc_defs.ED_OBJECT_DICT: _ObjectToDict,
526
  rpc_defs.ED_OBJECT_DICT_LIST: _ObjectListToDict,
527
  rpc_defs.ED_NODE_TO_DISK_DICT: _EncodeNodeToDiskDict,
528
  rpc_defs.ED_FILE_DETAILS: _PrepareFileUpload,
529
  rpc_defs.ED_COMPRESS: _Compress,
530
  rpc_defs.ED_FINALIZE_EXPORT_DISKS: _PrepareFinalizeExportDisks,
531
  rpc_defs.ED_IMPEXP_IO: _EncodeImportExportIO,
532
  rpc_defs.ED_BLOCKDEV_RENAME: _EncodeBlockdevRename,
533
  }
534

    
535

    
536
class RpcRunner(_RpcClientBase,
537
                _generated_rpc.RpcClientDefault,
538
                _generated_rpc.RpcClientBootstrap,
539
                _generated_rpc.RpcClientConfig):
540
  """RPC runner class.
541

542
  """
543
  def __init__(self, context):
544
    """Initialized the RPC runner.
545

546
    @type context: C{masterd.GanetiContext}
547
    @param context: Ganeti context
548

549
    """
550
    self._cfg = context.cfg
551

    
552
    encoders = _ENCODERS.copy()
553

    
554
    # Add encoders requiring configuration object
555
    encoders.update({
556
      rpc_defs.ED_INST_DICT: self._InstDict,
557
      rpc_defs.ED_INST_DICT_HVP_BEP: self._InstDictHvpBep,
558
      rpc_defs.ED_INST_DICT_OSP: self._InstDictOsp,
559
      })
560

    
561
    # Resolver using configuration
562
    resolver = compat.partial(_NodeConfigResolver, self._cfg.GetNodeInfo,
563
                              self._cfg.GetAllNodesInfo)
564

    
565
    # Pylint doesn't recognize multiple inheritance properly, see
566
    # <http://www.logilab.org/ticket/36586> and
567
    # <http://www.logilab.org/ticket/35642>
568
    # pylint: disable=W0233
569
    _RpcClientBase.__init__(self, resolver, encoders.get,
570
                            lock_monitor_cb=context.glm.AddToLockMonitor)
571
    _generated_rpc.RpcClientConfig.__init__(self)
572
    _generated_rpc.RpcClientBootstrap.__init__(self)
573
    _generated_rpc.RpcClientDefault.__init__(self)
574

    
575
  def _InstDict(self, instance, hvp=None, bep=None, osp=None):
576
    """Convert the given instance to a dict.
577

578
    This is done via the instance's ToDict() method and additionally
579
    we fill the hvparams with the cluster defaults.
580

581
    @type instance: L{objects.Instance}
582
    @param instance: an Instance object
583
    @type hvp: dict or None
584
    @param hvp: a dictionary with overridden hypervisor parameters
585
    @type bep: dict or None
586
    @param bep: a dictionary with overridden backend parameters
587
    @type osp: dict or None
588
    @param osp: a dictionary with overridden os parameters
589
    @rtype: dict
590
    @return: the instance dict, with the hvparams filled with the
591
        cluster defaults
592

593
    """
594
    idict = instance.ToDict()
595
    cluster = self._cfg.GetClusterInfo()
596
    idict["hvparams"] = cluster.FillHV(instance)
597
    if hvp is not None:
598
      idict["hvparams"].update(hvp)
599
    idict["beparams"] = cluster.FillBE(instance)
600
    if bep is not None:
601
      idict["beparams"].update(bep)
602
    idict["osparams"] = cluster.SimpleFillOS(instance.os, instance.osparams)
603
    if osp is not None:
604
      idict["osparams"].update(osp)
605
    for nic in idict["nics"]:
606
      nic['nicparams'] = objects.FillDict(
607
        cluster.nicparams[constants.PP_DEFAULT],
608
        nic['nicparams'])
609
    return idict
610

    
611
  def _InstDictHvpBep(self, (instance, hvp, bep)):
612
    """Wrapper for L{_InstDict}.
613

614
    """
615
    return self._InstDict(instance, hvp=hvp, bep=bep)
616

    
617
  def _InstDictOsp(self, (instance, osparams)):
618
    """Wrapper for L{_InstDict}.
619

620
    """
621
    return self._InstDict(instance, osp=osparams)
622

    
623
  #
624
  # Begin RPC calls
625
  #
626

    
627
  def call_test_delay(self, node_list, duration): # pylint: disable=W0221
628
    """Sleep for a fixed time on given node(s).
629

630
    This is a multi-node call.
631

632
    """
633
    # TODO: Use callable timeout calculation
634
    return _generated_rpc.RpcClientDefault.call_test_delay(self,
635
      node_list, duration, read_timeout=int(duration + 5))
636

    
637

    
638
class JobQueueRunner(_RpcClientBase, _generated_rpc.RpcClientJobQueue):
639
  """RPC wrappers for job queue.
640

641
  """
642
  def __init__(self, context, address_list):
643
    """Initializes this class.
644

645
    """
646
    if address_list is None:
647
      resolver = _SsconfResolver
648
    else:
649
      # Caller provided an address list
650
      resolver = _StaticResolver(address_list)
651

    
652
    _RpcClientBase.__init__(self, resolver, _ENCODERS.get,
653
                            lock_monitor_cb=context.glm.AddToLockMonitor)
654
    _generated_rpc.RpcClientJobQueue.__init__(self)
655

    
656

    
657
class BootstrapRunner(_RpcClientBase, _generated_rpc.RpcClientBootstrap):
658
  """RPC wrappers for bootstrapping.
659

660
  """
661
  def __init__(self):
662
    """Initializes this class.
663

664
    """
665
    _RpcClientBase.__init__(self, _SsconfResolver, _ENCODERS.get)
666
    _generated_rpc.RpcClientBootstrap.__init__(self)
667

    
668

    
669
class ConfigRunner(_RpcClientBase, _generated_rpc.RpcClientConfig):
670
  """RPC wrappers for L{config}.
671

672
  """
673
  def __init__(self, context, address_list):
674
    """Initializes this class.
675

676
    """
677
    if context:
678
      lock_monitor_cb = context.glm.AddToLockMonitor
679
    else:
680
      lock_monitor_cb = None
681

    
682
    if address_list is None:
683
      resolver = _SsconfResolver
684
    else:
685
      # Caller provided an address list
686
      resolver = _StaticResolver(address_list)
687

    
688
    _RpcClientBase.__init__(self, resolver, _ENCODERS.get,
689
                            lock_monitor_cb=lock_monitor_cb)
690
    _generated_rpc.RpcClientConfig.__init__(self)