lib/rpc.py @ 4fbe3851

#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Inter-node RPC library.

"""

# pylint: disable=C0103,R0201,R0904
# C0103: Invalid name, since the call_* names do not follow the naming rules
# R0201: Method could be a function; we keep all RPCs as instance methods so
# as not to switch them back and forth between static and instance methods
# should they need to start using instance attributes
# R0904: Too many public methods

import os
import logging
import zlib
import base64
import pycurl
import threading

from ganeti import utils
from ganeti import objects
from ganeti import http
from ganeti import serializer
from ganeti import constants
from ganeti import errors
from ganeti import netutils
from ganeti import ssconf
from ganeti import runtime
from ganeti import compat

# Special module generated at build time
from ganeti import _generated_rpc

# pylint has a bug here, doesn't see this import
import ganeti.http.client  # pylint: disable=W0611


# Timeout for connecting to nodes (seconds)
_RPC_CONNECT_TIMEOUT = 5

_RPC_CLIENT_HEADERS = [
  "Content-type: %s" % http.HTTP_APP_JSON,
  "Expect:",
  ]

# Various time constants for the timeout table
_TMO_URGENT = 60 # one minute
_TMO_FAST = 5 * 60 # five minutes
_TMO_NORMAL = 15 * 60 # 15 minutes
_TMO_SLOW = 3600 # one hour
_TMO_4HRS = 4 * 3600
_TMO_1DAY = 86400

# Timeout table that will be built later by decorators
# Guidelines for choosing timeouts:
# - call used during watcher: timeout -> 1min, _TMO_URGENT
# - trivial (but be sure it is trivial) (e.g. reading a file): 5min, _TMO_FAST
# - other calls: 15 min, _TMO_NORMAL
# - special calls (instance add, etc.): either _TMO_SLOW (1h) or huge timeouts

_TIMEOUTS = {
}

#: Special value to describe an offline host
_OFFLINE = object()


def Init():
  """Initializes the module-global HTTP client manager.

  Must be called before using any RPC function and while exactly one thread is
  running.

  """
  # curl_global_init(3) and curl_global_cleanup(3) must be called with only
  # one thread running. This check is just a safety measure -- it doesn't
  # cover all cases.
  assert threading.activeCount() == 1, \
         "Found more than one active thread when initializing pycURL"

  logging.info("Using PycURL %s", pycurl.version)

  pycurl.global_init(pycurl.GLOBAL_ALL)


def Shutdown():
  """Stops the module-global HTTP client manager.

  Must be called before quitting the program and while exactly one thread is
  running.

  """
  pycurl.global_cleanup()


def _ConfigRpcCurl(curl):
  """Configures a pycurl object for Ganeti RPC.

  The node daemon certificate is used both as the CA for verifying the
  peer and as the client certificate/key pair.

  """
  noded_cert = str(constants.NODED_CERT_FILE)

  curl.setopt(pycurl.FOLLOWLOCATION, False)
  curl.setopt(pycurl.CAINFO, noded_cert)
  curl.setopt(pycurl.SSL_VERIFYHOST, 0)
  curl.setopt(pycurl.SSL_VERIFYPEER, True)
  curl.setopt(pycurl.SSLCERTTYPE, "PEM")
  curl.setopt(pycurl.SSLCERT, noded_cert)
  curl.setopt(pycurl.SSLKEYTYPE, "PEM")
  curl.setopt(pycurl.SSLKEY, noded_cert)
  curl.setopt(pycurl.CONNECTTIMEOUT, _RPC_CONNECT_TIMEOUT)


def _RpcTimeout(secs):
  """Timeout decorator.

  When applied to an rpc call_* function, it updates the global timeout
  table with the given function/timeout.

  """
  def decorator(f):
    name = f.__name__
    assert name.startswith("call_")
    _TIMEOUTS[name[len("call_"):]] = secs
    return f
  return decorator

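# Illustrative use of the decorator above (hypothetical call name; the real
# call_* wrappers live in _generated_rpc and further below):
#
#   @_RpcTimeout(_TMO_FAST)
#   def call_example(self, node_list):
#     ...
#
# would register a five-minute read timeout under _TIMEOUTS["example"].

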
def RunWithRPC(fn):
  """RPC-wrapper decorator.

  When applied to a function, it runs it with the RPC system
  initialized, and it shuts the system down afterwards. This means the
  function must be called without RPC being initialized.

  """
  def wrapper(*args, **kwargs):
    Init()
    try:
      return fn(*args, **kwargs)
    finally:
      Shutdown()
  return wrapper

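# Illustrative only: a standalone tool that needs the RPC layer can wrap its
# entry point so Init()/Shutdown() bracket the call:
#
#   @RunWithRPC
#   def Main():
#     ...

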
def _Compress(data):
  """Compresses a string for transport over RPC.

  Small amounts of data are not compressed.

  @type data: str
  @param data: Data
  @rtype: tuple
  @return: Encoded data to send

  """
  # Small amounts of data are not compressed
  if len(data) < 512:
    return (constants.RPC_ENCODING_NONE, data)

  # Compress with zlib and encode in base64
  return (constants.RPC_ENCODING_ZLIB_BASE64,
          base64.b64encode(zlib.compress(data, 3)))

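# Illustrative round-trip for the encoding above (the decoding side lives on
# the receiving node, not in this module):
#
#   (encoding, payload) = _Compress(data)
#   if encoding == constants.RPC_ENCODING_ZLIB_BASE64:
#     data = zlib.decompress(base64.b64decode(payload))
#   else:
#     data = payload

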
class RpcResult(object):
  """RPC Result class.

  This class holds an RPC result. It is needed since in multi-node
  calls we can't raise an exception just because one out of many
  failed, and therefore we use this class to encapsulate the result.

  @ivar data: the data payload, for successful results, or None
  @ivar call: the name of the RPC call
  @ivar node: the name of the node to which we made the call
  @ivar offline: whether the operation failed because the node was
      offline, as opposed to actual failure; offline=True will always
      imply failed=True, in order to allow simpler checking if
      the user doesn't care about the exact failure mode
  @ivar fail_msg: the error message if the call failed

  """
  def __init__(self, data=None, failed=False, offline=False,
               call=None, node=None):
    self.offline = offline
    self.call = call
    self.node = node

    if offline:
      self.fail_msg = "Node is marked offline"
      self.data = self.payload = None
    elif failed:
      self.fail_msg = self._EnsureErr(data)
      self.data = self.payload = None
    else:
      self.data = data
      if not isinstance(self.data, (tuple, list)):
        self.fail_msg = ("RPC layer error: invalid result type (%s)" %
                         type(self.data))
        self.payload = None
      elif len(data) != 2:
        self.fail_msg = ("RPC layer error: invalid result length (%d), "
                         "expected 2" % len(self.data))
        self.payload = None
      elif not self.data[0]:
        self.fail_msg = self._EnsureErr(self.data[1])
        self.payload = None
      else:
        # finally success
        self.fail_msg = None
        self.payload = data[1]

    for attr_name in ["call", "data", "fail_msg",
                      "node", "offline", "payload"]:
      assert hasattr(self, attr_name), "Missing attribute %s" % attr_name

  @staticmethod
  def _EnsureErr(val):
    """Helper to ensure we return a 'True' value for error."""
    if val:
      return val
    else:
      return "No error information"

  def Raise(self, msg, prereq=False, ecode=None):
    """If the result has failed, raise an OpExecError.

    If C{prereq} is set, an OpPrereqError is raised instead. This is
    used so that LU code doesn't have to check each result, but
    instead can call this function.

    """
    if not self.fail_msg:
      return

    if not msg: # one could pass None for default message
      msg = ("Call '%s' to node '%s' has failed: %s" %
             (self.call, self.node, self.fail_msg))
    else:
      msg = "%s: %s" % (msg, self.fail_msg)
    if prereq:
      ec = errors.OpPrereqError
    else:
      ec = errors.OpExecError
    if ecode is not None:
      args = (msg, ecode)
    else:
      args = (msg, )
    raise ec(*args) # pylint: disable=W0142

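# Typical use on the caller (LU) side, illustrative only (hypothetical call
# name):
#
#   result = self.rpc.call_example(node, args)
#   result.Raise("Example call to node %s failed" % node)
#   data = result.payload

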
def _SsconfResolver(node_list,
                    ssc=ssconf.SimpleStore,
                    nslookup_fn=netutils.Hostname.GetIP):
  """Return addresses for given node names.

  @type node_list: list
  @param node_list: List of node names
  @type ssc: class
  @param ssc: SimpleStore class that is used to obtain node->ip mappings
  @type nslookup_fn: callable
  @param nslookup_fn: function used to do the DNS lookup
  @rtype: list of tuple; (string, string)
  @return: List of tuples containing node name and IP address

  """
  ss = ssc()
  iplist = ss.GetNodePrimaryIPList()
  family = ss.GetPrimaryIPFamily()
  ipmap = dict(entry.split() for entry in iplist)

  result = []
  for node in node_list:
    ip = ipmap.get(node)
    if ip is None:
      ip = nslookup_fn(node, family=family)
    result.append((node, ip))

  return result

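# Illustrative only, assuming the usual "<node name> <primary IP>" format of
# the ssconf primary-IP entries:
#
#   _SsconfResolver(["node1.example.com"])
#   => [("node1.example.com", "192.0.2.10")]
#
# Names missing from ssconf fall back to DNS via nslookup_fn.

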
class _StaticResolver:
  def __init__(self, addresses):
    """Initializes this class.

    """
    self._addresses = addresses

  def __call__(self, hosts):
    """Returns static addresses for hosts.

    """
    assert len(hosts) == len(self._addresses)
    return zip(hosts, self._addresses)


def _CheckConfigNode(name, node):
  """Checks if a node is online.

  @type name: string
  @param name: Node name
  @type node: L{objects.Node} or None
  @param node: Node object

  """
  if node is None:
    # Depend on DNS for name resolution
    ip = name
  elif node.offline:
    ip = _OFFLINE
  else:
    ip = node.primary_ip
  return (name, ip)


def _NodeConfigResolver(single_node_fn, all_nodes_fn, hosts):
  """Calculate node addresses using configuration.

  """
  # Special case for single-host lookups
  if len(hosts) == 1:
    (name, ) = hosts
    return [_CheckConfigNode(name, single_node_fn(name))]
  else:
    all_nodes = all_nodes_fn()
    return [_CheckConfigNode(name, all_nodes.get(name, None))
            for name in hosts]


class _RpcProcessor:
  def __init__(self, resolver, port, lock_monitor_cb=None):
    """Initializes this class.

    @param resolver: callable accepting a list of hostnames, returning a list
      of tuples containing name and IP address (IP address can be the name or
      the special value L{_OFFLINE} to mark offline machines)
    @type port: int
    @param port: TCP port
    @param lock_monitor_cb: Callable for registering with lock monitor

    """
    self._resolver = resolver
    self._port = port
    self._lock_monitor_cb = lock_monitor_cb

  @staticmethod
  def _PrepareRequests(hosts, port, procedure, body, read_timeout):
    """Prepares requests by sorting offline hosts into a separate list.

    """
    results = {}
    requests = {}

    for (name, ip) in hosts:
      if ip is _OFFLINE:
        # Node is marked as offline
        results[name] = RpcResult(node=name, offline=True, call=procedure)
      else:
        requests[name] = \
          http.client.HttpClientRequest(str(ip), port,
                                        http.HTTP_PUT, str("/%s" % procedure),
                                        headers=_RPC_CLIENT_HEADERS,
                                        post_data=body,
                                        read_timeout=read_timeout,
                                        nicename="%s/%s" % (name, procedure),
                                        curl_config_fn=_ConfigRpcCurl)

    return (results, requests)

  @staticmethod
  def _CombineResults(results, requests, procedure):
    """Combines pre-computed results for offline hosts with actual call results.

    """
    for name, req in requests.items():
      if req.success and req.resp_status_code == http.HTTP_OK:
        host_result = RpcResult(data=serializer.LoadJson(req.resp_body),
                                node=name, call=procedure)
      else:
        # TODO: Better error reporting
        if req.error:
          msg = req.error
        else:
          msg = req.resp_body

        logging.error("RPC error in %s on node %s: %s", procedure, name, msg)
        host_result = RpcResult(data=msg, failed=True, node=name,
                                call=procedure)

      results[name] = host_result

    return results

  def __call__(self, hosts, procedure, body, read_timeout=None,
               _req_process_fn=http.client.ProcessRequests):
    """Makes an RPC request to a number of nodes.

    @type hosts: sequence
    @param hosts: Hostnames
    @type procedure: string
    @param procedure: Request path
    @type body: string
    @param body: Request body
    @type read_timeout: int or None
    @param read_timeout: Read timeout for request

    """
    if read_timeout is None:
      read_timeout = _TIMEOUTS.get(procedure, None)

    assert read_timeout is not None, \
      "Missing RPC read timeout for procedure '%s'" % procedure

    (results, requests) = \
      self._PrepareRequests(self._resolver(hosts), self._port, procedure,
                            str(body), read_timeout)

    _req_process_fn(requests.values(), lock_monitor_cb=self._lock_monitor_cb)

    assert not frozenset(results).intersection(requests)

    return self._CombineResults(results, requests, procedure)

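# Illustrative only: wiring a processor by hand with a static resolver and
# calling a hypothetical "example" procedure on two nodes (the runner classes
# below do this wiring against ssconf or the configuration):
#
#   proc = _RpcProcessor(_StaticResolver(["192.0.2.10", "192.0.2.11"]),
#                        netutils.GetDaemonPort(constants.NODED))
#   results = proc(["node1", "node2"], "example",
#                  serializer.DumpJson([], indent=False),
#                  read_timeout=_TMO_URGENT)
#   for (name, res) in results.items():
#     res.Raise("Example call to %s failed" % name)

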
class RpcRunner(_generated_rpc.RpcClientDefault,
                _generated_rpc.RpcClientBootstrap,
                _generated_rpc.RpcClientConfig):
  """RPC runner class.

  """
  def __init__(self, context):
    """Initializes the RPC runner.

    @type context: C{masterd.GanetiContext}
    @param context: Ganeti context

    """
    # Pylint doesn't recognize multiple inheritance properly, see
    # <http://www.logilab.org/ticket/36586> and
    # <http://www.logilab.org/ticket/35642>
    # pylint: disable=W0233
    _generated_rpc.RpcClientConfig.__init__(self)
    _generated_rpc.RpcClientBootstrap.__init__(self)
    _generated_rpc.RpcClientDefault.__init__(self)

    self._cfg = context.cfg
    self._proc = _RpcProcessor(compat.partial(_NodeConfigResolver,
                                              self._cfg.GetNodeInfo,
                                              self._cfg.GetAllNodesInfo),
                               netutils.GetDaemonPort(constants.NODED),
                               lock_monitor_cb=context.glm.AddToLockMonitor)

  def _InstDict(self, instance, hvp=None, bep=None, osp=None):
    """Convert the given instance to a dict.

    This is done via the instance's ToDict() method and additionally
    we fill the hvparams with the cluster defaults.

    @type instance: L{objects.Instance}
    @param instance: an Instance object
    @type hvp: dict or None
    @param hvp: a dictionary with overridden hypervisor parameters
    @type bep: dict or None
    @param bep: a dictionary with overridden backend parameters
    @type osp: dict or None
    @param osp: a dictionary with overridden os parameters
    @rtype: dict
    @return: the instance dict, with the hvparams filled with the
        cluster defaults

    """
    idict = instance.ToDict()
    cluster = self._cfg.GetClusterInfo()
    idict["hvparams"] = cluster.FillHV(instance)
    if hvp is not None:
      idict["hvparams"].update(hvp)
    idict["beparams"] = cluster.FillBE(instance)
    if bep is not None:
      idict["beparams"].update(bep)
    idict["osparams"] = cluster.SimpleFillOS(instance.os, instance.osparams)
    if osp is not None:
      idict["osparams"].update(osp)
    for nic in idict["nics"]:
      nic['nicparams'] = objects.FillDict(
        cluster.nicparams[constants.PP_DEFAULT],
        nic['nicparams'])
    return idict

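  # Illustrative only (hypothetical override value): sending an instance dict
  # with one hypervisor parameter overridden on top of the cluster defaults:
  #
  #   idict = self._InstDict(instance, hvp={constants.HV_BOOT_ORDER: "cd"})

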
  def _InstDictHvpBep(self, (instance, hvp, bep)):
    """Wrapper for L{_InstDict}.

    """
    return self._InstDict(instance, hvp=hvp, bep=bep)

  def _InstDictOsp(self, (instance, osparams)):
    """Wrapper for L{_InstDict}.

    """
    return self._InstDict(instance, osp=osparams)

  def _Call(self, node_list, procedure, timeout, args):
    """Entry point for automatically generated RPC wrappers.

    """
    body = serializer.DumpJson(args, indent=False)

    return self._proc(node_list, procedure, body, read_timeout=timeout)

  @staticmethod
  def _BlockdevFindPostProc(result):
    if not result.fail_msg and result.payload is not None:
      result.payload = objects.BlockDevStatus.FromDict(result.payload)
    return result

  @staticmethod
  def _BlockdevGetMirrorStatusPostProc(result):
    if not result.fail_msg:
      result.payload = [objects.BlockDevStatus.FromDict(i)
                        for i in result.payload]
    return result

  @staticmethod
  def _BlockdevGetMirrorStatusMultiPostProc(result):
    for nres in result.values():
      if nres.fail_msg:
        continue

      for idx, (success, status) in enumerate(nres.payload):
        if success:
          nres.payload[idx] = (success, objects.BlockDevStatus.FromDict(status))

    return result

  @staticmethod
  def _OsGetPostProc(result):
    if not result.fail_msg and isinstance(result.payload, dict):
      result.payload = objects.OS.FromDict(result.payload)
    return result

  @staticmethod
  def _PrepareFinalizeExportDisks(snap_disks):
    flat_disks = []

    for disk in snap_disks:
      if isinstance(disk, bool):
        flat_disks.append(disk)
      else:
        flat_disks.append(disk.ToDict())

    return flat_disks

  @staticmethod
  def _ImpExpStatusPostProc(result):
    """Post-processor for import/export status.

    @rtype: Payload containing list of L{objects.ImportExportStatus} instances
    @return: Returns a list of the state of each named import/export or None if
             a status couldn't be retrieved

    """
    if not result.fail_msg:
      decoded = []

      for i in result.payload:
        if i is None:
          decoded.append(None)
          continue
        decoded.append(objects.ImportExportStatus.FromDict(i))

      result.payload = decoded

    return result

  @staticmethod
  def _EncodeImportExportIO(ieio, ieioargs):
    """Encodes import/export I/O information.

    """
    if ieio == constants.IEIO_RAW_DISK:
      assert len(ieioargs) == 1
      return (ieioargs[0].ToDict(), )

    if ieio == constants.IEIO_SCRIPT:
      assert len(ieioargs) == 2
      return (ieioargs[0].ToDict(), ieioargs[1])

    return ieioargs

  @staticmethod
  def _PrepareFileUpload(filename):
    """Loads a file and prepares it for an upload to nodes.

    """
    data = _Compress(utils.ReadFile(filename))
    st = os.stat(filename)
    getents = runtime.GetEnts()
    return [filename, data, st.st_mode, getents.LookupUid(st.st_uid),
            getents.LookupGid(st.st_gid), st.st_atime, st.st_mtime]

  #
  # Begin RPC calls
  #

  def call_test_delay(self, node_list, duration, read_timeout=None):
    """Sleep for a fixed time on given node(s).

    This is a multi-node call.

    """
    assert read_timeout is None
    # Delegate through the generic _Call entry point with a timeout derived
    # from the requested duration; calling self.call_test_delay here would
    # recurse straight back into this override.
    return self._Call(node_list, "test_delay", int(duration + 5), [duration])


class JobQueueRunner(_generated_rpc.RpcClientJobQueue):
  """RPC wrappers for job queue.

  """
  _Compress = staticmethod(_Compress)

  def __init__(self, context, address_list):
    """Initializes this class.

    """
    _generated_rpc.RpcClientJobQueue.__init__(self)

    if address_list is None:
      resolver = _SsconfResolver
    else:
      # Caller provided an address list
      resolver = _StaticResolver(address_list)

    self._proc = _RpcProcessor(resolver,
                               netutils.GetDaemonPort(constants.NODED),
                               lock_monitor_cb=context.glm.AddToLockMonitor)

  def _Call(self, node_list, procedure, timeout, args):
    """Entry point for automatically generated RPC wrappers.

    """
    body = serializer.DumpJson(args, indent=False)

    return self._proc(node_list, procedure, body, read_timeout=timeout)


class BootstrapRunner(_generated_rpc.RpcClientBootstrap):
  """RPC wrappers for bootstrapping.

  """
  def __init__(self):
    """Initializes this class.

    """
    _generated_rpc.RpcClientBootstrap.__init__(self)

    self._proc = _RpcProcessor(_SsconfResolver,
                               netutils.GetDaemonPort(constants.NODED))

  def _Call(self, node_list, procedure, timeout, args):
    """Entry point for automatically generated RPC wrappers.

    """
    body = serializer.DumpJson(args, indent=False)

    return self._proc(node_list, procedure, body, read_timeout=timeout)


class ConfigRunner(_generated_rpc.RpcClientConfig):
  """RPC wrappers for L{config}.

  """
  _PrepareFileUpload = \
    staticmethod(RpcRunner._PrepareFileUpload) # pylint: disable=W0212

  def __init__(self, address_list):
    """Initializes this class.

    """
    _generated_rpc.RpcClientConfig.__init__(self)

    if address_list is None:
      resolver = _SsconfResolver
    else:
      # Caller provided an address list
      resolver = _StaticResolver(address_list)

    self._proc = _RpcProcessor(resolver,
                               netutils.GetDaemonPort(constants.NODED))

  def _Call(self, node_list, procedure, timeout, args):
    """Entry point for automatically generated RPC wrappers.

    """
    body = serializer.DumpJson(args, indent=False)

    return self._proc(node_list, procedure, body, read_timeout=timeout)