Statistics
| Branch: | Tag: | Revision:

root / lib / storage / drbd.py @ 65fc2388

History | View | Annotate | Download (34.7 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2010, 2011, 2012, 2013 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""DRBD block device related functionality"""
23

    
24
import errno
25
import logging
26
import time
27

    
28
from ganeti import constants
29
from ganeti import utils
30
from ganeti import errors
31
from ganeti import netutils
32
from ganeti import objects
33
from ganeti.storage import base
34
from ganeti.storage.drbd_info import DRBD8Info
35
from ganeti.storage import drbd_info
36
from ganeti.storage import drbd_cmdgen
37

    
38

    
39
# Size of reads in _CanReadDevice
40

    
41
_DEVICE_READ_SIZE = 128 * 1024
42

    
43

    
44
class DRBD8(object):
45
  """Various methods to deals with the DRBD system as a whole.
46

47
  This class provides a set of methods to deal with the DRBD installation on
48
  the node or with uninitialized devices as opposed to a DRBD device.
49

50
  """
51
  _USERMODE_HELPER_FILE = "/sys/module/drbd/parameters/usermode_helper"
52

    
53
  _MAX_MINORS = 255
54

    
55
  @staticmethod
56
  def GetUsermodeHelper(filename=_USERMODE_HELPER_FILE):
57
    """Returns DRBD usermode_helper currently set.
58

59
    @type filename: string
60
    @param filename: the filename to read the usermode helper from
61
    @rtype: string
62
    @return: the currently configured DRBD usermode helper
63

64
    """
65
    try:
66
      helper = utils.ReadFile(filename).splitlines()[0]
67
    except EnvironmentError, err:
68
      if err.errno == errno.ENOENT:
69
        base.ThrowError("The file %s cannot be opened, check if the module"
70
                        " is loaded (%s)", filename, str(err))
71
      else:
72
        base.ThrowError("Can't read DRBD helper file %s: %s",
73
                        filename, str(err))
74
    if not helper:
75
      base.ThrowError("Can't read any data from %s", filename)
76
    return helper
77

    
78
  @staticmethod
79
  def GetProcInfo():
80
    """Reads and parses information from /proc/drbd.
81

82
    @rtype: DRBD8Info
83
    @return: a L{DRBD8Info} instance containing the current /proc/drbd info
84

85
    """
86
    return DRBD8Info.CreateFromFile()
87

    
88
  @staticmethod
89
  def GetUsedDevs():
90
    """Compute the list of used DRBD minors.
91

92
    @rtype: list of ints
93

94
    """
95
    info = DRBD8.GetProcInfo()
96
    return filter(lambda m: not info.GetMinorStatus(m).is_unconfigured,
97
                  info.GetMinors())
98

    
99
  @staticmethod
100
  def FindUnusedMinor():
101
    """Find an unused DRBD device.
102

103
    This is specific to 8.x as the minors are allocated dynamically,
104
    so non-existing numbers up to a max minor count are actually free.
105

106
    @rtype: int
107

108
    """
109
    highest = None
110
    info = DRBD8.GetProcInfo()
111
    for minor in info.GetMinors():
112
      status = info.GetMinorStatus(minor)
113
      if not status.is_in_use:
114
        return minor
115
      highest = max(highest, minor)
116

    
117
    if highest is None: # there are no minors in use at all
118
      return 0
119
    if highest >= DRBD8._MAX_MINORS:
120
      logging.error("Error: no free drbd minors!")
121
      raise errors.BlockDeviceError("Can't find a free DRBD minor")
122

    
123
    return highest + 1
124

    
125
  @staticmethod
126
  def GetCmdGenerator(info):
127
    """Creates a suitable L{BaseDRBDCmdGenerator} based on the given info.
128

129
    @type info: DRBD8Info
130
    @rtype: BaseDRBDCmdGenerator
131

132
    """
133
    version = info.GetVersion()
134
    if version["k_minor"] <= 3:
135
      return drbd_cmdgen.DRBD83CmdGenerator(version)
136
    else:
137
      return drbd_cmdgen.DRBD84CmdGenerator(version)
138

    
139
  @staticmethod
140
  def ShutdownAll(minor):
141
    """Deactivate the device.
142

143
    This will, of course, fail if the device is in use.
144

145
    @type minor: int
146
    @param minor: the minor to shut down
147

148
    """
149
    info = DRBD8.GetProcInfo()
150
    cmd_gen = DRBD8.GetCmdGenerator(info)
151

    
152
    cmd = cmd_gen.GenDownCmd(minor)
153
    result = utils.RunCmd(cmd)
154
    if result.failed:
155
      base.ThrowError("drbd%d: can't shutdown drbd device: %s",
156
                      minor, result.output)
157

    
158

    
159
class DRBD8Dev(base.BlockDev):
  """Block device backed by DRBD v8.x.

  Only the local side of a DRBD device is handled here; nothing is done
  to the presumed peer. A fully connected DRBD pair therefore requires
  using this class on both hosts.

  The unique_id of such a device is the tuple (local_ip, local_port,
  remote_ip, remote_port, local_minor, secret). The device must have two
  children, the data device and the meta_device; the meta device is
  checked for valid size and is zeroed on create.

  """
  _DRBD_MAJOR = 147

  # timeouts (used for network reconfiguration)
  _NET_RECONFIG_TIMEOUT = 60
176

    
177
  def __init__(self, unique_id, children, size, params):
    """Validate the configuration and try to attach to the minor.

    @type unique_id: tuple or list
    @param unique_id: six elements: local host, local port, remote host,
        remote port, minor and secret
    @param children: either no children or exactly two (data device, meta
        device); a list containing None, or one whose meta device is not
        readable, is treated as empty
    @param size: passed on to the base class constructor
    @param params: passed on to the base class constructor

    """
    if children and None in children:
      children = []
    if len(children) not in (0, 2):
      raise ValueError("Invalid configuration data %s" % str(children))

    valid_id = isinstance(unique_id, (tuple, list)) and len(unique_id) == 6
    if not valid_id:
      raise ValueError("Invalid configuration data %s" % str(unique_id))

    (self._lhost, self._lport,
     self._rhost, self._rport,
     self._aminor, self._secret) = unique_id

    # children[1] is the meta device
    if children and not _CanReadDevice(children[1].dev_path):
      logging.info("drbd%s: Ignoring unreadable meta device", self._aminor)
      children = []

    super(DRBD8Dev, self).__init__(unique_id, children, size, params)
    self.major = self._DRBD_MAJOR

    proc_info = DRBD8.GetProcInfo()
    kversion = proc_info.GetVersion()
    if kversion["k_major"] != 8:
      base.ThrowError("Mismatch in DRBD kernel version and requested ganeti"
                      " usage: kernel is %s.%s, ganeti wants 8.x",
                      kversion["k_major"], kversion["k_minor"])

    # the 'show' output parser depends on the kernel minor version
    if kversion["k_minor"] > 3:
      self._show_info_cls = drbd_info.DRBD84ShowInfo
    else:
      self._show_info_cls = drbd_info.DRBD83ShowInfo

    self._cmd_gen = DRBD8.GetCmdGenerator(proc_info)

    same_endpoint = (self._lhost is not None and
                     self._lhost == self._rhost and
                     self._lport == self._rport)
    if same_endpoint:
      raise ValueError("Invalid configuration data, same local/remote %s" %
                       (unique_id,))
    self.Attach()
213

    
214
  @staticmethod
215
  def _DevPath(minor):
216
    """Return the path to a drbd device for a given minor.
217

218
    @type minor: int
219
    @rtype: string
220

221
    """
222
    return "/dev/drbd%d" % minor
223

    
224
  def _SetFromMinor(self, minor):
225
    """Set our parameters based on the given minor.
226

227
    This sets our minor variable and our dev_path.
228

229
    @type minor: int
230

231
    """
232
    if minor is None:
233
      self.minor = self.dev_path = None
234
      self.attached = False
235
    else:
236
      self.minor = minor
237
      self.dev_path = self._DevPath(minor)
238
      self.attached = True
239

    
240
  @staticmethod
  def _CheckMetaSize(meta_device):
    """Check if the given meta device looks like a valid one.

    This currently only checks the size, which must be at least 128MiB
    and at most 1GiB. The size is obtained from C{blockdev --getsize},
    whose output is interpreted as a number of 512-byte sectors. All
    problems are reported through L{base.ThrowError}.

    @type meta_device: string
    @param meta_device: the path to the device to check

    """
    result = utils.RunCmd(["blockdev", "--getsize", meta_device])
    if result.failed:
      base.ThrowError("Failed to get device size: %s - %s",
                      result.fail_reason, result.output)
    try:
      sectors = int(result.stdout)
    except (TypeError, ValueError):
      base.ThrowError("Invalid output from blockdev: '%s'", result.stdout)
    num_bytes = sectors * 512
    # float division, otherwise the fractional part reported via %.2f is
    # always lost to integer truncation
    size_mib = num_bytes / (1024.0 * 1024.0)
    if num_bytes < 128 * 1024 * 1024: # less than 128MiB
      base.ThrowError("Meta device too small (%.2fMiB)", size_mib)
    # the maximum *valid* size of the meta device when living on top
    # of LVM is hard to compute: it depends on the number of stripes
    # and the PE size; e.g. a 2-stripe, 64MB PE will result in a 128MB
    # (normal size), but an eight-stripe 128MB PE will result in a 1GB
    # size meta device; as such, we restrict it to 1GB (a little bit
    # too generous, but making assumptions about PE size is hard)
    if num_bytes > 1024 * 1024 * 1024:
      base.ThrowError("Meta device too big (%.2fMiB)", size_mib)
272

    
273
  def _GetShowData(self, minor):
    """Run the 'show' command for a minor and return its raw output.

    @type minor: int
    @param minor: the minor to collect show output for
    @rtype: string or None
    @return: the standard output of the command, or None (after logging
        an error) if the command failed

    """
    show_cmd = self._cmd_gen.GenShowCmd(minor)
    outcome = utils.RunCmd(show_cmd)
    if not outcome.failed:
      return outcome.stdout

    logging.error("Can't display the drbd config: %s - %s",
                  outcome.fail_reason, outcome.output)
    return None
287

    
288
  def _GetShowInfo(self, minor):
    """Collect and parse the 'show' output of a minor.

    The raw data from L{_GetShowData} is handed to the version-specific
    show info class selected in the constructor.

    @type minor: int
    @param minor: the minor to return information for
    @rtype: dict as described in L{drbd_info.BaseShowInfo.GetDevInfo}

    """
    show_data = self._GetShowData(minor)
    return self._show_info_cls.GetDevInfo(show_data)
297

    
298
  def _MatchesLocal(self, info):
299
    """Test if our local config matches with an existing device.
300

301
    The parameter should be as returned from `_GetShowInfo()`. This
302
    method tests if our local backing device is the same as the one in
303
    the info parameter, in effect testing if we look like the given
304
    device.
305

306
    @type info: dict as described in L{drbd_info.BaseShowInfo.GetDevInfo}
307
    @rtype: boolean
308

309
    """
310
    if self._children:
311
      backend, meta = self._children
312
    else:
313
      backend = meta = None
314

    
315
    if backend is not None:
316
      retval = ("local_dev" in info and info["local_dev"] == backend.dev_path)
317
    else:
318
      retval = ("local_dev" not in info)
319

    
320
    if meta is not None:
321
      retval = retval and ("meta_dev" in info and
322
                           info["meta_dev"] == meta.dev_path)
323
      if "meta_index" in info:
324
        retval = retval and info["meta_index"] == 0
325
    else:
326
      retval = retval and ("meta_dev" not in info and
327
                           "meta_index" not in info)
328
    return retval
329

    
330
  def _MatchesNet(self, info):
331
    """Test if our network config matches with an existing device.
332

333
    The parameter should be as returned from `_GetShowInfo()`. This
334
    method tests if our network configuration is the same as the one
335
    in the info parameter, in effect testing if we look like the given
336
    device.
337

338
    @type info: dict as described in L{drbd_info.BaseShowInfo.GetDevInfo}
339
    @rtype: boolean
340

341
    """
342
    if (((self._lhost is None and not ("local_addr" in info)) and
343
         (self._rhost is None and not ("remote_addr" in info)))):
344
      return True
345

    
346
    if self._lhost is None:
347
      return False
348

    
349
    if not ("local_addr" in info and
350
            "remote_addr" in info):
351
      return False
352

    
353
    retval = (info["local_addr"] == (self._lhost, self._lport))
354
    retval = (retval and
355
              info["remote_addr"] == (self._rhost, self._rport))
356
    return retval
357

    
358
  def _AssembleLocal(self, minor, backend, meta, size):
    """Attach the local storage of a DRBD device.

    The commands produced by the command generator are run in order; the
    first failing one is reported through L{base.ThrowError}.

    @type minor: int
    @param minor: the minor to assemble locally
    @type backend: string
    @param backend: path to the data device to use
    @type meta: string
    @param meta: path to the meta device to use
    @type size: int
    @param size: size in MiB

    """
    init_cmds = self._cmd_gen.GenLocalInitCmds(minor, backend, meta,
                                               size, self.params)

    for init_cmd in init_cmds:
      outcome = utils.RunCmd(init_cmd)
      if not outcome.failed:
        continue
      base.ThrowError("drbd%d: can't attach local disk: %s",
                      minor, outcome.output)
379

    
380
  def _AssembleNet(self, minor, net_info, dual_pri=False, hmac=None,
                   secret=None):
    """Set up the network side of the device.

    If any element of net_info is None, the network is shut down instead
    of being configured. Otherwise the sync parameters are applied, the
    network command is run, and the resulting configuration is polled
    until it shows the requested addresses.

    @type minor: int
    @param minor: the minor to assemble the network for
    @type net_info: (string, int, string, int)
    @param net_info: tuple containing the local address, local port, remote
      address and remote port
    @type dual_pri: boolean
    @param dual_pri: whether two primaries should be allowed or not
    @type hmac: string
    @param hmac: the HMAC algorithm to use
    @type secret: string
    @param secret: the shared secret to use

    """
    lhost, lport, rhost, rport = net_info
    if None in net_info:
      # no network connection is wanted, so make sure it really is
      # shut down
      self._ShutdownNet(minor)
      return

    if dual_pri:
      protocol = constants.DRBD_MIGRATION_NET_PROTOCOL
    else:
      protocol = self.params[constants.LDP_PROTOCOL]

    # Workaround for a race condition. While DRBD establishes the
    # connection with its peer, it also sends the synchronization speed
    # over the wire. Setting the sync speed only after both sides are
    # set up can race with DRBD connecting, so it is set here, before
    # DRBD is told anything about its peer.
    sync_errors = self._SetMinorSyncParams(minor, self.params)
    if sync_errors:
      base.ThrowError("drbd%d: can't set the synchronization parameters: %s" %
                      (minor, utils.CommaJoin(sync_errors)))

    family = self._GetNetFamily(minor, lhost, rhost)

    net_cmd = self._cmd_gen.GenNetInitCmd(minor, family, lhost, lport,
                                          rhost, rport, protocol,
                                          dual_pri, hmac, secret, self.params)

    outcome = utils.RunCmd(net_cmd)
    if outcome.failed:
      base.ThrowError("drbd%d: can't setup network: %s - %s",
                      minor, outcome.fail_reason, outcome.output)

    wanted_local = (lhost, lport)
    wanted_remote = (rhost, rport)

    def _CheckNetworkConfig():
      # ask for a retry until both addresses show up with our values
      shown = self._GetShowInfo(minor)
      if not ("local_addr" in shown and "remote_addr" in shown):
        raise utils.RetryAgain()

      if (shown["local_addr"] != wanted_local or
          shown["remote_addr"] != wanted_remote):
        raise utils.RetryAgain()

    try:
      utils.Retry(_CheckNetworkConfig, 1.0, 10.0)
    except utils.RetryTimeout:
      base.ThrowError("drbd%d: timeout while configuring network", minor)
444

    
445
  @staticmethod
  def _GetNetFamily(minor, lhost, rhost):
    """Determine the address family shared by both hosts.

    The family is decided by the local address (IPv6 is tried first,
    then IPv4); the remote address must be valid for the same family.
    Mismatching or invalid addresses are reported through
    L{base.ThrowError}.

    @type minor: int
    @param minor: the minor, only used in error messages
    @param lhost: the local address
    @param rhost: the remote address
    @rtype: string
    @return: either C{"ipv6"} or C{"ipv4"}

    """
    families = (
      ("ipv6", netutils.IP6Address),
      ("ipv4", netutils.IP4Address),
      )
    for family, addr_cls in families:
      if not addr_cls.IsValid(lhost):
        continue
      if not addr_cls.IsValid(rhost):
        base.ThrowError("drbd%d: can't connect ip %s to ip %s" %
                        (minor, lhost, rhost))
      return family

    base.ThrowError("drbd%d: Invalid ip %s" % (minor, lhost))
459

    
460
  def AddChildren(self, devices):
    """Add a disk to the DRBD device.

    The device must be attached to a minor and must not already have a
    local disk. Both children are opened, the meta device is size-checked
    and initialised, and the local storage is then attached. Violated
    preconditions are reported through L{base.ThrowError}.

    @type devices: list of L{BlockDev}
    @param devices: a list of exactly two L{BlockDev} objects; the first
      denotes the data device, the second the meta device for this DRBD device

    """
    if self.minor is None:
      base.ThrowError("drbd%d: can't attach to drbd8 during AddChildren",
                      self._aminor)
    if len(devices) != 2:
      base.ThrowError("drbd%d: need two devices for AddChildren", self.minor)
    info = self._GetShowInfo(self.minor)
    if "local_dev" in info:
      base.ThrowError("drbd%d: already attached to a local disk", self.minor)
    backend, meta = devices
    if backend.dev_path is None or meta.dev_path is None:
      base.ThrowError("drbd%d: children not ready during AddChildren",
                      self.minor)
    backend.Open()
    meta.Open()
    self._CheckMetaSize(meta.dev_path)
    # the meta device is initialised through a currently unused minor
    self._InitMeta(DRBD8.FindUnusedMinor(), meta.dev_path)

    self._AssembleLocal(self.minor, backend.dev_path, meta.dev_path, self.size)
    self._children = devices
487

    
488
  def RemoveChildren(self, devices):
    """Detach the drbd device from its local storage.

    Nothing is done if the device currently has no local disk, or if
    both of our children are None.

    @type devices: list
    @param devices: exactly two entries, the first for the data device
      and the second for the meta device; each entry is compared with the
      C{dev_path} of the corresponding child (NOTE(review): this suggests
      device paths rather than L{BlockDev} objects - confirm with callers)

    """
    if self.minor is None:
      base.ThrowError("drbd%d: can't attach to drbd8 during RemoveChildren",
                      self._aminor)

    # nothing to detach when there is no backing storage
    if "local_dev" not in self._GetShowInfo(self.minor):
      return

    own_children = self._children
    if len(own_children) != 2:
      base.ThrowError("drbd%d: we don't have two children: %s", self.minor,
                      own_children)
    if own_children.count(None) == 2: # no real children to detach
      logging.warning("drbd%d: requested detach while detached", self.minor)
      return
    if len(devices) != 2:
      base.ThrowError("drbd%d: we need two children in RemoveChildren",
                      self.minor)

    # the given devices have to be exactly the ones we are using
    for child, dev in zip(own_children, devices):
      if dev != child.dev_path:
        base.ThrowError("drbd%d: mismatch in local storage (%s != %s) in"
                        " RemoveChildren", self.minor, dev, child.dev_path)

    self._ShutdownLocal(self.minor)
    self._children = []
519

    
520
  def _SetMinorSyncParams(self, minor, params):
    """Apply the syncer parameters to a minor (low-level implementation).

    @type minor: int
    @param minor: the drbd minor whose settings we change
    @type params: dict
    @param params: LD level disk parameters related to the synchronization
    @rtype: list
    @return: a list of error messages, empty if the command succeeded

    """
    sync_cmd = self._cmd_gen.GenSyncParamsCmd(minor, params)
    outcome = utils.RunCmd(sync_cmd)
    if not outcome.failed:
      return []

    msg = ("Can't change syncer rate: %s - %s" %
           (outcome.fail_reason, outcome.output))
    logging.error(msg)
    return [msg]
542

    
543
  def SetSyncParams(self, params):
    """Set the synchronization parameters of the DRBD syncer.

    The parameters are first applied through the base class and then to
    our own minor; the error messages of both steps are returned in one
    list. See L{BlockDev.SetSyncParams} for parameter description.

    """
    if self.minor is None:
      err = "Not attached during SetSyncParams"
      logging.info(err)
      return [err]

    all_errors = super(DRBD8Dev, self).SetSyncParams(params)
    own_errors = self._SetMinorSyncParams(self.minor, params)
    all_errors.extend(own_errors)
    return all_errors
557

    
558
  def PauseResumeSync(self, pause):
    """Pause or resume the synchronization of a DRBD device.

    The base class implementation is invoked first; its result is
    returned if our own pause/resume command succeeds, otherwise False
    is returned. See L{BlockDev.PauseResumeSync} for parameter
    description.

    """
    if self.minor is None:
      logging.info("Not attached during PauseSync")
      return False

    children_result = super(DRBD8Dev, self).PauseResumeSync(pause)

    if pause:
      gen_fn = self._cmd_gen.GenPauseSyncCmd
    else:
      gen_fn = self._cmd_gen.GenResumeSyncCmd
    cmd = gen_fn(self.minor)

    outcome = utils.RunCmd(cmd)
    if outcome.failed:
      logging.error("Can't %s: %s - %s", cmd,
                    outcome.fail_reason, outcome.output)
      return False
    return children_result
580

    
581
  def GetProcStatus(self):
    """Fetch this device's status entry from the /proc/drbd data.

    Being unattached, or not being listed in the proc data, is reported
    through L{base.ThrowError}.

    @rtype: DRBD8Status

    """
    if self.minor is None:
      base.ThrowError("drbd%d: GetStats() called while not attached",
                      self._aminor)

    proc_info = DRBD8.GetProcInfo()
    found = proc_info.HasMinorStatus(self.minor)
    if not found:
      base.ThrowError("drbd%d: can't find myself in /proc", self.minor)
    return proc_info.GetMinorStatus(self.minor)
594

    
595
  def GetSyncStatus(self):
    """Return the synchronization status of the device.

    A sync_percent of None means all is ok. An estimated_time of None
    means the remaining time can't be estimated; otherwise it is the
    time left in seconds.

    The is_degraded flag is set when the network is not connected or
    when the local disk is not up to date.

    The ldisk status is OKAY for an up-to-date disk, FAULTY for a
    diskless device and UNKNOWN otherwise.

    @rtype: objects.BlockDevStatus

    """
    if self.minor is None:
      if not self.Attach():
        base.ThrowError("drbd%d: can't Attach() in GetSyncStatus",
                        self._aminor)

    stats = self.GetProcStatus()
    is_degraded = not (stats.is_connected and stats.is_disk_uptodate)

    if stats.is_disk_uptodate:
      ldisk_status = constants.LDS_OKAY
    elif stats.is_diskless:
      ldisk_status = constants.LDS_FAULTY
    else:
      ldisk_status = constants.LDS_UNKNOWN

    status_fields = {
      "dev_path": self.dev_path,
      "major": self.major,
      "minor": self.minor,
      "sync_percent": stats.sync_percent,
      "estimated_time": stats.est_time,
      "is_degraded": is_degraded,
      "ldisk_status": ldisk_status,
      }
    return objects.BlockDevStatus(**status_fields)
631

    
632
  def Open(self, force=False):
    """Switch the local state to primary.

    The 'force' parameter is handed to the command generator so that
    DRBD is instructed to switch the device into primary mode. As this
    is a potentially dangerous operation, the flag should only be given
    after creation, when it actually is mandatory.

    @return: False if the device could not be attached; a failing
        command is reported through L{base.ThrowError}

    """
    if self.minor is None:
      if not self.Attach():
        logging.error("DRBD cannot attach to a device during open")
        return False

    primary_cmd = self._cmd_gen.GenPrimaryCmd(self.minor, force)

    outcome = utils.RunCmd(primary_cmd)
    if outcome.failed:
      base.ThrowError("drbd%d: can't make drbd device primary: %s", self.minor,
                      outcome.output)
651

    
652
  def Close(self):
    """Switch the local state to secondary.

    This will, of course, fail if the device is in use. Failures to
    attach or to run the command are reported through
    L{base.ThrowError}.

    """
    if self.minor is None:
      if not self.Attach():
        base.ThrowError("drbd%d: can't Attach() in Close()", self._aminor)

    secondary_cmd = self._cmd_gen.GenSecondaryCmd(self.minor)
    outcome = utils.RunCmd(secondary_cmd)
    if outcome.failed:
      base.ThrowError("drbd%d: can't switch drbd device to secondary: %s",
                      self.minor, outcome.output)
665

    
666
  def DisconnectNet(self):
    """Remove the network configuration.

    This shuts down the network side of the device.

    After the 'disconnect' command the method waits, up to a hardcoded
    timeout, for the device to become standalone, since it sometimes
    takes a while for the disconnect to actually propagate and a later
    'net' command could otherwise be issued while the device is still
    connected. If the device is still attached to the network when the
    timeout expires, an error is raised.

    """
    if self.minor is None:
      base.ThrowError("drbd%d: disk not attached in re-attach net",
                      self._aminor)

    if None in (self._lhost, self._lport, self._rhost, self._rport):
      base.ThrowError("drbd%d: DRBD disk missing network info in"
                      " DisconnectNet()", self.minor)

    # kept in a one-element list so that the nested function can update it
    ever_disconnected = [base.IgnoreError(self._ShutdownNet, self.minor)]

    def _WaitForDisconnect():
      if self.GetProcStatus().is_standalone:
        return

      # retry the disconnect; it seems possible that, due to a well-timed
      # disconnect on the peer, our disconnect command is ignored and
      # forgotten
      shutdown_ok = base.IgnoreError(self._ShutdownNet, self.minor)
      ever_disconnected[0] = shutdown_ok or ever_disconnected[0]

      raise utils.RetryAgain()

    # remember when we started, to report slow disconnects below
    started = time.time()

    try:
      # Start delay at 100 milliseconds and grow up to 2 seconds
      utils.Retry(_WaitForDisconnect, (0.1, 1.5, 2.0),
                  self._NET_RECONFIG_TIMEOUT)
    except utils.RetryTimeout:
      if ever_disconnected[0]:
        msg = ("drbd%d: device did not react to the"
               " 'disconnect' command in a timely manner")
      else:
        msg = "drbd%d: can't shutdown network, even after multiple retries"

      base.ThrowError(msg, self.minor)

    elapsed = time.time() - started
    if elapsed > (self._NET_RECONFIG_TIMEOUT * 0.25):
      logging.info("drbd%d: DisconnectNet: detach took %.3f seconds",
                   self.minor, elapsed)
727

    
728
  def AttachNet(self, multimaster):
    """Reconnect the network.

    The network side of the device is connected using the given
    multi-master flag. The device has to be 'Standalone' and needs
    valid network configuration data; otherwise the problem is reported
    through L{base.ThrowError}.

    @type multimaster: boolean
    @param multimaster: init the network in dual-primary mode

    """
    if self.minor is None:
      base.ThrowError("drbd%d: device not attached in AttachNet", self._aminor)

    net_info = (self._lhost, self._lport, self._rhost, self._rport)
    if None in net_info:
      base.ThrowError("drbd%d: missing network info in AttachNet()", self.minor)

    if not self.GetProcStatus().is_standalone:
      base.ThrowError("drbd%d: device is not standalone in AttachNet",
                      self.minor)

    self._AssembleNet(self.minor, net_info,
                      dual_pri=multimaster, hmac=constants.DRBD_HMAC_ALG,
                      secret=self._secret)
755

    
756
  def Attach(self):
    """Check whether our minor is configured.

    No device configuration is done here: the method only looks up our
    minor in the list of used minors and records the outcome on this
    object through L{_SetFromMinor}.

    Apart from that, the state of the system is not changed in any way
    (except in case of side-effects caused by reading from /proc).

    @rtype: boolean
    @return: whether our minor is among the used ones

    """
    minor = None
    if self._aminor in DRBD8.GetUsedDevs():
      minor = self._aminor

    self._SetFromMinor(minor)
    return minor is not None
775

    
776
  def Assemble(self):
    """Assemble the drbd device.

    Method:
      - an already configured device is checked (and fixed if needed) so
        that it matches our config
      - an unconfigured device is created from zero
      - in both cases the device parameters are set afterwards

    """
    super(DRBD8Dev, self).Assemble()

    self.Attach()
    if self.minor is not None:
      # already configured: recheck the local and network status and
      # try to fix the device
      self._SlowAssemble()
    else:
      # local device completely unconfigured
      self._FastAssemble()

    sync_errors = self.SetSyncParams(self.params)
    if sync_errors:
      base.ThrowError("drbd%d: can't set the synchronization parameters: %s" %
                      (self.minor, utils.CommaJoin(sync_errors)))
801

    
802
  def _SlowAssemble(self):
803
    """Assembles the DRBD device from a (partially) configured device.
804

805
    In case of partially attached (local device matches but no network
806
    setup), we perform the network attach. If successful, we re-test
807
    the attach if can return success.
808

809
    """
810
    # TODO: Rewrite to not use a for loop just because there is 'break'
811
    # pylint: disable=W0631
812
    net_data = (self._lhost, self._lport, self._rhost, self._rport)
813
    for minor in (self._aminor,):
814
      info = self._GetShowInfo(minor)
815
      match_l = self._MatchesLocal(info)
816
      match_r = self._MatchesNet(info)
817

    
818
      if match_l and match_r:
819
        # everything matches
820
        break
821

    
822
      if match_l and not match_r and "local_addr" not in info:
823
        # disk matches, but not attached to network, attach and recheck
824
        self._AssembleNet(minor, net_data, hmac=constants.DRBD_HMAC_ALG,
825
                          secret=self._secret)
826
        if self._MatchesNet(self._GetShowInfo(minor)):
827
          break
828
        else:
829
          base.ThrowError("drbd%d: network attach successful, but 'drbdsetup"
830
                          " show' disagrees", minor)
831

    
832
      if match_r and "local_dev" not in info:
833
        # no local disk, but network attached and it matches
834
        self._AssembleLocal(minor, self._children[0].dev_path,
835
                            self._children[1].dev_path, self.size)
836
        if self._MatchesLocal(self._GetShowInfo(minor)):
837
          break
838
        else:
839
          base.ThrowError("drbd%d: disk attach successful, but 'drbdsetup"
840
                          " show' disagrees", minor)
841

    
842
      # this case must be considered only if we actually have local
843
      # storage, i.e. not in diskless mode, because all diskless
844
      # devices are equal from the point of view of local
845
      # configuration
846
      if (match_l and "local_dev" in info and
847
          not match_r and "local_addr" in info):
848
        # strange case - the device network part points to somewhere
849
        # else, even though its local storage is ours; as we own the
850
        # drbd space, we try to disconnect from the remote peer and
851
        # reconnect to our correct one
852
        try:
853
          self._ShutdownNet(minor)
854
        except errors.BlockDeviceError, err:
855
          base.ThrowError("drbd%d: device has correct local storage, wrong"
856
                          " remote peer and is unable to disconnect in order"
857
                          " to attach to the correct peer: %s", minor, str(err))
858
        # note: _AssembleNet also handles the case when we don't want
859
        # local storage (i.e. one or more of the _[lr](host|port) is
860
        # None)
861
        self._AssembleNet(minor, net_data, hmac=constants.DRBD_HMAC_ALG,
862
                          secret=self._secret)
863
        if self._MatchesNet(self._GetShowInfo(minor)):
864
          break
865
        else:
866
          base.ThrowError("drbd%d: network attach successful, but 'drbdsetup"
867
                          " show' disagrees", minor)
868

    
869
    else:
870
      minor = None
871

    
872
    self._SetFromMinor(minor)
873
    if minor is None:
874
      base.ThrowError("drbd%d: cannot activate, unknown or unhandled reason",
875
                      self._aminor)
876

    
877
  def _FastAssemble(self):
878
    """Assemble the drbd device from zero.
879

880
    This is run when in Assemble we detect our minor is unused.
881

882
    """
883
    minor = self._aminor
884
    if self._children and self._children[0] and self._children[1]:
885
      self._AssembleLocal(minor, self._children[0].dev_path,
886
                          self._children[1].dev_path, self.size)
887
    if self._lhost and self._lport and self._rhost and self._rport:
888
      self._AssembleNet(minor,
889
                        (self._lhost, self._lport, self._rhost, self._rport),
890
                        hmac=constants.DRBD_HMAC_ALG, secret=self._secret)
891
    self._SetFromMinor(minor)
892

    
893
  def _ShutdownLocal(self, minor):
    """Detach the given minor from its local device.

    I/Os will continue to be served from the remote device. If we
    don't have a remote device, this operation will fail.

    @type minor: int
    @param minor: the device to detach from the local device

    """
    detach_cmd = self._cmd_gen.GenDetachCmd(minor)
    outcome = utils.RunCmd(detach_cmd)
    if not outcome.failed:
      return

    base.ThrowError("drbd%d: can't detach local disk: %s",
                    minor, outcome.output)
908

    
909
  def _ShutdownNet(self, minor):
    """Disconnect the given minor from the remote peer.

    This fails if we don't have a local device.

    @type minor: int
    @param minor: the device to disconnect from the remote peer

    """
    net_family = self._GetNetFamily(minor, self._lhost, self._rhost)
    disconnect_cmd = self._cmd_gen.GenDisconnectCmd(minor, net_family,
                                                    self._lhost, self._lport,
                                                    self._rhost, self._rport)
    outcome = utils.RunCmd(disconnect_cmd)
    if not outcome.failed:
      return

    base.ThrowError("drbd%d: can't shutdown network: %s",
                    minor, outcome.output)
926

    
927
  def Shutdown(self):
    """Shutdown the DRBD device.

    If the device is not attached and cannot be attached, this only
    logs the fact and returns. Otherwise the minor is shut down and the
    cached minor and device path are cleared, even when the shutdown
    itself raises.

    """
    if self.minor is None:
      attached = self.Attach()
      if not attached:
        logging.info("drbd%d: not attached during Shutdown()", self._aminor)
        return

    try:
      DRBD8.ShutdownAll(self.minor)
    finally:
      # forget the device regardless of the outcome of the shutdown
      self.minor = None
      self.dev_path = None
940

    
941
  def Remove(self):
    """Stub remove for DRBD devices.

    Removing a DRBD device only shuts it down, see L{Shutdown}.

    """
    self.Shutdown()
946

    
947
  def Rename(self, new_id):
    """Rename a device.

    This is not supported for drbd devices, so calling it is always a
    programming error.

    @param new_id: ignored
    @raise errors.ProgrammerError: always

    """
    msg = "Can't rename a drbd device"
    raise errors.ProgrammerError(msg)
954

    
955
  def Grow(self, amount, dryrun, backingstore, excl_stor):
    """Resize the DRBD device and its backing storage.

    The first child is grown first; the DRBD device itself is resized
    only when this is neither a dry run nor a backing-storage-only
    request.

    See L{BlockDev.Grow} for parameter description.

    """
    if self.minor is None:
      base.ThrowError("drbd%d: Grow called while not attached", self._aminor)

    children = self._children
    has_both_children = len(children) == 2 and None not in children
    if not has_both_children:
      base.ThrowError("drbd%d: cannot grow diskless device", self.minor)

    children[0].Grow(amount, dryrun, backingstore, excl_stor)

    # DRBD does not support dry-run mode and is not backing storage,
    # so in those cases there is nothing left to do
    if not (dryrun or backingstore):
      resize_cmd = self._cmd_gen.GenResizeCmd(self.minor, self.size + amount)
      outcome = utils.RunCmd(resize_cmd)
      if outcome.failed:
        base.ThrowError("drbd%d: resize failed: %s", self.minor,
                        outcome.output)
974

    
975
  @classmethod
  def _InitMeta(cls, minor, dev_path):
    """Wipe and initialize a meta device.

    This will not work if the given minor is in use.

    @type minor: int
    @param minor: the DRBD minor whose (future) meta device should be
      initialized
    @type dev_path: string
    @param dev_path: path to the meta device to initialize

    """
    # Zero the metadata first, in order to make sure drbdmeta doesn't
    # try to auto-detect existing filesystems or similar (see
    # http://code.google.com/p/ganeti/issues/detail?id=182); we only
    # care about the first 128MB of data in the device, even though it
    # can be bigger
    wipe_cmd = [
      constants.DD_CMD,
      "if=/dev/zero",
      "of=%s" % dev_path,
      "bs=1048576",
      "count=128",
      "oflag=direct",
      ]
    wipe_result = utils.RunCmd(wipe_cmd)
    if wipe_result.failed:
      base.ThrowError("Can't wipe the meta device: %s", wipe_result.output)

    # the command generator is chosen based on the current DRBD status
    # information
    proc_info = DRBD8.GetProcInfo()
    generator = DRBD8.GetCmdGenerator(proc_info)
    init_cmd = generator.GenInitMetaCmd(minor, dev_path)

    init_result = utils.RunCmd(init_cmd)
    if init_result.failed:
      base.ThrowError("Can't initialize meta device: %s", init_result.output)
1006

    
1007
  @classmethod
  def Create(cls, unique_id, children, size, spindles, params, excl_stor):
    """Create a new DRBD8 device.

    Since DRBD devices are not created per se, just assembled, this
    function only initializes the metadata on the second child, after
    checking that the requested minor is not in use.

    @param unique_id: device identifier; element 4 is the minor to use
    @param children: exactly two devices, the second one being the
      meta device
    @param size: passed unchanged to the class constructor
    @param spindles: not used by this method
    @param params: passed unchanged to the class constructor
    @param excl_stor: must be false, DRBD cannot be requested with
      exclusive storage
    @return: a new instance of this class
    @raise errors.ProgrammerError: if the number of children is not two
      or exclusive storage was requested

    """
    if len(children) != 2:
      raise errors.ProgrammerError("Invalid setup for the drbd device")
    if excl_stor:
      raise errors.ProgrammerError("DRBD device requested with"
                                   " exclusive_storage")

    # check that the minor is unused
    aminor = unique_id[4]
    proc_info = DRBD8.GetProcInfo()
    minor_in_use = (proc_info.HasMinorStatus(aminor) and
                    proc_info.GetMinorStatus(aminor).is_in_use)
    if minor_in_use:
      base.ThrowError("drbd%d: minor is already in use at Create() time",
                      aminor)

    # the meta device must be up before it can be checked and initialized
    meta_dev = children[1]
    meta_dev.Assemble()
    attached = meta_dev.Attach()
    if not attached:
      base.ThrowError("drbd%d: can't attach to meta device '%s'",
                      aminor, meta_dev)

    cls._CheckMetaSize(meta_dev.dev_path)
    cls._InitMeta(aminor, meta_dev.dev_path)

    return cls(unique_id, children, size, params)
1040

    
1041

    
1042
def _CanReadDevice(path):
  """Check if we can read from the given device.

  This tries to read the first 128k of the device; a failure is logged
  as a warning, together with the exception information.

  @type path: string
  @param path: path of the device to read from
  @rtype: boolean
  @return: True if the read succeeded, False if it raised an
    EnvironmentError

  """
  try:
    utils.ReadFile(path, size=_DEVICE_READ_SIZE)
  except EnvironmentError:
    logging.warning("Can't read from device %s", path, exc_info=True)
    return False
  else:
    return True