Statistics
| Branch: | Tag: | Revision:

root / lib / bdev.py @ b2dabfd6

History | View | Annotate | Download (73.3 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Block device abstraction"""
23

    
24
import re
25
import time
26
import errno
27
import pyparsing as pyp
28

    
29
from ganeti import utils
30
from ganeti import logger
31
from ganeti import errors
32
from ganeti import constants
33

    
34

    
35
class BlockDev(object):
  """Block device abstract class.

  A block device can be in the following states:
    - not existing on the system, and by `Create()` it goes into:
    - existing but not setup/not active, and by `Assemble()` goes into:
    - active read-write and by `Open()` it goes into
    - online (=used, or ready for use)

  A device can also be online but read-only, however we are not using
  the readonly state (MD and LV have it, if needed in the future)
  and we are usually looking at this like at a stack, so it's easier
  to conceptualise the transition from not-existing to online and back
  like a linear one.

  The many different states of the device are due to the fact that we
  need to cover many device types:
    - logical volumes are created, lvchange -a y $lv, and used
    - md arrays are created or assembled and used
    - drbd devices are attached to a local disk/remote peer and made primary

  The status of the device can be examined by `GetStatus()`, which
  returns a numerical value, depending on the position in the
  transition stack of the device.

  A block device is identified by three items:
    - the /dev path of the device (dynamic)
    - a unique ID of the device (static)
    - its major/minor pair (dynamic)

  Not all devices implement both the first two as distinct items. LVM
  logical volumes have their unique ID (the pair volume group, logical
  volume name) in a 1-to-1 relation to the dev path. For MD devices,
  the /dev path is dynamic and the unique ID is the UUID generated at
  array creation plus the slave list. For DRBD devices, the /dev path
  is again dynamic and the unique id is the pair (host1, dev1),
  (host2, dev2).

  You can get to a device in two ways:
    - creating the (real) device, which returns you
      an attached instance (lvcreate, mdadm --create)
    - attaching of a python instance to an existing (real) device

  The second point, the attachment to a device, is different
  depending on whether the device is assembled or not. At init() time,
  we search for a device with the same unique_id as us. If found,
  good. It also means that the device is already assembled. If not,
  after assembly we'll have our correct major/minor.

  """
  STATUS_UNKNOWN = 0
  STATUS_EXISTING = 1
  STATUS_STANDBY = 2
  STATUS_ONLINE = 3

  # human-readable names for the STATUS_* values
  STATUS_MAP = {
    STATUS_UNKNOWN: "unknown",
    STATUS_EXISTING: "existing",
    STATUS_STANDBY: "ready for use",
    STATUS_ONLINE: "online",
    }

  def __init__(self, unique_id, children):
    """Record the identity of the device.

    Nothing is looked up on the system here; dev_path, major and minor
    start as None and are filled in by the subclasses.

    Args:
      unique_id: the static identifier of the device
      children: the BlockDev instances this device is built on; an
                empty list or None means the device has no children

    """
    self._children = children
    self.dev_path = None
    self.unique_id = unique_id
    self.major = None
    self.minor = None

  def Assemble(self):
    """Assemble the device from its components.

    If this is a plain block device (e.g. LVM) then assemble does
    nothing, as the LVM has no children and we don't put logical
    volumes offline.

    One guarantee is that after the device has been assembled, it
    knows its major/minor numbers. This allows other devices (usually
    parents) to probe correctly for their children.

    Each child is assembled and then opened, in order. As soon as one
    child fails to assemble, or raises errors.BlockDeviceError while
    being opened, `Shutdown()` is called on all children.

    Returns:
      True if there are no children, otherwise the result of the last
      `child.Assemble()` call made (a false value on failure)

    Raises:
      TypeError: if a child is not a BlockDev instance
      errors.BlockDeviceError: re-raised from `child.Open()`

    """
    # a device may have been created with children=None; treat it
    # like a device without children, as SetSyncSpeed() does
    children = self._children or []
    status = True
    for child in children:
      if not isinstance(child, BlockDev):
        raise TypeError("Invalid child passed of type '%s'" % type(child))
      status = child.Assemble()
      if not status:
        break

      try:
        child.Open()
      except errors.BlockDeviceError:
        for dev in children:
          dev.Shutdown()
        raise

    if not status:
      for dev in children:
        dev.Shutdown()
    return status

  def Attach(self):
    """Find a device which matches our config and attach to it.

    Must be implemented by the subclasses.

    """
    raise NotImplementedError

  def Close(self):
    """Notifies that the device will no longer be used for I/O.

    Must be implemented by the subclasses.

    """
    raise NotImplementedError

  @classmethod
  def Create(cls, unique_id, children, size):
    """Create the device.

    If the device cannot be created, it will return None
    instead. Error messages go to the logging system.

    Note that for some devices, the unique_id is used, and for other,
    the children. The idea is that these two, taken together, are
    enough for both creation and assembly (later).

    Must be implemented by the subclasses.

    """
    raise NotImplementedError

  def Remove(self):
    """Remove this device.

    This makes sense only for some of the device types: LV and to a
    lesser degree, md devices. Also note that if the device can't
    attach, the removal can't be completed.

    Must be implemented by the subclasses.

    """
    raise NotImplementedError

  def Rename(self, new_id):
    """Rename this device.

    This may or may not make sense for a given device type.

    Must be implemented by the subclasses.

    """
    raise NotImplementedError

  def GetStatus(self):
    """Return the status of the device.

    Must be implemented by the subclasses.

    """
    raise NotImplementedError

  def Open(self, force=False):
    """Make the device ready for use.

    This makes the device ready for I/O. For now, just the DRBD
    devices need this.

    The force parameter signifies that if the device has any kind of
    --force thing, it should be used, we know what we are doing.

    Must be implemented by the subclasses.

    """
    raise NotImplementedError

  def Shutdown(self):
    """Shut down the device, freeing its children.

    This undoes the `Assemble()` work, except for the child
    assembling; as such, the children on the device are still
    assembled after this call.

    Must be implemented by the subclasses.

    """
    raise NotImplementedError

  def SetSyncSpeed(self, speed):
    """Adjust the sync speed of the mirror.

    In case this is not a mirroring device, this is no-op.

    The request is passed to the children in order; once a child
    returns a false value the remaining children are not called.

    Returns:
      True if there are no children, otherwise the result of the last
      `child.SetSyncSpeed()` call made

    """
    result = True
    for child in self._children or []:
      result = result and child.SetSyncSpeed(speed)
    return result

  def GetSyncStatus(self):
    """Returns the sync status of the device.

    If this device is a mirroring device, this function returns the
    status of the mirror.

    Returns:
     (sync_percent, estimated_time, is_degraded, ldisk)

    If sync_percent is None, it means the device is not syncing.

    If estimated_time is None, it means we can't estimate
    the time needed, otherwise it's the time left in seconds.

    If is_degraded is True, it means the device is missing
    redundancy. This is usually a sign that something went wrong in
    the device setup, if sync_percent is None.

    The ldisk parameter represents the degradation of the local
    data. This is only valid for some devices, the rest will always
    return False (not degraded).

    This base implementation always reports "not syncing, not
    degraded".

    """
    return None, None, False, False

  def CombinedSyncStatus(self):
    """Calculate the mirror status of this device and its children.

    The return value is the same as for `GetSyncStatus()` except the
    minimum percent and maximum time are calculated across this device
    and its direct children, and the two degradation flags are or-ed
    together. Only `GetSyncStatus()` of each child is consulted, so
    grandchildren are not taken into account.

    """
    min_percent, max_time, is_degraded, ldisk = self.GetSyncStatus()
    for child in self._children or []:
      c_percent, c_time, c_degraded, c_ldisk = child.GetSyncStatus()
      # None means "no value"; it must never win over a real number
      if min_percent is None:
        min_percent = c_percent
      elif c_percent is not None:
        min_percent = min(min_percent, c_percent)
      if max_time is None:
        max_time = c_time
      elif c_time is not None:
        max_time = max(max_time, c_time)
      is_degraded = is_degraded or c_degraded
      ldisk = ldisk or c_ldisk
    return min_percent, max_time, is_degraded, ldisk

  def SetInfo(self, text):
    """Update metadata with info text.

    Only supported for some device types. This base implementation
    just passes the text on to every child.

    """
    # children may be None (device created without children)
    for child in self._children or []:
      child.SetInfo(text)

  def __repr__(self):
    return ("<%s: unique_id: %s, children: %s, %s:%s, %s>" %
            (self.__class__, self.unique_id, self._children,
             self.major, self.minor, self.dev_path))
287

    
288

    
289
class LogicalVolume(BlockDev):
  """Logical Volume block device.

  The unique_id of such a device is the pair (vg_name, lv_name); the
  device path is derived from it as /dev/<vg_name>/<lv_name>.

  """
  # matches the "Block device  MAJOR:MINOR" line of the lvdisplay output
  _BLOCKDEV_RE = re.compile("^ *Block device *([0-9]+):([0-9]+).*$")

  def __init__(self, unique_id, children):
    """Attaches to a LV device.

    The unique_id is a tuple (vg_name, lv_name)

    Raises:
      ValueError: if unique_id is not a two-element tuple or list

    """
    super(LogicalVolume, self).__init__(unique_id, children)
    if not isinstance(unique_id, (tuple, list)) or len(unique_id) != 2:
      raise ValueError("Invalid configuration data %s" % str(unique_id))
    self._vg_name, self._lv_name = unique_id
    self.dev_path = "/dev/%s/%s" % (self._vg_name, self._lv_name)
    self.Attach()

  @classmethod
  def Create(cls, unique_id, children, size):
    """Create a new logical volume.

    Args:
      unique_id: the pair (vg_name, lv_name)
      children: passed unchanged to the constructor
      size: the size of the new volume, in mebibytes

    Returns:
      an instance of this class for the new volume

    Raises:
      ValueError: if unique_id is not a two-element tuple or list
      errors.BlockDeviceError: if the PV information can't be
        computed, there is not enough free space or lvcreate fails

    """
    if not isinstance(unique_id, (tuple, list)) or len(unique_id) != 2:
      raise ValueError("Invalid configuration data %s" % str(unique_id))
    vg_name, lv_name = unique_id
    pvs_info = cls.GetPVInfo(vg_name)
    if not pvs_info:
      raise errors.BlockDeviceError("Can't compute PV info for vg %s" %
                                    vg_name)
    # PVs with the most free space first; this is the order in which
    # they are handed to lvcreate
    pvs_info.sort(reverse=True)

    pvlist = [pv[1] for pv in pvs_info]
    free_size = sum([pv[0] for pv in pvs_info])

    # The size constraint should have been checked from the master before
    # calling the create function.
    if free_size < size:
      raise errors.BlockDeviceError("Not enough free space: required %s,"
                                    " available %s" % (size, free_size))
    result = utils.RunCmd(["lvcreate", "-L%dm" % size, "-n%s" % lv_name,
                           vg_name] + pvlist)
    if result.failed:
      raise errors.BlockDeviceError(result.fail_reason)
    return cls(unique_id, children)

  @staticmethod
  def GetPVInfo(vg_name):
    """Get the free space info for PVs in a volume group.

    Args:
      vg_name: the volume group name

    Returns:
      list of (free_space, name) with free_space in mebibytes, or None
      if pvs failed or its output could not be parsed

    """
    command = ["pvs", "--noheadings", "--nosuffix", "--units=m",
               "-opv_name,vg_name,pv_free,pv_attr", "--unbuffered",
               "--separator=:"]
    result = utils.RunCmd(command)
    if result.failed:
      logger.Error("Can't get the PV information: %s" % result.fail_reason)
      return None
    data = []
    for line in result.stdout.splitlines():
      fields = line.strip().split(':')
      if len(fields) != 4:
        logger.Error("Can't parse pvs output: line '%s'" % line)
        return None
      pv_name, pv_vg, pv_free, pv_attr = fields
      # skip over pvs from another vg or ones which are not allocatable;
      # startswith() also copes with an empty attribute field
      if pv_vg != vg_name or not pv_attr.startswith('a'):
        continue
      data.append((float(pv_free), pv_name))

    return data

  def Remove(self):
    """Remove this logical volume.

    Returns:
      True if the volume does not exist or was removed, False if
      lvremove failed

    """
    # minor 0 is a valid minor, so compare against None explicitly
    if self.minor is None and not self.Attach():
      # the LV does not exist
      return True
    result = utils.RunCmd(["lvremove", "-f", "%s/%s" %
                           (self._vg_name, self._lv_name)])
    if result.failed:
      logger.Error("Can't lvremove: %s" % result.fail_reason)

    return not result.failed

  def Rename(self, new_id):
    """Rename this logical volume.

    Args:
      new_id: the pair (vg_name, new_lv_name); the volume group must
              be the current one

    Raises:
      errors.ProgrammerError: if new_id is malformed or names another
        volume group
      errors.BlockDeviceError: if lvrename fails

    """
    if not isinstance(new_id, (tuple, list)) or len(new_id) != 2:
      # str() is needed: a tuple on the right side of % would be taken
      # as the argument list and break the formatting itself
      raise errors.ProgrammerError("Invalid new logical id '%s'" %
                                   str(new_id))
    new_vg, new_name = new_id
    if new_vg != self._vg_name:
      raise errors.ProgrammerError("Can't move a logical volume across"
                                   " volume groups (from %s to %s)" %
                                   (self._vg_name, new_vg))
    result = utils.RunCmd(["lvrename", new_vg, self._lv_name, new_name])
    if result.failed:
      raise errors.BlockDeviceError("Failed to rename the logical volume: %s" %
                                    result.output)
    self._lv_name = new_name
    self.dev_path = "/dev/%s/%s" % (self._vg_name, self._lv_name)

  def Attach(self):
    """Attach to an existing LV.

    This method will try to see if an existing and active LV exists
    which matches our name. If so, its major/minor will be
    recorded.

    Returns:
      True if the major/minor pair was found in the lvdisplay output,
      False otherwise

    """
    result = utils.RunCmd(["lvdisplay", self.dev_path])
    if result.failed:
      logger.Error("Can't find LV %s: %s, %s" %
                   (self.dev_path, result.fail_reason, result.output))
      return False
    for line in result.stdout.splitlines():
      match_result = self._BLOCKDEV_RE.match(line)
      if match_result:
        self.major = int(match_result.group(1))
        self.minor = int(match_result.group(2))
        return True
    return False

  def Assemble(self):
    """Assemble the device.

    We always run `lvchange -ay` on the LV to ensure it's active before
    use, as there were cases when xenvg was not active after boot
    (also possibly after disk issues).

    Returns:
      True if lvchange succeeded, False otherwise

    """
    result = utils.RunCmd(["lvchange", "-ay", self.dev_path])
    if result.failed:
      logger.Error("Can't activate lv %s: %s" % (self.dev_path, result.output))
    return not result.failed

  def Shutdown(self):
    """Shutdown the device.

    This is a no-op for the LV device type, as we don't deactivate the
    volumes on shutdown.

    """
    return True

  def GetStatus(self):
    """Return the status of the device.

    Logical volumes can be in all four states, although we don't
    deactivate (lvchange -an) them when shutdown, so STATUS_EXISTING
    should not be seen for our devices.

    Returns:
      one of the STATUS_* constants; STATUS_UNKNOWN if lvs fails or
      its output is too short to be parsed

    """
    result = utils.RunCmd(["lvs", "--noheadings", "-olv_attr", self.dev_path])
    if result.failed:
      logger.Error("Can't display lv: %s" % result.fail_reason)
      return self.STATUS_UNKNOWN
    out = result.stdout.strip()
    # format: type/permissions/alloc/fixed_minor/state/open; only the
    # first six positions are used, so longer attribute strings (as
    # printed by newer LVM versions) are accepted too
    if len(out) < 6:
      return self.STATUS_UNKNOWN
    active = (out[4] == "a")
    online = (out[5] == "o")
    if online:
      retval = self.STATUS_ONLINE
    elif active:
      retval = self.STATUS_STANDBY
    else:
      retval = self.STATUS_EXISTING

    return retval

  def GetSyncStatus(self):
    """Returns the sync status of the device.

    If this device is a mirroring device, this function returns the
    status of the mirror.

    Returns:
     (sync_percent, estimated_time, is_degraded, ldisk)

    For logical volumes, sync_percent and estimated_time are always
    None (no recovery in progress, as we don't handle the mirrored LV
    case). The is_degraded parameter has the same value as the ldisk
    parameter.

    For the ldisk parameter, we check if the logical volume has the
    'virtual' type, which means it's not backed by existing storage
    anymore (read from it return I/O error). This happens after a
    physical disk failure and subsequent 'vgreduce --removemissing' on
    the volume group.

    If lvs fails or its output can't be parsed, the device is reported
    as degraded.

    """
    result = utils.RunCmd(["lvs", "--noheadings", "-olv_attr", self.dev_path])
    if result.failed:
      logger.Error("Can't display lv: %s" % result.fail_reason)
      return None, None, True, True
    out = result.stdout.strip()
    # format: type/permissions/alloc/fixed_minor/state/open; longer
    # attribute strings (newer LVM versions) are accepted too
    if len(out) < 6:
      logger.Debug("Error in lvs output: attrs=%s, len < 6" % out)
      return None, None, True, True
    ldisk = out[0] == 'v' # virtual volume, i.e. doesn't have
                          # backing storage
    return None, None, ldisk, ldisk

  def Open(self, force=False):
    """Make the device ready for I/O.

    This is a no-op for the LV device type.

    """
    pass

  def Close(self):
    """Notifies that the device will no longer be used for I/O.

    This is a no-op for the LV device type.

    """
    pass

  def Snapshot(self, size):
    """Create a snapshot copy of an lvm block device.

    An existing snapshot with the same name is removed first.

    Args:
      size: the size of the snapshot volume, in mebibytes

    Returns:
      the name of the snapshot volume (our LV name plus ".snap")

    Raises:
      errors.BlockDeviceError: if the PV information can't be
        computed, no single PV has enough free space or lvcreate
        fails

    """
    snap_name = self._lv_name + ".snap"

    # remove existing snapshot if found
    snap = LogicalVolume((self._vg_name, snap_name), [])
    snap.Remove()

    pvs_info = self.GetPVInfo(self._vg_name)
    if not pvs_info:
      raise errors.BlockDeviceError("Can't compute PV info for vg %s" %
                                    self._vg_name)
    # only the PV with the most free space is checked
    free_size = max(pvs_info)[0]
    if free_size < size:
      raise errors.BlockDeviceError("Not enough free space: required %s,"
                                    " available %s" % (size, free_size))

    result = utils.RunCmd(["lvcreate", "-L%dm" % size, "-s",
                           "-n%s" % snap_name, self.dev_path])
    if result.failed:
      raise errors.BlockDeviceError("command: %s error: %s" %
                                    (result.cmd, result.fail_reason))

    return snap_name

  def SetInfo(self, text):
    """Update metadata with info text.

    The text is stored as a tag on the logical volume, after
    replacing the characters which are not accepted here with
    underscores and cutting it to 128 characters.

    Raises:
      errors.BlockDeviceError: if lvchange fails

    """
    BlockDev.SetInfo(self, text)

    # Replace invalid characters; the first character has a stricter
    # rule (no dash) than the rest
    text = re.sub('^[^A-Za-z0-9_+.]', '_', text)
    text = re.sub('[^-A-Za-z0-9_+.]', '_', text)

    # Only up to 128 characters are allowed
    text = text[:128]

    result = utils.RunCmd(["lvchange", "--addtag", text,
                           self.dev_path])
    if result.failed:
      raise errors.BlockDeviceError("Command: %s error: %s" %
                                    (result.cmd, result.fail_reason))
567

    
568

    
569
class MDRaid1(BlockDev):
  """raid1 device implemented via md.

  The unique_id of such a device is the UUID of the md array; the
  minor number and the device path are looked up from it.

  """
  def __init__(self, unique_id, children):
    """Attach to the md array with the given UUID, if it is running.

    """
    super(MDRaid1, self).__init__(unique_id, children)
    self.major = 9
    self.Attach()

  def Attach(self):
    """Find an array which matches our config and attach to it.

    This tries to find a MD array which has the same UUID as our own.
    If none is found, minor and dev_path are reset to None.

    Returns:
      True if a matching array was found, False otherwise

    """
    minor = self._FindMDByUUID(self.unique_id)
    if minor is not None:
      self._SetFromMinor(minor)
    else:
      self.minor = None
      self.dev_path = None

    return (minor is not None)

  @staticmethod
  def _ReadFirstLine(path):
    """Return the first line of a file, stripped of whitespace.

    """
    fd = open(path, "r")
    try:
      return fd.readline().strip()
    finally:
      fd.close()

  @staticmethod
  def _GetUsedDevs():
    """Compute the list of in-use MD devices.

    It doesn't matter if the used device have other raid level, just
    that they are in use.

    Returns:
      a dictionary of md minor: the /proc/mdstat line describing it

    """
    mdstat = open("/proc/mdstat", "r")
    try:
      data = mdstat.readlines()
    finally:
      # close the file even if reading it fails
      mdstat.close()

    used_md = {}
    valid_line = re.compile("^md([0-9]+) : .*$")
    for line in data:
      match = valid_line.match(line)
      if match:
        md_no = int(match.group(1))
        used_md[md_no] = line

    return used_md

  @staticmethod
  def _GetDevInfo(minor):
    """Get info about a MD device.

    Returns:
      None if `mdadm -D` failed, otherwise a dictionary which may
      contain the keys:
        - "uuid": the UUID of the array
        - "state": the list of comma-separated state words
      A key is missing if the matching line was not in the output.

    """
    result = utils.RunCmd(["mdadm", "-D", "/dev/md%d" % minor])
    if result.failed:
      logger.Error("Can't display md: %s" % result.fail_reason)
      return None
    retval = {}
    for line in result.stdout.splitlines():
      kv = line.strip().split(" : ", 1)
      if len(kv) != 2:
        # not a "key : value" line
        continue
      key, value = kv
      if key == "UUID":
        retval["uuid"] = value.split()[0]
      elif key == "State":
        retval["state"] = value.split(", ")
    return retval

  @staticmethod
  def _FindUnusedMinor():
    """Compute an unused MD minor.

    This code assumes that there are 256 minors only.

    Returns:
      the lowest minor which is not in use

    Raises:
      errors.BlockDeviceError: if all 256 minors are in use

    """
    used_md = MDRaid1._GetUsedDevs()
    for minor in range(256):
      if minor not in used_md:
        return minor
    logger.Error("Critical: Out of md minor numbers.")
    raise errors.BlockDeviceError("Can't find a free MD minor")

  @classmethod
  def _FindMDByUUID(cls, uuid):
    """Find the minor of an MD array with a given UUID.

    Returns:
      the minor of the matching array, or None if there is none

    """
    md_list = cls._GetUsedDevs()
    for minor in md_list:
      info = cls._GetDevInfo(minor)
      # the info can lack the uuid key, so don't index it directly
      if info and info.get("uuid") == uuid:
        return minor
    return None

  @staticmethod
  def _ZeroSuperblock(dev_path):
    """Zero the possible locations for an MD superblock.

    The zero-ing can't be done via ``mdadm --zero-superblock`` as that
    fails in versions 2.x with the same error code as non-writable
    device.

    The superblocks are located at (negative values are relative to
    the end of the block device):
      - -128k to end for version 0.90 superblock
      - -8k to -12k for version 1.0 superblock (included in the above)
      - 0k to 4k for version 1.1 superblock
      - 4k to 8k for version 1.2 superblock

    To cover all situations, the zero-ing will be:
      - 0k to 128k
      - -128k to end

    As such, the minimum device size must be 128k, otherwise we'll get
    I/O errors.

    Note that this function depends on the fact that one can open,
    read and write block devices normally.

    """
    overwrite_size = 128 * 1024
    empty_buf = '\0' * overwrite_size
    fd = open(dev_path, "r+")
    try:
      fd.seek(0, 0)
      p1 = fd.tell()
      fd.write(empty_buf)
      p2 = fd.tell()
      logger.Debug("Zeroed %s from %d to %d" % (dev_path, p1, p2))
      # whence=2: the offset is relative to the end of the device
      fd.seek(-overwrite_size, 2)
      p1 = fd.tell()
      fd.write(empty_buf)
      p2 = fd.tell()
      logger.Debug("Zeroed %s from %d to %d" % (dev_path, p1, p2))
    finally:
      fd.close()

  @classmethod
  def Create(cls, unique_id, children, size):
    """Create a new MD raid1 array.

    The superblock areas of all the children are zeroed and a new
    array is created on the first free minor. The unique_id and size
    arguments are not used; the UUID of the new array is read back
    from mdadm.

    Returns:
      an instance of this class for the new array, or None if the
      array could not be created (the reason is logged)

    Raises:
      ValueError: if children is not a list/tuple of BlockDev
      errors.BlockDeviceError: if there is no free md minor

    """
    import sys

    if not isinstance(children, (tuple, list)):
      raise ValueError("Invalid setup data for MDRaid1 dev: %s" %
                       str(children))
    for child in children:
      if not isinstance(child, BlockDev):
        raise ValueError("Invalid member in MDRaid1 dev: %s" % type(child))
    for child in children:
      try:
        cls._ZeroSuperblock(child.dev_path)
      except EnvironmentError:
        # sys.exc_info() is used instead of binding the exception in
        # the except clause, as it is accepted by both old and new
        # interpreter versions
        err = sys.exc_info()[1]
        logger.Error("Can't zero superblock for %s: %s" %
                     (child.dev_path, str(err)))
        return None
    minor = cls._FindUnusedMinor()
    result = utils.RunCmd(["mdadm", "--create", "/dev/md%d" % minor,
                           "--auto=yes", "--force", "-l1",
                           "-n%d" % len(children)] +
                          [dev.dev_path for dev in children])

    if result.failed:
      logger.Error("Can't create md: %s: %s" % (result.fail_reason,
                                                result.output))
      return None
    info = cls._GetDevInfo(minor)
    if not info or "uuid" not in info:
      logger.Error("Wrong information returned from mdadm -D: %s" % str(info))
      return None
    return cls(info["uuid"], children)

  def Remove(self):
    """Stub remove function for MD RAID 1 arrays.

    We don't remove the superblock right now. Mark a to do.

    Returns:
      the result of `Shutdown()`

    """
    #TODO: maybe zero superblock on child devices?
    return self.Shutdown()

  def Rename(self, new_id):
    """Rename a device.

    This is not supported for md raid1 devices.

    """
    raise errors.ProgrammerError("Can't rename a md raid1 device")

  def AddChildren(self, devices):
    """Add new member(s) to the md raid1.

    Args:
      devices: the BlockDev instances to add; each one is opened
               before being added

    Raises:
      errors.BlockDeviceError: if we can't attach to the array, a
        device has no dev_path or one of the mdadm calls fails

    """
    if self.minor is None and not self.Attach():
      raise errors.BlockDeviceError("Can't attach to device")

    args = ["mdadm", "-a", self.dev_path]
    for dev in devices:
      if dev.dev_path is None:
        raise errors.BlockDeviceError("Child '%s' is not initialised" % dev)
      dev.Open()
      args.append(dev.dev_path)
    result = utils.RunCmd(args)
    if result.failed:
      raise errors.BlockDeviceError("Failed to add new device to array: %s" %
                                    result.output)
    new_len = len(self._children) + len(devices)
    # command line arguments must be strings, not numbers
    result = utils.RunCmd(["mdadm", "--grow", self.dev_path,
                           "-n", str(new_len)])
    if result.failed:
      raise errors.BlockDeviceError("Can't grow md array: %s" %
                                    result.output)
    self._children.extend(devices)

  def RemoveChildren(self, devices):
    """Remove member(s) from the md raid1.

    Args:
      devices: the device paths of the members to remove; each must
               be the dev_path of one of our children

    Raises:
      errors.BlockDeviceError: if we can't attach to the array, no
        child would be left, a path is not one of our children or
        one of the mdadm calls fails

    """
    if self.minor is None and not self.Attach():
      raise errors.BlockDeviceError("Can't attach to device")
    new_len = len(self._children) - len(devices)
    if new_len < 1:
      raise errors.BlockDeviceError("Can't reduce to less than one child")
    args = ["mdadm", "-f", self.dev_path]
    orig_devs = []
    for dev in devices:
      args.append(dev)
      for c in self._children:
        if c.dev_path == dev:
          orig_devs.append(c)
          break
      else:
        # the inner loop found no child with this path
        raise errors.BlockDeviceError("Can't find device '%s' for removal" %
                                      dev)
    result = utils.RunCmd(args)
    if result.failed:
      raise errors.BlockDeviceError("Failed to mark device(s) as failed: %s" %
                                    result.output)

    # it seems here we need a short delay for MD to update its
    # superblocks
    time.sleep(0.5)
    # same device list, but now remove the (failed) members
    args[1] = "-r"
    result = utils.RunCmd(args)
    if result.failed:
      raise errors.BlockDeviceError("Failed to remove device(s) from array:"
                                    " %s" % result.output)
    # command line arguments must be strings, not numbers
    result = utils.RunCmd(["mdadm", "--grow", "--force", self.dev_path,
                           "-n", str(new_len)])
    if result.failed:
      raise errors.BlockDeviceError("Can't shrink md array: %s" %
                                    result.output)
    for dev in orig_devs:
      self._children.remove(dev)

  def GetStatus(self):
    """Return the status of the device.

    Returns:
      STATUS_ONLINE if an array with our UUID is running,
      STATUS_UNKNOWN otherwise

    """
    self.Attach()
    if self.minor is None:
      retval = self.STATUS_UNKNOWN
    else:
      retval = self.STATUS_ONLINE
    return retval

  def _SetFromMinor(self, minor):
    """Set our parameters based on the given minor.

    This sets our minor variable and our dev_path.

    """
    self.minor = minor
    self.dev_path = "/dev/md%d" % minor

  def Assemble(self):
    """Assemble the MD device.

    At this point we should have:
      - list of children devices
      - uuid

    The children are assembled first. If an array with our UUID is
    already running we just attach to it, otherwise the array is
    assembled on the first free minor.

    Returns:
      a true value on success (minor and dev_path are then set), a
      false value otherwise

    """
    result = super(MDRaid1, self).Assemble()
    if not result:
      return result
    if self.Attach():
      logger.Info("MD array %s already started" % str(self))
      return True
    free_minor = self._FindUnusedMinor()
    result = utils.RunCmd(["mdadm", "-A", "--auto=yes", "--uuid",
                           self.unique_id, "/dev/md%d" % free_minor] +
                          [bdev.dev_path for bdev in self._children])
    if result.failed:
      logger.Error("Can't assemble MD array: %s: %s" %
                   (result.fail_reason, result.output))
      self.minor = None
      return False
    # record both the minor and the device path, as the users of an
    # assembled device need its dev_path
    self._SetFromMinor(free_minor)
    return True

  def Shutdown(self):
    """Tear down the MD array.

    This does a 'mdadm --stop' so after this command, the array is no
    longer available.

    Returns:
      True if the array was stopped or was not running, False if
      mdadm failed

    """
    if self.minor is None and not self.Attach():
      logger.Info("MD object not attached to a device")
      return True

    result = utils.RunCmd(["mdadm", "--stop", "/dev/md%d" % self.minor])
    if result.failed:
      logger.Error("Can't stop MD array: %s" % result.fail_reason)
      return False
    self.minor = None
    self.dev_path = None
    return True

  def SetSyncSpeed(self, kbytes):
    """Set the maximum sync speed for the MD array.

    The speed is first passed on to the children. The maximum speed
    of the array is then set to kbytes and the minimum speed to half
    of that.

    Returns:
      False if we are not attached to an array, otherwise the result
      of setting the speed on the children

    """
    result = super(MDRaid1, self).SetSyncSpeed(kbytes)
    if self.minor is None:
      logger.Error("MD array not attached to a device")
      return False
    f = open("/sys/block/md%d/md/sync_speed_max" % self.minor, "w")
    try:
      f.write("%d" % kbytes)
    finally:
      f.close()
    f = open("/sys/block/md%d/md/sync_speed_min" % self.minor, "w")
    try:
      f.write("%d" % (kbytes // 2))
    finally:
      f.close()
    return result

  def GetSyncStatus(self):
    """Returns the sync status of the device.

    Returns:
     (sync_percent, estimated_time, is_degraded, ldisk)

    If sync_percent is None, it means all is ok
    If estimated_time is None, it means we can't estimate
    the time needed, otherwise it's the time left in seconds.

    The array is reported as degraded unless its state, as shown by
    mdadm, is exactly "clean" or "active".

    The ldisk parameter is always False for MD devices.

    Raises:
      errors.BlockDeviceError: if we can't attach to the array

    """
    if self.minor is None and not self.Attach():
      raise errors.BlockDeviceError("Can't attach to device in GetSyncStatus")
    # _GetDevInfo returns None if mdadm fails; in this case the state
    # is not known and the array is reported as not clean
    dev_info = self._GetDevInfo(self.minor) or {}
    state = dev_info.get("state", [])
    is_clean = (len(state) == 1 and state[0] in ("clean", "active"))
    sys_path = "/sys/block/md%s/md/" % self.minor
    sync_status = self._ReadFirstLine(sys_path + "sync_action")
    if sync_status == "idle":
      return None, None, not is_clean, False
    sync_completed = self._ReadFirstLine(sys_path + "sync_completed")
    sync_completed = sync_completed.split(" / ")
    if len(sync_completed) != 2:
      # not in the "done / total" format
      return 0, None, not is_clean, False
    sync_done, sync_total = [float(i) for i in sync_completed]
    sync_percent = 100.0*sync_done/sync_total
    sync_speed_k = int(self._ReadFirstLine(sys_path + "sync_speed"))
    if sync_speed_k == 0:
      time_est = None
    else:
      # NOTE(review): the division by two presumably converts 512-byte
      # sectors into kilobytes, to match the speed unit - confirm
      time_est = (sync_total - sync_done) / 2 / sync_speed_k
    return sync_percent, time_est, not is_clean, False

  def Open(self, force=False):
    """Make the device ready for I/O.

    This is a no-op for the MDRaid1 device type, although we could use
    the 2.6.18's new array_state thing.

    """
    pass

  def Close(self):
    """Notifies that the device will no longer be used for I/O.

    This is a no-op for the MDRaid1 device type, but see comment for
    `Open()`.

    """
    pass
972

    
973

    
974
class BaseDRBD(BlockDev):
  """Base DRBD class.

  This class contains a few bits of common functionality between the
  0.7 and 8.x versions of DRBD: reading and parsing /proc/drbd,
  mapping minors to device paths, checking the size of a meta device
  and refusing renames.

  """
  # parses the version line of /proc/drbd; the sixth group (second
  # protocol number, after the dash) is optional
  _VERSION_RE = re.compile(r"^version: (\d+)\.(\d+)\.(\d+)"
                           r" \(api:(\d+)/proto:(\d+)(?:-(\d+))?\)")
  # a /proc/drbd line which starts the description of a minor
  _MINOR_LINE_RE = re.compile(r"^ *([0-9]+):.*$")
  # same as above, but also capturing the connection state (cs:)
  _MINOR_STATE_RE = re.compile(r"^ *([0-9]+): cs:([^ ]+).*$")

  _DRBD_MAJOR = 147
  _ST_UNCONFIGURED = "Unconfigured"
  _ST_WFCONNECTION = "WFConnection"
  _ST_CONNECTED = "Connected"

  @staticmethod
  def _GetProcData():
    """Return data from /proc/drbd.

    Returns:
      the contents of /proc/drbd as a list of lines, without the line
      terminators

    Raises:
      errors.BlockDeviceError: if no data at all could be read

    """
    stat = open("/proc/drbd", "r")
    try:
      data = stat.read().splitlines()
    finally:
      stat.close()
    if not data:
      raise errors.BlockDeviceError("Can't read any data from /proc/drbd")
    return data

  @classmethod
  def _MassageProcData(cls, data):
    """Transform the output of _GetProcData into a nicer form.

    Lines which do not start a new minor are stripped and appended
    (separated by a space) to the line of the minor preceding them;
    lines seen before the first minor are dropped.

    Args:
      data: list of lines, as returned by _GetProcData

    Returns:
      a dictionary of minor: joined lines from /proc/drbd for that minor

    """
    results = {}
    old_minor = old_line = None
    for line in data:
      lresult = cls._MINOR_LINE_RE.match(line)
      if lresult is not None:
        # a new minor starts here, so save the previous one (if any)
        if old_minor is not None:
          results[old_minor] = old_line
        old_minor = int(lresult.group(1))
        old_line = line
      else:
        if old_minor is not None:
          old_line += " " + line.strip()
    # add last line
    if old_minor is not None:
      results[old_minor] = old_line
    return results

  @classmethod
  def _GetVersion(cls):
    """Return the DRBD version.

    The version is parsed from the first line of /proc/drbd.

    Returns:
      a dict with keys:
        k_major,
        k_minor,
        k_point,
        api,
        proto,
        proto2 (only present if the version line has a protocol range)
      All values are integers, except for proto2 which is kept as the
      matched string.

    Raises:
      errors.BlockDeviceError: if the first line can't be parsed

    """
    proc_data = cls._GetProcData()
    first_line = proc_data[0].strip()
    version = cls._VERSION_RE.match(first_line)
    if not version:
      raise errors.BlockDeviceError("Can't parse DRBD version from '%s'" %
                                    first_line)

    values = version.groups()
    retval = {'k_major': int(values[0]),
              'k_minor': int(values[1]),
              'k_point': int(values[2]),
              'api': int(values[3]),
              'proto': int(values[4]),
             }
    if values[5] is not None:
      # NOTE(review): unlike the other fields this one is not converted
      # to int; left unchanged since the callers are not visible here
      retval['proto2'] = values[5]

    return retval

  @staticmethod
  def _DevPath(minor):
    """Return the path to a drbd device for a given minor.

    Args:
      minor: the (integer) minor number

    Returns:
      the "/dev/drbd<minor>" path

    """
    return "/dev/drbd%d" % minor

  @classmethod
  def _GetUsedDevs(cls):
    """Compute the used DRBD devices.

    A minor is considered used if it is listed in /proc/drbd with a
    connection state different from Unconfigured.

    Returns:
      a dictionary of minor: (connection state, /proc/drbd line)

    """
    data = cls._GetProcData()

    used_devs = {}
    for line in data:
      match = cls._MINOR_STATE_RE.match(line)
      if not match:
        continue
      minor = int(match.group(1))
      state = match.group(2)
      if state == cls._ST_UNCONFIGURED:
        continue
      used_devs[minor] = state, line

    return used_devs

  def _SetFromMinor(self, minor):
    """Set our parameters based on the given minor.

    This sets our minor variable and our dev_path; if the given minor
    is None, both are reset to None.

    """
    if minor is None:
      self.minor = self.dev_path = None
    else:
      self.minor = minor
      self.dev_path = self._DevPath(minor)

  @staticmethod
  def _CheckMetaSize(meta_device):
    """Check if the given meta device looks like a valid one.

    This currently only checks the size (as reported by `blockdev
    --getsize`, in 512-byte sectors), which must be between 128MiB
    and 160MiB.

    Args:
      meta_device: the path of the meta device

    Returns:
      True if the size is acceptable, False if it is not or if it
      could not be determined (the reason is logged)

    """
    result = utils.RunCmd(["blockdev", "--getsize", meta_device])
    if result.failed:
      logger.Error("Failed to get device size: %s" % result.fail_reason)
      return False
    try:
      sectors = int(result.stdout)
    except ValueError:
      logger.Error("Invalid output from blockdev: '%s'" % result.stdout)
      return False
    size_bytes = sectors * 512
    # float division, otherwise the fractional part shown by the %.2f
    # formats below would always be zero
    size_mib = size_bytes / 1024.0 / 1024.0
    if size_bytes < 128 * 1024 * 1024: # less than 128MiB
      logger.Error("Meta device too small (%.2fMib)" % size_mib)
      return False
    if size_bytes > (128 + 32) * 1024 * 1024: # account for an extra (big) PE on LVM
      logger.Error("Meta device too big (%.2fMiB)" % size_mib)
      return False
    return True

  def Rename(self, new_id):
    """Rename a device.

    This is not supported for drbd devices.

    Raises:
      errors.ProgrammerError: always

    """
    raise errors.ProgrammerError("Can't rename a drbd device")
1134

    
1135

    
1136
class DRBDev(BaseDRBD):
1137
  """DRBD block device.
1138

1139
  This implements the local host part of the DRBD device, i.e. it
1140
  doesn't do anything to the supposed peer. If you need a fully
1141
  connected DRBD pair, you need to use this class on both hosts.
1142

1143
  The unique_id for the drbd device is the (local_ip, local_port,
1144
  remote_ip, remote_port) tuple, and it must have two children: the
1145
  data device and the meta_device. The meta device is checked for
1146
  valid size and is zeroed on create.
1147

1148
  """
1149
  def __init__(self, unique_id, children):
1150
    super(DRBDev, self).__init__(unique_id, children)
1151
    self.major = self._DRBD_MAJOR
1152
    version = self._GetVersion()
1153
    if version['k_major'] != 0 and version['k_minor'] != 7:
1154
      raise errors.BlockDeviceError("Mismatch in DRBD kernel version and"
1155
                                    " requested ganeti usage: kernel is"
1156
                                    " %s.%s, ganeti wants 0.7" %
1157
                                    (version['k_major'], version['k_minor']))
1158
    if len(children) != 2:
1159
      raise ValueError("Invalid configuration data %s" % str(children))
1160
    if not isinstance(unique_id, (tuple, list)) or len(unique_id) != 4:
1161
      raise ValueError("Invalid configuration data %s" % str(unique_id))
1162
    self._lhost, self._lport, self._rhost, self._rport = unique_id
1163
    self.Attach()
1164

    
1165
  @classmethod
1166
  def _FindUnusedMinor(cls):
1167
    """Find an unused DRBD device.
1168

1169
    """
1170
    data = cls._GetProcData()
1171

    
1172
    valid_line = re.compile("^ *([0-9]+): cs:Unconfigured$")
1173
    for line in data:
1174
      match = valid_line.match(line)
1175
      if match:
1176
        return int(match.group(1))
1177
    logger.Error("Error: no free drbd minors!")
1178
    raise errors.BlockDeviceError("Can't find a free DRBD minor")
1179

    
1180
  @classmethod
1181
  def _GetDevInfo(cls, minor):
1182
    """Get details about a given DRBD minor.
1183

1184
    This return, if available, the local backing device in (major,
1185
    minor) formant and the local and remote (ip, port) information.
1186

1187
    """
1188
    data = {}
1189
    result = utils.RunCmd(["drbdsetup", cls._DevPath(minor), "show"])
1190
    if result.failed:
1191
      logger.Error("Can't display the drbd config: %s" % result.fail_reason)
1192
      return data
1193
    out = result.stdout
1194
    if out == "Not configured\n":
1195
      return data
1196
    for line in out.splitlines():
1197
      if "local_dev" not in data:
1198
        match = re.match("^Lower device: ([0-9]+):([0-9]+) .*$", line)
1199
        if match:
1200
          data["local_dev"] = (int(match.group(1)), int(match.group(2)))
1201
          continue
1202
      if "meta_dev" not in data:
1203
        match = re.match("^Meta device: (([0-9]+):([0-9]+)|internal).*$", line)
1204
        if match:
1205
          if match.group(2) is not None and match.group(3) is not None:
1206
            # matched on the major/minor
1207
            data["meta_dev"] = (int(match.group(2)), int(match.group(3)))
1208
          else:
1209
            # matched on the "internal" string
1210
            data["meta_dev"] = match.group(1)
1211
            # in this case, no meta_index is in the output
1212
            data["meta_index"] = -1
1213
          continue
1214
      if "meta_index" not in data:
1215
        match = re.match("^Meta index: ([0-9]+).*$", line)
1216
        if match:
1217
          data["meta_index"] = int(match.group(1))
1218
          continue
1219
      if "local_addr" not in data:
1220
        match = re.match("^Local address: ([0-9.]+):([0-9]+)$", line)
1221
        if match:
1222
          data["local_addr"] = (match.group(1), int(match.group(2)))
1223
          continue
1224
      if "remote_addr" not in data:
1225
        match = re.match("^Remote address: ([0-9.]+):([0-9]+)$", line)
1226
        if match:
1227
          data["remote_addr"] = (match.group(1), int(match.group(2)))
1228
          continue
1229
    return data
1230

    
1231
  def _MatchesLocal(self, info):
1232
    """Test if our local config matches with an existing device.
1233

1234
    The parameter should be as returned from `_GetDevInfo()`. This
1235
    method tests if our local backing device is the same as the one in
1236
    the info parameter, in effect testing if we look like the given
1237
    device.
1238

1239
    """
1240
    if not ("local_dev" in info and "meta_dev" in info and
1241
            "meta_index" in info):
1242
      return False
1243

    
1244
    backend = self._children[0]
1245
    if backend is not None:
1246
      retval = (info["local_dev"] == (backend.major, backend.minor))
1247
    else:
1248
      retval = (info["local_dev"] == (0, 0))
1249
    meta = self._children[1]
1250
    if meta is not None:
1251
      retval = retval and (info["meta_dev"] == (meta.major, meta.minor))
1252
      retval = retval and (info["meta_index"] == 0)
1253
    else:
1254
      retval = retval and (info["meta_dev"] == "internal" and
1255
                           info["meta_index"] == -1)
1256
    return retval
1257

    
1258
  def _MatchesNet(self, info):
1259
    """Test if our network config matches with an existing device.
1260

1261
    The parameter should be as returned from `_GetDevInfo()`. This
1262
    method tests if our network configuration is the same as the one
1263
    in the info parameter, in effect testing if we look like the given
1264
    device.
1265

1266
    """
1267
    if (((self._lhost is None and not ("local_addr" in info)) and
1268
         (self._rhost is None and not ("remote_addr" in info)))):
1269
      return True
1270

    
1271
    if self._lhost is None:
1272
      return False
1273

    
1274
    if not ("local_addr" in info and
1275
            "remote_addr" in info):
1276
      return False
1277

    
1278
    retval = (info["local_addr"] == (self._lhost, self._lport))
1279
    retval = (retval and
1280
              info["remote_addr"] == (self._rhost, self._rport))
1281
    return retval
1282

    
1283
  @classmethod
1284
  def _AssembleLocal(cls, minor, backend, meta):
1285
    """Configure the local part of a DRBD device.
1286

1287
    This is the first thing that must be done on an unconfigured DRBD
1288
    device. And it must be done only once.
1289

1290
    """
1291
    if not cls._CheckMetaSize(meta):
1292
      return False
1293
    result = utils.RunCmd(["drbdsetup", cls._DevPath(minor), "disk",
1294
                           backend, meta, "0", "-e", "detach"])
1295
    if result.failed:
1296
      logger.Error("Can't attach local disk: %s" % result.output)
1297
    return not result.failed
1298

    
1299
  @classmethod
1300
  def _ShutdownLocal(cls, minor):
1301
    """Detach from the local device.
1302

1303
    I/Os will continue to be served from the remote device. If we
1304
    don't have a remote device, this operation will fail.
1305

1306
    """
1307
    result = utils.RunCmd(["drbdsetup", cls._DevPath(minor), "detach"])
1308
    if result.failed:
1309
      logger.Error("Can't detach local device: %s" % result.output)
1310
    return not result.failed
1311

    
1312
  @staticmethod
1313
  def _ShutdownAll(minor):
1314
    """Deactivate the device.
1315

1316
    This will, of course, fail if the device is in use.
1317

1318
    """
1319
    result = utils.RunCmd(["drbdsetup", DRBDev._DevPath(minor), "down"])
1320
    if result.failed:
1321
      logger.Error("Can't shutdown drbd device: %s" % result.output)
1322
    return not result.failed
1323

    
1324
  @classmethod
1325
  def _AssembleNet(cls, minor, net_info, protocol):
1326
    """Configure the network part of the device.
1327

1328
    This operation can be, in theory, done multiple times, but there
1329
    have been cases (in lab testing) in which the network part of the
1330
    device had become stuck and couldn't be shut down because activity
1331
    from the new peer (also stuck) triggered a timer re-init and
1332
    needed remote peer interface shutdown in order to clear. So please
1333
    don't change online the net config.
1334

1335
    """
1336
    lhost, lport, rhost, rport = net_info
1337
    result = utils.RunCmd(["drbdsetup", cls._DevPath(minor), "net",
1338
                           "%s:%s" % (lhost, lport), "%s:%s" % (rhost, rport),
1339
                           protocol])
1340
    if result.failed:
1341
      logger.Error("Can't setup network for dbrd device: %s" %
1342
                   result.fail_reason)
1343
      return False
1344

    
1345
    timeout = time.time() + 10
1346
    ok = False
1347
    while time.time() < timeout:
1348
      info = cls._GetDevInfo(minor)
1349
      if not "local_addr" in info or not "remote_addr" in info:
1350
        time.sleep(1)
1351
        continue
1352
      if (info["local_addr"] != (lhost, lport) or
1353
          info["remote_addr"] != (rhost, rport)):
1354
        time.sleep(1)
1355
        continue
1356
      ok = True
1357
      break
1358
    if not ok:
1359
      logger.Error("Timeout while configuring network")
1360
      return False
1361
    return True
1362

    
1363
  @classmethod
1364
  def _ShutdownNet(cls, minor):
1365
    """Disconnect from the remote peer.
1366

1367
    This fails if we don't have a local device.
1368

1369
    """
1370
    result = utils.RunCmd(["drbdsetup", cls._DevPath(minor), "disconnect"])
1371
    if result.failed:
1372
      logger.Error("Can't shutdown network: %s" % result.output)
1373
    return not result.failed
1374

    
1375
  def Assemble(self):
1376
    """Assemble the drbd.
1377

1378
    Method:
1379
      - if we have a local backing device, we bind to it by:
1380
        - checking the list of used drbd devices
1381
        - check if the local minor use of any of them is our own device
1382
        - if yes, abort?
1383
        - if not, bind
1384
      - if we have a local/remote net info:
1385
        - redo the local backing device step for the remote device
1386
        - check if any drbd device is using the local port,
1387
          if yes abort
1388
        - check if any remote drbd device is using the remote
1389
          port, if yes abort (for now)
1390
        - bind our net port
1391
        - bind the remote net port
1392

1393
    """
1394
    self.Attach()
1395
    if self.minor is not None:
1396
      logger.Info("Already assembled")
1397
      return True
1398

    
1399
    result = super(DRBDev, self).Assemble()
1400
    if not result:
1401
      return result
1402

    
1403
    minor = self._FindUnusedMinor()
1404
    need_localdev_teardown = False
1405
    if self._children[0]:
1406
      result = self._AssembleLocal(minor, self._children[0].dev_path,
1407
                                   self._children[1].dev_path)
1408
      if not result:
1409
        return False
1410
      need_localdev_teardown = True
1411
    if self._lhost and self._lport and self._rhost and self._rport:
1412
      result = self._AssembleNet(minor,
1413
                                 (self._lhost, self._lport,
1414
                                  self._rhost, self._rport),
1415
                                 "C")
1416
      if not result:
1417
        if need_localdev_teardown:
1418
          # we will ignore failures from this
1419
          logger.Error("net setup failed, tearing down local device")
1420
          self._ShutdownAll(minor)
1421
        return False
1422
    self._SetFromMinor(minor)
1423
    return True
1424

    
1425
  def Shutdown(self):
1426
    """Shutdown the DRBD device.
1427

1428
    """
1429
    if self.minor is None and not self.Attach():
1430
      logger.Info("DRBD device not attached to a device during Shutdown")
1431
      return True
1432
    if not self._ShutdownAll(self.minor):
1433
      return False
1434
    self.minor = None
1435
    self.dev_path = None
1436
    return True
1437

    
1438
  def Attach(self):
1439
    """Find a DRBD device which matches our config and attach to it.
1440

1441
    In case of partially attached (local device matches but no network
1442
    setup), we perform the network attach. If successful, we re-test
1443
    the attach if can return success.
1444

1445
    """
1446
    for minor in self._GetUsedDevs():
1447
      info = self._GetDevInfo(minor)
1448
      match_l = self._MatchesLocal(info)
1449
      match_r = self._MatchesNet(info)
1450
      if match_l and match_r:
1451
        break
1452
      if match_l and not match_r and "local_addr" not in info:
1453
        res_r = self._AssembleNet(minor,
1454
                                  (self._lhost, self._lport,
1455
                                   self._rhost, self._rport),
1456
                                  "C")
1457
        if res_r and self._MatchesNet(self._GetDevInfo(minor)):
1458
          break
1459
    else:
1460
      minor = None
1461

    
1462
    self._SetFromMinor(minor)
1463
    return minor is not None
1464

    
1465
  def Open(self, force=False):
1466
    """Make the local state primary.
1467

1468
    If the 'force' parameter is given, the '--do-what-I-say' parameter
1469
    is given. Since this is a pottentialy dangerous operation, the
1470
    force flag should be only given after creation, when it actually
1471
    has to be given.
1472

1473
    """
1474
    if self.minor is None and not self.Attach():
1475
      logger.Error("DRBD cannot attach to a device during open")
1476
      return False
1477
    cmd = ["drbdsetup", self.dev_path, "primary"]
1478
    if force:
1479
      cmd.append("--do-what-I-say")
1480
    result = utils.RunCmd(cmd)
1481
    if result.failed:
1482
      msg = ("Can't make drbd device primary: %s" % result.output)
1483
      logger.Error(msg)
1484
      raise errors.BlockDeviceError(msg)
1485

    
1486
  def Close(self):
1487
    """Make the local state secondary.
1488

1489
    This will, of course, fail if the device is in use.
1490

1491
    """
1492
    if self.minor is None and not self.Attach():
1493
      logger.Info("Instance not attached to a device")
1494
      raise errors.BlockDeviceError("Can't find device")
1495
    result = utils.RunCmd(["drbdsetup", self.dev_path, "secondary"])
1496
    if result.failed:
1497
      msg = ("Can't switch drbd device to"
1498
             " secondary: %s" % result.output)
1499
      logger.Error(msg)
1500
      raise errors.BlockDeviceError(msg)
1501

    
1502
  def SetSyncSpeed(self, kbytes):
1503
    """Set the speed of the DRBD syncer.
1504

1505
    """
1506
    children_result = super(DRBDev, self).SetSyncSpeed(kbytes)
1507
    if self.minor is None:
1508
      logger.Info("Instance not attached to a device")
1509
      return False
1510
    result = utils.RunCmd(["drbdsetup", self.dev_path, "syncer", "-r", "%d" %
1511
                           kbytes])
1512
    if result.failed:
1513
      logger.Error("Can't change syncer rate: %s " % result.fail_reason)
1514
    return not result.failed and children_result
1515

    
1516
  def GetSyncStatus(self):
1517
    """Returns the sync status of the device.
1518

1519
    Returns:
1520
     (sync_percent, estimated_time, is_degraded, ldisk)
1521

1522
    If sync_percent is None, it means all is ok
1523
    If estimated_time is None, it means we can't esimate
1524
    the time needed, otherwise it's the time left in seconds.
1525

1526
    The ldisk parameter will be returned as True, since the DRBD7
1527
    devices have not been converted.
1528

1529
    """
1530
    if self.minor is None and not self.Attach():
1531
      raise errors.BlockDeviceError("Can't attach to device in GetSyncStatus")
1532
    proc_info = self._MassageProcData(self._GetProcData())
1533
    if self.minor not in proc_info:
1534
      raise errors.BlockDeviceError("Can't find myself in /proc (minor %d)" %
1535
                                    self.minor)
1536
    line = proc_info[self.minor]
1537
    match = re.match("^.*sync'ed: *([0-9.]+)%.*"
1538
                     " finish: ([0-9]+):([0-9]+):([0-9]+) .*$", line)
1539
    if match:
1540
      sync_percent = float(match.group(1))
1541
      hours = int(match.group(2))
1542
      minutes = int(match.group(3))
1543
      seconds = int(match.group(4))
1544
      est_time = hours * 3600 + minutes * 60 + seconds
1545
    else:
1546
      sync_percent = None
1547
      est_time = None
1548
    match = re.match("^ *[0-9]+: cs:([^ ]+).*$", line)
1549
    if not match:
1550
      raise errors.BlockDeviceError("Can't find my data in /proc (minor %d)" %
1551
                                    self.minor)
1552
    client_state = match.group(1)
1553
    is_degraded = client_state != "Connected"
1554
    return sync_percent, est_time, is_degraded, False
1555

    
1556
  def GetStatus(self):
1557
    """Compute the status of the DRBD device
1558

1559
    Note that DRBD devices don't have the STATUS_EXISTING state.
1560

1561
    """
1562
    if self.minor is None and not self.Attach():
1563
      return self.STATUS_UNKNOWN
1564

    
1565
    data = self._GetProcData()
1566
    match = re.compile("^ *%d: cs:[^ ]+ st:(Primary|Secondary)/.*$" %
1567
                       self.minor)
1568
    for line in data:
1569
      mresult = match.match(line)
1570
      if mresult:
1571
        break
1572
    else:
1573
      logger.Error("Can't find myself!")
1574
      return self.STATUS_UNKNOWN
1575

    
1576
    state = mresult.group(2)
1577
    if state == "Primary":
1578
      result = self.STATUS_ONLINE
1579
    else:
1580
      result = self.STATUS_STANDBY
1581

    
1582
    return result
1583

    
1584
  @staticmethod
1585
  def _ZeroDevice(device):
1586
    """Zero a device.
1587

1588
    This writes until we get ENOSPC.
1589

1590
    """
1591
    f = open(device, "w")
1592
    buf = "\0" * 1048576
1593
    try:
1594
      while True:
1595
        f.write(buf)
1596
    except IOError, err:
1597
      if err.errno != errno.ENOSPC:
1598
        raise
1599

    
1600
  @classmethod
1601
  def Create(cls, unique_id, children, size):
1602
    """Create a new DRBD device.
1603

1604
    Since DRBD devices are not created per se, just assembled, this
1605
    function just zeroes the meta device.
1606

1607
    """
1608
    if len(children) != 2:
1609
      raise errors.ProgrammerError("Invalid setup for the drbd device")
1610
    meta = children[1]
1611
    meta.Assemble()
1612
    if not meta.Attach():
1613
      raise errors.BlockDeviceError("Can't attach to meta device")
1614
    if not cls._CheckMetaSize(meta.dev_path):
1615
      raise errors.BlockDeviceError("Invalid meta device")
1616
    logger.Info("Started zeroing device %s" % meta.dev_path)
1617
    cls._ZeroDevice(meta.dev_path)
1618
    logger.Info("Done zeroing device %s" % meta.dev_path)
1619
    return cls(unique_id, children)
1620

    
1621
  def Remove(self):
1622
    """Stub remove for DRBD devices.
1623

1624
    """
1625
    return self.Shutdown()
1626

    
1627

    
1628
class DRBD8(BaseDRBD):
1629
  """DRBD v8.x block device.
1630

1631
  This implements the local host part of the DRBD device, i.e. it
1632
  doesn't do anything to the supposed peer. If you need a fully
1633
  connected DRBD pair, you need to use this class on both hosts.
1634

1635
  The unique_id for the drbd device is the (local_ip, local_port,
1636
  remote_ip, remote_port) tuple, and it must have two children: the
1637
  data device and the meta_device. The meta device is checked for
1638
  valid size and is zeroed on create.
1639

1640
  """
1641
  _MAX_MINORS = 255
1642
  _PARSE_SHOW = None
1643

    
1644
  def __init__(self, unique_id, children):
    """Set up the device and try to attach to an existing minor.

    A children list which contains any None entry is handled as if no
    children had been given at all.

    Args:
      unique_id: the (local_ip, local_port, remote_ip, remote_port)
                 tuple (or list)
      children: either no children or the (data, meta) device pair

    Raises:
      errors.BlockDeviceError: if the kernel DRBD major version is not 8
      ValueError: if children or unique_id don't have the right shape

    """
    if children and None in children:
      children = []
    super(DRBD8, self).__init__(unique_id, children)
    self.major = self._DRBD_MAJOR

    kernel_ver = self._GetVersion()
    if kernel_ver['k_major'] != 8 :
      raise errors.BlockDeviceError("Mismatch in DRBD kernel version and"
                                    " requested ganeti usage: kernel is"
                                    " %s.%s, ganeti wants 8.x" %
                                    (kernel_ver['k_major'],
                                     kernel_ver['k_minor']))

    child_count = len(children)
    if child_count != 0 and child_count != 2:
      raise ValueError("Invalid configuration data %s" % str(children))
    if not (isinstance(unique_id, (tuple, list)) and len(unique_id) == 4):
      raise ValueError("Invalid configuration data %s" % str(unique_id))
    (self._lhost, self._lport, self._rhost, self._rport) = unique_id
    self.Attach()
1662

    
1663
  @classmethod
  def _InitMeta(cls, minor, dev_path):
    """Write fresh (v08) meta data on a meta device.

    This will not work if the given minor is in use.

    Args:
      minor: the minor whose device path is passed to drbdmeta
      dev_path: the path of the meta device

    Raises:
      errors.BlockDeviceError: if the drbdmeta command fails

    """
    cmd = ["drbdmeta", "--force", cls._DevPath(minor),
           "v08", dev_path, "0", "create-md"]
    result = utils.RunCmd(cmd)
    if not result.failed:
      return
    raise errors.BlockDeviceError("Can't initialize meta device: %s" %
                                  result.output)
1675

    
1676
  @classmethod
  def _FindUnusedMinor(cls):
    """Find an unused DRBD device.

    This is specific to 8.x as the minors are allocated dynamically,
    so non-existing numbers up to a max minor count are actually free.

    Returns:
      the first minor listed as Unconfigured in /proc/drbd if one is
      met while scanning, else zero if no minor is listed at all, else
      the highest listed minor plus one

    Raises:
      errors.BlockDeviceError: if the highest listed minor has already
        reached _MAX_MINORS

    """
    data = cls._GetProcData()

    unused_line = re.compile(r"^ *([0-9]+): cs:Unconfigured$")
    used_line = re.compile(r"^ *([0-9]+): cs:")
    highest = None
    for line in data:
      match = unused_line.match(line)
      if match:
        return int(match.group(1))
      match = used_line.match(line)
      if match:
        minor = int(match.group(1))
        # explicit None test, as comparing None with integers only
        # works by accident (and not at all in newer Python versions)
        if highest is None or minor > highest:
          highest = minor
    if highest is None: # there are no minors in use at all
      return 0
    if highest >= cls._MAX_MINORS:
      logger.Error("Error: no free drbd minors!")
      raise errors.BlockDeviceError("Can't find a free DRBD minor")
    return highest + 1
1703

    
1704
  @classmethod
  def _IsValidMeta(cls, meta_device):
    """Verify that a device holds usable (v08) meta data.

    The check is done by running `drbdmeta ... dstate` against the
    device, using an unused minor.

    Args:
      meta_device: the path of the meta device

    Returns:
      True if the command succeeded, False (after logging the command
      output) otherwise

    """
    free_minor_path = cls._DevPath(cls._FindUnusedMinor())
    cmd = ["drbdmeta", free_minor_path, "v08", meta_device, "0", "dstate"]
    result = utils.RunCmd(cmd)
    if not result.failed:
      return True
    logger.Error("Invalid meta device %s: %s" % (meta_device, result.output))
    return False
1718

    
1719
  @classmethod
  def _GetShowParser(cls):
    """Build (once) and return the parser for `drbdsetup show` output.

    The grammar is created at the first call and cached in the
    _PARSE_SHOW class attribute; later calls return the cached object.

    The accepted input is a sequence of statements
    ("keyword value;") and of sections ("name { statements }"), each
    of them being returned as a group; '#' comments are ignored.

    """
    if cls._PARSE_SHOW is None:
      # punctuation, matched but dropped from the results
      open_brace = pyp.Literal("{").suppress()
      close_brace = pyp.Literal("}").suppress()
      semicolon = pyp.Literal(";").suppress()
      quote_char = pyp.Literal('"').suppress()
      default_marker = pyp.Literal("_is_default").suppress()

      # integers are converted by the parse action
      integer = pyp.Word(pyp.nums).setParseAction(
        lambda text, loc, toks: int(toks[0]))

      hash_comment = pyp.Literal ("#") + pyp.Optional(pyp.restOfLine)

      stmt_keyword = pyp.Word(pyp.alphanums + '-')

      # the possible kinds of values
      plain_value = pyp.Word(pyp.alphanums + '_-/.:')
      quoted_value = quote_char + pyp.CharsNotIn('"') + quote_char
      ip_port = (pyp.Word(pyp.nums + '.') + pyp.Literal(':').suppress() +
                 integer)
      # meta device with index, i.e. value[number]
      indexed_value = ((plain_value ^ quoted_value) +
                       pyp.Literal('[').suppress() +
                       integer + pyp.Word(']').suppress())

      # keyword, value, optional default marker and the semicolon;
      # anything left on the line is discarded
      statement = (~close_brace + stmt_keyword + ~open_brace +
                   (ip_port ^ plain_value ^ quoted_value ^ indexed_value) +
                   pyp.Optional(default_marker) + semicolon +
                   pyp.Optional(pyp.restOfLine).suppress())

      # a named, brace-delimited list of statements
      sect_name = pyp.Word(pyp.alphas + '_')
      sect = (sect_name + open_brace +
              pyp.ZeroOrMore(pyp.Group(statement)) + close_brace)

      grammar = pyp.ZeroOrMore(pyp.Group(sect ^ statement))
      grammar.ignore(hash_comment)

      cls._PARSE_SHOW = grammar

    return cls._PARSE_SHOW
1768

    
1769
  @classmethod
  def _GetShowData(cls, minor):
    """Run `drbdsetup show` for a minor and return its output.

    Args:
      minor: the minor to be queried

    Returns:
      the standard output of the command, or None (after logging the
      failure reason) if the command failed

    """
    show_cmd = ["drbdsetup", cls._DevPath(minor), "show"]
    result = utils.RunCmd(show_cmd)
    if not result.failed:
      return result.stdout
    logger.Error("Can't display the drbd config: %s" % result.fail_reason)
    return None
1779

    
1780
  @classmethod
1781
  def _GetDevInfo(cls, out):
1782
    """Parse details about a given DRBD minor.
1783

1784
    This return, if available, the local backing device (as a path)
1785
    and the local and remote (ip, port) information from a string
1786
    containing the output of the `drbdsetup show` command as returned
1787
    by _GetShowData.
1788

1789
    """
1790
    data = {}
1791
    if not out:
1792
      return data
1793

    
1794
    bnf = cls._GetShowParser()
1795
    # run pyparse
1796

    
1797
    try:
1798
      results = bnf.parseString(out)
1799
    except pyp.ParseException, err:
1800
      raise errors.BlockDeviceError("Can't parse drbdsetup show output: %s" %
1801
                                    str(err))
1802

    
1803
    # and massage the results into our desired format
1804
    for section in results:
1805
      sname = section[0]
1806
      if sname == "_this_host":
1807
        for lst in section[1:]:
1808
          if lst[0] == "disk":
1809
            data["local_dev"] = lst[1]
1810
          elif lst[0] == "meta-disk":
1811
            data["meta_dev"] = lst[1]
1812
            data["meta_index"] = lst[2]
1813
          elif lst[0] == "address":
1814
            data["local_addr"] = tuple(lst[1:])
1815
      elif sname == "_remote_host":
1816
        for lst in section[1:]:
1817
          if lst[0] == "address":
1818
            data["remote_addr"] = tuple(lst[1:])
1819
    return data
1820

    
1821
  def _MatchesLocal(self, info):
1822
    """Test if our local config matches with an existing device.
1823

1824
    The parameter should be as returned from `_GetDevInfo()`. This
1825
    method tests if our local backing device is the same as the one in
1826
    the info parameter, in effect testing if we look like the given
1827
    device.
1828

1829
    """
1830
    if self._children:
1831
      backend, meta = self._children
1832
    else:
1833
      backend = meta = None
1834

    
1835
    if backend is not None:
1836
      retval = ("local_dev" in info and info["local_dev"] == backend.dev_path)
1837
    else:
1838
      retval = ("local_dev" not in info)
1839

    
1840
    if meta is not None:
1841
      retval = retval and ("meta_dev" in info and
1842
                           info["meta_dev"] == meta.dev_path)
1843
      retval = retval and ("meta_index" in info and
1844
                           info["meta_index"] == 0)
1845
    else:
1846
      retval = retval and ("meta_dev" not in info and
1847
                           "meta_index" not in info)
1848
    return retval
1849

    
1850
  def _MatchesNet(self, info):
1851
    """Test if our network config matches with an existing device.
1852

1853
    The parameter should be as returned from `_GetDevInfo()`. This
1854
    method tests if our network configuration is the same as the one
1855
    in the info parameter, in effect testing if we look like the given
1856
    device.
1857

1858
    """
1859
    if (((self._lhost is None and not ("local_addr" in info)) and
1860
         (self._rhost is None and not ("remote_addr" in info)))):
1861
      return True
1862

    
1863
    if self._lhost is None:
1864
      return False
1865

    
1866
    if not ("local_addr" in info and
1867
            "remote_addr" in info):
1868
      return False
1869

    
1870
    retval = (info["local_addr"] == (self._lhost, self._lport))
1871
    retval = (retval and
1872
              info["remote_addr"] == (self._rhost, self._rport))
1873
    return retval
1874

    
1875
  @classmethod
  def _AssembleLocal(cls, minor, backend, meta):
    """Attach the local storage to a DRBD minor.

    This runs `drbdsetup <device> disk` with the given backend and
    meta device paths, after checking that the meta device is valid.
    This is the first thing that must be done on an unconfigured DRBD
    device, and it must be done only once.

    Returns True if the disk was attached, False if the meta device
    is not valid or the command failed.

    """
    if not cls._IsValidMeta(meta):
      return False

    cmd = ["drbdsetup", cls._DevPath(minor), "disk",
           backend, meta, "0", "-e", "detach",
           "--create-device"]
    result = utils.RunCmd(cmd)
    if result.failed:
      logger.Error("Can't attach local disk: %s" % result.output)
      return False
    return True
1891

    
1892
  @classmethod
  def _AssembleNet(cls, minor, net_info, protocol,
                   dual_pri=False, hmac=None, secret=None):
    """Configure the network part of the device.

    The net_info argument is a (lhost, lport, rhost, rport) tuple. If
    any of its elements is None, no network connection is wanted and
    the network part of the device is shut down instead.

    After running `drbdsetup <device> net`, the device configuration
    is polled for up to ten seconds until it reports the requested
    local and remote addresses.

    Returns True on success, False if the command failed or the
    addresses did not show up in time (for the no-network case, the
    result of _ShutdownNet is returned).

    """
    lhost, lport, rhost, rport = net_info
    if None in net_info:
      # no network connection is wanted, so make sure it is shut down
      return cls._ShutdownNet(minor)

    cmd = ["drbdsetup", cls._DevPath(minor), "net",
           "%s:%s" % (lhost, lport), "%s:%s" % (rhost, rport), protocol,
           "-A", "discard-zero-changes",
           "-B", "consensus",
           ]
    if dual_pri:
      cmd.append("-m")
    if hmac and secret:
      cmd.extend(["-a", hmac, "-x", secret])
    result = utils.RunCmd(cmd)
    if result.failed:
      logger.Error("Can't setup network for dbrd device: %s" %
                   result.fail_reason)
      return False

    # wait until the device actually shows the requested addresses
    deadline = time.time() + 10
    while time.time() < deadline:
      info = cls._GetDevInfo(cls._GetShowData(minor))
      if ("local_addr" in info and "remote_addr" in info and
          info["local_addr"] == (lhost, lport) and
          info["remote_addr"] == (rhost, rport)):
        return True
      time.sleep(1)

    logger.Error("Timeout while configuring network")
    return False
1936

    
1937
  def AddChildren(self, devices):
    """Attach local storage to an already set up DRBD device.

    The devices argument must hold exactly two block devices: the
    data device and the meta device, in this order. Both are opened,
    the meta device is checked and initialized, and then the pair is
    attached to our minor and remembered as our children.

    A BlockDeviceError is raised if we have no minor, if the device
    already has a local disk, if the children are not ready or if any
    of the meta or attach steps fails.

    """
    if self.minor is None:
      raise errors.BlockDeviceError("Can't attach to dbrd8 during AddChildren")
    if len(devices) != 2:
      raise errors.BlockDeviceError("Need two devices for AddChildren")

    current = self._GetDevInfo(self._GetShowData(self.minor))
    if "local_dev" in current:
      raise errors.BlockDeviceError("DRBD8 already attached to a local disk")

    backend, meta = devices
    if backend.dev_path is None or meta.dev_path is None:
      raise errors.BlockDeviceError("Children not ready during AddChildren")
    backend.Open()
    meta.Open()

    # prepare the meta device before attaching
    meta_path = meta.dev_path
    if not self._CheckMetaSize(meta_path):
      raise errors.BlockDeviceError("Invalid meta device size")
    self._InitMeta(self._FindUnusedMinor(), meta_path)
    if not self._IsValidMeta(meta_path):
      raise errors.BlockDeviceError("Cannot initalize meta device")

    if not self._AssembleLocal(self.minor, backend.dev_path, meta_path):
      raise errors.BlockDeviceError("Can't attach to local storage")
    self._children = devices
1962

    
1963
  def RemoveChildren(self, devices):
    """Detach the DRBD device from its local storage.

    The devices argument must hold the two device paths (data and
    meta, in this order) which are expected to be our current
    children; they are compared against the children's dev_path.

    Nothing is done if the device has no local disk or if both our
    children are None. A BlockDeviceError is raised if we have no
    minor, on a wrong number of children or devices, on a path
    mismatch or if the detach fails.

    """
    if self.minor is None:
      raise errors.BlockDeviceError("Can't attach to drbd8 during"
                                    " RemoveChildren")

    # without backing storage there is nothing to detach from
    current = self._GetDevInfo(self._GetShowData(self.minor))
    if "local_dev" not in current:
      return

    children = self._children
    if len(children) != 2:
      raise errors.BlockDeviceError("We don't have two children: %s" %
                                    children)
    if children.count(None) == 2: # we don't actually have children :)
      logger.Error("Requested detach while detached")
      return
    if len(devices) != 2:
      raise errors.BlockDeviceError("We need two children in RemoveChildren")

    for child, wanted in zip(children, devices):
      if wanted != child.dev_path:
        raise errors.BlockDeviceError("Mismatch in local storage"
                                      " (%s != %s) in RemoveChildren" %
                                      (wanted, child.dev_path))

    if not self._ShutdownLocal(self.minor):
      raise errors.BlockDeviceError("Can't detach from local storage")
    self._children = []
1991

    
1992
  def SetSyncSpeed(self, kbytes):
    """Set the speed of the DRBD syncer.

    The speed is first passed to the parent class implementation and
    then set on our own device via `drbdsetup <device> syncer -r`.

    Returns False if we have no minor or the command failed;
    otherwise the result of the parent class call is returned.

    """
    children_result = super(DRBD8, self).SetSyncSpeed(kbytes)
    if self.minor is None:
      logger.Info("Instance not attached to a device")
      return False

    cmd = ["drbdsetup", self.dev_path, "syncer", "-r", "%d" % kbytes]
    result = utils.RunCmd(cmd)
    if result.failed:
      logger.Error("Can't change syncer rate: %s " % result.fail_reason)
      return False
    return children_result
2005

    
2006
  def GetSyncStatus(self):
    """Returns the sync status of the device.

    Returns:
     (sync_percent, estimated_time, is_degraded, ldisk)

    If sync_percent is None, no resync progress was found in our
    status line, which means all is ok.
    If estimated_time is None, it means we can't estimate the time
    needed, otherwise it's the time left in seconds.

    The is_degraded value is True on two conditions: the connection
    state is not "Connected" or ldisk is True.

    The ldisk value is True when the local disk state is anything
    other than "UpToDate".

    A BlockDeviceError is raised if we cannot attach to a device or
    cannot find (or parse) our minor's status line.

    """
    if self.minor is None and not self.Attach():
      raise errors.BlockDeviceError("Can't attach to device in GetSyncStatus")
    proc_info = self._MassageProcData(self._GetProcData())
    if self.minor not in proc_info:
      raise errors.BlockDeviceError("Can't find myself in /proc (minor %d)" %
                                    self.minor)
    line = proc_info[self.minor]

    # resync progress, only present while a sync is running
    match = re.match(r"^.*sync'ed: *([0-9.]+)%.*"
                     r" finish: ([0-9]+):([0-9]+):([0-9]+) .*$", line)
    if match:
      sync_percent = float(match.group(1))
      hours = int(match.group(2))
      minutes = int(match.group(3))
      seconds = int(match.group(4))
      est_time = hours * 3600 + minutes * 60 + seconds
    else:
      sync_percent = None
      est_time = None

    # connection state (cs:) and local/remote disk states (ds:)
    match = re.match(r"^ *\d+: cs:(\w+).*ds:(\w+)/(\w+).*$", line)
    if not match:
      raise errors.BlockDeviceError("Can't find my data in /proc (minor %d)" %
                                    self.minor)
    connection_state = match.group(1)
    local_disk_state = match.group(2)
    ldisk = local_disk_state != "UpToDate"
    is_degraded = connection_state != "Connected"
    return sync_percent, est_time, is_degraded or ldisk, ldisk
2051

    
2052
  def GetStatus(self):
    """Compute the status of the DRBD device

    Our minor's line is looked up in the data returned by
    `_GetProcData()` and the local role is read from its "st:" field.

    Note that DRBD devices don't have the STATUS_EXISTING state.

    Returns:
      STATUS_ONLINE if the local role is Primary, STATUS_STANDBY if
      it is Secondary, and STATUS_UNKNOWN if we cannot attach to a
      device or cannot find our minor's line.

    """
    if self.minor is None and not self.Attach():
      return self.STATUS_UNKNOWN

    data = self._GetProcData()
    status_re = re.compile("^ *%d: cs:[^ ]+ st:(Primary|Secondary)/.*$" %
                           self.minor)
    for line in data:
      mresult = status_re.match(line)
      if mresult:
        break
    else:
      logger.Error("Can't find myself!")
      return self.STATUS_UNKNOWN

    # the pattern has a single group (the local role); asking for
    # group 2 would raise IndexError
    state = mresult.group(1)
    if state == "Primary":
      result = self.STATUS_ONLINE
    else:
      result = self.STATUS_STANDBY

    return result
2079

    
2080
  def Open(self, force=False):
    """Make the local state primary.

    If the 'force' parameter is given, the '-o' option is passed to
    `drbdsetup <device> primary`. Since this is a potentially
    dangerous operation, the force flag should be only given after
    creation, when it actually has to be given.

    Returns False if we cannot attach to a device; raises
    BlockDeviceError if the command fails.

    """
    if self.minor is None and not self.Attach():
      logger.Error("DRBD cannot attach to a device during open")
      return False

    cmd = ["drbdsetup", self.dev_path, "primary"]
    if force:
      cmd.append("-o")
    result = utils.RunCmd(cmd)
    if not result.failed:
      return

    msg = ("Can't make drbd device primary: %s" % result.output)
    logger.Error(msg)
    raise errors.BlockDeviceError(msg)
2100

    
2101
  def Close(self):
    """Make the local state secondary.

    This runs `drbdsetup <device> secondary` and will, of course,
    fail if the device is in use.

    Raises BlockDeviceError if we cannot attach to a device or if the
    command fails.

    """
    if self.minor is None and not self.Attach():
      logger.Info("Instance not attached to a device")
      raise errors.BlockDeviceError("Can't find device")

    result = utils.RunCmd(["drbdsetup", self.dev_path, "secondary"])
    if not result.failed:
      return

    msg = ("Can't switch drbd device to"
           " secondary: %s" % result.output)
    logger.Error(msg)
    raise errors.BlockDeviceError(msg)
2116

    
2117
  def Attach(self):
    """Find a DRBD device which matches our config and attach to it.

    All used minors are examined in turn. A minor is accepted when:
      - both its local storage and its network setup match ours, or
      - its network setup matches and it has no local disk, or
      - its local storage matches and we manage to (re)configure its
        network part so that it matches too

    Returns True if a minor was found (and we are now set up from
    it), False otherwise. A BlockDeviceError is raised if a device
    with our local storage is connected to a wrong peer and cannot be
    disconnected.

    """
    for minor in self._GetUsedDevs():
      info = self._GetDevInfo(self._GetShowData(minor))
      match_l = self._MatchesLocal(info)
      match_r = self._MatchesNet(info)

      if match_l and match_r:
        break

      # the weakest case: we find something that is only net attached
      # even though we were passed some children at init time
      if match_r and "local_dev" not in info:
        break

      if not (match_l and not match_r):
        continue

      # from here on the local storage is ours but the network part
      # is not what we want
      if "local_addr" in info:
        # strange case - the device network part points to somewhere
        # else; as we own the drbd space, we try to disconnect from
        # the remote peer and reconnect to our correct one
        if not self._ShutdownNet(minor):
          raise errors.BlockDeviceError("Device has correct local storage,"
                                        " wrong remote peer and is unable to"
                                        " disconnect in order to attach to"
                                        " the correct peer")

      # note: _AssembleNet also handles the case when we don't want a
      # network connection (i.e. one or more of the
      # _[lr](host|port) is None)
      net_data = (self._lhost, self._lport, self._rhost, self._rport)
      if (self._AssembleNet(minor, net_data, "C") and
          self._MatchesNet(self._GetDevInfo(self._GetShowData(minor)))):
        break

    else:
      minor = None

    self._SetFromMinor(minor)
    return minor is not None
2166

    
2167
  def Assemble(self):
    """Assemble the drbd.

    Method:
      - try to attach to an existing device; if this works, we are
        already assembled
      - otherwise run the parent class assemble and give up if it
        fails
      - pick an unused minor
      - if we have both children, attach them as local storage
      - if we have the complete local/remote net info, configure the
        network; if this fails after the local storage was attached,
        the whole device is shut down again

    Returns True on success; on failure either False or the (false)
    result of the parent class assemble is returned.

    """
    self.Attach()
    if self.minor is not None:
      logger.Info("Already assembled")
      return True

    result = super(DRBD8, self).Assemble()
    if not result:
      return result

    minor = self._FindUnusedMinor()

    local_attached = False
    if self._children and self._children[0] and self._children[1]:
      if not self._AssembleLocal(minor, self._children[0].dev_path,
                                 self._children[1].dev_path):
        return False
      local_attached = True

    if self._lhost and self._lport and self._rhost and self._rport:
      net_data = (self._lhost, self._lport, self._rhost, self._rport)
      if not self._AssembleNet(minor, net_data, "C"):
        if local_attached:
          # we will ignore failures from this
          logger.Error("net setup failed, tearing down local device")
          self._ShutdownAll(minor)
        return False

    self._SetFromMinor(minor)
    return True
2216

    
2217
  @classmethod
  def _ShutdownLocal(cls, minor):
    """Detach the given minor from its local device.

    I/Os will continue to be served from the remote device. If we
    don't have a remote device, this operation will fail.

    Returns True if `drbdsetup <device> detach` succeeded, False
    otherwise.

    """
    result = utils.RunCmd(["drbdsetup", cls._DevPath(minor), "detach"])
    if result.failed:
      logger.Error("Can't detach local device: %s" % result.output)
      return False
    return True
2229

    
2230
  @classmethod
  def _ShutdownNet(cls, minor):
    """Disconnect the given minor from the remote peer.

    This fails if we don't have a local device.

    Returns True if `drbdsetup <device> disconnect` succeeded, False
    otherwise.

    """
    result = utils.RunCmd(["drbdsetup", cls._DevPath(minor), "disconnect"])
    if result.failed:
      logger.Error("Can't shutdown network: %s" % result.output)
      return False
    return True
2241

    
2242
  @classmethod
  def _ShutdownAll(cls, minor):
    """Deactivate the given minor.

    This will, of course, fail if the device is in use.

    Returns True if `drbdsetup <device> down` succeeded, False
    otherwise.

    """
    result = utils.RunCmd(["drbdsetup", cls._DevPath(minor), "down"])
    if result.failed:
      logger.Error("Can't shutdown drbd device: %s" % result.output)
      return False
    return True
2253

    
2254
  def Shutdown(self):
    """Shutdown the DRBD device.

    If we cannot attach to any device there is nothing to shut down
    and this counts as success. Otherwise the whole device is
    deactivated and, on success, our minor and device path are
    cleared.

    Returns True on success, False if the deactivation failed.

    """
    if self.minor is None and not self.Attach():
      logger.Info("DRBD device not attached to a device during Shutdown")
      return True

    if self._ShutdownAll(self.minor):
      self.minor = None
      self.dev_path = None
      return True
    return False
2266

    
2267
  def Remove(self):
    """Stub remove for DRBD devices.

    Removing a DRBD device is the same as shutting it down, so this
    returns the result of `Shutdown()`.

    """
    shutdown_result = self.Shutdown()
    return shutdown_result
2272

    
2273
  @classmethod
  def Create(cls, unique_id, children, size):
    """Create a new DRBD8 device.

    Since DRBD devices are not created per se, just assembled, this
    function only initializes the metadata on the second child (the
    meta device) and then returns a new instance built from
    unique_id and children. The size argument is not used here.

    Raises ProgrammerError if we are not given exactly two children
    and BlockDeviceError if the meta device cannot be attached to,
    has an invalid size or cannot be initialized.

    """
    if len(children) != 2:
      raise errors.ProgrammerError("Invalid setup for the drbd device")

    meta = children[1]
    meta.Assemble()
    if not meta.Attach():
      raise errors.BlockDeviceError("Can't attach to meta device")

    meta_path = meta.dev_path
    if not cls._CheckMetaSize(meta_path):
      raise errors.BlockDeviceError("Invalid meta device size")
    cls._InitMeta(cls._FindUnusedMinor(), meta_path)
    if not cls._IsValidMeta(meta_path):
      raise errors.BlockDeviceError("Cannot initalize meta device")

    return cls(unique_id, children)
2293

    
2294

    
2295
# Map from logical disk type constants to the class implementing that
# type; used by the module-level FindDevice, AttachOrAssemble and
# Create functions to select the right implementation
DEV_MAP = {
  constants.LD_LV: LogicalVolume,
  constants.LD_MD_R1: MDRaid1,
  constants.LD_DRBD7: DRBDev,
  constants.LD_DRBD8: DRBD8,
  }
2301

    
2302

    
2303
def FindDevice(dev_type, unique_id, children):
  """Look up an existing, already assembled device.

  The device object is built from the class registered for dev_type
  and is only attached to; no action is taken in order to activate
  the device.

  Returns the device object if the attach succeeded and None
  otherwise; raises ProgrammerError for an unknown device type.

  """
  if dev_type not in DEV_MAP:
    raise errors.ProgrammerError("Invalid block device type '%s'" % dev_type)
  device = DEV_MAP[dev_type](unique_id, children)
  if device.Attach():
    return device
  return None
2316

    
2317

    
2318
def AttachOrAssemble(dev_type, unique_id, children):
  """Attach to an existing device, assembling it first if needed.

  If the first attach fails, the device is assembled and the attach
  is retried once.

  Returns the device object; raises ProgrammerError for an unknown
  device type and BlockDeviceError if the device cannot be attached
  to even after assembling it.

  """
  if dev_type not in DEV_MAP:
    raise errors.ProgrammerError("Invalid block device type '%s'" % dev_type)
  device = DEV_MAP[dev_type](unique_id, children)
  if device.Attach():
    return device

  # not up yet: bring it up and look again
  device.Assemble()
  if device.Attach():
    return device
  raise errors.BlockDeviceError("Can't find a valid block device for"
                                " %s/%s/%s" %
                                (dev_type, unique_id, children))
2335

    
2336

    
2337
def Create(dev_type, unique_id, children, size):
  """Create a device of the given type.

  The work is delegated to the `Create()` method of the class
  registered for dev_type, whose result is returned.

  Raises ProgrammerError for an unknown device type.

  """
  if dev_type not in DEV_MAP:
    raise errors.ProgrammerError("Invalid block device type '%s'" % dev_type)
  dev_class = DEV_MAP[dev_type]
  return dev_class.Create(unique_id, children, size)