Statistics
| Branch: | Tag: | Revision:

root / lib / backend.py @ dcb93971

History | View | Annotate | Download (36.9 kB)

1
#!/usr/bin/python
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Functions used by the node daemon"""
23

    
24

    
25
import os
26
import os.path
27
import shutil
28
import time
29
import tempfile
30
import stat
31
import errno
32
import re
33
import subprocess
34

    
35
from ganeti import logger
36
from ganeti import errors
37
from ganeti import utils
38
from ganeti import ssh
39
from ganeti import hypervisor
40
from ganeti import constants
41
from ganeti import bdev
42
from ganeti import objects
43
from ganeti import ssconf
44

    
45

    
46
def ListConfigFiles():
  """Return a list of the config files present on the local node.

  Both the master and the node configuration file lists are checked;
  only the paths that actually exist locally are returned, master
  files first.

  """
  candidates = list(constants.MASTER_CONFIGFILES) + \
               list(constants.NODE_CONFIGFILES)
  return [path for path in candidates if os.path.exists(path)]
61

    
62

    
63
def StartMaster():
  """Activate local node as master node.

  There are two needed steps for this:
    - run the master start script
    - install the master cron job symlink

  Returns:
    True on success, False if the master script failed.

  """
  result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "start"])
  if not result.failed:
    # refresh the cron link: drop any stale one, then recreate it
    utils.RemoveFile(constants.MASTER_CRON_LINK)
    os.symlink(constants.MASTER_CRON_FILE, constants.MASTER_CRON_LINK)
    return True

  logger.Error("could not activate cluster interface with command %s,"
               " error: '%s'" % (result.cmd, result.output))
  return False
81

    
82

    
83
def StopMaster():
  """Deactivate this node as master.

  This does two things:
    - run the master stop script
    - remove the link to the master cron script

  Returns:
    True on success, False if the master script failed.

  """
  result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "stop"])
  if not result.failed:
    utils.RemoveFile(constants.MASTER_CRON_LINK)
    return True

  logger.Error("could not deactivate cluster interface with command %s,"
               " error: '%s'" % (result.cmd, result.output))
  return False
101

    
102

    
103
def _WriteKeyFile(path, data):
  """Write data to path, guaranteeing the file object is closed.

  The previous inline open/write/close sequences leaked the file
  handle if write() raised; the try/finally here fixes that.

  """
  f = open(path, 'w')
  try:
    f.write(data)
  finally:
    f.close()


def AddNode(dsa, dsapub, rsa, rsapub, ssh, sshpub):
  """Add this node to the cluster.

  This installs the cluster-wide SSH host keys and root's SSH
  identity, authorizes the new identity, restarts sshd and clears
  the stale known_hosts file.

  Args:
    dsa, dsapub: private/public DSA host key contents
    rsa, rsapub: private/public RSA host key contents
    ssh, sshpub: private/public contents for root's SSH identity
      (NOTE: the 'ssh' parameter shadows the module-level 'ssh'
      import within this function; the name is kept for interface
      compatibility)

  Returns:
    True

  """
  _WriteKeyFile("/etc/ssh/ssh_host_rsa_key", rsa)
  _WriteKeyFile("/etc/ssh/ssh_host_rsa_key.pub", rsapub)
  _WriteKeyFile("/etc/ssh/ssh_host_dsa_key", dsa)
  _WriteKeyFile("/etc/ssh/ssh_host_dsa_key.pub", dsapub)

  if not os.path.isdir("/root/.ssh"):
    os.mkdir("/root/.ssh")

  _WriteKeyFile("/root/.ssh/id_dsa", ssh)
  _WriteKeyFile("/root/.ssh/id_dsa.pub", sshpub)

  # authorize the freshly written public key for root logins
  f = open('/root/.ssh/id_dsa.pub', 'r')
  try:
    utils.AddAuthorizedKey('/root/.ssh/authorized_keys', f.read(8192))
  finally:
    f.close()

  utils.RunCmd(["/etc/init.d/ssh", "restart"])

  # the old known_hosts is invalid after the host-key change
  utils.RemoveFile("/root/.ssh/known_hosts")
  return True
148

    
149

    
150
def LeaveCluster():
  """Clean up this node so it can be removed from the cluster.

  Removes all files directly under the data directory, revokes
  root's cluster SSH identity from authorized_keys and deletes the
  identity key pair.

  """
  if os.path.exists(constants.DATA_DIR):
    for dirpath, dirnames, filenames in os.walk(constants.DATA_DIR):
      # only the top-level files are removed, not subdirectory contents
      if dirpath != constants.DATA_DIR:
        continue
      for entry in filenames:
        os.unlink(os.path.join(dirpath, entry))

  pubkey_file = open('/root/.ssh/id_dsa.pub', 'r')
  try:
    utils.RemoveAuthorizedKey('/root/.ssh/authorized_keys',
                              pubkey_file.read(8192))
  finally:
    pubkey_file.close()

  utils.RemoveFile('/root/.ssh/id_dsa')
  utils.RemoveFile('/root/.ssh/id_dsa.pub')
168

    
169

    
170
def GetNodeInfo(vgname):
  """Return a dictionary of information about this node.

  Args:
    vgname: name of the volume group to query

  Returns:
    dict with keys 'vg_size' and 'vg_free' (volume group total/free
    size in MiB), merged with whatever the hypervisor reports for
    this node (e.g. dom0/free/total memory).

  """
  vginfo = _GetVGInfo(vgname)
  node_info = {
    'vg_size': vginfo['vg_size'],
    'vg_free': vginfo['vg_free'],
    }

  hyp_info = hypervisor.GetHypervisor().GetNodeInfo()
  if hyp_info is not None:
    node_info.update(hyp_info)

  return node_info
196

    
197

    
198
def VerifyNode(what):
  """Verify the status of the local node.

  Args:
    what: a dictionary of checks to perform; recognized keys:
      'hypervisor': run the hypervisor-specific verify
      'filelist': list of files for which to compute checksums
      'nodelist': list of nodes to verify ssh hostname resolution for

  Returns:
    dict with one entry per requested check; the 'nodelist' entry
    maps only the nodes that FAILED verification to their error
    message.

  """
  result = {}

  if 'hypervisor' in what:
    result['hypervisor'] = hypervisor.GetHypervisor().Verify()

  if 'filelist' in what:
    result['filelist'] = utils.FingerprintFiles(what['filelist'])

  if 'nodelist' in what:
    failures = {}
    for node in what['nodelist']:
      success, message = ssh.VerifyNodeHostname(node)
      if not success:
        failures[node] = message
    result['nodelist'] = failures

  return result
232

    
233

    
234
def GetVolumeList(vg_name):
  """Compute the list of logical volumes and their size.

  Args:
    vg_name: the volume group to list

  Returns:
    dict mapping each logical volume name to its size string as
    printed by lvs (e.g. 'test1': '20.06m'); empty dict if the
    lvs command fails.

  """
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m",
                         "-oname,size", vg_name])
  if result.failed:
    logger.Error("Failed to list logical volumes, lvs output: %s" %
                 result.output)
    return {}

  # each output line is "<name> <size>"; split yields the pair
  return dict(line.split() for line in result.output.splitlines())
251

    
252

    
253
def ListVolumeGroups():
  """List the volume groups and their size.

  Thin wrapper delegating to utils.ListVolumeGroups.

  Returns:
    dict with the volume group names as keys and their size as
    values.

  """
  return utils.ListVolumeGroups()
261

    
262

    
263
def NodeVolumes():
  """List all logical volumes on this node.

  Returns:
    list of dicts with keys 'name', 'size', 'dev' (the physical
    device, stripped of any '(offset)' suffix lvs appends) and 'vg';
    empty dict if the lvs command fails.

  """
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=|",
                         "--options=lv_name,lv_size,devices,vg_name"])
  if result.failed:
    logger.Error("Failed to list logical volumes, lvs output: %s" %
                 result.output)
    return {}

  volumes = []
  for line in result.output.splitlines():
    fields = [part.strip() for part in line.split('|')]
    volumes.append({
      'name': fields[0],
      'size': fields[1],
      # drop the "(extent offset)" suffix from the devices column
      'dev': fields[2].split('(')[0],
      'vg': fields[3],
      })
  return volumes
290

    
291

    
292
def BridgesExist(bridges_list):
  """Check whether all the given bridges exist on this node.

  Args:
    bridges_list: iterable of bridge names to check

  Returns:
    True if every bridge exists, False as soon as one is missing
    (remaining bridges are not checked).

  """
  for bridge_name in bridges_list:
    if not utils.BridgeExists(bridge_name):
      return False
  return True
304

    
305

    
306
def GetInstanceList():
  """Provide a list of instances.

  Returns:
    A list of the names of all instances currently running on this
    node, as reported by the hypervisor, e.g.:
    - instance1.example.com
    - instance2.example.com

  Raises:
    errors.HypervisorError: re-raised (after logging) if the
    hypervisor cannot enumerate the instances

  """
  try:
    names = hypervisor.GetHypervisor().ListInstances()
  except errors.HypervisorError, err:
    logger.Error("error enumerating instances: %s" % str(err))
    raise

  return names
322

    
323

    
324
def GetInstanceInfo(instance):
  """Return information about a single instance as a dictionary.

  Args:
    instance: name of the instance (e.g. instance1.example.com)

  Returns:
    dict such as {'memory': 511, 'state': '-b---', 'time': 3188.8}
    where memory is the instance memory size (int), state the
    hypervisor state string and time the consumed cpu time (float);
    an empty dict if the hypervisor has no data for the instance.

  """
  iinfo = hypervisor.GetHypervisor().GetInstanceInfo(instance)
  if iinfo is None:
    return {}

  return {
    'memory': iinfo[2],
    'state': iinfo[4],
    'time': iinfo[5],
    }
348

    
349

    
350
def GetAllInstancesInfo():
  """Gather data about all instances.

  This is the equivalent of `GetInstanceInfo()`, except that it
  computes data for all instances at once, thus being faster if one
  needs data about more than one instance.

  Returns:
    a dictionary of dictionaries, keys being the instance name,
    and with values:
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, 'vcpus': 1 }
    where
    memory: memory size of instance (int)
    state: xen state of instance (string)
    time: cpu time of instance (float)
    vcpus: the number of cpus

  """
  output = {}

  iinfo = hypervisor.GetHypervisor().GetAllInstancesInfo()
  if iinfo:
    # the second tuple element (the instance id) is unused; the
    # original bound it to the name 'id', shadowing the builtin
    for name, _, memory, vcpus, state, times in iinfo:
      output[name] = {
        'memory': memory,
        'vcpus': vcpus,
        'state': state,
        'time': times,
        }

  return output
380

    
381

    
382
def AddOSToInstance(instance, os_disk, swap_disk):
  """Add an os to an instance.

  Runs the OS's create script against the instance's os and swap
  block devices, logging the script output under
  constants.LOG_OS_DIR.

  Args:
    instance: the instance object
    os_disk: the instance-visible name of the os device
    swap_disk: the instance-visible name of the swap device

  Returns:
    True on success, False if a named device cannot be found in the
    instance's disks or the create script fails.

  Raises:
    errors.BlockDeviceError: if a matching disk is configured but
    its backing block device is not set up

  """
  inst_os = OSFromDisk(instance.os)

  create_script = inst_os.create_script

  # find the disk whose instance-visible name matches os_disk; the
  # for/else runs the else branch only when no break occurred
  for os_device in instance.disks:
    if os_device.iv_name == os_disk:
      break
  else:
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
    return False

  for swap_device in instance.disks:
    if swap_device.iv_name == swap_disk:
      break
  else:
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
    return False

  real_os_dev = _RecursiveFindBD(os_device)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(os_device))
  real_os_dev.Open()

  real_swap_dev = _RecursiveFindBD(swap_device)
  if real_swap_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(swap_device))
  real_swap_dev.Open()

  # one log file per (os, instance, timestamp) invocation
  logfile = "%s/add-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
                                     instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  command = utils.BuildShellCmd("cd %s; %s -i %s -b %s -s %s &>%s",
                                inst_os.path, create_script, instance.name,
                                real_os_dev.dev_path, real_swap_dev.dev_path,
                                logfile)

  result = utils.RunCmd(command)

  if result.failed:
    logger.Error("os create command '%s' returned error: %s"
                 " output: %s" %
                 (command, result.fail_reason, result.output))
    return False

  return True
440

    
441

    
442
def _GetVGInfo(vg_name):
  """Get information about the volume group.

  Args:
    vg_name: the volume group

  Returns:
    { 'vg_size' : xxx, 'vg_free' : xxx, 'pv_count' : xxx }
    where
    vg_size is the total size of the volume group in MiB
    vg_free is the free size of the volume group in MiB
    pv_count are the number of physical disks in that vg

  Raises:
    errors.LVMError: if the vgs command fails (volume group not
    present)

  """
  retval = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings",
                         "--nosuffix", "--units=m", "--separator=:", vg_name])

  if retval.failed:
    errmsg = "volume group %s not present" % vg_name
    logger.Error(errmsg)
    raise errors.LVMError(errmsg)
  # NOTE(review): this reads retval.stdout while the rest of this file
  # uses result.output for RunCmd results -- confirm both attributes
  # exist on the RunCmd result object
  valarr = retval.stdout.strip().split(':')
  retdic = {
    "vg_size": int(round(float(valarr[0]), 0)),
    "vg_free": int(round(float(valarr[1]), 0)),
    "pv_count": int(valarr[2]),
    }
  return retdic
470

    
471

    
472
def _GatherBlockDevs(instance):
  """Set up an instance's block device(s) for use.

  This is run on the primary node at instance startup; the block
  devices must be already assembled. Each disk is located, opened,
  and returned paired with its backend device.

  Args:
    instance: the instance whose disks to gather

  Returns:
    list of (disk, device) tuples

  Raises:
    errors.BlockDeviceError: if any disk's device cannot be found

  """
  gathered = []
  for disk in instance.disks:
    found_dev = _RecursiveFindBD(disk)
    if found_dev is None:
      raise errors.BlockDeviceError("Block device '%s' is not set up." %
                                    str(disk))
    found_dev.Open()
    gathered.append((disk, found_dev))
  return gathered
488

    
489

    
490
def StartInstance(instance, extra_args):
  """Start an instance.

  Args:
    instance: the instance object to start
    extra_args: extra arguments passed through to the hypervisor

  Returns:
    True if the instance was already running or was started
    successfully, False if the hypervisor reported an error.

  """
  running_instances = GetInstanceList()

  # starting an already-running instance counts as success
  if instance.name in running_instances:
    return True

  block_devices = _GatherBlockDevs(instance)
  hyper = hypervisor.GetHypervisor()

  try:
    hyper.StartInstance(instance, block_devices, extra_args)
  except errors.HypervisorError, err:
    logger.Error("Failed to start instance: %s" % err)
    return False

  return True
512

    
513

    
514
def ShutdownInstance(instance):
515
  """Shut an instance down.
516

517
  Args:
518
    instance - name of instance to shutdown.
519
  """
520

    
521
  running_instances = GetInstanceList()
522

    
523
  if instance.name not in running_instances:
524
    return True
525

    
526
  hyper = hypervisor.GetHypervisor()
527
  try:
528
    hyper.StopInstance(instance)
529
  except errors.HypervisorError, err:
530
    logger.Error("Failed to stop instance: %s" % err)
531
    return False
532

    
533
  # test every 10secs for 2min
534
  shutdown_ok = False
535

    
536
  time.sleep(1)
537
  for dummy in range(11):
538
    if instance.name not in GetInstanceList():
539
      break
540
    time.sleep(10)
541
  else:
542
    # the shutdown did not succeed
543
    logger.Error("shutdown of '%s' unsuccessful, using destroy" % instance)
544

    
545
    try:
546
      hyper.StopInstance(instance, force=True)
547
    except errors.HypervisorError, err:
548
      logger.Error("Failed to stop instance: %s" % err)
549
      return False
550

    
551
    time.sleep(1)
552
    if instance.name in GetInstanceList():
553
      logger.Error("could not shutdown instance '%s' even by destroy")
554
      return False
555

    
556
  return True
557

    
558

    
559
def CreateBlockDevice(disk, size, on_primary):
  """Creates a block device for an instance.

  Args:
    disk: a ganeti.objects.Disk object describing the device to create
    size: the size of the physical underlying devices
    on_primary: whether we run on the primary node; together with the
      disk's Assemble/OpenOnSecondary flags this controls whether the
      device and its children are assembled and opened after creation

  Returns:
    the new unique_id of the device (this can sometime be
    computed only after creation), or None. On secondary nodes,
    it's not required to return anything.

  Raises:
    ValueError: if bdev.Create cannot create the device

  """
  # first assemble all children, since the device may be built on them
  clist = []
  if disk.children:
    for child in disk.children:
      crdev = _RecursiveAssembleBD(child, on_primary)
      if on_primary or disk.AssembleOnSecondary():
        # we need the children open in case the device itself has to
        # be assembled
        crdev.Open()
      else:
        crdev.Close()
      clist.append(crdev)
  try:
    # remove any pre-existing device with the same id so creation
    # starts from a clean slate
    device = bdev.FindDevice(disk.dev_type, disk.physical_id, clist)
    if device is not None:
      logger.Info("removing existing device %s" % disk)
      device.Remove()
  except errors.BlockDeviceError, err:
    # deliberate best-effort: failure to find/remove an old device
    # must not abort the creation
    pass

  device = bdev.Create(disk.dev_type, disk.physical_id,
                       clist, size)
  if device is None:
    raise ValueError("Can't create child device for %s, %s" %
                     (disk, size))
  if on_primary or disk.AssembleOnSecondary():
    device.Assemble()
    device.SetSyncSpeed(30*1024)
    if on_primary or disk.OpenOnSecondary():
      device.Open(force=True)
  physical_id = device.unique_id
  return physical_id
605

    
606

    
607
def RemoveBlockDevice(disk):
  """Remove a block device.

  This is intended to be called recursively: the device itself is
  removed first (if it can be attached to at all), then each of its
  children.

  Args:
    disk: the objects.Disk instance to remove

  Returns:
    True if the device and all its children were removed; a device
    we cannot attach to counts as already removed

  """
  try:
    # since we are removing the device, allow a partial match
    # this allows removal of broken mirrors
    rdev = _RecursiveFindBD(disk, allow_partial=True)
  except errors.BlockDeviceError, err:
    # probably can't attach
    logger.Info("Can't attach to device %s in remove" % disk)
    rdev = None
  if rdev is not None:
    result = rdev.Remove()
  else:
    result = True
  if disk.children:
    for child in disk.children:
      result = result and RemoveBlockDevice(child)
  return result
629

    
630

    
631
def _RecursiveAssembleBD(disk, as_primary):
  """Activate a block device for an instance.

  This is run on the primary and secondary nodes for an instance.

  This function is called recursively.

  Args:
    disk: a objects.Disk object
    as_primary: if we should make the block device read/write

  Returns:
    the assembled device object when this node needs the device
    assembled, or True when it does not (secondary node with a disk
    type that is not assembled on secondaries)

  If the assembly is not successful, an exception is raised.

  """
  # assemble all children first, so the parent device can be built
  # on top of them
  children = []
  if disk.children:
    for chld_disk in disk.children:
      children.append(_RecursiveAssembleBD(chld_disk, as_primary))

  if as_primary or disk.AssembleOnSecondary():
    r_dev = bdev.AttachOrAssemble(disk.dev_type, disk.physical_id, children)
    r_dev.SetSyncSpeed(30*1024)
    result = r_dev
    # open read/write only where needed; otherwise keep it closed
    if as_primary or disk.OpenOnSecondary():
      r_dev.Open()
    else:
      r_dev.Close()
  else:
    result = True
  return result
664

    
665

    
666
def AssembleBlockDevice(disk, as_primary):
  """Activate a block device for an instance.

  This is a wrapper over _RecursiveAssembleBD that converts an
  assembled device object into its /dev path.

  Returns:
    a /dev path for primary nodes
    True for secondary nodes

  """
  assembled = _RecursiveAssembleBD(disk, as_primary)
  if isinstance(assembled, bdev.BlockDev):
    return assembled.dev_path
  return assembled
680

    
681

    
682
def ShutdownBlockDevice(disk):
  """Shut down a block device.

  First, if the device is assembled (can `Attach()`), then the device
  is shutdown. Then the children of the device are shutdown.

  This function is called recursively. Note that we don't cache the
  children or such, as oppossed to assemble, shutdown of different
  devices doesn't require that the upper device was active.

  Returns:
    True if the device and all its children shut down cleanly

  """
  found_dev = _RecursiveFindBD(disk)
  if found_dev is None:
    ok = True
  else:
    ok = found_dev.Shutdown()

  for child in (disk.children or []):
    ok = ok and ShutdownBlockDevice(child)

  return ok
702

    
703

    
704
def MirrorAddChild(md_cdev, new_cdev):
  """Extend an MD raid1 array with a new device.

  Args:
    md_cdev: the disk describing the md array
    new_cdev: the disk describing the device to add

  Returns:
    True on success, False if either device cannot be found.

  """
  array_dev = _RecursiveFindBD(md_cdev, allow_partial=True)
  if array_dev is None:
    logger.Error("Can't find md device")
    return False

  added_dev = _RecursiveFindBD(new_cdev)
  if added_dev is None:
    logger.Error("Can't find new device to add")
    return False

  added_dev.Open()
  array_dev.AddChild(added_dev)
  return True
719

    
720

    
721
def MirrorRemoveChild(md_cdev, new_cdev):
  """Reduce an MD raid1 array by removing a device.

  Args:
    md_cdev: the disk describing the md array
    new_cdev: the disk describing the device to remove

  Returns:
    True on success, False if either device cannot be found.

  """
  array_dev = _RecursiveFindBD(md_cdev)
  if array_dev is None:
    return False

  removed_dev = _RecursiveFindBD(new_cdev)
  if removed_dev is None:
    return False

  removed_dev.Open()
  array_dev.RemoveChild(removed_dev.dev_path)
  return True
734

    
735

    
736
def GetMirrorStatus(disks):
  """Get the mirroring status of a list of devices.

  Args:
    disks: list of `objects.Disk`

  Returns:
    list of (mirror_done, estimated_time) tuples, which
    are the result of bdev.BlockDevice.CombinedSyncStatus()

  Raises:
    errors.BlockDeviceError: if any device in the list cannot be
    found

  """
  stats = []
  for dsk in disks:
    rbd = _RecursiveFindBD(dsk)
    if rbd is None:
      raise errors.BlockDeviceError, "Can't find device %s" % str(dsk)
    stats.append(rbd.CombinedSyncStatus())
  return stats
754

    
755

    
756
def _RecursiveFindBD(disk, allow_partial=False):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance
    allow_partial: don't abort the find if a child of the
                   device can't be found; this is intended to be
                   used when repairing mirrors
                   (NOTE(review): this parameter is currently never
                   referenced in the body -- confirm whether it
                   should be forwarded to the recursive calls or to
                   bdev.FindDevice)

  Returns:
    None if the device can't be found
    otherwise the device instance

  """
  # resolve the children first; FindDevice may receive None entries
  # for children that are themselves not found
  children = []
  if disk.children:
    for chdisk in disk.children:
      children.append(_RecursiveFindBD(chdisk))

  return bdev.FindDevice(disk.dev_type, disk.physical_id, children)
778

    
779

    
780
def FindBlockDevice(disk):
  """Check if a device is activated and describe it.

  Args:
    disk: the objects.Disk instance
  Returns:
    None if the device can't be found, otherwise the tuple
    (device_path, major, minor, sync_percent, estimated_time,
    is_degraded)

  """
  found = _RecursiveFindBD(disk)
  if found is None:
    return None

  sync_p, est_t, is_degr = found.GetSyncStatus()
  return (found.dev_path, found.major, found.minor,
          sync_p, est_t, is_degr)
797

    
798

    
799
def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
  """Write a file to the filesystem.

  This allows the master to overwrite(!) a file. It will only perform
  the operation if the file belongs to a list of configuration files.

  Args:
    file_name: absolute path of the target file
    data: the contents to write
    mode: permission bits for the new file
    uid, gid: ownership for the new file
    atime, mtime: timestamps to set on the new file

  Returns:
    True on success, False if the path is not absolute or is not an
    allowed upload target.

  """
  if not os.path.isabs(file_name):
    logger.Error("Filename passed to UploadFile is not absolute: '%s'" %
                 file_name)
    return False

  # whitelist: cluster config, /etc/hosts, ssh known hosts, plus the
  # files registered in the simple store
  allowed_files = [constants.CLUSTER_CONF_FILE, "/etc/hosts",
                   "/etc/ssh/ssh_known_hosts"]
  allowed_files.extend(ssconf.SimpleStore().GetFileList())
  if file_name not in allowed_files:
    logger.Error("Filename passed to UploadFile not in allowed"
                 " upload targets: '%s'" % file_name)
    return False

  # write to a temp file in the same directory, then rename over the
  # target so readers never observe a partially-written file
  dir_name, small_name = os.path.split(file_name)
  fd, new_name = tempfile.mkstemp('.new', small_name, dir_name)
  # here we need to make sure we remove the temp file, if any error
  # leaves it in place
  try:
    os.chown(new_name, uid, gid)
    os.chmod(new_name, mode)
    os.write(fd, data)
    os.fsync(fd)
    os.utime(new_name, (atime, mtime))
    os.rename(new_name, file_name)
  finally:
    os.close(fd)
    # after a successful rename this path no longer exists; presumably
    # utils.RemoveFile tolerates a missing path -- confirm
    utils.RemoveFile(new_name)
  return True
834

    
835
def _ErrnoOrStr(err):
836
  """Format an EnvironmentError exception.
837

838
  If the `err` argument has an errno attribute, it will be looked up
839
  and converted into a textual EXXXX description. Otherwise the string
840
  representation of the error will be returned.
841

842
  """
843
  if hasattr(err, 'errno'):
844
    detail = errno.errorcode[err.errno]
845
  else:
846
    detail = str(err)
847
  return detail
848

    
849

    
850
def _OSOndiskVersion(name, os_dir=None):
  """Compute and return the api version of a given OS.

  This function will try to read the api version of the os given by
  the 'name' parameter. By default, it will use constants.OS_DIR as
  the top-level directory for OSes, but this can be overridden by
  the use of the os_dir parameter.

  Args:
    name: the OS name
    os_dir: optional directory holding this OS (defaults to
      constants.OS_DIR/name)

  Returns:
    the API version as an integer

  Raises:
    errors.InvalidOS: if the 'ganeti_api_version' file is missing,
    not a regular file, unreadable, or does not contain an integer

  """
  if os_dir is None:
    os_dir = os.path.sep.join([constants.OS_DIR, name])

  api_file = os.path.sep.join([os_dir, "ganeti_api_version"])

  try:
    st = os.stat(api_file)
  except EnvironmentError, err:
    raise errors.InvalidOS, (name, "'ganeti_api_version' file not"
                             " found (%s)" % _ErrnoOrStr(err))

  if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
    raise errors.InvalidOS, (name, "'ganeti_api_version' file is not"
                             " a regular file")

  try:
    f = open(api_file)
    try:
      # the version file is expected to be tiny; 256 bytes is plenty
      api_version = f.read(256)
    finally:
      f.close()
  except EnvironmentError, err:
    raise errors.InvalidOS, (name, "error while reading the"
                             " API version (%s)" % _ErrnoOrStr(err))

  api_version = api_version.strip()
  try:
    api_version = int(api_version)
  except (TypeError, ValueError), err:
    raise errors.InvalidOS, (name, "API version is not integer (%s)" %
                             str(err))

  return api_version
894

    
895
def DiagnoseOS(top_dir=None):
  """Compute the validity for all OSes.

  For each name in the given top_dir parameter (if not given, defaults
  to constants.OS_DIR), it will return an object. If this is a valid
  os, the object will be an instance of the object.OS class. If not,
  it will be an instance of errors.InvalidOS and this signifies that
  this name does not correspond to a valid OS.

  Returns:
    list of objects (OS or InvalidOS instances)
    (NOTE(review): when the top directory itself cannot be listed,
    this returns False instead of a list -- confirm callers handle
    both shapes)

  """
  if top_dir is None:
    top_dir = constants.OS_DIR

  try:
    f_names = os.listdir(top_dir)
  except EnvironmentError, err:
    logger.Error("Can't list the OS directory: %s" % str(err))
    return False
  result = []
  for name in f_names:
    try:
      os_inst = OSFromDisk(name, os.path.sep.join([top_dir, name]))
      result.append(os_inst)
    except errors.InvalidOS, err:
      # an invalid OS is reported as the exception object itself
      result.append(err)

  return result
925

    
926

    
927
def OSFromDisk(name, os_dir=None):
  """Create an OS instance from disk.

  This function will return an OS instance if the given name is a
  valid OS name. Otherwise, it will raise an appropriate
  `errors.InvalidOS` exception, detailing why this is not a valid
  OS.

  Args:
    name: the OS name
    os_dir: optional directory holding this OS (defaults to
      constants.OS_DIR/name)

  Returns:
    an objects.OS instance with the script paths filled in

  Raises:
    errors.InvalidOS: on API version mismatch, or when any of the
    required scripts is missing, not executable, or not a regular
    file

  """
  if os_dir is None:
    os_dir = os.path.sep.join([constants.OS_DIR, name])

  api_version = _OSOndiskVersion(name, os_dir)

  if api_version != constants.OS_API_VERSION:
    raise errors.InvalidOS, (name, "API version mismatch (found %s want %s)"
                             % (api_version, constants.OS_API_VERSION))

  # OS Scripts dictionary, we will populate it with the actual script names
  os_scripts = {'create': '', 'export': '', 'import': ''}

  for script in os_scripts:
    os_scripts[script] = os.path.sep.join([os_dir, script])

    try:
      st = os.stat(os_scripts[script])
    except EnvironmentError, err:
      raise errors.InvalidOS, (name, "'%s' script missing (%s)" %
                               (script, _ErrnoOrStr(err)))

    # each script must be executable by its owner...
    if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
      raise errors.InvalidOS, (name, "'%s' script not executable" % script)

    # ...and a regular file (not a directory, device, etc.)
    if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
      raise errors.InvalidOS, (name, "'%s' is not a regular file" % script)


  return objects.OS(name=name, path=os_dir,
                    create_script=os_scripts['create'],
                    export_script=os_scripts['export'],
                    import_script=os_scripts['import'],
                    api_version=api_version)
969

    
970

    
971
def SnapshotBlockDevice(disk):
972
  """Create a snapshot copy of a block device.
973

974
  This function is called recursively, and the snapshot is actually created
975
  just for the leaf lvm backend device.
976

977
  Args:
978
    disk: the disk to be snapshotted
979

980
  Returns:
981
    a config entry for the actual lvm device snapshotted.
982
  """
983

    
984
  if disk.children:
985
    if len(disk.children) == 1:
986
      # only one child, let's recurse on it
987
      return SnapshotBlockDevice(disk.children[0])
988
    else:
989
      # more than one child, choose one that matches
990
      for child in disk.children:
991
        if child.size == disk.size:
992
          # return implies breaking the loop
993
          return SnapshotBlockDevice(child)
994
  elif disk.dev_type == "lvm":
995
    r_dev = _RecursiveFindBD(disk)
996
    if r_dev is not None:
997
      # let's stay on the safe side and ask for the full size, for now
998
      return r_dev.Snapshot(disk.size)
999
    else:
1000
      return None
1001
  else:
1002
    raise errors.ProgrammerError, ("Cannot snapshot non-lvm block device"
1003
                                   "'%s' of type '%s'" %
1004
                                   (disk.unique_id, disk.dev_type))
1005

    
1006

    
1007
def ExportSnapshot(disk, dest_node, instance):
1008
  """Export a block device snapshot to a remote node.
1009

1010
  Args:
1011
    disk: the snapshot block device
1012
    dest_node: the node to send the image to
1013
    instance: instance being exported
1014

1015
  Returns:
1016
    True if successful, False otherwise.
1017
  """
1018

    
1019
  inst_os = OSFromDisk(instance.os)
1020
  export_script = inst_os.export_script
1021

    
1022
  logfile = "%s/exp-%s-%s-%s.log" % (constants.LOG_OS_DIR, inst_os.name,
1023
                                     instance.name, int(time.time()))
1024
  if not os.path.exists(constants.LOG_OS_DIR):
1025
    os.mkdir(constants.LOG_OS_DIR, 0750)
1026

    
1027
  real_os_dev = _RecursiveFindBD(disk)
1028
  if real_os_dev is None:
1029
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
1030
                                  str(disk))
1031
  real_os_dev.Open()
1032

    
1033
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
1034
  destfile = disk.physical_id[1]
1035

    
1036
  # the target command is built out of three individual commands,
1037
  # which are joined by pipes; we check each individual command for
1038
  # valid parameters
1039

    
1040
  expcmd = utils.BuildShellCmd("cd %s; %s -i %s -b %s 2>%s", inst_os.path,
1041
                               export_script, instance.name,
1042
                               real_os_dev.dev_path, logfile)
1043

    
1044
  comprcmd = "gzip"
1045

    
1046
  remotecmd = utils.BuildShellCmd("ssh -q -oStrictHostKeyChecking=yes"
1047
                                  " -oBatchMode=yes -oEscapeChar=none"
1048
                                  " %s 'mkdir -p %s; cat > %s/%s'",
1049
                                  dest_node, destdir, destdir, destfile)
1050

    
1051
  # all commands have been checked, so we're safe to combine them
1052
  command = '|'.join([expcmd, comprcmd, remotecmd])
1053

    
1054
  result = utils.RunCmd(command)
1055

    
1056
  if result.failed:
1057
    logger.Error("os snapshot export command '%s' returned error: %s"
1058
                 " output: %s" %
1059
                 (command, result.fail_reason, result.output))
1060
    return False
1061

    
1062
  return True
1063

    
1064

    
1065
def FinalizeExport(instance, snap_disks):
  """Write out the export configuration information.

  Args:
    instance: instance configuration
    snap_disks: snapshot block devices

  Returns:
    False in case of error, True otherwise.

  """
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
  finaldestdir = os.path.join(constants.EXPORT_DIR, instance.name)

  config = objects.SerializableConfigParser()

  config.add_section(constants.INISECT_EXP)
  config.set(constants.INISECT_EXP, 'version', '0')
  config.set(constants.INISECT_EXP, 'timestamp', '%d' % int(time.time()))
  config.set(constants.INISECT_EXP, 'source', instance.primary_node)
  config.set(constants.INISECT_EXP, 'os', instance.os)
  config.set(constants.INISECT_EXP, 'compression', 'gzip')

  config.add_section(constants.INISECT_INS)
  config.set(constants.INISECT_INS, 'name', instance.name)
  config.set(constants.INISECT_INS, 'memory', '%d' % instance.memory)
  config.set(constants.INISECT_INS, 'vcpus', '%d' % instance.vcpus)
  config.set(constants.INISECT_INS, 'disk_template', instance.disk_template)
  # pre-initialize the counters so that an instance with no nics or no
  # disks does not raise NameError on the 'nic_count'/'disk_count' lines
  # below; note the stored value is the *last index* (count - 1), kept
  # as-is for compatibility with existing import code
  nic_count = -1
  for nic_count, nic in enumerate(instance.nics):
    config.set(constants.INISECT_INS, 'nic%d_mac' %
               nic_count, '%s' % nic.mac)
    config.set(constants.INISECT_INS, 'nic%d_ip' % nic_count, '%s' % nic.ip)
  # TODO: redundant: on load can read nics until it doesn't exist
  config.set(constants.INISECT_INS, 'nic_count', '%d' % nic_count)

  disk_count = -1
  for disk_count, disk in enumerate(snap_disks):
    config.set(constants.INISECT_INS, 'disk%d_ivname' % disk_count,
               ('%s' % disk.iv_name))
    config.set(constants.INISECT_INS, 'disk%d_dump' % disk_count,
               ('%s' % disk.physical_id[1]))
    config.set(constants.INISECT_INS, 'disk%d_size' % disk_count,
               ('%d' % disk.size))
  config.set(constants.INISECT_INS, 'disk_count', '%d' % disk_count)

  cff = os.path.join(destdir, constants.EXPORT_CONF_FILE)
  cfo = open(cff, 'w')
  try:
    config.write(cfo)
  finally:
    cfo.close()

  # replace any previous export for this instance with the new one
  shutil.rmtree(finaldestdir, True)
  shutil.move(destdir, finaldestdir)

  return True
1120

    
1121

    
1122
def ExportInfo(dest):
  """Get export configuration information.

  Args:
    dest: directory containing the export

  Returns:
    A serializable config file containing the export info, or None if the
    required sections are missing.

  """
  cff = os.path.join(dest, constants.EXPORT_CONF_FILE)

  config = objects.SerializableConfigParser()
  config.read(cff)

  # both sections must be present for the export to be usable
  for section in (constants.INISECT_EXP, constants.INISECT_INS):
    if not config.has_section(section):
      return None

  return config
1143

    
1144

    
1145
def ImportOSIntoInstance(instance, os_disk, swap_disk, src_node, src_image):
1146
  """Import an os image into an instance.
1147

1148
  Args:
1149
    instance: the instance object
1150
    os_disk: the instance-visible name of the os device
1151
    swap_disk: the instance-visible name of the swap device
1152
    src_node: node holding the source image
1153
    src_image: path to the source image on src_node
1154

1155
  Returns:
1156
    False in case of error, True otherwise.
1157

1158
  """
1159

    
1160
  inst_os = OSFromDisk(instance.os)
1161
  import_script = inst_os.import_script
1162

    
1163
  for os_device in instance.disks:
1164
    if os_device.iv_name == os_disk:
1165
      break
1166
  else:
1167
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
1168
    return False
1169

    
1170
  for swap_device in instance.disks:
1171
    if swap_device.iv_name == swap_disk:
1172
      break
1173
  else:
1174
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
1175
    return False
1176

    
1177
  real_os_dev = _RecursiveFindBD(os_device)
1178
  if real_os_dev is None:
1179
    raise errors.BlockDeviceError, ("Block device '%s' is not set up" %
1180
                                    str(os_device))
1181
  real_os_dev.Open()
1182

    
1183
  real_swap_dev = _RecursiveFindBD(swap_device)
1184
  if real_swap_dev is None:
1185
    raise errors.BlockDeviceError, ("Block device '%s' is not set up" %
1186
                                    str(swap_device))
1187
  real_swap_dev.Open()
1188

    
1189
  logfile = "%s/import-%s-%s-%s.log" % (constants.LOG_OS_DIR, instance.os,
1190
                                        instance.name, int(time.time()))
1191
  if not os.path.exists(constants.LOG_OS_DIR):
1192
    os.mkdir(constants.LOG_OS_DIR, 0750)
1193

    
1194
  remotecmd = utils.BuildShellCmd("ssh -q -oStrictHostKeyChecking=yes"
1195
                                  " -oBatchMode=yes -oEscapeChar=none"
1196
                                  " %s 'cat %s'", src_node, src_image)
1197

    
1198
  comprcmd = "gunzip"
1199
  impcmd = utils.BuildShellCmd("(cd %s; %s -i %s -b %s -s %s &>%s)",
1200
                               inst_os.path, import_script, instance.name,
1201
                               real_os_dev.dev_path, real_swap_dev.dev_path,
1202
                               logfile)
1203

    
1204
  command = '|'.join([remotecmd, comprcmd, impcmd])
1205

    
1206
  result = utils.RunCmd(command)
1207

    
1208
  if result.failed:
1209
    logger.Error("os import command '%s' returned error: %s"
1210
                 " output: %s" %
1211
                 (command, result.fail_reason, result.output))
1212
    return False
1213

    
1214
  return True
1215

    
1216

    
1217
def ListExports():
  """Return a list of exports currently available on this machine.

  """
  # no export directory means no exports
  if not os.path.isdir(constants.EXPORT_DIR):
    return []
  return os.listdir(constants.EXPORT_DIR)
1224

    
1225

    
1226
def RemoveExport(export):
  """Remove an existing export from the node.

  Args:
    export: the name of the export to remove

  Returns:
    False in case of error, True otherwise.

  """
  # TODO: catch some of the relevant exceptions and provide a pretty
  # error message if rmtree fails.
  shutil.rmtree(os.path.join(constants.EXPORT_DIR, export))

  return True
1243

    
1244

    
1245
class HooksRunner(object):
  """Hook runner.

  This class is instantiated on the node side (ganeti-noded) and not on
  the master side.

  """
  # only files whose basename matches this pattern are run as hooks
  RE_MASK = re.compile("^[a-zA-Z0-9_-]+$")

  def __init__(self, hooks_base_dir=None):
    """Constructor for hooks runner.

    Args:
      - hooks_base_dir: if not None, this overrides the
        constants.HOOKS_BASE_DIR (useful for unittests)

    """
    if hooks_base_dir is None:
      hooks_base_dir = constants.HOOKS_BASE_DIR
    self._BASE_DIR = hooks_base_dir

  @staticmethod
  def ExecHook(script, env):
    """Exec one hook script.

    Args:
     - script: the full path to the script
     - env: the environment with which to exec the script

    Returns:
      A (success, output) tuple: success is True iff the script exited
      with status 0; output holds at most the first 4096 bytes of the
      script's combined stdout/stderr.

    """
    # exec the process using subprocess and log the output
    fdstdin = None
    try:
      # hooks get no usable stdin
      fdstdin = open("/dev/null", "r")
      child = subprocess.Popen([script], stdin=fdstdin, stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT, close_fds=True,
                               shell=False, cwd="/",env=env)
      output = ""
      try:
        # only the first 4KB of output is captured; a read error is
        # recorded in the output instead of aborting the hook
        output = child.stdout.read(4096)
        child.stdout.close()
      except EnvironmentError, err:
        output += "Hook script error: %s" % str(err)

      # retry wait() if it is interrupted by a signal (EINTR); any other
      # error propagates to the caller
      while True:
        try:
          result = child.wait()
          break
        except EnvironmentError, err:
          if err.errno == errno.EINTR:
            continue
          raise
    finally:
      # try not to leak fds
      for fd in (fdstdin, ):
        if fd is not None:
          try:
            fd.close()
          except EnvironmentError, err:
            # just log the error
            #logger.Error("While closing fd %s: %s" % (fd, err))
            pass

    return result == 0, output

  def RunHooks(self, hpath, phase, env):
    """Run the scripts in the hooks directory.

    This method will not be usually overriden by child opcodes.

    Args:
      - hpath: the hooks path (used to build the <hpath>-<phase>.d
        subdirectory name)
      - phase: one of constants.HOOKS_PHASE_PRE/POST
      - env: environment passed to every hook script

    Returns:
      A list of ("subdir/script", status, output) tuples, where status
      is one of the constants.HKR_* values.

    Raises:
      errors.ProgrammerError: for an unknown phase value

    """
    if phase == constants.HOOKS_PHASE_PRE:
      suffix = "pre"
    elif phase == constants.HOOKS_PHASE_POST:
      suffix = "post"
    else:
      raise errors.ProgrammerError, ("Unknown hooks phase: '%s'" % phase)
    rr = []

    subdir = "%s-%s.d" % (hpath, suffix)
    dir_name = "%s/%s" % (self._BASE_DIR, subdir)
    try:
      dir_contents = os.listdir(dir_name)
    except OSError, err:
      # a missing/unreadable hooks directory is treated as "no hooks"
      # must log
      return rr

    # we use the standard python sort order,
    # so 00name is the recommended naming scheme
    dir_contents.sort()
    for relname in dir_contents:
      fname = os.path.join(dir_name, relname)
      # skip anything that is not a regular executable file with an
      # allowed name; skipped entries are still reported with HKR_SKIP
      if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
          self.RE_MASK.match(relname) is not None):
        rrval = constants.HKR_SKIP
        output = ""
      else:
        result, output = self.ExecHook(fname, env)
        if not result:
          rrval = constants.HKR_FAIL
        else:
          rrval = constants.HKR_SUCCESS
      rr.append(("%s/%s" % (subdir, relname), rrval, output))

    return rr