Statistics
| Branch: | Tag: | Revision:

root / lib / backend.py @ c6b8baba

History | View | Annotate | Download (38.1 kB)

1
#!/usr/bin/python
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Functions used by the node daemon"""
23

    
24

    
25
import os
26
import os.path
27
import shutil
28
import time
29
import tempfile
30
import stat
31
import errno
32
import re
33
import subprocess
34

    
35
from ganeti import logger
36
from ganeti import errors
37
from ganeti import utils
38
from ganeti import ssh
39
from ganeti import hypervisor
40
from ganeti import constants
41
from ganeti import bdev
42
from ganeti import objects
43
from ganeti import ssconf
44
from ganeti import _autoconf
45

    
46

    
47
def StartMaster():
  """Activate this node as the cluster master.

  This runs the master startup script; failures are logged via the
  node daemon logger.

  Returns:
    True on success, False if the master script failed

  """
  start_result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "start"])
  if not start_result.failed:
    return True

  logger.Error("could not activate cluster interface with command %s,"
               " error: '%s'" % (start_result.cmd, start_result.output))
  return False
63

    
64

    
65
def StopMaster():
  """Deactivate this node as master.

  This runs the master stop script; failures are logged via the node
  daemon logger.

  Returns:
    True on success, False if the master script failed

  """
  stop_result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "stop"])
  if not stop_result.failed:
    return True

  logger.Error("could not deactivate cluster interface with command %s,"
               " error: '%s'" % (stop_result.cmd, stop_result.output))
  return False
81

    
82

    
83
def AddNode(dsa, dsapub, rsa, rsapub, sshkey, sshpub):
  """Prepare the local node for joining the cluster.

  This does the following:
    - updates the SSH host keys (RSA and DSA, private and public parts)
    - installs the given SSH identity for root and authorizes it
    - restarts the SSH daemon so the new host keys take effect

  Args:
    dsa: private DSA host key contents
    dsapub: public DSA host key contents
    rsa: private RSA host key contents
    rsapub: public RSA host key contents
    sshkey: private SSH identity key for root
    sshpub: public SSH identity key for root

  Returns:
    True (I/O errors propagate to the caller as exceptions)

  """
  def _write_file(path, data):
    # write 'data' to 'path', making sure the handle is closed even if
    # the write fails (the previous open/write/close sequence leaked
    # the file descriptor on error)
    f = open(path, 'w')
    try:
      f.write(data)
    finally:
      f.close()

  # install the host keys
  _write_file("/etc/ssh/ssh_host_rsa_key", rsa)
  _write_file("/etc/ssh/ssh_host_rsa_key.pub", rsapub)
  _write_file("/etc/ssh/ssh_host_dsa_key", dsa)
  _write_file("/etc/ssh/ssh_host_dsa_key.pub", dsapub)

  if not os.path.isdir("/root/.ssh"):
    os.mkdir("/root/.ssh")

  # install root's cluster identity
  _write_file("/root/.ssh/id_dsa", sshkey)
  _write_file("/root/.ssh/id_dsa.pub", sshpub)

  f = open('/root/.ssh/id_dsa.pub', 'r')
  try:
    utils.AddAuthorizedKey('/root/.ssh/authorized_keys', f.read(8192))
  finally:
    f.close()

  # restart sshd so the new host keys are picked up
  utils.RunCmd([_autoconf.INITD_SSH, "restart"])

  return True
127

    
128

    
129
def LeaveCluster():
  """Cleans up the current node and prepares it to be removed from the cluster.

  This removes all regular files directly under the ganeti data
  directory (subdirectories are left alone) and drops the node's SSH
  identity from root's authorized keys.

  """
  if os.path.exists(constants.DATA_DIR):
    for entry in os.listdir(constants.DATA_DIR):
      full_name = os.path.join(constants.DATA_DIR, entry)
      # only remove non-directory entries at the top level
      if not os.path.isdir(full_name):
        os.unlink(full_name)

  pub_key_file = open('/root/.ssh/id_dsa.pub', 'r')
  try:
    utils.RemoveAuthorizedKey('/root/.ssh/authorized_keys',
                              pub_key_file.read(8192))
  finally:
    pub_key_file.close()

  utils.RemoveFile('/root/.ssh/id_dsa')
  utils.RemoveFile('/root/.ssh/id_dsa.pub')
147

    
148

    
149
def GetNodeInfo(vgname):
  """Compute information about this node, as a dictionary.

  Args:
    vgname: the name of the volume group to query

  Returns:
    { 'vg_size' : xxx, 'vg_free' : xxx, 'memory_dom0': xxx,
      'memory_free' : xxx, 'memory_total' : xxx }
    where
    vg_size is the size of the configured volume group in MiB
    vg_free is the free size of the volume group in MiB
    memory_dom0 is the memory allocated for domain0 in MiB
    memory_free is the currently available (free) ram in MiB
    memory_total is the total number of ram in MiB

  """
  vginfo = _GetVGInfo(vgname)
  node_info = {
    'vg_size': vginfo['vg_size'],
    'vg_free': vginfo['vg_free'],
    }

  # merge in the hypervisor-provided data (memory figures), if any
  hyp_info = hypervisor.GetHypervisor().GetNodeInfo()
  if hyp_info is not None:
    node_info.update(hyp_info)

  return node_info
175

    
176

    
177
def VerifyNode(what):
  """Verify the status of the local node.

  Args:
    what - a dictionary of things to check:
      'filelist' : list of files for which to compute checksums
      'nodelist' : list of nodes we should check communication with
      'hypervisor': run the hypervisor-specific verify

  Requested files on local node are checksummed and the result returned.

  The nodelist is traversed, with the following checks being made
  for each node:
  - known_hosts key correct
  - correct resolving of node name (target node returns its own hostname
    by ssh-execution of 'hostname', result compared against name in list.

  """
  answer = {}

  if 'hypervisor' in what:
    answer['hypervisor'] = hypervisor.GetHypervisor().Verify()

  if 'filelist' in what:
    answer['filelist'] = utils.FingerprintFiles(what['filelist'])

  if 'nodelist' in what:
    # collect only the nodes that failed verification, keyed by name
    failed_nodes = {}
    for node in what['nodelist']:
      success, message = ssh.VerifyNodeHostname(node)
      if not success:
        failed_nodes[node] = message
    answer['nodelist'] = failed_nodes

  return answer
210

    
211

    
212
def GetVolumeList(vg_name):
  """List the logical volumes of a volume group, with their sizes.

  Args:
    vg_name: the volume group to query

  Returns:
    dictionary mapping each logical volume name to its size as
    reported by lvs (e.g. test1: 20.06MiB); empty dict on failure

  """
  lvs_result = utils.RunCmd(["lvs", "--noheadings", "--units=m",
                             "-oname,size", vg_name])
  if lvs_result.failed:
    logger.Error("Failed to list logical volumes, lvs output: %s" %
                 lvs_result.output)
    return {}

  # each output line is "<name> <size>"; split makes (key, value) pairs
  return dict(line.split() for line in lvs_result.output.splitlines())
229

    
230

    
231
def ListVolumeGroups():
  """List the volume groups on this node and their sizes.

  This is a thin wrapper over the generic utils implementation.

  Returns:
    dictionary with keys volume name and values the size of the volume

  """
  return utils.ListVolumeGroups()
239

    
240

    
241
def NodeVolumes():
  """List all logical volumes on this node, across all volume groups.

  Returns:
    a list of dictionaries, one per logical volume, with keys:
      name: the volume name
      size: the volume size (MiB, no suffix)
      dev: the physical device the volume starts on
      vg: the volume group it belongs to
    An empty list is returned if listing failed.

  """
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=|",
                         "--options=lv_name,lv_size,devices,vg_name"])
  if result.failed:
    logger.Error("Failed to list logical volumes, lvs output: %s" %
                 result.output)
    # bug fix: return an empty list, not {}, so the failure case has
    # the same type as the success case below
    return []

  def parse_dev(dev):
    # lvs reports devices as e.g. "/dev/sda1(0)"; strip the extent info
    if '(' in dev:
      return dev.split('(')[0]
    else:
      return dev

  def map_line(line):
    return {
      'name': line[0].strip(),
      'size': line[1].strip(),
      'dev': parse_dev(line[2].strip()),
      'vg': line[3].strip(),
    }

  return [map_line(line.split('|')) for line in result.output.splitlines()]
268

    
269

    
270
def BridgesExist(bridges_list):
  """Check whether all the bridges in a list exist on this node.

  Args:
    bridges_list: list of bridge names to check

  Returns:
    True if all of them exist, false otherwise

  """
  for bridge_name in bridges_list:
    if utils.BridgeExists(bridge_name):
      continue
    # stop at the first missing bridge
    return False

  return True
282

    
283

    
284
def GetInstanceList():
285
  """ provides a list of instances
286

287
  Returns:
288
    A list of all running instances on the current node
289
    - instance1.example.com
290
    - instance2.example.com
291

292
  """
293
  try:
294
    names = hypervisor.GetHypervisor().ListInstances()
295
  except errors.HypervisorError, err:
296
    logger.Error("error enumerating instances: %s" % str(err))
297
    raise
298

    
299
  return names
300

    
301

    
302
def GetInstanceInfo(instance):
  """Return runtime information about an instance, as a dictionary.

  Args:
    instance: name of the instance (ex. instance1.example.com)

  Returns:
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
    where
    memory: memory size of instance (int)
    state: xen state of instance (string)
    time: cpu time of instance (float)
    The dict is empty when the hypervisor has no data for the instance.

  """
  instance_info = {}

  iinfo = hypervisor.GetHypervisor().GetInstanceInfo(instance)
  if iinfo is not None:
    # pick the memory, state and time fields out of the hypervisor's
    # (name, id, memory, vcpus, state, time) tuple
    instance_info['memory'] = iinfo[2]
    instance_info['state'] = iinfo[4]
    instance_info['time'] = iinfo[5]

  return instance_info
326

    
327

    
328
def GetAllInstancesInfo():
  """Gather data about all instances.

  This is the equivalent of `GetInstanceInfo()`, except that it
  computes data for all instances at once, thus being faster if one
  needs data about more than one instance.

  Returns: a dictionary of dictionaries, keys being the instance name,
    and with values:
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
    where
    memory: memory size of instance (int)
    state: xen state of instance (string)
    time: cpu time of instance (float)
    vcpus: the number of cpus

  """
  all_instances = {}

  iinfo = hypervisor.GetHypervisor().GetAllInstancesInfo()
  if iinfo:
    for (name, inst_id, memory, vcpus, state, times) in iinfo:
      all_instances[name] = {
        'memory': memory,
        'vcpus': vcpus,
        'state': state,
        'time': times,
        }

  return all_instances
358

    
359

    
360
def AddOSToInstance(instance, os_disk, swap_disk):
361
  """Add an os to an instance.
362

363
  Args:
364
    instance: the instance object
365
    os_disk: the instance-visible name of the os device
366
    swap_disk: the instance-visible name of the swap device
367

368
  """
369
  inst_os = OSFromDisk(instance.os)
370

    
371
  create_script = inst_os.create_script
372

    
373
  os_device = instance.FindDisk(os_disk)
374
  if os_device is None:
375
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
376
    return False
377

    
378
  swap_device = instance.FindDisk(swap_disk)
379
  if swap_device is None:
380
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
381
    return False
382

    
383
  real_os_dev = _RecursiveFindBD(os_device)
384
  if real_os_dev is None:
385
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
386
                                  str(os_device))
387
  real_os_dev.Open()
388

    
389
  real_swap_dev = _RecursiveFindBD(swap_device)
390
  if real_swap_dev is None:
391
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
392
                                  str(swap_device))
393
  real_swap_dev.Open()
394

    
395
  logfile = "%s/add-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
396
                                     instance.name, int(time.time()))
397
  if not os.path.exists(constants.LOG_OS_DIR):
398
    os.mkdir(constants.LOG_OS_DIR, 0750)
399

    
400
  command = utils.BuildShellCmd("cd %s && %s -i %s -b %s -s %s &>%s",
401
                                inst_os.path, create_script, instance.name,
402
                                real_os_dev.dev_path, real_swap_dev.dev_path,
403
                                logfile)
404

    
405
  result = utils.RunCmd(command)
406

    
407
  if result.failed:
408
    logger.Error("os create command '%s' returned error: %s"
409
                 " output: %s" %
410
                 (command, result.fail_reason, result.output))
411
    return False
412

    
413
  return True
414

    
415

    
416
def RunRenameInstance(instance, old_name, os_disk, swap_disk):
417
  """Run the OS rename script for an instance.
418

419
  Args:
420
    instance: the instance object
421
    old_name: the old name of the instance
422
    os_disk: the instance-visible name of the os device
423
    swap_disk: the instance-visible name of the swap device
424

425
  """
426
  inst_os = OSFromDisk(instance.os)
427

    
428
  script = inst_os.rename_script
429

    
430
  os_device = instance.FindDisk(os_disk)
431
  if os_device is None:
432
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
433
    return False
434

    
435
  swap_device = instance.FindDisk(swap_disk)
436
  if swap_device is None:
437
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
438
    return False
439

    
440
  real_os_dev = _RecursiveFindBD(os_device)
441
  if real_os_dev is None:
442
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
443
                                  str(os_device))
444
  real_os_dev.Open()
445

    
446
  real_swap_dev = _RecursiveFindBD(swap_device)
447
  if real_swap_dev is None:
448
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
449
                                  str(swap_device))
450
  real_swap_dev.Open()
451

    
452
  logfile = "%s/rename-%s-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
453
                                           old_name,
454
                                           instance.name, int(time.time()))
455
  if not os.path.exists(constants.LOG_OS_DIR):
456
    os.mkdir(constants.LOG_OS_DIR, 0750)
457

    
458
  command = utils.BuildShellCmd("cd %s && %s -o %s -n %s -b %s -s %s &>%s",
459
                                inst_os.path, script, old_name, instance.name,
460
                                real_os_dev.dev_path, real_swap_dev.dev_path,
461
                                logfile)
462

    
463
  result = utils.RunCmd(command)
464

    
465
  if result.failed:
466
    logger.Error("os create command '%s' returned error: %s"
467
                 " output: %s" %
468
                 (command, result.fail_reason, result.output))
469
    return False
470

    
471
  return True
472

    
473

    
474
def _GetVGInfo(vg_name):
  """Get informations about the volume group.

  Args:
    vg_name: the volume group

  Returns:
    { 'vg_size' : xxx, 'vg_free' : xxx, 'pv_count' : xxx }
    where
    vg_size is the total size of the volume group in MiB
    vg_free is the free size of the volume group in MiB
    pv_count are the number of physical disks in that vg

  Raises:
    errors.LVMError: if the vgs command failed (e.g. the volume group
    does not exist on this node)

  """
  # --nosuffix/--units=m make vgs print plain megabyte numbers,
  # colon-separated thanks to --separator
  retval = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings",
                         "--nosuffix", "--units=m", "--separator=:", vg_name])

  if retval.failed:
    errmsg = "volume group %s not present" % vg_name
    logger.Error(errmsg)
    raise errors.LVMError(errmsg)
  # NOTE(review): this reads retval.stdout while the rest of this file
  # uses retval.output for RunCmd results -- presumably both attributes
  # exist on the result object; confirm against utils.RunCmd
  valarr = retval.stdout.strip().split(':')
  retdic = {
    # sizes come back as floats of MiB; round to whole MiB
    "vg_size": int(round(float(valarr[0]), 0)),
    "vg_free": int(round(float(valarr[1]), 0)),
    "pv_count": int(valarr[2]),
    }
  return retdic
502

    
503

    
504
def _GatherBlockDevs(instance):
  """Set up an instance's block device(s).

  This is run on the primary node at instance startup. The block
  devices must be already assembled.

  Returns:
    a list of (disk config object, attached block device) pairs

  Raises:
    errors.BlockDeviceError: if any configured disk is not found

  """
  devices = []
  for cf_disk in instance.disks:
    attached_dev = _RecursiveFindBD(cf_disk)
    if attached_dev is None:
      raise errors.BlockDeviceError("Block device '%s' is not set up." %
                                    str(cf_disk))
    attached_dev.Open()
    devices.append((cf_disk, attached_dev))
  return devices
520

    
521

    
522
def StartInstance(instance, extra_args):
523
  """Start an instance.
524

525
  Args:
526
    instance - name of instance to start.
527

528
  """
529
  running_instances = GetInstanceList()
530

    
531
  if instance.name in running_instances:
532
    return True
533

    
534
  block_devices = _GatherBlockDevs(instance)
535
  hyper = hypervisor.GetHypervisor()
536

    
537
  try:
538
    hyper.StartInstance(instance, block_devices, extra_args)
539
  except errors.HypervisorError, err:
540
    logger.Error("Failed to start instance: %s" % err)
541
    return False
542

    
543
  return True
544

    
545

    
546
def ShutdownInstance(instance):
547
  """Shut an instance down.
548

549
  Args:
550
    instance - name of instance to shutdown.
551

552
  """
553
  running_instances = GetInstanceList()
554

    
555
  if instance.name not in running_instances:
556
    return True
557

    
558
  hyper = hypervisor.GetHypervisor()
559
  try:
560
    hyper.StopInstance(instance)
561
  except errors.HypervisorError, err:
562
    logger.Error("Failed to stop instance: %s" % err)
563
    return False
564

    
565
  # test every 10secs for 2min
566
  shutdown_ok = False
567

    
568
  time.sleep(1)
569
  for dummy in range(11):
570
    if instance.name not in GetInstanceList():
571
      break
572
    time.sleep(10)
573
  else:
574
    # the shutdown did not succeed
575
    logger.Error("shutdown of '%s' unsuccessful, using destroy" % instance)
576

    
577
    try:
578
      hyper.StopInstance(instance, force=True)
579
    except errors.HypervisorError, err:
580
      logger.Error("Failed to stop instance: %s" % err)
581
      return False
582

    
583
    time.sleep(1)
584
    if instance.name in GetInstanceList():
585
      logger.Error("could not shutdown instance '%s' even by destroy")
586
      return False
587

    
588
  return True
589

    
590

    
591
def CreateBlockDevice(disk, size, on_primary, info):
  """Creates a block device for an instance.

  Args:
   disk: a ganeti.objects.Disk object describing the device to create
   size: the size of the physical underlying devices
   on_primary: if the device should be `Assemble()`-d and
               `Open()`-ed after creation
   info: string passed to the device's SetInfo() after creation

  Returns:
    the new unique_id of the device (this can sometime be
    computed only after creation), or None. On secondary nodes,
    it's not required to return anything.

  """
  clist = []
  if disk.children:
    for child in disk.children:
      # recursively assemble the children first; the new device may
      # need them in an assembled state
      crdev = _RecursiveAssembleBD(child, on_primary)
      if on_primary or disk.AssembleOnSecondary():
        # we need the children open in case the device itself has to
        # be assembled
        crdev.Open()
      else:
        crdev.Close()
      clist.append(crdev)
  try:
    # if a device with this id already exists, remove it first so the
    # creation below starts from a clean slate
    device = bdev.FindDevice(disk.dev_type, disk.physical_id, clist)
    if device is not None:
      logger.Info("removing existing device %s" % disk)
      device.Remove()
  except errors.BlockDeviceError, err:
    # best-effort: if we can't even attach to the old device there is
    # nothing to remove
    pass

  device = bdev.Create(disk.dev_type, disk.physical_id,
                       clist, size)
  if device is None:
    raise ValueError("Can't create child device for %s, %s" %
                     (disk, size))
  if on_primary or disk.AssembleOnSecondary():
    device.Assemble()
    device.SetSyncSpeed(constants.SYNC_SPEED)
    # open read/write only where the instance will actually use it
    if on_primary or disk.OpenOnSecondary():
      device.Open(force=True)

  device.SetInfo(info)

  # the unique_id may only be known after the actual creation
  physical_id = device.unique_id
  return physical_id
640

    
641

    
642
def RemoveBlockDevice(disk):
643
  """Remove a block device.
644

645
  This is intended to be called recursively.
646

647
  """
648
  try:
649
    # since we are removing the device, allow a partial match
650
    # this allows removal of broken mirrors
651
    rdev = _RecursiveFindBD(disk, allow_partial=True)
652
  except errors.BlockDeviceError, err:
653
    # probably can't attach
654
    logger.Info("Can't attach to device %s in remove" % disk)
655
    rdev = None
656
  if rdev is not None:
657
    result = rdev.Remove()
658
  else:
659
    result = True
660
  if disk.children:
661
    for child in disk.children:
662
      result = result and RemoveBlockDevice(child)
663
  return result
664

    
665

    
666
def _RecursiveAssembleBD(disk, as_primary):
  """Activate a block device for an instance.

  This is run on the primary and secondary nodes for an instance.

  This function is called recursively.

  Args:
    disk: a objects.Disk object
    as_primary: if we should make the block device read/write

  Returns:
    the assembled device or None (in case no device was assembled)

  If the assembly is not successful, an exception is raised.

  """
  children = []
  if disk.children:
    for chld_disk in disk.children:
      children.append(_RecursiveAssembleBD(chld_disk, as_primary))

  if as_primary or disk.AssembleOnSecondary():
    r_dev = bdev.AttachOrAssemble(disk.dev_type, disk.physical_id, children)
    r_dev.SetSyncSpeed(constants.SYNC_SPEED)
    result = r_dev
    # the device is opened read/write on the primary, and also on a
    # secondary when the disk type requires it
    if as_primary or disk.OpenOnSecondary():
      r_dev.Open()
    else:
      r_dev.Close()
  else:
    # NOTE(review): when no assembly happens the function returns True,
    # not None as the docstring says; callers appear to treat any true
    # value as success -- confirm before relying on the return type
    result = True
  return result
699

    
700

    
701
def AssembleBlockDevice(disk, as_primary):
  """Activate a block device for an instance.

  This is a wrapper over _RecursiveAssembleBD.

  Returns:
    a /dev path for primary nodes
    True for secondary nodes

  """
  assembled = _RecursiveAssembleBD(disk, as_primary)
  if not isinstance(assembled, bdev.BlockDev):
    return assembled
  # a real device was assembled; hand back its /dev path
  return assembled.dev_path
715

    
716

    
717
def ShutdownBlockDevice(disk):
  """Shut down a block device.

  First, if the device is assembled (can `Attach()`), then the device
  is shutdown. Then the children of the device are shutdown.

  This function is called recursively. Note that we don't cache the
  children or such, as oppossed to assemble, shutdown of different
  devices doesn't require that the upper device was active.

  """
  attached_dev = _RecursiveFindBD(disk)
  if attached_dev is None:
    # not attached, nothing to shut down at this level
    result = True
  else:
    result = attached_dev.Shutdown()

  if disk.children:
    for child in disk.children:
      result = result and ShutdownBlockDevice(child)

  return result
737

    
738

    
739
def MirrorAddChild(md_cdev, new_cdev):
  """Extend an MD raid1 array.

  Args:
    md_cdev: the disk config of the md array
    new_cdev: the disk config of the device to add

  Returns:
    True on success, False if either device can't be found

  """
  # allow a partial match on the array itself, as it may be degraded
  array_dev = _RecursiveFindBD(md_cdev, allow_partial=True)
  if array_dev is None:
    logger.Error("Can't find md device")
    return False

  member_dev = _RecursiveFindBD(new_cdev)
  if member_dev is None:
    logger.Error("Can't find new device to add")
    return False

  member_dev.Open()
  array_dev.AddChild(member_dev)
  return True
754

    
755

    
756
def MirrorRemoveChild(md_cdev, new_cdev):
  """Reduce an MD raid1 array.

  Args:
    md_cdev: the disk config of the md array
    new_cdev: the disk config of the device to remove

  Returns:
    True on success, False if either device can't be found

  """
  array_dev = _RecursiveFindBD(md_cdev)
  if array_dev is None:
    return False

  member_dev = _RecursiveFindBD(new_cdev)
  if member_dev is None:
    return False

  member_dev.Open()
  array_dev.RemoveChild(member_dev.dev_path)
  return True
769

    
770

    
771
def GetMirrorStatus(disks):
  """Get the mirroring status of a list of devices.

  Args:
    disks: list of `objects.Disk`

  Returns:
    list of (mirror_done, estimated_time) tuples, which
    are the result of bdev.BlockDevice.CombinedSyncStatus()

  Raises:
    errors.BlockDeviceError: if any of the devices is not attached

  """
  status_list = []
  for disk in disks:
    attached_dev = _RecursiveFindBD(disk)
    if attached_dev is None:
      raise errors.BlockDeviceError("Can't find device %s" % str(disk))
    status_list.append(attached_dev.CombinedSyncStatus())
  return status_list
789

    
790

    
791
def _RecursiveFindBD(disk, allow_partial=False):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance
    allow_partial: don't abort the find if a child of the
                   device can't be found; this is intended to be
                   used when repairing mirrors

  Returns:
    None if the device can't be found
    otherwise the device instance

  """
  # NOTE(review): allow_partial is accepted but never referenced in the
  # body, and the recursive calls below do not forward it either --
  # confirm whether the partial-find behaviour is handled inside
  # bdev.FindDevice or is simply not implemented yet
  children = []
  if disk.children:
    for chdisk in disk.children:
      children.append(_RecursiveFindBD(chdisk))

  return bdev.FindDevice(disk.dev_type, disk.physical_id, children)
813

    
814

    
815
def FindBlockDevice(disk):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance
  Returns:
    None if the device can't be found
    (device_path, major, minor, sync_percent, estimated_time, is_degraded)

  """
  attached_dev = _RecursiveFindBD(disk)
  if attached_dev is None:
    return None
  sync_percent, estimated_time, is_degraded = attached_dev.GetSyncStatus()
  return (attached_dev.dev_path, attached_dev.major, attached_dev.minor,
          sync_percent, estimated_time, is_degraded)
832

    
833

    
834
def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
  """Write a file to the filesystem.

  This allows the master to overwrite(!) a file. It will only perform
  the operation if the file belongs to a list of configuration files.

  Args:
    file_name: absolute path of the target file
    data: the contents to write
    mode: the permission bits to set on the file
    uid, gid: ownership to set on the file
    atime, mtime: timestamps to set on the file

  Returns:
    True on success, False if the name is not absolute or is not in
    the allowed-targets list

  """
  if not os.path.isabs(file_name):
    logger.Error("Filename passed to UploadFile is not absolute: '%s'" %
                 file_name)
    return False

  # only a fixed whitelist of cluster configuration files may be
  # overwritten remotely
  allowed_files = [constants.CLUSTER_CONF_FILE, "/etc/hosts",
                   constants.SSH_KNOWN_HOSTS_FILE]
  allowed_files.extend(ssconf.SimpleStore().GetFileList())
  if file_name not in allowed_files:
    logger.Error("Filename passed to UploadFile not in allowed"
                 " upload targets: '%s'" % file_name)
    return False

  # write to a temp file in the same directory, then rename over the
  # target, so the replacement is atomic
  dir_name, small_name = os.path.split(file_name)
  fd, new_name = tempfile.mkstemp('.new', small_name, dir_name)
  # here we need to make sure we remove the temp file, if any error
  # leaves it in place
  try:
    os.chown(new_name, uid, gid)
    os.chmod(new_name, mode)
    os.write(fd, data)
    os.fsync(fd)
    os.utime(new_name, (atime, mtime))
    os.rename(new_name, file_name)
  finally:
    os.close(fd)
    # after a successful rename this is a no-op on the old temp name
    utils.RemoveFile(new_name)
  return True
869

    
870

    
871
def _ErrnoOrStr(err):
872
  """Format an EnvironmentError exception.
873

874
  If the `err` argument has an errno attribute, it will be looked up
875
  and converted into a textual EXXXX description. Otherwise the string
876
  representation of the error will be returned.
877

878
  """
879
  if hasattr(err, 'errno'):
880
    detail = errno.errorcode[err.errno]
881
  else:
882
    detail = str(err)
883
  return detail
884

    
885

    
886
def _OSOndiskVersion(name, os_dir=None):
  """Compute and return the api version of a given OS.

  This function will try to read the api version of the os given by
  the 'name' parameter. By default, it wil use the constants.OS_DIR
  as top-level directory for OSes, but this can be overriden by the
  use of the os_dir parameter. Return value will be either an
  integer denoting the version or None in the case when this is not
  a valid OS name.

  Args:
    name: the OS name
    os_dir: optional override for the OS directory

  Raises:
    errors.InvalidOS: if the version file is missing, not a regular
    file, unreadable, or does not contain an integer

  """
  if os_dir is None:
    os_dir = os.path.sep.join([constants.OS_DIR, name])

  api_file = os.path.sep.join([os_dir, "ganeti_api_version"])

  # stat first so we can give a specific error for a missing file
  try:
    st = os.stat(api_file)
  except EnvironmentError, err:
    raise errors.InvalidOS(name, "'ganeti_api_version' file not"
                           " found (%s)" % _ErrnoOrStr(err))

  if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
    raise errors.InvalidOS(name, "'ganeti_api_version' file is not"
                           " a regular file")

  # the version file should contain a single small integer; cap the
  # read at 256 bytes to guard against bogus content
  try:
    f = open(api_file)
    try:
      api_version = f.read(256)
    finally:
      f.close()
  except EnvironmentError, err:
    raise errors.InvalidOS(name, "error while reading the"
                           " API version (%s)" % _ErrnoOrStr(err))

  api_version = api_version.strip()
  try:
    api_version = int(api_version)
  except (TypeError, ValueError), err:
    raise errors.InvalidOS(name, "API version is not integer (%s)" % str(err))

  return api_version
929

    
930

    
931
def DiagnoseOS(top_dir=None):
932
  """Compute the validity for all OSes.
933

934
  For each name in the give top_dir parameter (if not given, defaults
935
  to constants.OS_DIR), it will return an object. If this is a valid
936
  os, the object will be an instance of the object.OS class. If not,
937
  it will be an instance of errors.InvalidOS and this signifies that
938
  this name does not correspond to a valid OS.
939

940
  Returns:
941
    list of objects
942

943
  """
944
  if top_dir is None:
945
    top_dir = constants.OS_DIR
946

    
947
  try:
948
    f_names = os.listdir(top_dir)
949
  except EnvironmentError, err:
950
    logger.Error("Can't list the OS directory: %s" % str(err))
951
    return False
952
  result = []
953
  for name in f_names:
954
    try:
955
      os_inst = OSFromDisk(name, os.path.sep.join([top_dir, name]))
956
      result.append(os_inst)
957
    except errors.InvalidOS, err:
958
      result.append(err)
959

    
960
  return result
961

    
962

    
963
def OSFromDisk(name, os_dir=None):
964
  """Create an OS instance from disk.
965

966
  This function will return an OS instance if the given name is a
967
  valid OS name. Otherwise, it will raise an appropriate
968
  `errors.InvalidOS` exception, detailing why this is not a valid
969
  OS.
970

971
  """
972
  if os_dir is None:
973
    os_dir = os.path.sep.join([constants.OS_DIR, name])
974

    
975
  api_version = _OSOndiskVersion(name, os_dir)
976

    
977
  if api_version != constants.OS_API_VERSION:
978
    raise errors.InvalidOS(name, "API version mismatch (found %s want %s)"
979
                           % (api_version, constants.OS_API_VERSION))
980

    
981
  # OS Scripts dictionary, we will populate it with the actual script names
982
  os_scripts = {'create': '', 'export': '', 'import': '', 'rename': ''}
983

    
984
  for script in os_scripts:
985
    os_scripts[script] = os.path.sep.join([os_dir, script])
986

    
987
    try:
988
      st = os.stat(os_scripts[script])
989
    except EnvironmentError, err:
990
      raise errors.InvalidOS(name, "'%s' script missing (%s)" %
991
                             (script, _ErrnoOrStr(err)))
992

    
993
    if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
994
      raise errors.InvalidOS(name, "'%s' script not executable" % script)
995

    
996
    if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
997
      raise errors.InvalidOS(name, "'%s' is not a regular file" % script)
998

    
999

    
1000
  return objects.OS(name=name, path=os_dir,
1001
                    create_script=os_scripts['create'],
1002
                    export_script=os_scripts['export'],
1003
                    import_script=os_scripts['import'],
1004
                    rename_script=os_scripts['rename'],
1005
                    api_version=api_version)
1006

    
1007

    
1008
def SnapshotBlockDevice(disk):
  """Create a snapshot copy of a block device.

  This function is called recursively, and the snapshot is actually created
  just for the leaf lvm backend device.

  Args:
    disk: the disk to be snapshotted

  Returns:
    a config entry for the actual lvm device snapshotted, or None if a
    suitable backend device could not be found

  """
  if disk.children:
    if len(disk.children) == 1:
      # only one child, let's recurse on it
      return SnapshotBlockDevice(disk.children[0])
    else:
      # more than one child, choose one that matches
      for child in disk.children:
        if child.size == disk.size:
          # return implies breaking the loop
          return SnapshotBlockDevice(child)
  elif disk.dev_type == "lvm":
    r_dev = _RecursiveFindBD(disk)
    if r_dev is not None:
      # let's stay on the safe side and ask for the full size, for now
      return r_dev.Snapshot(disk.size)
    else:
      return None
  else:
    # FIX: the two adjacent string literals previously concatenated to
    # "block device'%s'" (missing space); a space has been added
    raise errors.ProgrammerError("Cannot snapshot non-lvm block device"
                                 " '%s' of type '%s'" %
                                 (disk.unique_id, disk.dev_type))


def ExportSnapshot(disk, dest_node, instance):
1045
  """Export a block device snapshot to a remote node.
1046

1047
  Args:
1048
    disk: the snapshot block device
1049
    dest_node: the node to send the image to
1050
    instance: instance being exported
1051

1052
  Returns:
1053
    True if successful, False otherwise.
1054

1055
  """
1056
  inst_os = OSFromDisk(instance.os)
1057
  export_script = inst_os.export_script
1058

    
1059
  logfile = "%s/exp-%s-%s-%s.log" % (constants.LOG_OS_DIR, inst_os.name,
1060
                                     instance.name, int(time.time()))
1061
  if not os.path.exists(constants.LOG_OS_DIR):
1062
    os.mkdir(constants.LOG_OS_DIR, 0750)
1063

    
1064
  real_os_dev = _RecursiveFindBD(disk)
1065
  if real_os_dev is None:
1066
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
1067
                                  str(disk))
1068
  real_os_dev.Open()
1069

    
1070
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
1071
  destfile = disk.physical_id[1]
1072

    
1073
  # the target command is built out of three individual commands,
1074
  # which are joined by pipes; we check each individual command for
1075
  # valid parameters
1076

    
1077
  expcmd = utils.BuildShellCmd("cd %s; %s -i %s -b %s 2>%s", inst_os.path,
1078
                               export_script, instance.name,
1079
                               real_os_dev.dev_path, logfile)
1080

    
1081
  comprcmd = "gzip"
1082

    
1083
  destcmd = utils.BuildShellCmd("mkdir -p %s && cat > %s/%s",
1084
                                destdir, destdir, destfile)
1085
  remotecmd = ssh.BuildSSHCmd(dest_node, 'root', destcmd)
1086

    
1087

    
1088

    
1089
  # all commands have been checked, so we're safe to combine them
1090
  command = '|'.join([expcmd, comprcmd, utils.ShellQuoteArgs(remotecmd)])
1091

    
1092
  result = utils.RunCmd(command)
1093

    
1094
  if result.failed:
1095
    logger.Error("os snapshot export command '%s' returned error: %s"
1096
                 " output: %s" %
1097
                 (command, result.fail_reason, result.output))
1098
    return False
1099

    
1100
  return True
1101

    
1102

    
1103
def FinalizeExport(instance, snap_disks):
  """Write out the export configuration information.

  Args:
    instance: instance configuration
    snap_disks: snapshot block devices

  Returns:
    False in case of error, True otherwise.

  """
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
  finaldestdir = os.path.join(constants.EXPORT_DIR, instance.name)

  config = objects.SerializableConfigParser()

  config.add_section(constants.INISECT_EXP)
  config.set(constants.INISECT_EXP, 'version', '0')
  config.set(constants.INISECT_EXP, 'timestamp', '%d' % int(time.time()))
  config.set(constants.INISECT_EXP, 'source', instance.primary_node)
  config.set(constants.INISECT_EXP, 'os', instance.os)
  config.set(constants.INISECT_EXP, 'compression', 'gzip')

  config.add_section(constants.INISECT_INS)
  config.set(constants.INISECT_INS, 'name', instance.name)
  config.set(constants.INISECT_INS, 'memory', '%d' % instance.memory)
  config.set(constants.INISECT_INS, 'vcpus', '%d' % instance.vcpus)
  config.set(constants.INISECT_INS, 'disk_template', instance.disk_template)

  # FIX: initialize the loop counters so an instance with no nics or an
  # empty snap_disks list no longer raises NameError when the counters
  # are used after the loops; -1 preserves the existing "index of the
  # last element" value stored for non-empty lists
  nic_count = -1
  for nic_count, nic in enumerate(instance.nics):
    config.set(constants.INISECT_INS, 'nic%d_mac' %
               nic_count, '%s' % nic.mac)
    config.set(constants.INISECT_INS, 'nic%d_ip' % nic_count, '%s' % nic.ip)
  # TODO: redundant: on load can read nics until it doesn't exist
  # NOTE(review): this stores the index of the last nic (len-1), not the
  # actual count -- confirm the import side expects this before changing
  config.set(constants.INISECT_INS, 'nic_count' , '%d' % nic_count)

  disk_count = -1
  for disk_count, disk in enumerate(snap_disks):
    config.set(constants.INISECT_INS, 'disk%d_ivname' % disk_count,
               ('%s' % disk.iv_name))
    config.set(constants.INISECT_INS, 'disk%d_dump' % disk_count,
               ('%s' % disk.physical_id[1]))
    config.set(constants.INISECT_INS, 'disk%d_size' % disk_count,
               ('%d' % disk.size))
  config.set(constants.INISECT_INS, 'disk_count' , '%d' % disk_count)

  cff = os.path.join(destdir, constants.EXPORT_CONF_FILE)
  cfo = open(cff, 'w')
  try:
    config.write(cfo)
  finally:
    cfo.close()

  # atomically-ish replace any previous export with the new one
  shutil.rmtree(finaldestdir, True)
  shutil.move(destdir, finaldestdir)

  return True


def ExportInfo(dest):
  """Get export configuration information.

  Args:
    dest: directory containing the export

  Returns:
    A serializable config file containing the export info, or None when
    the export information is missing or incomplete.

  """
  cff = os.path.join(dest, constants.EXPORT_CONF_FILE)

  config = objects.SerializableConfigParser()
  config.read(cff)

  # both the export and the instance sections must be present for the
  # export to be usable
  for section in (constants.INISECT_EXP, constants.INISECT_INS):
    if not config.has_section(section):
      return None

  return config


def ImportOSIntoInstance(instance, os_disk, swap_disk, src_node, src_image):
1183
  """Import an os image into an instance.
1184

1185
  Args:
1186
    instance: the instance object
1187
    os_disk: the instance-visible name of the os device
1188
    swap_disk: the instance-visible name of the swap device
1189
    src_node: node holding the source image
1190
    src_image: path to the source image on src_node
1191

1192
  Returns:
1193
    False in case of error, True otherwise.
1194

1195
  """
1196
  inst_os = OSFromDisk(instance.os)
1197
  import_script = inst_os.import_script
1198

    
1199
  os_device = instance.FindDisk(os_disk)
1200
  if os_device is None:
1201
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
1202
    return False
1203

    
1204
  swap_device = instance.FindDisk(swap_disk)
1205
  if swap_device is None:
1206
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
1207
    return False
1208

    
1209
  real_os_dev = _RecursiveFindBD(os_device)
1210
  if real_os_dev is None:
1211
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
1212
                                  str(os_device))
1213
  real_os_dev.Open()
1214

    
1215
  real_swap_dev = _RecursiveFindBD(swap_device)
1216
  if real_swap_dev is None:
1217
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
1218
                                  str(swap_device))
1219
  real_swap_dev.Open()
1220

    
1221
  logfile = "%s/import-%s-%s-%s.log" % (constants.LOG_OS_DIR, instance.os,
1222
                                        instance.name, int(time.time()))
1223
  if not os.path.exists(constants.LOG_OS_DIR):
1224
    os.mkdir(constants.LOG_OS_DIR, 0750)
1225

    
1226
  destcmd = utils.BuildShellCmd('cat %s', src_image)
1227
  remotecmd = ssh.BuildSSHCmd(src_node, 'root', destcmd)
1228

    
1229
  comprcmd = "gunzip"
1230
  impcmd = utils.BuildShellCmd("(cd %s; %s -i %s -b %s -s %s &>%s)",
1231
                               inst_os.path, import_script, instance.name,
1232
                               real_os_dev.dev_path, real_swap_dev.dev_path,
1233
                               logfile)
1234

    
1235
  command = '|'.join([utils.ShellQuoteArgs(remotecmd), comprcmd, impcmd])
1236

    
1237
  result = utils.RunCmd(command)
1238

    
1239
  if result.failed:
1240
    logger.Error("os import command '%s' returned error: %s"
1241
                 " output: %s" %
1242
                 (command, result.fail_reason, result.output))
1243
    return False
1244

    
1245
  return True
1246

    
1247

    
1248
def ListExports():
  """Return a list of exports currently available on this machine.

  Returns an empty list when the export directory does not exist.

  """
  if not os.path.isdir(constants.EXPORT_DIR):
    return []
  return os.listdir(constants.EXPORT_DIR)


def RemoveExport(export):
  """Remove an existing export from the node.

  Args:
    export: the name of the export to remove

  Returns:
    False in case of error, True otherwise.

  """
  target = os.path.join(constants.EXPORT_DIR, export)

  # TODO: catch some of the relevant exceptions and provide a pretty
  # error message if rmtree fails.
  shutil.rmtree(target)

  return True


class HooksRunner(object):
1278
  """Hook runner.
1279

1280
  This class is instantiated on the node side (ganeti-noded) and not on
1281
  the master side.
1282

1283
  """
1284
  RE_MASK = re.compile("^[a-zA-Z0-9_-]+$")
1285

    
1286
  def __init__(self, hooks_base_dir=None):
1287
    """Constructor for hooks runner.
1288

1289
    Args:
1290
      - hooks_base_dir: if not None, this overrides the
1291
        constants.HOOKS_BASE_DIR (useful for unittests)
1292
      - logs_base_dir: if not None, this overrides the
1293
        constants.LOG_HOOKS_DIR (useful for unittests)
1294
      - logging: enable or disable logging of script output
1295

1296
    """
1297
    if hooks_base_dir is None:
1298
      hooks_base_dir = constants.HOOKS_BASE_DIR
1299
    self._BASE_DIR = hooks_base_dir
1300

    
1301
  @staticmethod
1302
  def ExecHook(script, env):
1303
    """Exec one hook script.
1304

1305
    Args:
1306
     - phase: the phase
1307
     - script: the full path to the script
1308
     - env: the environment with which to exec the script
1309

1310
    """
1311
    # exec the process using subprocess and log the output
1312
    fdstdin = None
1313
    try:
1314
      fdstdin = open("/dev/null", "r")
1315
      child = subprocess.Popen([script], stdin=fdstdin, stdout=subprocess.PIPE,
1316
                               stderr=subprocess.STDOUT, close_fds=True,
1317
                               shell=False, cwd="/",env=env)
1318
      output = ""
1319
      try:
1320
        output = child.stdout.read(4096)
1321
        child.stdout.close()
1322
      except EnvironmentError, err:
1323
        output += "Hook script error: %s" % str(err)
1324

    
1325
      while True:
1326
        try:
1327
          result = child.wait()
1328
          break
1329
        except EnvironmentError, err:
1330
          if err.errno == errno.EINTR:
1331
            continue
1332
          raise
1333
    finally:
1334
      # try not to leak fds
1335
      for fd in (fdstdin, ):
1336
        if fd is not None:
1337
          try:
1338
            fd.close()
1339
          except EnvironmentError, err:
1340
            # just log the error
1341
            #logger.Error("While closing fd %s: %s" % (fd, err))
1342
            pass
1343

    
1344
    return result == 0, output
1345

    
1346
  def RunHooks(self, hpath, phase, env):
1347
    """Run the scripts in the hooks directory.
1348

1349
    This method will not be usually overriden by child opcodes.
1350

1351
    """
1352
    if phase == constants.HOOKS_PHASE_PRE:
1353
      suffix = "pre"
1354
    elif phase == constants.HOOKS_PHASE_POST:
1355
      suffix = "post"
1356
    else:
1357
      raise errors.ProgrammerError("Unknown hooks phase: '%s'" % phase)
1358
    rr = []
1359

    
1360
    subdir = "%s-%s.d" % (hpath, suffix)
1361
    dir_name = "%s/%s" % (self._BASE_DIR, subdir)
1362
    try:
1363
      dir_contents = os.listdir(dir_name)
1364
    except OSError, err:
1365
      # must log
1366
      return rr
1367

    
1368
    # we use the standard python sort order,
1369
    # so 00name is the recommended naming scheme
1370
    dir_contents.sort()
1371
    for relname in dir_contents:
1372
      fname = os.path.join(dir_name, relname)
1373
      if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
1374
          self.RE_MASK.match(relname) is not None):
1375
        rrval = constants.HKR_SKIP
1376
        output = ""
1377
      else:
1378
        result, output = self.ExecHook(fname, env)
1379
        if not result:
1380
          rrval = constants.HKR_FAIL
1381
        else:
1382
          rrval = constants.HKR_SUCCESS
1383
      rr.append(("%s/%s" % (subdir, relname), rrval, output))
1384

    
1385
    return rr