Statistics
| Branch: | Tag: | Revision:

root / lib / backend.py @ decd5f45

History | View | Annotate | Download (38.2 kB)

1
#!/usr/bin/python
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Functions used by the node daemon"""
23

    
24

    
25
import os
26
import os.path
27
import shutil
28
import time
29
import tempfile
30
import stat
31
import errno
32
import re
33
import subprocess
34

    
35
from ganeti import logger
36
from ganeti import errors
37
from ganeti import utils
38
from ganeti import ssh
39
from ganeti import hypervisor
40
from ganeti import constants
41
from ganeti import bdev
42
from ganeti import objects
43
from ganeti import ssconf
44

    
45

    
46
def StartMaster():
  """Activate the local node as the cluster master.

  Runs the master start script; on failure the command and its output
  are logged.

  Returns:
    True on success, False otherwise.

  """
  result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "start"])
  if not result.failed:
    return True

  logger.Error("could not activate cluster interface with command %s,"
               " error: '%s'" % (result.cmd, result.output))
  return False
62

    
63

    
64
def StopMaster():
  """Deactivate the local node as the cluster master.

  Runs the master stop script; on failure the command and its output
  are logged.

  Returns:
    True on success, False otherwise.

  """
  result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "stop"])
  if not result.failed:
    return True

  logger.Error("could not deactivate cluster interface with command %s,"
               " error: '%s'" % (result.cmd, result.output))
  return False
80

    
81

    
82
def AddNode(dsa, dsapub, rsa, rsapub, ssh, sshpub):
  """Join this node to the cluster.

  Installs the given ssh host keys and the cluster ssh identity,
  authorizes the identity for root logins and restarts sshd so the
  new host keys take effect.

  Args:
    dsa, dsapub: private/public host DSA key contents
    rsa, rsapub: private/public host RSA key contents
    ssh, sshpub: private/public cluster identity key contents
                 (note: the 'ssh' parameter shadows the ssh module
                 inside this function)

  Returns:
    True (always; write errors propagate as exceptions).

  """
  def _WriteKey(path, contents):
    # overwrite the target file with the given key material
    key_file = open(path, 'w')
    key_file.write(contents)
    key_file.close()

  _WriteKey("/etc/ssh/ssh_host_rsa_key", rsa)
  _WriteKey("/etc/ssh/ssh_host_rsa_key.pub", rsapub)
  _WriteKey("/etc/ssh/ssh_host_dsa_key", dsa)
  _WriteKey("/etc/ssh/ssh_host_dsa_key.pub", dsapub)

  if not os.path.isdir("/root/.ssh"):
    os.mkdir("/root/.ssh")

  _WriteKey("/root/.ssh/id_dsa", ssh)
  _WriteKey("/root/.ssh/id_dsa.pub", sshpub)

  # authorize our own public key for root logins on this node
  f = open('/root/.ssh/id_dsa.pub', 'r')
  try:
    utils.AddAuthorizedKey('/root/.ssh/authorized_keys', f.read(8192))
  finally:
    f.close()

  utils.RunCmd(["/etc/init.d/ssh", "restart"])

  return True
126

    
127

    
128
def LeaveCluster():
  """Cleans up the current node and prepares it to be removed from the cluster.

  Removes all files directly under the ganeti data directory, revokes
  this node's ssh key from root's authorized_keys and deletes the
  cluster ssh identity files.

  """
  # remove only the files in the top level of DATA_DIR; the walk is
  # restricted to dirpath == DATA_DIR, so subdirectories and the
  # directory itself are left in place
  if os.path.exists(constants.DATA_DIR):
    for dirpath, dirnames, filenames in os.walk(constants.DATA_DIR):
      if dirpath == constants.DATA_DIR:
        for i in filenames:
          os.unlink(os.path.join(dirpath, i))

  # revoke our own public key from the local authorized_keys file
  f = open('/root/.ssh/id_dsa.pub', 'r')
  try:
    utils.RemoveAuthorizedKey('/root/.ssh/authorized_keys', f.read(8192))
  finally:
    f.close()

  # and remove the cluster-provided identity itself
  utils.RemoveFile('/root/.ssh/id_dsa')
  utils.RemoveFile('/root/.ssh/id_dsa.pub')
146

    
147

    
148
def GetNodeInfo(vgname):
  """Collect storage and memory information about this node.

  Args:
    vgname: the name of the volume group to query

  Returns:
    dict with (at least) 'vg_size' and 'vg_free' (both in MiB),
    merged with whatever the hypervisor reports from its own
    GetNodeInfo() (e.g. total/free/dom0 memory).

  """
  vginfo = _GetVGInfo(vgname)
  outputarray = {
    'vg_size': vginfo['vg_size'],
    'vg_free': vginfo['vg_free'],
    }

  hyp_info = hypervisor.GetHypervisor().GetNodeInfo()
  if hyp_info is not None:
    outputarray.update(hyp_info)

  return outputarray
174

    
175

    
176
def VerifyNode(what):
  """Verify the status of the local node.

  Args:
    what: dict selecting which checks to run; recognized keys:
      'hypervisor': run the hypervisor-specific verification
      'filelist': list of files to checksum
      'nodelist': list of nodes whose hostname resolution to verify
                  over ssh

  Returns:
    dict with one entry per requested check; for 'nodelist' only the
    failing nodes appear, mapped to their error message.

  """
  result = {}

  if 'hypervisor' in what:
    result['hypervisor'] = hypervisor.GetHypervisor().Verify()

  if 'filelist' in what:
    result['filelist'] = utils.FingerprintFiles(what['filelist'])

  if 'nodelist' in what:
    failures = {}
    for node in what['nodelist']:
      success, message = ssh.VerifyNodeHostname(node)
      if not success:
        failures[node] = message
    result['nodelist'] = failures

  return result
209

    
210

    
211
def GetVolumeList(vg_name):
  """Compute the list of logical volumes and their sizes.

  Args:
    vg_name: the volume group to list

  Returns:
    dict mapping each lv name to its size string as printed by lvs
    (e.g. '20.06MiB'); empty dict (after logging) if lvs fails.

  """
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m",
                         "-oname,size", vg_name])
  if result.failed:
    logger.Error("Failed to list logical volumes, lvs output: %s" %
                 result.output)
    return {}

  # each output line is "name size"; split pairs feed dict() directly
  return dict(line.split() for line in result.output.splitlines())
228

    
229

    
230
def ListVolumeGroups():
  """List this node's volume groups and their sizes.

  Thin wrapper over the shared utils implementation.

  Returns:
    dict mapping volume group name to its size.

  """
  return utils.ListVolumeGroups()
238

    
239

    
240
def NodeVolumes():
  """List all logical volumes present on this node.

  Returns:
    list of dicts, one per volume, with keys 'name', 'size', 'dev'
    (the backing physical device, extent suffix stripped) and 'vg';
    an empty dict (after logging) if lvs fails.

  """
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=|",
                         "--options=lv_name,lv_size,devices,vg_name"])
  if result.failed:
    logger.Error("Failed to list logical volumes, lvs output: %s" %
                 result.output)
    return {}

  volumes = []
  for line in result.output.splitlines():
    fields = line.split('|')
    # lvs prints devices as "/dev/sdX(extent)"; keep only the path part
    volumes.append({
      'name': fields[0].strip(),
      'size': fields[1].strip(),
      'dev': fields[2].strip().split('(')[0],
      'vg': fields[3].strip(),
      })
  return volumes
267

    
268

    
269
def BridgesExist(bridges_list):
  """Check whether all the given bridges exist on this node.

  Args:
    bridges_list: list of bridge names to check

  Returns:
    True if every bridge exists, False as soon as one is missing.

  """
  return all(utils.BridgeExists(bridge) for bridge in bridges_list)
281

    
282

    
283
def GetInstanceList():
  """Return the list of instances running on this node.

  Returns:
    list of instance names, e.g. ['instance1.example.com', ...].

  Raises:
    errors.HypervisorError: re-raised (after logging) if the
    hypervisor cannot enumerate its instances.

  """
  try:
    return hypervisor.GetHypervisor().ListInstances()
  except errors.HypervisorError as err:
    logger.Error("error enumerating instances: %s" % str(err))
    raise
299

    
300

    
301
def GetInstanceInfo(instance):
  """Return runtime information about a single instance.

  Args:
    instance: name of the instance (e.g. instance1.example.com)

  Returns:
    dict with keys 'memory' (MiB, int), 'state' (hypervisor state
    string, e.g. '-b---') and 'time' (cpu time, float); empty dict
    if the hypervisor has no data for this instance.

  """
  iinfo = hypervisor.GetHypervisor().GetInstanceInfo(instance)
  if iinfo is None:
    return {}

  # the hypervisor returns a tuple; positions 2, 4 and 5 hold memory,
  # state and cpu time respectively
  return {
    'memory': iinfo[2],
    'state': iinfo[4],
    'time': iinfo[5],
    }
325

    
326

    
327
def GetAllInstancesInfo():
  """Gather runtime data about all instances at once.

  Faster equivalent of calling GetInstanceInfo() for each instance
  when data about more than one instance is needed.

  Returns:
    dict keyed by instance name; each value is a dict with keys
    'memory' (MiB), 'vcpus', 'state' (hypervisor state string) and
    'time' (cpu time).

  """
  output = {}

  iinfo = hypervisor.GetHypervisor().GetAllInstancesInfo()
  if iinfo:
    for name, dummy_id, memory, vcpus, state, times in iinfo:
      output[name] = {
        'memory': memory,
        'vcpus': vcpus,
        'state': state,
        'time': times,
        }

  return output
357

    
358

    
359
def AddOSToInstance(instance, os_disk, swap_disk):
  """Add an os to an instance.

  Args:
    instance: the instance object
    os_disk: the instance-visible name of the os device
    swap_disk: the instance-visible name of the swap device

  Returns:
    True on success, False (after logging) if a disk cannot be found
    or the OS create script fails.

  Raises:
    errors.BlockDeviceError: if a located disk is not actually set up.

  """
  inst_os = OSFromDisk(instance.os)

  create_script = inst_os.create_script

  # locate the disk whose instance-visible name matches os_disk; the
  # for/else relies on the loop variable staying bound after break
  for os_device in instance.disks:
    if os_device.iv_name == os_disk:
      break
  else:
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
    return False

  # same lookup for the swap disk
  for swap_device in instance.disks:
    if swap_device.iv_name == swap_disk:
      break
  else:
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
    return False

  # resolve both disks to their real (activated) block devices and
  # open them so the OS create script can write to them
  real_os_dev = _RecursiveFindBD(os_device)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(os_device))
  real_os_dev.Open()

  real_swap_dev = _RecursiveFindBD(swap_device)
  if real_swap_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(swap_device))
  real_swap_dev.Open()

  # per-run log file capturing the create script's output
  logfile = "%s/add-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
                                     instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  command = utils.BuildShellCmd("cd %s && %s -i %s -b %s -s %s &>%s",
                                inst_os.path, create_script, instance.name,
                                real_os_dev.dev_path, real_swap_dev.dev_path,
                                logfile)

  result = utils.RunCmd(command)

  if result.failed:
    logger.Error("os create command '%s' returned error: %s"
                 " output: %s" %
                 (command, result.fail_reason, result.output))
    return False

  return True
417

    
418

    
419
def RunRenameInstance(instance, old_name, os_disk, swap_disk):
420
  """Run the OS rename script for an instance.
421

422
  Args:
423
    instance: the instance object
424
    old_name: the old name of the instance
425
    os_disk: the instance-visible name of the os device
426
    swap_disk: the instance-visible name of the swap device
427

428
  """
429
  inst_os = OSFromDisk(instance.os)
430

    
431
  script = inst_os.rename_script
432

    
433
  os_device = instance.FindDisk(os_disk)
434
  if os_device is None:
435
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
436
    return False
437

    
438
  swap_device = instance.FindDisk(swap_disk)
439
  if swap_device is None:
440
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
441
    return False
442

    
443
  real_os_dev = _RecursiveFindBD(os_device)
444
  if real_os_dev is None:
445
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
446
                                  str(os_device))
447
  real_os_dev.Open()
448

    
449
  real_swap_dev = _RecursiveFindBD(swap_device)
450
  if real_swap_dev is None:
451
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
452
                                  str(swap_device))
453
  real_swap_dev.Open()
454

    
455
  logfile = "%s/rename-%s-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
456
                                           old_name,
457
                                           instance.name, int(time.time()))
458
  if not os.path.exists(constants.LOG_OS_DIR):
459
    os.mkdir(constants.LOG_OS_DIR, 0750)
460

    
461
  command = utils.BuildShellCmd("cd %s && %s -o %s -n %s -b %s -s %s &>%s",
462
                                inst_os.path, script, old_name, instance.name,
463
                                real_os_dev.dev_path, real_swap_dev.dev_path,
464
                                logfile)
465

    
466
  result = utils.RunCmd(command)
467

    
468
  if result.failed:
469
    logger.Error("os create command '%s' returned error: %s"
470
                 " output: %s" %
471
                 (command, result.fail_reason, result.output))
472
    return False
473

    
474
  return True
475

    
476

    
477
def _GetVGInfo(vg_name):
  """Get informations about the volume group.

  Args:
    vg_name: the volume group

  Returns:
    { 'vg_size' : xxx, 'vg_free' : xxx, 'pv_count' : xxx }
    where
    vg_size is the total size of the volume group in MiB
    vg_free is the free size of the volume group in MiB
    pv_count are the number of physical disks in that vg

  Raises:
    errors.LVMError: if the volume group cannot be queried.

  """
  # headerless, suffixless, colon-separated output gives a single
  # "size:free:pv_count" line
  retval = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings",
                         "--nosuffix", "--units=m", "--separator=:", vg_name])

  if retval.failed:
    errmsg = "volume group %s not present" % vg_name
    logger.Error(errmsg)
    raise errors.LVMError(errmsg)
  # NOTE(review): this reads retval.stdout while the rest of this file
  # uses .output for RunCmd results -- confirm the result object
  # exposes both attributes
  valarr = retval.stdout.strip().split(':')
  retdic = {
    # sizes are fractional MiB strings; round to whole MiB integers
    "vg_size": int(round(float(valarr[0]), 0)),
    "vg_free": int(round(float(valarr[1]), 0)),
    "pv_count": int(valarr[2]),
    }
  return retdic
505

    
506

    
507
def _GatherBlockDevs(instance):
  """Resolve and open every block device of an instance.

  Runs on the primary node at instance startup; all disks must have
  been assembled beforehand.

  Args:
    instance: the instance whose disks to resolve

  Returns:
    list of (disk, device) pairs, one per instance disk.

  Raises:
    errors.BlockDeviceError: if any disk is not set up.

  """
  devices = []
  for disk in instance.disks:
    r_dev = _RecursiveFindBD(disk)
    if r_dev is None:
      raise errors.BlockDeviceError("Block device '%s' is not set up." %
                                    str(disk))
    r_dev.Open()
    devices.append((disk, r_dev))
  return devices
523

    
524

    
525
def StartInstance(instance, extra_args):
  """Start an instance unless it is already running.

  Args:
    instance: the instance object to start
    extra_args: extra arguments passed through to the hypervisor

  Returns:
    True if the instance is (now) running, False (after logging) if
    the hypervisor failed to start it.

  """
  if instance.name in GetInstanceList():
    # already up, nothing to do
    return True

  block_devices = _GatherBlockDevs(instance)
  hyper = hypervisor.GetHypervisor()

  try:
    hyper.StartInstance(instance, block_devices, extra_args)
  except errors.HypervisorError as err:
    logger.Error("Failed to start instance: %s" % err)
    return False

  return True
547

    
548

    
549
def ShutdownInstance(instance):
  """Shut an instance down.

  First requests a clean shutdown from the hypervisor and polls for
  about two minutes; if the instance is still up after that it is
  destroyed (forced stop).

  Args:
    instance: the instance object to shut down

  Returns:
    True if the instance is down afterwards, False (after logging)
    otherwise.

  """
  running_instances = GetInstanceList()

  if instance.name not in running_instances:
    # not running, nothing to do
    return True

  hyper = hypervisor.GetHypervisor()
  try:
    hyper.StopInstance(instance)
  except errors.HypervisorError as err:
    logger.Error("Failed to stop instance: %s" % err)
    return False

  # test every 10secs for 2min
  time.sleep(1)
  for dummy in range(11):
    if instance.name not in GetInstanceList():
      break
    time.sleep(10)
  else:
    # the clean shutdown did not succeed; fall back to destroying it
    logger.Error("shutdown of '%s' unsuccessful, using destroy" % instance)

    try:
      hyper.StopInstance(instance, force=True)
    except errors.HypervisorError as err:
      logger.Error("Failed to stop instance: %s" % err)
      return False

    time.sleep(1)
    if instance.name in GetInstanceList():
      # bug fix: the instance name was previously missing from this
      # log message (the '%s' had no argument applied)
      logger.Error("could not shutdown instance '%s' even by destroy" %
                   instance.name)
      return False

  return True
592

    
593

    
594
def CreateBlockDevice(disk, size, on_primary, info):
  """Creates a block device for an instance.

  Args:
    disk: a ganeti.objects.Disk object describing the device to create
    on_primary: whether we are creating on the primary node (children
                and the device itself are then assembled and opened)
    size: the size of the physical underlying devices
    info: text attached to the device via SetInfo() after creation

  Returns:
    the new unique_id of the device (this can sometime be
    computed only after creation), or None. On secondary nodes,
    it's not required to return anything.

  """
  # recursively assemble the children first, since the device itself
  # may need them in order to be assembled
  clist = []
  if disk.children:
    for child in disk.children:
      crdev = _RecursiveAssembleBD(child, on_primary)
      if on_primary or disk.AssembleOnSecondary():
        # we need the children open in case the device itself has to
        # be assembled
        crdev.Open()
      else:
        crdev.Close()
      clist.append(crdev)
  try:
    # a stale device with the same id may exist from a previous
    # (failed) creation; remove it before creating anew
    device = bdev.FindDevice(disk.dev_type, disk.physical_id, clist)
    if device is not None:
      logger.Info("removing existing device %s" % disk)
      device.Remove()
  except errors.BlockDeviceError, err:
    # NOTE(review): failing to attach to a pre-existing device is
    # deliberately ignored; real problems surface in Create() below
    pass

  device = bdev.Create(disk.dev_type, disk.physical_id,
                       clist, size)
  if device is None:
    raise ValueError("Can't create child device for %s, %s" %
                     (disk, size))
  if on_primary or disk.AssembleOnSecondary():
    device.Assemble()
    device.SetSyncSpeed(constants.SYNC_SPEED)
    # only open read/write where the disk policy requires it
    if on_primary or disk.OpenOnSecondary():
      device.Open(force=True)

  device.SetInfo(info)

  physical_id = device.unique_id
  return physical_id
643

    
644

    
645
def RemoveBlockDevice(disk):
  """Remove a block device and, recursively, its children.

  The device itself is removed first (when it can be attached at
  all), then each child in turn.

  Args:
    disk: the objects.Disk to remove

  Returns:
    True if the device and all children were removed (or could not
    be attached in the first place), False otherwise.

  """
  try:
    # allow a partial match while removing, so broken mirrors can
    # still be cleaned up
    rdev = _RecursiveFindBD(disk, allow_partial=True)
  except errors.BlockDeviceError:
    # probably not attachable; treat as already gone
    logger.Info("Can't attach to device %s in remove" % disk)
    rdev = None

  result = rdev.Remove() if rdev is not None else True

  for child in (disk.children or []):
    result = result and RemoveBlockDevice(child)

  return result
667

    
668

    
669
def _RecursiveAssembleBD(disk, as_primary):
  """Activate a block device for an instance.

  This is run on the primary and secondary nodes for an instance.

  This function is called recursively.

  Args:
    disk: a objects.Disk object
    as_primary: if we should make the block device read/write

  Returns:
    the assembled device or None (in case no device was assembled)

  If the assembly is not successful, an exception is raised.

  """
  # assemble all children first; their device objects are needed to
  # attach/assemble the parent
  children = []
  if disk.children:
    for chld_disk in disk.children:
      children.append(_RecursiveAssembleBD(chld_disk, as_primary))

  if as_primary or disk.AssembleOnSecondary():
    r_dev = bdev.AttachOrAssemble(disk.dev_type, disk.physical_id, children)
    r_dev.SetSyncSpeed(constants.SYNC_SPEED)
    result = r_dev
    # open read/write only where the disk policy requires it;
    # secondaries normally keep the device closed
    if as_primary or disk.OpenOnSecondary():
      r_dev.Open()
    else:
      r_dev.Close()
  else:
    # nothing to assemble for this device on this node
    # NOTE(review): True is returned here rather than None as the
    # docstring suggests; callers (e.g. AssembleBlockDevice) only
    # check isinstance(result, bdev.BlockDev)
    result = True
  return result
702

    
703

    
704
def AssembleBlockDevice(disk, as_primary):
  """Activate a block device for an instance.

  Thin wrapper over _RecursiveAssembleBD that converts an assembled
  device object into its /dev path.

  Returns:
    a /dev path for primary nodes
    True for secondary nodes

  """
  assembled = _RecursiveAssembleBD(disk, as_primary)
  if isinstance(assembled, bdev.BlockDev):
    return assembled.dev_path
  return assembled
718

    
719

    
720
def ShutdownBlockDevice(disk):
  """Shut down a block device and, recursively, its children.

  If the device can be found (attached) it is shut down first; the
  children are then shut down unconditionally, since shutting down a
  child does not require the parent to have been active. Nothing is
  cached between calls, unlike assembly.

  Args:
    disk: the objects.Disk to shut down

  Returns:
    True if the device and all children shut down cleanly (or were
    not found at all), False otherwise.

  """
  r_dev = _RecursiveFindBD(disk)
  result = r_dev.Shutdown() if r_dev is not None else True

  for child in (disk.children or []):
    result = result and ShutdownBlockDevice(child)

  return result
740

    
741

    
742
def MirrorAddChild(md_cdev, new_cdev):
  """Extend an MD raid1 array with a new member.

  Args:
    md_cdev: disk describing the md array (partial match allowed, so
             degraded arrays can still be extended)
    new_cdev: disk describing the device to add

  Returns:
    True on success, False (after logging) if either device cannot
    be found.

  """
  md_bdev = _RecursiveFindBD(md_cdev, allow_partial=True)
  if md_bdev is None:
    logger.Error("Can't find md device")
    return False

  new_bdev = _RecursiveFindBD(new_cdev)
  if new_bdev is None:
    logger.Error("Can't find new device to add")
    return False

  new_bdev.Open()
  md_bdev.AddChild(new_bdev)
  return True
757

    
758

    
759
def MirrorRemoveChild(md_cdev, new_cdev):
  """Reduce an MD raid1 array by detaching one member.

  Args:
    md_cdev: disk describing the md array
    new_cdev: disk describing the member to detach

  Returns:
    True on success, False if either device cannot be found.

  """
  md_bdev = _RecursiveFindBD(md_cdev)
  if md_bdev is None:
    return False

  new_bdev = _RecursiveFindBD(new_cdev)
  if new_bdev is None:
    return False

  # the md layer identifies members by their /dev path
  new_bdev.Open()
  md_bdev.RemoveChild(new_bdev.dev_path)
  return True
772

    
773

    
774
def GetMirrorStatus(disks):
  """Get the mirroring status of a list of devices.

  Args:
    disks: list of `objects.Disk`

  Returns:
    list of (mirror_done, estimated_time) tuples as produced by
    bdev.BlockDev.CombinedSyncStatus(), in the same order as disks.

  Raises:
    errors.BlockDeviceError: if any disk cannot be found.

  """
  status = []
  for dsk in disks:
    rbd = _RecursiveFindBD(dsk)
    if rbd is None:
      raise errors.BlockDeviceError("Can't find device %s" % str(dsk))
    status.append(rbd.CombinedSyncStatus())
  return status
792

    
793

    
794
def _RecursiveFindBD(disk, allow_partial=False):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance
    allow_partial: don't abort the find if a child of the
                   device can't be found; this is intended to be
                   used when repairing mirrors
                   NOTE(review): this flag is accepted but never used
                   in the body below (child lookups do not pass it
                   on) -- confirm whether that is intentional

  Returns:
    None if the device can't be found
    otherwise the device instance

  """
  # resolve all children first; their device objects are needed to
  # attach to the parent
  children = []
  if disk.children:
    for chdisk in disk.children:
      children.append(_RecursiveFindBD(chdisk))

  return bdev.FindDevice(disk.dev_type, disk.physical_id, children)
816

    
817

    
818
def FindBlockDevice(disk):
  """Check whether a device is activated and describe it.

  Args:
    disk: the objects.Disk instance

  Returns:
    None if the device can't be found, otherwise the tuple
    (device_path, major, minor, sync_percent, estimated_time,
    is_degraded).

  """
  rbd = _RecursiveFindBD(disk)
  if rbd is None:
    return None

  sync_p, est_t, is_degr = rbd.GetSyncStatus()
  return rbd.dev_path, rbd.major, rbd.minor, sync_p, est_t, is_degr
835

    
836

    
837
def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
  """Write a file to the filesystem.

  This allows the master to overwrite(!) a file. It will only perform
  the operation if the file belongs to a list of configuration files.

  Args:
    file_name: absolute path of the target file
    data: the contents to write
    mode: permission bits to set on the file
    uid, gid: ownership to set on the file
    atime, mtime: timestamps to set on the file

  Returns:
    True on success, False (after logging) if the name is not
    absolute or not an allowed upload target.

  """
  if not os.path.isabs(file_name):
    logger.Error("Filename passed to UploadFile is not absolute: '%s'" %
                 file_name)
    return False

  # whitelist of files the master may overwrite on this node
  allowed_files = [constants.CLUSTER_CONF_FILE, "/etc/hosts",
                   constants.SSH_KNOWN_HOSTS_FILE]
  allowed_files.extend(ssconf.SimpleStore().GetFileList())
  if file_name not in allowed_files:
    logger.Error("Filename passed to UploadFile not in allowed"
                 " upload targets: '%s'" % file_name)
    return False

  # write to a temporary file in the same directory and rename over
  # the target, so the replacement is atomic and never half-written
  dir_name, small_name = os.path.split(file_name)
  fd, new_name = tempfile.mkstemp('.new', small_name, dir_name)
  # here we need to make sure we remove the temp file, if any error
  # leaves it in place
  try:
    os.chown(new_name, uid, gid)
    os.chmod(new_name, mode)
    os.write(fd, data)
    os.fsync(fd)
    os.utime(new_name, (atime, mtime))
    os.rename(new_name, file_name)
  finally:
    os.close(fd)
    # after a successful rename this path no longer exists;
    # presumably RemoveFile ignores a missing path -- confirm
    utils.RemoveFile(new_name)
  return True
872

    
873

    
874
def _ErrnoOrStr(err):
875
  """Format an EnvironmentError exception.
876

877
  If the `err` argument has an errno attribute, it will be looked up
878
  and converted into a textual EXXXX description. Otherwise the string
879
  representation of the error will be returned.
880

881
  """
882
  if hasattr(err, 'errno'):
883
    detail = errno.errorcode[err.errno]
884
  else:
885
    detail = str(err)
886
  return detail
887

    
888

    
889
def _OSOndiskVersion(name, os_dir=None):
  """Compute and return the api version of a given OS.

  This function will try to read the api version of the os given by
  the 'name' parameter. By default, it wil use the constants.OS_DIR
  as top-level directory for OSes, but this can be overriden by the
  use of the os_dir parameter. Return value will be either an
  integer denoting the version or None in the case when this is not
  a valid OS name.

  Raises:
    errors.InvalidOS: if the version file is missing, not a regular
    file, unreadable, or does not hold an integer.

  """
  if os_dir is None:
    os_dir = os.path.sep.join([constants.OS_DIR, name])

  api_file = os.path.sep.join([os_dir, "ganeti_api_version"])

  # the version file must exist...
  try:
    st = os.stat(api_file)
  except EnvironmentError, err:
    raise errors.InvalidOS(name, "'ganeti_api_version' file not"
                           " found (%s)" % _ErrnoOrStr(err))

  # ...and be a regular file, not a directory/device/etc.
  if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
    raise errors.InvalidOS(name, "'ganeti_api_version' file is not"
                           " a regular file")

  # read at most 256 bytes; the file is expected to hold one integer
  try:
    f = open(api_file)
    try:
      api_version = f.read(256)
    finally:
      f.close()
  except EnvironmentError, err:
    raise errors.InvalidOS(name, "error while reading the"
                           " API version (%s)" % _ErrnoOrStr(err))

  api_version = api_version.strip()
  try:
    api_version = int(api_version)
  except (TypeError, ValueError), err:
    raise errors.InvalidOS(name, "API version is not integer (%s)" % str(err))

  return api_version
932

    
933

    
934
def DiagnoseOS(top_dir=None):
  """Compute the validity for all OSes.

  For every entry under top_dir (constants.OS_DIR when not given),
  one object is produced: an objects.OS instance when the entry is a
  valid OS, or the errors.InvalidOS exception explaining why it is
  not.

  Args:
    top_dir: directory to scan instead of constants.OS_DIR

  Returns:
    list of objects.OS and/or errors.InvalidOS instances, or False
    (after logging) if the directory cannot be listed.

  """
  if top_dir is None:
    top_dir = constants.OS_DIR

  try:
    candidates = os.listdir(top_dir)
  except EnvironmentError as err:
    logger.Error("Can't list the OS directory: %s" % str(err))
    return False

  result = []
  for name in candidates:
    try:
      result.append(OSFromDisk(name, os.path.sep.join([top_dir, name])))
    except errors.InvalidOS as err:
      # keep the exception itself as the diagnosis for this entry
      result.append(err)

  return result
964

    
965

    
966
def OSFromDisk(name, os_dir=None):
  """Create an OS instance from disk.

  This function will return an OS instance if the given name is a
  valid OS name. Otherwise, it will raise an appropriate
  `errors.InvalidOS` exception, detailing why this is not a valid
  OS.

  Args:
    name: the OS name
    os_dir: directory holding the OS definition (defaults to
            constants.OS_DIR/name)

  """
  if os_dir is None:
    os_dir = os.path.sep.join([constants.OS_DIR, name])

  # this also validates the on-disk layout; raises InvalidOS if the
  # version file is missing or malformed
  api_version = _OSOndiskVersion(name, os_dir)

  if api_version != constants.OS_API_VERSION:
    raise errors.InvalidOS(name, "API version mismatch (found %s want %s)"
                           % (api_version, constants.OS_API_VERSION))

  # OS Scripts dictionary, we will populate it with the actual script names
  os_scripts = {'create': '', 'export': '', 'import': '', 'rename': ''}

  for script in os_scripts:
    os_scripts[script] = os.path.sep.join([os_dir, script])

    # each script must exist, be a regular file and be executable by
    # its owner
    try:
      st = os.stat(os_scripts[script])
    except EnvironmentError, err:
      raise errors.InvalidOS(name, "'%s' script missing (%s)" %
                             (script, _ErrnoOrStr(err)))

    if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
      raise errors.InvalidOS(name, "'%s' script not executable" % script)

    if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
      raise errors.InvalidOS(name, "'%s' is not a regular file" % script)


  return objects.OS(name=name, path=os_dir,
                    create_script=os_scripts['create'],
                    export_script=os_scripts['export'],
                    import_script=os_scripts['import'],
                    rename_script=os_scripts['rename'],
                    api_version=api_version)
1009

    
1010

    
1011
def SnapshotBlockDevice(disk):
  """Create a snapshot copy of a block device.

  This function is called recursively, and the snapshot is actually created
  just for the leaf lvm backend device.

  Args:
    disk: the disk to be snapshotted

  Returns:
    a config entry for the actual lvm device snapshotted, or None when
    no snapshot could be made (no child of a multi-child disk matches
    the parent's size, or the lvm device is not set up).

  Raises:
    errors.ProgrammerError: if asked to snapshot a leaf device that is
      not lvm-based

  """
  if disk.children:
    if len(disk.children) == 1:
      # only one child, let's recurse on it
      return SnapshotBlockDevice(disk.children[0])
    else:
      # more than one child, choose one that matches
      for child in disk.children:
        if child.size == disk.size:
          # return implies breaking the loop
          return SnapshotBlockDevice(child)
      # no child matched: fall through, returning None
  elif disk.dev_type == "lvm":
    r_dev = _RecursiveFindBD(disk)
    if r_dev is not None:
      # let's stay on the safe side and ask for the full size, for now
      return r_dev.Snapshot(disk.size)
    else:
      return None
  else:
    # bugfix: the two literals used to concatenate without a space,
    # producing "...block device'%s'..." in the error message
    raise errors.ProgrammerError("Cannot snapshot non-lvm block device"
                                 " '%s' of type '%s'" %
                                 (disk.unique_id, disk.dev_type))


def ExportSnapshot(disk, dest_node, instance):
1048
  """Export a block device snapshot to a remote node.
1049

1050
  Args:
1051
    disk: the snapshot block device
1052
    dest_node: the node to send the image to
1053
    instance: instance being exported
1054

1055
  Returns:
1056
    True if successful, False otherwise.
1057

1058
  """
1059
  inst_os = OSFromDisk(instance.os)
1060
  export_script = inst_os.export_script
1061

    
1062
  logfile = "%s/exp-%s-%s-%s.log" % (constants.LOG_OS_DIR, inst_os.name,
1063
                                     instance.name, int(time.time()))
1064
  if not os.path.exists(constants.LOG_OS_DIR):
1065
    os.mkdir(constants.LOG_OS_DIR, 0750)
1066

    
1067
  real_os_dev = _RecursiveFindBD(disk)
1068
  if real_os_dev is None:
1069
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
1070
                                  str(disk))
1071
  real_os_dev.Open()
1072

    
1073
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
1074
  destfile = disk.physical_id[1]
1075

    
1076
  # the target command is built out of three individual commands,
1077
  # which are joined by pipes; we check each individual command for
1078
  # valid parameters
1079

    
1080
  expcmd = utils.BuildShellCmd("cd %s; %s -i %s -b %s 2>%s", inst_os.path,
1081
                               export_script, instance.name,
1082
                               real_os_dev.dev_path, logfile)
1083

    
1084
  comprcmd = "gzip"
1085

    
1086
  destcmd = utils.BuildShellCmd("mkdir -p %s && cat > %s/%s",
1087
                                destdir, destdir, destfile)
1088
  remotecmd = ssh.BuildSSHCmd(dest_node, 'root', destcmd)
1089

    
1090

    
1091

    
1092
  # all commands have been checked, so we're safe to combine them
1093
  command = '|'.join([expcmd, comprcmd, utils.ShellQuoteArgs(remotecmd)])
1094

    
1095
  result = utils.RunCmd(command)
1096

    
1097
  if result.failed:
1098
    logger.Error("os snapshot export command '%s' returned error: %s"
1099
                 " output: %s" %
1100
                 (command, result.fail_reason, result.output))
1101
    return False
1102

    
1103
  return True
1104

    
1105

    
1106
def FinalizeExport(instance, snap_disks):
  """Write out the export configuration information.

  Serializes the export metadata (export section plus an instance
  section with per-nic and per-disk details) into an ini-style file in
  the staging directory ("<instance>.new"), then replaces any previous
  export for this instance with the staging directory.

  Args:
    instance: instance configuration
    snap_disks: snapshot block devices

  Returns:
    False in case of error, True otherwise.

  """
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
  finaldestdir = os.path.join(constants.EXPORT_DIR, instance.name)

  config = objects.SerializableConfigParser()

  config.add_section(constants.INISECT_EXP)
  config.set(constants.INISECT_EXP, 'version', '0')
  config.set(constants.INISECT_EXP, 'timestamp', '%d' % int(time.time()))
  config.set(constants.INISECT_EXP, 'source', instance.primary_node)
  config.set(constants.INISECT_EXP, 'os', instance.os)
  config.set(constants.INISECT_EXP, 'compression', 'gzip')

  config.add_section(constants.INISECT_INS)
  config.set(constants.INISECT_INS, 'name', instance.name)
  config.set(constants.INISECT_INS, 'memory', '%d' % instance.memory)
  config.set(constants.INISECT_INS, 'vcpus', '%d' % instance.vcpus)
  config.set(constants.INISECT_INS, 'disk_template', instance.disk_template)

  # bugfix: the counters used to be left unassigned when the lists were
  # empty, causing a NameError below; -1 preserves the stored value's
  # historical "index of the last element" semantics (count - 1)
  # NOTE(review): 'nic_count'/'disk_count' are thus one less than the
  # actual counts - presumably the import side compensates; confirm
  # before changing
  nic_count = -1
  for nic_count, nic in enumerate(instance.nics):
    config.set(constants.INISECT_INS, 'nic%d_mac' %
               nic_count, '%s' % nic.mac)
    config.set(constants.INISECT_INS, 'nic%d_ip' % nic_count, '%s' % nic.ip)
  # TODO: redundant: on load can read nics until it doesn't exist
  config.set(constants.INISECT_INS, 'nic_count' , '%d' % nic_count)

  disk_count = -1
  for disk_count, disk in enumerate(snap_disks):
    config.set(constants.INISECT_INS, 'disk%d_ivname' % disk_count,
               ('%s' % disk.iv_name))
    config.set(constants.INISECT_INS, 'disk%d_dump' % disk_count,
               ('%s' % disk.physical_id[1]))
    config.set(constants.INISECT_INS, 'disk%d_size' % disk_count,
               ('%d' % disk.size))
  config.set(constants.INISECT_INS, 'disk_count' , '%d' % disk_count)

  cff = os.path.join(destdir, constants.EXPORT_CONF_FILE)
  cfo = open(cff, 'w')
  try:
    config.write(cfo)
  finally:
    cfo.close()

  # drop any previous export and move the staging dir into place
  shutil.rmtree(finaldestdir, True)
  shutil.move(destdir, finaldestdir)

  return True


def ExportInfo(dest):
  """Get export configuration information.

  Args:
    dest: directory containing the export

  Returns:
    A serializable config file containing the export info, or None if
    either of the mandatory sections is missing.

  """
  config = objects.SerializableConfigParser()
  config.read(os.path.join(dest, constants.EXPORT_CONF_FILE))

  # both sections are mandatory for a well-formed export
  for section in (constants.INISECT_EXP, constants.INISECT_INS):
    if not config.has_section(section):
      return None

  return config


def ImportOSIntoInstance(instance, os_disk, swap_disk, src_node, src_image):
1186
  """Import an os image into an instance.
1187

1188
  Args:
1189
    instance: the instance object
1190
    os_disk: the instance-visible name of the os device
1191
    swap_disk: the instance-visible name of the swap device
1192
    src_node: node holding the source image
1193
    src_image: path to the source image on src_node
1194

1195
  Returns:
1196
    False in case of error, True otherwise.
1197

1198
  """
1199
  inst_os = OSFromDisk(instance.os)
1200
  import_script = inst_os.import_script
1201

    
1202
  for os_device in instance.disks:
1203
    if os_device.iv_name == os_disk:
1204
      break
1205
  else:
1206
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
1207
    return False
1208

    
1209
  for swap_device in instance.disks:
1210
    if swap_device.iv_name == swap_disk:
1211
      break
1212
  else:
1213
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
1214
    return False
1215

    
1216
  real_os_dev = _RecursiveFindBD(os_device)
1217
  if real_os_dev is None:
1218
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
1219
                                  str(os_device))
1220
  real_os_dev.Open()
1221

    
1222
  real_swap_dev = _RecursiveFindBD(swap_device)
1223
  if real_swap_dev is None:
1224
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
1225
                                  str(swap_device))
1226
  real_swap_dev.Open()
1227

    
1228
  logfile = "%s/import-%s-%s-%s.log" % (constants.LOG_OS_DIR, instance.os,
1229
                                        instance.name, int(time.time()))
1230
  if not os.path.exists(constants.LOG_OS_DIR):
1231
    os.mkdir(constants.LOG_OS_DIR, 0750)
1232

    
1233
  destcmd = utils.BuildShellCmd('cat %s', src_image)
1234
  remotecmd = ssh.BuildSSHCmd(src_node, 'root', destcmd)
1235

    
1236
  comprcmd = "gunzip"
1237
  impcmd = utils.BuildShellCmd("(cd %s; %s -i %s -b %s -s %s &>%s)",
1238
                               inst_os.path, import_script, instance.name,
1239
                               real_os_dev.dev_path, real_swap_dev.dev_path,
1240
                               logfile)
1241

    
1242
  command = '|'.join([utils.ShellQuoteArgs(remotecmd), comprcmd, impcmd])
1243

    
1244
  result = utils.RunCmd(command)
1245

    
1246
  if result.failed:
1247
    logger.Error("os import command '%s' returned error: %s"
1248
                 " output: %s" %
1249
                 (command, result.fail_reason, result.output))
1250
    return False
1251

    
1252
  return True
1253

    
1254

    
1255
def ListExports():
  """Return a list of exports currently available on this machine.

  Returns:
    A list of directory entries under constants.EXPORT_DIR, or an
    empty list if the export directory does not exist.

  """
  if not os.path.isdir(constants.EXPORT_DIR):
    # no export directory means no exports
    return []
  return os.listdir(constants.EXPORT_DIR)


def RemoveExport(export):
1266
  """Remove an existing export from the node.
1267

1268
  Args:
1269
    export: the name of the export to remove
1270

1271
  Returns:
1272
    False in case of error, True otherwise.
1273

1274
  """
1275
  target = os.path.join(constants.EXPORT_DIR, export)
1276

    
1277
  shutil.rmtree(target)
1278
  # TODO: catch some of the relevant exceptions and provide a pretty
1279
  # error message if rmtree fails.
1280

    
1281
  return True
1282

    
1283

    
1284
class HooksRunner(object):
  """Hook runner.

  This class is instantiated on the node side (ganeti-noded) and not on
  the master side.

  """
  # only scripts whose names match this pattern are executed; anything
  # else (editor backups, package leftovers, ...) is reported as skipped
  RE_MASK = re.compile("^[a-zA-Z0-9_-]+$")

  def __init__(self, hooks_base_dir=None):
    """Constructor for hooks runner.

    Args:
      - hooks_base_dir: if not None, this overrides the
        constants.HOOKS_BASE_DIR (useful for unittests)

    """
    # docfix: the old docstring also described 'logs_base_dir' and
    # 'logging' parameters which do not exist
    if hooks_base_dir is None:
      hooks_base_dir = constants.HOOKS_BASE_DIR
    self._BASE_DIR = hooks_base_dir

  @staticmethod
  def ExecHook(script, env):
    """Exec one hook script.

    Args:
     - script: the full path to the script
     - env: the environment with which to exec the script

    Returns:
      A (success, output) tuple: success is True iff the script exited
      with status zero; output is up to 4 kB of its combined
      stdout/stderr.

    """
    # exec the process using subprocess and log the output
    fdstdin = None
    try:
      fdstdin = open("/dev/null", "r")
      child = subprocess.Popen([script], stdin=fdstdin, stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT, close_fds=True,
                               shell=False, cwd="/", env=env)
      output = ""
      try:
        output = child.stdout.read(4096)
        child.stdout.close()
      except EnvironmentError as err:
        output += "Hook script error: %s" % str(err)

      # retry wait() when it is interrupted by a signal
      while True:
        try:
          result = child.wait()
          break
        except EnvironmentError as err:
          if err.errno == errno.EINTR:
            continue
          raise
    finally:
      # try not to leak fds
      for fd in (fdstdin, ):
        if fd is not None:
          try:
            fd.close()
          except EnvironmentError:
            # just ignore close errors on the way out
            pass

    return result == 0, output

  def RunHooks(self, hpath, phase, env):
    """Run the scripts in the hooks directory.

    This method will not be usually overriden by child opcodes.

    Args:
      - hpath: base name of the hooks directory
        ("<hpath>-pre.d" or "<hpath>-post.d" under the base dir)
      - phase: constants.HOOKS_PHASE_PRE or constants.HOOKS_PHASE_POST
      - env: the environment passed to each hook script

    Returns:
      A list of ("<subdir>/<script>", HKR_* status, output) tuples,
      one per directory entry; an empty list if the hooks directory
      cannot be read.

    Raises:
      errors.ProgrammerError: if phase is not one of the two constants

    """
    if phase == constants.HOOKS_PHASE_PRE:
      suffix = "pre"
    elif phase == constants.HOOKS_PHASE_POST:
      suffix = "post"
    else:
      raise errors.ProgrammerError("Unknown hooks phase: '%s'" % phase)
    rr = []

    subdir = "%s-%s.d" % (hpath, suffix)
    dir_name = "%s/%s" % (self._BASE_DIR, subdir)
    try:
      dir_contents = os.listdir(dir_name)
    except OSError:
      # must log
      return rr

    # we use the standard python sort order,
    # so 00name is the recommended naming scheme
    dir_contents.sort()
    for relname in dir_contents:
      fname = os.path.join(dir_name, relname)
      # skip anything that is not a regular, executable, sanely-named file
      if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
          self.RE_MASK.match(relname) is not None):
        rrval = constants.HKR_SKIP
        output = ""
      else:
        result, output = self.ExecHook(fname, env)
        if not result:
          rrval = constants.HKR_FAIL
        else:
          rrval = constants.HKR_SUCCESS
      rr.append(("%s/%s" % (subdir, relname), rrval, output))

    return rr