Statistics
| Branch: | Tag: | Revision:

root / lib / backend.py @ 305a7297

History | View | Annotate | Download (39.1 kB)

1
#!/usr/bin/python
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Functions used by the node daemon"""
23

    
24

    
25
import os
26
import os.path
27
import shutil
28
import time
29
import tempfile
30
import stat
31
import errno
32
import re
33
import subprocess
34

    
35
from ganeti import logger
36
from ganeti import errors
37
from ganeti import utils
38
from ganeti import ssh
39
from ganeti import hypervisor
40
from ganeti import constants
41
from ganeti import bdev
42
from ganeti import objects
43
from ganeti import ssconf
44

    
45

    
46
def StartMaster():
  """Activate the local node as the cluster master.

  This runs the master start script, which also takes care of
  registering the cron script.

  Returns:
    True on success, False otherwise (the failure is logged).

  """
  result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "start"])
  if not result.failed:
    return True

  logger.Error("could not activate cluster interface with command %s,"
               " error: '%s'" % (result.cmd, result.output))
  return False
62

    
63

    
64
def StopMaster():
  """Deactivate this node as master.

  This runs the master stop script, which also removes the link to
  the master cron script.

  Returns:
    True on success, False otherwise (the failure is logged).

  """
  result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "stop"])
  if not result.failed:
    return True

  logger.Error("could not deactivate cluster interface with command %s,"
               " error: '%s'" % (result.cmd, result.output))
  return False
80

    
81

    
82
def _WriteKeyFile(path, data, mode):
  """Write key material to a file and restrict its permissions.

  Args:
    path: destination file name
    data: the contents to write
    mode: permission bits (stat.S_* flags) to set on the file

  """
  f = open(path, 'w')
  try:
    f.write(data)
  finally:
    f.close()
  # private key material must not be left with umask-derived
  # (potentially world-readable) permissions
  os.chmod(path, mode)


def AddNode(dsa, dsapub, rsa, rsapub, sshkey, sshpub):
  """Add this node to the cluster.

  This overwrites the host ssh keys (rsa and dsa) and root's ssh key
  pair, authorizes root's new public key and restarts the ssh daemon
  so the new host keys take effect.

  Args:
    dsa, dsapub: the private and public ssh host dsa keys
    rsa, rsapub: the private and public ssh host rsa keys
    sshkey, sshpub: root's private and public ssh keys

  Returns:
    True (write errors propagate as exceptions)

  """
  priv_mode = stat.S_IRUSR | stat.S_IWUSR            # 0600
  pub_mode = priv_mode | stat.S_IRGRP | stat.S_IROTH # 0644

  _WriteKeyFile("/etc/ssh/ssh_host_rsa_key", rsa, priv_mode)
  _WriteKeyFile("/etc/ssh/ssh_host_rsa_key.pub", rsapub, pub_mode)
  _WriteKeyFile("/etc/ssh/ssh_host_dsa_key", dsa, priv_mode)
  _WriteKeyFile("/etc/ssh/ssh_host_dsa_key.pub", dsapub, pub_mode)

  if not os.path.isdir("/root/.ssh"):
    # sshd requires ~/.ssh not to be accessible by others
    os.mkdir("/root/.ssh", stat.S_IRWXU)

  _WriteKeyFile("/root/.ssh/id_dsa", sshkey, priv_mode)
  _WriteKeyFile("/root/.ssh/id_dsa.pub", sshpub, pub_mode)

  f = open('/root/.ssh/id_dsa.pub', 'r')
  try:
    utils.AddAuthorizedKey('/root/.ssh/authorized_keys', f.read(8192))
  finally:
    f.close()

  utils.RunCmd([constants.SSH_INITD_SCRIPT, "restart"])

  return True
126

    
127

    
128
def LeaveCluster():
  """Clean up this node and prepare it for removal from the cluster.

  This removes the files in the ganeti data directory, revokes root's
  ssh key from the authorized keys file and deletes root's ssh key
  pair.

  """
  if os.path.exists(constants.DATA_DIR):
    for dirpath, dirnames, filenames in os.walk(constants.DATA_DIR):
      if dirpath != constants.DATA_DIR:
        # only the top-level data dir is cleaned, not subdirectories
        continue
      for fname in filenames:
        os.unlink(os.path.join(dirpath, fname))

  f = open('/root/.ssh/id_dsa.pub', 'r')
  try:
    pub_key = f.read(8192)
  finally:
    f.close()
  utils.RemoveAuthorizedKey('/root/.ssh/authorized_keys', pub_key)

  for key_file in ('/root/.ssh/id_dsa', '/root/.ssh/id_dsa.pub'):
    utils.RemoveFile(key_file)
146

    
147

    
148
def GetNodeInfo(vgname):
  """Collect information about this node.

  Args:
    vgname: name of the volume group to query

  Returns:
    a dict with at least 'vg_size' and 'vg_free' (size/free space of
    the volume group, in MiB), extended with whatever the hypervisor
    reports for this node (memory figures in MiB).

  """
  vginfo = _GetVGInfo(vgname)
  node_info = {
    'vg_size': vginfo['vg_size'],
    'vg_free': vginfo['vg_free'],
    }

  hyp_info = hypervisor.GetHypervisor().GetNodeInfo()
  if hyp_info is not None:
    node_info.update(hyp_info)

  return node_info
174

    
175

    
176
def VerifyNode(what):
  """Verify the status of the local node.

  Args:
    what: a dictionary describing which checks to run:
      'filelist': list of files for which to compute checksums
      'nodelist': list of nodes to check ssh communication with
      'hypervisor': if present, run the hypervisor-specific verify

  Returns:
    a dict with one entry per requested check; for 'nodelist', only
    the nodes that failed appear, mapped to the failure message.

  """
  result = {}

  if 'hypervisor' in what:
    result['hypervisor'] = hypervisor.GetHypervisor().Verify()

  if 'filelist' in what:
    result['filelist'] = utils.FingerprintFiles(what['filelist'])

  if 'nodelist' in what:
    failed_nodes = {}
    for node in what['nodelist']:
      # checks known_hosts entry and that the remote node resolves
      # its own hostname to the name we know it by
      success, message = ssh.VerifyNodeHostname(node)
      if not success:
        failed_nodes[node] = message
    result['nodelist'] = failed_nodes

  return result
209

    
210

    
211
def GetVolumeList(vg_name):
  """Compute the list of logical volumes and their size.

  Args:
    vg_name: the volume group to list

  Returns:
    a dict mapping each volume name to its size as reported by lvs
    (e.g. 'test1': '20.06MiB'); an empty dict if the lvs command
    fails (the error is logged).

  """
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m",
                         "-oname,size", vg_name])
  if result.failed:
    logger.Error("Failed to list logical volumes, lvs output: %s" %
                 result.output)
    return {}

  volumes = {}
  for line in result.output.splitlines():
    lv_name, lv_size = line.split()
    volumes[lv_name] = lv_size
  return volumes
228

    
229

    
230
def ListVolumeGroups():
  """List the volume groups and their size.

  Returns:
    a dict mapping each volume group name to its size (delegates to
    utils.ListVolumeGroups).

  """
  return utils.ListVolumeGroups()
238

    
239

    
240
def NodeVolumes():
  """List all logical volumes on this node.

  Returns:
    a list of dicts, one per volume, with keys 'name', 'size', 'dev'
    (the first backing physical device) and 'vg'; an empty dict if
    the lvs command fails (the error is logged).

  """
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=|",
                         "--options=lv_name,lv_size,devices,vg_name"])
  if result.failed:
    logger.Error("Failed to list logical volumes, lvs output: %s" %
                 result.output)
    return {}

  volumes = []
  for line in result.output.splitlines():
    fields = [field.strip() for field in line.split('|')]
    # lvs reports devices as "/dev/path(extent)"; keep only the path
    volumes.append({
      'name': fields[0],
      'size': fields[1],
      'dev': fields[2].split('(')[0],
      'vg': fields[3],
      })
  return volumes
267

    
268

    
269
def BridgesExist(bridges_list):
  """Check whether all the named bridges exist on this node.

  Args:
    bridges_list: list of bridge names to check

  Returns:
    True if every bridge exists; False as soon as one is found
    missing (remaining bridges are not checked).

  """
  all_present = True
  for bridge in bridges_list:
    if not utils.BridgeExists(bridge):
      all_present = False
      break
  return all_present
281

    
282

    
283
def GetInstanceList():
284
  """ provides a list of instances
285

286
  Returns:
287
    A list of all running instances on the current node
288
    - instance1.example.com
289
    - instance2.example.com
290

291
  """
292
  try:
293
    names = hypervisor.GetHypervisor().ListInstances()
294
  except errors.HypervisorError, err:
295
    logger.Error("error enumerating instances: %s" % str(err))
296
    raise
297

    
298
  return names
299

    
300

    
301
def GetInstanceInfo(instance):
  """Return runtime information about an instance.

  Args:
    instance: name of the instance (e.g. instance1.example.com)

  Returns:
    a dict with keys 'memory' (memory size, int), 'state' (the
    hypervisor run-state string, e.g. '-b---') and 'time' (cpu time,
    float); an empty dict if the hypervisor does not report the
    instance.

  """
  iinfo = hypervisor.GetHypervisor().GetInstanceInfo(instance)
  if iinfo is None:
    return {}
  # iinfo fields at indices 2/4/5 are memory, state and cpu time
  # (same tuple layout GetAllInstancesInfo unpacks)
  return {
    'memory': iinfo[2],
    'state': iinfo[4],
    'time': iinfo[5],
    }
325

    
326

    
327
def GetAllInstancesInfo():
  """Gather data about all instances on this node.

  This is the equivalent of `GetInstanceInfo()`, except that it
  queries the hypervisor once for all instances, which is faster when
  data about more than one instance is needed.

  Returns:
    a dict keyed by instance name; each value is a dict with keys
    'memory' (int), 'vcpus' (number of cpus), 'state' (hypervisor
    state string) and 'time' (cpu time, float)

  """
  output = {}
  iinfo = hypervisor.GetHypervisor().GetAllInstancesInfo()
  if iinfo:
    for entry in iinfo:
      name, inst_id, memory, vcpus, state, times = entry
      output[name] = {
        'memory': memory,
        'vcpus': vcpus,
        'state': state,
        'time': times,
        }
  return output
357

    
358

    
359
def AddOSToInstance(instance, os_disk, swap_disk):
360
  """Add an os to an instance.
361

362
  Args:
363
    instance: the instance object
364
    os_disk: the instance-visible name of the os device
365
    swap_disk: the instance-visible name of the swap device
366

367
  """
368
  inst_os = OSFromDisk(instance.os)
369

    
370
  create_script = inst_os.create_script
371

    
372
  os_device = instance.FindDisk(os_disk)
373
  if os_device is None:
374
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
375
    return False
376

    
377
  swap_device = instance.FindDisk(swap_disk)
378
  if swap_device is None:
379
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
380
    return False
381

    
382
  real_os_dev = _RecursiveFindBD(os_device)
383
  if real_os_dev is None:
384
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
385
                                  str(os_device))
386
  real_os_dev.Open()
387

    
388
  real_swap_dev = _RecursiveFindBD(swap_device)
389
  if real_swap_dev is None:
390
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
391
                                  str(swap_device))
392
  real_swap_dev.Open()
393

    
394
  logfile = "%s/add-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
395
                                     instance.name, int(time.time()))
396
  if not os.path.exists(constants.LOG_OS_DIR):
397
    os.mkdir(constants.LOG_OS_DIR, 0750)
398

    
399
  command = utils.BuildShellCmd("cd %s && %s -i %s -b %s -s %s &>%s",
400
                                inst_os.path, create_script, instance.name,
401
                                real_os_dev.dev_path, real_swap_dev.dev_path,
402
                                logfile)
403

    
404
  result = utils.RunCmd(command)
405

    
406
  if result.failed:
407
    logger.Error("os create command '%s' returned error: %s"
408
                 " output: %s" %
409
                 (command, result.fail_reason, result.output))
410
    return False
411

    
412
  return True
413

    
414

    
415
def RunRenameInstance(instance, old_name, os_disk, swap_disk):
416
  """Run the OS rename script for an instance.
417

418
  Args:
419
    instance: the instance object
420
    old_name: the old name of the instance
421
    os_disk: the instance-visible name of the os device
422
    swap_disk: the instance-visible name of the swap device
423

424
  """
425
  inst_os = OSFromDisk(instance.os)
426

    
427
  script = inst_os.rename_script
428

    
429
  os_device = instance.FindDisk(os_disk)
430
  if os_device is None:
431
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
432
    return False
433

    
434
  swap_device = instance.FindDisk(swap_disk)
435
  if swap_device is None:
436
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
437
    return False
438

    
439
  real_os_dev = _RecursiveFindBD(os_device)
440
  if real_os_dev is None:
441
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
442
                                  str(os_device))
443
  real_os_dev.Open()
444

    
445
  real_swap_dev = _RecursiveFindBD(swap_device)
446
  if real_swap_dev is None:
447
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
448
                                  str(swap_device))
449
  real_swap_dev.Open()
450

    
451
  logfile = "%s/rename-%s-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
452
                                           old_name,
453
                                           instance.name, int(time.time()))
454
  if not os.path.exists(constants.LOG_OS_DIR):
455
    os.mkdir(constants.LOG_OS_DIR, 0750)
456

    
457
  command = utils.BuildShellCmd("cd %s && %s -o %s -n %s -b %s -s %s &>%s",
458
                                inst_os.path, script, old_name, instance.name,
459
                                real_os_dev.dev_path, real_swap_dev.dev_path,
460
                                logfile)
461

    
462
  result = utils.RunCmd(command)
463

    
464
  if result.failed:
465
    logger.Error("os create command '%s' returned error: %s"
466
                 " output: %s" %
467
                 (command, result.fail_reason, result.output))
468
    return False
469

    
470
  return True
471

    
472

    
473
def _GetVGInfo(vg_name):
  """Get information about the volume group.

  Args:
    vg_name: the volume group to query

  Returns:
    { 'vg_size' : xxx, 'vg_free' : xxx, 'pv_count' : xxx }
    where
    vg_size is the total size of the volume group in MiB
    vg_free is the free size of the volume group in MiB
    pv_count are the number of physical disks in that vg

  Raises:
    errors.LVMError: if the volume group is not present or its vgs
    output cannot be parsed

  """
  retval = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings",
                         "--nosuffix", "--units=m", "--separator=:", vg_name])

  if retval.failed:
    errmsg = "volume group %s not present" % vg_name
    logger.Error(errmsg)
    raise errors.LVMError(errmsg)

  valarr = retval.stdout.strip().split(':')
  # validate before indexing so a malformed vgs output raises a clear
  # LVMError instead of an IndexError/ValueError
  if len(valarr) != 3:
    errmsg = "cannot parse vgs output for volume group %s" % vg_name
    logger.Error(errmsg)
    raise errors.LVMError(errmsg)

  retdic = {
    "vg_size": int(round(float(valarr[0]), 0)),
    "vg_free": int(round(float(valarr[1]), 0)),
    "pv_count": int(valarr[2]),
    }
  return retdic
501

    
502

    
503
def _GatherBlockDevs(instance):
  """Set up an instance's block device(s).

  This is run on the primary node at instance startup; the block
  devices must already be assembled.

  Args:
    instance: the instance object whose disks to look up

  Returns:
    a list of (disk, device) pairs, where disk is the objects.Disk
    entry and device the corresponding attached block device

  Raises:
    errors.BlockDeviceError: if any disk cannot be found

  """
  assembled = []
  for cfg_disk in instance.disks:
    attached = _RecursiveFindBD(cfg_disk)
    if attached is None:
      raise errors.BlockDeviceError("Block device '%s' is not set up." %
                                    str(cfg_disk))
    attached.Open()
    assembled.append((cfg_disk, attached))
  return assembled
519

    
520

    
521
def StartInstance(instance, extra_args):
522
  """Start an instance.
523

524
  Args:
525
    instance - name of instance to start.
526

527
  """
528
  running_instances = GetInstanceList()
529

    
530
  if instance.name in running_instances:
531
    return True
532

    
533
  block_devices = _GatherBlockDevs(instance)
534
  hyper = hypervisor.GetHypervisor()
535

    
536
  try:
537
    hyper.StartInstance(instance, block_devices, extra_args)
538
  except errors.HypervisorError, err:
539
    logger.Error("Failed to start instance: %s" % err)
540
    return False
541

    
542
  return True
543

    
544

    
545
def ShutdownInstance(instance):
546
  """Shut an instance down.
547

548
  Args:
549
    instance - name of instance to shutdown.
550

551
  """
552
  running_instances = GetInstanceList()
553

    
554
  if instance.name not in running_instances:
555
    return True
556

    
557
  hyper = hypervisor.GetHypervisor()
558
  try:
559
    hyper.StopInstance(instance)
560
  except errors.HypervisorError, err:
561
    logger.Error("Failed to stop instance: %s" % err)
562
    return False
563

    
564
  # test every 10secs for 2min
565
  shutdown_ok = False
566

    
567
  time.sleep(1)
568
  for dummy in range(11):
569
    if instance.name not in GetInstanceList():
570
      break
571
    time.sleep(10)
572
  else:
573
    # the shutdown did not succeed
574
    logger.Error("shutdown of '%s' unsuccessful, using destroy" % instance)
575

    
576
    try:
577
      hyper.StopInstance(instance, force=True)
578
    except errors.HypervisorError, err:
579
      logger.Error("Failed to stop instance: %s" % err)
580
      return False
581

    
582
    time.sleep(1)
583
    if instance.name in GetInstanceList():
584
      logger.Error("could not shutdown instance '%s' even by destroy")
585
      return False
586

    
587
  return True
588

    
589

    
590
def CreateBlockDevice(disk, size, on_primary, info):
  """Creates a block device for an instance.

  Args:
    disk: a ganeti.objects.Disk object describing the device to create
    size: the size of the physical underlying device
    on_primary: whether this is the primary node; if true (or if the
                disk type requires it on secondaries) the device is
                also assembled and possibly opened
    info: data recorded on the device via SetInfo after creation

  Returns:
    the new unique_id of the device (this can sometime be
    computed only after creation), or None. On secondary nodes,
    it's not required to return anything.

  """
  clist = []
  if disk.children:
    # assemble the children first; they are needed by the parent's
    # own create/assemble below
    for child in disk.children:
      crdev = _RecursiveAssembleBD(child, on_primary)
      if on_primary or disk.AssembleOnSecondary():
        # we need the children open in case the device itself has to
        # be assembled
        crdev.Open()
      else:
        crdev.Close()
      clist.append(crdev)
  try:
    # if a matching device already exists, remove it so the creation
    # below starts from a clean slate
    device = bdev.FindDevice(disk.dev_type, disk.physical_id, clist)
    if device is not None:
      logger.Info("removing existing device %s" % disk)
      device.Remove()
  except errors.BlockDeviceError, err:
    # NOTE(review): failures here (e.g. unable to attach) are
    # deliberately ignored; if the device is really stuck, the
    # Create call below will fail on its own
    pass

  device = bdev.Create(disk.dev_type, disk.physical_id,
                       clist, size)
  if device is None:
    raise ValueError("Can't create child device for %s, %s" %
                     (disk, size))
  if on_primary or disk.AssembleOnSecondary():
    device.Assemble()
    device.SetSyncSpeed(constants.SYNC_SPEED)
    # open read/write only where needed; secondaries may keep the
    # device closed depending on the disk type
    if on_primary or disk.OpenOnSecondary():
      device.Open(force=True)

  device.SetInfo(info)

  physical_id = device.unique_id
  return physical_id
639

    
640

    
641
def RemoveBlockDevice(disk):
642
  """Remove a block device.
643

644
  This is intended to be called recursively.
645

646
  """
647
  try:
648
    # since we are removing the device, allow a partial match
649
    # this allows removal of broken mirrors
650
    rdev = _RecursiveFindBD(disk, allow_partial=True)
651
  except errors.BlockDeviceError, err:
652
    # probably can't attach
653
    logger.Info("Can't attach to device %s in remove" % disk)
654
    rdev = None
655
  if rdev is not None:
656
    result = rdev.Remove()
657
  else:
658
    result = True
659
  if disk.children:
660
    for child in disk.children:
661
      result = result and RemoveBlockDevice(child)
662
  return result
663

    
664

    
665
def _RecursiveAssembleBD(disk, as_primary):
  """Activate a block device for an instance.

  This is run on the primary and secondary nodes for an instance.

  This function is called recursively.

  Args:
    disk: a objects.Disk object
    as_primary: if we should make the block device read/write

  Returns:
    the assembled device (a bdev instance) when this node had to
    assemble it, or True when nothing needed assembling (secondary
    node with a disk type not assembled on secondaries) -- callers
    must handle both return shapes

  If the assembly is not successful, an exception is raised.

  """
  # assemble children first so they can be handed to the parent's
  # AttachOrAssemble call
  children = []
  if disk.children:
    for chld_disk in disk.children:
      children.append(_RecursiveAssembleBD(chld_disk, as_primary))

  if as_primary or disk.AssembleOnSecondary():
    r_dev = bdev.AttachOrAssemble(disk.dev_type, disk.physical_id, children)
    r_dev.SetSyncSpeed(constants.SYNC_SPEED)
    result = r_dev
    # open read/write only where required; otherwise keep it closed
    if as_primary or disk.OpenOnSecondary():
      r_dev.Open()
    else:
      r_dev.Close()
  else:
    result = True
  return result
698

    
699

    
700
def AssembleBlockDevice(disk, as_primary):
  """Activate a block device for an instance.

  This is a wrapper over _RecursiveAssembleBD that converts the
  assembled device into its /dev path.

  Args:
    disk: the objects.Disk to assemble
    as_primary: whether the device should be opened read/write

  Returns:
    a /dev path for primary nodes
    True for secondary nodes

  """
  assembled = _RecursiveAssembleBD(disk, as_primary)
  if not isinstance(assembled, bdev.BlockDev):
    return assembled
  return assembled.dev_path
714

    
715

    
716
def ShutdownBlockDevice(disk):
  """Shut down a block device.

  First, if the device is attachable, it is shut down; then each of
  its children is shut down in turn. This function is called
  recursively; unlike assembly, shutting down a child does not
  require the parent device to be active.

  Args:
    disk: the objects.Disk to shut down

  Returns:
    True if the device (and, short-circuiting, its children) were
    shut down or not attached, False otherwise

  """
  attached = _RecursiveFindBD(disk)
  if attached is None:
    result = True
  else:
    result = attached.Shutdown()

  if disk.children:
    for child in disk.children:
      result = result and ShutdownBlockDevice(child)
  return result
736

    
737

    
738
def MirrorAddChild(md_cdev, new_cdev):
  """Extend an MD raid1 array with a new device.

  Args:
    md_cdev: objects.Disk for the md array (a partial match is
             allowed, so degraded arrays can still be extended)
    new_cdev: objects.Disk for the device to add

  Returns:
    True on success, False if either device can't be found (logged)

  """
  md_bdev = _RecursiveFindBD(md_cdev, allow_partial=True)
  if md_bdev is None:
    logger.Error("Can't find md device")
    return False

  new_bdev = _RecursiveFindBD(new_cdev)
  if new_bdev is None:
    logger.Error("Can't find new device to add")
    return False

  new_bdev.Open()
  md_bdev.AddChild(new_bdev)
  return True
753

    
754

    
755
def MirrorRemoveChild(md_cdev, new_cdev):
  """Reduce an MD raid1 array by removing a device.

  Args:
    md_cdev: objects.Disk for the md array
    new_cdev: objects.Disk for the device to remove

  Returns:
    True on success, False if either device can't be found (logged)

  """
  md_bdev = _RecursiveFindBD(md_cdev)
  if md_bdev is None:
    # log the failure, for consistency with MirrorAddChild (the
    # original returned False silently)
    logger.Error("Can't find md device")
    return False
  new_bdev = _RecursiveFindBD(new_cdev)
  if new_bdev is None:
    logger.Error("Can't find device to remove")
    return False
  new_bdev.Open()
  md_bdev.RemoveChild(new_bdev.dev_path)
  return True
768

    
769

    
770
def GetMirrorStatus(disks):
  """Get the mirroring status of a list of devices.

  Args:
    disks: list of `objects.Disk`

  Returns:
    a list of (mirror_done, estimated_time) tuples, the results of
    bdev.BlockDevice.CombinedSyncStatus(), in the same order as the
    input disks

  Raises:
    errors.BlockDeviceError: if any of the devices can't be found

  """
  status = []
  for disk in disks:
    attached = _RecursiveFindBD(disk)
    if attached is None:
      raise errors.BlockDeviceError("Can't find device %s" % str(disk))
    status.append(attached.CombinedSyncStatus())
  return status
788

    
789

    
790
def _RecursiveFindBD(disk, allow_partial=False):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance
    allow_partial: don't abort the find if a child of the
                   device can't be found; this is intended to be
                   used when repairing mirrors

  Returns:
    None if the device can't be found
    otherwise the device instance

  """
  # NOTE(review): 'allow_partial' is accepted but never used in this
  # body -- it is neither propagated to the recursive calls below nor
  # passed to bdev.FindDevice; confirm whether that is intentional
  children = []
  if disk.children:
    for chdisk in disk.children:
      children.append(_RecursiveFindBD(chdisk))

  return bdev.FindDevice(disk.dev_type, disk.physical_id, children)
812

    
813

    
814
def FindBlockDevice(disk):
  """Check if a device is activated.

  If so, return information about the real device.

  Args:
    disk: the objects.Disk instance

  Returns:
    None if the device can't be found, otherwise the tuple
    (device_path, major, minor, sync_percent, estimated_time,
    is_degraded)

  """
  rbd = _RecursiveFindBD(disk)
  if rbd is None:
    return None
  sync_p, est_t, is_degr = rbd.GetSyncStatus()
  return (rbd.dev_path, rbd.major, rbd.minor, sync_p, est_t, is_degr)
831

    
832

    
833
def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
  """Write a file to the filesystem.

  This allows the master to overwrite(!) a file. It will only perform
  the operation if the file belongs to a list of configuration files.
  The data is written to a temporary file in the same directory and
  renamed over the target, so readers never see a partial file.

  Args:
    file_name: absolute path of the target file
    data: the contents to write
    mode: permission bits to set on the file
    uid, gid: numeric owner and group to set
    atime, mtime: access/modification times to set

  Returns:
    True on success, False if the name is not absolute or not an
    allowed upload target (the error is logged)

  """
  if not os.path.isabs(file_name):
    logger.Error("Filename passed to UploadFile is not absolute: '%s'" %
                 file_name)
    return False

  allowed_files = [constants.CLUSTER_CONF_FILE, "/etc/hosts",
                   constants.SSH_KNOWN_HOSTS_FILE]
  allowed_files.extend(ssconf.SimpleStore().GetFileList())
  if file_name not in allowed_files:
    logger.Error("Filename passed to UploadFile not in allowed"
                 " upload targets: '%s'" % file_name)
    return False

  dir_name, small_name = os.path.split(file_name)
  fd, new_name = tempfile.mkstemp('.new', small_name, dir_name)
  # here we need to make sure we remove the temp file, if any error
  # leaves it in place; after a successful rename the removal is a
  # harmless no-op on the (now gone) temporary name
  try:
    os.chown(new_name, uid, gid)
    os.chmod(new_name, mode)
    os.write(fd, data)
    os.fsync(fd)
    os.utime(new_name, (atime, mtime))
    os.rename(new_name, file_name)
  finally:
    os.close(fd)
    utils.RemoveFile(new_name)
  return True
868

    
869

    
870
def _ErrnoOrStr(err):
871
  """Format an EnvironmentError exception.
872

873
  If the `err` argument has an errno attribute, it will be looked up
874
  and converted into a textual EXXXX description. Otherwise the string
875
  representation of the error will be returned.
876

877
  """
878
  if hasattr(err, 'errno'):
879
    detail = errno.errorcode[err.errno]
880
  else:
881
    detail = str(err)
882
  return detail
883

    
884
def _OSSearch(name, search_path=None):
885
  """Search for OSes with the given name in the search_path.
886

887
  Args:
888
    name: The name of the OS to look for
889
    search_path: List of dirs to search (defaults to constants.OS_SEARCH_PATH)
890

891
  Returns:
892
    The base_dir the OS resides in
893

894
  """
895

    
896
  if search_path is None:
897
    search_path = constants.OS_SEARCH_PATH
898

    
899
  for dir in search_path:
900
    t_os_dir = os.path.sep.join([dir, name])
901
    if os.path.isdir(t_os_dir):
902
        return dir
903

    
904
  return None
905

    
906
def _OSOndiskVersion(name, os_dir):
  """Compute and return the api version of a given OS.

  This function will try to read the api version of the os given by
  the 'name' parameter and residing in the 'os_dir' directory.

  Args:
    name: the OS name (used for error reporting only)
    os_dir: directory holding the OS; must contain a regular file
            named 'ganeti_api_version'

  Return value will be either an integer denoting the version or None in the
  case when this is not a valid OS name.

  Raises:
    errors.InvalidOS: if the version file is missing, not a regular
    file, unreadable, or does not contain an integer

  """
  api_file = os.path.sep.join([os_dir, "ganeti_api_version"])

  try:
    st = os.stat(api_file)
  except EnvironmentError, err:
    raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file not"
                           " found (%s)" % _ErrnoOrStr(err))

  if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
    raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file is not"
                           " a regular file")

  try:
    f = open(api_file)
    try:
      # 256 bytes is far more than enough for a version number
      api_version = f.read(256)
    finally:
      f.close()
  except EnvironmentError, err:
    raise errors.InvalidOS(name, os_dir, "error while reading the"
                           " API version (%s)" % _ErrnoOrStr(err))

  api_version = api_version.strip()
  try:
    api_version = int(api_version)
  except (TypeError, ValueError), err:
    raise errors.InvalidOS(name, os_dir,
                           "API version is not integer (%s)" % str(err))

  return api_version
947

    
948

    
949
def DiagnoseOS(top_dirs=None):
950
  """Compute the validity for all OSes.
951

952
  For each name in all the given top directories (if not given defaults i
953
  to constants.OS_SEARCH_PATH it will return an object. If this is a valid
954
  os, the object will be an instance of the object.OS class. If not,
955
  it will be an instance of errors.InvalidOS and this signifies that
956
  this name does not correspond to a valid OS.
957

958
  Returns:
959
    list of objects
960

961
  """
962
  if top_dirs is None:
963
    top_dirs = constants.OS_SEARCH_PATH
964

    
965
  result = []
966
  for dir in top_dirs:
967
    if os.path.isdir(dir):
968
      try:
969
        f_names = os.listdir(dir)
970
      except EnvironmentError, err:
971
        logger.Error("Can't list the OS directory %s: %s" % (dir,str(err)))
972
        break
973
      for name in f_names:
974
        try:
975
          os_inst = OSFromDisk(name, base_dir=dir)
976
          result.append(os_inst)
977
        except errors.InvalidOS, err:
978
          result.append(err)
979

    
980
  return result
981

    
982

    
983
def OSFromDisk(name, base_dir=None):
984
  """Create an OS instance from disk.
985

986
  This function will return an OS instance if the given name is a
987
  valid OS name. Otherwise, it will raise an appropriate
988
  `errors.InvalidOS` exception, detailing why this is not a valid
989
  OS.
990

991
  Args:
992
    os_dir: Directory containing the OS scripts. Defaults to a search
993
            in all the OS_SEARCH_PATH directories.
994

995
  """
996

    
997
  if base_dir is None:
998
    base_dir = _OSSearch(name)
999
  else:
1000
    if not os.path.isdir(os.path.sep.join([base_dir, name])):
1001
      raise errors.InvalidOS(name, "OS not found in base dir %s" % base_dir)
1002

    
1003
  if base_dir is None:
1004
    raise errors.InvalidOS(name, None, "OS dir not found in search path")
1005

    
1006
  os_dir = os.path.sep.join([base_dir, name])
1007
  api_version = _OSOndiskVersion(name, os_dir)
1008

    
1009
  if api_version != constants.OS_API_VERSION:
1010
    raise errors.InvalidOS(name, os_dir, "API version mismatch"
1011
                           " (found %s want %s)"
1012
                           % (api_version, constants.OS_API_VERSION))
1013

    
1014
  # OS Scripts dictionary, we will populate it with the actual script names
1015
  os_scripts = {'create': '', 'export': '', 'import': '', 'rename': ''}
1016

    
1017
  for script in os_scripts:
1018
    os_scripts[script] = os.path.sep.join([os_dir, script])
1019

    
1020
    try:
1021
      st = os.stat(os_scripts[script])
1022
    except EnvironmentError, err:
1023
      raise errors.InvalidOS(name, os_dir, "'%s' script missing (%s)" %
1024
                             (script, _ErrnoOrStr(err)))
1025

    
1026
    if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
1027
      raise errors.InvalidOS(name, os_dir, "'%s' script not executable" %
1028
                             script)
1029

    
1030
    if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
1031
      raise errors.InvalidOS(name, os_dir, "'%s' is not a regular file" %
1032
                             script)
1033

    
1034

    
1035
  return objects.OS(name=name, path=os_dir,
1036
                    create_script=os_scripts['create'],
1037
                    export_script=os_scripts['export'],
1038
                    import_script=os_scripts['import'],
1039
                    rename_script=os_scripts['rename'],
1040
                    api_version=api_version)
1041

    
1042

    
1043
def SnapshotBlockDevice(disk):
  """Create a snapshot copy of a block device.

  This function is called recursively, and the snapshot is actually created
  just for the leaf lvm backend device.

  Args:
    disk: the disk to be snapshotted

  Returns:
    a config entry for the actual lvm device snapshotted, or None if
    the device is not set up (or no child of a multi-child device
    matches the parent's size).

  Raises:
    errors.ProgrammerError: if asked to snapshot a non-lvm leaf device

  """
  if disk.children:
    if len(disk.children) == 1:
      # only one child, let's recurse on it
      return SnapshotBlockDevice(disk.children[0])
    else:
      # more than one child, choose one that matches
      for child in disk.children:
        if child.size == disk.size:
          # return implies breaking the loop
          return SnapshotBlockDevice(child)
      # no child matched the parent's size; make the implicit
      # fall-through explicit
      return None
  elif disk.dev_type == "lvm":
    r_dev = _RecursiveFindBD(disk)
    if r_dev is not None:
      # let's stay on the safe side and ask for the full size, for now
      return r_dev.Snapshot(disk.size)
    else:
      return None
  else:
    # fixed: the two adjacent string literals lacked a separating
    # space, producing "...block device'%s'..."
    raise errors.ProgrammerError("Cannot snapshot non-lvm block device"
                                 " '%s' of type '%s'" %
                                 (disk.unique_id, disk.dev_type))
1077

    
1078

    
1079
def ExportSnapshot(disk, dest_node, instance):
  """Export a block device snapshot to a remote node.

  The snapshot is dumped by the OS export script, compressed with
  gzip, and streamed over ssh into the export staging directory on
  dest_node ("<instance>.new", later renamed by FinalizeExport).

  Args:
    disk: the snapshot block device
    dest_node: the node to send the image to
    instance: instance being exported

  Returns:
    True if successful, False otherwise.

  Raises:
    errors.BlockDeviceError: if the snapshot device is not set up

  """
  inst_os = OSFromDisk(instance.os)
  export_script = inst_os.export_script

  # per-run logfile capturing the export script's stderr
  logfile = "%s/exp-%s-%s-%s.log" % (constants.LOG_OS_DIR, inst_os.name,
                                     instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  real_os_dev = _RecursiveFindBD(disk)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(disk))
  real_os_dev.Open()

  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
  # NOTE(review): physical_id[1] looks like a device path, yet it is
  # used below as the destination *file name* component -- confirm
  # against the importer's expectations
  destfile = disk.physical_id[1]

  # the target command is built out of three individual commands,
  # which are joined by pipes; we check each individual command for
  # valid parameters

  expcmd = utils.BuildShellCmd("cd %s; %s -i %s -b %s 2>%s", inst_os.path,
                               export_script, instance.name,
                               real_os_dev.dev_path, logfile)

  comprcmd = "gzip"

  # the remote side creates the staging dir and receives the stream
  destcmd = utils.BuildShellCmd("mkdir -p %s && cat > %s/%s",
                                destdir, destdir, destfile)
  remotecmd = ssh.BuildSSHCmd(dest_node, 'root', destcmd)

  # all commands have been checked, so we're safe to combine them
  command = '|'.join([expcmd, comprcmd, utils.ShellQuoteArgs(remotecmd)])

  result = utils.RunCmd(command)

  if result.failed:
    logger.Error("os snapshot export command '%s' returned error: %s"
                 " output: %s" %
                 (command, result.fail_reason, result.output))
    return False

  return True
1136

    
1137

    
1138
def FinalizeExport(instance, snap_disks):
  """Write out the export configuration information.

  Serializes the instance configuration into the EXPORT_CONF_FILE of
  the ".new" staging directory, then atomically-ish replaces any
  previous export with the staged one.

  Args:
    instance: instance configuration
    snap_disks: snapshot block devices

  Returns:
    False in case of error, True otherwise.

  """
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
  finaldestdir = os.path.join(constants.EXPORT_DIR, instance.name)

  config = objects.SerializableConfigParser()

  config.add_section(constants.INISECT_EXP)
  config.set(constants.INISECT_EXP, 'version', '0')
  config.set(constants.INISECT_EXP, 'timestamp', '%d' % int(time.time()))
  config.set(constants.INISECT_EXP, 'source', instance.primary_node)
  config.set(constants.INISECT_EXP, 'os', instance.os)
  config.set(constants.INISECT_EXP, 'compression', 'gzip')

  config.add_section(constants.INISECT_INS)
  config.set(constants.INISECT_INS, 'name', instance.name)
  config.set(constants.INISECT_INS, 'memory', '%d' % instance.memory)
  config.set(constants.INISECT_INS, 'vcpus', '%d' % instance.vcpus)
  config.set(constants.INISECT_INS, 'disk_template', instance.disk_template)

  # initialize the counter so an instance with no nics does not hit a
  # NameError below; note the stored value is the *last index* used
  # (count - 1), kept as-is for compatibility with existing importers
  nic_count = -1
  for nic_count, nic in enumerate(instance.nics):
    config.set(constants.INISECT_INS, 'nic%d_mac' %
               nic_count, '%s' % nic.mac)
    config.set(constants.INISECT_INS, 'nic%d_ip' % nic_count, '%s' % nic.ip)
  # TODO: redundant: on load can read nics until it doesn't exist
  config.set(constants.INISECT_INS, 'nic_count' , '%d' % nic_count)

  # same guard for an empty snap_disks list
  disk_count = -1
  for disk_count, disk in enumerate(snap_disks):
    config.set(constants.INISECT_INS, 'disk%d_ivname' % disk_count,
               ('%s' % disk.iv_name))
    config.set(constants.INISECT_INS, 'disk%d_dump' % disk_count,
               ('%s' % disk.physical_id[1]))
    config.set(constants.INISECT_INS, 'disk%d_size' % disk_count,
               ('%d' % disk.size))
  config.set(constants.INISECT_INS, 'disk_count' , '%d' % disk_count)

  cff = os.path.join(destdir, constants.EXPORT_CONF_FILE)
  cfo = open(cff, 'w')
  try:
    config.write(cfo)
  finally:
    cfo.close()

  # replace any previous export of this instance with the staged one
  shutil.rmtree(finaldestdir, True)
  shutil.move(destdir, finaldestdir)

  return True
1193

    
1194

    
1195
def ExportInfo(dest):
  """Get export configuration information.

  Args:
    dest: directory containing the export

  Returns:
    A serializable config file containing the export info, or None if
    the required sections are missing.

  """
  conf_path = os.path.join(dest, constants.EXPORT_CONF_FILE)

  export_info = objects.SerializableConfigParser()
  export_info.read(conf_path)

  # both the export and the instance sections must be present for
  # this to be a usable export
  for section in (constants.INISECT_EXP, constants.INISECT_INS):
    if not export_info.has_section(section):
      return None

  return export_info
1215

    
1216

    
1217
def ImportOSIntoInstance(instance, os_disk, swap_disk, src_node, src_image):
  """Import an os image into an instance.

  The image is streamed from src_node over ssh, decompressed with
  gunzip and fed to the OS import script, which writes it onto the
  instance's os and swap block devices.

  Args:
    instance: the instance object
    os_disk: the instance-visible name of the os device
    swap_disk: the instance-visible name of the swap device
    src_node: node holding the source image
    src_image: path to the source image on src_node

  Returns:
    False in case of error, True otherwise.

  Raises:
    errors.BlockDeviceError: if the os or swap device is not set up

  """
  inst_os = OSFromDisk(instance.os)
  import_script = inst_os.import_script

  os_device = instance.FindDisk(os_disk)
  if os_device is None:
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
    return False

  swap_device = instance.FindDisk(swap_disk)
  if swap_device is None:
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
    return False

  real_os_dev = _RecursiveFindBD(os_device)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(os_device))
  real_os_dev.Open()

  real_swap_dev = _RecursiveFindBD(swap_device)
  if real_swap_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(swap_device))
  real_swap_dev.Open()

  # per-run logfile capturing the import script's output
  logfile = "%s/import-%s-%s-%s.log" % (constants.LOG_OS_DIR, instance.os,
                                        instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  # the source image is cat'ed on the remote node and piped to us
  destcmd = utils.BuildShellCmd('cat %s', src_image)
  remotecmd = ssh.BuildSSHCmd(src_node, 'root', destcmd)

  comprcmd = "gunzip"
  impcmd = utils.BuildShellCmd("(cd %s; %s -i %s -b %s -s %s &>%s)",
                               inst_os.path, import_script, instance.name,
                               real_os_dev.dev_path, real_swap_dev.dev_path,
                               logfile)

  # ssh | gunzip | import-script, each component built/quoted above
  command = '|'.join([utils.ShellQuoteArgs(remotecmd), comprcmd, impcmd])

  result = utils.RunCmd(command)

  if result.failed:
    logger.Error("os import command '%s' returned error: %s"
                 " output: %s" %
                 (command, result.fail_reason, result.output))
    return False

  return True
1281

    
1282

    
1283
def ListExports():
  """Return a list of exports currently available on this machine.

  """
  # no export directory means no exports at all
  if not os.path.isdir(constants.EXPORT_DIR):
    return []
  return os.listdir(constants.EXPORT_DIR)
1291

    
1292

    
1293
def RemoveExport(export):
  """Remove an existing export from the node.

  Args:
    export: the name of the export to remove

  Returns:
    False in case of error, True otherwise.

  """
  export_path = os.path.join(constants.EXPORT_DIR, export)

  # TODO: catch some of the relevant exceptions and provide a pretty
  # error message if rmtree fails.
  shutil.rmtree(export_path)

  return True
1310

    
1311

    
1312
class HooksRunner(object):
  """Hook runner.

  This class is instantiated on the node side (ganeti-noded) and not on
  the master side.

  """
  # only scripts whose names match this pattern are executed; anything
  # else (editor backups, package-manager leftovers, ...) is skipped
  RE_MASK = re.compile("^[a-zA-Z0-9_-]+$")

  def __init__(self, hooks_base_dir=None):
    """Constructor for hooks runner.

    Args:
      - hooks_base_dir: if not None, this overrides the
        constants.HOOKS_BASE_DIR (useful for unittests)

    """
    if hooks_base_dir is None:
      hooks_base_dir = constants.HOOKS_BASE_DIR
    self._BASE_DIR = hooks_base_dir

  @staticmethod
  def ExecHook(script, env):
    """Exec one hook script.

    Args:
     - script: the full path to the script
     - env: the environment with which to exec the script

    Returns:
      a (success, output) tuple where success is True iff the script
      exited with status 0, and output is the first 4kB of its
      combined stdout/stderr.

    """
    # exec the process using subprocess and log the output
    fdstdin = None
    try:
      fdstdin = open("/dev/null", "r")
      child = subprocess.Popen([script], stdin=fdstdin, stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT, close_fds=True,
                               shell=False, cwd="/",env=env)
      output = ""
      try:
        # only the first 4kB of output is captured; NOTE(review): a
        # hook writing much more than that without exiting could fill
        # the pipe and stall -- confirm hooks are expected to be quiet
        output = child.stdout.read(4096)
        child.stdout.close()
      except EnvironmentError, err:
        output += "Hook script error: %s" % str(err)

      # retry wait() if interrupted by a signal (EINTR)
      while True:
        try:
          result = child.wait()
          break
        except EnvironmentError, err:
          if err.errno == errno.EINTR:
            continue
          raise
    finally:
      # try not to leak fds
      for fd in (fdstdin, ):
        if fd is not None:
          try:
            fd.close()
          except EnvironmentError, err:
            # just log the error
            #logger.Error("While closing fd %s: %s" % (fd, err))
            pass

    return result == 0, output

  def RunHooks(self, hpath, phase, env):
    """Run the scripts in the hooks directory.

    This method will not be usually overriden by child opcodes.

    Args:
      - hpath: hooks path prefix, used to build the "<hpath>-<suffix>.d"
        subdirectory name
      - phase: constants.HOOKS_PHASE_PRE or constants.HOOKS_PHASE_POST
      - env: environment dictionary passed to each hook script

    Returns:
      a list of ("subdir/script", status, output) tuples, status being
      one of constants.HKR_SKIP, HKR_FAIL or HKR_SUCCESS

    Raises:
      errors.ProgrammerError: if phase is not a known hooks phase

    """
    if phase == constants.HOOKS_PHASE_PRE:
      suffix = "pre"
    elif phase == constants.HOOKS_PHASE_POST:
      suffix = "post"
    else:
      raise errors.ProgrammerError("Unknown hooks phase: '%s'" % phase)
    rr = []

    subdir = "%s-%s.d" % (hpath, suffix)
    dir_name = "%s/%s" % (self._BASE_DIR, subdir)
    try:
      dir_contents = os.listdir(dir_name)
    except OSError, err:
      # a missing or unreadable hooks directory simply means there is
      # nothing to run
      # must log
      return rr

    # we use the standard python sort order,
    # so 00name is the recommended naming scheme
    dir_contents.sort()
    for relname in dir_contents:
      fname = os.path.join(dir_name, relname)
      if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
          self.RE_MASK.match(relname) is not None):
        # not an executable regular file with an allowed name: skip
        rrval = constants.HKR_SKIP
        output = ""
      else:
        result, output = self.ExecHook(fname, env)
        if not result:
          rrval = constants.HKR_FAIL
        else:
          rrval = constants.HKR_SUCCESS
      rr.append(("%s/%s" % (subdir, relname), rrval, output))

    return rr