Statistics
| Branch: | Tag: | Revision:

root / lib / backend.py @ 098c0958

History | View | Annotate | Download (36.4 kB)

1
#!/usr/bin/python
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Functions used by the node daemon"""
23

    
24

    
25
import os
26
import os.path
27
import shutil
28
import time
29
import tempfile
30
import stat
31
import errno
32
import re
33
import subprocess
34

    
35
from ganeti import logger
36
from ganeti import errors
37
from ganeti import utils
38
from ganeti import ssh
39
from ganeti import hypervisor
40
from ganeti import constants
41
from ganeti import bdev
42
from ganeti import objects
43
from ganeti import ssconf
44

    
45

    
46
def StartMaster():
  """Activate the local node as the cluster master.

  Runs the master start script; a failure is logged through the node
  daemon logger.

  Returns:
    True on success, False if the master script reported an error.

  """
  script_result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "start"])
  if not script_result.failed:
    return True

  logger.Error("could not activate cluster interface with command %s,"
               " error: '%s'" % (script_result.cmd, script_result.output))
  return False
62

    
63

    
64
def StopMaster():
  """Deactivate the local node as the cluster master.

  Runs the master stop script; a failure is logged through the node
  daemon logger.

  Returns:
    True on success, False if the master script reported an error.

  """
  script_result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "stop"])
  if not script_result.failed:
    return True

  logger.Error("could not deactivate cluster interface with command %s,"
               " error: '%s'" % (script_result.cmd, script_result.output))
  return False
80

    
81

    
82
def AddNode(dsa, dsapub, rsa, rsapub, ssh, sshpub):
  """Add the current node to the cluster.

  This updates the ssh host keys, installs root's cluster ssh identity,
  authorizes that identity, restarts the ssh daemon and removes the
  now-stale known_hosts file.

  Args:
    dsa: private DSA host key contents
    dsapub: public DSA host key contents
    rsa: private RSA host key contents
    rsapub: public RSA host key contents
    ssh: private ssh identity for root
    sshpub: public ssh identity for root

  Returns:
    True (errors while writing the files propagate as exceptions)

  """
  # NOTE: the 'ssh' parameter shadows the ganeti.ssh module inside this
  # function; the module is not needed here, and the parameter cannot
  # be renamed without risking breakage of callers using keywords
  def _WriteKeyFile(path, data):
    """Write 'data' to 'path', guaranteeing the file is closed."""
    # try/finally (instead of the previous bare open/write/close
    # sequence) ensures the descriptor is not leaked if the write fails
    f = open(path, 'w')
    try:
      f.write(data)
    finally:
      f.close()

  # the node's ssh host identity
  _WriteKeyFile("/etc/ssh/ssh_host_rsa_key", rsa)
  _WriteKeyFile("/etc/ssh/ssh_host_rsa_key.pub", rsapub)
  _WriteKeyFile("/etc/ssh/ssh_host_dsa_key", dsa)
  _WriteKeyFile("/etc/ssh/ssh_host_dsa_key.pub", dsapub)

  if not os.path.isdir("/root/.ssh"):
    os.mkdir("/root/.ssh")

  # root's ssh identity, used for intra-cluster commands
  _WriteKeyFile("/root/.ssh/id_dsa", ssh)
  _WriteKeyFile("/root/.ssh/id_dsa.pub", sshpub)

  # authorize the key just written; it is re-read from disk so that
  # exactly what ended up in the file is what gets authorized
  f = open('/root/.ssh/id_dsa.pub', 'r')
  try:
    utils.AddAuthorizedKey('/root/.ssh/authorized_keys', f.read(8192))
  finally:
    f.close()

  # restart sshd so the new host keys take effect
  utils.RunCmd(["/etc/init.d/ssh", "restart"])

  # the changed host keys invalidate all existing known_hosts entries
  utils.RemoveFile("/root/.ssh/known_hosts")
  return True
127

    
128

    
129
def LeaveCluster():
  """Cleans up the current node and prepares it to be removed from the cluster.

  This removes the node daemon's data files (top level of DATA_DIR
  only), de-authorizes root's cluster ssh identity and deletes the
  identity files themselves.

  """
  # remove only the files directly under DATA_DIR, leaving any
  # subdirectories and their contents in place
  if os.path.exists(constants.DATA_DIR):
    for dirpath, dirnames, filenames in os.walk(constants.DATA_DIR):
      if dirpath == constants.DATA_DIR:
        for i in filenames:
          os.unlink(os.path.join(dirpath, i))

  # revoke the key before deleting it, so we still have its contents
  f = open('/root/.ssh/id_dsa.pub', 'r')
  try:
    utils.RemoveAuthorizedKey('/root/.ssh/authorized_keys', f.read(8192))
  finally:
    f.close()

  utils.RemoveFile('/root/.ssh/id_dsa')
  utils.RemoveFile('/root/.ssh/id_dsa.pub')
147

    
148

    
149
def GetNodeInfo(vgname):
  """Collect volume group and hypervisor information about this node.

  Args:
    vgname: the name of the volume group to query

  Returns:
    a dict containing at least 'vg_size' and 'vg_free' (sizes in MiB),
    updated with whatever keys the hypervisor's GetNodeInfo reports
    (memory information, etc.)

  """
  vg_stats = _GetVGInfo(vgname)
  node_info = {
    'vg_size': vg_stats['vg_size'],
    'vg_free': vg_stats['vg_free'],
    }

  hv_stats = hypervisor.GetHypervisor().GetNodeInfo()
  if hv_stats is not None:
    node_info.update(hv_stats)

  return node_info
175

    
176

    
177
def VerifyNode(what):
  """Verify the status of the local node.

  Args:
    what: a dictionary of things to check:
      'filelist': list of files for which to compute checksums
      'nodelist': list of nodes whose hostname resolution to verify
        over ssh
      'hypervisor': if present, run the hypervisor-specific verify

  Returns:
    a dict with one entry per requested check: the hypervisor verify
    result, the file fingerprints, and a dict mapping each failing
    node to its error message (successful nodes are omitted).

  """
  result = {}

  if 'hypervisor' in what:
    result['hypervisor'] = hypervisor.GetHypervisor().Verify()

  if 'filelist' in what:
    result['filelist'] = utils.FingerprintFiles(what['filelist'])

  if 'nodelist' in what:
    node_failures = {}
    for node in what['nodelist']:
      success, message = ssh.VerifyNodeHostname(node)
      if not success:
        node_failures[node] = message
    result['nodelist'] = node_failures

  return result
210

    
211

    
212
def GetVolumeList(vg_name):
  """Compute the list of logical volumes and their size.

  Args:
    vg_name: the volume group to list

  Returns:
    a dict mapping each volume name to its size string (e.g.
    'test1': '20.06MiB'); an empty dict if the 'lvs' command fails.

  """
  lvs_result = utils.RunCmd(["lvs", "--noheadings", "--units=m",
                             "-oname,size", vg_name])
  if lvs_result.failed:
    logger.Error("Failed to list logical volumes, lvs output: %s" %
                 lvs_result.output)
    return {}

  volumes = {}
  # each output line is "<name> <size>"
  for line in lvs_result.output.splitlines():
    name, size = line.split()
    volumes[name] = size
  return volumes
229

    
230

    
231
def ListVolumeGroups():
  """List the volume groups and their size.

  Thin wrapper around utils.ListVolumeGroups, kept so the node daemon
  exposes this as its own entry point.

  Returns:
    Dictionary with keys volume name and values the size of the volume

  """
  return utils.ListVolumeGroups()
239

    
240

    
241
def NodeVolumes():
  """List all logical volumes on this node.

  Returns:
    a list of dicts, one per logical volume, each with the keys
    'name', 'size', 'dev' (the backing physical device, extent suffix
    stripped) and 'vg'; an empty dict if the 'lvs' command fails.

  """
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=|",
                         "--options=lv_name,lv_size,devices,vg_name"])
  if result.failed:
    logger.Error("Failed to list logical volumes, lvs output: %s" %
                 result.output)
    return {}

  volumes = []
  for line in result.output.splitlines():
    fields = line.split('|')
    # the devices field looks like '/dev/sda(5)'; splitting on '(' and
    # keeping the first part strips the extent info and is a no-op
    # when no parenthesis is present
    volumes.append({
      'name': fields[0].strip(),
      'size': fields[1].strip(),
      'dev': fields[2].strip().split('(')[0],
      'vg': fields[3].strip(),
      })
  return volumes
268

    
269

    
270
def BridgesExist(bridges_list):
  """Check whether all bridges in a list exist on the current node.

  Args:
    bridges_list: list of bridge names to check

  Returns:
    True if every bridge exists; False as soon as one is missing
    (the check short-circuits).

  """
  return all(utils.BridgeExists(bridge) for bridge in bridges_list)
282

    
283

    
284
def GetInstanceList():
  """Provide the list of instances running on this node.

  Returns:
    A list of the names of all running instances on the current node,
    e.g. ['instance1.example.com', 'instance2.example.com']

  Raises:
    errors.HypervisorError: re-raised (after local logging) when the
      hypervisor cannot enumerate its instances

  """
  try:
    names = hypervisor.GetHypervisor().ListInstances()
  except errors.HypervisorError, err:
    # log locally for the node daemon, but let the caller see the
    # original error as well
    logger.Error("error enumerating instances: %s" % str(err))
    raise

  return names
300

    
301

    
302
def GetInstanceInfo(instance):
  """Return runtime information about a single instance.

  Args:
    instance: name of the instance (e.g. instance1.example.com)

  Returns:
    an empty dict when the hypervisor does not know the instance,
    otherwise {'memory': ..., 'state': ..., 'time': ...} where memory
    is the instance memory in MiB (int), state the hypervisor state
    string and time the consumed cpu time (float).

  """
  iinfo = hypervisor.GetHypervisor().GetInstanceInfo(instance)
  if iinfo is None:
    return {}

  # iinfo is a tuple; indices 2, 4 and 5 carry memory, state and time
  return {
    'memory': iinfo[2],
    'state': iinfo[4],
    'time': iinfo[5],
    }
326

    
327

    
328
def GetAllInstancesInfo():
  """Gather data about all instances.

  This is the equivalent of `GetInstanceInfo()`, except that it
  computes data for all instances at once, thus being faster if one
  needs data about more than one instance.

  Returns: a dictionary of dictionaries, keys being the instance name,
    and with values:
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
    where
    memory: memory size of instance (int)
    state: xen state of instance (string)
    time: cpu time of instance (float)
    vcpus: the number of cpus

  """
  output = {}

  iinfo = hypervisor.GetHypervisor().GetAllInstancesInfo()
  if iinfo:
    # each entry is (name, id, memory, vcpus, state, run_time); the
    # hypervisor-internal id is not exported to callers.  The loop
    # variable was renamed from 'id' to avoid shadowing the builtin.
    for name, inst_id, memory, vcpus, state, times in iinfo:
      output[name] = {
        'memory': memory,
        'vcpus': vcpus,
        'state': state,
        'time': times,
        }

  return output
358

    
359

    
360
def AddOSToInstance(instance, os_disk, swap_disk):
  """Install an OS onto an instance by running the OS create script.

  Args:
    instance: the instance object (provides os, name and disks)
    os_disk: the instance-visible name of the os device
    swap_disk: the instance-visible name of the swap device

  Returns:
    False if a device name cannot be found among the instance's disks
    or if the create script fails; True on success.

  Raises:
    errors.InvalidOS: propagated from OSFromDisk for an invalid OS
    errors.BlockDeviceError: if either device is not set up

  """
  inst_os = OSFromDisk(instance.os)

  create_script = inst_os.create_script

  # for/else: 'break' means the device was found; the else branch only
  # runs when the loop finishes without finding a match
  for os_device in instance.disks:
    if os_device.iv_name == os_disk:
      break
  else:
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
    return False

  for swap_device in instance.disks:
    if swap_device.iv_name == swap_disk:
      break
  else:
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
    return False

  real_os_dev = _RecursiveFindBD(os_device)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(os_device))
  real_os_dev.Open()

  real_swap_dev = _RecursiveFindBD(swap_device)
  if real_swap_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(swap_device))
  real_swap_dev.Open()

  # timestamped log name so repeated runs don't clobber earlier logs
  logfile = "%s/add-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
                                     instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  command = utils.BuildShellCmd("cd %s; %s -i %s -b %s -s %s &>%s",
                                inst_os.path, create_script, instance.name,
                                real_os_dev.dev_path, real_swap_dev.dev_path,
                                logfile)

  result = utils.RunCmd(command)

  if result.failed:
    logger.Error("os create command '%s' returned error: %s"
                 " output: %s" %
                 (command, result.fail_reason, result.output))
    return False

  return True
418

    
419

    
420
def _GetVGInfo(vg_name):
  """Get information about a volume group.

  Args:
    vg_name: the volume group to query

  Returns:
    {'vg_size': ..., 'vg_free': ..., 'pv_count': ...} where the sizes
    are in MiB (rounded to ints) and pv_count is the number of
    physical volumes in the group.

  Raises:
    errors.LVMError: if the 'vgs' command fails (e.g. the group does
      not exist)

  """
  result = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings",
                         "--nosuffix", "--units=m", "--separator=:", vg_name])

  if result.failed:
    errmsg = "volume group %s not present" % vg_name
    logger.Error(errmsg)
    raise errors.LVMError(errmsg)

  # single output line of the form "size:free:pv_count"
  fields = result.stdout.strip().split(':')
  return {
    "vg_size": int(round(float(fields[0]), 0)),
    "vg_free": int(round(float(fields[1]), 0)),
    "pv_count": int(fields[2]),
    }
448

    
449

    
450
def _GatherBlockDevs(instance):
  """Set up an instance's block device(s).

  This is run on the primary node at instance startup; the block
  devices must already be assembled.

  Returns:
    a list of (disk, device) pairs, where disk is the objects.Disk
    and device the attached block device, opened for use.

  Raises:
    errors.BlockDeviceError: if any disk cannot be found

  """
  gathered = []
  for disk in instance.disks:
    real_dev = _RecursiveFindBD(disk)
    if real_dev is None:
      raise errors.BlockDeviceError("Block device '%s' is not set up." %
                                    str(disk))
    real_dev.Open()
    gathered.append((disk, real_dev))
  return gathered
466

    
467

    
468
def StartInstance(instance, extra_args):
  """Start an instance.

  Args:
    instance: the instance object to start
    extra_args: extra arguments passed through to the hypervisor

  Returns:
    True if the instance is already running or was started
    successfully, False when the hypervisor reports a failure.

  """
  running_instances = GetInstanceList()

  # starting an already-running instance is a successful no-op
  if instance.name in running_instances:
    return True

  block_devices = _GatherBlockDevs(instance)
  hyper = hypervisor.GetHypervisor()

  try:
    hyper.StartInstance(instance, block_devices, extra_args)
  except errors.HypervisorError, err:
    logger.Error("Failed to start instance: %s" % err)
    return False

  return True
490

    
491

    
492
def ShutdownInstance(instance):
493
  """Shut an instance down.
494

495
  Args:
496
    instance - name of instance to shutdown.
497

498
  """
499
  running_instances = GetInstanceList()
500

    
501
  if instance.name not in running_instances:
502
    return True
503

    
504
  hyper = hypervisor.GetHypervisor()
505
  try:
506
    hyper.StopInstance(instance)
507
  except errors.HypervisorError, err:
508
    logger.Error("Failed to stop instance: %s" % err)
509
    return False
510

    
511
  # test every 10secs for 2min
512
  shutdown_ok = False
513

    
514
  time.sleep(1)
515
  for dummy in range(11):
516
    if instance.name not in GetInstanceList():
517
      break
518
    time.sleep(10)
519
  else:
520
    # the shutdown did not succeed
521
    logger.Error("shutdown of '%s' unsuccessful, using destroy" % instance)
522

    
523
    try:
524
      hyper.StopInstance(instance, force=True)
525
    except errors.HypervisorError, err:
526
      logger.Error("Failed to stop instance: %s" % err)
527
      return False
528

    
529
    time.sleep(1)
530
    if instance.name in GetInstanceList():
531
      logger.Error("could not shutdown instance '%s' even by destroy")
532
      return False
533

    
534
  return True
535

    
536

    
537
def CreateBlockDevice(disk, size, on_primary):
  """Creates a block device for an instance.

  Args:
    disk: a ganeti.objects.Disk object describing the device to create
    size: the size of the physical underlying devices
    on_primary: whether we are creating on the primary node (children
      are then assembled and opened after creation)

  Returns:
    the new unique_id of the device (this can sometime be
    computed only after creation), or None. On secondary nodes,
    it's not required to return anything.

  """
  clist = []
  if disk.children:
    for child in disk.children:
      crdev = _RecursiveAssembleBD(child, on_primary)
      if on_primary or disk.AssembleOnSecondary():
        # we need the children open in case the device itself has to
        # be assembled
        crdev.Open()
      else:
        crdev.Close()
      clist.append(crdev)
  try:
    # an already-existing device with this id is stale; remove it
    # before creating the new one
    device = bdev.FindDevice(disk.dev_type, disk.physical_id, clist)
    if device is not None:
      logger.Info("removing existing device %s" % disk)
      device.Remove()
  except errors.BlockDeviceError, err:
    # the device could not be attached to, so there is nothing to
    # remove; proceed with creation
    pass

  device = bdev.Create(disk.dev_type, disk.physical_id,
                       clist, size)
  if device is None:
    raise ValueError("Can't create child device for %s, %s" %
                     (disk, size))
  if on_primary or disk.AssembleOnSecondary():
    device.Assemble()
    device.SetSyncSpeed(30*1024)
    # only open read/write where required
    if on_primary or disk.OpenOnSecondary():
      device.Open(force=True)
  physical_id = device.unique_id
  return physical_id
583

    
584

    
585
def RemoveBlockDevice(disk):
  """Remove a block device.

  This is intended to be called recursively: the device itself is
  removed first (when it can be attached to at all), then each child.

  Args:
    disk: the objects.Disk describing the device to remove

  Returns:
    True if the device and all its children were removed (or could not
    be attached to in the first place), False otherwise.

  """
  try:
    # since we are removing the device, allow a partial match
    # this allows removal of broken mirrors
    rdev = _RecursiveFindBD(disk, allow_partial=True)
  except errors.BlockDeviceError, err:
    # probably can't attach
    logger.Info("Can't attach to device %s in remove" % disk)
    rdev = None
  if rdev is not None:
    result = rdev.Remove()
  else:
    # nothing attached at this level, so nothing to remove
    result = True
  if disk.children:
    for child in disk.children:
      result = result and RemoveBlockDevice(child)
  return result
607

    
608

    
609
def _RecursiveAssembleBD(disk, as_primary):
  """Activate a block device for an instance.

  This is run on the primary and secondary nodes for an instance.

  This function is called recursively: children are assembled before
  the device itself.

  Args:
    disk: a objects.Disk object
    as_primary: if we should make the block device read/write

  Returns:
    the assembled device (when this node assembles it) or True (when
    the device is not assembled on this node at all)

  If the assembly is not successful, an exception is raised.

  """
  children = []
  if disk.children:
    for chld_disk in disk.children:
      children.append(_RecursiveAssembleBD(chld_disk, as_primary))

  if as_primary or disk.AssembleOnSecondary():
    r_dev = bdev.AttachOrAssemble(disk.dev_type, disk.physical_id, children)
    r_dev.SetSyncSpeed(30*1024)
    result = r_dev
    # open read/write only where required; otherwise explicitly close
    # so the device is not held open on this node
    if as_primary or disk.OpenOnSecondary():
      r_dev.Open()
    else:
      r_dev.Close()
  else:
    result = True
  return result
642

    
643

    
644
def AssembleBlockDevice(disk, as_primary):
  """Activate a block device for an instance.

  Thin wrapper over _RecursiveAssembleBD which converts an assembled
  device object into its /dev path.

  Returns:
    the /dev path of the device on primary nodes; True on secondary
    nodes (where the device object is not returned).

  """
  assembled = _RecursiveAssembleBD(disk, as_primary)
  if not isinstance(assembled, bdev.BlockDev):
    return assembled
  return assembled.dev_path
658

    
659

    
660
def ShutdownBlockDevice(disk):
  """Shut down a block device.

  If the device can be attached to (`Attach()` succeeds via the find),
  it is shut down first; its children are then shut down recursively.
  Child shutdown does not require the parent to have been active.

  Returns:
    True if the device and all children were shut down (or were not
    attached to begin with), False otherwise.

  """
  r_dev = _RecursiveFindBD(disk)
  if r_dev is None:
    # not attached: nothing to do at this level
    result = True
  else:
    result = r_dev.Shutdown()

  for child in (disk.children or []):
    result = result and ShutdownBlockDevice(child)
  return result
680

    
681

    
682
def MirrorAddChild(md_cdev, new_cdev):
  """Extend an MD raid1 array with a new device.

  Args:
    md_cdev: the objects.Disk of the md array
    new_cdev: the objects.Disk of the component to add

  Returns:
    True on success, False (with an error logged) when either device
    cannot be found.

  """
  # allow a partial match so that a degraded array can still be found
  array_dev = _RecursiveFindBD(md_cdev, allow_partial=True)
  if array_dev is None:
    logger.Error("Can't find md device")
    return False

  component_dev = _RecursiveFindBD(new_cdev)
  if component_dev is None:
    logger.Error("Can't find new device to add")
    return False

  component_dev.Open()
  array_dev.AddChild(component_dev)
  return True
697

    
698

    
699
def MirrorRemoveChild(md_cdev, new_cdev):
  """Reduce an MD raid1 array.

  Args:
    md_cdev: the objects.Disk of the md array
    new_cdev: the objects.Disk of the component to remove

  Returns:
    True on success, False when either device cannot be found (the
    failure is now logged, matching MirrorAddChild's behaviour).

  """
  md_bdev = _RecursiveFindBD(md_cdev)
  if md_bdev is None:
    # consistency fix: log like MirrorAddChild instead of failing
    # silently
    logger.Error("Can't find md device")
    return False
  new_bdev = _RecursiveFindBD(new_cdev)
  if new_bdev is None:
    logger.Error("Can't find device to remove")
    return False
  new_bdev.Open()
  # the removal is addressed by device path, not by object
  md_bdev.RemoveChild(new_bdev.dev_path)
  return True
712

    
713

    
714
def GetMirrorStatus(disks):
  """Get the mirroring status of a list of devices.

  Args:
    disks: list of `objects.Disk`

  Returns:
    list of (mirror_done, estimated_time) tuples, which
    are the result of bdev.BlockDevice.CombinedSyncStatus()

  Raises:
    errors.BlockDeviceError: if any of the devices cannot be found

  """
  stats = []
  for dsk in disks:
    rbd = _RecursiveFindBD(dsk)
    if rbd is None:
      raise errors.BlockDeviceError, "Can't find device %s" % str(dsk)
    stats.append(rbd.CombinedSyncStatus())
  return stats
732

    
733

    
734
def _RecursiveFindBD(disk, allow_partial=False):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance
    allow_partial: don't abort the find if a child of the
                   device can't be found; this is intended to be
                   used when repairing mirrors

  Returns:
    None if the device can't be found
    otherwise the device instance

  """
  # NOTE(review): allow_partial is accepted but not used in this body
  # (it is neither forwarded to the recursive child lookups nor to
  # bdev.FindDevice) -- confirm whether the partial-match behaviour
  # is implemented inside bdev.FindDevice or is simply missing here
  children = []
  if disk.children:
    for chdisk in disk.children:
      children.append(_RecursiveFindBD(chdisk))

  return bdev.FindDevice(disk.dev_type, disk.physical_id, children)
756

    
757

    
758
def FindBlockDevice(disk):
  """Check whether a device is activated.

  If so, return information about the real device.

  Args:
    disk: the objects.Disk instance

  Returns:
    None if the device can't be found, otherwise the tuple
    (device_path, major, minor, sync_percent, estimated_time,
    is_degraded)

  """
  found = _RecursiveFindBD(disk)
  if found is None:
    return None
  sync_percent, est_time, is_degraded = found.GetSyncStatus()
  return (found.dev_path, found.major, found.minor,
          sync_percent, est_time, is_degraded)
775

    
776

    
777
def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
  """Write a file to the filesystem.

  This allows the master to overwrite(!) a file. It will only perform
  the operation if the file belongs to a list of configuration files.

  Args:
    file_name: absolute path of the target file
    data: the new file contents
    mode: permission bits for the file
    uid, gid: numeric owner and group for the file
    atime, mtime: access/modification times to set

  Returns:
    False when the name is not absolute or not an allowed upload
    target, True after a successful write.

  """
  if not os.path.isabs(file_name):
    logger.Error("Filename passed to UploadFile is not absolute: '%s'" %
                 file_name)
    return False

  # whitelist of files the master may overwrite on this node
  allowed_files = [constants.CLUSTER_CONF_FILE, "/etc/hosts",
                   "/etc/ssh/ssh_known_hosts"]
  allowed_files.extend(ssconf.SimpleStore().GetFileList())
  if file_name not in allowed_files:
    logger.Error("Filename passed to UploadFile not in allowed"
                 " upload targets: '%s'" % file_name)
    return False

  # write into a temp file in the same directory, then rename over the
  # target, so readers see either the old or the complete new content
  dir_name, small_name = os.path.split(file_name)
  fd, new_name = tempfile.mkstemp('.new', small_name, dir_name)
  # here we need to make sure we remove the temp file, if any error
  # leaves it in place
  try:
    os.chown(new_name, uid, gid)
    os.chmod(new_name, mode)
    os.write(fd, data)
    os.fsync(fd)
    os.utime(new_name, (atime, mtime))
    os.rename(new_name, file_name)
  finally:
    os.close(fd)
    # after a successful rename the temp name no longer exists and
    # this is a no-op
    utils.RemoveFile(new_name)
  return True
812

    
813
def _ErrnoOrStr(err):
814
  """Format an EnvironmentError exception.
815

816
  If the `err` argument has an errno attribute, it will be looked up
817
  and converted into a textual EXXXX description. Otherwise the string
818
  representation of the error will be returned.
819

820
  """
821
  if hasattr(err, 'errno'):
822
    detail = errno.errorcode[err.errno]
823
  else:
824
    detail = str(err)
825
  return detail
826

    
827

    
828
def _OSOndiskVersion(name, os_dir=None):
  """Compute and return the api version of a given OS.

  This function will try to read the api version of the os given by
  the 'name' parameter. By default, it wil use the constants.OS_DIR
  as top-level directory for OSes, but this can be overriden by the
  use of the os_dir parameter. Return value will be either an
  integer denoting the version or None in the case when this is not
  a valid OS name.

  Raises:
    errors.InvalidOS: if the version file is missing, not a regular
      file, unreadable, or does not contain an integer

  """
  if os_dir is None:
    os_dir = os.path.sep.join([constants.OS_DIR, name])

  api_file = os.path.sep.join([os_dir, "ganeti_api_version"])

  try:
    st = os.stat(api_file)
  except EnvironmentError, err:
    raise errors.InvalidOS, (name, "'ganeti_api_version' file not"
                             " found (%s)" % _ErrnoOrStr(err))

  # only a regular file is acceptable (no directories, fifos, etc.)
  if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
    raise errors.InvalidOS, (name, "'ganeti_api_version' file is not"
                             " a regular file")

  try:
    f = open(api_file)
    try:
      # the version file is expected to be tiny; 256 bytes is plenty
      api_version = f.read(256)
    finally:
      f.close()
  except EnvironmentError, err:
    raise errors.InvalidOS, (name, "error while reading the"
                             " API version (%s)" % _ErrnoOrStr(err))

  api_version = api_version.strip()
  try:
    api_version = int(api_version)
  except (TypeError, ValueError), err:
    raise errors.InvalidOS, (name, "API version is not integer (%s)" %
                             str(err))

  return api_version
872

    
873
def DiagnoseOS(top_dir=None):
  """Compute the validity for all OSes.

  For each name in the give top_dir parameter (if not given, defaults
  to constants.OS_DIR), it will return an object. If this is a valid
  os, the object will be an instance of the object.OS class. If not,
  it will be an instance of errors.InvalidOS and this signifies that
  this name does not correspond to a valid OS.

  Returns:
    list of objects (objects.OS or errors.InvalidOS instances)

  """
  if top_dir is None:
    top_dir = constants.OS_DIR

  try:
    f_names = os.listdir(top_dir)
  except EnvironmentError, err:
    logger.Error("Can't list the OS directory: %s" % str(err))
    # NOTE(review): this returns False while the success path returns
    # a list -- callers must cope with both; consider [] instead
    return False
  result = []
  for name in f_names:
    try:
      os_inst = OSFromDisk(name, os.path.sep.join([top_dir, name]))
      result.append(os_inst)
    except errors.InvalidOS, err:
      # an invalid OS is reported as data, not raised, so a single bad
      # OS does not hide the valid ones
      result.append(err)

  return result
903

    
904

    
905
def OSFromDisk(name, os_dir=None):
  """Create an OS instance from disk.

  This function will return an OS instance if the given name is a
  valid OS name. Otherwise, it will raise an appropriate
  `errors.InvalidOS` exception, detailing why this is not a valid
  OS.

  Args:
    name: the OS name
    os_dir: alternative directory for the OS (defaults to
      constants.OS_DIR/name)

  Returns:
    an objects.OS instance with the script paths and api version set

  """
  if os_dir is None:
    os_dir = os.path.sep.join([constants.OS_DIR, name])

  api_version = _OSOndiskVersion(name, os_dir)

  if api_version != constants.OS_API_VERSION:
    raise errors.InvalidOS, (name, "API version mismatch (found %s want %s)"
                             % (api_version, constants.OS_API_VERSION))

  # OS Scripts dictionary, we will populate it with the actual script names
  os_scripts = {'create': '', 'export': '', 'import': ''}

  for script in os_scripts:
    os_scripts[script] = os.path.sep.join([os_dir, script])

    try:
      st = os.stat(os_scripts[script])
    except EnvironmentError, err:
      raise errors.InvalidOS, (name, "'%s' script missing (%s)" %
                               (script, _ErrnoOrStr(err)))

    # each script must be executable by its owner
    if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
      raise errors.InvalidOS, (name, "'%s' script not executable" % script)

    # and it must be a regular file (os.stat follows symlinks, so a
    # symlink to a regular file is accepted)
    if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
      raise errors.InvalidOS, (name, "'%s' is not a regular file" % script)


  return objects.OS(name=name, path=os_dir,
                    create_script=os_scripts['create'],
                    export_script=os_scripts['export'],
                    import_script=os_scripts['import'],
                    api_version=api_version)
947

    
948

    
949
def SnapshotBlockDevice(disk):
  """Create a snapshot copy of a block device.

  This function is called recursively, and the snapshot is actually created
  just for the leaf lvm backend device.

  Args:
    disk: the disk to be snapshotted

  Returns:
    a config entry for the actual lvm device snapshotted, or None when
    the lvm device is not attached (or when no child of matching size
    is found -- see note below).

  Raises:
    errors.ProgrammerError: when the recursion reaches a non-lvm leaf

  """
  if disk.children:
    if len(disk.children) == 1:
      # only one child, let's recurse on it
      return SnapshotBlockDevice(disk.children[0])
    else:
      # more than one child, choose one that matches
      # NOTE(review): if no child matches disk.size, the function
      # falls through and implicitly returns None
      for child in disk.children:
        if child.size == disk.size:
          # return implies breaking the loop
          return SnapshotBlockDevice(child)
  elif disk.dev_type == "lvm":
    r_dev = _RecursiveFindBD(disk)
    if r_dev is not None:
      # let's stay on the safe side and ask for the full size, for now
      return r_dev.Snapshot(disk.size)
    else:
      return None
  else:
    # NOTE(review): the two adjacent string literals concatenate
    # without a space, producing "device'%s'" in the message
    raise errors.ProgrammerError, ("Cannot snapshot non-lvm block device"
                                   "'%s' of type '%s'" %
                                   (disk.unique_id, disk.dev_type))
983

    
984

    
985
def ExportSnapshot(disk, dest_node, instance):
986
  """Export a block device snapshot to a remote node.
987

988
  Args:
989
    disk: the snapshot block device
990
    dest_node: the node to send the image to
991
    instance: instance being exported
992

993
  Returns:
994
    True if successful, False otherwise.
995

996
  """
997
  inst_os = OSFromDisk(instance.os)
998
  export_script = inst_os.export_script
999

    
1000
  logfile = "%s/exp-%s-%s-%s.log" % (constants.LOG_OS_DIR, inst_os.name,
1001
                                     instance.name, int(time.time()))
1002
  if not os.path.exists(constants.LOG_OS_DIR):
1003
    os.mkdir(constants.LOG_OS_DIR, 0750)
1004

    
1005
  real_os_dev = _RecursiveFindBD(disk)
1006
  if real_os_dev is None:
1007
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
1008
                                  str(disk))
1009
  real_os_dev.Open()
1010

    
1011
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
1012
  destfile = disk.physical_id[1]
1013

    
1014
  # the target command is built out of three individual commands,
1015
  # which are joined by pipes; we check each individual command for
1016
  # valid parameters
1017

    
1018
  expcmd = utils.BuildShellCmd("cd %s; %s -i %s -b %s 2>%s", inst_os.path,
1019
                               export_script, instance.name,
1020
                               real_os_dev.dev_path, logfile)
1021

    
1022
  comprcmd = "gzip"
1023

    
1024
  remotecmd = utils.BuildShellCmd("ssh -q -oStrictHostKeyChecking=yes"
1025
                                  " -oBatchMode=yes -oEscapeChar=none"
1026
                                  " %s 'mkdir -p %s; cat > %s/%s'",
1027
                                  dest_node, destdir, destdir, destfile)
1028

    
1029
  # all commands have been checked, so we're safe to combine them
1030
  command = '|'.join([expcmd, comprcmd, remotecmd])
1031

    
1032
  result = utils.RunCmd(command)
1033

    
1034
  if result.failed:
1035
    logger.Error("os snapshot export command '%s' returned error: %s"
1036
                 " output: %s" %
1037
                 (command, result.fail_reason, result.output))
1038
    return False
1039

    
1040
  return True
1041

    
1042

    
1043
def FinalizeExport(instance, snap_disks):
  """Write out the export configuration information.

  Args:
    instance: instance configuration
    snap_disks: snapshot block devices

  Returns:
    False in case of error, True otherwise.

  """
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
  finaldestdir = os.path.join(constants.EXPORT_DIR, instance.name)

  config = objects.SerializableConfigParser()

  config.add_section(constants.INISECT_EXP)
  config.set(constants.INISECT_EXP, 'version', '0')
  config.set(constants.INISECT_EXP, 'timestamp', '%d' % int(time.time()))
  config.set(constants.INISECT_EXP, 'source', instance.primary_node)
  config.set(constants.INISECT_EXP, 'os', instance.os)
  config.set(constants.INISECT_EXP, 'compression', 'gzip')

  config.add_section(constants.INISECT_INS)
  config.set(constants.INISECT_INS, 'name', instance.name)
  config.set(constants.INISECT_INS, 'memory', '%d' % instance.memory)
  config.set(constants.INISECT_INS, 'vcpus', '%d' % instance.vcpus)
  config.set(constants.INISECT_INS, 'disk_template', instance.disk_template)

  # FIX: initialize the counters so an instance with no nics (or no snapshot
  # disks) no longer raises a NameError when the *_count keys are written
  nic_count = -1
  for nic_count, nic in enumerate(instance.nics):
    config.set(constants.INISECT_INS, 'nic%d_mac' %
               nic_count, '%s' % nic.mac)
    config.set(constants.INISECT_INS, 'nic%d_ip' % nic_count, '%s' % nic.ip)
  # TODO: redundant: on load can read nics until it doesn't exist
  # NOTE: this stores the index of the last nic (len - 1), not the total;
  # kept as-is since the import side presumably relies on this convention
  config.set(constants.INISECT_INS, 'nic_count' , '%d' % nic_count)

  disk_count = -1
  for disk_count, disk in enumerate(snap_disks):
    config.set(constants.INISECT_INS, 'disk%d_ivname' % disk_count,
               ('%s' % disk.iv_name))
    config.set(constants.INISECT_INS, 'disk%d_dump' % disk_count,
               ('%s' % disk.physical_id[1]))
    config.set(constants.INISECT_INS, 'disk%d_size' % disk_count,
               ('%d' % disk.size))
  # NOTE: same last-index convention as nic_count above
  config.set(constants.INISECT_INS, 'disk_count' , '%d' % disk_count)

  cff = os.path.join(destdir, constants.EXPORT_CONF_FILE)
  cfo = open(cff, 'w')
  try:
    config.write(cfo)
  finally:
    cfo.close()

  # atomically (from the export consumers' point of view) replace any
  # previous export of the same instance with the freshly written one
  shutil.rmtree(finaldestdir, True)
  shutil.move(destdir, finaldestdir)

  return True
1098

    
1099

    
1100
def ExportInfo(dest):
  """Get export configuration information.

  Args:
    dest: directory containing the export

  Returns:
    A serializable config file containing the export info, or None if the
    config is missing either of the required sections.

  """
  config = objects.SerializableConfigParser()
  config.read(os.path.join(dest, constants.EXPORT_CONF_FILE))

  # both sections must be present for the export to be usable
  complete = (config.has_section(constants.INISECT_EXP) and
              config.has_section(constants.INISECT_INS))
  if not complete:
    return None

  return config
1120

    
1121

    
1122
def ImportOSIntoInstance(instance, os_disk, swap_disk, src_node, src_image):
1123
  """Import an os image into an instance.
1124

1125
  Args:
1126
    instance: the instance object
1127
    os_disk: the instance-visible name of the os device
1128
    swap_disk: the instance-visible name of the swap device
1129
    src_node: node holding the source image
1130
    src_image: path to the source image on src_node
1131

1132
  Returns:
1133
    False in case of error, True otherwise.
1134

1135
  """
1136
  inst_os = OSFromDisk(instance.os)
1137
  import_script = inst_os.import_script
1138

    
1139
  for os_device in instance.disks:
1140
    if os_device.iv_name == os_disk:
1141
      break
1142
  else:
1143
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
1144
    return False
1145

    
1146
  for swap_device in instance.disks:
1147
    if swap_device.iv_name == swap_disk:
1148
      break
1149
  else:
1150
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
1151
    return False
1152

    
1153
  real_os_dev = _RecursiveFindBD(os_device)
1154
  if real_os_dev is None:
1155
    raise errors.BlockDeviceError, ("Block device '%s' is not set up" %
1156
                                    str(os_device))
1157
  real_os_dev.Open()
1158

    
1159
  real_swap_dev = _RecursiveFindBD(swap_device)
1160
  if real_swap_dev is None:
1161
    raise errors.BlockDeviceError, ("Block device '%s' is not set up" %
1162
                                    str(swap_device))
1163
  real_swap_dev.Open()
1164

    
1165
  logfile = "%s/import-%s-%s-%s.log" % (constants.LOG_OS_DIR, instance.os,
1166
                                        instance.name, int(time.time()))
1167
  if not os.path.exists(constants.LOG_OS_DIR):
1168
    os.mkdir(constants.LOG_OS_DIR, 0750)
1169

    
1170
  remotecmd = utils.BuildShellCmd("ssh -q -oStrictHostKeyChecking=yes"
1171
                                  " -oBatchMode=yes -oEscapeChar=none"
1172
                                  " %s 'cat %s'", src_node, src_image)
1173

    
1174
  comprcmd = "gunzip"
1175
  impcmd = utils.BuildShellCmd("(cd %s; %s -i %s -b %s -s %s &>%s)",
1176
                               inst_os.path, import_script, instance.name,
1177
                               real_os_dev.dev_path, real_swap_dev.dev_path,
1178
                               logfile)
1179

    
1180
  command = '|'.join([remotecmd, comprcmd, impcmd])
1181

    
1182
  result = utils.RunCmd(command)
1183

    
1184
  if result.failed:
1185
    logger.Error("os import command '%s' returned error: %s"
1186
                 " output: %s" %
1187
                 (command, result.fail_reason, result.output))
1188
    return False
1189

    
1190
  return True
1191

    
1192

    
1193
def ListExports():
  """Return a list of exports currently available on this machine.

  """
  # no export directory means no exports
  if not os.path.isdir(constants.EXPORT_DIR):
    return []
  return os.listdir(constants.EXPORT_DIR)
1201

    
1202

    
1203
def RemoveExport(export):
  """Remove an existing export from the node.

  Args:
    export: the name of the export to remove

  Returns:
    False in case of error, True otherwise.

  """
  target = os.path.join(constants.EXPORT_DIR, export)

  # FIX (resolves the old TODO): rmtree failures (e.g. the export does not
  # exist, or a permission problem) used to escape as uncaught exceptions,
  # although the docstring promises a False return on error
  try:
    shutil.rmtree(target)
  except EnvironmentError:
    logger.Error("Error while removing the export dir '%s'" % target)
    return False

  return True
1220

    
1221

    
1222
class HooksRunner(object):
  """Hook runner.

  This class is instantiated on the node side (ganeti-noded) and not on
  the master side.

  """
  # hook scripts whose basename does not match this pattern are skipped
  # by RunHooks (filters out e.g. editor backup files)
  RE_MASK = re.compile("^[a-zA-Z0-9_-]+$")

  def __init__(self, hooks_base_dir=None):
    """Constructor for hooks runner.

    Args:
      - hooks_base_dir: if not None, this overrides the
        constants.HOOKS_BASE_DIR (useful for unittests)

    """
    if hooks_base_dir is None:
      hooks_base_dir = constants.HOOKS_BASE_DIR
    self._BASE_DIR = hooks_base_dir

  @staticmethod
  def ExecHook(script, env):
    """Exec one hook script.

    Args:
     - script: the full path to the script
     - env: the environment with which to exec the script

    Returns:
      A (success, output) tuple: success is True iff the script exited
      with status 0, and output holds at most the first 4096 bytes of its
      combined stdout/stderr.

    """
    # exec the process using subprocess and log the output
    fdstdin = None
    try:
      # hooks get a /dev/null stdin so they cannot block on reads
      fdstdin = open("/dev/null", "r")
      child = subprocess.Popen([script], stdin=fdstdin, stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT, close_fds=True,
                               shell=False, cwd="/",env=env)
      output = ""
      try:
        # output is deliberately capped at the first 4096 bytes
        output = child.stdout.read(4096)
        # NOTE(review): closing the pipe before wait() may SIGPIPE a hook
        # that writes more than 4K - confirm this is the intended policy
        child.stdout.close()
      except EnvironmentError, err:
        output += "Hook script error: %s" % str(err)

      # retry wait() when it is interrupted by a signal (EINTR); any other
      # error propagates to the caller after the fd cleanup below
      while True:
        try:
          result = child.wait()
          break
        except EnvironmentError, err:
          if err.errno == errno.EINTR:
            continue
          raise
    finally:
      # try not to leak fds
      for fd in (fdstdin, ):
        if fd is not None:
          try:
            fd.close()
          except EnvironmentError, err:
            # just log the error
            #logger.Error("While closing fd %s: %s" % (fd, err))
            pass

    return result == 0, output

  def RunHooks(self, hpath, phase, env):
    """Run the scripts in the hooks directory.

    This method will not be usually overriden by child opcodes.

    Args:
      - hpath: the hooks path, used to build the <hpath>-<phase>.d
        subdirectory name
      - phase: one of constants.HOOKS_PHASE_PRE / HOOKS_PHASE_POST
      - env: the environment passed to each hook script

    Returns:
      A list of (relative script path, status, output) tuples, where
      status is one of constants.HKR_SKIP / HKR_FAIL / HKR_SUCCESS.

    Raises:
      errors.ProgrammerError: if phase is not one of the two known values

    """
    if phase == constants.HOOKS_PHASE_PRE:
      suffix = "pre"
    elif phase == constants.HOOKS_PHASE_POST:
      suffix = "post"
    else:
      raise errors.ProgrammerError, ("Unknown hooks phase: '%s'" % phase)
    rr = []

    subdir = "%s-%s.d" % (hpath, suffix)
    dir_name = "%s/%s" % (self._BASE_DIR, subdir)
    try:
      dir_contents = os.listdir(dir_name)
    except OSError, err:
      # must log
      # a missing/unreadable hooks directory means "no hooks to run"
      return rr

    # we use the standard python sort order,
    # so 00name is the recommended naming scheme
    dir_contents.sort()
    for relname in dir_contents:
      fname = os.path.join(dir_name, relname)
      # entries that are not regular executable files, or whose name fails
      # RE_MASK, are reported as skipped rather than executed
      if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
          self.RE_MASK.match(relname) is not None):
        rrval = constants.HKR_SKIP
        output = ""
      else:
        result, output = self.ExecHook(fname, env)
        if not result:
          rrval = constants.HKR_FAIL
        else:
          rrval = constants.HKR_SUCCESS
      rr.append(("%s/%s" % (subdir, relname), rrval, output))

    return rr