Statistics
| Branch: | Tag: | Revision:

root / lib / backend.py @ 3ecf6786

History | View | Annotate | Download (36.4 kB)

1
#!/usr/bin/python
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Functions used by the node daemon"""
23

    
24

    
25
import os
26
import os.path
27
import shutil
28
import time
29
import tempfile
30
import stat
31
import errno
32
import re
33
import subprocess
34

    
35
from ganeti import logger
36
from ganeti import errors
37
from ganeti import utils
38
from ganeti import ssh
39
from ganeti import hypervisor
40
from ganeti import constants
41
from ganeti import bdev
42
from ganeti import objects
43
from ganeti import ssconf
44

    
45

    
46
def StartMaster():
  """Activate local node as master node.

  This runs the cluster master script in daemon mode ("start") and
  reports whether it succeeded.

  Returns:
    True on success, False if the master script failed.

  """
  result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "start"])
  if not result.failed:
    return True
  logger.Error("could not activate cluster interface with command %s,"
               " error: '%s'" % (result.cmd, result.output))
  return False
62

    
63

    
64
def StopMaster():
  """Deactivate this node as master.

  This runs the cluster master script in daemon mode ("stop") and
  reports whether it succeeded.

  Returns:
    True on success, False if the master script failed.

  """
  result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "stop"])
  if not result.failed:
    return True
  logger.Error("could not deactivate cluster interface with command %s,"
               " error: '%s'" % (result.cmd, result.output))
  return False
80

    
81

    
82
def AddNode(dsa, dsapub, rsa, rsapub, ssh, sshpub):
  """Add the node to the cluster.

  This does the following:
    - replaces the host ssh keys (rsa and dsa, private and public)
    - installs root's ssh identity
      (NOTE: the 'ssh' parameter holds the private key text and shadows
      the module-level 'ssh' import, which is not used in this function)
    - authorizes the new key and restarts the ssh daemon
    - removes the old known_hosts file

  Returns:
    True (I/O errors while writing the key files propagate to the caller)

  """
  def _WriteKeyFile(file_name, data):
    # Write data to file_name, closing the file even if the write
    # fails (the previous open/write/close sequence leaked the file
    # object on a failed write).
    f = open(file_name, 'w')
    try:
      f.write(data)
    finally:
      f.close()

  _WriteKeyFile("/etc/ssh/ssh_host_rsa_key", rsa)
  _WriteKeyFile("/etc/ssh/ssh_host_rsa_key.pub", rsapub)
  _WriteKeyFile("/etc/ssh/ssh_host_dsa_key", dsa)
  _WriteKeyFile("/etc/ssh/ssh_host_dsa_key.pub", dsapub)

  if not os.path.isdir("/root/.ssh"):
    os.mkdir("/root/.ssh")

  _WriteKeyFile("/root/.ssh/id_dsa", ssh)
  _WriteKeyFile("/root/.ssh/id_dsa.pub", sshpub)

  # authorize the new public key for root logins
  f = open('/root/.ssh/id_dsa.pub', 'r')
  try:
    utils.AddAuthorizedKey('/root/.ssh/authorized_keys', f.read(8192))
  finally:
    f.close()

  utils.RunCmd(["/etc/init.d/ssh", "restart"])

  # the host keys changed, so the cached known_hosts is stale
  utils.RemoveFile("/root/.ssh/known_hosts")
  return True
127

    
128

    
129
def LeaveCluster():
  """Cleans up the current node and prepares it to be removed from the cluster.

  This removes the top-level files from the cluster data directory and
  drops this node's ssh identity (key files and authorized_keys entry).

  """
  # remove only the files directly inside DATA_DIR; sub-directories
  # and their contents are left alone
  if os.path.exists(constants.DATA_DIR):
    for dir_name, dummy_dirs, file_names in os.walk(constants.DATA_DIR):
      if dir_name == constants.DATA_DIR:
        for entry in file_names:
          os.unlink(os.path.join(dir_name, entry))

  pub_key_file = open('/root/.ssh/id_dsa.pub', 'r')
  try:
    utils.RemoveAuthorizedKey('/root/.ssh/authorized_keys',
                              pub_key_file.read(8192))
  finally:
    pub_key_file.close()

  utils.RemoveFile('/root/.ssh/id_dsa')
  utils.RemoveFile('/root/.ssh/id_dsa.pub')
147

    
148

    
149
def GetNodeInfo(vgname):
  """Return a dictionary with information about this node.

  Args:
    vgname: the name of the volume group to query

  Returns:
    { 'vg_size' : xxx,  'vg_free' : xxx, 'memory_domain0': xxx,
      'memory_free' : xxx, 'memory_total' : xxx }
    where
    vg_size is the size of the configured volume group in MiB
    vg_free is the free size of the volume group in MiB
    memory_dom0 is the memory allocated for domain0 in MiB
    memory_free is the currently available (free) ram in MiB
    memory_total is the total number of ram in MiB

  """
  vginfo = _GetVGInfo(vgname)
  node_info = {
    'vg_size': vginfo['vg_size'],
    'vg_free': vginfo['vg_free'],
    }

  # merge in whatever the hypervisor reports (memory data), if anything
  hyp_info = hypervisor.GetHypervisor().GetNodeInfo()
  if hyp_info is not None:
    node_info.update(hyp_info)

  return node_info
175

    
176

    
177
def VerifyNode(what):
  """Verify the status of the local node.

  Args:
    what - a dictionary of things to check:
      'filelist' : list of files for which to compute checksums
      'nodelist' : list of nodes we should check communication with
      'hypervisor': run the hypervisor-specific verify

  Requested files on local node are checksummed and the result returned.

  The nodelist is traversed, with the following checks being made
  for each node:
  - known_hosts key correct
  - correct resolving of node name (target node returns its own hostname
    by ssh-execution of 'hostname', result compared against name in list.

  Returns:
    a dict with one entry per requested check

  """
  result = {}

  if 'hypervisor' in what:
    result['hypervisor'] = hypervisor.GetHypervisor().Verify()

  if 'filelist' in what:
    result['filelist'] = utils.FingerprintFiles(what['filelist'])

  if 'nodelist' in what:
    # only nodes that fail the hostname check get an entry
    failures = {}
    for node in what['nodelist']:
      success, message = ssh.VerifyNodeHostname(node)
      if not success:
        failures[node] = message
    result['nodelist'] = failures
  return result
210

    
211

    
212
def GetVolumeList(vg_name):
  """Compute list of logical volumes and their size.

  Args:
    vg_name: the volume group to list

  Returns:
    dictionary of all partitions (key) with their size (value):
    test1: 20.06MiB
    or an empty dict if the lvs command failed

  """
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m",
                         "-oname,size", vg_name])
  if result.failed:
    logger.Error("Failed to list logical volumes, lvs output: %s" %
                 result.output)
    return {}

  volumes = {}
  for line in result.output.splitlines():
    # each line holds exactly two fields: name and size
    name, size = line.split()
    volumes[name] = size
  return volumes
229

    
230

    
231
def ListVolumeGroups():
  """List the volume groups and their size.

  This is a thin wrapper over utils.ListVolumeGroups().

  Returns:
    Dictionary with keys volume name and values the size of the volume

  """
  return utils.ListVolumeGroups()
239

    
240

    
241
def NodeVolumes():
  """List all volumes on this node.

  Returns:
    a list of dicts, one per logical volume, with keys 'name', 'size',
    'dev' and 'vg'; an empty dict if the lvs command failed

  """
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=|",
                         "--options=lv_name,lv_size,devices,vg_name"])
  if result.failed:
    logger.Error("Failed to list logical volumes, lvs output: %s" %
                 result.output)
    return {}

  volumes = []
  for line in result.output.splitlines():
    fields = line.split('|')
    # the devices field looks like "/dev/sda(0)"; keep only the path
    device = fields[2].strip()
    if '(' in device:
      device = device.split('(')[0]
    volumes.append({
      'name': fields[0].strip(),
      'size': fields[1].strip(),
      'dev': device,
      'vg': fields[3].strip(),
      })
  return volumes
268

    
269

    
270
def BridgesExist(bridges_list):
  """Check if a list of bridges exist on the current node.

  Args:
    bridges_list: the list of bridge names to check

  Returns:
    True if all of them exist, false otherwise

  """
  for bridge in bridges_list:
    if utils.BridgeExists(bridge):
      continue
    # stop at the first missing bridge
    return False
  return True
282

    
283

    
284
def GetInstanceList():
  """Provide a list of instances.

  Returns:
    A list of names of all running instances on the current node:
    - instance1.example.com
    - instance2.example.com

  Raises:
    errors.HypervisorError: re-raised (after logging) if the
    hypervisor cannot enumerate its instances

  """
  try:
    names = hypervisor.GetHypervisor().ListInstances()
  except errors.HypervisorError, err:
    # log for the node daemon's records, but let the caller see the error
    logger.Error("error enumerating instances: %s" % str(err))
    raise

  return names
300

    
301

    
302
def GetInstanceInfo(instance):
  """Return information about an instance as a dictionary.

  Args:
    instance: name of the instance (ex. instance1.example.com)

  Returns:
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
    where
    memory: memory size of instance (int)
    state: xen state of instance (string)
    time: cpu time of instance (float)
    or an empty dict if the hypervisor has no data for the instance

  """
  iinfo = hypervisor.GetHypervisor().GetInstanceInfo(instance)
  if iinfo is None:
    return {}
  # indices 2/4/5 of the hypervisor tuple are memory, state, cpu time
  return {
    'memory': iinfo[2],
    'state': iinfo[4],
    'time': iinfo[5],
    }
326

    
327

    
328
def GetAllInstancesInfo():
  """Gather data about all instances.

  This is the equivalent of `GetInstanceInfo()`, except that it
  computes data for all instances at once, thus being faster if one
  needs data about more than one instance.

  Returns: a dictionary of dictionaries, keys being the instance name,
    and with values:
    { 'memory' : 511, 'vcpus': 1, 'state' : '-b---', 'time' : 3188.8, }
    where
    memory: memory size of instance (int)
    state: xen state of instance (string)
    time: cpu time of instance (float)
    vcpus: the number of cpus

  """
  all_info = hypervisor.GetHypervisor().GetAllInstancesInfo()
  if not all_info:
    return {}

  instance_data = {}
  for (name, dummy_id, memory, vcpus, state, times) in all_info:
    instance_data[name] = {
      'memory': memory,
      'vcpus': vcpus,
      'state': state,
      'time': times,
      }
  return instance_data
358

    
359

    
360
def AddOSToInstance(instance, os_disk, swap_disk):
361
  """Add an os to an instance.
362

363
  Args:
364
    instance: the instance object
365
    os_disk: the instance-visible name of the os device
366
    swap_disk: the instance-visible name of the swap device
367

368
  """
369
  inst_os = OSFromDisk(instance.os)
370

    
371
  create_script = inst_os.create_script
372

    
373
  for os_device in instance.disks:
374
    if os_device.iv_name == os_disk:
375
      break
376
  else:
377
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
378
    return False
379

    
380
  for swap_device in instance.disks:
381
    if swap_device.iv_name == swap_disk:
382
      break
383
  else:
384
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
385
    return False
386

    
387
  real_os_dev = _RecursiveFindBD(os_device)
388
  if real_os_dev is None:
389
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
390
                                  str(os_device))
391
  real_os_dev.Open()
392

    
393
  real_swap_dev = _RecursiveFindBD(swap_device)
394
  if real_swap_dev is None:
395
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
396
                                  str(swap_device))
397
  real_swap_dev.Open()
398

    
399
  logfile = "%s/add-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
400
                                     instance.name, int(time.time()))
401
  if not os.path.exists(constants.LOG_OS_DIR):
402
    os.mkdir(constants.LOG_OS_DIR, 0750)
403

    
404
  command = utils.BuildShellCmd("cd %s; %s -i %s -b %s -s %s &>%s",
405
                                inst_os.path, create_script, instance.name,
406
                                real_os_dev.dev_path, real_swap_dev.dev_path,
407
                                logfile)
408

    
409
  result = utils.RunCmd(command)
410

    
411
  if result.failed:
412
    logger.Error("os create command '%s' returned error: %s"
413
                 " output: %s" %
414
                 (command, result.fail_reason, result.output))
415
    return False
416

    
417
  return True
418

    
419

    
420
def _GetVGInfo(vg_name):
  """Get information about the volume group.

  Args:
    vg_name: the volume group

  Returns:
    { 'vg_size' : xxx, 'vg_free' : xxx, 'pv_count' : xxx }
    where
    vg_size is the total size of the volume group in MiB
    vg_free is the free size of the volume group in MiB
    pv_count are the number of physical disks in that vg

  Raises:
    errors.LVMError: if the volume group is not present

  """
  retval = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings",
                         "--nosuffix", "--units=m", "--separator=:", vg_name])

  if retval.failed:
    errmsg = "volume group %s not present" % vg_name
    logger.Error(errmsg)
    raise errors.LVMError(errmsg)

  # output is a single colon-separated line: size:free:pv_count
  fields = retval.stdout.strip().split(':')
  return {
    "vg_size": int(round(float(fields[0]), 0)),
    "vg_free": int(round(float(fields[1]), 0)),
    "pv_count": int(fields[2]),
    }
448

    
449

    
450
def _GatherBlockDevs(instance):
  """Set up an instance's block device(s).

  This is run on the primary node at instance startup. The block
  devices must be already assembled.

  Returns:
    a list of (disk, device) pairs, one per instance disk

  Raises:
    errors.BlockDeviceError: if any disk cannot be found

  """
  devices = []
  for disk in instance.disks:
    found_dev = _RecursiveFindBD(disk)
    if found_dev is None:
      raise errors.BlockDeviceError("Block device '%s' is not set up." %
                                    str(disk))
    found_dev.Open()
    devices.append((disk, found_dev))
  return devices
466

    
467

    
468
def StartInstance(instance, extra_args):
  """Start an instance.

  Args:
    instance - the instance object (its 'name' attribute and disks
               are used, not just a name string)
    extra_args - extra arguments passed through to the hypervisor

  Returns:
    True if the instance was already running or was started,
    False if the hypervisor failed to start it.

  """
  running_instances = GetInstanceList()

  if instance.name in running_instances:
    # already running: nothing to do
    return True

  block_devices = _GatherBlockDevs(instance)
  hyper = hypervisor.GetHypervisor()

  try:
    hyper.StartInstance(instance, block_devices, extra_args)
  except errors.HypervisorError, err:
    logger.Error("Failed to start instance: %s" % err)
    return False

  return True
490

    
491

    
492
def ShutdownInstance(instance):
493
  """Shut an instance down.
494

495
  Args:
496
    instance - name of instance to shutdown.
497

498
  """
499
  running_instances = GetInstanceList()
500

    
501
  if instance.name not in running_instances:
502
    return True
503

    
504
  hyper = hypervisor.GetHypervisor()
505
  try:
506
    hyper.StopInstance(instance)
507
  except errors.HypervisorError, err:
508
    logger.Error("Failed to stop instance: %s" % err)
509
    return False
510

    
511
  # test every 10secs for 2min
512
  shutdown_ok = False
513

    
514
  time.sleep(1)
515
  for dummy in range(11):
516
    if instance.name not in GetInstanceList():
517
      break
518
    time.sleep(10)
519
  else:
520
    # the shutdown did not succeed
521
    logger.Error("shutdown of '%s' unsuccessful, using destroy" % instance)
522

    
523
    try:
524
      hyper.StopInstance(instance, force=True)
525
    except errors.HypervisorError, err:
526
      logger.Error("Failed to stop instance: %s" % err)
527
      return False
528

    
529
    time.sleep(1)
530
    if instance.name in GetInstanceList():
531
      logger.Error("could not shutdown instance '%s' even by destroy")
532
      return False
533

    
534
  return True
535

    
536

    
537
def CreateBlockDevice(disk, size, on_primary, info):
  """Creates a block device for an instance.

  Args:
    disk: a ganeti.objects.Disk object describing the device to create
    size: the size of the physical underlying devices
    on_primary: whether this node is the instance's primary; on the
                primary the device is assembled and opened
    info: data passed to the device's SetInfo() after creation

  Returns:
    the new unique_id of the device (this can sometime be
    computed only after creation), or None. On secondary nodes,
    it's not required to return anything.

  Raises:
    ValueError: if bdev.Create returns no device

  """
  # assemble all children first, so the device itself can be assembled
  clist = []
  if disk.children:
    for child in disk.children:
      crdev = _RecursiveAssembleBD(child, on_primary)
      if on_primary or disk.AssembleOnSecondary():
        # we need the children open in case the device itself has to
        # be assembled
        crdev.Open()
      else:
        crdev.Close()
      clist.append(crdev)
  try:
    # if a stale device with the same id already exists, remove it first
    device = bdev.FindDevice(disk.dev_type, disk.physical_id, clist)
    if device is not None:
      logger.Info("removing existing device %s" % disk)
      device.Remove()
  except errors.BlockDeviceError, err:
    # best-effort: if we can't even attach to a pre-existing device,
    # just proceed with the creation
    pass

  device = bdev.Create(disk.dev_type, disk.physical_id,
                       clist, size)
  if device is None:
    raise ValueError("Can't create child device for %s, %s" %
                     (disk, size))
  if on_primary or disk.AssembleOnSecondary():
    device.Assemble()
    device.SetSyncSpeed(constants.SYNC_SPEED)
    if on_primary or disk.OpenOnSecondary():
      device.Open(force=True)

  device.SetInfo(info)

  physical_id = device.unique_id
  return physical_id
586

    
587

    
588
def RemoveBlockDevice(disk):
  """Remove a block device.

  This is intended to be called recursively.

  Args:
    disk: the objects.Disk instance to remove

  Returns:
    True if the device and its children were removed (or could not be
    found at all), False otherwise.

  """
  try:
    # since we are removing the device, allow a partial match
    # this allows removal of broken mirrors
    rdev = _RecursiveFindBD(disk, allow_partial=True)
  except errors.BlockDeviceError, err:
    # probably can't attach
    logger.Info("Can't attach to device %s in remove" % disk)
    rdev = None
  if rdev is not None:
    result = rdev.Remove()
  else:
    # nothing to remove counts as success
    result = True
  if disk.children:
    for child in disk.children:
      # NOTE: 'and' short-circuits, so once a removal fails the
      # remaining children are not processed
      result = result and RemoveBlockDevice(child)
  return result
610

    
611

    
612
def _RecursiveAssembleBD(disk, as_primary):
  """Activate a block device for an instance.

  This is run on the primary and secondary nodes for an instance.

  This function is called recursively.

  Args:
    disk: a objects.Disk object
    as_primary: if we should make the block device read/write

  Returns:
    the assembled device, or True when the device itself should not
    be assembled on this node (children are still assembled)

  If the assembly is not successful, an exception is raised.

  """
  # always assemble the children first
  children = []
  if disk.children:
    for chld_disk in disk.children:
      children.append(_RecursiveAssembleBD(chld_disk, as_primary))

  if not (as_primary or disk.AssembleOnSecondary()):
    return True

  r_dev = bdev.AttachOrAssemble(disk.dev_type, disk.physical_id, children)
  r_dev.SetSyncSpeed(constants.SYNC_SPEED)
  if as_primary or disk.OpenOnSecondary():
    r_dev.Open()
  else:
    r_dev.Close()
  return r_dev
645

    
646

    
647
def AssembleBlockDevice(disk, as_primary):
  """Activate a block device for an instance.

  This is a wrapper over _RecursiveAssembleBD.

  Returns:
    a /dev path for primary nodes
    True for secondary nodes

  """
  assembled = _RecursiveAssembleBD(disk, as_primary)
  if not isinstance(assembled, bdev.BlockDev):
    return assembled
  # on the primary we get a device object back; callers want its path
  return assembled.dev_path
661

    
662

    
663
def ShutdownBlockDevice(disk):
  """Shut down a block device.

  First, if the device is assembled (can `Attach()`), then the device
  is shutdown. Then the children of the device are shutdown.

  This function is called recursively. Note that we don't cache the
  children or such, as opposed to assemble, shutdown of different
  devices doesn't require that the upper device was active.

  Returns:
    True if the device and its children were shut down (or were not
    found), False otherwise.

  """
  found_dev = _RecursiveFindBD(disk)
  if found_dev is None:
    # device not found: nothing to shut down
    result = True
  else:
    result = found_dev.Shutdown()
  if disk.children:
    for child in disk.children:
      # 'and' short-circuits: once a shutdown fails, the remaining
      # children are not processed
      result = result and ShutdownBlockDevice(child)
  return result
683

    
684

    
685
def MirrorAddChild(md_cdev, new_cdev):
  """Extend an MD raid1 array.

  Args:
    md_cdev: the objects.Disk for the md array
    new_cdev: the objects.Disk to add to the array

  Returns:
    True on success, False if either device cannot be found.

  """
  # allow a partial match so that broken mirrors can still be extended
  array_dev = _RecursiveFindBD(md_cdev, allow_partial=True)
  if array_dev is None:
    logger.Error("Can't find md device")
    return False
  added_dev = _RecursiveFindBD(new_cdev)
  if added_dev is None:
    logger.Error("Can't find new device to add")
    return False
  added_dev.Open()
  array_dev.AddChild(added_dev)
  return True
700

    
701

    
702
def MirrorRemoveChild(md_cdev, new_cdev):
  """Reduce an MD raid1 array.

  Args:
    md_cdev: the objects.Disk for the md array
    new_cdev: the objects.Disk to remove from the array

  Returns:
    True on success, False if either device cannot be found.

  """
  md_bdev = _RecursiveFindBD(md_cdev)
  if md_bdev is None:
    # log the failure, consistently with MirrorAddChild (the original
    # returned False silently here)
    logger.Error("Can't find md device")
    return False
  new_bdev = _RecursiveFindBD(new_cdev)
  if new_bdev is None:
    logger.Error("Can't find device to remove")
    return False
  new_bdev.Open()
  md_bdev.RemoveChild(new_bdev.dev_path)
  return True
715

    
716

    
717
def GetMirrorStatus(disks):
  """Get the mirroring status of a list of devices.

  Args:
    disks: list of `objects.Disk`

  Returns:
    list of (mirror_done, estimated_time) tuples, which
    are the result of bdev.BlockDevice.CombinedSyncStatus()

  Raises:
    errors.BlockDeviceError: if any disk cannot be found

  """
  status = []
  for disk in disks:
    found_dev = _RecursiveFindBD(disk)
    if found_dev is None:
      raise errors.BlockDeviceError("Can't find device %s" % str(disk))
    status.append(found_dev.CombinedSyncStatus())
  return status
735

    
736

    
737
def _RecursiveFindBD(disk, allow_partial=False):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance
    allow_partial: don't abort the find if a child of the
                   device can't be found; this is intended to be
                   used when repairing mirrors
                   NOTE(review): this parameter is currently unused in
                   the body - it is neither forwarded to the recursive
                   calls nor passed to bdev.FindDevice; confirm whether
                   partial finds actually behave as documented

  Returns:
    None if the device can't be found
    otherwise the device instance

  """
  # resolve all children first, then look up the device itself
  children = []
  if disk.children:
    for chdisk in disk.children:
      children.append(_RecursiveFindBD(chdisk))

  return bdev.FindDevice(disk.dev_type, disk.physical_id, children)
759

    
760

    
761
def FindBlockDevice(disk):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance

  Returns:
    None if the device can't be found
    (device_path, major, minor, sync_percent, estimated_time, is_degraded)

  """
  rbd = _RecursiveFindBD(disk)
  if rbd is None:
    return None
  sync_percent, est_time, is_degraded = rbd.GetSyncStatus()
  return (rbd.dev_path, rbd.major, rbd.minor,
          sync_percent, est_time, is_degraded)
778

    
779

    
780
def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
  """Write a file to the filesystem.

  This allows the master to overwrite(!) a file. It will only perform
  the operation if the file belongs to a list of configuration files.

  Args:
    file_name: absolute path of the target file
    data: the new contents
    mode, uid, gid: permissions and ownership to set
    atime, mtime: timestamps to set

  Returns:
    True on success, False if the path is not absolute or is not an
    allowed upload target.

  """
  if not os.path.isabs(file_name):
    logger.Error("Filename passed to UploadFile is not absolute: '%s'" %
                 file_name)
    return False

  upload_targets = [constants.CLUSTER_CONF_FILE, "/etc/hosts",
                    "/etc/ssh/ssh_known_hosts"]
  upload_targets.extend(ssconf.SimpleStore().GetFileList())
  if file_name not in upload_targets:
    logger.Error("Filename passed to UploadFile not in allowed"
                 " upload targets: '%s'" % file_name)
    return False

  # write to a temporary file in the same directory and rename it into
  # place, so the target is replaced atomically; the finally clause
  # cleans the temp file up if anything fails before the rename
  base_dir, base_name = os.path.split(file_name)
  temp_fd, temp_name = tempfile.mkstemp('.new', base_name, base_dir)
  try:
    os.chown(temp_name, uid, gid)
    os.chmod(temp_name, mode)
    os.write(temp_fd, data)
    os.fsync(temp_fd)
    os.utime(temp_name, (atime, mtime))
    os.rename(temp_name, file_name)
  finally:
    os.close(temp_fd)
    utils.RemoveFile(temp_name)
  return True
815

    
816
def _ErrnoOrStr(err):
817
  """Format an EnvironmentError exception.
818

819
  If the `err` argument has an errno attribute, it will be looked up
820
  and converted into a textual EXXXX description. Otherwise the string
821
  representation of the error will be returned.
822

823
  """
824
  if hasattr(err, 'errno'):
825
    detail = errno.errorcode[err.errno]
826
  else:
827
    detail = str(err)
828
  return detail
829

    
830

    
831
def _OSOndiskVersion(name, os_dir=None):
  """Compute and return the api version of a given OS.

  This function will try to read the api version of the os given by
  the 'name' parameter. By default, it will use the constants.OS_DIR
  as top-level directory for OSes, but this can be overridden by the
  use of the os_dir parameter.

  Returns:
    the OS API version as an integer

  Raises:
    errors.InvalidOS: if the 'ganeti_api_version' file is missing,
    not a regular file, unreadable, or does not contain an integer.
    (Note: contrary to older documentation, this never returns None
    for an invalid OS - it raises instead.)

  """
  if os_dir is None:
    os_dir = os.path.sep.join([constants.OS_DIR, name])

  api_file = os.path.sep.join([os_dir, "ganeti_api_version"])

  try:
    st = os.stat(api_file)
  except EnvironmentError, err:
    raise errors.InvalidOS(name, "'ganeti_api_version' file not"
                           " found (%s)" % _ErrnoOrStr(err))

  if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
    raise errors.InvalidOS(name, "'ganeti_api_version' file is not"
                           " a regular file")

  try:
    f = open(api_file)
    try:
      # the version file should be tiny; 256 bytes is more than enough
      api_version = f.read(256)
    finally:
      f.close()
  except EnvironmentError, err:
    raise errors.InvalidOS(name, "error while reading the"
                           " API version (%s)" % _ErrnoOrStr(err))

  api_version = api_version.strip()
  try:
    api_version = int(api_version)
  except (TypeError, ValueError), err:
    raise errors.InvalidOS(name, "API version is not integer (%s)" % str(err))

  return api_version
874

    
875
def DiagnoseOS(top_dir=None):
  """Compute the validity for all OSes.

  For each name in the given top_dir parameter (if not given, defaults
  to constants.OS_DIR), it will return an object. If this is a valid
  os, the object will be an instance of the object.OS class. If not,
  it will be an instance of errors.InvalidOS and this signifies that
  this name does not correspond to a valid OS.

  Returns:
    list of objects.OS / errors.InvalidOS instances
    NOTE(review): on failure to list the OS directory this returns
    False instead of a list - callers must handle both types

  """
  if top_dir is None:
    top_dir = constants.OS_DIR

  try:
    f_names = os.listdir(top_dir)
  except EnvironmentError, err:
    logger.Error("Can't list the OS directory: %s" % str(err))
    return False
  result = []
  for name in f_names:
    try:
      os_inst = OSFromDisk(name, os.path.sep.join([top_dir, name]))
      result.append(os_inst)
    except errors.InvalidOS, err:
      # keep the error object itself, so callers can report the reason
      result.append(err)

  return result
905

    
906

    
907
def OSFromDisk(name, os_dir=None):
  """Create an OS instance from disk.

  This function will return an OS instance if the given name is a
  valid OS name. Otherwise, it will raise an appropriate
  `errors.InvalidOS` exception, detailing why this is not a valid
  OS.

  Args:
    name: the OS name
    os_dir: the directory holding the OS (defaults to
            constants.OS_DIR/name)

  Returns:
    an objects.OS instance with the script paths and api version set

  Raises:
    errors.InvalidOS: on API version mismatch, or if any of the
    'create'/'export'/'import' scripts is missing, not a regular
    file, or not executable

  """
  if os_dir is None:
    os_dir = os.path.sep.join([constants.OS_DIR, name])

  api_version = _OSOndiskVersion(name, os_dir)

  if api_version != constants.OS_API_VERSION:
    raise errors.InvalidOS(name, "API version mismatch (found %s want %s)"
                           % (api_version, constants.OS_API_VERSION))

  # OS Scripts dictionary, we will populate it with the actual script names
  os_scripts = {'create': '', 'export': '', 'import': ''}

  for script in os_scripts:
    os_scripts[script] = os.path.sep.join([os_dir, script])

    try:
      st = os.stat(os_scripts[script])
    except EnvironmentError, err:
      raise errors.InvalidOS(name, "'%s' script missing (%s)" %
                             (script, _ErrnoOrStr(err)))

    # each script must be executable by its owner...
    if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
      raise errors.InvalidOS(name, "'%s' script not executable" % script)

    # ...and a regular file (not a directory, device, etc.)
    if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
      raise errors.InvalidOS(name, "'%s' is not a regular file" % script)

  return objects.OS(name=name, path=os_dir,
                    create_script=os_scripts['create'],
                    export_script=os_scripts['export'],
                    import_script=os_scripts['import'],
                    api_version=api_version)
949

    
950

    
951
def SnapshotBlockDevice(disk):
  """Create a snapshot copy of a block device.

  This function is called recursively, and the snapshot is actually created
  just for the leaf lvm backend device.

  Args:
    disk: the disk to be snapshotted

  Returns:
    a config entry for the actual lvm device snapshotted, or None when
    no child matches the disk size or the leaf lvm device is not set up

  Raises:
    errors.ProgrammerError: if the leaf device is not lvm-based

  """
  if disk.children:
    if len(disk.children) == 1:
      # only one child, let's recurse on it
      return SnapshotBlockDevice(disk.children[0])
    # more than one child, choose one that matches
    for child in disk.children:
      if child.size == disk.size:
        # return implies breaking the loop
        return SnapshotBlockDevice(child)
    # no child matched the parent's size; the original fell through
    # and returned None implicitly - make that explicit
    return None
  elif disk.dev_type == "lvm":
    r_dev = _RecursiveFindBD(disk)
    if r_dev is not None:
      # let's stay on the safe side and ask for the full size, for now
      return r_dev.Snapshot(disk.size)
    else:
      return None
  else:
    # FIX: the adjacent string literals used to concatenate without a
    # space, producing "...block device'%s'..."
    raise errors.ProgrammerError("Cannot snapshot non-lvm block device"
                                 " '%s' of type '%s'" %
                                 (disk.unique_id, disk.dev_type))
985

    
986

    
987
def ExportSnapshot(disk, dest_node, instance):
988
  """Export a block device snapshot to a remote node.
989

990
  Args:
991
    disk: the snapshot block device
992
    dest_node: the node to send the image to
993
    instance: instance being exported
994

995
  Returns:
996
    True if successful, False otherwise.
997

998
  """
999
  inst_os = OSFromDisk(instance.os)
1000
  export_script = inst_os.export_script
1001

    
1002
  logfile = "%s/exp-%s-%s-%s.log" % (constants.LOG_OS_DIR, inst_os.name,
1003
                                     instance.name, int(time.time()))
1004
  if not os.path.exists(constants.LOG_OS_DIR):
1005
    os.mkdir(constants.LOG_OS_DIR, 0750)
1006

    
1007
  real_os_dev = _RecursiveFindBD(disk)
1008
  if real_os_dev is None:
1009
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
1010
                                  str(disk))
1011
  real_os_dev.Open()
1012

    
1013
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
1014
  destfile = disk.physical_id[1]
1015

    
1016
  # the target command is built out of three individual commands,
1017
  # which are joined by pipes; we check each individual command for
1018
  # valid parameters
1019

    
1020
  expcmd = utils.BuildShellCmd("cd %s; %s -i %s -b %s 2>%s", inst_os.path,
1021
                               export_script, instance.name,
1022
                               real_os_dev.dev_path, logfile)
1023

    
1024
  comprcmd = "gzip"
1025

    
1026
  remotecmd = utils.BuildShellCmd("ssh -q -oStrictHostKeyChecking=yes"
1027
                                  " -oBatchMode=yes -oEscapeChar=none"
1028
                                  " %s 'mkdir -p %s; cat > %s/%s'",
1029
                                  dest_node, destdir, destdir, destfile)
1030

    
1031
  # all commands have been checked, so we're safe to combine them
1032
  command = '|'.join([expcmd, comprcmd, remotecmd])
1033

    
1034
  result = utils.RunCmd(command)
1035

    
1036
  if result.failed:
1037
    logger.Error("os snapshot export command '%s' returned error: %s"
1038
                 " output: %s" %
1039
                 (command, result.fail_reason, result.output))
1040
    return False
1041

    
1042
  return True
1043

    
1044

    
1045
def FinalizeExport(instance, snap_disks):
  """Write out the export configuration information.

  An ini-style config file describing the export is written into the
  temporary export directory ("<instance name>.new"), which then
  replaces any previous final export directory for this instance.

  Args:
    instance: instance configuration
    snap_disks: snapshot block devices

  Returns:
    False in case of error, True otherwise.

  """
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
  finaldestdir = os.path.join(constants.EXPORT_DIR, instance.name)

  config = objects.SerializableConfigParser()

  # export-level metadata
  config.add_section(constants.INISECT_EXP)
  config.set(constants.INISECT_EXP, 'version', '0')
  config.set(constants.INISECT_EXP, 'timestamp', '%d' % int(time.time()))
  config.set(constants.INISECT_EXP, 'source', instance.primary_node)
  config.set(constants.INISECT_EXP, 'os', instance.os)
  config.set(constants.INISECT_EXP, 'compression', 'gzip')

  # instance-level metadata
  config.add_section(constants.INISECT_INS)
  config.set(constants.INISECT_INS, 'name', instance.name)
  config.set(constants.INISECT_INS, 'memory', '%d' % instance.memory)
  config.set(constants.INISECT_INS, 'vcpus', '%d' % instance.vcpus)
  config.set(constants.INISECT_INS, 'disk_template', instance.disk_template)

  # FIX: pre-initialize the counters so an instance without nics or
  # disks no longer raises NameError after the loops.  NOTE: for
  # compatibility with existing exports, 'nic_count'/'disk_count' keep
  # storing the *last index* (count - 1; -1 when empty), not the count.
  nic_count = -1
  for nic_count, nic in enumerate(instance.nics):
    config.set(constants.INISECT_INS, 'nic%d_mac' %
               nic_count, '%s' % nic.mac)
    config.set(constants.INISECT_INS, 'nic%d_ip' % nic_count, '%s' % nic.ip)
  # TODO: redundant: on load can read nics until it doesn't exist
  config.set(constants.INISECT_INS, 'nic_count', '%d' % nic_count)

  disk_count = -1
  for disk_count, disk in enumerate(snap_disks):
    config.set(constants.INISECT_INS, 'disk%d_ivname' % disk_count,
               ('%s' % disk.iv_name))
    config.set(constants.INISECT_INS, 'disk%d_dump' % disk_count,
               ('%s' % disk.physical_id[1]))
    config.set(constants.INISECT_INS, 'disk%d_size' % disk_count,
               ('%d' % disk.size))
  config.set(constants.INISECT_INS, 'disk_count', '%d' % disk_count)

  cff = os.path.join(destdir, constants.EXPORT_CONF_FILE)
  cfo = open(cff, 'w')
  try:
    config.write(cfo)
  finally:
    cfo.close()

  # replace any previous export for this instance
  shutil.rmtree(finaldestdir, True)
  shutil.move(destdir, finaldestdir)

  return True


def ExportInfo(dest):
  """Load the configuration stored with an export.

  Args:
    dest: directory containing the export

  Returns:
    A serializable config file containing the export info, or None if
    the expected sections are missing.

  """
  conf_file = os.path.join(dest, constants.EXPORT_CONF_FILE)
  config = objects.SerializableConfigParser()
  config.read(conf_file)

  # both the export-level and the instance-level sections must exist
  # for the file to be considered a valid export description
  if (config.has_section(constants.INISECT_EXP) and
      config.has_section(constants.INISECT_INS)):
    return config
  return None


def ImportOSIntoInstance(instance, os_disk, swap_disk, src_node, src_image):
1125
  """Import an os image into an instance.
1126

1127
  Args:
1128
    instance: the instance object
1129
    os_disk: the instance-visible name of the os device
1130
    swap_disk: the instance-visible name of the swap device
1131
    src_node: node holding the source image
1132
    src_image: path to the source image on src_node
1133

1134
  Returns:
1135
    False in case of error, True otherwise.
1136

1137
  """
1138
  inst_os = OSFromDisk(instance.os)
1139
  import_script = inst_os.import_script
1140

    
1141
  for os_device in instance.disks:
1142
    if os_device.iv_name == os_disk:
1143
      break
1144
  else:
1145
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
1146
    return False
1147

    
1148
  for swap_device in instance.disks:
1149
    if swap_device.iv_name == swap_disk:
1150
      break
1151
  else:
1152
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
1153
    return False
1154

    
1155
  real_os_dev = _RecursiveFindBD(os_device)
1156
  if real_os_dev is None:
1157
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
1158
                                  str(os_device))
1159
  real_os_dev.Open()
1160

    
1161
  real_swap_dev = _RecursiveFindBD(swap_device)
1162
  if real_swap_dev is None:
1163
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
1164
                                  str(swap_device))
1165
  real_swap_dev.Open()
1166

    
1167
  logfile = "%s/import-%s-%s-%s.log" % (constants.LOG_OS_DIR, instance.os,
1168
                                        instance.name, int(time.time()))
1169
  if not os.path.exists(constants.LOG_OS_DIR):
1170
    os.mkdir(constants.LOG_OS_DIR, 0750)
1171

    
1172
  remotecmd = utils.BuildShellCmd("ssh -q -oStrictHostKeyChecking=yes"
1173
                                  " -oBatchMode=yes -oEscapeChar=none"
1174
                                  " %s 'cat %s'", src_node, src_image)
1175

    
1176
  comprcmd = "gunzip"
1177
  impcmd = utils.BuildShellCmd("(cd %s; %s -i %s -b %s -s %s &>%s)",
1178
                               inst_os.path, import_script, instance.name,
1179
                               real_os_dev.dev_path, real_swap_dev.dev_path,
1180
                               logfile)
1181

    
1182
  command = '|'.join([remotecmd, comprcmd, impcmd])
1183

    
1184
  result = utils.RunCmd(command)
1185

    
1186
  if result.failed:
1187
    logger.Error("os import command '%s' returned error: %s"
1188
                 " output: %s" %
1189
                 (command, result.fail_reason, result.output))
1190
    return False
1191

    
1192
  return True
1193

    
1194

    
1195
def ListExports():
  """Return the names of all exports present on this node.

  Returns:
    A list of directory entries under the export directory, or an
    empty list when that directory does not exist.

  """
  export_dir = constants.EXPORT_DIR
  # a missing export directory simply means there are no exports
  if not os.path.isdir(export_dir):
    return []
  return os.listdir(export_dir)


def RemoveExport(export):
  """Remove an existing export from the node.

  Args:
    export: the name of the export to remove

  Returns:
    False in case of error, True otherwise.

  """
  export_path = os.path.join(constants.EXPORT_DIR, export)

  # TODO: catch some of the relevant exceptions and provide a pretty
  # error message if rmtree fails.
  shutil.rmtree(export_path)

  return True


class HooksRunner(object):
  """Hook runner.

  This class is instantiated on the node side (ganeti-noded) and not on
  the master side.

  """
  # hook scripts must match this name mask, otherwise they are skipped
  RE_MASK = re.compile("^[a-zA-Z0-9_-]+$")

  def __init__(self, hooks_base_dir=None):
    """Constructor for hooks runner.

    Args:
      - hooks_base_dir: if not None, this overrides the
        constants.HOOKS_BASE_DIR (useful for unittests)

    """
    if hooks_base_dir is None:
      hooks_base_dir = constants.HOOKS_BASE_DIR
    self._BASE_DIR = hooks_base_dir

  @staticmethod
  def ExecHook(script, env):
    """Exec one hook script.

    Args:
     - script: the full path to the script
     - env: the environment with which to exec the script

    Returns:
      A (success, output) tuple: success is True iff the script exited
      with status 0; output is at most the first 4096 bytes of the
      script's combined stdout+stderr.

    """
    # exec the process using subprocess and log the output
    fdstdin = None
    try:
      # feed the hook an empty stdin so it can never block on input
      fdstdin = open("/dev/null", "r")
      child = subprocess.Popen([script], stdin=fdstdin, stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT, close_fds=True,
                               shell=False, cwd="/",env=env)
      output = ""
      try:
        # NOTE(review): only the first 4096 bytes of output are kept and
        # the pipe is closed immediately, so a hook writing more may get
        # SIGPIPE on its next write -- presumably an accepted trade-off
        # to bound memory use; confirm before relying on full output
        output = child.stdout.read(4096)
        child.stdout.close()
      except EnvironmentError, err:
        output += "Hook script error: %s" % str(err)

      # retry wait() when interrupted by a signal (EINTR); any other
      # error is propagated to the caller
      while True:
        try:
          result = child.wait()
          break
        except EnvironmentError, err:
          if err.errno == errno.EINTR:
            continue
          raise
    finally:
      # try not to leak fds
      for fd in (fdstdin, ):
        if fd is not None:
          try:
            fd.close()
          except EnvironmentError, err:
            # just log the error
            #logger.Error("While closing fd %s: %s" % (fd, err))
            pass

    return result == 0, output

  def RunHooks(self, hpath, phase, env):
    """Run the scripts in the hooks directory.

    This method will not be usually overriden by child opcodes.

    Args:
      - hpath: hooks path; the scripts live in "<hpath>-<suffix>.d"
        under the base directory
      - phase: one of constants.HOOKS_PHASE_PRE/HOOKS_PHASE_POST
      - env: environment passed to each executed hook

    Returns:
      A list of ("subdir/script", status, output) tuples, where status
      is one of constants.HKR_SKIP/HKR_FAIL/HKR_SUCCESS.

    Raises:
      errors.ProgrammerError: if phase is not a known hooks phase

    """
    if phase == constants.HOOKS_PHASE_PRE:
      suffix = "pre"
    elif phase == constants.HOOKS_PHASE_POST:
      suffix = "post"
    else:
      raise errors.ProgrammerError("Unknown hooks phase: '%s'" % phase)
    rr = []

    subdir = "%s-%s.d" % (hpath, suffix)
    dir_name = "%s/%s" % (self._BASE_DIR, subdir)
    try:
      dir_contents = os.listdir(dir_name)
    except OSError, err:
      # a missing or unreadable hooks directory means no hooks to run
      # must log
      return rr

    # we use the standard python sort order,
    # so 00name is the recommended naming scheme
    dir_contents.sort()
    for relname in dir_contents:
      fname = os.path.join(dir_name, relname)
      # anything that is not a regular, executable file with an allowed
      # name is still reported, but with status HKR_SKIP
      if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
          self.RE_MASK.match(relname) is not None):
        rrval = constants.HKR_SKIP
        output = ""
      else:
        result, output = self.ExecHook(fname, env)
        if not result:
          rrval = constants.HKR_FAIL
        else:
          rrval = constants.HKR_SUCCESS
      rr.append(("%s/%s" % (subdir, relname), rrval, output))

    return rr