Statistics
| Branch: | Tag: | Revision:

root / lib / backend.py @ a0c3fea1

History | View | Annotate | Download (36.5 kB)

1
#!/usr/bin/python
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Functions used by the node daemon"""
23

    
24

    
25
import os
26
import os.path
27
import shutil
28
import time
29
import tempfile
30
import stat
31
import errno
32
import re
33
import subprocess
34

    
35
from ganeti import logger
36
from ganeti import errors
37
from ganeti import utils
38
from ganeti import ssh
39
from ganeti import hypervisor
40
from ganeti import constants
41
from ganeti import bdev
42
from ganeti import objects
43
from ganeti import ssconf
44

    
45

    
46
def StartMaster():
  """Activate local node as master node.

  There are two needed steps for this:
    - run the master script
    - register the cron script

  Returns True on success, False (after logging) on failure.

  """
  runresult = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "start"])
  if not runresult.failed:
    return True
  logger.Error("could not activate cluster interface with command %s,"
               " error: '%s'" % (runresult.cmd, runresult.output))
  return False
62

    
63

    
64
def StopMaster():
  """Deactivate this node as master.

  This does two things:
    - run the master stop script
    - remove link to master cron script.

  Returns True on success, False (after logging) on failure.

  """
  runresult = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "stop"])
  if not runresult.failed:
    return True
  logger.Error("could not deactivate cluster interface with command %s,"
               " error: '%s'" % (runresult.cmd, runresult.output))
  return False
80

    
81

    
82
def _WriteKeyFile(file_name, data):
  """Overwrite file_name with data, always closing the descriptor.

  Helper for AddNode; the original inline open/write/close sequence
  leaked the file object if write() raised.

  """
  f = open(file_name, 'w')
  try:
    f.write(data)
  finally:
    f.close()


def AddNode(dsa, dsapub, rsa, rsapub, ssh, sshpub):
  """Add the node to the cluster.

  This does the following:
      - updates the hostkey (rsa and dsa key pairs)
      - adds the ssh identity key for root
      - registers root's public key as authorized
      - restarts sshd and clears stale known_hosts entries

  Args:
    dsa, dsapub: private/public host dsa keys
    rsa, rsapub: private/public host rsa keys
    ssh, sshpub: private/public root identity keys
      (NOTE: the 'ssh' parameter shadows the module-level 'ssh'
      import, which is therefore unusable inside this function)

  Returns True.

  """
  _WriteKeyFile("/etc/ssh/ssh_host_rsa_key", rsa)
  _WriteKeyFile("/etc/ssh/ssh_host_rsa_key.pub", rsapub)
  _WriteKeyFile("/etc/ssh/ssh_host_dsa_key", dsa)
  _WriteKeyFile("/etc/ssh/ssh_host_dsa_key.pub", dsapub)

  if not os.path.isdir("/root/.ssh"):
    os.mkdir("/root/.ssh")

  _WriteKeyFile("/root/.ssh/id_dsa", ssh)
  _WriteKeyFile("/root/.ssh/id_dsa.pub", sshpub)

  f = open('/root/.ssh/id_dsa.pub', 'r')
  try:
    utils.AddAuthorizedKey('/root/.ssh/authorized_keys', f.read(8192))
  finally:
    f.close()

  # restart sshd so the new host keys take effect
  utils.RunCmd(["/etc/init.d/ssh", "restart"])

  # stale entries would mismatch the new host keys
  utils.RemoveFile("/root/.ssh/known_hosts")
  return True
127

    
128

    
129
def LeaveCluster():
  """Cleans up the current node and prepares it to be removed from the cluster.

  Deletes the non-directory entries at the top level of
  constants.DATA_DIR and revokes/removes root's cluster ssh identity.

  """
  if os.path.exists(constants.DATA_DIR):
    for entry in os.listdir(constants.DATA_DIR):
      full_name = os.path.join(constants.DATA_DIR, entry)
      if not os.path.isdir(full_name):
        os.unlink(full_name)

  pub_key_file = open('/root/.ssh/id_dsa.pub', 'r')
  try:
    utils.RemoveAuthorizedKey('/root/.ssh/authorized_keys',
                              pub_key_file.read(8192))
  finally:
    pub_key_file.close()

  utils.RemoveFile('/root/.ssh/id_dsa')
  utils.RemoveFile('/root/.ssh/id_dsa.pub')
147

    
148

    
149
def GetNodeInfo(vgname):
  """Return a dict with different informations about the node.

  Returns:
    { 'vg_size' : xxx,  'vg_free' : xxx, 'memory_domain0': xxx,
      'memory_free' : xxx, 'memory_total' : xxx }
    where
    vg_size is the size of the configured volume group in MiB
    vg_free is the free size of the volume group in MiB
    memory_dom0 is the memory allocated for domain0 in MiB
    memory_free is the currently available (free) ram in MiB
    memory_total is the total number of ram in MiB

  """
  vginfo = _GetVGInfo(vgname)
  outputarray = {
    'vg_size': vginfo['vg_size'],
    'vg_free': vginfo['vg_free'],
    }

  # augment with whatever the hypervisor reports, if anything
  hyp_info = hypervisor.GetHypervisor().GetNodeInfo()
  if hyp_info is not None:
    outputarray.update(hyp_info)

  return outputarray
175

    
176

    
177
def VerifyNode(what):
  """Verify the status of the local node.

  Args:
    what - a dictionary of things to check:
      'filelist' : list of files for which to compute checksums
      'nodelist' : list of nodes we should check communication with
      'hypervisor': run the hypervisor-specific verify

  Requested files on local node are checksummed and the result returned.

  The nodelist is traversed, with the following checks being made
  for each node:
  - known_hosts key correct
  - correct resolving of node name (target node returns its own hostname
    by ssh-execution of 'hostname', result compared against name in list.

  """
  result = {}

  if 'hypervisor' in what:
    result['hypervisor'] = hypervisor.GetHypervisor().Verify()

  if 'filelist' in what:
    result['filelist'] = utils.FingerprintFiles(what['filelist'])

  if 'nodelist' in what:
    # only nodes that fail verification get an entry
    failures = {}
    for node in what['nodelist']:
      success, message = ssh.VerifyNodeHostname(node)
      if not success:
        failures[node] = message
    result['nodelist'] = failures
  return result
210

    
211

    
212
def GetVolumeList(vg_name):
  """Compute list of logical volumes and their size.

  Returns:
    dictionary of all partions (key) with their size:
    test1: 20.06MiB
    (an empty dict is returned if lvs fails)

  """
  lvs_result = utils.RunCmd(["lvs", "--noheadings", "--units=m",
                             "-oname,size", vg_name])
  if lvs_result.failed:
    logger.Error("Failed to list logical volumes, lvs output: %s" %
                 lvs_result.output)
    return {}

  volumes = {}
  for line in lvs_result.output.splitlines():
    # each line holds exactly "<name> <size>"
    lv_name, lv_size = line.split()
    volumes[lv_name] = lv_size
  return volumes
229

    
230

    
231
def ListVolumeGroups():
  """List the volume groups and their size.

  Returns:
    Dictionary with keys volume name and values the size of the volume

  """
  # thin wrapper: the actual vgs parsing lives in utils
  return utils.ListVolumeGroups()
239

    
240

    
241
def NodeVolumes():
  """List all volumes on this node.

  Returns a list of dicts with the keys 'name', 'size', 'dev' and
  'vg'; on lvs failure an empty dict is returned and the error logged.

  """
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=|",
                         "--options=lv_name,lv_size,devices,vg_name"])
  if result.failed:
    logger.Error("Failed to list logical volumes, lvs output: %s" %
                 result.output)
    return {}

  volumes = []
  for line in result.output.splitlines():
    fields = line.split('|')
    # lvs prints devices as "/dev/xxx(extent)"; keep only the path part
    volumes.append({
      'name': fields[0].strip(),
      'size': fields[1].strip(),
      'dev': fields[2].strip().split('(')[0],
      'vg': fields[3].strip(),
      })
  return volumes
268

    
269

    
270
def BridgesExist(bridges_list):
  """Check if a list of bridges exist on the current node.

  Returns:
    True if all of them exist, false otherwise

  """
  # stop at the first missing bridge
  for bridge in bridges_list:
    if utils.BridgeExists(bridge):
      continue
    return False
  return True
282

    
283

    
284
def GetInstanceList():
285
  """ provides a list of instances
286

287
  Returns:
288
    A list of all running instances on the current node
289
    - instance1.example.com
290
    - instance2.example.com
291

292
  """
293
  try:
294
    names = hypervisor.GetHypervisor().ListInstances()
295
  except errors.HypervisorError, err:
296
    logger.Error("error enumerating instances: %s" % str(err))
297
    raise
298

    
299
  return names
300

    
301

    
302
def GetInstanceInfo(instance):
  """Return the informations about an instance as a dictionary.

  Args:
    instance: name of the instance (ex. instance1.example.com)

  Returns:
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
    where
    memory: memory size of instance (int)
    state: xen state of instance (string)
    time: cpu time of instance (float)
    The dict is empty when the hypervisor has no data for the name.

  """
  iinfo = hypervisor.GetHypervisor().GetInstanceInfo(instance)
  if iinfo is None:
    return {}
  return {
    'memory': iinfo[2],
    'state': iinfo[4],
    'time': iinfo[5],
    }
326

    
327

    
328
def GetAllInstancesInfo():
  """Gather data about all instances.

  This is the equivalent of `GetInstanceInfo()`, except that it
  computes data for all instances at once, thus being faster if one
  needs data about more than one instance.

  Returns: a dictionary of dictionaries, keys being the instance name,
    and with values:
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
    where
    memory: memory size of instance (int)
    state: xen state of instance (string)
    time: cpu time of instance (float)
    vcpus: the number of cpus

  """
  output = {}
  iinfo = hypervisor.GetHypervisor().GetAllInstancesInfo()
  if iinfo:
    # the second tuple field (the hypervisor id) is not exported
    for (name, _, memory, vcpus, state, times) in iinfo:
      output[name] = {
        'memory': memory,
        'vcpus': vcpus,
        'state': state,
        'time': times,
        }
  return output
358

    
359

    
360
def AddOSToInstance(instance, os_disk, swap_disk):
  """Add an os to an instance.

  Args:
    instance: the instance object
    os_disk: the instance-visible name of the os device
    swap_disk: the instance-visible name of the swap device

  Returns:
    True on success, False (after logging) if a disk name is unknown
    or the create script failed.

  Raises:
    errors.BlockDeviceError: if a matched disk is not set up.

  """
  inst_os = OSFromDisk(instance.os)

  create_script = inst_os.create_script

  # for-else: the else branch runs only when no disk matched (no break)
  for os_device in instance.disks:
    if os_device.iv_name == os_disk:
      break
  else:
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
    return False

  for swap_device in instance.disks:
    if swap_device.iv_name == swap_disk:
      break
  else:
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
    return False

  real_os_dev = _RecursiveFindBD(os_device)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(os_device))
  real_os_dev.Open()

  real_swap_dev = _RecursiveFindBD(swap_device)
  if real_swap_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(swap_device))
  real_swap_dev.Open()

  # per-run log file, named after os/instance/timestamp
  logfile = "%s/add-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
                                     instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  # NOTE(review): '&>' redirection is a bash-ism; confirm RunCmd
  # executes this via bash, otherwise stderr is not captured
  command = utils.BuildShellCmd("cd %s; %s -i %s -b %s -s %s &>%s",
                                inst_os.path, create_script, instance.name,
                                real_os_dev.dev_path, real_swap_dev.dev_path,
                                logfile)

  result = utils.RunCmd(command)

  if result.failed:
    logger.Error("os create command '%s' returned error: %s"
                 " output: %s" %
                 (command, result.fail_reason, result.output))
    return False

  return True
418

    
419

    
420
def _GetVGInfo(vg_name):
  """Get informations about the volume group.

  Args:
    vg_name: the volume group

  Returns:
    { 'vg_size' : xxx, 'vg_free' : xxx, 'pv_count' : xxx }
    where
    vg_size is the total size of the volume group in MiB
    vg_free is the free size of the volume group in MiB
    pv_count are the number of physical disks in that vg

  Raises:
    errors.LVMError: if vgs fails for the given volume group

  """
  retval = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings",
                         "--nosuffix", "--units=m", "--separator=:", vg_name])

  if retval.failed:
    errmsg = "volume group %s not present" % vg_name
    logger.Error(errmsg)
    raise errors.LVMError(errmsg)
  # NOTE(review): this reads retval.stdout while the rest of the file
  # uses .output -- confirm the RunCmd result exposes both attributes
  valarr = retval.stdout.strip().split(':')
  retdic = {
    # sizes are reported as floats in MiB; round to whole MiB
    "vg_size": int(round(float(valarr[0]), 0)),
    "vg_free": int(round(float(valarr[1]), 0)),
    "pv_count": int(valarr[2]),
    }
  return retdic
448

    
449

    
450
def _GatherBlockDevs(instance):
  """Set up an instance's block device(s).

  This is run on the primary node at instance startup. The block
  devices must be already assembled.

  Returns a list of (disk, device) pairs; raises
  errors.BlockDeviceError if any disk cannot be found.

  """
  gathered = []
  for disk in instance.disks:
    found_dev = _RecursiveFindBD(disk)
    if found_dev is None:
      raise errors.BlockDeviceError("Block device '%s' is not set up." %
                                    str(disk))
    found_dev.Open()
    gathered.append((disk, found_dev))
  return gathered
466

    
467

    
468
def StartInstance(instance, extra_args):
469
  """Start an instance.
470

471
  Args:
472
    instance - name of instance to start.
473

474
  """
475
  running_instances = GetInstanceList()
476

    
477
  if instance.name in running_instances:
478
    return True
479

    
480
  block_devices = _GatherBlockDevs(instance)
481
  hyper = hypervisor.GetHypervisor()
482

    
483
  try:
484
    hyper.StartInstance(instance, block_devices, extra_args)
485
  except errors.HypervisorError, err:
486
    logger.Error("Failed to start instance: %s" % err)
487
    return False
488

    
489
  return True
490

    
491

    
492
def ShutdownInstance(instance):
493
  """Shut an instance down.
494

495
  Args:
496
    instance - name of instance to shutdown.
497

498
  """
499
  running_instances = GetInstanceList()
500

    
501
  if instance.name not in running_instances:
502
    return True
503

    
504
  hyper = hypervisor.GetHypervisor()
505
  try:
506
    hyper.StopInstance(instance)
507
  except errors.HypervisorError, err:
508
    logger.Error("Failed to stop instance: %s" % err)
509
    return False
510

    
511
  # test every 10secs for 2min
512
  shutdown_ok = False
513

    
514
  time.sleep(1)
515
  for dummy in range(11):
516
    if instance.name not in GetInstanceList():
517
      break
518
    time.sleep(10)
519
  else:
520
    # the shutdown did not succeed
521
    logger.Error("shutdown of '%s' unsuccessful, using destroy" % instance)
522

    
523
    try:
524
      hyper.StopInstance(instance, force=True)
525
    except errors.HypervisorError, err:
526
      logger.Error("Failed to stop instance: %s" % err)
527
      return False
528

    
529
    time.sleep(1)
530
    if instance.name in GetInstanceList():
531
      logger.Error("could not shutdown instance '%s' even by destroy")
532
      return False
533

    
534
  return True
535

    
536

    
537
def CreateBlockDevice(disk, size, on_primary, info):
  """Creates a block device for an instance.

  Args:
   disk: a ganeti.objects.Disk object describing the device to create
   size: the size of the physical underlying devices
   on_primary: whether this runs on the primary node; if so (or if the
               disk requests assembly on secondaries) the device is
               assembled and possibly opened
   info: data attached to the device via SetInfo after creation

  Returns:
    the new unique_id of the device (this can sometime be
    computed only after creation), or None. On secondary nodes,
    it's not required to return anything.

  """
  clist = []
  if disk.children:
    for child in disk.children:
      crdev = _RecursiveAssembleBD(child, on_primary)
      if on_primary or disk.AssembleOnSecondary():
        # we need the children open in case the device itself has to
        # be assembled
        crdev.Open()
      else:
        crdev.Close()
      clist.append(crdev)
  try:
    # best effort: remove any pre-existing device with the same id
    # before creating the new one
    device = bdev.FindDevice(disk.dev_type, disk.physical_id, clist)
    if device is not None:
      logger.Info("removing existing device %s" % disk)
      device.Remove()
  except errors.BlockDeviceError, err:
    # failure to find/remove the old device is deliberately ignored
    # ('err' is intentionally unused)
    pass

  device = bdev.Create(disk.dev_type, disk.physical_id,
                       clist, size)
  if device is None:
    raise ValueError("Can't create child device for %s, %s" %
                     (disk, size))
  if on_primary or disk.AssembleOnSecondary():
    device.Assemble()
    device.SetSyncSpeed(constants.SYNC_SPEED)
    if on_primary or disk.OpenOnSecondary():
      device.Open(force=True)

  device.SetInfo(info)

  physical_id = device.unique_id
  return physical_id
586

    
587

    
588
def RemoveBlockDevice(disk):
589
  """Remove a block device.
590

591
  This is intended to be called recursively.
592

593
  """
594
  try:
595
    # since we are removing the device, allow a partial match
596
    # this allows removal of broken mirrors
597
    rdev = _RecursiveFindBD(disk, allow_partial=True)
598
  except errors.BlockDeviceError, err:
599
    # probably can't attach
600
    logger.Info("Can't attach to device %s in remove" % disk)
601
    rdev = None
602
  if rdev is not None:
603
    result = rdev.Remove()
604
  else:
605
    result = True
606
  if disk.children:
607
    for child in disk.children:
608
      result = result and RemoveBlockDevice(child)
609
  return result
610

    
611

    
612
def _RecursiveAssembleBD(disk, as_primary):
  """Activate a block device for an instance.

  This is run on the primary and secondary nodes for an instance.

  This function is called recursively.

  Args:
    disk: a objects.Disk object
    as_primary: if we should make the block device read/write

  Returns:
    the assembled device when assembly took place, or True when this
    node has nothing to assemble for the disk

  If the assembly is not successful, an exception is raised.

  """
  children = []
  if disk.children:
    for chld_disk in disk.children:
      children.append(_RecursiveAssembleBD(chld_disk, as_primary))

  if as_primary or disk.AssembleOnSecondary():
    r_dev = bdev.AttachOrAssemble(disk.dev_type, disk.physical_id, children)
    r_dev.SetSyncSpeed(constants.SYNC_SPEED)
    result = r_dev
    if as_primary or disk.OpenOnSecondary():
      # primary (or open-on-secondary devices) get the device opened
      # read/write
      r_dev.Open()
    else:
      # plain secondaries keep the device closed
      r_dev.Close()
  else:
    # nothing to assemble on this node; True signals success upstream
    result = True
  return result
645

    
646

    
647
def AssembleBlockDevice(disk, as_primary):
  """Activate a block device for an instance.

  This is a wrapper over _RecursiveAssembleBD.

  Returns:
    a /dev path for primary nodes
    True for secondary nodes

  """
  assembled = _RecursiveAssembleBD(disk, as_primary)
  if not isinstance(assembled, bdev.BlockDev):
    return assembled
  # translate the device object into its /dev path for the caller
  return assembled.dev_path
661

    
662

    
663
def ShutdownBlockDevice(disk):
  """Shut down a block device.

  First, if the device is assembled (can `Attach()`), then the device
  is shutdown. Then the children of the device are shutdown.

  This function is called recursively. Note that we don't cache the
  children or such, as oppossed to assemble, shutdown of different
  devices doesn't require that the upper device was active.

  """
  r_dev = _RecursiveFindBD(disk)
  if r_dev is None:
    result = True
  else:
    result = r_dev.Shutdown()
  for child in (disk.children or []):
    result = result and ShutdownBlockDevice(child)
  return result
683

    
684

    
685
def MirrorAddChild(md_cdev, new_cdev):
  """Extend an MD raid1 array.

  Looks up the md device (allowing a partial match, so degraded
  arrays can be extended) and the new component, opens the component
  and adds it to the array.

  Returns True on success, False (after logging) if a device is
  missing.

  """
  md_bdev = _RecursiveFindBD(md_cdev, allow_partial=True)
  if md_bdev is not None:
    new_bdev = _RecursiveFindBD(new_cdev)
    if new_bdev is not None:
      new_bdev.Open()
      md_bdev.AddChild(new_bdev)
      return True
    logger.Error("Can't find new device to add")
  else:
    logger.Error("Can't find md device")
  return False
700

    
701

    
702
def MirrorRemoveChild(md_cdev, new_cdev):
  """Reduce an MD raid1 array.

  Args:
    md_cdev: disk object describing the md array
    new_cdev: disk object describing the component to remove

  Returns True on success, False (after logging) if a device is
  missing.

  """
  md_bdev = _RecursiveFindBD(md_cdev)
  if md_bdev is None:
    # log failures for consistency with MirrorAddChild (the original
    # returned False silently, hiding the cause from the node logs)
    logger.Error("Can't find md device")
    return False
  new_bdev = _RecursiveFindBD(new_cdev)
  if new_bdev is None:
    logger.Error("Can't find device to remove")
    return False
  new_bdev.Open()
  md_bdev.RemoveChild(new_bdev.dev_path)
  return True
715

    
716

    
717
def GetMirrorStatus(disks):
  """Get the mirroring status of a list of devices.

  Args:
    disks: list of `objects.Disk`

  Returns:
    list of (mirror_done, estimated_time) tuples, which
    are the result of bdev.BlockDevice.CombinedSyncStatus()

  Raises:
    errors.BlockDeviceError: if any device cannot be found

  """
  stats = []
  for dsk in disks:
    rbd = _RecursiveFindBD(dsk)
    if rbd is None:
      # use the exception call form instead of the deprecated
      # 'raise Class, arg' statement form used elsewhere in the file
      raise errors.BlockDeviceError("Can't find device %s" % str(dsk))
    stats.append(rbd.CombinedSyncStatus())
  return stats
735

    
736

    
737
def _RecursiveFindBD(disk, allow_partial=False):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance
    allow_partial: don't abort the find if a child of the
                   device can't be found; this is intended to be
                   used when repairing mirrors

  Returns:
    None if the device can't be found
    otherwise the device instance

  """
  # NOTE(review): 'allow_partial' is accepted but never used below --
  # it is neither propagated to the recursive calls nor passed to
  # bdev.FindDevice; confirm whether partial matching is handled
  # inside FindDevice or this is a latent bug
  children = []
  if disk.children:
    for chdisk in disk.children:
      children.append(_RecursiveFindBD(chdisk))

  return bdev.FindDevice(disk.dev_type, disk.physical_id, children)
759

    
760

    
761
def FindBlockDevice(disk):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance
  Returns:
    None if the device can't be found
    (device_path, major, minor, sync_percent, estimated_time, is_degraded)

  """
  rbd = _RecursiveFindBD(disk)
  if rbd is None:
    return None
  sync_p, est_t, is_degr = rbd.GetSyncStatus()
  return (rbd.dev_path, rbd.major, rbd.minor, sync_p, est_t, is_degr)
778

    
779

    
780
def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
  """Write a file to the filesystem.

  This allows the master to overwrite(!) a file. It will only perform
  the operation if the file belongs to a list of configuration files.

  Args:
    file_name: absolute path of the target file
    data: the new contents
    mode: permission bits for the file
    uid, gid: ownership to set
    atime, mtime: timestamps to set

  Returns:
    True on success, False if the path is not absolute or not in the
    allowed upload targets.

  """
  if not os.path.isabs(file_name):
    logger.Error("Filename passed to UploadFile is not absolute: '%s'" %
                 file_name)
    return False

  allowed_files = [constants.CLUSTER_CONF_FILE, "/etc/hosts",
                   "/etc/ssh/ssh_known_hosts"]
  allowed_files.extend(ssconf.SimpleStore().GetFileList())
  if file_name not in allowed_files:
    logger.Error("Filename passed to UploadFile not in allowed"
                 " upload targets: '%s'" % file_name)
    return False

  # write to a temp file in the same directory and rename it over the
  # target, so readers never observe a partially-written file
  dir_name, small_name = os.path.split(file_name)
  fd, new_name = tempfile.mkstemp('.new', small_name, dir_name)
  # here we need to make sure we remove the temp file, if any error
  # leaves it in place
  try:
    os.chown(new_name, uid, gid)
    os.chmod(new_name, mode)
    os.write(fd, data)
    os.fsync(fd)
    os.utime(new_name, (atime, mtime))
    os.rename(new_name, file_name)
  finally:
    os.close(fd)
    # after a successful rename the temp path no longer exists;
    # on failure this cleans up the leftover file
    utils.RemoveFile(new_name)
  return True
815

    
816
def _ErrnoOrStr(err):
817
  """Format an EnvironmentError exception.
818

819
  If the `err` argument has an errno attribute, it will be looked up
820
  and converted into a textual EXXXX description. Otherwise the string
821
  representation of the error will be returned.
822

823
  """
824
  if hasattr(err, 'errno'):
825
    detail = errno.errorcode[err.errno]
826
  else:
827
    detail = str(err)
828
  return detail
829

    
830

    
831
def _OSOndiskVersion(name, os_dir=None):
  """Compute and return the api version of a given OS.

  This function will try to read the api version of the os given by
  the 'name' parameter. By default, it wil use the constants.OS_DIR
  as top-level directory for OSes, but this can be overriden by the
  use of the os_dir parameter. Return value will be either an
  integer denoting the version or None in the case when this is not
  a valid OS name.

  Raises:
    errors.InvalidOS: if the version file is missing, not a regular
                      file, unreadable, or not an integer

  """
  if os_dir is None:
    os_dir = os.path.sep.join([constants.OS_DIR, name])

  api_file = os.path.sep.join([os_dir, "ganeti_api_version"])

  try:
    st = os.stat(api_file)
  except EnvironmentError, err:
    raise errors.InvalidOS, (name, "'ganeti_api_version' file not"
                             " found (%s)" % _ErrnoOrStr(err))

  # reject directories, devices, symlink targets that aren't plain files
  if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
    raise errors.InvalidOS, (name, "'ganeti_api_version' file is not"
                             " a regular file")

  try:
    f = open(api_file)
    try:
      # the version file should be tiny; 256 bytes is plenty
      api_version = f.read(256)
    finally:
      f.close()
  except EnvironmentError, err:
    raise errors.InvalidOS, (name, "error while reading the"
                             " API version (%s)" % _ErrnoOrStr(err))

  api_version = api_version.strip()
  try:
    api_version = int(api_version)
  except (TypeError, ValueError), err:
    raise errors.InvalidOS, (name, "API version is not integer (%s)" %
                             str(err))

  return api_version
875

    
876
def DiagnoseOS(top_dir=None):
  """Compute the validity for all OSes.

  For each name in the give top_dir parameter (if not given, defaults
  to constants.OS_DIR), it will return an object. If this is a valid
  os, the object will be an instance of the object.OS class. If not,
  it will be an instance of errors.InvalidOS and this signifies that
  this name does not correspond to a valid OS.

  Returns:
    list of objects
    (False when the OS directory itself cannot be listed)

  """
  if top_dir is None:
    top_dir = constants.OS_DIR

  try:
    f_names = os.listdir(top_dir)
  except EnvironmentError, err:
    logger.Error("Can't list the OS directory: %s" % str(err))
    # NOTE(review): returning False here while the success path
    # returns a list is inconsistent; confirm callers handle both
    return False
  result = []
  for name in f_names:
    try:
      os_inst = OSFromDisk(name, os.path.sep.join([top_dir, name]))
      result.append(os_inst)
    except errors.InvalidOS, err:
      # invalid OSes are reported via the exception object itself
      result.append(err)

  return result
906

    
907

    
908
def OSFromDisk(name, os_dir=None):
  """Create an OS instance from disk.

  This function will return an OS instance if the given name is a
  valid OS name. Otherwise, it will raise an appropriate
  `errors.InvalidOS` exception, detailing why this is not a valid
  OS.

  Args:
    name: the OS name
    os_dir: optional override of the per-OS directory (defaults to
            constants.OS_DIR/name)

  """
  if os_dir is None:
    os_dir = os.path.sep.join([constants.OS_DIR, name])

  api_version = _OSOndiskVersion(name, os_dir)

  if api_version != constants.OS_API_VERSION:
    raise errors.InvalidOS, (name, "API version mismatch (found %s want %s)"
                             % (api_version, constants.OS_API_VERSION))

  # OS Scripts dictionary, we will populate it with the actual script names
  os_scripts = {'create': '', 'export': '', 'import': ''}

  for script in os_scripts:
    os_scripts[script] = os.path.sep.join([os_dir, script])

    # every script must exist...
    try:
      st = os.stat(os_scripts[script])
    except EnvironmentError, err:
      raise errors.InvalidOS, (name, "'%s' script missing (%s)" %
                               (script, _ErrnoOrStr(err)))

    # ...be executable by its owner...
    if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
      raise errors.InvalidOS, (name, "'%s' script not executable" % script)

    # ...and be a regular file (not a directory or device)
    if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
      raise errors.InvalidOS, (name, "'%s' is not a regular file" % script)


  return objects.OS(name=name, path=os_dir,
                    create_script=os_scripts['create'],
                    export_script=os_scripts['export'],
                    import_script=os_scripts['import'],
                    api_version=api_version)
950

    
951

    
952
def SnapshotBlockDevice(disk):
  """Create a snapshot copy of a block device.

  This function is called recursively, and the snapshot is actually created
  just for the leaf lvm backend device.

  Args:
    disk: the disk to be snapshotted

  Returns:
    a config entry for the actual lvm device snapshotted; None when no
    suitably-sized child exists or the device is not set up

  Raises:
    errors.ProgrammerError: for non-lvm leaf devices

  """
  if disk.children:
    if len(disk.children) == 1:
      # only one child, let's recurse on it
      return SnapshotBlockDevice(disk.children[0])
    # more than one child, choose the one whose size matches the disk
    for child in disk.children:
      if child.size == disk.size:
        # return implies breaking the loop
        return SnapshotBlockDevice(child)
    # falls through to an implicit None when no child matches
  elif disk.dev_type == "lvm":
    r_dev = _RecursiveFindBD(disk)
    if r_dev is None:
      return None
    # let's stay on the safe side and ask for the full size, for now
    return r_dev.Snapshot(disk.size)
  else:
    # bug fix: the original implicit string concatenation produced
    # "...block device'%s'..." (missing space before the quoted name)
    raise errors.ProgrammerError("Cannot snapshot non-lvm block device"
                                 " '%s' of type '%s'" %
                                 (disk.unique_id, disk.dev_type))
986

    
987

    
988
def ExportSnapshot(disk, dest_node, instance):
989
  """Export a block device snapshot to a remote node.
990

991
  Args:
992
    disk: the snapshot block device
993
    dest_node: the node to send the image to
994
    instance: instance being exported
995

996
  Returns:
997
    True if successful, False otherwise.
998

999
  """
1000
  inst_os = OSFromDisk(instance.os)
1001
  export_script = inst_os.export_script
1002

    
1003
  logfile = "%s/exp-%s-%s-%s.log" % (constants.LOG_OS_DIR, inst_os.name,
1004
                                     instance.name, int(time.time()))
1005
  if not os.path.exists(constants.LOG_OS_DIR):
1006
    os.mkdir(constants.LOG_OS_DIR, 0750)
1007

    
1008
  real_os_dev = _RecursiveFindBD(disk)
1009
  if real_os_dev is None:
1010
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
1011
                                  str(disk))
1012
  real_os_dev.Open()
1013

    
1014
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
1015
  destfile = disk.physical_id[1]
1016

    
1017
  # the target command is built out of three individual commands,
1018
  # which are joined by pipes; we check each individual command for
1019
  # valid parameters
1020

    
1021
  expcmd = utils.BuildShellCmd("cd %s; %s -i %s -b %s 2>%s", inst_os.path,
1022
                               export_script, instance.name,
1023
                               real_os_dev.dev_path, logfile)
1024

    
1025
  comprcmd = "gzip"
1026

    
1027
  remotecmd = utils.BuildShellCmd("ssh -q -oStrictHostKeyChecking=yes"
1028
                                  " -oBatchMode=yes -oEscapeChar=none"
1029
                                  " %s 'mkdir -p %s; cat > %s/%s'",
1030
                                  dest_node, destdir, destdir, destfile)
1031

    
1032
  # all commands have been checked, so we're safe to combine them
1033
  command = '|'.join([expcmd, comprcmd, remotecmd])
1034

    
1035
  result = utils.RunCmd(command)
1036

    
1037
  if result.failed:
1038
    logger.Error("os snapshot export command '%s' returned error: %s"
1039
                 " output: %s" %
1040
                 (command, result.fail_reason, result.output))
1041
    return False
1042

    
1043
  return True
1044

    
1045

    
1046
def FinalizeExport(instance, snap_disks):
  """Write out the export configuration information.

  Builds the export's config file in the "<name>.new" staging directory
  and then moves the staging directory over the final export directory.

  Args:
    instance: instance configuration
    snap_disks: snapshot block devices

  Returns:
    False in case of error, True otherwise.

  """
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
  finaldestdir = os.path.join(constants.EXPORT_DIR, instance.name)

  config = objects.SerializableConfigParser()

  config.add_section(constants.INISECT_EXP)
  config.set(constants.INISECT_EXP, 'version', '0')
  config.set(constants.INISECT_EXP, 'timestamp', '%d' % int(time.time()))
  config.set(constants.INISECT_EXP, 'source', instance.primary_node)
  config.set(constants.INISECT_EXP, 'os', instance.os)
  config.set(constants.INISECT_EXP, 'compression', 'gzip')

  config.add_section(constants.INISECT_INS)
  config.set(constants.INISECT_INS, 'name', instance.name)
  config.set(constants.INISECT_INS, 'memory', '%d' % instance.memory)
  config.set(constants.INISECT_INS, 'vcpus', '%d' % instance.vcpus)
  config.set(constants.INISECT_INS, 'disk_template', instance.disk_template)

  # pre-initialize the counter so an instance without nics does not leave
  # nic_count unbound (NameError at the config.set below);
  # NOTE(review): the stored 'nic_count' value is the index of the last
  # nic (count - 1), not the number of nics -- kept as-is because the
  # import side must agree with this convention
  nic_count = -1
  for nic_count, nic in enumerate(instance.nics):
    config.set(constants.INISECT_INS, 'nic%d_mac' %
               nic_count, '%s' % nic.mac)
    config.set(constants.INISECT_INS, 'nic%d_ip' % nic_count, '%s' % nic.ip)
  # TODO: redundant: on load can read nics until it doesn't exist
  config.set(constants.INISECT_INS, 'nic_count' , '%d' % nic_count)

  # same pre-initialization for an (unlikely) empty snap_disks list
  disk_count = -1
  for disk_count, disk in enumerate(snap_disks):
    config.set(constants.INISECT_INS, 'disk%d_ivname' % disk_count,
               ('%s' % disk.iv_name))
    config.set(constants.INISECT_INS, 'disk%d_dump' % disk_count,
               ('%s' % disk.physical_id[1]))
    config.set(constants.INISECT_INS, 'disk%d_size' % disk_count,
               ('%d' % disk.size))
  config.set(constants.INISECT_INS, 'disk_count' , '%d' % disk_count)

  cff = os.path.join(destdir, constants.EXPORT_CONF_FILE)
  cfo = open(cff, 'w')
  try:
    config.write(cfo)
  finally:
    cfo.close()

  # replace any previous export of the same name with the new one
  shutil.rmtree(finaldestdir, True)
  shutil.move(destdir, finaldestdir)

  return True
1101

    
1102

    
1103
def ExportInfo(dest):
  """Get export configuration information.

  Args:
    dest: directory containing the export

  Returns:
    A serializable config file containing the export info, or None if
    the expected sections are missing.

  """
  config_file = os.path.join(dest, constants.EXPORT_CONF_FILE)

  config = objects.SerializableConfigParser()
  config.read(config_file)

  # both the export and the instance sections must be present
  complete = (config.has_section(constants.INISECT_EXP) and
              config.has_section(constants.INISECT_INS))
  if not complete:
    return None

  return config
1123

    
1124

    
1125
def ImportOSIntoInstance(instance, os_disk, swap_disk, src_node, src_image):
1126
  """Import an os image into an instance.
1127

1128
  Args:
1129
    instance: the instance object
1130
    os_disk: the instance-visible name of the os device
1131
    swap_disk: the instance-visible name of the swap device
1132
    src_node: node holding the source image
1133
    src_image: path to the source image on src_node
1134

1135
  Returns:
1136
    False in case of error, True otherwise.
1137

1138
  """
1139
  inst_os = OSFromDisk(instance.os)
1140
  import_script = inst_os.import_script
1141

    
1142
  for os_device in instance.disks:
1143
    if os_device.iv_name == os_disk:
1144
      break
1145
  else:
1146
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
1147
    return False
1148

    
1149
  for swap_device in instance.disks:
1150
    if swap_device.iv_name == swap_disk:
1151
      break
1152
  else:
1153
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
1154
    return False
1155

    
1156
  real_os_dev = _RecursiveFindBD(os_device)
1157
  if real_os_dev is None:
1158
    raise errors.BlockDeviceError, ("Block device '%s' is not set up" %
1159
                                    str(os_device))
1160
  real_os_dev.Open()
1161

    
1162
  real_swap_dev = _RecursiveFindBD(swap_device)
1163
  if real_swap_dev is None:
1164
    raise errors.BlockDeviceError, ("Block device '%s' is not set up" %
1165
                                    str(swap_device))
1166
  real_swap_dev.Open()
1167

    
1168
  logfile = "%s/import-%s-%s-%s.log" % (constants.LOG_OS_DIR, instance.os,
1169
                                        instance.name, int(time.time()))
1170
  if not os.path.exists(constants.LOG_OS_DIR):
1171
    os.mkdir(constants.LOG_OS_DIR, 0750)
1172

    
1173
  remotecmd = utils.BuildShellCmd("ssh -q -oStrictHostKeyChecking=yes"
1174
                                  " -oBatchMode=yes -oEscapeChar=none"
1175
                                  " %s 'cat %s'", src_node, src_image)
1176

    
1177
  comprcmd = "gunzip"
1178
  impcmd = utils.BuildShellCmd("(cd %s; %s -i %s -b %s -s %s &>%s)",
1179
                               inst_os.path, import_script, instance.name,
1180
                               real_os_dev.dev_path, real_swap_dev.dev_path,
1181
                               logfile)
1182

    
1183
  command = '|'.join([remotecmd, comprcmd, impcmd])
1184

    
1185
  result = utils.RunCmd(command)
1186

    
1187
  if result.failed:
1188
    logger.Error("os import command '%s' returned error: %s"
1189
                 " output: %s" %
1190
                 (command, result.fail_reason, result.output))
1191
    return False
1192

    
1193
  return True
1194

    
1195

    
1196
def ListExports():
  """Return a list of exports currently available on this machine.

  Returns an empty list when the export directory does not exist.

  """
  # guard clause: no export directory means no exports
  if not os.path.isdir(constants.EXPORT_DIR):
    return []
  return os.listdir(constants.EXPORT_DIR)
1204

    
1205

    
1206
def RemoveExport(export):
  """Remove an existing export from the node.

  Args:
    export: the name of the export to remove

  Returns:
    False in case of error, True otherwise.

  """
  target = os.path.join(constants.EXPORT_DIR, export)

  # the docstring promises a boolean result, so convert rmtree failures
  # (missing directory, permission problems) into a False return instead
  # of letting the exception propagate
  try:
    shutil.rmtree(target)
  except EnvironmentError:
    return False

  return True
1223

    
1224

    
1225
class HooksRunner(object):
1226
  """Hook runner.
1227

1228
  This class is instantiated on the node side (ganeti-noded) and not on
1229
  the master side.
1230

1231
  """
1232
  RE_MASK = re.compile("^[a-zA-Z0-9_-]+$")
1233

    
1234
  def __init__(self, hooks_base_dir=None):
    """Constructor for hooks runner.

    Args:
      - hooks_base_dir: if not None, this overrides the
        constants.HOOKS_BASE_DIR (useful for unittests)

    """
    if hooks_base_dir is not None:
      self._BASE_DIR = hooks_base_dir
    else:
      self._BASE_DIR = constants.HOOKS_BASE_DIR
1248

    
1249
  @staticmethod
  def ExecHook(script, env):
    """Exec one hook script.

    Args:
     - script: the full path to the script
     - env: the environment with which to exec the script

    Returns:
      a (success, output) tuple: success is True when the script exited
      with status 0; output holds up to the first 4096 bytes of the
      script's combined stdout/stderr

    """
    # exec the process using subprocess and log the output
    fdstdin = None
    try:
      # hooks get a closed stdin (via /dev/null) and run from the
      # filesystem root with the caller-supplied environment
      fdstdin = open("/dev/null", "r")
      child = subprocess.Popen([script], stdin=fdstdin, stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT, close_fds=True,
                               shell=False, cwd="/",env=env)
      output = ""
      try:
        # capture at most 4096 bytes of output, then close the pipe;
        # any read error is folded into the captured output instead of
        # aborting the hook run
        output = child.stdout.read(4096)
        child.stdout.close()
      except EnvironmentError, err:
        output += "Hook script error: %s" % str(err)

      # wait() can be interrupted by signals; retry on EINTR and
      # propagate any other error
      while True:
        try:
          result = child.wait()
          break
        except EnvironmentError, err:
          if err.errno == errno.EINTR:
            continue
          raise
    finally:
      # try not to leak fds
      for fd in (fdstdin, ):
        if fd is not None:
          try:
            fd.close()
          except EnvironmentError, err:
            # just log the error
            #logger.Error("While closing fd %s: %s" % (fd, err))
            pass

    # success is defined as a zero exit code
    return result == 0, output
1293

    
1294
  def RunHooks(self, hpath, phase, env):
    """Run the scripts in the hooks directory.

    This method will not be usually overriden by child opcodes.

    Args:
      - hpath: opcode hooks path, used to build the
        "<hpath>-<pre|post>.d" directory name
      - phase: constants.HOOKS_PHASE_PRE or constants.HOOKS_PHASE_POST
      - env: the environment passed to each hook script

    Returns:
      a list of ("subdir/scriptname", status, output) tuples, where
      status is one of constants.HKR_SKIP, HKR_FAIL or HKR_SUCCESS

    Raises:
      errors.ProgrammerError: if phase is not a known hooks phase

    """
    if phase == constants.HOOKS_PHASE_PRE:
      suffix = "pre"
    elif phase == constants.HOOKS_PHASE_POST:
      suffix = "post"
    else:
      # single-argument raise form (the two-argument form is deprecated)
      raise errors.ProgrammerError("Unknown hooks phase: '%s'" % phase)
    rr = []

    subdir = "%s-%s.d" % (hpath, suffix)
    dir_name = "%s/%s" % (self._BASE_DIR, subdir)
    try:
      dir_contents = os.listdir(dir_name)
    except OSError:
      # a missing or unreadable hooks directory simply means no hooks
      # ran; the unused exception binding was dropped
      # TODO: must log this event instead of silently returning
      return rr

    # we use the standard python sort order,
    # so 00name is the recommended naming scheme
    dir_contents.sort()
    for relname in dir_contents:
      fname = os.path.join(dir_name, relname)
      if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
          self.RE_MASK.match(relname) is not None):
        # non-files, non-executables and oddly-named entries are
        # skipped, not treated as failures
        rrval = constants.HKR_SKIP
        output = ""
      else:
        result, output = self.ExecHook(fname, env)
        if not result:
          rrval = constants.HKR_FAIL
        else:
          rrval = constants.HKR_SUCCESS
      rr.append(("%s/%s" % (subdir, relname), rrval, output))

    return rr