Statistics
| Branch: | Tag: | Revision:

root / lib / backend.py @ cb91d46e

History | View | Annotate | Download (36.4 kB)

1
#!/usr/bin/python
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Functions used by the node daemon"""
23

    
24

    
25
import os
26
import os.path
27
import shutil
28
import time
29
import tempfile
30
import stat
31
import errno
32
import re
33
import subprocess
34

    
35
from ganeti import logger
36
from ganeti import errors
37
from ganeti import utils
38
from ganeti import ssh
39
from ganeti import hypervisor
40
from ganeti import constants
41
from ganeti import bdev
42
from ganeti import objects
43
from ganeti import ssconf
44

    
45

    
46
def StartMaster():
  """Activate the local node as master node.

  Runs the master startup script; per the design the cron script gets
  registered as well.

  Returns:
    True on success, False if the master script failed (failure is
    logged).

  """
  result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "start"])
  if not result.failed:
    return True

  logger.Error("could not activate cluster interface with command %s,"
               " error: '%s'" % (result.cmd, result.output))
  return False
62

    
63

    
64
def StopMaster():
  """Deactivate this node as master.

  Runs the master stop script; per the design the link to the master
  cron script gets removed as well.

  Returns:
    True on success, False if the master script failed (failure is
    logged).

  """
  result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "stop"])
  if not result.failed:
    return True

  logger.Error("could not deactivate cluster interface with command %s,"
               " error: '%s'" % (result.cmd, result.output))
  return False
80

    
81

    
82
def AddNode(dsa, dsapub, rsa, rsapub, ssh, sshpub):
  """Add this node to the cluster.

  This installs the cluster ssh host keys (rsa and dsa, private and
  public parts), installs root's cluster ssh identity, authorizes that
  identity for root logins, restarts sshd and removes the now-stale
  known_hosts file.

  Args:
    dsa, dsapub: private/public part of the cluster dsa host key
    rsa, rsapub: private/public part of the cluster rsa host key
    ssh, sshpub: private/public part of root's cluster ssh identity
      (NOTE: the 'ssh' parameter shadows the ganeti.ssh module import;
      this function does not use the module, but be careful if editing)

  Returns:
    True (errors surface as exceptions from the file operations)

  """
  def _WriteFile(path, data):
    # why: the original open/write/close sequences leaked the file
    # descriptor if write() raised; always close via try/finally
    f = open(path, 'w')
    try:
      f.write(data)
    finally:
      f.close()

  _WriteFile("/etc/ssh/ssh_host_rsa_key", rsa)
  _WriteFile("/etc/ssh/ssh_host_rsa_key.pub", rsapub)
  _WriteFile("/etc/ssh/ssh_host_dsa_key", dsa)
  _WriteFile("/etc/ssh/ssh_host_dsa_key.pub", dsapub)

  if not os.path.isdir("/root/.ssh"):
    os.mkdir("/root/.ssh")

  _WriteFile("/root/.ssh/id_dsa", ssh)
  _WriteFile("/root/.ssh/id_dsa.pub", sshpub)

  # authorize the freshly-written public key for root logins
  f = open('/root/.ssh/id_dsa.pub', 'r')
  try:
    utils.AddAuthorizedKey('/root/.ssh/authorized_keys', f.read(8192))
  finally:
    f.close()

  utils.RunCmd(["/etc/init.d/ssh", "restart"])

  # the host keys just changed, so cached known_hosts entries are stale
  utils.RemoveFile("/root/.ssh/known_hosts")
  return True
127

    
128

    
129
def LeaveCluster():
  """Clean up this node and prepare it for removal from the cluster.

  Removes all files directly under the ganeti data directory, revokes
  root's cluster ssh identity from authorized_keys and deletes the
  identity files.

  """
  if os.path.exists(constants.DATA_DIR):
    for dirpath, dirnames, filenames in os.walk(constants.DATA_DIR):
      # only clean the top-level directory, leave subdirectories alone
      if dirpath != constants.DATA_DIR:
        continue
      for entry in filenames:
        os.unlink(os.path.join(dirpath, entry))

  pub_key = open('/root/.ssh/id_dsa.pub', 'r')
  try:
    utils.RemoveAuthorizedKey('/root/.ssh/authorized_keys',
                              pub_key.read(8192))
  finally:
    pub_key.close()

  utils.RemoveFile('/root/.ssh/id_dsa')
  utils.RemoveFile('/root/.ssh/id_dsa.pub')
147

    
148

    
149
def GetNodeInfo(vgname):
  """Collect assorted information about the local node.

  Args:
    vgname: name of the volume group to query

  Returns:
    a dict with at least 'vg_size' and 'vg_free' (both in MiB) for the
    given volume group, extended with whatever the hypervisor reports
    via GetNodeInfo() (e.g. memory_total, memory_free, memory_dom0).

  """
  vginfo = _GetVGInfo(vgname)
  nodeinfo = {
    'vg_size': vginfo['vg_size'],
    'vg_free': vginfo['vg_free'],
    }

  hyp_info = hypervisor.GetHypervisor().GetNodeInfo()
  if hyp_info is not None:
    nodeinfo.update(hyp_info)

  return nodeinfo
175

    
176

    
177
def VerifyNode(what):
  """Verify the status of the local node.

  Args:
    what - a dictionary of things to check:
      'filelist' : list of files for which to compute checksums
      'nodelist' : list of nodes we should check communication with
      'hypervisor': run the hypervisor-specific verify

  Returns:
    a dict keyed like `what`; for 'nodelist' only the failing nodes
    appear, mapped to the verification error message.

  """
  result = {}

  if 'hypervisor' in what:
    result['hypervisor'] = hypervisor.GetHypervisor().Verify()

  if 'filelist' in what:
    result['filelist'] = utils.FingerprintFiles(what['filelist'])

  if 'nodelist' in what:
    failures = {}
    for node in what['nodelist']:
      ok, message = ssh.VerifyNodeHostname(node)
      if not ok:
        failures[node] = message
    result['nodelist'] = failures

  return result
211

    
212

    
213
def GetVolumeList(vg_name):
  """Compute the list of logical volumes and their sizes.

  Args:
    vg_name: the volume group to list

  Returns:
    dict mapping each lv name to its size string, e.g.
    {'test1': '20.06MiB'}; an empty dict if the lvs command failed
    (the failure is logged).

  """
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m",
                         "-oname,size", vg_name])
  if result.failed:
    logger.Error("Failed to list logical volumes, lvs output: %s" %
                 result.output)
    return {}

  # each output line is "name size"; split() yields the pair directly
  return dict([line.split() for line in result.output.splitlines()])
230

    
231

    
232
def ListVolumeGroups():
  """List the volume groups and their size.

  Returns:
    dict mapping volume group name to its size

  """
  # the actual work lives in utils; this is the RPC-visible wrapper
  return utils.ListVolumeGroups()
240

    
241

    
242
def NodeVolumes():
  """List all logical volumes on this node.

  Returns:
    a list of dicts, one per logical volume, each with keys 'name',
    'size', 'dev' (the first physical device backing the lv) and 'vg';
    an empty list if the lvs command fails (the failure is logged).

  """
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=|",
                         "--options=lv_name,lv_size,devices,vg_name"])
  if result.failed:
    logger.Error("Failed to list logical volumes, lvs output: %s" %
                 result.output)
    # bugfix: the success path returns a list, so return an empty list
    # (not an empty dict) to keep the return type consistent
    return []

  def parse_dev(dev):
    # keep only the device path, dropping the "(...)" suffix lvs
    # appends after it
    if '(' in dev:
      return dev.split('(')[0]
    else:
      return dev

  def map_line(line):
    return {
      'name': line[0].strip(),
      'size': line[1].strip(),
      'dev': parse_dev(line[2].strip()),
      'vg': line[3].strip(),
    }

  return [map_line(line.split('|')) for line in result.output.splitlines()]
269

    
270

    
271
def BridgesExist(bridges_list):
  """Check whether every bridge in the given list exists on this node.

  Args:
    bridges_list: list of bridge names to check

  Returns:
    True if all of them exist, False otherwise (checking stops at the
    first missing bridge).

  """
  for bridge in bridges_list:
    if utils.BridgeExists(bridge):
      continue
    return False
  return True
283

    
284

    
285
def GetInstanceList():
286
  """ provides a list of instances
287

288
  Returns:
289
    A list of all running instances on the current node
290
    - instance1.example.com
291
    - instance2.example.com
292
  """
293

    
294
  try:
295
    names = hypervisor.GetHypervisor().ListInstances()
296
  except errors.HypervisorError, err:
297
    logger.Error("error enumerating instances: %s" % str(err))
298
    raise
299

    
300
  return names
301

    
302

    
303
def GetInstanceInfo(instance):
  """Return runtime information about an instance as a dictionary.

  Args:
    instance: name of the instance (ex. instance1.example.com)

  Returns:
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
    where
    memory: memory size of instance (int)
    state: xen state of instance (string)
    time: cpu time of instance (float)
    An empty dict if the hypervisor returns no data for the instance.

  """
  iinfo = hypervisor.GetHypervisor().GetInstanceInfo(instance)
  if iinfo is None:
    return {}

  # iinfo is a tuple; positions 2/4/5 hold memory, state and cpu time
  return {
    'memory': iinfo[2],
    'state': iinfo[4],
    'time': iinfo[5],
    }
327

    
328

    
329
def GetAllInstancesInfo():
  """Gather data about all instances.

  This is the equivalent of `GetInstanceInfo()`, except that it
  computes data for all instances at once, thus being faster if one
  needs data about more than one instance.

  Returns: a dictionary of dictionaries, keys being the instance name,
    and with values:
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
    where
    memory: memory size of instance (int)
    state: xen state of instance (string)
    time: cpu time of instance (float)
    vcpus: the number of cpus

  """
  output = {}

  iinfo = hypervisor.GetHypervisor().GetAllInstancesInfo()
  if iinfo:
    # fix: the second tuple element used to be unpacked as 'id',
    # shadowing the builtin; it is unused, hence the throwaway name
    for name, dummy_id, memory, vcpus, state, times in iinfo:
      output[name] = {
        'memory': memory,
        'vcpus': vcpus,
        'state': state,
        'time': times,
        }

  return output
359

    
360

    
361
def AddOSToInstance(instance, os_disk, swap_disk):
  """Add an os to an instance.

  Runs the OS's create script against the instance's os and swap block
  devices, logging its output to a per-run file under LOG_OS_DIR.

  Args:
    instance: the instance object
    os_disk: the instance-visible name of the os device
    swap_disk: the instance-visible name of the swap device

  Returns:
    True on success, False when a device name can't be resolved or the
    create script fails (both cases are logged).

  Raises:
    errors.BlockDeviceError if a matched disk is not actually set up.

  """
  inst_os = OSFromDisk(instance.os)

  create_script = inst_os.create_script

  # for/else: the else branch runs only if no disk matched (no break);
  # on break, the leaked loop variable os_device is used further below
  for os_device in instance.disks:
    if os_device.iv_name == os_disk:
      break
  else:
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
    return False

  # same for/else pattern; swap_device is also used after the loop
  for swap_device in instance.disks:
    if swap_device.iv_name == swap_disk:
      break
  else:
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
    return False

  real_os_dev = _RecursiveFindBD(os_device)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(os_device))
  real_os_dev.Open()

  real_swap_dev = _RecursiveFindBD(swap_device)
  if real_swap_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(swap_device))
  real_swap_dev.Open()

  # one log file per (os, instance, timestamp) run
  logfile = "%s/add-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
                                     instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  # BuildShellCmd quotes each argument; &> sends both stdout and
  # stderr of the create script to the log file
  command = utils.BuildShellCmd("cd %s; %s -i %s -b %s -s %s &>%s",
                                inst_os.path, create_script, instance.name,
                                real_os_dev.dev_path, real_swap_dev.dev_path,
                                logfile)

  result = utils.RunCmd(command)

  if result.failed:
    logger.Error("os create command '%s' returned error: %s"
                 " output: %s" %
                 (command, result.fail_reason, result.output))
    return False

  return True
419

    
420

    
421
def _GetVGInfo(vg_name):
  """Get informations about the volume group.

  Args:
    vg_name: the volume group

  Returns:
    { 'vg_size' : xxx, 'vg_free' : xxx, 'pv_count' : xxx }
    where
    vg_size is the total size of the volume group in MiB
    vg_free is the free size of the volume group in MiB
    pv_count are the number of physical disks in that vg

  Raises:
    errors.LVMError when the vgs command fails (e.g. vg not present).

  """
  retval = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings",
                         "--nosuffix", "--units=m", "--separator=:", vg_name])

  if retval.failed:
    errmsg = "volume group %s not present" % vg_name
    logger.Error(errmsg)
    raise errors.LVMError(errmsg)
  # NOTE(review): this reads retval.stdout while every other RunCmd
  # caller in this module reads retval.output — confirm the RunCmd
  # result object actually exposes .stdout
  valarr = retval.stdout.strip().split(':')
  # expects exactly "size:free:pv_count"; malformed output would raise
  # IndexError/ValueError here rather than a ganeti error
  retdic = {
    "vg_size": int(round(float(valarr[0]), 0)),
    "vg_free": int(round(float(valarr[1]), 0)),
    "pv_count": int(valarr[2]),
    }
  return retdic
449

    
450

    
451
def _GatherBlockDevs(instance):
  """Set up an instance's block device(s).

  This is run on the primary node at instance startup. The block
  devices must be already assembled.

  Returns:
    list of (disk, device) pairs, where disk is the objects.Disk and
    device the attached block device instance

  Raises:
    errors.BlockDeviceError if any disk cannot be found

  """
  gathered = []
  for disk in instance.disks:
    found = _RecursiveFindBD(disk)
    if found is None:
      raise errors.BlockDeviceError("Block device '%s' is not set up." %
                                    str(disk))
    found.Open()
    gathered.append((disk, found))
  return gathered
467

    
468

    
469
def StartInstance(instance, extra_args):
470
  """Start an instance.
471

472
  Args:
473
    instance - name of instance to start.
474
  """
475

    
476
  running_instances = GetInstanceList()
477

    
478
  if instance.name in running_instances:
479
    return True
480

    
481
  block_devices = _GatherBlockDevs(instance)
482
  hyper = hypervisor.GetHypervisor()
483

    
484
  try:
485
    hyper.StartInstance(instance, block_devices, extra_args)
486
  except errors.HypervisorError, err:
487
    logger.Error("Failed to start instance: %s" % err)
488
    return False
489

    
490
  return True
491

    
492

    
493
def ShutdownInstance(instance):
494
  """Shut an instance down.
495

496
  Args:
497
    instance - name of instance to shutdown.
498
  """
499

    
500
  running_instances = GetInstanceList()
501

    
502
  if instance.name not in running_instances:
503
    return True
504

    
505
  hyper = hypervisor.GetHypervisor()
506
  try:
507
    hyper.StopInstance(instance)
508
  except errors.HypervisorError, err:
509
    logger.Error("Failed to stop instance: %s" % err)
510
    return False
511

    
512
  # test every 10secs for 2min
513
  shutdown_ok = False
514

    
515
  time.sleep(1)
516
  for dummy in range(11):
517
    if instance.name not in GetInstanceList():
518
      break
519
    time.sleep(10)
520
  else:
521
    # the shutdown did not succeed
522
    logger.Error("shutdown of '%s' unsuccessful, using destroy" % instance)
523

    
524
    try:
525
      hyper.StopInstance(instance, force=True)
526
    except errors.HypervisorError, err:
527
      logger.Error("Failed to stop instance: %s" % err)
528
      return False
529

    
530
    time.sleep(1)
531
    if instance.name in GetInstanceList():
532
      logger.Error("could not shutdown instance '%s' even by destroy")
533
      return False
534

    
535
  return True
536

    
537

    
538
def CreateBlockDevice(disk, size, on_primary):
  """Creates a block device for an instance.

  Args:
   disk: a ganeti.objects.Disk object describing the device to create
   size: the size of the physical underlying devices
   on_primary: whether this node is the instance's primary; if true (or
               if the disk type assembles on secondaries) the device is
               `Assemble()`-d and possibly `Open()`-ed after creation

  Returns:
    the new unique_id of the device (this can sometime be
    computed only after creation), or None. On secondary nodes,
    it's not required to return anything.

  """
  clist = []
  if disk.children:
    for child in disk.children:
      crdev = _RecursiveAssembleBD(child, on_primary)
      if on_primary or disk.AssembleOnSecondary():
        # we need the children open in case the device itself has to
        # be assembled
        crdev.Open()
      else:
        crdev.Close()
      clist.append(crdev)
  try:
    # if a stale device with the same id already exists, remove it
    # before creating anew
    device = bdev.FindDevice(disk.dev_type, disk.physical_id, clist)
    if device is not None:
      logger.Info("removing existing device %s" % disk)
      device.Remove()
  except errors.BlockDeviceError, err:
    # best effort: if the old device can't even be attached, there is
    # nothing to remove
    pass

  device = bdev.Create(disk.dev_type, disk.physical_id,
                       clist, size)
  if device is None:
    raise ValueError("Can't create child device for %s, %s" %
                     (disk, size))
  if on_primary or disk.AssembleOnSecondary():
    device.Assemble()
    # NOTE(review): 30*1024 presumably caps resync speed in KiB/s
    # (i.e. 30 MiB/s) — confirm against bdev.SetSyncSpeed
    device.SetSyncSpeed(30*1024)
    if on_primary or disk.OpenOnSecondary():
      device.Open(force=True)
  physical_id = device.unique_id
  return physical_id
584

    
585

    
586
def RemoveBlockDevice(disk):
587
  """Remove a block device.
588

589
  This is intended to be called recursively.
590

591
  """
592
  try:
593
    # since we are removing the device, allow a partial match
594
    # this allows removal of broken mirrors
595
    rdev = _RecursiveFindBD(disk, allow_partial=True)
596
  except errors.BlockDeviceError, err:
597
    # probably can't attach
598
    logger.Info("Can't attach to device %s in remove" % disk)
599
    rdev = None
600
  if rdev is not None:
601
    result = rdev.Remove()
602
  else:
603
    result = True
604
  if disk.children:
605
    for child in disk.children:
606
      result = result and RemoveBlockDevice(child)
607
  return result
608

    
609

    
610
def _RecursiveAssembleBD(disk, as_primary):
  """Activate a block device for an instance.

  This is run on the primary and secondary nodes for an instance.

  This function is called recursively.

  Args:
    disk: a objects.Disk object
    as_primary: if we should make the block device read/write

  Returns:
    the assembled device or None (in case no device was assembled)
    Note: when nothing is assembled (secondary node, non-assembling
    disk type), the literal True is returned rather than a device.

  If the assembly is not successful, an exception is raised.

  """
  # assemble all children first; their device objects are passed to
  # the parent's AttachOrAssemble below
  children = []
  if disk.children:
    for chld_disk in disk.children:
      children.append(_RecursiveAssembleBD(chld_disk, as_primary))

  if as_primary or disk.AssembleOnSecondary():
    r_dev = bdev.AttachOrAssemble(disk.dev_type, disk.physical_id, children)
    # NOTE(review): 30*1024 presumably caps resync speed in KiB/s
    # (30 MiB/s) — confirm against bdev.SetSyncSpeed
    r_dev.SetSyncSpeed(30*1024)
    result = r_dev
    # open read/write on the primary (or for types that open on
    # secondaries too); otherwise keep the device closed
    if as_primary or disk.OpenOnSecondary():
      r_dev.Open()
    else:
      r_dev.Close()
  else:
    result = True
  return result
643

    
644

    
645
def AssembleBlockDevice(disk, as_primary):
  """Activate a block device for an instance.

  This is a wrapper over _RecursiveAssembleBD.

  Returns:
    a /dev path for primary nodes
    True for secondary nodes

  """
  assembled = _RecursiveAssembleBD(disk, as_primary)
  if isinstance(assembled, bdev.BlockDev):
    # an actual device was assembled: hand back its path instead
    return assembled.dev_path
  return assembled
659

    
660

    
661
def ShutdownBlockDevice(disk):
  """Shut down a block device.

  First, if the device is assembled (can `Attach()`), then the device
  is shutdown. Then the children of the device are shutdown.

  This function is called recursively. Note that we don't cache the
  children or such, as oppossed to assemble, shutdown of different
  devices doesn't require that the upper device was active.

  """
  found = _RecursiveFindBD(disk)
  if found is None:
    # nothing attached, so nothing to shut down at this level
    result = True
  else:
    result = found.Shutdown()

  for child in (disk.children or []):
    # short-circuit: once a shutdown failed, children are skipped
    result = result and ShutdownBlockDevice(child)
  return result
681

    
682

    
683
def MirrorAddChild(md_cdev, new_cdev):
  """Extend an MD raid1 array.

  Args:
    md_cdev: the disk describing the md array
    new_cdev: the disk to add to the array

  Returns:
    True on success, False when either device can't be found (logged).

  """
  array_dev = _RecursiveFindBD(md_cdev, allow_partial=True)
  if array_dev is None:
    logger.Error("Can't find md device")
    return False

  member_dev = _RecursiveFindBD(new_cdev)
  if member_dev is None:
    logger.Error("Can't find new device to add")
    return False

  member_dev.Open()
  array_dev.AddChild(member_dev)
  return True
698

    
699

    
700
def MirrorRemoveChild(md_cdev, new_cdev):
  """Reduce an MD raid1 array.

  Args:
    md_cdev: the disk describing the md array
    new_cdev: the disk (currently part of the array) to remove

  Returns:
    True on success, False when either device can't be found (logged).

  """
  md_bdev = _RecursiveFindBD(md_cdev)
  if md_bdev is None:
    # consistency fix: MirrorAddChild logs its failures, this function
    # used to fail silently
    logger.Error("Can't find md device")
    return False
  new_bdev = _RecursiveFindBD(new_cdev)
  if new_bdev is None:
    logger.Error("Can't find device to remove")
    return False
  new_bdev.Open()
  md_bdev.RemoveChild(new_bdev.dev_path)
  return True
713

    
714

    
715
def GetMirrorStatus(disks):
716
  """Get the mirroring status of a list of devices.
717

718
  Args:
719
    disks: list of `objects.Disk`
720

721
  Returns:
722
    list of (mirror_done, estimated_time) tuples, which
723
    are the result of bdev.BlockDevice.CombinedSyncStatus()
724

725
  """
726
  stats = []
727
  for dsk in disks:
728
    rbd = _RecursiveFindBD(dsk)
729
    if rbd is None:
730
      raise errors.BlockDeviceError, "Can't find device %s" % str(dsk)
731
    stats.append(rbd.CombinedSyncStatus())
732
  return stats
733

    
734

    
735
def _RecursiveFindBD(disk, allow_partial=False):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance
    allow_partial: don't abort the find if a child of the
                   device can't be found; this is intended to be
                   used when repairing mirrors
                   NOTE(review): this parameter is currently neither
                   used in this body nor forwarded to the recursive
                   child calls — confirm whether bdev.FindDevice is
                   expected to receive it

  Returns:
    None if the device can't be found
    otherwise the device instance

  """
  # resolve all children first; their device objects (or None) are
  # handed to FindDevice for the parent lookup
  children = []
  if disk.children:
    for chdisk in disk.children:
      children.append(_RecursiveFindBD(chdisk))

  return bdev.FindDevice(disk.dev_type, disk.physical_id, children)
757

    
758

    
759
def FindBlockDevice(disk):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance
  Returns:
    None if the device can't be found
    (device_path, major, minor, sync_percent, estimated_time, is_degraded)

  """
  rbd = _RecursiveFindBD(disk)
  if rbd is None:
    return None
  sync_p, est_t, is_degr = rbd.GetSyncStatus()
  return (rbd.dev_path, rbd.major, rbd.minor, sync_p, est_t, is_degr)
776

    
777

    
778
def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
  """Write a file to the filesystem.

  This allows the master to overwrite(!) a file. It will only perform
  the operation if the file belongs to a list of configuration files.

  Args:
    file_name: absolute path of the target file
    data: the full file contents to write
    mode, uid, gid: permissions and ownership to apply
    atime, mtime: timestamps to set on the file

  Returns:
    True on success, False when the name is not absolute or not in the
    allowed-targets list (both cases are logged).

  """
  if not os.path.isabs(file_name):
    logger.Error("Filename passed to UploadFile is not absolute: '%s'" %
                 file_name)
    return False

  # whitelist: only cluster config files may be overwritten this way
  allowed_files = [constants.CLUSTER_CONF_FILE, "/etc/hosts",
                   "/etc/ssh/ssh_known_hosts"]
  allowed_files.extend(ssconf.SimpleStore().GetFileList())
  if file_name not in allowed_files:
    logger.Error("Filename passed to UploadFile not in allowed"
                 " upload targets: '%s'" % file_name)
    return False

  # write to a temp file in the same directory, then rename over the
  # target, so readers never observe a half-written file
  dir_name, small_name = os.path.split(file_name)
  fd, new_name = tempfile.mkstemp('.new', small_name, dir_name)
  # here we need to make sure we remove the temp file, if any error
  # leaves it in place
  try:
    # ownership/mode are set before the data lands in the file
    os.chown(new_name, uid, gid)
    os.chmod(new_name, mode)
    os.write(fd, data)
    os.fsync(fd)
    os.utime(new_name, (atime, mtime))
    os.rename(new_name, file_name)
  finally:
    os.close(fd)
    # after a successful rename new_name no longer exists; presumably
    # RemoveFile ignores a missing file — confirm in utils
    utils.RemoveFile(new_name)
  return True
813

    
814
def _ErrnoOrStr(err):
815
  """Format an EnvironmentError exception.
816

817
  If the `err` argument has an errno attribute, it will be looked up
818
  and converted into a textual EXXXX description. Otherwise the string
819
  representation of the error will be returned.
820

821
  """
822
  if hasattr(err, 'errno'):
823
    detail = errno.errorcode[err.errno]
824
  else:
825
    detail = str(err)
826
  return detail
827

    
828

    
829
def _OSOndiskVersion(name, os_dir=None):
  """Compute and return the api version of a given OS.

  This function will try to read the api version of the os given by
  the 'name' parameter. By default, it wil use the constants.OS_DIR
  as top-level directory for OSes, but this can be overriden by the
  use of the os_dir parameter. Return value will be either an
  integer denoting the version or None in the case when this is not
  a valid OS name.

  Raises:
    errors.InvalidOS when the version file is missing, not a regular
    file, unreadable or does not contain an integer.

  """
  if os_dir is None:
    os_dir = os.path.sep.join([constants.OS_DIR, name])

  # the version lives in a one-line file inside the OS directory
  api_file = os.path.sep.join([os_dir, "ganeti_api_version"])

  try:
    st = os.stat(api_file)
  except EnvironmentError, err:
    raise errors.InvalidOS, (name, "'ganeti_api_version' file not"
                             " found (%s)" % _ErrnoOrStr(err))

  # reject symlink targets such as directories or devices
  if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
    raise errors.InvalidOS, (name, "'ganeti_api_version' file is not"
                             " a regular file")

  try:
    f = open(api_file)
    try:
      # 256 bytes is plenty for an integer version string
      api_version = f.read(256)
    finally:
      f.close()
  except EnvironmentError, err:
    raise errors.InvalidOS, (name, "error while reading the"
                             " API version (%s)" % _ErrnoOrStr(err))

  api_version = api_version.strip()
  try:
    api_version = int(api_version)
  except (TypeError, ValueError), err:
    raise errors.InvalidOS, (name, "API version is not integer (%s)" %
                             str(err))

  return api_version
873

    
874
def DiagnoseOS(top_dir=None):
  """Compute the validity for all OSes.

  For each name in the give top_dir parameter (if not given, defaults
  to constants.OS_DIR), it will return an object. If this is a valid
  os, the object will be an instance of the object.OS class. If not,
  it will be an instance of errors.InvalidOS and this signifies that
  this name does not correspond to a valid OS.

  Returns:
    list of objects
    NOTE(review): when the OS directory itself cannot be listed this
    returns the literal False instead of a list — callers must handle
    both; confirm whether that mixed return type is intentional.

  """
  if top_dir is None:
    top_dir = constants.OS_DIR

  try:
    f_names = os.listdir(top_dir)
  except EnvironmentError, err:
    logger.Error("Can't list the OS directory: %s" % str(err))
    return False
  result = []
  for name in f_names:
    try:
      os_inst = OSFromDisk(name, os.path.sep.join([top_dir, name]))
      result.append(os_inst)
    except errors.InvalidOS, err:
      # invalid OSes are reported as the exception object itself
      result.append(err)

  return result
904

    
905

    
906
def OSFromDisk(name, os_dir=None):
907
  """Create an OS instance from disk.
908

909
  This function will return an OS instance if the given name is a
910
  valid OS name. Otherwise, it will raise an appropriate
911
  `errors.InvalidOS` exception, detailing why this is not a valid
912
  OS.
913

914
  """
915
  if os_dir is None:
916
    os_dir = os.path.sep.join([constants.OS_DIR, name])
917

    
918
  api_version = _OSOndiskVersion(name, os_dir)
919

    
920
  if api_version != constants.OS_API_VERSION:
921
    raise errors.InvalidOS, (name, "API version mismatch (found %s want %s)"
922
                             % (api_version, constants.OS_API_VERSION))
923

    
924
  # OS Scripts dictionary, we will populate it with the actual script names
925
  os_scripts = {'create': '', 'export': '', 'import': ''}
926

    
927
  for script in os_scripts:
928
    os_scripts[script] = os.path.sep.join([os_dir, script])
929

    
930
    try:
931
      st = os.stat(os_scripts[script])
932
    except EnvironmentError, err:
933
      raise errors.InvalidOS, (name, "'%s' script missing (%s)" %
934
                               (script, _ErrnoOrStr(err)))
935

    
936
    if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
937
      raise errors.InvalidOS, (name, "'%s' script not executable" % script)
938

    
939
    if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
940
      raise errors.InvalidOS, (name, "'%s' is not a regular file" % script)
941

    
942

    
943
  return objects.OS(name=name, path=os_dir,
944
                    create_script=os_scripts['create'],
945
                    export_script=os_scripts['export'],
946
                    import_script=os_scripts['import'],
947
                    api_version=api_version)
948

    
949

    
950
def SnapshotBlockDevice(disk):
951
  """Create a snapshot copy of a block device.
952

953
  This function is called recursively, and the snapshot is actually created
954
  just for the leaf lvm backend device.
955

956
  Args:
957
    disk: the disk to be snapshotted
958

959
  Returns:
960
    a config entry for the actual lvm device snapshotted.
961
  """
962

    
963
  if disk.children:
964
    if len(disk.children) == 1:
965
      # only one child, let's recurse on it
966
      return SnapshotBlockDevice(disk.children[0])
967
    else:
968
      # more than one child, choose one that matches
969
      for child in disk.children:
970
        if child.size == disk.size:
971
          # return implies breaking the loop
972
          return SnapshotBlockDevice(child)
973
  elif disk.dev_type == "lvm":
974
    r_dev = _RecursiveFindBD(disk)
975
    if r_dev is not None:
976
      # let's stay on the safe side and ask for the full size, for now
977
      return r_dev.Snapshot(disk.size)
978
    else:
979
      return None
980
  else:
981
    raise errors.ProgrammerError, ("Cannot snapshot non-lvm block device"
982
                                   "'%s' of type '%s'" %
983
                                   (disk.unique_id, disk.dev_type))
984

    
985

    
986
def ExportSnapshot(disk, dest_node, instance):
987
  """Export a block device snapshot to a remote node.
988

989
  Args:
990
    disk: the snapshot block device
991
    dest_node: the node to send the image to
992
    instance: instance being exported
993

994
  Returns:
995
    True if successful, False otherwise.
996
  """
997

    
998
  inst_os = OSFromDisk(instance.os)
999
  export_script = inst_os.export_script
1000

    
1001
  logfile = "%s/exp-%s-%s-%s.log" % (constants.LOG_OS_DIR, inst_os.name,
1002
                                     instance.name, int(time.time()))
1003
  if not os.path.exists(constants.LOG_OS_DIR):
1004
    os.mkdir(constants.LOG_OS_DIR, 0750)
1005

    
1006
  real_os_dev = _RecursiveFindBD(disk)
1007
  if real_os_dev is None:
1008
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
1009
                                  str(disk))
1010
  real_os_dev.Open()
1011

    
1012
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
1013
  destfile = disk.physical_id[1]
1014

    
1015
  # the target command is built out of three individual commands,
1016
  # which are joined by pipes; we check each individual command for
1017
  # valid parameters
1018

    
1019
  expcmd = utils.BuildShellCmd("cd %s; %s -i %s -b %s 2>%s", inst_os.path,
1020
                               export_script, instance.name,
1021
                               real_os_dev.dev_path, logfile)
1022

    
1023
  comprcmd = "gzip"
1024

    
1025
  remotecmd = utils.BuildShellCmd("ssh -q -oStrictHostKeyChecking=yes"
1026
                                  " -oBatchMode=yes -oEscapeChar=none"
1027
                                  " %s 'mkdir -p %s; cat > %s/%s'",
1028
                                  dest_node, destdir, destdir, destfile)
1029

    
1030
  # all commands have been checked, so we're safe to combine them
1031
  command = '|'.join([expcmd, comprcmd, remotecmd])
1032

    
1033
  result = utils.RunCmd(command)
1034

    
1035
  if result.failed:
1036
    logger.Error("os snapshot export command '%s' returned error: %s"
1037
                 " output: %s" %
1038
                 (command, result.fail_reason, result.output))
1039
    return False
1040

    
1041
  return True
1042

    
1043

    
1044
def FinalizeExport(instance, snap_disks):
  """Write out the export configuration information.

  Writes the export's config.ini into the ".new" staging directory and
  then atomically (as far as rename allows) replaces any previous export
  for this instance with the staged one.

  Args:
    instance: instance configuration
    snap_disks: snapshot block devices

  Returns:
    False in case of error, True otherwise.

  """
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
  finaldestdir = os.path.join(constants.EXPORT_DIR, instance.name)

  config = objects.SerializableConfigParser()

  config.add_section(constants.INISECT_EXP)
  config.set(constants.INISECT_EXP, 'version', '0')
  config.set(constants.INISECT_EXP, 'timestamp', '%d' % int(time.time()))
  config.set(constants.INISECT_EXP, 'source', instance.primary_node)
  config.set(constants.INISECT_EXP, 'os', instance.os)
  config.set(constants.INISECT_EXP, 'compression', 'gzip')

  config.add_section(constants.INISECT_INS)
  config.set(constants.INISECT_INS, 'name', instance.name)
  config.set(constants.INISECT_INS, 'memory', '%d' % instance.memory)
  config.set(constants.INISECT_INS, 'vcpus', '%d' % instance.vcpus)
  config.set(constants.INISECT_INS, 'disk_template', instance.disk_template)

  # robustness fix: initialise the counters so that an instance with no
  # nics (or no snapshot disks below) no longer raises NameError when
  # the loop body never runs; NOTE(review): for non-empty lists the
  # stored value is still the *last index* (count - 1), kept as-is for
  # compatibility with the import side
  nic_count = 0
  for nic_count, nic in enumerate(instance.nics):
    config.set(constants.INISECT_INS, 'nic%d_mac' %
               nic_count, '%s' % nic.mac)
    config.set(constants.INISECT_INS, 'nic%d_ip' % nic_count, '%s' % nic.ip)
  # TODO: redundant: on load can read nics until it doesn't exist
  config.set(constants.INISECT_INS, 'nic_count', '%d' % nic_count)

  disk_count = 0
  for disk_count, disk in enumerate(snap_disks):
    config.set(constants.INISECT_INS, 'disk%d_ivname' % disk_count,
               ('%s' % disk.iv_name))
    config.set(constants.INISECT_INS, 'disk%d_dump' % disk_count,
               ('%s' % disk.physical_id[1]))
    config.set(constants.INISECT_INS, 'disk%d_size' % disk_count,
               ('%d' % disk.size))
  config.set(constants.INISECT_INS, 'disk_count', '%d' % disk_count)

  cff = os.path.join(destdir, constants.EXPORT_CONF_FILE)
  cfo = open(cff, 'w')
  try:
    config.write(cfo)
  finally:
    cfo.close()

  # ignore_errors=True: a previous export may legitimately not exist
  shutil.rmtree(finaldestdir, True)
  shutil.move(destdir, finaldestdir)

  return True
1101
def ExportInfo(dest):
  """Get export configuration information.

  Args:
    dest: directory containing the export

  Returns:
    A serializable config file containing the export info, or None if
    the export lacks the mandatory sections.

  """
  config_path = os.path.join(dest, constants.EXPORT_CONF_FILE)

  export_config = objects.SerializableConfigParser()
  export_config.read(config_path)

  # both the export and the instance sections are mandatory
  is_valid = (export_config.has_section(constants.INISECT_EXP) and
              export_config.has_section(constants.INISECT_INS))
  if not is_valid:
    return None

  return export_config
1124
def ImportOSIntoInstance(instance, os_disk, swap_disk, src_node, src_image):
1125
  """Import an os image into an instance.
1126

1127
  Args:
1128
    instance: the instance object
1129
    os_disk: the instance-visible name of the os device
1130
    swap_disk: the instance-visible name of the swap device
1131
    src_node: node holding the source image
1132
    src_image: path to the source image on src_node
1133

1134
  Returns:
1135
    False in case of error, True otherwise.
1136

1137
  """
1138

    
1139
  inst_os = OSFromDisk(instance.os)
1140
  import_script = inst_os.import_script
1141

    
1142
  for os_device in instance.disks:
1143
    if os_device.iv_name == os_disk:
1144
      break
1145
  else:
1146
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
1147
    return False
1148

    
1149
  for swap_device in instance.disks:
1150
    if swap_device.iv_name == swap_disk:
1151
      break
1152
  else:
1153
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
1154
    return False
1155

    
1156
  real_os_dev = _RecursiveFindBD(os_device)
1157
  if real_os_dev is None:
1158
    raise errors.BlockDeviceError, ("Block device '%s' is not set up" %
1159
                                    str(os_device))
1160
  real_os_dev.Open()
1161

    
1162
  real_swap_dev = _RecursiveFindBD(swap_device)
1163
  if real_swap_dev is None:
1164
    raise errors.BlockDeviceError, ("Block device '%s' is not set up" %
1165
                                    str(swap_device))
1166
  real_swap_dev.Open()
1167

    
1168
  logfile = "%s/import-%s-%s-%s.log" % (constants.LOG_OS_DIR, instance.os,
1169
                                        instance.name, int(time.time()))
1170
  if not os.path.exists(constants.LOG_OS_DIR):
1171
    os.mkdir(constants.LOG_OS_DIR, 0750)
1172

    
1173
  remotecmd = utils.BuildShellCmd("ssh -q -oStrictHostKeyChecking=yes"
1174
                                  " -oBatchMode=yes -oEscapeChar=none"
1175
                                  " %s 'cat %s'", src_node, src_image)
1176

    
1177
  comprcmd = "gunzip"
1178
  impcmd = utils.BuildShellCmd("(cd %s; %s -i %s -b %s -s %s &>%s)",
1179
                               inst_os.path, import_script, instance.name,
1180
                               real_os_dev.dev_path, real_swap_dev.dev_path,
1181
                               logfile)
1182

    
1183
  command = '|'.join([remotecmd, comprcmd, impcmd])
1184

    
1185
  result = utils.RunCmd(command)
1186

    
1187
  if result.failed:
1188
    logger.Error("os import command '%s' returned error: %s"
1189
                 " output: %s" %
1190
                 (command, result.fail_reason, result.output))
1191
    return False
1192

    
1193
  return True
1194

    
1195

    
1196
def ListExports():
  """Return a list of exports currently available on this machine.

  """
  # guard clause: no export directory means no exports
  if not os.path.isdir(constants.EXPORT_DIR):
    return []
  return os.listdir(constants.EXPORT_DIR)
1205
def RemoveExport(export):
  """Remove an existing export from the node.

  Args:
    export: the name of the export to remove

  Returns:
    False in case of error, True otherwise.

  """
  target_dir = os.path.join(constants.EXPORT_DIR, export)

  # TODO: catch some of the relevant exceptions and provide a pretty
  # error message if rmtree fails.
  shutil.rmtree(target_dir)

  return True
1224
class HooksRunner(object):
  """Hook runner.

  This class is instantiated on the node side (ganeti-noded) and not on
  the master side.

  """
  # only scripts whose relative name matches this pattern are executed;
  # everything else found in the hooks directory is reported as skipped
  RE_MASK = re.compile("^[a-zA-Z0-9_-]+$")

  def __init__(self, hooks_base_dir=None):
    """Constructor for hooks runner.

    Args:
      - hooks_base_dir: if not None, this overrides the
        constants.HOOKS_BASE_DIR (useful for unittests)
      - logs_base_dir: if not None, this overrides the
        constants.LOG_HOOKS_DIR (useful for unittests)
      - logging: enable or disable logging of script output

    """
    if hooks_base_dir is None:
      hooks_base_dir = constants.HOOKS_BASE_DIR
    self._BASE_DIR = hooks_base_dir

  @staticmethod
  def ExecHook(script, env):
    """Exec one hook script.

    Args:
     - phase: the phase
     - script: the full path to the script
     - env: the environment with which to exec the script

    Returns:
      A (success, output) tuple; success is True iff the script exited
      with status 0, output is at most the first 4096 bytes of its
      combined stdout/stderr.

    """
    # exec the process using subprocess and log the output
    fdstdin = None
    try:
      # give the hook /dev/null on stdin so it can never block on input
      fdstdin = open("/dev/null", "r")
      child = subprocess.Popen([script], stdin=fdstdin, stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT, close_fds=True,
                               shell=False, cwd="/",env=env)
      output = ""
      try:
        # NOTE: only the first 4kB of output are kept; closing the pipe
        # here discards the rest (and may SIGPIPE a chatty script)
        output = child.stdout.read(4096)
        child.stdout.close()
      except EnvironmentError, err:
        output += "Hook script error: %s" % str(err)

      # retry wait() when it is interrupted by a signal (EINTR);
      # any other error propagates
      while True:
        try:
          result = child.wait()
          break
        except EnvironmentError, err:
          if err.errno == errno.EINTR:
            continue
          raise
    finally:
      # try not to leak fds
      for fd in (fdstdin, ):
        if fd is not None:
          try:
            fd.close()
          except EnvironmentError, err:
            # just log the error
            #logger.Error("While closing fd %s: %s" % (fd, err))
            pass

    return result == 0, output

  def RunHooks(self, hpath, phase, env):
    """Run the scripts in the hooks directory.

    This method will not be usually overriden by child opcodes.

    Args:
      - hpath: hooks path component, used to build the directory name
        ("<hpath>-pre.d" or "<hpath>-post.d" under the base dir)
      - phase: constants.HOOKS_PHASE_PRE or constants.HOOKS_PHASE_POST
      - env: environment dict passed to each executed hook

    Returns:
      A list of ("subdir/script", status, output) tuples, one per
      directory entry, with status one of the constants.HKR_* values.

    Raises:
      errors.ProgrammerError: if phase is not a known hooks phase.

    """
    if phase == constants.HOOKS_PHASE_PRE:
      suffix = "pre"
    elif phase == constants.HOOKS_PHASE_POST:
      suffix = "post"
    else:
      raise errors.ProgrammerError, ("Unknown hooks phase: '%s'" % phase)
    rr = []

    subdir = "%s-%s.d" % (hpath, suffix)
    dir_name = "%s/%s" % (self._BASE_DIR, subdir)
    try:
      dir_contents = os.listdir(dir_name)
    except OSError, err:
      # must log
      # a missing/unreadable hooks dir is treated as "no hooks to run"
      return rr

    # we use the standard python sort order,
    # so 00name is the recommended naming scheme
    dir_contents.sort()
    for relname in dir_contents:
      fname = os.path.join(dir_name, relname)
      # skip anything that is not a regular executable file with an
      # allowed name (see RE_MASK)
      if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
          self.RE_MASK.match(relname) is not None):
        rrval = constants.HKR_SKIP
        output = ""
      else:
        result, output = self.ExecHook(fname, env)
        if not result:
          rrval = constants.HKR_FAIL
        else:
          rrval = constants.HKR_SUCCESS
      rr.append(("%s/%s" % (subdir, relname), rrval, output))

    return rr