Statistics
| Branch: | Tag: | Revision:

root / lib / backend.py @ a8083063

History | View | Annotate | Download (36.8 kB)

1
#!/usr/bin/python
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Functions used by the node daemon"""
23

    
24

    
25
import os
26
import os.path
27
import shutil
28
import time
29
import tempfile
30
import stat
31
import errno
32
import re
33
import subprocess
34

    
35
from ganeti import logger
36
from ganeti import errors
37
from ganeti import utils
38
from ganeti import ssh
39
from ganeti import hypervisor
40
from ganeti import constants
41
from ganeti import bdev
42
from ganeti import objects
43

    
44

    
45
def ListConfigFiles():
  """Return a list of the config files present on the local node.

  Both master-only and node config files are checked; only the
  ones that actually exist on disk are returned.

  """
  candidates = (list(constants.MASTER_CONFIGFILES) +
                list(constants.NODE_CONFIGFILES))
  return [name for name in candidates if os.path.exists(name)]
60

    
61

    
62
def StartMaster():
  """Activate local node as master node.

  There are two needed steps for this:
    - register the master init script, and also run it now
    - register the cron script

  Returns True on success, False if any step failed.

  """
  register = utils.RunCmd(["update-rc.d", constants.MASTER_INITD_NAME,
                           "defaults", "21", "79"])
  if register.failed:
    logger.Error("could not register the master init.d script with command"
                 " %s, error %s" % (register.cmd, register.output))
    return False

  start = utils.RunCmd([constants.MASTER_INITD_SCRIPT, "start"])
  if start.failed:
    logger.Error("could not activate cluster interface with command %s,"
                 " error %s" % (start.cmd, start.output))
    return False

  # refresh the cron symlink: drop any stale link first
  utils.RemoveFile(constants.MASTER_CRON_LINK)
  os.symlink(constants.MASTER_CRON_FILE, constants.MASTER_CRON_LINK)
  return True
88

    
89

    
90
def StopMaster():
  """Deactivate this node as master.

  This does two things:
    - remove links to master's startup script
    - remove link to master cron script.

  Returns:
    True on success, False if any step fails.

  """
  result = utils.RunCmd(["update-rc.d", "-f",
                          constants.MASTER_INITD_NAME, "remove"])
  if result.failed:
    logger.Error("could not unregister the master script with command"
                 " %s, error %s" % (result.cmd, result.output))
    return False

  # BUG FIX: this result was previously assigned to 'output' while the
  # stale 'result' of the update-rc.d call was checked below, so a
  # failure of the stop command was never detected
  result = utils.RunCmd([constants.MASTER_INITD_SCRIPT, "stop"])

  if result.failed:
    logger.Error("could not deactivate cluster interface with command %s,"
                 " error %s" % (result.cmd, result.output))
    return False

  utils.RemoveFile(constants.MASTER_CRON_LINK)

  return True
115

    
116

    
117
def AddNode(dsa, dsapub, rsa, rsapub, ssh, sshpub):
  """Add the node to the cluster.

  This writes the host ssh keys and root's ssh identity, authorizes
  the new key, restarts sshd and clears the root known_hosts file.

  Args:
    dsa: private DSA host key contents
    dsapub: public DSA host key contents
    rsa: private RSA host key contents
    rsapub: public RSA host key contents
    ssh: private DSA key for root
    sshpub: public DSA key for root

  Returns:
    True

  """
  def _WriteFile(path, data):
    # helper: (over)write a small key file; the previous version left
    # every handle unclosed until garbage collection (resource leak)
    f = open(path, 'w')
    try:
      f.write(data)
    finally:
      f.close()

  _WriteFile("/etc/ssh/ssh_host_rsa_key", rsa)
  _WriteFile("/etc/ssh/ssh_host_rsa_key.pub", rsapub)
  _WriteFile("/etc/ssh/ssh_host_dsa_key", dsa)
  _WriteFile("/etc/ssh/ssh_host_dsa_key.pub", dsapub)

  if not os.path.isdir("/root/.ssh"):
    os.mkdir("/root/.ssh")

  _WriteFile("/root/.ssh/id_dsa", ssh)
  _WriteFile("/root/.ssh/id_dsa.pub", sshpub)

  # authorize the key we just wrote for root logins
  f = open('/root/.ssh/id_dsa.pub', 'r')
  try:
    utils.AddAuthorizedKey('/root/.ssh/authorized_keys', f.read(8192))
  finally:
    f.close()

  # restart sshd so the new host keys take effect
  utils.RunCmd(["/etc/init.d/ssh", "restart"])

  utils.RemoveFile("/root/.ssh/known_hosts")
  return True
162

    
163

    
164
def LeaveCluster():
  """Clean up the current node and prepare it for removal from the cluster.

  """
  # remove the files directly under DATA_DIR (subdirectories are kept)
  if os.path.exists(constants.DATA_DIR):
    for dirpath, dirnames, filenames in os.walk(constants.DATA_DIR):
      if dirpath != constants.DATA_DIR:
        continue
      for entry in filenames:
        os.unlink(os.path.join(dirpath, entry))
  utils.RemoveFile(constants.CLUSTER_NAME_FILE)

  # de-authorize and remove root's cluster ssh identity
  pub_key = open('/root/.ssh/id_dsa.pub', 'r')
  try:
    utils.RemoveAuthorizedKey('/root/.ssh/authorized_keys',
                              pub_key.read(8192))
  finally:
    pub_key.close()

  utils.RemoveFile('/root/.ssh/id_dsa')
  utils.RemoveFile('/root/.ssh/id_dsa.pub')
183

    
184

    
185
def GetNodeInfo(vgname):
  """Return a dict with information about the node.

  Args:
    vgname: name of the volume group to query

  Returns:
    a dict with at least:
      vg_size: the size of the configured volume group in MiB
      vg_free: the free size of the volume group in MiB
    plus whatever keys the hypervisor's GetNodeInfo() provides
    (memory figures, presumably in MiB -- see the hypervisor module)

  """
  vginfo = _GetVGInfo(vgname)
  info = {
    'vg_size': vginfo['vg_size'],
    'vg_free': vginfo['vg_free'],
    }

  hyp_info = hypervisor.GetHypervisor().GetNodeInfo()
  if hyp_info is not None:
    info.update(hyp_info)

  return info
211

    
212

    
213
def VerifyNode(what):
  """Verify the status of the local node.

  Args:
    what - a dictionary of things to check:
      'filelist' : list of files for which to compute checksums
      'nodelist' : list of nodes we should check communication with
      'hypervisor': run the hypervisor-specific verify

  Requested files on local node are checksummed and the result returned.

  The nodelist is traversed, with the following checks being made
  for each node:
  - known_hosts key correct
  - correct resolving of node name (target node returns its own hostname
    by ssh-execution of 'hostname', result compared against name in list.

  """
  result = {}

  if 'hypervisor' in what:
    result['hypervisor'] = hypervisor.GetHypervisor().Verify()

  if 'filelist' in what:
    result['filelist'] = utils.FingerprintFiles(what['filelist'])

  if 'nodelist' in what:
    # only nodes that fail verification get an entry (with the message)
    failures = {}
    for node in what['nodelist']:
      success, message = ssh.VerifyNodeHostname(node)
      if not success:
        failures[node] = message
    result['nodelist'] = failures
  return result
247

    
248

    
249
def GetVolumeList(vg_name):
  """Compute the list of logical volumes and their size.

  Args:
    vg_name: the volume group to list

  Returns:
    dictionary mapping volume name to its size string, e.g.
    test1: 20.06MiB
    (empty dict if the lvs command fails)

  """
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m",
                         "-oname,size", vg_name])
  if result.failed:
    logger.Error("Failed to list logical volumes, lvs output: %s" %
                 result.output)
    return {}

  # each output line is "<name> <size>"; split gives the key/value pair
  return dict([line.split() for line in result.output.splitlines()])
266

    
267

    
268
def ListVolumeGroups():
  """List the volume groups and their size.

  Thin wrapper over the utils module helper.

  Returns:
    Dictionary with keys volume name and values the size of the volume

  """
  return utils.ListVolumeGroups()
276

    
277

    
278
def BridgesExist(bridges_list):
  """Check whether all the bridges in a list exist on the current node.

  Returns:
    True if all of them exist, False as soon as one is missing

  """
  for name in bridges_list:
    if utils.BridgeExists(name):
      continue
    # stop at the first missing bridge
    return False

  return True
290

    
291

    
292
def GetInstanceList():
293
  """ provides a list of instances
294

295
  Returns:
296
    A list of all running instances on the current node
297
    - instance1.example.com
298
    - instance2.example.com
299
  """
300

    
301
  try:
302
    names = hypervisor.GetHypervisor().ListInstances()
303
  except errors.HypervisorError, err:
304
    logger.Error("error enumerating instances: %s" % str(err))
305
    raise
306

    
307
  return names
308

    
309

    
310
def GetInstanceInfo(instance):
  """Return a dictionary with information about an instance.

  Args:
    instance: name of the instance (ex. instance1.example.com)

  Returns:
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
    where
    memory: memory size of instance (int)
    state: xen state of instance (string)
    time: cpu time of instance (float)
    (empty dict if the hypervisor does not know the instance)

  """
  iinfo = hypervisor.GetHypervisor().GetInstanceInfo(instance)
  if iinfo is None:
    return {}
  # positions 2, 4 and 5 of the hypervisor tuple carry the values we need
  return {
    'memory': iinfo[2],
    'state': iinfo[4],
    'time': iinfo[5],
    }
334

    
335

    
336
def GetAllInstancesInfo():
  """Gather data about all instances.

  This is the equivalent of `GetInstanceInfo()`, except that it
  computes data for all instances at once, thus being faster if one
  needs data about more than one instance.

  Returns: a dictionary of dictionaries, keys being the instance name,
    and with values:
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
    where
    memory: memory size of instance (int)
    state: xen state of instance (string)
    time: cpu time of instance (float)
    vcpus: the number of cpus

  """
  output = {}

  iinfo = hypervisor.GetHypervisor().GetAllInstancesInfo()
  if iinfo:
    # 'dom_id' (previously 'id') renamed so the builtin isn't shadowed;
    # the domain id itself is not part of the returned data
    for name, dom_id, memory, vcpus, state, times in iinfo:
      output[name] = {
        'memory': memory,
        'vcpus': vcpus,
        'state': state,
        'time': times,
        }

  return output
366

    
367

    
368
def AddOSToInstance(instance, os_disk, swap_disk):
369
  """Add an os to an instance.
370

371
  Args:
372
    instance: the instance object
373
    os_disk: the instance-visible name of the os device
374
    swap_disk: the instance-visible name of the swap device
375

376
  """
377
  inst_os = OSFromDisk(instance.os)
378

    
379
  create_script = inst_os.create_script
380

    
381
  for os_device in instance.disks:
382
    if os_device.iv_name == os_disk:
383
      break
384
  else:
385
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
386
    return False
387

    
388
  for swap_device in instance.disks:
389
    if swap_device.iv_name == swap_disk:
390
      break
391
  else:
392
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
393
    return False
394

    
395
  real_os_dev = _RecursiveFindBD(os_device)
396
  if real_os_dev is None:
397
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
398
                                  str(os_device))
399
  real_os_dev.Open()
400

    
401
  real_swap_dev = _RecursiveFindBD(swap_device)
402
  if real_swap_dev is None:
403
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
404
                                  str(swap_device))
405
  real_swap_dev.Open()
406

    
407
  logfile = "%s/add-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
408
                                     instance.name, int(time.time()))
409
  if not os.path.exists(constants.LOG_OS_DIR):
410
    os.mkdir(constants.LOG_OS_DIR, 0750)
411

    
412
  command = utils.BuildShellCmd("cd %s; %s -i %s -b %s -s %s &>%s",
413
                                inst_os.path, create_script, instance.name,
414
                                real_os_dev.dev_path, real_swap_dev.dev_path,
415
                                logfile)
416

    
417
  result = utils.RunCmd(command)
418

    
419
  if result.failed:
420
    logger.Error("os create command '%s' returned error: %s"
421
                 " output: %s" %
422
                 (command, result.fail_reason, result.output))
423
    return False
424

    
425
  return True
426

    
427

    
428
def _GetVGInfo(vg_name):
  """Get information about the volume group.

  Args:
    vg_name: the volume group

  Returns:
    { 'vg_size' : xxx, 'vg_free' : xxx, 'pv_count' : xxx }
    where
    vg_size is the total size of the volume group in MiB
    vg_free is the free size of the volume group in MiB
    pv_count are the number of physical disks in that vg

  Raises:
    errors.LVMError if the volume group can't be queried

  """
  retval = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings",
                         "--nosuffix", "--units=m", "--separator=:", vg_name])

  if retval.failed:
    errmsg = "volume group %s not present" % vg_name
    logger.Error(errmsg)
    raise errors.LVMError(errmsg)

  # NOTE(review): this reads retval.stdout while the rest of the module
  # uses result.output -- confirm both attributes exist on RunCmd results
  fields = retval.stdout.strip().split(':')
  return {
    "vg_size": int(round(float(fields[0]), 0)),
    "vg_free": int(round(float(fields[1]), 0)),
    "pv_count": int(fields[2]),
    }
456

    
457

    
458
def _GatherBlockDevs(instance):
  """Set up an instance's block device(s).

  This is run on the primary node at instance startup. The block
  devices must be already assembled.

  Returns a list of (disk, device) pairs; raises BlockDeviceError
  if any disk can't be found.

  """
  devices = []
  for disk in instance.disks:
    found = _RecursiveFindBD(disk)
    if found is None:
      raise errors.BlockDeviceError("Block device '%s' is not set up." %
                                    str(disk))
    found.Open()
    devices.append((disk, found))
  return devices
474

    
475

    
476
def StartInstance(instance, extra_args):
477
  """Start an instance.
478

479
  Args:
480
    instance - name of instance to start.
481
  """
482

    
483
  running_instances = GetInstanceList()
484

    
485
  if instance.name in running_instances:
486
    return True
487

    
488
  block_devices = _GatherBlockDevs(instance)
489
  hyper = hypervisor.GetHypervisor()
490

    
491
  try:
492
    hyper.StartInstance(instance, block_devices, extra_args)
493
  except errors.HypervisorError, err:
494
    logger.Error("Failed to start instance: %s" % err)
495
    return False
496

    
497
  return True
498

    
499

    
500
def ShutdownInstance(instance):
501
  """Shut an instance down.
502

503
  Args:
504
    instance - name of instance to shutdown.
505
  """
506

    
507
  running_instances = GetInstanceList()
508

    
509
  if instance.name not in running_instances:
510
    return True
511

    
512
  hyper = hypervisor.GetHypervisor()
513
  try:
514
    hyper.StopInstance(instance)
515
  except errors.HypervisorError, err:
516
    logger.Error("Failed to stop instance: %s" % err)
517
    return False
518

    
519
  # test every 10secs for 2min
520
  shutdown_ok = False
521

    
522
  time.sleep(1)
523
  for dummy in range(11):
524
    if instance.name not in GetInstanceList():
525
      break
526
    time.sleep(10)
527
  else:
528
    # the shutdown did not succeed
529
    logger.Error("shutdown of '%s' unsuccessful, using destroy" % instance)
530

    
531
    try:
532
      hyper.StopInstance(instance, force=True)
533
    except errors.HypervisorError, err:
534
      logger.Error("Failed to stop instance: %s" % err)
535
      return False
536

    
537
    time.sleep(1)
538
    if instance.name in GetInstanceList():
539
      logger.Error("could not shutdown instance '%s' even by destroy")
540
      return False
541

    
542
  return True
543

    
544

    
545
def CreateBlockDevice(disk, size, on_primary):
  """Creates a block device for an instance.

  Args:
    disk: a ganeti.objects.Disk object describing the device to create
    size: the size of the physical underlying devices
    on_primary: whether this node is the instance's primary; on the
                primary (or when the disk type requires it) the device
                is also assembled and opened after creation

  Returns:
    the new unique_id of the device (this can sometime be
    computed only after creation), or None. On secondary nodes,
    it's not required to return anything.

  """
  # recursively create/assemble the children first, so they can be
  # passed to the creation of this device
  clist = []
  if disk.children:
    for child in disk.children:
      crdev = _RecursiveAssembleBD(child, on_primary)
      if on_primary or disk.AssembleOnSecondary():
        # we need the children open in case the device itself has to
        # be assembled
        crdev.Open()
      else:
        crdev.Close()
      clist.append(crdev)
  try:
    # if a device with the same id already exists, remove it before
    # re-creating; lookup failures mean there is nothing to remove
    device = bdev.FindDevice(disk.dev_type, disk.physical_id, clist)
    if device is not None:
      logger.Info("removing existing device %s" % disk)
      device.Remove()
  except errors.BlockDeviceError, err:
    pass

  device = bdev.Create(disk.dev_type, disk.physical_id,
                       clist, size)
  if device is None:
    raise ValueError("Can't create child device for %s, %s" %
                     (disk, size))
  if on_primary or disk.AssembleOnSecondary():
    device.Assemble()
    # cap the resync speed (value presumably in KiB/s -- TODO confirm)
    device.SetSyncSpeed(30*1024)
    if on_primary or disk.OpenOnSecondary():
      device.Open(force=True)
  physical_id = device.unique_id
  return physical_id
591

    
592

    
593
def RemoveBlockDevice(disk):
  """Remove a block device.

  This is intended to be called recursively.

  Args:
    disk: the objects.Disk instance to remove

  Returns:
    True if the device (and all attempted children) were removed,
    False otherwise.

  """
  try:
    # since we are removing the device, allow a partial match
    # this allows removal of broken mirrors
    rdev = _RecursiveFindBD(disk, allow_partial=True)
  except errors.BlockDeviceError, err:
    # probably can't attach
    logger.Info("Can't attach to device %s in remove" % disk)
    rdev = None
  if rdev is not None:
    result = rdev.Remove()
  else:
    # nothing found means nothing to remove; treat as success
    result = True
  if disk.children:
    for child in disk.children:
      # NOTE(review): the 'and' short-circuits, so once one removal
      # fails the remaining children are not attempted -- confirm
      # whether that is intended
      result = result and RemoveBlockDevice(child)
  return result
615

    
616

    
617
def _RecursiveAssembleBD(disk, as_primary):
  """Activate a block device for an instance.

  This is run on the primary and secondary nodes for an instance.

  This function is called recursively.

  Args:
    disk: a objects.Disk object
    as_primary: if we should make the block device read/write

  Returns:
    the assembled device when assembly took place, or True when the
    disk did not need to be assembled on this node

  If the assembly is not successful, an exception is raised.

  """
  # children are always assembled first, so they can be handed to
  # the parent's attach/assemble call below
  children = []
  if disk.children:
    for chld_disk in disk.children:
      children.append(_RecursiveAssembleBD(chld_disk, as_primary))

  if as_primary or disk.AssembleOnSecondary():
    r_dev = bdev.AttachOrAssemble(disk.dev_type, disk.physical_id, children)
    # cap the resync speed (value presumably in KiB/s -- TODO confirm)
    r_dev.SetSyncSpeed(30*1024)
    result = r_dev
    if as_primary or disk.OpenOnSecondary():
      r_dev.Open()
    else:
      r_dev.Close()
  else:
    result = True
  return result
650

    
651

    
652
def AssembleBlockDevice(disk, as_primary):
  """Activate a block device for an instance.

  This is a wrapper over _RecursiveAssembleBD.

  Returns:
    a /dev path for primary nodes
    True for secondary nodes

  """
  assembled = _RecursiveAssembleBD(disk, as_primary)
  if isinstance(assembled, bdev.BlockDev):
    return assembled.dev_path
  return assembled
666

    
667

    
668
def ShutdownBlockDevice(disk):
  """Shut down a block device.

  First, if the device is assembled (can `Attach()`), then the device
  is shutdown. Then the children of the device are shutdown.

  This function is called recursively. Note that we don't cache the
  children or such, as oppossed to assemble, shutdown of different
  devices doesn't require that the upper device was active.

  """
  found = _RecursiveFindBD(disk)
  if found is None:
    # not assembled, nothing to shut down at this level
    result = True
  else:
    result = found.Shutdown()
  for child in (disk.children or []):
    result = result and ShutdownBlockDevice(child)
  return result
688

    
689

    
690
def MirrorAddChild(md_cdev, new_cdev):
  """Extend an MD raid1 array.

  Returns True on success, False if either device can't be found.

  """
  # a partial match is fine here: we are repairing the mirror
  array_dev = _RecursiveFindBD(md_cdev, allow_partial=True)
  if array_dev is None:
    logger.Error("Can't find md device")
    return False

  child_dev = _RecursiveFindBD(new_cdev)
  if child_dev is None:
    logger.Error("Can't find new device to add")
    return False

  child_dev.Open()
  array_dev.AddChild(child_dev)
  return True
705

    
706

    
707
def MirrorRemoveChild(md_cdev, new_cdev):
  """Reduce an MD raid1 array.

  Args:
    md_cdev: the disk object describing the md array
    new_cdev: the disk object describing the child to remove

  Returns:
    True on success, False if either device can't be found.

  """
  md_bdev = _RecursiveFindBD(md_cdev)
  if md_bdev is None:
    # log the failure, for consistency with MirrorAddChild (previously
    # this path failed silently)
    logger.Error("Can't find md device")
    return False
  new_bdev = _RecursiveFindBD(new_cdev)
  if new_bdev is None:
    logger.Error("Can't find device to remove")
    return False
  new_bdev.Open()
  md_bdev.RemoveChild(new_bdev.dev_path)
  return True
720

    
721

    
722
def GetMirrorStatus(disks):
  """Get the mirroring status of a list of devices.

  Args:
    disks: list of `objects.Disk`

  Returns:
    list of (mirror_done, estimated_time) tuples, which
    are the result of bdev.BlockDevice.CombinedSyncStatus()

  Raises:
    errors.BlockDeviceError if any of the devices can't be found

  """
  stats = []
  for dsk in disks:
    rbd = _RecursiveFindBD(dsk)
    if rbd is None:
      raise errors.BlockDeviceError, "Can't find device %s" % str(dsk)
    stats.append(rbd.CombinedSyncStatus())
  return stats
740

    
741

    
742
def _RecursiveFindBD(disk, allow_partial=False):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance
    allow_partial: don't abort the find if a child of the
                   device can't be found; this is intended to be
                   used when repairing mirrors

  Returns:
    None if the device can't be found
    otherwise the device instance

  """
  # NOTE(review): 'allow_partial' is accepted and documented but never
  # used below, nor passed on to the recursive calls for the children
  # -- confirm whether the lenient lookup still needs implementing
  children = []
  if disk.children:
    for chdisk in disk.children:
      children.append(_RecursiveFindBD(chdisk))

  return bdev.FindDevice(disk.dev_type, disk.physical_id, children)
764

    
765

    
766
def FindBlockDevice(disk):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance
  Returns:
    None if the device can't be found
    (device_path, major, minor, sync_percent, estimated_time, is_degraded)

  """
  found = _RecursiveFindBD(disk)
  if found is None:
    return None
  sync_percent, estimated_time, is_degraded = found.GetSyncStatus()
  return (found.dev_path, found.major, found.minor,
          sync_percent, estimated_time, is_degraded)
783

    
784

    
785
def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
  """Write a file to the filesystem.

  This allows the master to overwrite(!) a file. It will only perform
  the operation if the file belongs to a list of configuration files.

  Returns True on success, False if the name is relative or not an
  allowed upload target.

  """
  if not os.path.isabs(file_name):
    logger.Error("Filename passed to UploadFile is not absolute: '%s'" %
                 file_name)
    return False

  allowed_targets = [constants.CLUSTER_CONF_FILE, "/etc/hosts",
                     "/etc/ssh/ssh_known_hosts"]
  if file_name not in allowed_targets:
    logger.Error("Filename passed to UploadFile not in allowed"
                 " upload targets: '%s'" % file_name)
    return False

  # write to a temp file in the same directory, then rename into place
  dir_name, small_name = os.path.split(file_name)
  fd, new_name = tempfile.mkstemp('.new', small_name, dir_name)
  # here we need to make sure we remove the temp file, if any error
  # leaves it in place
  try:
    os.chown(new_name, uid, gid)
    os.chmod(new_name, mode)
    os.write(fd, data)
    os.fsync(fd)
    os.utime(new_name, (atime, mtime))
    os.rename(new_name, file_name)
  finally:
    os.close(fd)
    utils.RemoveFile(new_name)
  return True
818

    
819
def _ErrnoOrStr(err):
820
  """Format an EnvironmentError exception.
821

822
  If the `err` argument has an errno attribute, it will be looked up
823
  and converted into a textual EXXXX description. Otherwise the string
824
  representation of the error will be returned.
825

826
  """
827
  if hasattr(err, 'errno'):
828
    detail = errno.errorcode[err.errno]
829
  else:
830
    detail = str(err)
831
  return detail
832

    
833

    
834
def _OSOndiskVersion(name, os_dir=None):
  """Compute and return the api version of a given OS.

  This function will try to read the api version of the os given by
  the 'name' parameter. By default, it will use the constants.OS_DIR
  as top-level directory for OSes, but this can be overriden by the
  use of the os_dir parameter.

  Returns:
    the OS api version as an integer

  Raises:
    errors.InvalidOS if the version file is missing, not a regular
    file, unreadable or does not contain an integer

  """
  if os_dir is None:
    os_dir = os.path.sep.join([constants.OS_DIR, name])

  api_file = os.path.sep.join([os_dir, "ganeti_api_version"])

  try:
    st = os.stat(api_file)
  except EnvironmentError, err:
    raise errors.InvalidOS, (name, "'ganeti_api_version' file not"
                             " found (%s)" % _ErrnoOrStr(err))

  # reject anything that is not a plain file (directories, devices, ...)
  if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
    raise errors.InvalidOS, (name, "'ganeti_api_version' file is not"
                             " a regular file")

  try:
    f = open(api_file)
    try:
      # the version file is expected to be tiny; 256 bytes is plenty
      api_version = f.read(256)
    finally:
      f.close()
  except EnvironmentError, err:
    raise errors.InvalidOS, (name, "error while reading the"
                             " API version (%s)" % _ErrnoOrStr(err))

  api_version = api_version.strip()
  try:
    api_version = int(api_version)
  except (TypeError, ValueError), err:
    raise errors.InvalidOS, (name, "API version is not integer (%s)" %
                             str(err))

  return api_version
878

    
879
def DiagnoseOS(top_dir=None):
  """Compute the validity for all OSes.

  For each name in the given top_dir parameter (if not given, defaults
  to constants.OS_DIR), it will return an object. If this is a valid
  os, the object will be an instance of the object.OS class. If not,
  it will be an instance of errors.InvalidOS and this signifies that
  this name does not correspond to a valid OS.

  Returns:
    list of objects (OS instances and/or InvalidOS exceptions), or
    False if the OS directory itself can't be listed

  """
  if top_dir is None:
    top_dir = constants.OS_DIR

  try:
    f_names = os.listdir(top_dir)
  except EnvironmentError, err:
    logger.Error("Can't list the OS directory: %s" % str(err))
    return False
  result = []
  for name in f_names:
    try:
      os_inst = OSFromDisk(name, os.path.sep.join([top_dir, name]))
      result.append(os_inst)
    except errors.InvalidOS, err:
      # invalid OSes are reported via the exception object itself
      result.append(err)

  return result
909

    
910

    
911
def OSFromDisk(name, os_dir=None):
  """Create an OS instance from disk.

  This function will return an OS instance if the given name is a
  valid OS name. Otherwise, it will raise an appropriate
  `errors.InvalidOS` exception, detailing why this is not a valid
  OS.

  Args:
    name: the OS name
    os_dir: alternative directory to read the OS from (defaults to
            constants.OS_DIR/name)

  Returns:
    an objects.OS instance with the scripts and api version filled in

  Raises:
    errors.InvalidOS on api version mismatch or missing/invalid scripts

  """
  if os_dir is None:
    os_dir = os.path.sep.join([constants.OS_DIR, name])

  api_version = _OSOndiskVersion(name, os_dir)

  if api_version != constants.OS_API_VERSION:
    raise errors.InvalidOS, (name, "API version mismatch (found %s want %s)"
                             % (api_version, constants.OS_API_VERSION))

  # OS Scripts dictionary, we will populate it with the actual script names
  os_scripts = {'create': '', 'export': '', 'import': ''}

  for script in os_scripts:
    os_scripts[script] = os.path.sep.join([os_dir, script])

    try:
      st = os.stat(os_scripts[script])
    except EnvironmentError, err:
      raise errors.InvalidOS, (name, "'%s' script missing (%s)" %
                               (script, _ErrnoOrStr(err)))

    # every script must be executable by its owner...
    if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
      raise errors.InvalidOS, (name, "'%s' script not executable" % script)

    # ...and a regular file (not a directory or device)
    if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
      raise errors.InvalidOS, (name, "'%s' is not a regular file" % script)


  return objects.OS(name=name, path=os_dir,
                    create_script=os_scripts['create'],
                    export_script=os_scripts['export'],
                    import_script=os_scripts['import'],
                    api_version=api_version)
953

    
954

    
955
def SnapshotBlockDevice(disk):
956
  """Create a snapshot copy of a block device.
957

958
  This function is called recursively, and the snapshot is actually created
959
  just for the leaf lvm backend device.
960

961
  Args:
962
    disk: the disk to be snapshotted
963

964
  Returns:
965
    a config entry for the actual lvm device snapshotted.
966
  """
967

    
968
  if disk.children:
969
    if len(disk.children) == 1:
970
      # only one child, let's recurse on it
971
      return SnapshotBlockDevice(disk.children[0])
972
    else:
973
      # more than one child, choose one that matches
974
      for child in disk.children:
975
        if child.size == disk.size:
976
          # return implies breaking the loop
977
          return SnapshotBlockDevice(child)
978
  elif disk.dev_type == "lvm":
979
    r_dev = _RecursiveFindBD(disk)
980
    if r_dev is not None:
981
      # let's stay on the safe side and ask for the full size, for now
982
      return r_dev.Snapshot(disk.size)
983
    else:
984
      return None
985
  else:
986
    raise errors.ProgrammerError, ("Cannot snapshot non-lvm block device"
987
                                   "'%s' of type '%s'" %
988
                                   (disk.unique_id, disk.dev_type))
989

    
990

    
991
def ExportSnapshot(disk, dest_node, instance):
  """Export a block device snapshot to a remote node.

  The snapshot's contents are dumped by the instance OS's export script,
  compressed with gzip and streamed over ssh into a file inside the
  remote node's export staging directory ("<instance>.new").

  Args:
    disk: the snapshot block device
    dest_node: the node to send the image to
    instance: instance being exported

  Returns:
    True if successful, False otherwise.
  """
  # the export script comes from the instance's OS definition
  inst_os = OSFromDisk(instance.os)
  export_script = inst_os.export_script

  # per-run log file for the export script's stderr, named after the OS,
  # the instance and the current timestamp
  logfile = "%s/exp-%s-%s-%s.log" % (constants.LOG_OS_DIR, inst_os.name,
                                     instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  # resolve and activate the actual block device behind the snapshot
  real_os_dev = _RecursiveFindBD(disk)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(disk))
  real_os_dev.Open()

  # staging directory on the destination; FinalizeExport later renames
  # "<name>.new" to its final place
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
  destfile = disk.physical_id[1]

  # the target command is built out of three individual commands,
  # which are joined by pipes; we check each individual command for
  # valid parameters

  expcmd = utils.BuildShellCmd("cd %s; %s -i %s -b %s 2>%s", inst_os.path,
                               export_script, instance.name,
                               real_os_dev.dev_path, logfile)

  comprcmd = "gzip"

  # BatchMode/StrictHostKeyChecking: fail instead of prompting; the remote
  # side creates the staging dir and writes the piped data into it
  remotecmd = utils.BuildShellCmd("ssh -q -oStrictHostKeyChecking=yes"
                                  " -oBatchMode=yes -oEscapeChar=none"
                                  " %s 'mkdir -p %s; cat > %s/%s'",
                                  dest_node, destdir, destdir, destfile)

  # all commands have been checked, so we're safe to combine them
  command = '|'.join([expcmd, comprcmd, remotecmd])

  result = utils.RunCmd(command)

  if result.failed:
    logger.Error("os snapshot export command '%s' returned error: %s"
                 " output: %s" %
                 (command, result.fail_reason, result.output))
    return False

  return True
1049
def FinalizeExport(instance, snap_disks):
  """Write out the export configuration information.

  Builds the export's ini-style config file in the staging directory
  ("<instance>.new") and then moves the staging directory over the final
  export directory.

  Args:
    instance: instance configuration
    snap_disks: snapshot block devices

  Returns:
    False in case of error, True otherwise.

  """
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
  finaldestdir = os.path.join(constants.EXPORT_DIR, instance.name)

  config = objects.SerializableConfigParser()

  config.add_section(constants.INISECT_EXP)
  config.set(constants.INISECT_EXP, 'version', '0')
  config.set(constants.INISECT_EXP, 'timestamp', '%d' % int(time.time()))
  config.set(constants.INISECT_EXP, 'source', instance.primary_node)
  config.set(constants.INISECT_EXP, 'os', instance.os)
  config.set(constants.INISECT_EXP, 'compression', 'gzip')

  config.add_section(constants.INISECT_INS)
  config.set(constants.INISECT_INS, 'name', instance.name)
  config.set(constants.INISECT_INS, 'memory', '%d' % instance.memory)
  config.set(constants.INISECT_INS, 'vcpus', '%d' % instance.vcpus)
  config.set(constants.INISECT_INS, 'disk_template', instance.disk_template)
  # pre-initialize the counters so an instance without nics or disks does
  # not raise a NameError below; note that the '*_count' keys store the
  # index of the last entry (i.e. count - 1), which we keep as-is for
  # compatibility with the import side
  nic_count = -1
  for nic_count, nic in enumerate(instance.nics):
    config.set(constants.INISECT_INS, 'nic%d_mac' %
               nic_count, '%s' % nic.mac)
    config.set(constants.INISECT_INS, 'nic%d_ip' % nic_count, '%s' % nic.ip)
  # TODO: redundant: on load can read nics until it doesn't exist
  config.set(constants.INISECT_INS, 'nic_count', '%d' % nic_count)

  disk_count = -1
  for disk_count, disk in enumerate(snap_disks):
    config.set(constants.INISECT_INS, 'disk%d_ivname' % disk_count,
               ('%s' % disk.iv_name))
    config.set(constants.INISECT_INS, 'disk%d_dump' % disk_count,
               ('%s' % disk.physical_id[1]))
    config.set(constants.INISECT_INS, 'disk%d_size' % disk_count,
               ('%d' % disk.size))
  config.set(constants.INISECT_INS, 'disk_count', '%d' % disk_count)

  cff = os.path.join(destdir, constants.EXPORT_CONF_FILE)
  cfo = open(cff, 'w')
  try:
    config.write(cfo)
  finally:
    cfo.close()

  # drop any stale previous export, then move the staging dir in place
  shutil.rmtree(finaldestdir, True)
  shutil.move(destdir, finaldestdir)

  return True
1106
def ExportInfo(dest):
  """Get export configuration information.

  Args:
    dest: directory containing the export

  Returns:
    A serializable config file containing the export info, or None if the
    expected sections are missing.

  """
  config_path = os.path.join(dest, constants.EXPORT_CONF_FILE)

  export_conf = objects.SerializableConfigParser()
  export_conf.read(config_path)

  # a valid export must carry both the export-level and the
  # instance-level sections
  if (export_conf.has_section(constants.INISECT_EXP) and
      export_conf.has_section(constants.INISECT_INS)):
    return export_conf

  return None
1129
def ImportOSIntoInstance(instance, os_disk, swap_disk, src_node, src_image):
1130
  """Import an os image into an instance.
1131

1132
  Args:
1133
    instance: the instance object
1134
    os_disk: the instance-visible name of the os device
1135
    swap_disk: the instance-visible name of the swap device
1136
    src_node: node holding the source image
1137
    src_image: path to the source image on src_node
1138

1139
  Returns:
1140
    False in case of error, True otherwise.
1141

1142
  """
1143

    
1144
  inst_os = OSFromDisk(instance.os)
1145
  import_script = inst_os.import_script
1146

    
1147
  for os_device in instance.disks:
1148
    if os_device.iv_name == os_disk:
1149
      break
1150
  else:
1151
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
1152
    return False
1153

    
1154
  for swap_device in instance.disks:
1155
    if swap_device.iv_name == swap_disk:
1156
      break
1157
  else:
1158
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
1159
    return False
1160

    
1161
  real_os_dev = _RecursiveFindBD(os_device)
1162
  if real_os_dev is None:
1163
    raise errors.BlockDeviceError, ("Block device '%s' is not set up" %
1164
                                    str(os_device))
1165
  real_os_dev.Open()
1166

    
1167
  real_swap_dev = _RecursiveFindBD(swap_device)
1168
  if real_swap_dev is None:
1169
    raise errors.BlockDeviceError, ("Block device '%s' is not set up" %
1170
                                    str(swap_device))
1171
  real_swap_dev.Open()
1172

    
1173
  logfile = "%s/import-%s-%s-%s.log" % (constants.LOG_OS_DIR, instance.os,
1174
                                        instance.name, int(time.time()))
1175
  if not os.path.exists(constants.LOG_OS_DIR):
1176
    os.mkdir(constants.LOG_OS_DIR, 0750)
1177

    
1178
  remotecmd = utils.BuildShellCmd("ssh -q -oStrictHostKeyChecking=yes"
1179
                                  " -oBatchMode=yes -oEscapeChar=none"
1180
                                  " %s 'cat %s'", src_node, src_image)
1181

    
1182
  comprcmd = "gunzip"
1183
  impcmd = utils.BuildShellCmd("(cd %s; %s -i %s -b %s -s %s &>%s)",
1184
                               inst_os.path, import_script, instance.name,
1185
                               real_os_dev.dev_path, real_swap_dev.dev_path,
1186
                               logfile)
1187

    
1188
  command = '|'.join([remotecmd, comprcmd, impcmd])
1189

    
1190
  result = utils.RunCmd(command)
1191

    
1192
  if result.failed:
1193
    logger.Error("os import command '%s' returned error: %s"
1194
                 " output: %s" %
1195
                 (command, result.fail_reason, result.output))
1196
    return False
1197

    
1198
  return True
1199

    
1200

    
1201
def ListExports():
  """Return a list of exports currently available on this machine.

  Each entry is the name of a subdirectory of constants.EXPORT_DIR; if
  the export directory does not exist, an empty list is returned.

  """
  # guard clause: no export dir means no exports
  if not os.path.isdir(constants.EXPORT_DIR):
    return []
  return os.listdir(constants.EXPORT_DIR)
1210
def RemoveExport(export):
  """Remove an existing export from the node.

  Args:
    export: the name of the export to remove

  Returns:
    False in case of error, True otherwise.
  """
  # TODO: catch some of the relevant exceptions and provide a pretty
  # error message if rmtree fails.
  shutil.rmtree(os.path.join(constants.EXPORT_DIR, export))

  return True
1229
class HooksRunner(object):
  """Hook runner.

  This class is instantiated on the node side (ganeti-noded) and not on
  the master side.

  """
  # hook scripts whose basename does not match this pattern are skipped
  # (status HKR_SKIP) rather than executed
  RE_MASK = re.compile("^[a-zA-Z0-9_-]+$")

  def __init__(self, hooks_base_dir=None):
    """Constructor for hooks runner.

    Args:
      - hooks_base_dir: if not None, this overrides the
        constants.HOOKS_BASE_DIR (useful for unittests)
      - logs_base_dir: if not None, this overrides the
        constants.LOG_HOOKS_DIR (useful for unittests)
      - logging: enable or disable logging of script output

    """
    if hooks_base_dir is None:
      hooks_base_dir = constants.HOOKS_BASE_DIR
    self._BASE_DIR = hooks_base_dir

  @staticmethod
  def ExecHook(script, env):
    """Exec one hook script.

    Args:
     - phase: the phase
     - script: the full path to the script
     - env: the environment with which to exec the script

    Returns:
      a tuple (success, output): success is True iff the script exited
      with status 0; output is (at most the first 4096 bytes of) the
      script's combined stdout/stderr

    """
    # exec the process using subprocess and log the output
    fdstdin = None
    try:
      # the hook gets /dev/null on stdin and runs from / with only the
      # caller-supplied environment; stderr is folded into stdout
      fdstdin = open("/dev/null", "r")
      child = subprocess.Popen([script], stdin=fdstdin, stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT, close_fds=True,
                               shell=False, cwd="/",env=env)
      output = ""
      try:
        # NOTE: only a single 4096-byte read is done, so longer output
        # is truncated; closing stdout early lets the child finish
        output = child.stdout.read(4096)
        child.stdout.close()
      except EnvironmentError, err:
        output += "Hook script error: %s" % str(err)

      # retry wait() if it is interrupted by a signal (EINTR)
      while True:
        try:
          result = child.wait()
          break
        except EnvironmentError, err:
          if err.errno == errno.EINTR:
            continue
          raise
    finally:
      # try not to leak fds
      for fd in (fdstdin, ):
        if fd is not None:
          try:
            fd.close()
          except EnvironmentError, err:
            # just log the error
            #logger.Error("While closing fd %s: %s" % (fd, err))
            pass

    return result == 0, output

  def RunHooks(self, hpath, phase, env):
    """Run the scripts in the hooks directory.

    This method will not be usually overriden by child opcodes.

    Looks up the directory "<hpath>-<pre|post>.d" under the hooks base
    dir and executes every regular, executable, RE_MASK-named file in
    it, in sorted order.

    Returns:
      a list of ("<subdir>/<script>", status, output) tuples, where
      status is one of constants.HKR_SKIP/HKR_FAIL/HKR_SUCCESS; the list
      is empty if the hooks directory cannot be listed

    Raises:
      errors.ProgrammerError: if phase is neither HOOKS_PHASE_PRE nor
      HOOKS_PHASE_POST

    """
    if phase == constants.HOOKS_PHASE_PRE:
      suffix = "pre"
    elif phase == constants.HOOKS_PHASE_POST:
      suffix = "post"
    else:
      raise errors.ProgrammerError, ("Unknown hooks phase: '%s'" % phase)
    rr = []

    subdir = "%s-%s.d" % (hpath, suffix)
    dir_name = "%s/%s" % (self._BASE_DIR, subdir)
    try:
      dir_contents = os.listdir(dir_name)
    except OSError, err:
      # must log
      return rr

    # we use the standard python sort order,
    # so 00name is the recommended naming scheme
    dir_contents.sort()
    for relname in dir_contents:
      fname = os.path.join(dir_name, relname)
      # non-files, non-executables and badly-named entries are reported
      # as skipped instead of being run
      if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
          self.RE_MASK.match(relname) is not None):
        rrval = constants.HKR_SKIP
        output = ""
      else:
        result, output = self.ExecHook(fname, env)
        if not result:
          rrval = constants.HKR_FAIL
        else:
          rrval = constants.HKR_SUCCESS
      rr.append(("%s/%s" % (subdir, relname), rrval, output))

    return rr