Statistics
| Branch: | Tag: | Revision:

root / lib / backend.py @ 3ef10550

History | View | Annotate | Download (39.1 kB)

1
#!/usr/bin/python
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Functions used by the node daemon"""
23

    
24

    
25
import os
26
import os.path
27
import shutil
28
import time
29
import tempfile
30
import stat
31
import errno
32
import re
33
import subprocess
34

    
35
from ganeti import logger
36
from ganeti import errors
37
from ganeti import utils
38
from ganeti import ssh
39
from ganeti import hypervisor
40
from ganeti import constants
41
from ganeti import bdev
42
from ganeti import objects
43
from ganeti import ssconf
44

    
45

    
46
def StartMaster():
  """Activate local node as master node.

  This runs the cluster master startup script (and, per the original
  design, registers the cron script).

  Returns:
    True on success, False (after logging) if the script failed.

  """
  result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "start"])
  if not result.failed:
    return True

  logger.Error("could not activate cluster interface with command %s,"
               " error: '%s'" % (result.cmd, result.output))
  return False
62

    
63

    
64
def StopMaster():
  """Deactivate this node as master.

  This runs the cluster master stop script (and, per the original
  design, removes the link to the master cron script).

  Returns:
    True on success, False (after logging) if the script failed.

  """
  result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "stop"])
  if not result.failed:
    return True

  logger.Error("could not deactivate cluster interface with command %s,"
               " error: '%s'" % (result.cmd, result.output))
  return False
80

    
81

    
82
def _WriteKeyFile(path, data):
  """Write data to path, making sure the file is closed even on errors.

  The original open/write/close sequences leaked the file handle if
  write() raised; this helper closes it in all cases.

  """
  f = open(path, 'w')
  try:
    f.write(data)
  finally:
    f.close()


def AddNode(dsa, dsapub, rsa, rsapub, sshkey, sshpub):
  """Add this node to the cluster.

  This installs the given ssh host keys (dsa and rsa, private and
  public parts), installs root's ssh identity, authorizes the new
  public key and restarts the ssh daemon.

  Args:
    dsa: private dsa host key
    dsapub: public dsa host key
    rsa: private rsa host key
    rsapub: public rsa host key
    sshkey: private ssh identity for root
    sshpub: public ssh identity for root

  Returns:
    True

  """
  _WriteKeyFile("/etc/ssh/ssh_host_rsa_key", rsa)
  _WriteKeyFile("/etc/ssh/ssh_host_rsa_key.pub", rsapub)
  _WriteKeyFile("/etc/ssh/ssh_host_dsa_key", dsa)
  _WriteKeyFile("/etc/ssh/ssh_host_dsa_key.pub", dsapub)

  if not os.path.isdir("/root/.ssh"):
    os.mkdir("/root/.ssh")

  _WriteKeyFile("/root/.ssh/id_dsa", sshkey)
  _WriteKeyFile("/root/.ssh/id_dsa.pub", sshpub)

  # authorize the freshly written public key for root logins
  f = open('/root/.ssh/id_dsa.pub', 'r')
  try:
    utils.AddAuthorizedKey('/root/.ssh/authorized_keys', f.read(8192))
  finally:
    f.close()

  # restart sshd so the new host keys take effect
  utils.RunCmd([constants.SSH_INITD_SCRIPT, "restart"])

  return True
126

    
127

    
128
def LeaveCluster():
  """Cleans up the current node and prepares it to be removed from the cluster.

  This removes all files directly under the data directory and
  revokes/removes root's cluster ssh identity.

  """
  if os.path.exists(constants.DATA_DIR):
    for dirpath, dirnames, filenames in os.walk(constants.DATA_DIR):
      if dirpath != constants.DATA_DIR:
        # only the top-level files are removed, not subdirectories
        continue
      for fname in filenames:
        os.unlink(os.path.join(dirpath, fname))

  pub_key_file = open('/root/.ssh/id_dsa.pub', 'r')
  try:
    utils.RemoveAuthorizedKey('/root/.ssh/authorized_keys',
                              pub_key_file.read(8192))
  finally:
    pub_key_file.close()

  utils.RemoveFile('/root/.ssh/id_dsa')
  utils.RemoveFile('/root/.ssh/id_dsa.pub')
146

    
147

    
148
def GetNodeInfo(vgname):
  """Collect assorted information about this node.

  Args:
    vgname: the name of the volume group to query

  Returns:
    a dict with (at least) the keys:
      'vg_size': size of the configured volume group in MiB
      'vg_free': free space of the volume group in MiB
      'bootid': the current boot id of the node
    plus whatever the hypervisor reports for the node (typically the
    dom0/free/total memory values, in MiB).

  """
  node_info = {}

  vginfo = _GetVGInfo(vgname)
  node_info['vg_size'] = vginfo['vg_size']
  node_info['vg_free'] = vginfo['vg_free']

  hyp_info = hypervisor.GetHypervisor().GetNodeInfo()
  if hyp_info is not None:
    node_info.update(hyp_info)

  # the boot id changes on every reboot, so it can be used to detect
  # node restarts
  f = open("/proc/sys/kernel/random/boot_id", 'r')
  try:
    node_info["bootid"] = f.read(128).rstrip("\n")
  finally:
    f.close()

  return node_info
180

    
181

    
182
def VerifyNode(what):
  """Verify the status of the local node.

  Args:
    what: a dictionary describing which checks to run:
      'filelist': list of files for which to compute checksums
      'nodelist': list of nodes to check ssh communication with
      'hypervisor': if present, run the hypervisor-specific verify

  Requested files on the local node are checksummed and the result
  returned. Each node in 'nodelist' is verified via ssh hostname
  resolution; only the nodes that FAILED are reported, mapped to the
  failure message.

  Returns:
    a dict with one entry per requested check

  """
  result = {}

  if 'hypervisor' in what:
    result['hypervisor'] = hypervisor.GetHypervisor().Verify()

  if 'filelist' in what:
    result['filelist'] = utils.FingerprintFiles(what['filelist'])

  if 'nodelist' in what:
    failed_nodes = {}
    for node in what['nodelist']:
      success, message = ssh.VerifyNodeHostname(node)
      if not success:
        failed_nodes[node] = message
    result['nodelist'] = failed_nodes

  return result
215

    
216

    
217
def GetVolumeList(vg_name):
  """Compute the list of logical volumes and their sizes.

  Args:
    vg_name: the volume group to list

  Returns:
    a dict mapping each logical volume name to its size as reported
    by lvs (e.g. "20.06M"); an empty dict if lvs failed.

  """
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m",
                         "-oname,size", vg_name])
  if result.failed:
    logger.Error("Failed to list logical volumes, lvs output: %s" %
                 result.output)
    return {}

  volumes = {}
  for line in result.output.splitlines():
    # each line holds exactly "name size"
    name, size = line.split()
    volumes[name] = size
  return volumes
234

    
235

    
236
def ListVolumeGroups():
  """List the volume groups and their size.

  This is a thin wrapper that simply delegates to the utils module.

  Returns:
    Dictionary with keys volume name and values the size of the volume

  """
  return utils.ListVolumeGroups()
244

    
245

    
246
def NodeVolumes():
  """List all volumes on this node.

  Returns:
    a list of dicts, one per logical volume, with the keys 'name',
    'size', 'dev' (the physical device backing the volume, without
    the extent suffix) and 'vg'; an empty dict if lvs failed.

  """
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=|",
                         "--options=lv_name,lv_size,devices,vg_name"])
  if result.failed:
    logger.Error("Failed to list logical volumes, lvs output: %s" %
                 result.output)
    return {}

  def _ParseLine(line):
    """Convert one pipe-separated lvs line into a volume dict."""
    fields = line.split('|')
    device = fields[2].strip()
    return {
      'name': fields[0].strip(),
      'size': fields[1].strip(),
      # lvs reports devices as "/dev/path(extent)"; keep only the path
      'dev': device.split('(')[0],
      'vg': fields[3].strip(),
      }

  return [_ParseLine(line) for line in result.output.splitlines()]
273

    
274

    
275
def BridgesExist(bridges_list):
  """Check whether all the named bridges exist on this node.

  Args:
    bridges_list: list of bridge names to check

  Returns:
    True if every bridge exists; False as soon as one is missing.

  """
  for bridge_name in bridges_list:
    if utils.BridgeExists(bridge_name):
      continue
    return False
  return True
287

    
288

    
289
def GetInstanceList():
290
  """ provides a list of instances
291

292
  Returns:
293
    A list of all running instances on the current node
294
    - instance1.example.com
295
    - instance2.example.com
296

297
  """
298
  try:
299
    names = hypervisor.GetHypervisor().ListInstances()
300
  except errors.HypervisorError, err:
301
    logger.Error("error enumerating instances: %s" % str(err))
302
    raise
303

    
304
  return names
305

    
306

    
307
def GetInstanceInfo(instance):
  """Return runtime information about a single instance.

  Args:
    instance: name of the instance (e.g. instance1.example.com)

  Returns:
    a dict with the keys:
      'memory': memory size of the instance (int)
      'state': run state of the instance (string, e.g. '-b---')
      'time': cpu time used by the instance (float)
    or an empty dict if the hypervisor does not know the instance.

  """
  iinfo = hypervisor.GetHypervisor().GetInstanceInfo(instance)
  if iinfo is None:
    return {}
  return {
    'memory': iinfo[2],
    'state': iinfo[4],
    'time': iinfo[5],
    }
331

    
332

    
333
def GetAllInstancesInfo():
  """Gather data about all instances.

  This is the equivalent of `GetInstanceInfo()`, except that it
  computes data for all instances at once with a single hypervisor
  query, thus being faster if one needs data about more than one
  instance.

  Returns:
    a dict of dicts, keyed by instance name, each value holding:
      'memory': memory size of the instance (int)
      'vcpus': number of cpus (int)
      'state': run state of the instance (string)
      'time': cpu time used by the instance (float)

  """
  output = {}
  all_info = hypervisor.GetHypervisor().GetAllInstancesInfo()
  if not all_info:
    return output

  for (name, dummy_id, memory, vcpus, state, times) in all_info:
    output[name] = {
      'memory': memory,
      'vcpus': vcpus,
      'state': state,
      'time': times,
      }
  return output
363

    
364

    
365
def AddOSToInstance(instance, os_disk, swap_disk):
  """Add an os to an instance.

  This runs the OS-specific create script against the instance's os
  and swap block devices, logging the script output to a per-run file.

  Args:
    instance: the instance object
    os_disk: the instance-visible name of the os device
    swap_disk: the instance-visible name of the swap device

  Returns:
    True on success, False if a disk can't be resolved or the create
    script failed.

  Raises:
    errors.BlockDeviceError: if one of the block devices is not
    assembled on this node

  """
  inst_os = OSFromDisk(instance.os)

  create_script = inst_os.create_script

  # translate the instance-visible disk names into disk objects
  os_device = instance.FindDisk(os_disk)
  if os_device is None:
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
    return False

  swap_device = instance.FindDisk(swap_disk)
  if swap_device is None:
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
    return False

  # the devices must already be assembled; Open() makes them usable
  real_os_dev = _RecursiveFindBD(os_device)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(os_device))
  real_os_dev.Open()

  real_swap_dev = _RecursiveFindBD(swap_device)
  if real_swap_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(swap_device))
  real_swap_dev.Open()

  # all script output goes to a timestamped per-run log file
  logfile = "%s/add-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
                                     instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  # NOTE(review): '&>' is a bash-ism; presumably RunCmd executes this
  # through a bash-compatible shell -- confirm
  command = utils.BuildShellCmd("cd %s && %s -i %s -b %s -s %s &>%s",
                                inst_os.path, create_script, instance.name,
                                real_os_dev.dev_path, real_swap_dev.dev_path,
                                logfile)

  result = utils.RunCmd(command)

  if result.failed:
    logger.Error("os create command '%s' returned error: %s"
                 " output: %s" %
                 (command, result.fail_reason, result.output))
    return False

  return True
419

    
420

    
421
def RunRenameInstance(instance, old_name, os_disk, swap_disk):
422
  """Run the OS rename script for an instance.
423

424
  Args:
425
    instance: the instance object
426
    old_name: the old name of the instance
427
    os_disk: the instance-visible name of the os device
428
    swap_disk: the instance-visible name of the swap device
429

430
  """
431
  inst_os = OSFromDisk(instance.os)
432

    
433
  script = inst_os.rename_script
434

    
435
  os_device = instance.FindDisk(os_disk)
436
  if os_device is None:
437
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
438
    return False
439

    
440
  swap_device = instance.FindDisk(swap_disk)
441
  if swap_device is None:
442
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
443
    return False
444

    
445
  real_os_dev = _RecursiveFindBD(os_device)
446
  if real_os_dev is None:
447
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
448
                                  str(os_device))
449
  real_os_dev.Open()
450

    
451
  real_swap_dev = _RecursiveFindBD(swap_device)
452
  if real_swap_dev is None:
453
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
454
                                  str(swap_device))
455
  real_swap_dev.Open()
456

    
457
  logfile = "%s/rename-%s-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
458
                                           old_name,
459
                                           instance.name, int(time.time()))
460
  if not os.path.exists(constants.LOG_OS_DIR):
461
    os.mkdir(constants.LOG_OS_DIR, 0750)
462

    
463
  command = utils.BuildShellCmd("cd %s && %s -o %s -n %s -b %s -s %s &>%s",
464
                                inst_os.path, script, old_name, instance.name,
465
                                real_os_dev.dev_path, real_swap_dev.dev_path,
466
                                logfile)
467

    
468
  result = utils.RunCmd(command)
469

    
470
  if result.failed:
471
    logger.Error("os create command '%s' returned error: %s"
472
                 " output: %s" %
473
                 (command, result.fail_reason, result.output))
474
    return False
475

    
476
  return True
477

    
478

    
479
def _GetVGInfo(vg_name):
  """Retrieve size information about a volume group.

  Args:
    vg_name: the volume group to query

  Returns:
    a dict with the keys:
      'vg_size': total size of the volume group in MiB (int)
      'vg_free': free space in the volume group in MiB (int)
      'pv_count': number of physical disks in the group (int)

  Raises:
    errors.LVMError: if the volume group is not present

  """
  retval = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings",
                         "--nosuffix", "--units=m", "--separator=:", vg_name])

  if retval.failed:
    errmsg = "volume group %s not present" % vg_name
    logger.Error(errmsg)
    raise errors.LVMError(errmsg)

  # vgs emits a single "size:free:pv_count" line
  fields = retval.stdout.strip().split(':')
  return {
    "vg_size": int(round(float(fields[0]), 0)),
    "vg_free": int(round(float(fields[1]), 0)),
    "pv_count": int(fields[2]),
    }
507

    
508

    
509
def _GatherBlockDevs(instance):
  """Set up an instance's block device(s).

  This is run on the primary node at instance startup. The block
  devices must be already assembled.

  Args:
    instance: the instance object

  Returns:
    a list of (disk, device) pairs, the device being the attached
    bdev instance, opened for use.

  Raises:
    errors.BlockDeviceError: if a disk is not assembled

  """
  gathered = []
  for disk in instance.disks:
    rdev = _RecursiveFindBD(disk)
    if rdev is None:
      raise errors.BlockDeviceError("Block device '%s' is not set up." %
                                    str(disk))
    rdev.Open()
    gathered.append((disk, rdev))
  return gathered
525

    
526

    
527
def StartInstance(instance, extra_args):
528
  """Start an instance.
529

530
  Args:
531
    instance - name of instance to start.
532

533
  """
534
  running_instances = GetInstanceList()
535

    
536
  if instance.name in running_instances:
537
    return True
538

    
539
  block_devices = _GatherBlockDevs(instance)
540
  hyper = hypervisor.GetHypervisor()
541

    
542
  try:
543
    hyper.StartInstance(instance, block_devices, extra_args)
544
  except errors.HypervisorError, err:
545
    logger.Error("Failed to start instance: %s" % err)
546
    return False
547

    
548
  return True
549

    
550

    
551
def ShutdownInstance(instance):
552
  """Shut an instance down.
553

554
  Args:
555
    instance - name of instance to shutdown.
556

557
  """
558
  running_instances = GetInstanceList()
559

    
560
  if instance.name not in running_instances:
561
    return True
562

    
563
  hyper = hypervisor.GetHypervisor()
564
  try:
565
    hyper.StopInstance(instance)
566
  except errors.HypervisorError, err:
567
    logger.Error("Failed to stop instance: %s" % err)
568
    return False
569

    
570
  # test every 10secs for 2min
571
  shutdown_ok = False
572

    
573
  time.sleep(1)
574
  for dummy in range(11):
575
    if instance.name not in GetInstanceList():
576
      break
577
    time.sleep(10)
578
  else:
579
    # the shutdown did not succeed
580
    logger.Error("shutdown of '%s' unsuccessful, using destroy" % instance)
581

    
582
    try:
583
      hyper.StopInstance(instance, force=True)
584
    except errors.HypervisorError, err:
585
      logger.Error("Failed to stop instance: %s" % err)
586
      return False
587

    
588
    time.sleep(1)
589
    if instance.name in GetInstanceList():
590
      logger.Error("could not shutdown instance '%s' even by destroy")
591
      return False
592

    
593
  return True
594

    
595

    
596
def CreateBlockDevice(disk, size, on_primary, info):
  """Creates a block device for an instance.

  Args:
    disk: a ganeti.objects.Disk object describing the device to create
    size: the size of the physical underlying device
    on_primary: whether we are creating the device on the primary node
                (children and the device itself are then assembled and
                opened as needed)
    info: metadata to attach to the device via SetInfo()

  Returns:
    the new unique_id of the device (this can sometime be
    computed only after creation), or None. On secondary nodes,
    it's not required to return anything.

  Raises:
    ValueError: if the device could not be created

  """
  # assemble the children first, so the device itself can be assembled
  clist = []
  if disk.children:
    for child in disk.children:
      crdev = _RecursiveAssembleBD(child, on_primary)
      if on_primary or disk.AssembleOnSecondary():
        # we need the children open in case the device itself has to
        # be assembled
        crdev.Open()
      else:
        crdev.Close()
      clist.append(crdev)
  try:
    # best effort: remove any stale device with the same id before
    # (re-)creating it
    device = bdev.FindDevice(disk.dev_type, disk.physical_id, clist)
    if device is not None:
      logger.Info("removing existing device %s" % disk)
      device.Remove()
  except errors.BlockDeviceError:
    # probably can't attach to the old device; go on and try to
    # create the new one anyway
    pass

  device = bdev.Create(disk.dev_type, disk.physical_id,
                       clist, size)
  if device is None:
    raise ValueError("Can't create child device for %s, %s" %
                     (disk, size))
  if on_primary or disk.AssembleOnSecondary():
    device.Assemble()
    device.SetSyncSpeed(constants.SYNC_SPEED)
    if on_primary or disk.OpenOnSecondary():
      device.Open(force=True)

  device.SetInfo(info)

  physical_id = device.unique_id
  return physical_id
645

    
646

    
647
def RemoveBlockDevice(disk):
  """Remove a block device.

  This is intended to be called recursively: the device itself is
  removed first (when it can be attached at all), then each child.

  Args:
    disk: the objects.Disk describing the device to remove

  Returns:
    True if the device and all its children were removed (or could
    not be attached, i.e. are presumably already gone), False if any
    Remove() call failed.

  """
  try:
    # since we are removing the device, allow a partial match
    # this allows removal of broken mirrors
    rdev = _RecursiveFindBD(disk, allow_partial=True)
  except errors.BlockDeviceError:
    # probably can't attach
    logger.Info("Can't attach to device %s in remove" % disk)
    rdev = None
  if rdev is not None:
    result = rdev.Remove()
  else:
    result = True
  if disk.children:
    for child in disk.children:
      result = result and RemoveBlockDevice(child)
  return result
669

    
670

    
671
def _RecursiveAssembleBD(disk, as_primary):
  """Activate a block device for an instance.

  This is run on the primary and secondary nodes for an instance,
  calling itself recursively for the children of the disk.

  Args:
    disk: a objects.Disk object
    as_primary: if we should make the block device read/write

  Returns:
    the assembled device when one was assembled, or True on a
    secondary node where the device should not be activated.

  If the assembly is not successful, an exception is raised.

  """
  children = []
  if disk.children:
    children = [_RecursiveAssembleBD(chld_disk, as_primary)
                for chld_disk in disk.children]

  if not (as_primary or disk.AssembleOnSecondary()):
    # this device must not be active on this node
    return True

  r_dev = bdev.AttachOrAssemble(disk.dev_type, disk.physical_id, children)
  r_dev.SetSyncSpeed(constants.SYNC_SPEED)
  if as_primary or disk.OpenOnSecondary():
    r_dev.Open()
  else:
    r_dev.Close()
  return r_dev
704

    
705

    
706
def AssembleBlockDevice(disk, as_primary):
  """Activate a block device for an instance.

  This is a wrapper over _RecursiveAssembleBD.

  Args:
    disk: the objects.Disk to assemble
    as_primary: whether to open the device read/write

  Returns:
    a /dev path for primary nodes
    True for secondary nodes

  """
  assembled = _RecursiveAssembleBD(disk, as_primary)
  if isinstance(assembled, bdev.BlockDev):
    return assembled.dev_path
  return assembled
720

    
721

    
722
def ShutdownBlockDevice(disk):
  """Shut down a block device.

  First, if the device is assembled (can `Attach()`), then the device
  is shutdown. Then the children of the device are shutdown.

  This function is called recursively. Note that we don't cache the
  children or such, as opposed to assemble, shutdown of different
  devices doesn't require that the upper device was active.

  Args:
    disk: the objects.Disk to shut down

  Returns:
    True if the device and all its children shut down cleanly (or
    were not attached), False otherwise.

  """
  success = True
  r_dev = _RecursiveFindBD(disk)
  if r_dev is not None:
    success = r_dev.Shutdown()
  for child in (disk.children or []):
    success = success and ShutdownBlockDevice(child)
  return success
742

    
743

    
744
def MirrorAddChild(md_cdev, new_cdev):
  """Extend an MD raid1 array.

  Args:
    md_cdev: the objects.Disk of the md array to extend
    new_cdev: the objects.Disk of the device to add to it

  Returns:
    True on success, False (after logging) if either device can't be
    found.

  """
  # allow a partial match on the array: it may be a broken mirror
  md_bdev = _RecursiveFindBD(md_cdev, allow_partial=True)
  if md_bdev is None:
    logger.Error("Can't find md device")
    return False

  new_bdev = _RecursiveFindBD(new_cdev)
  if new_bdev is None:
    logger.Error("Can't find new device to add")
    return False

  new_bdev.Open()
  md_bdev.AddChild(new_bdev)
  return True
759

    
760

    
761
def MirrorRemoveChild(md_cdev, new_cdev):
  """Reduce an MD raid1 array.

  Args:
    md_cdev: the objects.Disk of the md array to reduce
    new_cdev: the objects.Disk of the device to remove from it

  Returns:
    True on success, False if either device can't be found.

  """
  md_bdev = _RecursiveFindBD(md_cdev)
  if md_bdev is None:
    return False

  child_bdev = _RecursiveFindBD(new_cdev)
  if child_bdev is None:
    return False

  child_bdev.Open()
  md_bdev.RemoveChild(child_bdev.dev_path)
  return True
774

    
775

    
776
def GetMirrorStatus(disks):
  """Get the mirroring status of a list of devices.

  Args:
    disks: list of `objects.Disk`

  Returns:
    list of (mirror_done, estimated_time) tuples, which
    are the result of bdev.BlockDevice.CombinedSyncStatus()

  Raises:
    errors.BlockDeviceError: if any of the devices can't be found

  """
  status = []
  for disk in disks:
    found = _RecursiveFindBD(disk)
    if found is None:
      raise errors.BlockDeviceError("Can't find device %s" % str(disk))
    status.append(found.CombinedSyncStatus())
  return status
794

    
795

    
796
def _RecursiveFindBD(disk, allow_partial=False):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance
    allow_partial: don't abort the find if a child of the
                   device can't be found; this is intended to be
                   used when repairing mirrors

  Returns:
    None if the device can't be found
    otherwise the device instance

  """
  # NOTE(review): allow_partial is neither forwarded to the recursive
  # calls nor passed to bdev.FindDevice here -- confirm whether the
  # partial matching is honoured elsewhere
  children = [_RecursiveFindBD(chdisk) for chdisk in (disk.children or [])]

  return bdev.FindDevice(disk.dev_type, disk.physical_id, children)
818

    
819

    
820
def FindBlockDevice(disk):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance
  Returns:
    None if the device can't be found
    (device_path, major, minor, sync_percent, estimated_time, is_degraded)

  """
  rbd = _RecursiveFindBD(disk)
  if rbd is None:
    return None
  sync_p, est_t, is_degr = rbd.GetSyncStatus()
  return (rbd.dev_path, rbd.major, rbd.minor, sync_p, est_t, is_degr)
837

    
838

    
839
def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
  """Write a file to the filesystem.

  This allows the master to overwrite(!) a file. It will only perform
  the operation if the file belongs to a list of configuration files.

  Args:
    file_name: absolute path of the target file
    data: the new contents of the file
    mode: the permission bits to set on the file
    uid: numeric owner to set on the file
    gid: numeric group to set on the file
    atime: access time to set on the file
    mtime: modification time to set on the file

  Returns:
    True on success, False if the name is not absolute or is not an
    allowed upload target.

  """
  if not os.path.isabs(file_name):
    logger.Error("Filename passed to UploadFile is not absolute: '%s'" %
                 file_name)
    return False

  # whitelist of files the master may overwrite on this node
  allowed_files = [constants.CLUSTER_CONF_FILE, "/etc/hosts",
                   constants.SSH_KNOWN_HOSTS_FILE]
  allowed_files.extend(ssconf.SimpleStore().GetFileList())
  if file_name not in allowed_files:
    logger.Error("Filename passed to UploadFile not in allowed"
                 " upload targets: '%s'" % file_name)
    return False

  # write to a temporary file in the same directory, then rename it
  # over the target so the replacement is atomic
  dir_name, small_name = os.path.split(file_name)
  fd, new_name = tempfile.mkstemp('.new', small_name, dir_name)
  # here we need to make sure we remove the temp file, if any error
  # leaves it in place
  try:
    os.chown(new_name, uid, gid)
    os.chmod(new_name, mode)
    os.write(fd, data)
    os.fsync(fd)
    os.utime(new_name, (atime, mtime))
    os.rename(new_name, file_name)
  finally:
    os.close(fd)
    # after a successful rename new_name no longer exists; presumably
    # utils.RemoveFile ignores a missing file -- confirm
    utils.RemoveFile(new_name)
  return True
874

    
875

    
876
def _ErrnoOrStr(err):
877
  """Format an EnvironmentError exception.
878

879
  If the `err` argument has an errno attribute, it will be looked up
880
  and converted into a textual EXXXX description. Otherwise the string
881
  representation of the error will be returned.
882

883
  """
884
  if hasattr(err, 'errno'):
885
    detail = errno.errorcode[err.errno]
886
  else:
887
    detail = str(err)
888
  return detail
889

    
890
def _OSSearch(name, search_path=None):
891
  """Search for OSes with the given name in the search_path.
892

893
  Args:
894
    name: The name of the OS to look for
895
    search_path: List of dirs to search (defaults to constants.OS_SEARCH_PATH)
896

897
  Returns:
898
    The base_dir the OS resides in
899

900
  """
901

    
902
  if search_path is None:
903
    search_path = constants.OS_SEARCH_PATH
904

    
905
  for dir in search_path:
906
    t_os_dir = os.path.sep.join([dir, name])
907
    if os.path.isdir(t_os_dir):
908
        return dir
909

    
910
  return None
911

    
912
def _OSOndiskVersion(name, os_dir):
913
  """Compute and return the api version of a given OS.
914

915
  This function will try to read the api version of the os given by
916
  the 'name' parameter and residing in the 'os_dir' directory.
917

918
  Return value will be either an integer denoting the version or None in the
919
  case when this is not a valid OS name.
920

921
  """
922

    
923
  api_file = os.path.sep.join([os_dir, "ganeti_api_version"])
924

    
925
  try:
926
    st = os.stat(api_file)
927
  except EnvironmentError, err:
928
    raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file not"
929
                           " found (%s)" % _ErrnoOrStr(err))
930

    
931
  if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
932
    raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file is not"
933
                           " a regular file")
934

    
935
  try:
936
    f = open(api_file)
937
    try:
938
      api_version = f.read(256)
939
    finally:
940
      f.close()
941
  except EnvironmentError, err:
942
    raise errors.InvalidOS(name, os_dir, "error while reading the"
943
                           " API version (%s)" % _ErrnoOrStr(err))
944

    
945
  api_version = api_version.strip()
946
  try:
947
    api_version = int(api_version)
948
  except (TypeError, ValueError), err:
949
    raise errors.InvalidOS(name, os_dir,
950
                           "API version is not integer (%s)" % str(err))
951

    
952
  return api_version
953

    
954

    
955
def DiagnoseOS(top_dirs=None):
956
  """Compute the validity for all OSes.
957

958
  For each name in all the given top directories (if not given defaults i
959
  to constants.OS_SEARCH_PATH it will return an object. If this is a valid
960
  os, the object will be an instance of the object.OS class. If not,
961
  it will be an instance of errors.InvalidOS and this signifies that
962
  this name does not correspond to a valid OS.
963

964
  Returns:
965
    list of objects
966

967
  """
968
  if top_dirs is None:
969
    top_dirs = constants.OS_SEARCH_PATH
970

    
971
  result = []
972
  for dir in top_dirs:
973
    if os.path.isdir(dir):
974
      try:
975
        f_names = utils.ListVisibleFiles(dir)
976
      except EnvironmentError, err:
977
        logger.Error("Can't list the OS directory %s: %s" % (dir,str(err)))
978
        break
979
      for name in f_names:
980
        try:
981
          os_inst = OSFromDisk(name, base_dir=dir)
982
          result.append(os_inst)
983
        except errors.InvalidOS, err:
984
          result.append(err)
985

    
986
  return result
987

    
988

    
989
def OSFromDisk(name, base_dir=None):
990
  """Create an OS instance from disk.
991

992
  This function will return an OS instance if the given name is a
993
  valid OS name. Otherwise, it will raise an appropriate
994
  `errors.InvalidOS` exception, detailing why this is not a valid
995
  OS.
996

997
  Args:
998
    os_dir: Directory containing the OS scripts. Defaults to a search
999
            in all the OS_SEARCH_PATH directories.
1000

1001
  """
1002

    
1003
  if base_dir is None:
1004
    base_dir = _OSSearch(name)
1005

    
1006
  if base_dir is None:
1007
    raise errors.InvalidOS(name, None, "OS dir not found in search path")
1008

    
1009
  os_dir = os.path.sep.join([base_dir, name])
1010
  api_version = _OSOndiskVersion(name, os_dir)
1011

    
1012
  if api_version != constants.OS_API_VERSION:
1013
    raise errors.InvalidOS(name, os_dir, "API version mismatch"
1014
                           " (found %s want %s)"
1015
                           % (api_version, constants.OS_API_VERSION))
1016

    
1017
  # OS Scripts dictionary, we will populate it with the actual script names
1018
  os_scripts = {'create': '', 'export': '', 'import': '', 'rename': ''}
1019

    
1020
  for script in os_scripts:
1021
    os_scripts[script] = os.path.sep.join([os_dir, script])
1022

    
1023
    try:
1024
      st = os.stat(os_scripts[script])
1025
    except EnvironmentError, err:
1026
      raise errors.InvalidOS(name, os_dir, "'%s' script missing (%s)" %
1027
                             (script, _ErrnoOrStr(err)))
1028

    
1029
    if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
1030
      raise errors.InvalidOS(name, os_dir, "'%s' script not executable" %
1031
                             script)
1032

    
1033
    if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
1034
      raise errors.InvalidOS(name, os_dir, "'%s' is not a regular file" %
1035
                             script)
1036

    
1037

    
1038
  return objects.OS(name=name, path=os_dir,
1039
                    create_script=os_scripts['create'],
1040
                    export_script=os_scripts['export'],
1041
                    import_script=os_scripts['import'],
1042
                    rename_script=os_scripts['rename'],
1043
                    api_version=api_version)
1044

    
1045

    
1046
def SnapshotBlockDevice(disk):
  """Create a snapshot copy of a block device.

  This function is called recursively, and the snapshot is actually created
  just for the leaf lvm backend device.

  Args:
    disk: the disk to be snapshotted

  Returns:
    a config entry for the actual lvm device snapshotted, or None if
    the device is not set up or no suitably-sized child was found.

  Raises:
    errors.ProgrammerError: if a leaf device is of a non-lvm type.

  """
  if disk.children:
    if len(disk.children) == 1:
      # only one child, let's recurse on it
      return SnapshotBlockDevice(disk.children[0])
    else:
      # more than one child, choose one that matches the parent's size
      for child in disk.children:
        if child.size == disk.size:
          # return implies breaking the loop
          return SnapshotBlockDevice(child)
      # no child matched in size; make the fall-through explicit
      return None
  elif disk.dev_type == "lvm":
    r_dev = _RecursiveFindBD(disk)
    if r_dev is not None:
      # let's stay on the safe side and ask for the full size, for now
      return r_dev.Snapshot(disk.size)
    else:
      return None
  else:
    # FIX: the two adjacent string literals previously concatenated
    # without a space, yielding "...block device'%s'..." in the message
    raise errors.ProgrammerError("Cannot snapshot non-lvm block device"
                                 " '%s' of type '%s'" %
                                 (disk.unique_id, disk.dev_type))
1080

    
1081

    
1082
def ExportSnapshot(disk, dest_node, instance):
  """Export a block device snapshot to a remote node.

  The export pipeline is: local OS export script | gzip | ssh to
  dest_node, which writes the stream into the export staging directory
  (instance.name + ".new") under constants.EXPORT_DIR.

  Args:
    disk: the snapshot block device
    dest_node: the node to send the image to
    instance: instance being exported

  Returns:
    True if successful, False otherwise.

  """
  # the export script comes from the instance's OS definition
  inst_os = OSFromDisk(instance.os)
  export_script = inst_os.export_script

  # per-run log file for the export script's stderr
  logfile = "%s/exp-%s-%s-%s.log" % (constants.LOG_OS_DIR, inst_os.name,
                                     instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  # resolve the config-level disk into the actual local block device
  real_os_dev = _RecursiveFindBD(disk)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(disk))
  real_os_dev.Open()

  # staging dir on the destination; renamed into place by FinalizeExport
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
  destfile = disk.physical_id[1]

  # the target command is built out of three individual commands,
  # which are joined by pipes; we check each individual command for
  # valid parameters

  expcmd = utils.BuildShellCmd("cd %s; %s -i %s -b %s 2>%s", inst_os.path,
                               export_script, instance.name,
                               real_os_dev.dev_path, logfile)

  comprcmd = "gzip"

  destcmd = utils.BuildShellCmd("mkdir -p %s && cat > %s/%s",
                                destdir, destdir, destfile)
  remotecmd = ssh.BuildSSHCmd(dest_node, 'root', destcmd)

  # all commands have been checked, so we're safe to combine them
  command = '|'.join([expcmd, comprcmd, utils.ShellQuoteArgs(remotecmd)])

  result = utils.RunCmd(command)

  if result.failed:
    # NOTE(review): a failure in any stage of the pipe surfaces here;
    # the script's own diagnostics are in 'logfile' on this node
    logger.Error("os snapshot export command '%s' returned error: %s"
                 " output: %s" %
                 (command, result.fail_reason, result.output))
    return False

  return True
1139

    
1140

    
1141
def FinalizeExport(instance, snap_disks):
  """Write out the export configuration information.

  Writes the export's INI-style config file into the staging directory
  (instance.name + ".new") and then atomically moves the staging
  directory over the final export directory.

  Args:
    instance: instance configuration
    snap_disks: snapshot block devices

  Returns:
    False in case of error, True otherwise.

  """
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
  finaldestdir = os.path.join(constants.EXPORT_DIR, instance.name)

  config = objects.SerializableConfigParser()

  # export-level metadata
  config.add_section(constants.INISECT_EXP)
  config.set(constants.INISECT_EXP, 'version', '0')
  config.set(constants.INISECT_EXP, 'timestamp', '%d' % int(time.time()))
  config.set(constants.INISECT_EXP, 'source', instance.primary_node)
  config.set(constants.INISECT_EXP, 'os', instance.os)
  config.set(constants.INISECT_EXP, 'compression', 'gzip')

  # instance-level metadata
  config.add_section(constants.INISECT_INS)
  config.set(constants.INISECT_INS, 'name', instance.name)
  config.set(constants.INISECT_INS, 'memory', '%d' % instance.memory)
  config.set(constants.INISECT_INS, 'vcpus', '%d' % instance.vcpus)
  config.set(constants.INISECT_INS, 'disk_template', instance.disk_template)

  # FIX: initialize the counters so an instance with no nics or an
  # empty snap_disks list no longer raises NameError when the *_count
  # keys are written below
  nic_count = 0
  for nic_count, nic in enumerate(instance.nics):
    config.set(constants.INISECT_INS, 'nic%d_mac' %
               nic_count, '%s' % nic.mac)
    config.set(constants.INISECT_INS, 'nic%d_ip' % nic_count, '%s' % nic.ip)
  # TODO: redundant: on load can read nics until it doesn't exist
  # NOTE(review): for non-empty lists this stores the LAST INDEX
  # (count - 1), not the count; kept as-is for on-disk compatibility
  config.set(constants.INISECT_INS, 'nic_count' , '%d' % nic_count)

  disk_count = 0
  for disk_count, disk in enumerate(snap_disks):
    config.set(constants.INISECT_INS, 'disk%d_ivname' % disk_count,
               ('%s' % disk.iv_name))
    config.set(constants.INISECT_INS, 'disk%d_dump' % disk_count,
               ('%s' % disk.physical_id[1]))
    config.set(constants.INISECT_INS, 'disk%d_size' % disk_count,
               ('%d' % disk.size))
  # NOTE(review): same last-index semantics as nic_count above
  config.set(constants.INISECT_INS, 'disk_count' , '%d' % disk_count)

  cff = os.path.join(destdir, constants.EXPORT_CONF_FILE)
  cfo = open(cff, 'w')
  try:
    config.write(cfo)
  finally:
    cfo.close()

  # replace any previous export of the same instance with the new one
  shutil.rmtree(finaldestdir, True)
  shutil.move(destdir, finaldestdir)

  return True
1196

    
1197

    
1198
def ExportInfo(dest):
  """Get export configuration information.

  Args:
    dest: directory containing the export

  Returns:
    A serializable config file containing the export info, or None if
    either of the mandatory sections is missing.

  """
  config_file = os.path.join(dest, constants.EXPORT_CONF_FILE)

  export_config = objects.SerializableConfigParser()
  export_config.read(config_file)

  # both the export and the instance sections must be present for the
  # export to be considered valid
  has_exp = export_config.has_section(constants.INISECT_EXP)
  has_ins = export_config.has_section(constants.INISECT_INS)
  if has_exp and has_ins:
    return export_config
  return None
1218

    
1219

    
1220
def ImportOSIntoInstance(instance, os_disk, swap_disk, src_node, src_image):
  """Import an os image into an instance.

  The import pipeline is: ssh to src_node streaming the image | gunzip
  | the OS definition's import script run against the local os and
  swap block devices.

  Args:
    instance: the instance object
    os_disk: the instance-visible name of the os device
    swap_disk: the instance-visible name of the swap device
    src_node: node holding the source image
    src_image: path to the source image on src_node

  Returns:
    False in case of error, True otherwise.

  """
  # the import script comes from the instance's OS definition
  inst_os = OSFromDisk(instance.os)
  import_script = inst_os.import_script

  # translate the instance-visible names into config-level disk objects
  os_device = instance.FindDisk(os_disk)
  if os_device is None:
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
    return False

  swap_device = instance.FindDisk(swap_disk)
  if swap_device is None:
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
    return False

  # resolve both disks into actual local block devices; they must
  # already be assembled on this node
  real_os_dev = _RecursiveFindBD(os_device)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(os_device))
  real_os_dev.Open()

  real_swap_dev = _RecursiveFindBD(swap_device)
  if real_swap_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(swap_device))
  real_swap_dev.Open()

  # per-run log file for the import script's output
  logfile = "%s/import-%s-%s-%s.log" % (constants.LOG_OS_DIR, instance.os,
                                        instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  # read the image remotely and stream it over ssh
  destcmd = utils.BuildShellCmd('cat %s', src_image)
  remotecmd = ssh.BuildSSHCmd(src_node, 'root', destcmd)

  comprcmd = "gunzip"
  impcmd = utils.BuildShellCmd("(cd %s; %s -i %s -b %s -s %s &>%s)",
                               inst_os.path, import_script, instance.name,
                               real_os_dev.dev_path, real_swap_dev.dev_path,
                               logfile)

  # all pieces are shell-quoted above, so joining with pipes is safe
  command = '|'.join([utils.ShellQuoteArgs(remotecmd), comprcmd, impcmd])

  result = utils.RunCmd(command)

  if result.failed:
    # NOTE(review): the import script's own diagnostics go to 'logfile'
    logger.Error("os import command '%s' returned error: %s"
                 " output: %s" %
                 (command, result.fail_reason, result.output))
    return False

  return True
1284

    
1285

    
1286
def ListExports():
  """Return a list of exports currently available on this machine.

  Returns an empty list when the export directory does not exist.

  """
  if not os.path.isdir(constants.EXPORT_DIR):
    return []
  return utils.ListVisibleFiles(constants.EXPORT_DIR)
1294

    
1295

    
1296
def RemoveExport(export):
  """Remove an existing export from the node.

  Args:
    export: the name of the export to remove

  Returns:
    False in case of error, True otherwise.

  """
  export_path = os.path.join(constants.EXPORT_DIR, export)

  # TODO: catch some of the relevant exceptions and provide a pretty
  # error message if rmtree fails.
  shutil.rmtree(export_path)

  return True
1313

    
1314

    
1315
class HooksRunner(object):
  """Hook runner.

  This class is instantiated on the node side (ganeti-noded) and not on
  the master side.

  """
  # only files whose names match this pattern are eligible to run as
  # hooks; everything else found in the directory is reported as skipped
  RE_MASK = re.compile("^[a-zA-Z0-9_-]+$")

  def __init__(self, hooks_base_dir=None):
    """Constructor for hooks runner.

    Args:
      - hooks_base_dir: if not None, this overrides the
        constants.HOOKS_BASE_DIR (useful for unittests)

    """
    if hooks_base_dir is None:
      hooks_base_dir = constants.HOOKS_BASE_DIR
    # base directory under which the per-hook "<hpath>-<phase>.d"
    # subdirectories live
    self._BASE_DIR = hooks_base_dir

  @staticmethod
  def ExecHook(script, env):
    """Exec one hook script.

    Args:
     - script: the full path to the script
     - env: the environment with which to exec the script

    Returns:
      A (success, output) tuple: success is True iff the script exited
      with status 0; output holds (at most) the first 4096 bytes of the
      script's combined stdout/stderr.

    """
    # exec the process using subprocess and log the output
    fdstdin = None
    try:
      fdstdin = open("/dev/null", "r")
      child = subprocess.Popen([script], stdin=fdstdin, stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT, close_fds=True,
                               shell=False, cwd="/",env=env)
      output = ""
      try:
        # NOTE(review): only the first 4096 bytes are captured; a hook
        # writing more may receive EPIPE once stdout is closed here
        output = child.stdout.read(4096)
        child.stdout.close()
      except EnvironmentError, err:
        output += "Hook script error: %s" % str(err)

      # retry wait() if it is interrupted by a signal (EINTR)
      while True:
        try:
          result = child.wait()
          break
        except EnvironmentError, err:
          if err.errno == errno.EINTR:
            continue
          raise
    finally:
      # try not to leak fds
      for fd in (fdstdin, ):
        if fd is not None:
          try:
            fd.close()
          except EnvironmentError, err:
            # just log the error
            #logger.Error("While closing fd %s: %s" % (fd, err))
            pass

    return result == 0, output

  def RunHooks(self, hpath, phase, env):
    """Run the scripts in the hooks directory.

    This method will not be usually overriden by child opcodes.

    Returns a list of (relative script path, status, output) tuples,
    where status is one of constants.HKR_SKIP / HKR_FAIL / HKR_SUCCESS.

    """
    # map the phase constant onto the directory-name suffix
    if phase == constants.HOOKS_PHASE_PRE:
      suffix = "pre"
    elif phase == constants.HOOKS_PHASE_POST:
      suffix = "post"
    else:
      raise errors.ProgrammerError("Unknown hooks phase: '%s'" % phase)
    rr = []

    subdir = "%s-%s.d" % (hpath, suffix)
    dir_name = "%s/%s" % (self._BASE_DIR, subdir)
    try:
      dir_contents = utils.ListVisibleFiles(dir_name)
    except OSError, err:
      # a missing/unreadable hooks dir means no hooks to run
      # must log
      return rr

    # we use the standard python sort order,
    # so 00name is the recommended naming scheme
    dir_contents.sort()
    for relname in dir_contents:
      fname = os.path.join(dir_name, relname)
      # non-files, non-executables and badly-named entries are skipped,
      # not failed
      if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
          self.RE_MASK.match(relname) is not None):
        rrval = constants.HKR_SKIP
        output = ""
      else:
        result, output = self.ExecHook(fname, env)
        if not result:
          rrval = constants.HKR_FAIL
        else:
          rrval = constants.HKR_SUCCESS
      rr.append(("%s/%s" % (subdir, relname), rrval, output))

    return rr