Statistics
| Branch: | Tag: | Revision:

root / lib / backend.py @ 7900ed01

History | View | Annotate | Download (39.9 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Functions used by the node daemon"""
23

    
24

    
25
import os
26
import os.path
27
import shutil
28
import time
29
import tempfile
30
import stat
31
import errno
32
import re
33
import subprocess
34

    
35
from ganeti import logger
36
from ganeti import errors
37
from ganeti import utils
38
from ganeti import ssh
39
from ganeti import hypervisor
40
from ganeti import constants
41
from ganeti import bdev
42
from ganeti import objects
43
from ganeti import ssconf
44

    
45

    
46
def StartMaster():
47
  """Activate local node as master node.
48

49
  There are two needed steps for this:
50
    - run the master script
51
    - register the cron script
52

53
  """
54
  result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "start"])
55

    
56
  if result.failed:
57
    logger.Error("could not activate cluster interface with command %s,"
58
                 " error: '%s'" % (result.cmd, result.output))
59
    return False
60

    
61
  return True
62

    
63

    
64
def StopMaster():
65
  """Deactivate this node as master.
66

67
  This does two things:
68
    - run the master stop script
69
    - remove link to master cron script.
70

71
  """
72
  result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "stop"])
73

    
74
  if result.failed:
75
    logger.Error("could not deactivate cluster interface with command %s,"
76
                 " error: '%s'" % (result.cmd, result.output))
77
    return False
78

    
79
  return True
80

    
81

    
82
def AddNode(dsa, dsapub, rsa, rsapub, sshkey, sshpub):
83
  """Joins this node to the cluster.
84

85
  This does the following:
86
      - updates the hostkeys of the machine (rsa and dsa)
87
      - adds the ssh private key to the user
88
      - adds the ssh public key to the users' authorized_keys file
89

90
  """
91
  user_dir = utils.GetHomeDir(constants.GANETI_RUNAS)
92
  if not user_dir:
93
    logger.Error("Cannot find home of run-as user %s" % constants.GANETI_RUNAS)
94
    return False
95

    
96
  sshd_keys =  [("ssh_host_rsa_key", rsa, 0600),
97
                ("ssh_host_rsa_key.pub", rsapub, 0644),
98
                ("ssh_host_dsa_key", dsa, 0600),
99
                ("ssh_host_dsa_key.pub",  dsapub, 0644)]
100
  for name, content, mode in sshd_keys:
101
    utils.WriteFile(os.path.join(constants.SSH_CONFIG_DIR, name),
102
                    data=content, mode=mode)
103

    
104
  user_ssh_dir = os.path.join(user_dir, ".ssh")
105

    
106
  if not os.path.isdir(user_ssh_dir):
107
    os.mkdir(user_ssh_dir)
108

    
109
  for name, content in [("id_dsa", sshkey), ("id_dsa.pub", sshpub)]:
110
    utils.WriteFile(os.path.join(user_ssh_dir, name), data=content, mode=0600)
111

    
112
  utils.AddAuthorizedKey(os.path.join(user_ssh_dir, "authorized_keys"), sshpub)
113

    
114
  utils.RunCmd([constants.SSH_INITD_SCRIPT, "restart"])
115

    
116
  return True
117

    
118

    
119
def LeaveCluster():
120
  """Cleans up the current node and prepares it to be removed from the cluster.
121

122
  """
123
  if os.path.isdir(constants.DATA_DIR):
124
    for rel_name in utils.ListVisibleFiles(constants.DATA_DIR):
125
      full_name = os.path.join(constants.DATA_DIR, rel_name)
126
      if os.path.isfile(full_name) and not os.path.islink(full_name):
127
        utils.RemoveFile(full_name)
128

    
129
  user_dir = utils.GetHomeDir(constants.GANETI_RUNAS)
130
  if not user_dir:
131
    logger.Error("Cannot find home of run-as user %s" % constants.GANETI_RUNAS)
132
    return
133

    
134
  user_ssh_dir = os.path.join(user_dir, ".ssh")
135

    
136
  if not os.path.isdir(user_ssh_dir):
137
    logger.Error("User's ssh dir '%s' does not exist?!" % user_ssh_dir)
138
    return
139

    
140
  f = open(os.path.join(user_ssh_dir, "id_dsa.pub"), 'r')
141
  try:
142
    utils.RemoveAuthorizedKey(os.path.join(user_ssh_dir, "authorized_keys"),
143
                              f.read(8192))
144
  finally:
145
    f.close()
146

    
147

    
148
  utils.RemoveFile(os.path.join(user_ssh_dir, "id_dsa"))
149
  utils.RemoveFile(os.path.join(user_ssh_dir, "id_dsa.pub"))
150

    
151

    
152
def GetNodeInfo(vgname):
153
  """ gives back a hash with different informations
154
  about the node
155

156
  Returns:
157
    { 'vg_size' : xxx,  'vg_free' : xxx, 'memory_domain0': xxx,
158
      'memory_free' : xxx, 'memory_total' : xxx }
159
    where
160
    vg_size is the size of the configured volume group in MiB
161
    vg_free is the free size of the volume group in MiB
162
    memory_dom0 is the memory allocated for domain0 in MiB
163
    memory_free is the currently available (free) ram in MiB
164
    memory_total is the total number of ram in MiB
165

166
  """
167
  outputarray = {}
168
  vginfo = _GetVGInfo(vgname)
169
  outputarray['vg_size'] = vginfo['vg_size']
170
  outputarray['vg_free'] = vginfo['vg_free']
171

    
172
  hyper = hypervisor.GetHypervisor()
173
  hyp_info = hyper.GetNodeInfo()
174
  if hyp_info is not None:
175
    outputarray.update(hyp_info)
176

    
177
  f = open("/proc/sys/kernel/random/boot_id", 'r')
178
  try:
179
    outputarray["bootid"] = f.read(128).rstrip("\n")
180
  finally:
181
    f.close()
182

    
183
  return outputarray
184

    
185

    
186
def VerifyNode(what):
187
  """Verify the status of the local node.
188

189
  Args:
190
    what - a dictionary of things to check:
191
      'filelist' : list of files for which to compute checksums
192
      'nodelist' : list of nodes we should check communication with
193
      'hypervisor': run the hypervisor-specific verify
194

195
  Requested files on local node are checksummed and the result returned.
196

197
  The nodelist is traversed, with the following checks being made
198
  for each node:
199
  - known_hosts key correct
200
  - correct resolving of node name (target node returns its own hostname
201
    by ssh-execution of 'hostname', result compared against name in list.
202

203
  """
204
  result = {}
205

    
206
  if 'hypervisor' in what:
207
    result['hypervisor'] = hypervisor.GetHypervisor().Verify()
208

    
209
  if 'filelist' in what:
210
    result['filelist'] = utils.FingerprintFiles(what['filelist'])
211

    
212
  if 'nodelist' in what:
213
    result['nodelist'] = {}
214
    for node in what['nodelist']:
215
      success, message = ssh.VerifyNodeHostname(node)
216
      if not success:
217
        result['nodelist'][node] = message
218
  return result
219

    
220

    
221
def GetVolumeList(vg_name):
222
  """Compute list of logical volumes and their size.
223

224
  Returns:
225
    dictionary of all partions (key) with their size:
226
    test1: 20.06MiB
227

228
  """
229
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m",
230
                         "-oname,size", vg_name])
231
  if result.failed:
232
    logger.Error("Failed to list logical volumes, lvs output: %s" %
233
                 result.output)
234
    return {}
235

    
236
  lvlist = [line.split() for line in result.output.splitlines()]
237
  return dict(lvlist)
238

    
239

    
240
def ListVolumeGroups():
241
  """List the volume groups and their size
242

243
  Returns:
244
    Dictionary with keys volume name and values the size of the volume
245

246
  """
247
  return utils.ListVolumeGroups()
248

    
249

    
250
def NodeVolumes():
251
  """List all volumes on this node.
252

253
  """
254
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
255
                         "--separator=|",
256
                         "--options=lv_name,lv_size,devices,vg_name"])
257
  if result.failed:
258
    logger.Error("Failed to list logical volumes, lvs output: %s" %
259
                 result.output)
260
    return {}
261

    
262
  def parse_dev(dev):
263
    if '(' in dev:
264
      return dev.split('(')[0]
265
    else:
266
      return dev
267

    
268
  def map_line(line):
269
    return {
270
      'name': line[0].strip(),
271
      'size': line[1].strip(),
272
      'dev': parse_dev(line[2].strip()),
273
      'vg': line[3].strip(),
274
    }
275

    
276
  return [map_line(line.split('|')) for line in result.output.splitlines()]
277

    
278

    
279
def BridgesExist(bridges_list):
280
  """Check if a list of bridges exist on the current node
281

282
  Returns:
283
    True if all of them exist, false otherwise
284

285
  """
286
  for bridge in bridges_list:
287
    if not utils.BridgeExists(bridge):
288
      return False
289

    
290
  return True
291

    
292

    
293
def GetInstanceList():
294
  """ provides a list of instances
295

296
  Returns:
297
    A list of all running instances on the current node
298
    - instance1.example.com
299
    - instance2.example.com
300

301
  """
302
  try:
303
    names = hypervisor.GetHypervisor().ListInstances()
304
  except errors.HypervisorError, err:
305
    logger.Error("error enumerating instances: %s" % str(err))
306
    raise
307

    
308
  return names
309

    
310

    
311
def GetInstanceInfo(instance):
312
  """ gives back the informations about an instance
313
  as a dictonary
314

315
  Args:
316
    instance: name of the instance (ex. instance1.example.com)
317

318
  Returns:
319
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
320
    where
321
    memory: memory size of instance (int)
322
    state: xen state of instance (string)
323
    time: cpu time of instance (float)
324

325
  """
326
  output = {}
327

    
328
  iinfo = hypervisor.GetHypervisor().GetInstanceInfo(instance)
329
  if iinfo is not None:
330
    output['memory'] = iinfo[2]
331
    output['state'] = iinfo[4]
332
    output['time'] = iinfo[5]
333

    
334
  return output
335

    
336

    
337
def GetAllInstancesInfo():
338
  """Gather data about all instances.
339

340
  This is the equivalent of `GetInstanceInfo()`, except that it
341
  computes data for all instances at once, thus being faster if one
342
  needs data about more than one instance.
343

344
  Returns: a dictionary of dictionaries, keys being the instance name,
345
    and with values:
346
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
347
    where
348
    memory: memory size of instance (int)
349
    state: xen state of instance (string)
350
    time: cpu time of instance (float)
351
    vcpus: the number of cpus
352

353
  """
354
  output = {}
355

    
356
  iinfo = hypervisor.GetHypervisor().GetAllInstancesInfo()
357
  if iinfo:
358
    for name, inst_id, memory, vcpus, state, times in iinfo:
359
      output[name] = {
360
        'memory': memory,
361
        'vcpus': vcpus,
362
        'state': state,
363
        'time': times,
364
        }
365

    
366
  return output
367

    
368

    
369
def AddOSToInstance(instance, os_disk, swap_disk):
370
  """Add an os to an instance.
371

372
  Args:
373
    instance: the instance object
374
    os_disk: the instance-visible name of the os device
375
    swap_disk: the instance-visible name of the swap device
376

377
  """
378
  inst_os = OSFromDisk(instance.os)
379

    
380
  create_script = inst_os.create_script
381

    
382
  os_device = instance.FindDisk(os_disk)
383
  if os_device is None:
384
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
385
    return False
386

    
387
  swap_device = instance.FindDisk(swap_disk)
388
  if swap_device is None:
389
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
390
    return False
391

    
392
  real_os_dev = _RecursiveFindBD(os_device)
393
  if real_os_dev is None:
394
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
395
                                  str(os_device))
396
  real_os_dev.Open()
397

    
398
  real_swap_dev = _RecursiveFindBD(swap_device)
399
  if real_swap_dev is None:
400
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
401
                                  str(swap_device))
402
  real_swap_dev.Open()
403

    
404
  logfile = "%s/add-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
405
                                     instance.name, int(time.time()))
406
  if not os.path.exists(constants.LOG_OS_DIR):
407
    os.mkdir(constants.LOG_OS_DIR, 0750)
408

    
409
  command = utils.BuildShellCmd("cd %s && %s -i %s -b %s -s %s &>%s",
410
                                inst_os.path, create_script, instance.name,
411
                                real_os_dev.dev_path, real_swap_dev.dev_path,
412
                                logfile)
413

    
414
  result = utils.RunCmd(command)
415

    
416
  if result.failed:
417
    logger.Error("os create command '%s' returned error: %s"
418
                 " output: %s" %
419
                 (command, result.fail_reason, result.output))
420
    return False
421

    
422
  return True
423

    
424

    
425
def RunRenameInstance(instance, old_name, os_disk, swap_disk):
426
  """Run the OS rename script for an instance.
427

428
  Args:
429
    instance: the instance object
430
    old_name: the old name of the instance
431
    os_disk: the instance-visible name of the os device
432
    swap_disk: the instance-visible name of the swap device
433

434
  """
435
  inst_os = OSFromDisk(instance.os)
436

    
437
  script = inst_os.rename_script
438

    
439
  os_device = instance.FindDisk(os_disk)
440
  if os_device is None:
441
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
442
    return False
443

    
444
  swap_device = instance.FindDisk(swap_disk)
445
  if swap_device is None:
446
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
447
    return False
448

    
449
  real_os_dev = _RecursiveFindBD(os_device)
450
  if real_os_dev is None:
451
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
452
                                  str(os_device))
453
  real_os_dev.Open()
454

    
455
  real_swap_dev = _RecursiveFindBD(swap_device)
456
  if real_swap_dev is None:
457
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
458
                                  str(swap_device))
459
  real_swap_dev.Open()
460

    
461
  logfile = "%s/rename-%s-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
462
                                           old_name,
463
                                           instance.name, int(time.time()))
464
  if not os.path.exists(constants.LOG_OS_DIR):
465
    os.mkdir(constants.LOG_OS_DIR, 0750)
466

    
467
  command = utils.BuildShellCmd("cd %s && %s -o %s -n %s -b %s -s %s &>%s",
468
                                inst_os.path, script, old_name, instance.name,
469
                                real_os_dev.dev_path, real_swap_dev.dev_path,
470
                                logfile)
471

    
472
  result = utils.RunCmd(command)
473

    
474
  if result.failed:
475
    logger.Error("os create command '%s' returned error: %s"
476
                 " output: %s" %
477
                 (command, result.fail_reason, result.output))
478
    return False
479

    
480
  return True
481

    
482

    
483
def _GetVGInfo(vg_name):
484
  """Get informations about the volume group.
485

486
  Args:
487
    vg_name: the volume group
488

489
  Returns:
490
    { 'vg_size' : xxx, 'vg_free' : xxx, 'pv_count' : xxx }
491
    where
492
    vg_size is the total size of the volume group in MiB
493
    vg_free is the free size of the volume group in MiB
494
    pv_count are the number of physical disks in that vg
495

496
  """
497
  retval = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings",
498
                         "--nosuffix", "--units=m", "--separator=:", vg_name])
499

    
500
  if retval.failed:
501
    errmsg = "volume group %s not present" % vg_name
502
    logger.Error(errmsg)
503
    raise errors.LVMError(errmsg)
504
  valarr = retval.stdout.strip().split(':')
505
  retdic = {
506
    "vg_size": int(round(float(valarr[0]), 0)),
507
    "vg_free": int(round(float(valarr[1]), 0)),
508
    "pv_count": int(valarr[2]),
509
    }
510
  return retdic
511

    
512

    
513
def _GatherBlockDevs(instance):
514
  """Set up an instance's block device(s).
515

516
  This is run on the primary node at instance startup. The block
517
  devices must be already assembled.
518

519
  """
520
  block_devices = []
521
  for disk in instance.disks:
522
    device = _RecursiveFindBD(disk)
523
    if device is None:
524
      raise errors.BlockDeviceError("Block device '%s' is not set up." %
525
                                    str(disk))
526
    device.Open()
527
    block_devices.append((disk, device))
528
  return block_devices
529

    
530

    
531
def StartInstance(instance, extra_args):
532
  """Start an instance.
533

534
  Args:
535
    instance - name of instance to start.
536

537
  """
538
  running_instances = GetInstanceList()
539

    
540
  if instance.name in running_instances:
541
    return True
542

    
543
  block_devices = _GatherBlockDevs(instance)
544
  hyper = hypervisor.GetHypervisor()
545

    
546
  try:
547
    hyper.StartInstance(instance, block_devices, extra_args)
548
  except errors.HypervisorError, err:
549
    logger.Error("Failed to start instance: %s" % err)
550
    return False
551

    
552
  return True
553

    
554

    
555
def ShutdownInstance(instance):
556
  """Shut an instance down.
557

558
  Args:
559
    instance - name of instance to shutdown.
560

561
  """
562
  running_instances = GetInstanceList()
563

    
564
  if instance.name not in running_instances:
565
    return True
566

    
567
  hyper = hypervisor.GetHypervisor()
568
  try:
569
    hyper.StopInstance(instance)
570
  except errors.HypervisorError, err:
571
    logger.Error("Failed to stop instance: %s" % err)
572
    return False
573

    
574
  # test every 10secs for 2min
575
  shutdown_ok = False
576

    
577
  time.sleep(1)
578
  for dummy in range(11):
579
    if instance.name not in GetInstanceList():
580
      break
581
    time.sleep(10)
582
  else:
583
    # the shutdown did not succeed
584
    logger.Error("shutdown of '%s' unsuccessful, using destroy" % instance)
585

    
586
    try:
587
      hyper.StopInstance(instance, force=True)
588
    except errors.HypervisorError, err:
589
      logger.Error("Failed to stop instance: %s" % err)
590
      return False
591

    
592
    time.sleep(1)
593
    if instance.name in GetInstanceList():
594
      logger.Error("could not shutdown instance '%s' even by destroy")
595
      return False
596

    
597
  return True
598

    
599

    
600
def CreateBlockDevice(disk, size, on_primary, info):
601
  """Creates a block device for an instance.
602

603
  Args:
604
   bdev: a ganeti.objects.Disk object
605
   size: the size of the physical underlying devices
606
   do_open: if the device should be `Assemble()`-d and
607
            `Open()`-ed after creation
608

609
  Returns:
610
    the new unique_id of the device (this can sometime be
611
    computed only after creation), or None. On secondary nodes,
612
    it's not required to return anything.
613

614
  """
615
  clist = []
616
  if disk.children:
617
    for child in disk.children:
618
      crdev = _RecursiveAssembleBD(child, on_primary)
619
      if on_primary or disk.AssembleOnSecondary():
620
        # we need the children open in case the device itself has to
621
        # be assembled
622
        crdev.Open()
623
      else:
624
        crdev.Close()
625
      clist.append(crdev)
626
  try:
627
    device = bdev.FindDevice(disk.dev_type, disk.physical_id, clist)
628
    if device is not None:
629
      logger.Info("removing existing device %s" % disk)
630
      device.Remove()
631
  except errors.BlockDeviceError, err:
632
    pass
633

    
634
  device = bdev.Create(disk.dev_type, disk.physical_id,
635
                       clist, size)
636
  if device is None:
637
    raise ValueError("Can't create child device for %s, %s" %
638
                     (disk, size))
639
  if on_primary or disk.AssembleOnSecondary():
640
    device.Assemble()
641
    device.SetSyncSpeed(constants.SYNC_SPEED)
642
    if on_primary or disk.OpenOnSecondary():
643
      device.Open(force=True)
644

    
645
  device.SetInfo(info)
646

    
647
  physical_id = device.unique_id
648
  return physical_id
649

    
650

    
651
def RemoveBlockDevice(disk):
652
  """Remove a block device.
653

654
  This is intended to be called recursively.
655

656
  """
657
  try:
658
    # since we are removing the device, allow a partial match
659
    # this allows removal of broken mirrors
660
    rdev = _RecursiveFindBD(disk, allow_partial=True)
661
  except errors.BlockDeviceError, err:
662
    # probably can't attach
663
    logger.Info("Can't attach to device %s in remove" % disk)
664
    rdev = None
665
  if rdev is not None:
666
    result = rdev.Remove()
667
  else:
668
    result = True
669
  if disk.children:
670
    for child in disk.children:
671
      result = result and RemoveBlockDevice(child)
672
  return result
673

    
674

    
675
def _RecursiveAssembleBD(disk, as_primary):
676
  """Activate a block device for an instance.
677

678
  This is run on the primary and secondary nodes for an instance.
679

680
  This function is called recursively.
681

682
  Args:
683
    disk: a objects.Disk object
684
    as_primary: if we should make the block device read/write
685

686
  Returns:
687
    the assembled device or None (in case no device was assembled)
688

689
  If the assembly is not successful, an exception is raised.
690

691
  """
692
  children = []
693
  if disk.children:
694
    for chld_disk in disk.children:
695
      children.append(_RecursiveAssembleBD(chld_disk, as_primary))
696

    
697
  if as_primary or disk.AssembleOnSecondary():
698
    r_dev = bdev.AttachOrAssemble(disk.dev_type, disk.physical_id, children)
699
    r_dev.SetSyncSpeed(constants.SYNC_SPEED)
700
    result = r_dev
701
    if as_primary or disk.OpenOnSecondary():
702
      r_dev.Open()
703
    else:
704
      r_dev.Close()
705
  else:
706
    result = True
707
  return result
708

    
709

    
710
def AssembleBlockDevice(disk, as_primary):
711
  """Activate a block device for an instance.
712

713
  This is a wrapper over _RecursiveAssembleBD.
714

715
  Returns:
716
    a /dev path for primary nodes
717
    True for secondary nodes
718

719
  """
720
  result = _RecursiveAssembleBD(disk, as_primary)
721
  if isinstance(result, bdev.BlockDev):
722
    result = result.dev_path
723
  return result
724

    
725

    
726
def ShutdownBlockDevice(disk):
727
  """Shut down a block device.
728

729
  First, if the device is assembled (can `Attach()`), then the device
730
  is shutdown. Then the children of the device are shutdown.
731

732
  This function is called recursively. Note that we don't cache the
733
  children or such, as oppossed to assemble, shutdown of different
734
  devices doesn't require that the upper device was active.
735

736
  """
737
  r_dev = _RecursiveFindBD(disk)
738
  if r_dev is not None:
739
    result = r_dev.Shutdown()
740
  else:
741
    result = True
742
  if disk.children:
743
    for child in disk.children:
744
      result = result and ShutdownBlockDevice(child)
745
  return result
746

    
747

    
748
def MirrorAddChild(md_cdev, new_cdev):
749
  """Extend an MD raid1 array.
750

751
  """
752
  md_bdev = _RecursiveFindBD(md_cdev, allow_partial=True)
753
  if md_bdev is None:
754
    logger.Error("Can't find md device")
755
    return False
756
  new_bdev = _RecursiveFindBD(new_cdev)
757
  if new_bdev is None:
758
    logger.Error("Can't find new device to add")
759
    return False
760
  new_bdev.Open()
761
  md_bdev.AddChild(new_bdev)
762
  return True
763

    
764

    
765
def MirrorRemoveChild(md_cdev, new_cdev):
766
  """Reduce an MD raid1 array.
767

768
  """
769
  md_bdev = _RecursiveFindBD(md_cdev)
770
  if md_bdev is None:
771
    return False
772
  new_bdev = _RecursiveFindBD(new_cdev)
773
  if new_bdev is None:
774
    return False
775
  new_bdev.Open()
776
  md_bdev.RemoveChild(new_bdev.dev_path)
777
  return True
778

    
779

    
780
def GetMirrorStatus(disks):
781
  """Get the mirroring status of a list of devices.
782

783
  Args:
784
    disks: list of `objects.Disk`
785

786
  Returns:
787
    list of (mirror_done, estimated_time) tuples, which
788
    are the result of bdev.BlockDevice.CombinedSyncStatus()
789

790
  """
791
  stats = []
792
  for dsk in disks:
793
    rbd = _RecursiveFindBD(dsk)
794
    if rbd is None:
795
      raise errors.BlockDeviceError("Can't find device %s" % str(dsk))
796
    stats.append(rbd.CombinedSyncStatus())
797
  return stats
798

    
799

    
800
def _RecursiveFindBD(disk, allow_partial=False):
801
  """Check if a device is activated.
802

803
  If so, return informations about the real device.
804

805
  Args:
806
    disk: the objects.Disk instance
807
    allow_partial: don't abort the find if a child of the
808
                   device can't be found; this is intended to be
809
                   used when repairing mirrors
810

811
  Returns:
812
    None if the device can't be found
813
    otherwise the device instance
814

815
  """
816
  children = []
817
  if disk.children:
818
    for chdisk in disk.children:
819
      children.append(_RecursiveFindBD(chdisk))
820

    
821
  return bdev.FindDevice(disk.dev_type, disk.physical_id, children)
822

    
823

    
824
def FindBlockDevice(disk):
825
  """Check if a device is activated.
826

827
  If so, return informations about the real device.
828

829
  Args:
830
    disk: the objects.Disk instance
831
  Returns:
832
    None if the device can't be found
833
    (device_path, major, minor, sync_percent, estimated_time, is_degraded)
834

835
  """
836
  rbd = _RecursiveFindBD(disk)
837
  if rbd is None:
838
    return rbd
839
  sync_p, est_t, is_degr = rbd.GetSyncStatus()
840
  return rbd.dev_path, rbd.major, rbd.minor, sync_p, est_t, is_degr
841

    
842

    
843
def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
844
  """Write a file to the filesystem.
845

846
  This allows the master to overwrite(!) a file. It will only perform
847
  the operation if the file belongs to a list of configuration files.
848

849
  """
850
  if not os.path.isabs(file_name):
851
    logger.Error("Filename passed to UploadFile is not absolute: '%s'" %
852
                 file_name)
853
    return False
854

    
855
  allowed_files = [constants.CLUSTER_CONF_FILE, "/etc/hosts",
856
                   constants.SSH_KNOWN_HOSTS_FILE]
857
  allowed_files.extend(ssconf.SimpleStore().GetFileList())
858
  if file_name not in allowed_files:
859
    logger.Error("Filename passed to UploadFile not in allowed"
860
                 " upload targets: '%s'" % file_name)
861
    return False
862

    
863
  dir_name, small_name = os.path.split(file_name)
864
  fd, new_name = tempfile.mkstemp('.new', small_name, dir_name)
865
  # here we need to make sure we remove the temp file, if any error
866
  # leaves it in place
867
  try:
868
    os.chown(new_name, uid, gid)
869
    os.chmod(new_name, mode)
870
    os.write(fd, data)
871
    os.fsync(fd)
872
    os.utime(new_name, (atime, mtime))
873
    os.rename(new_name, file_name)
874
  finally:
875
    os.close(fd)
876
    utils.RemoveFile(new_name)
877
  return True
878

    
879

    
880
def _ErrnoOrStr(err):
881
  """Format an EnvironmentError exception.
882

883
  If the `err` argument has an errno attribute, it will be looked up
884
  and converted into a textual EXXXX description. Otherwise the string
885
  representation of the error will be returned.
886

887
  """
888
  if hasattr(err, 'errno'):
889
    detail = errno.errorcode[err.errno]
890
  else:
891
    detail = str(err)
892
  return detail
893

    
894
def _OSSearch(name, search_path=None):
895
  """Search for OSes with the given name in the search_path.
896

897
  Args:
898
    name: The name of the OS to look for
899
    search_path: List of dirs to search (defaults to constants.OS_SEARCH_PATH)
900

901
  Returns:
902
    The base_dir the OS resides in
903

904
  """
905

    
906
  if search_path is None:
907
    search_path = constants.OS_SEARCH_PATH
908

    
909
  for dir in search_path:
910
    t_os_dir = os.path.sep.join([dir, name])
911
    if os.path.isdir(t_os_dir):
912
        return dir
913

    
914
  return None
915

    
916
def _OSOndiskVersion(name, os_dir):
917
  """Compute and return the api version of a given OS.
918

919
  This function will try to read the api version of the os given by
920
  the 'name' parameter and residing in the 'os_dir' directory.
921

922
  Return value will be either an integer denoting the version or None in the
923
  case when this is not a valid OS name.
924

925
  """
926

    
927
  api_file = os.path.sep.join([os_dir, "ganeti_api_version"])
928

    
929
  try:
930
    st = os.stat(api_file)
931
  except EnvironmentError, err:
932
    raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file not"
933
                           " found (%s)" % _ErrnoOrStr(err))
934

    
935
  if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
936
    raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file is not"
937
                           " a regular file")
938

    
939
  try:
940
    f = open(api_file)
941
    try:
942
      api_version = f.read(256)
943
    finally:
944
      f.close()
945
  except EnvironmentError, err:
946
    raise errors.InvalidOS(name, os_dir, "error while reading the"
947
                           " API version (%s)" % _ErrnoOrStr(err))
948

    
949
  api_version = api_version.strip()
950
  try:
951
    api_version = int(api_version)
952
  except (TypeError, ValueError), err:
953
    raise errors.InvalidOS(name, os_dir,
954
                           "API version is not integer (%s)" % str(err))
955

    
956
  return api_version
957

    
958

    
959
def DiagnoseOS(top_dirs=None):
960
  """Compute the validity for all OSes.
961

962
  For each name in all the given top directories (if not given defaults i
963
  to constants.OS_SEARCH_PATH it will return an object. If this is a valid
964
  os, the object will be an instance of the object.OS class. If not,
965
  it will be an instance of errors.InvalidOS and this signifies that
966
  this name does not correspond to a valid OS.
967

968
  Returns:
969
    list of objects
970

971
  """
972
  if top_dirs is None:
973
    top_dirs = constants.OS_SEARCH_PATH
974

    
975
  result = []
976
  for dir in top_dirs:
977
    if os.path.isdir(dir):
978
      try:
979
        f_names = utils.ListVisibleFiles(dir)
980
      except EnvironmentError, err:
981
        logger.Error("Can't list the OS directory %s: %s" % (dir,str(err)))
982
        break
983
      for name in f_names:
984
        try:
985
          os_inst = OSFromDisk(name, base_dir=dir)
986
          result.append(os_inst)
987
        except errors.InvalidOS, err:
988
          result.append(err)
989

    
990
  return result
991

    
992

    
993
def OSFromDisk(name, base_dir=None):
994
  """Create an OS instance from disk.
995

996
  This function will return an OS instance if the given name is a
997
  valid OS name. Otherwise, it will raise an appropriate
998
  `errors.InvalidOS` exception, detailing why this is not a valid
999
  OS.
1000

1001
  Args:
1002
    os_dir: Directory containing the OS scripts. Defaults to a search
1003
            in all the OS_SEARCH_PATH directories.
1004

1005
  """
1006

    
1007
  if base_dir is None:
1008
    base_dir = _OSSearch(name)
1009

    
1010
  if base_dir is None:
1011
    raise errors.InvalidOS(name, None, "OS dir not found in search path")
1012

    
1013
  os_dir = os.path.sep.join([base_dir, name])
1014
  api_version = _OSOndiskVersion(name, os_dir)
1015

    
1016
  if api_version != constants.OS_API_VERSION:
1017
    raise errors.InvalidOS(name, os_dir, "API version mismatch"
1018
                           " (found %s want %s)"
1019
                           % (api_version, constants.OS_API_VERSION))
1020

    
1021
  # OS Scripts dictionary, we will populate it with the actual script names
1022
  os_scripts = {'create': '', 'export': '', 'import': '', 'rename': ''}
1023

    
1024
  for script in os_scripts:
1025
    os_scripts[script] = os.path.sep.join([os_dir, script])
1026

    
1027
    try:
1028
      st = os.stat(os_scripts[script])
1029
    except EnvironmentError, err:
1030
      raise errors.InvalidOS(name, os_dir, "'%s' script missing (%s)" %
1031
                             (script, _ErrnoOrStr(err)))
1032

    
1033
    if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
1034
      raise errors.InvalidOS(name, os_dir, "'%s' script not executable" %
1035
                             script)
1036

    
1037
    if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
1038
      raise errors.InvalidOS(name, os_dir, "'%s' is not a regular file" %
1039
                             script)
1040

    
1041

    
1042
  return objects.OS(name=name, path=os_dir,
1043
                    create_script=os_scripts['create'],
1044
                    export_script=os_scripts['export'],
1045
                    import_script=os_scripts['import'],
1046
                    rename_script=os_scripts['rename'],
1047
                    api_version=api_version)
1048

    
1049

    
1050
def SnapshotBlockDevice(disk):
1051
  """Create a snapshot copy of a block device.
1052

1053
  This function is called recursively, and the snapshot is actually created
1054
  just for the leaf lvm backend device.
1055

1056
  Args:
1057
    disk: the disk to be snapshotted
1058

1059
  Returns:
1060
    a config entry for the actual lvm device snapshotted.
1061

1062
  """
1063
  if disk.children:
1064
    if len(disk.children) == 1:
1065
      # only one child, let's recurse on it
1066
      return SnapshotBlockDevice(disk.children[0])
1067
    else:
1068
      # more than one child, choose one that matches
1069
      for child in disk.children:
1070
        if child.size == disk.size:
1071
          # return implies breaking the loop
1072
          return SnapshotBlockDevice(child)
1073
  elif disk.dev_type == "lvm":
1074
    r_dev = _RecursiveFindBD(disk)
1075
    if r_dev is not None:
1076
      # let's stay on the safe side and ask for the full size, for now
1077
      return r_dev.Snapshot(disk.size)
1078
    else:
1079
      return None
1080
  else:
1081
    raise errors.ProgrammerError("Cannot snapshot non-lvm block device"
1082
                                 "'%s' of type '%s'" %
1083
                                 (disk.unique_id, disk.dev_type))
1084

    
1085

    
1086
def ExportSnapshot(disk, dest_node, instance):
1087
  """Export a block device snapshot to a remote node.
1088

1089
  Args:
1090
    disk: the snapshot block device
1091
    dest_node: the node to send the image to
1092
    instance: instance being exported
1093

1094
  Returns:
1095
    True if successful, False otherwise.
1096

1097
  """
1098
  inst_os = OSFromDisk(instance.os)
1099
  export_script = inst_os.export_script
1100

    
1101
  logfile = "%s/exp-%s-%s-%s.log" % (constants.LOG_OS_DIR, inst_os.name,
1102
                                     instance.name, int(time.time()))
1103
  if not os.path.exists(constants.LOG_OS_DIR):
1104
    os.mkdir(constants.LOG_OS_DIR, 0750)
1105

    
1106
  real_os_dev = _RecursiveFindBD(disk)
1107
  if real_os_dev is None:
1108
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
1109
                                  str(disk))
1110
  real_os_dev.Open()
1111

    
1112
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
1113
  destfile = disk.physical_id[1]
1114

    
1115
  # the target command is built out of three individual commands,
1116
  # which are joined by pipes; we check each individual command for
1117
  # valid parameters
1118

    
1119
  expcmd = utils.BuildShellCmd("cd %s; %s -i %s -b %s 2>%s", inst_os.path,
1120
                               export_script, instance.name,
1121
                               real_os_dev.dev_path, logfile)
1122

    
1123
  comprcmd = "gzip"
1124

    
1125
  destcmd = utils.BuildShellCmd("mkdir -p %s && cat > %s/%s",
1126
                                destdir, destdir, destfile)
1127
  remotecmd = ssh.BuildSSHCmd(dest_node, constants.GANETI_RUNAS, destcmd)
1128

    
1129

    
1130

    
1131
  # all commands have been checked, so we're safe to combine them
1132
  command = '|'.join([expcmd, comprcmd, utils.ShellQuoteArgs(remotecmd)])
1133

    
1134
  result = utils.RunCmd(command)
1135

    
1136
  if result.failed:
1137
    logger.Error("os snapshot export command '%s' returned error: %s"
1138
                 " output: %s" %
1139
                 (command, result.fail_reason, result.output))
1140
    return False
1141

    
1142
  return True
1143

    
1144

    
1145
def FinalizeExport(instance, snap_disks):
1146
  """Write out the export configuration information.
1147

1148
  Args:
1149
    instance: instance configuration
1150
    snap_disks: snapshot block devices
1151

1152
  Returns:
1153
    False in case of error, True otherwise.
1154

1155
  """
1156
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
1157
  finaldestdir = os.path.join(constants.EXPORT_DIR, instance.name)
1158

    
1159
  config = objects.SerializableConfigParser()
1160

    
1161
  config.add_section(constants.INISECT_EXP)
1162
  config.set(constants.INISECT_EXP, 'version', '0')
1163
  config.set(constants.INISECT_EXP, 'timestamp', '%d' % int(time.time()))
1164
  config.set(constants.INISECT_EXP, 'source', instance.primary_node)
1165
  config.set(constants.INISECT_EXP, 'os', instance.os)
1166
  config.set(constants.INISECT_EXP, 'compression', 'gzip')
1167

    
1168
  config.add_section(constants.INISECT_INS)
1169
  config.set(constants.INISECT_INS, 'name', instance.name)
1170
  config.set(constants.INISECT_INS, 'memory', '%d' % instance.memory)
1171
  config.set(constants.INISECT_INS, 'vcpus', '%d' % instance.vcpus)
1172
  config.set(constants.INISECT_INS, 'disk_template', instance.disk_template)
1173
  for nic_count, nic in enumerate(instance.nics):
1174
    config.set(constants.INISECT_INS, 'nic%d_mac' %
1175
               nic_count, '%s' % nic.mac)
1176
    config.set(constants.INISECT_INS, 'nic%d_ip' % nic_count, '%s' % nic.ip)
1177
  # TODO: redundant: on load can read nics until it doesn't exist
1178
  config.set(constants.INISECT_INS, 'nic_count' , '%d' % nic_count)
1179

    
1180
  for disk_count, disk in enumerate(snap_disks):
1181
    config.set(constants.INISECT_INS, 'disk%d_ivname' % disk_count,
1182
               ('%s' % disk.iv_name))
1183
    config.set(constants.INISECT_INS, 'disk%d_dump' % disk_count,
1184
               ('%s' % disk.physical_id[1]))
1185
    config.set(constants.INISECT_INS, 'disk%d_size' % disk_count,
1186
               ('%d' % disk.size))
1187
  config.set(constants.INISECT_INS, 'disk_count' , '%d' % disk_count)
1188

    
1189
  cff = os.path.join(destdir, constants.EXPORT_CONF_FILE)
1190
  cfo = open(cff, 'w')
1191
  try:
1192
    config.write(cfo)
1193
  finally:
1194
    cfo.close()
1195

    
1196
  shutil.rmtree(finaldestdir, True)
1197
  shutil.move(destdir, finaldestdir)
1198

    
1199
  return True
1200

    
1201

    
1202
def ExportInfo(dest):
1203
  """Get export configuration information.
1204

1205
  Args:
1206
    dest: directory containing the export
1207

1208
  Returns:
1209
    A serializable config file containing the export info.
1210

1211
  """
1212
  cff = os.path.join(dest, constants.EXPORT_CONF_FILE)
1213

    
1214
  config = objects.SerializableConfigParser()
1215
  config.read(cff)
1216

    
1217
  if (not config.has_section(constants.INISECT_EXP) or
1218
      not config.has_section(constants.INISECT_INS)):
1219
    return None
1220

    
1221
  return config
1222

    
1223

    
1224
def ImportOSIntoInstance(instance, os_disk, swap_disk, src_node, src_image):
1225
  """Import an os image into an instance.
1226

1227
  Args:
1228
    instance: the instance object
1229
    os_disk: the instance-visible name of the os device
1230
    swap_disk: the instance-visible name of the swap device
1231
    src_node: node holding the source image
1232
    src_image: path to the source image on src_node
1233

1234
  Returns:
1235
    False in case of error, True otherwise.
1236

1237
  """
1238
  inst_os = OSFromDisk(instance.os)
1239
  import_script = inst_os.import_script
1240

    
1241
  os_device = instance.FindDisk(os_disk)
1242
  if os_device is None:
1243
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
1244
    return False
1245

    
1246
  swap_device = instance.FindDisk(swap_disk)
1247
  if swap_device is None:
1248
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
1249
    return False
1250

    
1251
  real_os_dev = _RecursiveFindBD(os_device)
1252
  if real_os_dev is None:
1253
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
1254
                                  str(os_device))
1255
  real_os_dev.Open()
1256

    
1257
  real_swap_dev = _RecursiveFindBD(swap_device)
1258
  if real_swap_dev is None:
1259
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
1260
                                  str(swap_device))
1261
  real_swap_dev.Open()
1262

    
1263
  logfile = "%s/import-%s-%s-%s.log" % (constants.LOG_OS_DIR, instance.os,
1264
                                        instance.name, int(time.time()))
1265
  if not os.path.exists(constants.LOG_OS_DIR):
1266
    os.mkdir(constants.LOG_OS_DIR, 0750)
1267

    
1268
  destcmd = utils.BuildShellCmd('cat %s', src_image)
1269
  remotecmd = ssh.BuildSSHCmd(src_node, constants.GANETI_RUNAS, destcmd)
1270

    
1271
  comprcmd = "gunzip"
1272
  impcmd = utils.BuildShellCmd("(cd %s; %s -i %s -b %s -s %s &>%s)",
1273
                               inst_os.path, import_script, instance.name,
1274
                               real_os_dev.dev_path, real_swap_dev.dev_path,
1275
                               logfile)
1276

    
1277
  command = '|'.join([utils.ShellQuoteArgs(remotecmd), comprcmd, impcmd])
1278

    
1279
  result = utils.RunCmd(command)
1280

    
1281
  if result.failed:
1282
    logger.Error("os import command '%s' returned error: %s"
1283
                 " output: %s" %
1284
                 (command, result.fail_reason, result.output))
1285
    return False
1286

    
1287
  return True
1288

    
1289

    
1290
def ListExports():
1291
  """Return a list of exports currently available on this machine.
1292

1293
  """
1294
  if os.path.isdir(constants.EXPORT_DIR):
1295
    return utils.ListVisibleFiles(constants.EXPORT_DIR)
1296
  else:
1297
    return []
1298

    
1299

    
1300
def RemoveExport(export):
1301
  """Remove an existing export from the node.
1302

1303
  Args:
1304
    export: the name of the export to remove
1305

1306
  Returns:
1307
    False in case of error, True otherwise.
1308

1309
  """
1310
  target = os.path.join(constants.EXPORT_DIR, export)
1311

    
1312
  shutil.rmtree(target)
1313
  # TODO: catch some of the relevant exceptions and provide a pretty
1314
  # error message if rmtree fails.
1315

    
1316
  return True
1317

    
1318

    
1319
class HooksRunner(object):
1320
  """Hook runner.
1321

1322
  This class is instantiated on the node side (ganeti-noded) and not on
1323
  the master side.
1324

1325
  """
1326
  RE_MASK = re.compile("^[a-zA-Z0-9_-]+$")
1327

    
1328
  def __init__(self, hooks_base_dir=None):
1329
    """Constructor for hooks runner.
1330

1331
    Args:
1332
      - hooks_base_dir: if not None, this overrides the
1333
        constants.HOOKS_BASE_DIR (useful for unittests)
1334
      - logs_base_dir: if not None, this overrides the
1335
        constants.LOG_HOOKS_DIR (useful for unittests)
1336
      - logging: enable or disable logging of script output
1337

1338
    """
1339
    if hooks_base_dir is None:
1340
      hooks_base_dir = constants.HOOKS_BASE_DIR
1341
    self._BASE_DIR = hooks_base_dir
1342

    
1343
  @staticmethod
1344
  def ExecHook(script, env):
1345
    """Exec one hook script.
1346

1347
    Args:
1348
     - phase: the phase
1349
     - script: the full path to the script
1350
     - env: the environment with which to exec the script
1351

1352
    """
1353
    # exec the process using subprocess and log the output
1354
    fdstdin = None
1355
    try:
1356
      fdstdin = open("/dev/null", "r")
1357
      child = subprocess.Popen([script], stdin=fdstdin, stdout=subprocess.PIPE,
1358
                               stderr=subprocess.STDOUT, close_fds=True,
1359
                               shell=False, cwd="/",env=env)
1360
      output = ""
1361
      try:
1362
        output = child.stdout.read(4096)
1363
        child.stdout.close()
1364
      except EnvironmentError, err:
1365
        output += "Hook script error: %s" % str(err)
1366

    
1367
      while True:
1368
        try:
1369
          result = child.wait()
1370
          break
1371
        except EnvironmentError, err:
1372
          if err.errno == errno.EINTR:
1373
            continue
1374
          raise
1375
    finally:
1376
      # try not to leak fds
1377
      for fd in (fdstdin, ):
1378
        if fd is not None:
1379
          try:
1380
            fd.close()
1381
          except EnvironmentError, err:
1382
            # just log the error
1383
            #logger.Error("While closing fd %s: %s" % (fd, err))
1384
            pass
1385

    
1386
    return result == 0, output
1387

    
1388
  def RunHooks(self, hpath, phase, env):
1389
    """Run the scripts in the hooks directory.
1390

1391
    This method will not be usually overriden by child opcodes.
1392

1393
    """
1394
    if phase == constants.HOOKS_PHASE_PRE:
1395
      suffix = "pre"
1396
    elif phase == constants.HOOKS_PHASE_POST:
1397
      suffix = "post"
1398
    else:
1399
      raise errors.ProgrammerError("Unknown hooks phase: '%s'" % phase)
1400
    rr = []
1401

    
1402
    subdir = "%s-%s.d" % (hpath, suffix)
1403
    dir_name = "%s/%s" % (self._BASE_DIR, subdir)
1404
    try:
1405
      dir_contents = utils.ListVisibleFiles(dir_name)
1406
    except OSError, err:
1407
      # must log
1408
      return rr
1409

    
1410
    # we use the standard python sort order,
1411
    # so 00name is the recommended naming scheme
1412
    dir_contents.sort()
1413
    for relname in dir_contents:
1414
      fname = os.path.join(dir_name, relname)
1415
      if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
1416
          self.RE_MASK.match(relname) is not None):
1417
        rrval = constants.HKR_SKIP
1418
        output = ""
1419
      else:
1420
        result, output = self.ExecHook(fname, env)
1421
        if not result:
1422
          rrval = constants.HKR_FAIL
1423
        else:
1424
          rrval = constants.HKR_SUCCESS
1425
      rr.append(("%s/%s" % (subdir, relname), rrval, output))
1426

    
1427
    return rr