Statistics
| Branch: | Tag: | Revision:

root / lib / backend.py @ 2f8598a5

History | View | Annotate | Download (39.9 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Functions used by the node daemon"""
23

    
24

    
25
import os
26
import os.path
27
import shutil
28
import time
29
import tempfile
30
import stat
31
import errno
32
import re
33
import subprocess
34

    
35
from ganeti import logger
36
from ganeti import errors
37
from ganeti import utils
38
from ganeti import ssh
39
from ganeti import hypervisor
40
from ganeti import constants
41
from ganeti import bdev
42
from ganeti import objects
43
from ganeti import ssconf
44

    
45

    
46
def StartMaster():
47
  """Activate local node as master node.
48

49
  There are two needed steps for this:
50
    - run the master script
51
    - register the cron script
52

53
  """
54
  result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "start"])
55

    
56
  if result.failed:
57
    logger.Error("could not activate cluster interface with command %s,"
58
                 " error: '%s'" % (result.cmd, result.output))
59
    return False
60

    
61
  return True
62

    
63

    
64
def StopMaster():
65
  """Deactivate this node as master.
66

67
  This does two things:
68
    - run the master stop script
69
    - remove link to master cron script.
70

71
  """
72
  result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "stop"])
73

    
74
  if result.failed:
75
    logger.Error("could not deactivate cluster interface with command %s,"
76
                 " error: '%s'" % (result.cmd, result.output))
77
    return False
78

    
79
  return True
80

    
81

    
82
def AddNode(dsa, dsapub, rsa, rsapub, sshkey, sshpub):
83
  """Joins this node to the cluster.
84

85
  This does the following:
86
      - updates the hostkeys of the machine (rsa and dsa)
87
      - adds the ssh private key to the user
88
      - adds the ssh public key to the users' authorized_keys file
89

90
  """
91
  user_dir = utils.GetHomeDir(constants.GANETI_RUNAS)
92
  if not user_dir:
93
    logger.Error("Cannot find home of run-as user %s" % constants.GANETI_RUNAS)
94
    return False
95

    
96
  sshd_keys =  [("ssh_host_rsa_key", rsa, 0600),
97
                ("ssh_host_rsa_key.pub", rsapub, 0644),
98
                ("ssh_host_dsa_key", dsa, 0600),
99
                ("ssh_host_dsa_key.pub",  dsapub, 0644)]
100
  for name, content, mode in sshd_keys:
101
    utils.WriteFile(os.path.join(constants.SSH_CONFIG_DIR, name),
102
                    data=content, mode=mode)
103

    
104
  user_ssh_dir = os.path.join(user_dir, ".ssh")
105

    
106
  if not os.path.isdir(user_ssh_dir):
107
    os.mkdir(user_ssh_dir)
108

    
109
  for name, content in [("id_dsa", sshkey), ("id_dsa.pub", sshpub)]:
110
    utils.WriteFile(os.path.join(user_ssh_dir, name), data=content, mode=0600)
111

    
112
  utils.AddAuthorizedKey(os.path.join(user_ssh_dir, "authorized_keys"), sshpub)
113

    
114
  utils.RunCmd([constants.SSH_INITD_SCRIPT, "restart"])
115

    
116
  return True
117

    
118

    
119
def LeaveCluster():
120
  """Cleans up the current node and prepares it to be removed from the cluster.
121

122
  """
123
  if os.path.isdir(constants.DATA_DIR):
124
    for rel_name in utils.ListVisibleFiles(constants.DATA_DIR):
125
      full_name = os.path.join(constants.DATA_DIR, rel_name)
126
      if os.path.isfile(full_name) and not os.path.islink(full_name):
127
        utils.RemoveFile(full_name)
128

    
129
  user_dir = utils.GetHomeDir(constants.GANETI_RUNAS)
130
  if not user_dir:
131
    logger.Error("Cannot find home of run-as user %s" % constants.GANETI_RUNAS)
132
    return
133

    
134
  user_ssh_dir = os.path.join(user_dir, ".ssh")
135

    
136
  if not os.path.isdir(user_ssh_dir):
137
    logger.Error("User's ssh dir '%s' does not exist?!" % user_ssh_dir)
138
    return
139

    
140
  f = open(os.path.join(user_ssh_dir, "id_dsa.pub"), 'r')
141
  try:
142
    utils.RemoveAuthorizedKey(os.path.join(user_ssh_dir, "authorized_keys"),
143
                              f.read(8192))
144
  finally:
145
    f.close()
146

    
147

    
148
  utils.RemoveFile(os.path.join(user_ssh_dir, "id_dsa"))
149
  utils.RemoveFile(os.path.join(user_ssh_dir, "id_dsa.pub"))
150

    
151

    
152
def GetNodeInfo(vgname):
153
  """Gives back a hash with different informations about the node.
154

155
  Returns:
156
    { 'vg_size' : xxx,  'vg_free' : xxx, 'memory_domain0': xxx,
157
      'memory_free' : xxx, 'memory_total' : xxx }
158
    where
159
    vg_size is the size of the configured volume group in MiB
160
    vg_free is the free size of the volume group in MiB
161
    memory_dom0 is the memory allocated for domain0 in MiB
162
    memory_free is the currently available (free) ram in MiB
163
    memory_total is the total number of ram in MiB
164

165
  """
166
  outputarray = {}
167
  vginfo = _GetVGInfo(vgname)
168
  outputarray['vg_size'] = vginfo['vg_size']
169
  outputarray['vg_free'] = vginfo['vg_free']
170

    
171
  hyper = hypervisor.GetHypervisor()
172
  hyp_info = hyper.GetNodeInfo()
173
  if hyp_info is not None:
174
    outputarray.update(hyp_info)
175

    
176
  f = open("/proc/sys/kernel/random/boot_id", 'r')
177
  try:
178
    outputarray["bootid"] = f.read(128).rstrip("\n")
179
  finally:
180
    f.close()
181

    
182
  return outputarray
183

    
184

    
185
def VerifyNode(what):
186
  """Verify the status of the local node.
187

188
  Args:
189
    what - a dictionary of things to check:
190
      'filelist' : list of files for which to compute checksums
191
      'nodelist' : list of nodes we should check communication with
192
      'hypervisor': run the hypervisor-specific verify
193

194
  Requested files on local node are checksummed and the result returned.
195

196
  The nodelist is traversed, with the following checks being made
197
  for each node:
198
  - known_hosts key correct
199
  - correct resolving of node name (target node returns its own hostname
200
    by ssh-execution of 'hostname', result compared against name in list.
201

202
  """
203
  result = {}
204

    
205
  if 'hypervisor' in what:
206
    result['hypervisor'] = hypervisor.GetHypervisor().Verify()
207

    
208
  if 'filelist' in what:
209
    result['filelist'] = utils.FingerprintFiles(what['filelist'])
210

    
211
  if 'nodelist' in what:
212
    result['nodelist'] = {}
213
    for node in what['nodelist']:
214
      success, message = ssh.VerifyNodeHostname(node)
215
      if not success:
216
        result['nodelist'][node] = message
217
  return result
218

    
219

    
220
def GetVolumeList(vg_name):
221
  """Compute list of logical volumes and their size.
222

223
  Returns:
224
    dictionary of all partions (key) with their size:
225
    test1: 20.06MiB
226

227
  """
228
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m",
229
                         "-oname,size", vg_name])
230
  if result.failed:
231
    logger.Error("Failed to list logical volumes, lvs output: %s" %
232
                 result.output)
233
    return {}
234

    
235
  lvlist = [line.split() for line in result.output.splitlines()]
236
  return dict(lvlist)
237

    
238

    
239
def ListVolumeGroups():
240
  """List the volume groups and their size.
241

242
  Returns:
243
    Dictionary with keys volume name and values the size of the volume
244

245
  """
246
  return utils.ListVolumeGroups()
247

    
248

    
249
def NodeVolumes():
250
  """List all volumes on this node.
251

252
  """
253
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
254
                         "--separator=|",
255
                         "--options=lv_name,lv_size,devices,vg_name"])
256
  if result.failed:
257
    logger.Error("Failed to list logical volumes, lvs output: %s" %
258
                 result.output)
259
    return {}
260

    
261
  def parse_dev(dev):
262
    if '(' in dev:
263
      return dev.split('(')[0]
264
    else:
265
      return dev
266

    
267
  def map_line(line):
268
    return {
269
      'name': line[0].strip(),
270
      'size': line[1].strip(),
271
      'dev': parse_dev(line[2].strip()),
272
      'vg': line[3].strip(),
273
    }
274

    
275
  return [map_line(line.split('|')) for line in result.output.splitlines()]
276

    
277

    
278
def BridgesExist(bridges_list):
279
  """Check if a list of bridges exist on the current node.
280

281
  Returns:
282
    True if all of them exist, false otherwise
283

284
  """
285
  for bridge in bridges_list:
286
    if not utils.BridgeExists(bridge):
287
      return False
288

    
289
  return True
290

    
291

    
292
def GetInstanceList():
293
  """Provides a list of instances.
294

295
  Returns:
296
    A list of all running instances on the current node
297
    - instance1.example.com
298
    - instance2.example.com
299

300
  """
301
  try:
302
    names = hypervisor.GetHypervisor().ListInstances()
303
  except errors.HypervisorError, err:
304
    logger.Error("error enumerating instances: %s" % str(err))
305
    raise
306

    
307
  return names
308

    
309

    
310
def GetInstanceInfo(instance):
311
  """Gives back the informations about an instance as a dictionary.
312

313
  Args:
314
    instance: name of the instance (ex. instance1.example.com)
315

316
  Returns:
317
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
318
    where
319
    memory: memory size of instance (int)
320
    state: xen state of instance (string)
321
    time: cpu time of instance (float)
322

323
  """
324
  output = {}
325

    
326
  iinfo = hypervisor.GetHypervisor().GetInstanceInfo(instance)
327
  if iinfo is not None:
328
    output['memory'] = iinfo[2]
329
    output['state'] = iinfo[4]
330
    output['time'] = iinfo[5]
331

    
332
  return output
333

    
334

    
335
def GetAllInstancesInfo():
336
  """Gather data about all instances.
337

338
  This is the equivalent of `GetInstanceInfo()`, except that it
339
  computes data for all instances at once, thus being faster if one
340
  needs data about more than one instance.
341

342
  Returns: a dictionary of dictionaries, keys being the instance name,
343
    and with values:
344
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
345
    where
346
    memory: memory size of instance (int)
347
    state: xen state of instance (string)
348
    time: cpu time of instance (float)
349
    vcpus: the number of cpus
350

351
  """
352
  output = {}
353

    
354
  iinfo = hypervisor.GetHypervisor().GetAllInstancesInfo()
355
  if iinfo:
356
    for name, inst_id, memory, vcpus, state, times in iinfo:
357
      output[name] = {
358
        'memory': memory,
359
        'vcpus': vcpus,
360
        'state': state,
361
        'time': times,
362
        }
363

    
364
  return output
365

    
366

    
367
def AddOSToInstance(instance, os_disk, swap_disk):
368
  """Add an OS to an instance.
369

370
  Args:
371
    instance: the instance object
372
    os_disk: the instance-visible name of the os device
373
    swap_disk: the instance-visible name of the swap device
374

375
  """
376
  inst_os = OSFromDisk(instance.os)
377

    
378
  create_script = inst_os.create_script
379

    
380
  os_device = instance.FindDisk(os_disk)
381
  if os_device is None:
382
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
383
    return False
384

    
385
  swap_device = instance.FindDisk(swap_disk)
386
  if swap_device is None:
387
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
388
    return False
389

    
390
  real_os_dev = _RecursiveFindBD(os_device)
391
  if real_os_dev is None:
392
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
393
                                  str(os_device))
394
  real_os_dev.Open()
395

    
396
  real_swap_dev = _RecursiveFindBD(swap_device)
397
  if real_swap_dev is None:
398
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
399
                                  str(swap_device))
400
  real_swap_dev.Open()
401

    
402
  logfile = "%s/add-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
403
                                     instance.name, int(time.time()))
404
  if not os.path.exists(constants.LOG_OS_DIR):
405
    os.mkdir(constants.LOG_OS_DIR, 0750)
406

    
407
  command = utils.BuildShellCmd("cd %s && %s -i %s -b %s -s %s &>%s",
408
                                inst_os.path, create_script, instance.name,
409
                                real_os_dev.dev_path, real_swap_dev.dev_path,
410
                                logfile)
411

    
412
  result = utils.RunCmd(command)
413

    
414
  if result.failed:
415
    logger.Error("os create command '%s' returned error: %s"
416
                 " output: %s" %
417
                 (command, result.fail_reason, result.output))
418
    return False
419

    
420
  return True
421

    
422

    
423
def RunRenameInstance(instance, old_name, os_disk, swap_disk):
424
  """Run the OS rename script for an instance.
425

426
  Args:
427
    instance: the instance object
428
    old_name: the old name of the instance
429
    os_disk: the instance-visible name of the os device
430
    swap_disk: the instance-visible name of the swap device
431

432
  """
433
  inst_os = OSFromDisk(instance.os)
434

    
435
  script = inst_os.rename_script
436

    
437
  os_device = instance.FindDisk(os_disk)
438
  if os_device is None:
439
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
440
    return False
441

    
442
  swap_device = instance.FindDisk(swap_disk)
443
  if swap_device is None:
444
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
445
    return False
446

    
447
  real_os_dev = _RecursiveFindBD(os_device)
448
  if real_os_dev is None:
449
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
450
                                  str(os_device))
451
  real_os_dev.Open()
452

    
453
  real_swap_dev = _RecursiveFindBD(swap_device)
454
  if real_swap_dev is None:
455
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
456
                                  str(swap_device))
457
  real_swap_dev.Open()
458

    
459
  logfile = "%s/rename-%s-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
460
                                           old_name,
461
                                           instance.name, int(time.time()))
462
  if not os.path.exists(constants.LOG_OS_DIR):
463
    os.mkdir(constants.LOG_OS_DIR, 0750)
464

    
465
  command = utils.BuildShellCmd("cd %s && %s -o %s -n %s -b %s -s %s &>%s",
466
                                inst_os.path, script, old_name, instance.name,
467
                                real_os_dev.dev_path, real_swap_dev.dev_path,
468
                                logfile)
469

    
470
  result = utils.RunCmd(command)
471

    
472
  if result.failed:
473
    logger.Error("os create command '%s' returned error: %s"
474
                 " output: %s" %
475
                 (command, result.fail_reason, result.output))
476
    return False
477

    
478
  return True
479

    
480

    
481
def _GetVGInfo(vg_name):
482
  """Get informations about the volume group.
483

484
  Args:
485
    vg_name: the volume group
486

487
  Returns:
488
    { 'vg_size' : xxx, 'vg_free' : xxx, 'pv_count' : xxx }
489
    where
490
    vg_size is the total size of the volume group in MiB
491
    vg_free is the free size of the volume group in MiB
492
    pv_count are the number of physical disks in that vg
493

494
  """
495
  retval = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings",
496
                         "--nosuffix", "--units=m", "--separator=:", vg_name])
497

    
498
  if retval.failed:
499
    errmsg = "volume group %s not present" % vg_name
500
    logger.Error(errmsg)
501
    raise errors.LVMError(errmsg)
502
  valarr = retval.stdout.strip().split(':')
503
  retdic = {
504
    "vg_size": int(round(float(valarr[0]), 0)),
505
    "vg_free": int(round(float(valarr[1]), 0)),
506
    "pv_count": int(valarr[2]),
507
    }
508
  return retdic
509

    
510

    
511
def _GatherBlockDevs(instance):
512
  """Set up an instance's block device(s).
513

514
  This is run on the primary node at instance startup. The block
515
  devices must be already assembled.
516

517
  """
518
  block_devices = []
519
  for disk in instance.disks:
520
    device = _RecursiveFindBD(disk)
521
    if device is None:
522
      raise errors.BlockDeviceError("Block device '%s' is not set up." %
523
                                    str(disk))
524
    device.Open()
525
    block_devices.append((disk, device))
526
  return block_devices
527

    
528

    
529
def StartInstance(instance, extra_args):
530
  """Start an instance.
531

532
  Args:
533
    instance - name of instance to start.
534

535
  """
536
  running_instances = GetInstanceList()
537

    
538
  if instance.name in running_instances:
539
    return True
540

    
541
  block_devices = _GatherBlockDevs(instance)
542
  hyper = hypervisor.GetHypervisor()
543

    
544
  try:
545
    hyper.StartInstance(instance, block_devices, extra_args)
546
  except errors.HypervisorError, err:
547
    logger.Error("Failed to start instance: %s" % err)
548
    return False
549

    
550
  return True
551

    
552

    
553
def ShutdownInstance(instance):
554
  """Shut an instance down.
555

556
  Args:
557
    instance - name of instance to shutdown.
558

559
  """
560
  running_instances = GetInstanceList()
561

    
562
  if instance.name not in running_instances:
563
    return True
564

    
565
  hyper = hypervisor.GetHypervisor()
566
  try:
567
    hyper.StopInstance(instance)
568
  except errors.HypervisorError, err:
569
    logger.Error("Failed to stop instance: %s" % err)
570
    return False
571

    
572
  # test every 10secs for 2min
573
  shutdown_ok = False
574

    
575
  time.sleep(1)
576
  for dummy in range(11):
577
    if instance.name not in GetInstanceList():
578
      break
579
    time.sleep(10)
580
  else:
581
    # the shutdown did not succeed
582
    logger.Error("shutdown of '%s' unsuccessful, using destroy" % instance)
583

    
584
    try:
585
      hyper.StopInstance(instance, force=True)
586
    except errors.HypervisorError, err:
587
      logger.Error("Failed to stop instance: %s" % err)
588
      return False
589

    
590
    time.sleep(1)
591
    if instance.name in GetInstanceList():
592
      logger.Error("could not shutdown instance '%s' even by destroy")
593
      return False
594

    
595
  return True
596

    
597

    
598
def CreateBlockDevice(disk, size, on_primary, info):
599
  """Creates a block device for an instance.
600

601
  Args:
602
   bdev: a ganeti.objects.Disk object
603
   size: the size of the physical underlying devices
604
   do_open: if the device should be `Assemble()`-d and
605
            `Open()`-ed after creation
606

607
  Returns:
608
    the new unique_id of the device (this can sometime be
609
    computed only after creation), or None. On secondary nodes,
610
    it's not required to return anything.
611

612
  """
613
  clist = []
614
  if disk.children:
615
    for child in disk.children:
616
      crdev = _RecursiveAssembleBD(child, on_primary)
617
      if on_primary or disk.AssembleOnSecondary():
618
        # we need the children open in case the device itself has to
619
        # be assembled
620
        crdev.Open()
621
      else:
622
        crdev.Close()
623
      clist.append(crdev)
624
  try:
625
    device = bdev.FindDevice(disk.dev_type, disk.physical_id, clist)
626
    if device is not None:
627
      logger.Info("removing existing device %s" % disk)
628
      device.Remove()
629
  except errors.BlockDeviceError, err:
630
    pass
631

    
632
  device = bdev.Create(disk.dev_type, disk.physical_id,
633
                       clist, size)
634
  if device is None:
635
    raise ValueError("Can't create child device for %s, %s" %
636
                     (disk, size))
637
  if on_primary or disk.AssembleOnSecondary():
638
    device.Assemble()
639
    device.SetSyncSpeed(constants.SYNC_SPEED)
640
    if on_primary or disk.OpenOnSecondary():
641
      device.Open(force=True)
642

    
643
  device.SetInfo(info)
644

    
645
  physical_id = device.unique_id
646
  return physical_id
647

    
648

    
649
def RemoveBlockDevice(disk):
650
  """Remove a block device.
651

652
  This is intended to be called recursively.
653

654
  """
655
  try:
656
    # since we are removing the device, allow a partial match
657
    # this allows removal of broken mirrors
658
    rdev = _RecursiveFindBD(disk, allow_partial=True)
659
  except errors.BlockDeviceError, err:
660
    # probably can't attach
661
    logger.Info("Can't attach to device %s in remove" % disk)
662
    rdev = None
663
  if rdev is not None:
664
    result = rdev.Remove()
665
  else:
666
    result = True
667
  if disk.children:
668
    for child in disk.children:
669
      result = result and RemoveBlockDevice(child)
670
  return result
671

    
672

    
673
def _RecursiveAssembleBD(disk, as_primary):
674
  """Activate a block device for an instance.
675

676
  This is run on the primary and secondary nodes for an instance.
677

678
  This function is called recursively.
679

680
  Args:
681
    disk: a objects.Disk object
682
    as_primary: if we should make the block device read/write
683

684
  Returns:
685
    the assembled device or None (in case no device was assembled)
686

687
  If the assembly is not successful, an exception is raised.
688

689
  """
690
  children = []
691
  if disk.children:
692
    for chld_disk in disk.children:
693
      children.append(_RecursiveAssembleBD(chld_disk, as_primary))
694

    
695
  if as_primary or disk.AssembleOnSecondary():
696
    r_dev = bdev.AttachOrAssemble(disk.dev_type, disk.physical_id, children)
697
    r_dev.SetSyncSpeed(constants.SYNC_SPEED)
698
    result = r_dev
699
    if as_primary or disk.OpenOnSecondary():
700
      r_dev.Open()
701
    else:
702
      r_dev.Close()
703
  else:
704
    result = True
705
  return result
706

    
707

    
708
def AssembleBlockDevice(disk, as_primary):
709
  """Activate a block device for an instance.
710

711
  This is a wrapper over _RecursiveAssembleBD.
712

713
  Returns:
714
    a /dev path for primary nodes
715
    True for secondary nodes
716

717
  """
718
  result = _RecursiveAssembleBD(disk, as_primary)
719
  if isinstance(result, bdev.BlockDev):
720
    result = result.dev_path
721
  return result
722

    
723

    
724
def ShutdownBlockDevice(disk):
725
  """Shut down a block device.
726

727
  First, if the device is assembled (can `Attach()`), then the device
728
  is shutdown. Then the children of the device are shutdown.
729

730
  This function is called recursively. Note that we don't cache the
731
  children or such, as oppossed to assemble, shutdown of different
732
  devices doesn't require that the upper device was active.
733

734
  """
735
  r_dev = _RecursiveFindBD(disk)
736
  if r_dev is not None:
737
    result = r_dev.Shutdown()
738
  else:
739
    result = True
740
  if disk.children:
741
    for child in disk.children:
742
      result = result and ShutdownBlockDevice(child)
743
  return result
744

    
745

    
746
def MirrorAddChild(md_cdev, new_cdev):
747
  """Extend an MD raid1 array.
748

749
  """
750
  md_bdev = _RecursiveFindBD(md_cdev, allow_partial=True)
751
  if md_bdev is None:
752
    logger.Error("Can't find md device")
753
    return False
754
  new_bdev = _RecursiveFindBD(new_cdev)
755
  if new_bdev is None:
756
    logger.Error("Can't find new device to add")
757
    return False
758
  new_bdev.Open()
759
  md_bdev.AddChild(new_bdev)
760
  return True
761

    
762

    
763
def MirrorRemoveChild(md_cdev, new_cdev):
764
  """Reduce an MD raid1 array.
765

766
  """
767
  md_bdev = _RecursiveFindBD(md_cdev)
768
  if md_bdev is None:
769
    return False
770
  new_bdev = _RecursiveFindBD(new_cdev)
771
  if new_bdev is None:
772
    return False
773
  new_bdev.Open()
774
  md_bdev.RemoveChild(new_bdev.dev_path)
775
  return True
776

    
777

    
778
def GetMirrorStatus(disks):
779
  """Get the mirroring status of a list of devices.
780

781
  Args:
782
    disks: list of `objects.Disk`
783

784
  Returns:
785
    list of (mirror_done, estimated_time) tuples, which
786
    are the result of bdev.BlockDevice.CombinedSyncStatus()
787

788
  """
789
  stats = []
790
  for dsk in disks:
791
    rbd = _RecursiveFindBD(dsk)
792
    if rbd is None:
793
      raise errors.BlockDeviceError("Can't find device %s" % str(dsk))
794
    stats.append(rbd.CombinedSyncStatus())
795
  return stats
796

    
797

    
798
def _RecursiveFindBD(disk, allow_partial=False):
799
  """Check if a device is activated.
800

801
  If so, return informations about the real device.
802

803
  Args:
804
    disk: the objects.Disk instance
805
    allow_partial: don't abort the find if a child of the
806
                   device can't be found; this is intended to be
807
                   used when repairing mirrors
808

809
  Returns:
810
    None if the device can't be found
811
    otherwise the device instance
812

813
  """
814
  children = []
815
  if disk.children:
816
    for chdisk in disk.children:
817
      children.append(_RecursiveFindBD(chdisk))
818

    
819
  return bdev.FindDevice(disk.dev_type, disk.physical_id, children)
820

    
821

    
822
def FindBlockDevice(disk):
823
  """Check if a device is activated.
824

825
  If so, return informations about the real device.
826

827
  Args:
828
    disk: the objects.Disk instance
829
  Returns:
830
    None if the device can't be found
831
    (device_path, major, minor, sync_percent, estimated_time, is_degraded)
832

833
  """
834
  rbd = _RecursiveFindBD(disk)
835
  if rbd is None:
836
    return rbd
837
  sync_p, est_t, is_degr = rbd.GetSyncStatus()
838
  return rbd.dev_path, rbd.major, rbd.minor, sync_p, est_t, is_degr
839

    
840

    
841
def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
842
  """Write a file to the filesystem.
843

844
  This allows the master to overwrite(!) a file. It will only perform
845
  the operation if the file belongs to a list of configuration files.
846

847
  """
848
  if not os.path.isabs(file_name):
849
    logger.Error("Filename passed to UploadFile is not absolute: '%s'" %
850
                 file_name)
851
    return False
852

    
853
  allowed_files = [constants.CLUSTER_CONF_FILE, "/etc/hosts",
854
                   constants.SSH_KNOWN_HOSTS_FILE]
855
  allowed_files.extend(ssconf.SimpleStore().GetFileList())
856
  if file_name not in allowed_files:
857
    logger.Error("Filename passed to UploadFile not in allowed"
858
                 " upload targets: '%s'" % file_name)
859
    return False
860

    
861
  dir_name, small_name = os.path.split(file_name)
862
  fd, new_name = tempfile.mkstemp('.new', small_name, dir_name)
863
  # here we need to make sure we remove the temp file, if any error
864
  # leaves it in place
865
  try:
866
    os.chown(new_name, uid, gid)
867
    os.chmod(new_name, mode)
868
    os.write(fd, data)
869
    os.fsync(fd)
870
    os.utime(new_name, (atime, mtime))
871
    os.rename(new_name, file_name)
872
  finally:
873
    os.close(fd)
874
    utils.RemoveFile(new_name)
875
  return True
876

    
877

    
878
def _ErrnoOrStr(err):
879
  """Format an EnvironmentError exception.
880

881
  If the `err` argument has an errno attribute, it will be looked up
882
  and converted into a textual EXXXX description. Otherwise the string
883
  representation of the error will be returned.
884

885
  """
886
  if hasattr(err, 'errno'):
887
    detail = errno.errorcode[err.errno]
888
  else:
889
    detail = str(err)
890
  return detail
891

    
892
def _OSSearch(name, search_path=None):
893
  """Search for OSes with the given name in the search_path.
894

895
  Args:
896
    name: The name of the OS to look for
897
    search_path: List of dirs to search (defaults to constants.OS_SEARCH_PATH)
898

899
  Returns:
900
    The base_dir the OS resides in
901

902
  """
903

    
904
  if search_path is None:
905
    search_path = constants.OS_SEARCH_PATH
906

    
907
  for dir in search_path:
908
    t_os_dir = os.path.sep.join([dir, name])
909
    if os.path.isdir(t_os_dir):
910
        return dir
911

    
912
  return None
913

    
914
def _OSOndiskVersion(name, os_dir):
915
  """Compute and return the API version of a given OS.
916

917
  This function will try to read the API version of the os given by
918
  the 'name' parameter and residing in the 'os_dir' directory.
919

920
  Return value will be either an integer denoting the version or None in the
921
  case when this is not a valid OS name.
922

923
  """
924

    
925
  api_file = os.path.sep.join([os_dir, "ganeti_api_version"])
926

    
927
  try:
928
    st = os.stat(api_file)
929
  except EnvironmentError, err:
930
    raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file not"
931
                           " found (%s)" % _ErrnoOrStr(err))
932

    
933
  if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
934
    raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file is not"
935
                           " a regular file")
936

    
937
  try:
938
    f = open(api_file)
939
    try:
940
      api_version = f.read(256)
941
    finally:
942
      f.close()
943
  except EnvironmentError, err:
944
    raise errors.InvalidOS(name, os_dir, "error while reading the"
945
                           " API version (%s)" % _ErrnoOrStr(err))
946

    
947
  api_version = api_version.strip()
948
  try:
949
    api_version = int(api_version)
950
  except (TypeError, ValueError), err:
951
    raise errors.InvalidOS(name, os_dir,
952
                           "API version is not integer (%s)" % str(err))
953

    
954
  return api_version
955

    
956

    
957
def DiagnoseOS(top_dirs=None):
958
  """Compute the validity for all OSes.
959

960
  For each name in all the given top directories (if not given defaults i
961
  to constants.OS_SEARCH_PATH it will return an object. If this is a valid
962
  os, the object will be an instance of the object.OS class. If not,
963
  it will be an instance of errors.InvalidOS and this signifies that
964
  this name does not correspond to a valid OS.
965

966
  Returns:
967
    list of objects
968

969
  """
970
  if top_dirs is None:
971
    top_dirs = constants.OS_SEARCH_PATH
972

    
973
  result = []
974
  for dir in top_dirs:
975
    if os.path.isdir(dir):
976
      try:
977
        f_names = utils.ListVisibleFiles(dir)
978
      except EnvironmentError, err:
979
        logger.Error("Can't list the OS directory %s: %s" % (dir,str(err)))
980
        break
981
      for name in f_names:
982
        try:
983
          os_inst = OSFromDisk(name, base_dir=dir)
984
          result.append(os_inst)
985
        except errors.InvalidOS, err:
986
          result.append(err)
987

    
988
  return result
989

    
990

    
991
def OSFromDisk(name, base_dir=None):
992
  """Create an OS instance from disk.
993

994
  This function will return an OS instance if the given name is a
995
  valid OS name. Otherwise, it will raise an appropriate
996
  `errors.InvalidOS` exception, detailing why this is not a valid
997
  OS.
998

999
  Args:
1000
    os_dir: Directory containing the OS scripts. Defaults to a search
1001
            in all the OS_SEARCH_PATH directories.
1002

1003
  """
1004

    
1005
  if base_dir is None:
1006
    base_dir = _OSSearch(name)
1007

    
1008
  if base_dir is None:
1009
    raise errors.InvalidOS(name, None, "OS dir not found in search path")
1010

    
1011
  os_dir = os.path.sep.join([base_dir, name])
1012
  api_version = _OSOndiskVersion(name, os_dir)
1013

    
1014
  if api_version != constants.OS_API_VERSION:
1015
    raise errors.InvalidOS(name, os_dir, "API version mismatch"
1016
                           " (found %s want %s)"
1017
                           % (api_version, constants.OS_API_VERSION))
1018

    
1019
  # OS Scripts dictionary, we will populate it with the actual script names
1020
  os_scripts = {'create': '', 'export': '', 'import': '', 'rename': ''}
1021

    
1022
  for script in os_scripts:
1023
    os_scripts[script] = os.path.sep.join([os_dir, script])
1024

    
1025
    try:
1026
      st = os.stat(os_scripts[script])
1027
    except EnvironmentError, err:
1028
      raise errors.InvalidOS(name, os_dir, "'%s' script missing (%s)" %
1029
                             (script, _ErrnoOrStr(err)))
1030

    
1031
    if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
1032
      raise errors.InvalidOS(name, os_dir, "'%s' script not executable" %
1033
                             script)
1034

    
1035
    if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
1036
      raise errors.InvalidOS(name, os_dir, "'%s' is not a regular file" %
1037
                             script)
1038

    
1039

    
1040
  return objects.OS(name=name, path=os_dir,
1041
                    create_script=os_scripts['create'],
1042
                    export_script=os_scripts['export'],
1043
                    import_script=os_scripts['import'],
1044
                    rename_script=os_scripts['rename'],
1045
                    api_version=api_version)
1046

    
1047

    
1048
def SnapshotBlockDevice(disk):
1049
  """Create a snapshot copy of a block device.
1050

1051
  This function is called recursively, and the snapshot is actually created
1052
  just for the leaf lvm backend device.
1053

1054
  Args:
1055
    disk: the disk to be snapshotted
1056

1057
  Returns:
1058
    a config entry for the actual lvm device snapshotted.
1059

1060
  """
1061
  if disk.children:
1062
    if len(disk.children) == 1:
1063
      # only one child, let's recurse on it
1064
      return SnapshotBlockDevice(disk.children[0])
1065
    else:
1066
      # more than one child, choose one that matches
1067
      for child in disk.children:
1068
        if child.size == disk.size:
1069
          # return implies breaking the loop
1070
          return SnapshotBlockDevice(child)
1071
  elif disk.dev_type == "lvm":
1072
    r_dev = _RecursiveFindBD(disk)
1073
    if r_dev is not None:
1074
      # let's stay on the safe side and ask for the full size, for now
1075
      return r_dev.Snapshot(disk.size)
1076
    else:
1077
      return None
1078
  else:
1079
    raise errors.ProgrammerError("Cannot snapshot non-lvm block device"
1080
                                 "'%s' of type '%s'" %
1081
                                 (disk.unique_id, disk.dev_type))
1082

    
1083

    
1084
def ExportSnapshot(disk, dest_node, instance):
1085
  """Export a block device snapshot to a remote node.
1086

1087
  Args:
1088
    disk: the snapshot block device
1089
    dest_node: the node to send the image to
1090
    instance: instance being exported
1091

1092
  Returns:
1093
    True if successful, False otherwise.
1094

1095
  """
1096
  inst_os = OSFromDisk(instance.os)
1097
  export_script = inst_os.export_script
1098

    
1099
  logfile = "%s/exp-%s-%s-%s.log" % (constants.LOG_OS_DIR, inst_os.name,
1100
                                     instance.name, int(time.time()))
1101
  if not os.path.exists(constants.LOG_OS_DIR):
1102
    os.mkdir(constants.LOG_OS_DIR, 0750)
1103

    
1104
  real_os_dev = _RecursiveFindBD(disk)
1105
  if real_os_dev is None:
1106
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
1107
                                  str(disk))
1108
  real_os_dev.Open()
1109

    
1110
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
1111
  destfile = disk.physical_id[1]
1112

    
1113
  # the target command is built out of three individual commands,
1114
  # which are joined by pipes; we check each individual command for
1115
  # valid parameters
1116

    
1117
  expcmd = utils.BuildShellCmd("cd %s; %s -i %s -b %s 2>%s", inst_os.path,
1118
                               export_script, instance.name,
1119
                               real_os_dev.dev_path, logfile)
1120

    
1121
  comprcmd = "gzip"
1122

    
1123
  destcmd = utils.BuildShellCmd("mkdir -p %s && cat > %s/%s",
1124
                                destdir, destdir, destfile)
1125
  remotecmd = ssh.BuildSSHCmd(dest_node, constants.GANETI_RUNAS, destcmd)
1126

    
1127

    
1128

    
1129
  # all commands have been checked, so we're safe to combine them
1130
  command = '|'.join([expcmd, comprcmd, utils.ShellQuoteArgs(remotecmd)])
1131

    
1132
  result = utils.RunCmd(command)
1133

    
1134
  if result.failed:
1135
    logger.Error("os snapshot export command '%s' returned error: %s"
1136
                 " output: %s" %
1137
                 (command, result.fail_reason, result.output))
1138
    return False
1139

    
1140
  return True
1141

    
1142

    
1143
def FinalizeExport(instance, snap_disks):
1144
  """Write out the export configuration information.
1145

1146
  Args:
1147
    instance: instance configuration
1148
    snap_disks: snapshot block devices
1149

1150
  Returns:
1151
    False in case of error, True otherwise.
1152

1153
  """
1154
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
1155
  finaldestdir = os.path.join(constants.EXPORT_DIR, instance.name)
1156

    
1157
  config = objects.SerializableConfigParser()
1158

    
1159
  config.add_section(constants.INISECT_EXP)
1160
  config.set(constants.INISECT_EXP, 'version', '0')
1161
  config.set(constants.INISECT_EXP, 'timestamp', '%d' % int(time.time()))
1162
  config.set(constants.INISECT_EXP, 'source', instance.primary_node)
1163
  config.set(constants.INISECT_EXP, 'os', instance.os)
1164
  config.set(constants.INISECT_EXP, 'compression', 'gzip')
1165

    
1166
  config.add_section(constants.INISECT_INS)
1167
  config.set(constants.INISECT_INS, 'name', instance.name)
1168
  config.set(constants.INISECT_INS, 'memory', '%d' % instance.memory)
1169
  config.set(constants.INISECT_INS, 'vcpus', '%d' % instance.vcpus)
1170
  config.set(constants.INISECT_INS, 'disk_template', instance.disk_template)
1171
  for nic_count, nic in enumerate(instance.nics):
1172
    config.set(constants.INISECT_INS, 'nic%d_mac' %
1173
               nic_count, '%s' % nic.mac)
1174
    config.set(constants.INISECT_INS, 'nic%d_ip' % nic_count, '%s' % nic.ip)
1175
  # TODO: redundant: on load can read nics until it doesn't exist
1176
  config.set(constants.INISECT_INS, 'nic_count' , '%d' % nic_count)
1177

    
1178
  for disk_count, disk in enumerate(snap_disks):
1179
    config.set(constants.INISECT_INS, 'disk%d_ivname' % disk_count,
1180
               ('%s' % disk.iv_name))
1181
    config.set(constants.INISECT_INS, 'disk%d_dump' % disk_count,
1182
               ('%s' % disk.physical_id[1]))
1183
    config.set(constants.INISECT_INS, 'disk%d_size' % disk_count,
1184
               ('%d' % disk.size))
1185
  config.set(constants.INISECT_INS, 'disk_count' , '%d' % disk_count)
1186

    
1187
  cff = os.path.join(destdir, constants.EXPORT_CONF_FILE)
1188
  cfo = open(cff, 'w')
1189
  try:
1190
    config.write(cfo)
1191
  finally:
1192
    cfo.close()
1193

    
1194
  shutil.rmtree(finaldestdir, True)
1195
  shutil.move(destdir, finaldestdir)
1196

    
1197
  return True
1198

    
1199

    
1200
def ExportInfo(dest):
1201
  """Get export configuration information.
1202

1203
  Args:
1204
    dest: directory containing the export
1205

1206
  Returns:
1207
    A serializable config file containing the export info.
1208

1209
  """
1210
  cff = os.path.join(dest, constants.EXPORT_CONF_FILE)
1211

    
1212
  config = objects.SerializableConfigParser()
1213
  config.read(cff)
1214

    
1215
  if (not config.has_section(constants.INISECT_EXP) or
1216
      not config.has_section(constants.INISECT_INS)):
1217
    return None
1218

    
1219
  return config
1220

    
1221

    
1222
def ImportOSIntoInstance(instance, os_disk, swap_disk, src_node, src_image):
1223
  """Import an os image into an instance.
1224

1225
  Args:
1226
    instance: the instance object
1227
    os_disk: the instance-visible name of the os device
1228
    swap_disk: the instance-visible name of the swap device
1229
    src_node: node holding the source image
1230
    src_image: path to the source image on src_node
1231

1232
  Returns:
1233
    False in case of error, True otherwise.
1234

1235
  """
1236
  inst_os = OSFromDisk(instance.os)
1237
  import_script = inst_os.import_script
1238

    
1239
  os_device = instance.FindDisk(os_disk)
1240
  if os_device is None:
1241
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
1242
    return False
1243

    
1244
  swap_device = instance.FindDisk(swap_disk)
1245
  if swap_device is None:
1246
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
1247
    return False
1248

    
1249
  real_os_dev = _RecursiveFindBD(os_device)
1250
  if real_os_dev is None:
1251
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
1252
                                  str(os_device))
1253
  real_os_dev.Open()
1254

    
1255
  real_swap_dev = _RecursiveFindBD(swap_device)
1256
  if real_swap_dev is None:
1257
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
1258
                                  str(swap_device))
1259
  real_swap_dev.Open()
1260

    
1261
  logfile = "%s/import-%s-%s-%s.log" % (constants.LOG_OS_DIR, instance.os,
1262
                                        instance.name, int(time.time()))
1263
  if not os.path.exists(constants.LOG_OS_DIR):
1264
    os.mkdir(constants.LOG_OS_DIR, 0750)
1265

    
1266
  destcmd = utils.BuildShellCmd('cat %s', src_image)
1267
  remotecmd = ssh.BuildSSHCmd(src_node, constants.GANETI_RUNAS, destcmd)
1268

    
1269
  comprcmd = "gunzip"
1270
  impcmd = utils.BuildShellCmd("(cd %s; %s -i %s -b %s -s %s &>%s)",
1271
                               inst_os.path, import_script, instance.name,
1272
                               real_os_dev.dev_path, real_swap_dev.dev_path,
1273
                               logfile)
1274

    
1275
  command = '|'.join([utils.ShellQuoteArgs(remotecmd), comprcmd, impcmd])
1276

    
1277
  result = utils.RunCmd(command)
1278

    
1279
  if result.failed:
1280
    logger.Error("os import command '%s' returned error: %s"
1281
                 " output: %s" %
1282
                 (command, result.fail_reason, result.output))
1283
    return False
1284

    
1285
  return True
1286

    
1287

    
1288
def ListExports():
1289
  """Return a list of exports currently available on this machine.
1290

1291
  """
1292
  if os.path.isdir(constants.EXPORT_DIR):
1293
    return utils.ListVisibleFiles(constants.EXPORT_DIR)
1294
  else:
1295
    return []
1296

    
1297

    
1298
def RemoveExport(export):
1299
  """Remove an existing export from the node.
1300

1301
  Args:
1302
    export: the name of the export to remove
1303

1304
  Returns:
1305
    False in case of error, True otherwise.
1306

1307
  """
1308
  target = os.path.join(constants.EXPORT_DIR, export)
1309

    
1310
  shutil.rmtree(target)
1311
  # TODO: catch some of the relevant exceptions and provide a pretty
1312
  # error message if rmtree fails.
1313

    
1314
  return True
1315

    
1316

    
1317
class HooksRunner(object):
1318
  """Hook runner.
1319

1320
  This class is instantiated on the node side (ganeti-noded) and not on
1321
  the master side.
1322

1323
  """
1324
  RE_MASK = re.compile("^[a-zA-Z0-9_-]+$")
1325

    
1326
  def __init__(self, hooks_base_dir=None):
1327
    """Constructor for hooks runner.
1328

1329
    Args:
1330
      - hooks_base_dir: if not None, this overrides the
1331
        constants.HOOKS_BASE_DIR (useful for unittests)
1332
      - logs_base_dir: if not None, this overrides the
1333
        constants.LOG_HOOKS_DIR (useful for unittests)
1334
      - logging: enable or disable logging of script output
1335

1336
    """
1337
    if hooks_base_dir is None:
1338
      hooks_base_dir = constants.HOOKS_BASE_DIR
1339
    self._BASE_DIR = hooks_base_dir
1340

    
1341
  @staticmethod
1342
  def ExecHook(script, env):
1343
    """Exec one hook script.
1344

1345
    Args:
1346
     - phase: the phase
1347
     - script: the full path to the script
1348
     - env: the environment with which to exec the script
1349

1350
    """
1351
    # exec the process using subprocess and log the output
1352
    fdstdin = None
1353
    try:
1354
      fdstdin = open("/dev/null", "r")
1355
      child = subprocess.Popen([script], stdin=fdstdin, stdout=subprocess.PIPE,
1356
                               stderr=subprocess.STDOUT, close_fds=True,
1357
                               shell=False, cwd="/",env=env)
1358
      output = ""
1359
      try:
1360
        output = child.stdout.read(4096)
1361
        child.stdout.close()
1362
      except EnvironmentError, err:
1363
        output += "Hook script error: %s" % str(err)
1364

    
1365
      while True:
1366
        try:
1367
          result = child.wait()
1368
          break
1369
        except EnvironmentError, err:
1370
          if err.errno == errno.EINTR:
1371
            continue
1372
          raise
1373
    finally:
1374
      # try not to leak fds
1375
      for fd in (fdstdin, ):
1376
        if fd is not None:
1377
          try:
1378
            fd.close()
1379
          except EnvironmentError, err:
1380
            # just log the error
1381
            #logger.Error("While closing fd %s: %s" % (fd, err))
1382
            pass
1383

    
1384
    return result == 0, output
1385

    
1386
  def RunHooks(self, hpath, phase, env):
1387
    """Run the scripts in the hooks directory.
1388

1389
    This method will not be usually overriden by child opcodes.
1390

1391
    """
1392
    if phase == constants.HOOKS_PHASE_PRE:
1393
      suffix = "pre"
1394
    elif phase == constants.HOOKS_PHASE_POST:
1395
      suffix = "post"
1396
    else:
1397
      raise errors.ProgrammerError("Unknown hooks phase: '%s'" % phase)
1398
    rr = []
1399

    
1400
    subdir = "%s-%s.d" % (hpath, suffix)
1401
    dir_name = "%s/%s" % (self._BASE_DIR, subdir)
1402
    try:
1403
      dir_contents = utils.ListVisibleFiles(dir_name)
1404
    except OSError, err:
1405
      # must log
1406
      return rr
1407

    
1408
    # we use the standard python sort order,
1409
    # so 00name is the recommended naming scheme
1410
    dir_contents.sort()
1411
    for relname in dir_contents:
1412
      fname = os.path.join(dir_name, relname)
1413
      if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
1414
          self.RE_MASK.match(relname) is not None):
1415
        rrval = constants.HKR_SKIP
1416
        output = ""
1417
      else:
1418
        result, output = self.ExecHook(fname, env)
1419
        if not result:
1420
          rrval = constants.HKR_FAIL
1421
        else:
1422
          rrval = constants.HKR_SUCCESS
1423
      rr.append(("%s/%s" % (subdir, relname), rrval, output))
1424

    
1425
    return rr