Statistics
| Branch: | Tag: | Revision:

root / lib / backend.py @ d87ae7d2

History | View | Annotate | Download (45.2 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Functions used by the node daemon"""
23

    
24

    
25
import os
26
import os.path
27
import shutil
28
import time
29
import tempfile
30
import stat
31
import errno
32
import re
33
import subprocess
34

    
35
from ganeti import logger
36
from ganeti import errors
37
from ganeti import utils
38
from ganeti import ssh
39
from ganeti import hypervisor
40
from ganeti import constants
41
from ganeti import bdev
42
from ganeti import objects
43
from ganeti import ssconf
44

    
45

    
46
def StartMaster():
  """Activate local node as master node.

  There are two needed steps for this:
    - run the master script
    - register the cron script

  Returns:
    True on success, False (after logging) on failure.

  """
  result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "start"])
  if not result.failed:
    return True

  logger.Error("could not activate cluster interface with command %s,"
               " error: '%s'" % (result.cmd, result.output))
  return False
62

    
63

    
64
def StopMaster():
  """Deactivate this node as master.

  This does two things:
    - run the master stop script
    - remove link to master cron script.

  Returns:
    True on success, False (after logging) on failure.

  """
  result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "stop"])
  if not result.failed:
    return True

  logger.Error("could not deactivate cluster interface with command %s,"
               " error: '%s'" % (result.cmd, result.output))
  return False
80

    
81

    
82
def AddNode(dsa, dsapub, rsa, rsapub, sshkey, sshpub):
83
  """Joins this node to the cluster.
84

85
  This does the following:
86
      - updates the hostkeys of the machine (rsa and dsa)
87
      - adds the ssh private key to the user
88
      - adds the ssh public key to the users' authorized_keys file
89

90
  """
91
  sshd_keys =  [(constants.SSH_HOST_RSA_PRIV, rsa, 0600),
92
                (constants.SSH_HOST_RSA_PUB, rsapub, 0644),
93
                (constants.SSH_HOST_DSA_PRIV, dsa, 0600),
94
                (constants.SSH_HOST_DSA_PUB, dsapub, 0644)]
95
  for name, content, mode in sshd_keys:
96
    utils.WriteFile(name, data=content, mode=mode)
97

    
98
  try:
99
    priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS,
100
                                                    mkdir=True)
101
  except errors.OpExecError, err:
102
    logger.Error("Error while processing user ssh files: %s" % err)
103
    return False
104

    
105
  for name, content in [(priv_key, sshkey), (pub_key, sshpub)]:
106
    utils.WriteFile(name, data=content, mode=0600)
107

    
108
  utils.AddAuthorizedKey(auth_keys, sshpub)
109

    
110
  utils.RunCmd([constants.SSH_INITD_SCRIPT, "restart"])
111

    
112
  return True
113

    
114

    
115
def LeaveCluster():
116
  """Cleans up the current node and prepares it to be removed from the cluster.
117

118
  """
119
  if os.path.isdir(constants.DATA_DIR):
120
    for rel_name in utils.ListVisibleFiles(constants.DATA_DIR):
121
      full_name = os.path.join(constants.DATA_DIR, rel_name)
122
      if os.path.isfile(full_name) and not os.path.islink(full_name):
123
        utils.RemoveFile(full_name)
124

    
125
  try:
126
    priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)
127
  except errors.OpExecError, err:
128
    logger.Error("Error while processing ssh files: %s" % err)
129
    return
130

    
131
  f = open(pub_key, 'r')
132
  try:
133
    utils.RemoveAuthorizedKey(auth_keys, f.read(8192))
134
  finally:
135
    f.close()
136

    
137
  utils.RemoveFile(priv_key)
138
  utils.RemoveFile(pub_key)
139

    
140

    
141
def GetNodeInfo(vgname):
  """Gives back a hash with different informations about the node.

  Args:
    vgname: the name of the volume group to query

  Returns:
    { 'vg_size' : xxx,  'vg_free' : xxx, 'memory_dom0': xxx,
      'memory_free' : xxx, 'memory_total' : xxx }
    where
    vg_size is the size of the configured volume group in MiB
    vg_free is the free size of the volume group in MiB
    memory_dom0 is the memory allocated for domain0 in MiB
    memory_free is the currently available (free) ram in MiB
    memory_total is the total number of ram in MiB
    plus a 'bootid' key identifying the current boot of the machine

  """
  vginfo = _GetVGInfo(vgname)
  node_info = {
    'vg_size': vginfo['vg_size'],
    'vg_free': vginfo['vg_free'],
    }

  hyp_info = hypervisor.GetHypervisor().GetNodeInfo()
  if hyp_info is not None:
    node_info.update(hyp_info)

  # the kernel regenerates boot_id on every boot, so it identifies
  # this particular boot of the machine
  boot_file = open("/proc/sys/kernel/random/boot_id", 'r')
  try:
    node_info["bootid"] = boot_file.read(128).rstrip("\n")
  finally:
    boot_file.close()

  return node_info
172

    
173

    
174
def VerifyNode(what):
  """Verify the status of the local node.

  Args:
    what - a dictionary of things to check:
      'filelist' : list of files for which to compute checksums
      'nodelist' : list of nodes we should check communication with
      'hypervisor': run the hypervisor-specific verify

  Requested files on local node are checksummed and the result returned.

  The nodelist is traversed, with the following checks being made
  for each node:
  - known_hosts key correct
  - correct resolving of node name (target node returns its own hostname
    by ssh-execution of 'hostname', result compared against name in list.

  Returns:
    dict with one key per requested check; 'nodelist' maps each
    failing node to its error message.

  """
  result = {}

  if 'hypervisor' in what:
    result['hypervisor'] = hypervisor.GetHypervisor().Verify()

  if 'filelist' in what:
    result['filelist'] = utils.FingerprintFiles(what['filelist'])

  if 'nodelist' in what:
    failures = {}
    for node in what['nodelist']:
      success, message = ssh.VerifyNodeHostname(node)
      if not success:
        failures[node] = message
    result['nodelist'] = failures

  return result
207

    
208

    
209
def GetVolumeList(vg_name):
  """Compute list of logical volumes and their size.

  Args:
    vg_name: the volume group whose logical volumes to list

  Returns:
    dictionary of all partions (key) with their size (in MiB), inactive
    and online status:
    {'test1': ('20.06', True, True)}

  NOTE(review): on `lvs` failure this returns result.output (a string),
  not a dict like the success path - verify callers handle both shapes.

  """
  lvs = {}
  sep = '|'
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=%s" % sep,
                         "-olv_name,lv_size,lv_attr", vg_name])
  if result.failed:
    logger.Error("Failed to list logical volumes, lvs output: %s" %
                 result.output)
    return result.output

  for line in result.stdout.splitlines():
    # strip surrounding whitespace and any trailing separator
    line = line.strip().rstrip(sep)
    name, size, attr = line.split(sep)
    # lv_attr is a six-character flag string; normalize short values so
    # the positional tests below cannot raise IndexError
    if len(attr) != 6:
      attr = '------'
    inactive = attr[4] == '-'  # 5th attr char '-' means device inactive
    online = attr[5] == 'o'    # 6th attr char 'o' means device open
    lvs[name] = (size, inactive, online)

  return lvs
238

    
239

    
240
def ListVolumeGroups():
  """List the volume groups and their size.

  Thin wrapper delegating to utils.ListVolumeGroups.

  Returns:
    Dictionary with keys volume name and values the size of the volume

  """
  return utils.ListVolumeGroups()
248

    
249

    
250
def NodeVolumes():
  """List all volumes on this node.

  Returns:
    A list of dicts with keys 'name', 'size', 'dev' and 'vg', one per
    logical volume; an empty dict if the `lvs` invocation failed.

  """
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=|",
                         "--options=lv_name,lv_size,devices,vg_name"])
  if result.failed:
    logger.Error("Failed to list logical volumes, lvs output: %s" %
                 result.output)
    return {}

  volumes = []
  for line in result.stdout.splitlines():
    fields = line.split('|')
    device = fields[2].strip()
    # the devices column looks like "/dev/sda(200)"; keep the path only
    if '(' in device:
      device = device.split('(')[0]
    volumes.append({
      'name': fields[0].strip(),
      'size': fields[1].strip(),
      'dev': device,
      'vg': fields[3].strip(),
      })
  return volumes
277

    
278

    
279
def BridgesExist(bridges_list):
  """Check if a list of bridges exist on the current node.

  Args:
    bridges_list: iterable of bridge names to check

  Returns:
    True if all of them exist, false otherwise

  """
  # all() short-circuits, so checking stops at the first missing bridge
  return all(utils.BridgeExists(bridge) for bridge in bridges_list)
291

    
292

    
293
def GetInstanceList():
294
  """Provides a list of instances.
295

296
  Returns:
297
    A list of all running instances on the current node
298
    - instance1.example.com
299
    - instance2.example.com
300

301
  """
302
  try:
303
    names = hypervisor.GetHypervisor().ListInstances()
304
  except errors.HypervisorError, err:
305
    logger.Error("error enumerating instances: %s" % str(err))
306
    raise
307

    
308
  return names
309

    
310

    
311
def GetInstanceInfo(instance):
  """Gives back the informations about an instance as a dictionary.

  Args:
    instance: name of the instance (ex. instance1.example.com)

  Returns:
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
    where
    memory: memory size of instance (int)
    state: xen state of instance (string)
    time: cpu time of instance (float)
    An empty dict is returned when the hypervisor has no data.

  """
  iinfo = hypervisor.GetHypervisor().GetInstanceInfo(instance)
  if iinfo is None:
    return {}
  # positional fields from the hypervisor info tuple
  return {
    'memory': iinfo[2],
    'state': iinfo[4],
    'time': iinfo[5],
    }
334

    
335

    
336
def GetAllInstancesInfo():
  """Gather data about all instances.

  This is the equivalent of `GetInstanceInfo()`, except that it
  computes data for all instances at once, thus being faster if one
  needs data about more than one instance.

  Returns: a dictionary of dictionaries, keys being the instance name,
    and with values:
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
    where
    memory: memory size of instance (int)
    state: xen state of instance (string)
    time: cpu time of instance (float)
    vcpus: the number of cpus

  """
  output = {}

  iinfo = hypervisor.GetHypervisor().GetAllInstancesInfo()
  if iinfo:
    # each entry is (name, id, memory, vcpus, state, cpu_time); the
    # hypervisor-internal id is not exposed to callers
    for (name, dummy_id, memory, vcpus, state, times) in iinfo:
      output[name] = {
        'memory': memory,
        'vcpus': vcpus,
        'state': state,
        'time': times,
        }

  return output
366

    
367

    
368
def AddOSToInstance(instance, os_disk, swap_disk):
  """Add an OS to an instance.

  Args:
    instance: the instance object
    os_disk: the instance-visible name of the os device
    swap_disk: the instance-visible name of the swap device

  Returns:
    True if the OS create script succeeded, False (after logging) if a
    disk could not be found or the script failed.

  Raises:
    errors.BlockDeviceError: if a found disk is not actually set up.

  """
  inst_os = OSFromDisk(instance.os)

  create_script = inst_os.create_script

  # resolve both instance-visible names before touching any device
  os_device = instance.FindDisk(os_disk)
  if os_device is None:
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
    return False

  swap_device = instance.FindDisk(swap_disk)
  if swap_device is None:
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
    return False

  real_os_dev = _RecursiveFindBD(os_device)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(os_device))
  real_os_dev.Open()

  real_swap_dev = _RecursiveFindBD(swap_device)
  if real_swap_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(swap_device))
  real_swap_dev.Open()

  # per-run logfile, timestamped so reruns do not overwrite each other
  logfile = "%s/add-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
                                     instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  # run the OS create script from its own directory, redirecting all
  # output to the logfile
  command = utils.BuildShellCmd("cd %s && %s -i %s -b %s -s %s &>%s",
                                inst_os.path, create_script, instance.name,
                                real_os_dev.dev_path, real_swap_dev.dev_path,
                                logfile)

  result = utils.RunCmd(command)
  if result.failed:
    logger.Error("os create command '%s' returned error: %s, logfile: %s,"
                 " output: %s" %
                 (command, result.fail_reason, logfile, result.output))
    return False

  return True
421

    
422

    
423
def RunRenameInstance(instance, old_name, os_disk, swap_disk):
424
  """Run the OS rename script for an instance.
425

426
  Args:
427
    instance: the instance object
428
    old_name: the old name of the instance
429
    os_disk: the instance-visible name of the os device
430
    swap_disk: the instance-visible name of the swap device
431

432
  """
433
  inst_os = OSFromDisk(instance.os)
434

    
435
  script = inst_os.rename_script
436

    
437
  os_device = instance.FindDisk(os_disk)
438
  if os_device is None:
439
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
440
    return False
441

    
442
  swap_device = instance.FindDisk(swap_disk)
443
  if swap_device is None:
444
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
445
    return False
446

    
447
  real_os_dev = _RecursiveFindBD(os_device)
448
  if real_os_dev is None:
449
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
450
                                  str(os_device))
451
  real_os_dev.Open()
452

    
453
  real_swap_dev = _RecursiveFindBD(swap_device)
454
  if real_swap_dev is None:
455
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
456
                                  str(swap_device))
457
  real_swap_dev.Open()
458

    
459
  logfile = "%s/rename-%s-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
460
                                           old_name,
461
                                           instance.name, int(time.time()))
462
  if not os.path.exists(constants.LOG_OS_DIR):
463
    os.mkdir(constants.LOG_OS_DIR, 0750)
464

    
465
  command = utils.BuildShellCmd("cd %s && %s -o %s -n %s -b %s -s %s &>%s",
466
                                inst_os.path, script, old_name, instance.name,
467
                                real_os_dev.dev_path, real_swap_dev.dev_path,
468
                                logfile)
469

    
470
  result = utils.RunCmd(command)
471

    
472
  if result.failed:
473
    logger.Error("os create command '%s' returned error: %s"
474
                 " output: %s" %
475
                 (command, result.fail_reason, result.output))
476
    return False
477

    
478
  return True
479

    
480

    
481
def _GetVGInfo(vg_name):
  """Get informations about the volume group.

  Args:
    vg_name: the volume group

  Returns:
    { 'vg_size' : xxx, 'vg_free' : xxx, 'pv_count' : xxx }
    where
    vg_size is the total size of the volume group in MiB
    vg_free is the free size of the volume group in MiB
    pv_count are the number of physical disks in that vg

  If an error occurs during gathering of data, we return the same dict
  with keys all set to None.

  """
  # default result: all keys present but None, so callers always get
  # the same shape back
  retdic = dict.fromkeys(["vg_size", "vg_free", "pv_count"])

  retval = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings",
                         "--nosuffix", "--units=m", "--separator=:", vg_name])

  if retval.failed:
    errmsg = "volume group %s not present" % vg_name
    logger.Error(errmsg)
    return retdic
  # output looks like "  1234.00:567.00:2"; strip padding and any
  # trailing separator before splitting into the three fields
  valarr = retval.stdout.strip().rstrip(':').split(':')
  if len(valarr) == 3:
    try:
      retdic = {
        "vg_size": int(round(float(valarr[0]), 0)),
        "vg_free": int(round(float(valarr[1]), 0)),
        "pv_count": int(valarr[2]),
        }
    except ValueError, err:
      # unparsable numbers: fall through with the all-None dict
      logger.Error("Fail to parse vgs output: %s" % str(err))
  else:
    logger.Error("vgs output has the wrong number of fields (expected"
                 " three): %s" % str(valarr))
  return retdic
521

    
522

    
523
def _GatherBlockDevs(instance):
  """Set up an instance's block device(s).

  This is run on the primary node at instance startup. The block
  devices must be already assembled.

  Returns:
    list of (disk, device) pairs, one per instance disk.

  Raises:
    errors.BlockDeviceError: if any disk's device cannot be found.

  """
  gathered = []
  for inst_disk in instance.disks:
    bdevice = _RecursiveFindBD(inst_disk)
    if bdevice is None:
      raise errors.BlockDeviceError("Block device '%s' is not set up." %
                                    str(inst_disk))
    bdevice.Open()
    gathered.append((inst_disk, bdevice))
  return gathered
539

    
540

    
541
def StartInstance(instance, extra_args):
542
  """Start an instance.
543

544
  Args:
545
    instance - name of instance to start.
546

547
  """
548
  running_instances = GetInstanceList()
549

    
550
  if instance.name in running_instances:
551
    return True
552

    
553
  block_devices = _GatherBlockDevs(instance)
554
  hyper = hypervisor.GetHypervisor()
555

    
556
  try:
557
    hyper.StartInstance(instance, block_devices, extra_args)
558
  except errors.HypervisorError, err:
559
    logger.Error("Failed to start instance: %s" % err)
560
    return False
561

    
562
  return True
563

    
564

    
565
def ShutdownInstance(instance):
566
  """Shut an instance down.
567

568
  Args:
569
    instance - name of instance to shutdown.
570

571
  """
572
  running_instances = GetInstanceList()
573

    
574
  if instance.name not in running_instances:
575
    return True
576

    
577
  hyper = hypervisor.GetHypervisor()
578
  try:
579
    hyper.StopInstance(instance)
580
  except errors.HypervisorError, err:
581
    logger.Error("Failed to stop instance: %s" % err)
582
    return False
583

    
584
  # test every 10secs for 2min
585
  shutdown_ok = False
586

    
587
  time.sleep(1)
588
  for dummy in range(11):
589
    if instance.name not in GetInstanceList():
590
      break
591
    time.sleep(10)
592
  else:
593
    # the shutdown did not succeed
594
    logger.Error("shutdown of '%s' unsuccessful, using destroy" % instance)
595

    
596
    try:
597
      hyper.StopInstance(instance, force=True)
598
    except errors.HypervisorError, err:
599
      logger.Error("Failed to stop instance: %s" % err)
600
      return False
601

    
602
    time.sleep(1)
603
    if instance.name in GetInstanceList():
604
      logger.Error("could not shutdown instance '%s' even by destroy")
605
      return False
606

    
607
  return True
608

    
609

    
610
def RebootInstance(instance, reboot_type, extra_args):
  """Reboot an instance.

  Args:
    instance    - the instance object to reboot
    reboot_type - how to reboot: INSTANCE_REBOOT_SOFT (hypervisor
                  reboot) or INSTANCE_REBOOT_HARD (shutdown + start);
                  any other value (including a 'full' reboot) raises
                  ParameterError here
    extra_args  - extra arguments passed on a hard reboot's start

  Returns:
    True on success, False (after logging) on failure.

  Raises:
    errors.ParameterError: for an unsupported reboot_type.

  """
  running_instances = GetInstanceList()

  # a reboot only makes sense for a running instance
  if instance.name not in running_instances:
    logger.Error("Cannot reboot instance that is not running")
    return False

  hyper = hypervisor.GetHypervisor()
  if reboot_type == constants.INSTANCE_REBOOT_SOFT:
    try:
      hyper.RebootInstance(instance)
    except errors.HypervisorError, err:
      logger.Error("Failed to soft reboot instance: %s" % err)
      return False
  elif reboot_type == constants.INSTANCE_REBOOT_HARD:
    try:
      # hard reboot = full stop followed by a fresh start
      ShutdownInstance(instance)
      StartInstance(instance, extra_args)
    except errors.HypervisorError, err:
      logger.Error("Failed to hard reboot instance: %s" % err)
      return False
  else:
    raise errors.ParameterError("reboot_type invalid")


  return True
643

    
644

    
645
def CreateBlockDevice(disk, size, owner, on_primary, info):
  """Creates a block device for an instance.

  Args:
    disk: a ganeti.objects.Disk object describing the device to create
    size: the size of the physical underlying devices
    owner: the instance owning the device (used for the device cache)
    on_primary: whether this node is the instance's primary; on the
                primary the device is also assembled and opened
    info: metadata stored on the device via SetInfo()

  Returns:
    the new unique_id of the device (this can sometime be
    computed only after creation), or None. On secondary nodes,
    it's not required to return anything.

  """
  # recursively create/assemble the children first
  clist = []
  if disk.children:
    for child in disk.children:
      crdev = _RecursiveAssembleBD(child, owner, on_primary)
      if on_primary or disk.AssembleOnSecondary():
        # we need the children open in case the device itself has to
        # be assembled
        crdev.Open()
      else:
        crdev.Close()
      clist.append(crdev)
  try:
    # if a device with this id already exists, remove it so creation
    # starts from a clean slate
    device = bdev.FindDevice(disk.dev_type, disk.physical_id, clist)
    if device is not None:
      logger.Info("removing existing device %s" % disk)
      device.Remove()
  except errors.BlockDeviceError, err:
    # best-effort: a failed lookup of a stale device is not fatal
    pass

  device = bdev.Create(disk.dev_type, disk.physical_id,
                       clist, size)
  if device is None:
    raise ValueError("Can't create child device for %s, %s" %
                     (disk, size))
  if on_primary or disk.AssembleOnSecondary():
    if not device.Assemble():
      errorstring = "Can't assemble device after creation"
      logger.Error(errorstring)
      raise errors.BlockDeviceError("%s, very unusual event - check the node"
                                    " daemon logs" % errorstring)
    device.SetSyncSpeed(constants.SYNC_SPEED)
    if on_primary or disk.OpenOnSecondary():
      device.Open(force=True)
    # record the device in the cache only once it is usable
    DevCacheManager.UpdateCache(device.dev_path, owner,
                                on_primary, disk.iv_name)

  device.SetInfo(info)

  physical_id = device.unique_id
  return physical_id
700

    
701

    
702
def RemoveBlockDevice(disk):
703
  """Remove a block device.
704

705
  This is intended to be called recursively.
706

707
  """
708
  try:
709
    # since we are removing the device, allow a partial match
710
    # this allows removal of broken mirrors
711
    rdev = _RecursiveFindBD(disk, allow_partial=True)
712
  except errors.BlockDeviceError, err:
713
    # probably can't attach
714
    logger.Info("Can't attach to device %s in remove" % disk)
715
    rdev = None
716
  if rdev is not None:
717
    r_path = rdev.dev_path
718
    result = rdev.Remove()
719
    if result:
720
      DevCacheManager.RemoveCache(r_path)
721
  else:
722
    result = True
723
  if disk.children:
724
    for child in disk.children:
725
      result = result and RemoveBlockDevice(child)
726
  return result
727

    
728

    
729
def _RecursiveAssembleBD(disk, owner, as_primary):
  """Activate a block device for an instance.

  This is run on the primary and secondary nodes for an instance.

  This function is called recursively.

  Args:
    disk: a objects.Disk object
    owner: the instance owning the device (used for the device cache)
    as_primary: if we should make the block device read/write

  Returns:
    the assembled device or None (in case no device was assembled)

  If the assembly is not successful, an exception is raised.

  """
  children = []
  if disk.children:
    # mcn = how many children are allowed to fail assembly; a
    # ChildrenNeeded() of -1 means every child is required
    mcn = disk.ChildrenNeeded()
    if mcn == -1:
      mcn = 0 # max number of Nones allowed
    else:
      mcn = len(disk.children) - mcn # max number of Nones
    for chld_disk in disk.children:
      try:
        cdev = _RecursiveAssembleBD(chld_disk, owner, as_primary)
      except errors.BlockDeviceError, err:
        # re-raise only once the failure budget is exhausted;
        # otherwise record a None placeholder for the failed child
        if children.count(None) >= mcn:
          raise
        cdev = None
        logger.Debug("Error in child activation: %s" % str(err))
      children.append(cdev)

  if as_primary or disk.AssembleOnSecondary():
    r_dev = bdev.AttachOrAssemble(disk.dev_type, disk.physical_id, children)
    r_dev.SetSyncSpeed(constants.SYNC_SPEED)
    result = r_dev
    if as_primary or disk.OpenOnSecondary():
      r_dev.Open()
    else:
      r_dev.Close()
    DevCacheManager.UpdateCache(r_dev.dev_path, owner,
                                as_primary, disk.iv_name)

  else:
    # device intentionally left unassembled on this node
    result = True
  return result
777

    
778

    
779
def AssembleBlockDevice(disk, owner, as_primary):
  """Activate a block device for an instance.

  This is a wrapper over _RecursiveAssembleBD.

  Returns:
    a /dev path for primary nodes
    True for secondary nodes

  """
  assembled = _RecursiveAssembleBD(disk, owner, as_primary)
  if isinstance(assembled, bdev.BlockDev):
    # primary-node case: expose the device path, not the object
    return assembled.dev_path
  return assembled
793

    
794

    
795
def ShutdownBlockDevice(disk):
  """Shut down a block device.

  First, if the device is assembled (can `Attach()`), then the device
  is shutdown. Then the children of the device are shutdown.

  This function is called recursively. Note that we don't cache the
  children or such, as oppossed to assemble, shutdown of different
  devices doesn't require that the upper device was active.

  Returns:
    True if the device and all children shut down cleanly.

  """
  found = _RecursiveFindBD(disk)
  if found is None:
    # not assembled: nothing to shut down
    shutdown_ok = True
  else:
    dev_path = found.dev_path
    shutdown_ok = found.Shutdown()
    if shutdown_ok:
      DevCacheManager.RemoveCache(dev_path)

  if disk.children:
    for child in disk.children:
      shutdown_ok = shutdown_ok and ShutdownBlockDevice(child)
  return shutdown_ok
818

    
819

    
820
def MirrorAddChildren(parent_cdev, new_cdevs):
  """Extend a mirrored block device.

  Args:
    parent_cdev: the objects.Disk of the mirror to extend
    new_cdevs: list of objects.Disk for the new components

  Returns:
    True on success, False (after logging) if a device can't be found.

  """
  # partial match is fine here: we may be repairing a broken mirror
  parent_bdev = _RecursiveFindBD(parent_cdev, allow_partial=True)
  if parent_bdev is None:
    logger.Error("Can't find parent device")
    return False
  new_bdevs = [_RecursiveFindBD(disk) for disk in new_cdevs]
  if None in new_bdevs:
    logger.Error("Can't find new device(s) to add: %s:%s" %
                 (new_bdevs, new_cdevs))
    return False
  parent_bdev.AddChildren(new_bdevs)
  return True
835

    
836

    
837
def MirrorRemoveChildren(parent_cdev, new_cdevs):
  """Shrink a mirrored block device.

  Args:
    parent_cdev: the objects.Disk of the mirror to shrink
    new_cdevs: list of objects.Disk for the components to remove

  Returns:
    True on success, False (after logging) if a device can't be found.

  """
  parent_bdev = _RecursiveFindBD(parent_cdev)
  if parent_bdev is None:
    logger.Error("Can't find parent in remove children: %s" % parent_cdev)
    return False
  devs = []
  for disk in new_cdevs:
    # prefer the static path when the disk type provides one; fall
    # back to locating the live device otherwise
    rpath = disk.StaticDevPath()
    if rpath is not None:
      devs.append(rpath)
      continue
    bd = _RecursiveFindBD(disk)
    if bd is None:
      logger.Error("Can't find dynamic device %s while removing children" %
                   disk)
      return False
    devs.append(bd.dev_path)
  parent_bdev.RemoveChildren(devs)
  return True
860

    
861

    
862
def GetMirrorStatus(disks):
  """Get the mirroring status of a list of devices.

  Args:
    disks: list of `objects.Disk`

  Returns:
    list of (mirror_done, estimated_time) tuples, which
    are the result of bdev.BlockDevice.CombinedSyncStatus()

  Raises:
    errors.BlockDeviceError: if any disk's device cannot be found.

  """
  sync_stats = []
  for disk in disks:
    device = _RecursiveFindBD(disk)
    if device is None:
      raise errors.BlockDeviceError("Can't find device %s" % str(disk))
    sync_stats.append(device.CombinedSyncStatus())
  return sync_stats
880

    
881

    
882
def _RecursiveFindBD(disk, allow_partial=False):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance
    allow_partial: don't abort the find if a child of the
                   device can't be found; this is intended to be
                   used when repairing mirrors

  Returns:
    None if the device can't be found
    otherwise the device instance

  NOTE(review): allow_partial is accepted but never referenced in this
  body (it is not forwarded to the recursive calls either) - confirm
  whether bdev.FindDevice was meant to receive it.

  """
  children = []
  if disk.children:
    for chdisk in disk.children:
      children.append(_RecursiveFindBD(chdisk))

  return bdev.FindDevice(disk.dev_type, disk.physical_id, children)
904

    
905

    
906
def FindBlockDevice(disk):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance
  Returns:
    None if the device can't be found
    (device_path, major, minor, sync_percent, estimated_time, is_degraded)

  """
  device = _RecursiveFindBD(disk)
  if device is None:
    return None
  return (device.dev_path, device.major, device.minor) + \
         device.GetSyncStatus()
922

    
923

    
924
def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
  """Write a file to the filesystem.

  This allows the master to overwrite(!) a file. It will only perform
  the operation if the file belongs to a list of configuration files.

  Args:
    file_name: absolute path of the target file
    data: the bytes to write
    mode: permission bits for the file
    uid, gid: ownership for the file
    atime, mtime: timestamps to set on the file

  Returns:
    True on success, False (after logging) if the path is not absolute
    or not an allowed upload target.

  """
  if not os.path.isabs(file_name):
    logger.Error("Filename passed to UploadFile is not absolute: '%s'" %
                 file_name)
    return False

  # whitelist of files the master may overwrite on this node
  allowed_files = [
    constants.CLUSTER_CONF_FILE,
    constants.ETC_HOSTS,
    constants.SSH_KNOWN_HOSTS_FILE,
    ]
  allowed_files.extend(ssconf.SimpleStore().GetFileList())
  if file_name not in allowed_files:
    logger.Error("Filename passed to UploadFile not in allowed"
                 " upload targets: '%s'" % file_name)
    return False

  # write to a temp file in the same directory, then rename over the
  # target so readers never see a partially-written file
  dir_name, small_name = os.path.split(file_name)
  fd, new_name = tempfile.mkstemp('.new', small_name, dir_name)
  # here we need to make sure we remove the temp file, if any error
  # leaves it in place
  try:
    # ownership and mode are set before the data lands in the file
    os.chown(new_name, uid, gid)
    os.chmod(new_name, mode)
    os.write(fd, data)
    os.fsync(fd)
    os.utime(new_name, (atime, mtime))
    os.rename(new_name, file_name)
  finally:
    os.close(fd)
    # after a successful rename this is a no-op (the temp name is gone)
    utils.RemoveFile(new_name)
  return True
962

    
963

    
964
def _ErrnoOrStr(err):
965
  """Format an EnvironmentError exception.
966

967
  If the `err` argument has an errno attribute, it will be looked up
968
  and converted into a textual EXXXX description. Otherwise the string
969
  representation of the error will be returned.
970

971
  """
972
  if hasattr(err, 'errno'):
973
    detail = errno.errorcode[err.errno]
974
  else:
975
    detail = str(err)
976
  return detail
977

    
978

    
979
def _OSSearch(name, search_path=None):
980
  """Search for OSes with the given name in the search_path.
981

982
  Args:
983
    name: The name of the OS to look for
984
    search_path: List of dirs to search (defaults to constants.OS_SEARCH_PATH)
985

986
  Returns:
987
    The base_dir the OS resides in
988

989
  """
990
  if search_path is None:
991
    search_path = constants.OS_SEARCH_PATH
992

    
993
  for dir_name in search_path:
994
    t_os_dir = os.path.sep.join([dir_name, name])
995
    if os.path.isdir(t_os_dir):
996
      return dir_name
997

    
998
  return None
999

    
1000

    
1001
def _OSOndiskVersion(name, os_dir):
  """Compute and return the API version of a given OS.

  This function will try to read the API version of the os given by
  the 'name' parameter and residing in the 'os_dir' directory.

  Args:
    name: the name of the OS (used only in error messages)
    os_dir: the directory where the OS definition lives

  Returns:
    an integer denoting the version

  Raises:
    errors.InvalidOS: if the version file is missing, not a regular
      file, unreadable, or does not contain an integer

  """
  # the API version is stored in a one-line file inside the OS directory
  api_file = os.path.sep.join([os_dir, "ganeti_api_version"])

  try:
    st = os.stat(api_file)
  except EnvironmentError, err:
    raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file not"
                           " found (%s)" % _ErrnoOrStr(err))

  # refuse symlink targets/devices/etc.; only a plain file is acceptable
  if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
    raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file is not"
                           " a regular file")

  try:
    f = open(api_file)
    try:
      # 256 bytes is more than enough for an integer version string
      api_version = f.read(256)
    finally:
      f.close()
  except EnvironmentError, err:
    raise errors.InvalidOS(name, os_dir, "error while reading the"
                           " API version (%s)" % _ErrnoOrStr(err))

  # tolerate surrounding whitespace/newline in the version file
  api_version = api_version.strip()
  try:
    api_version = int(api_version)
  except (TypeError, ValueError), err:
    raise errors.InvalidOS(name, os_dir,
                           "API version is not integer (%s)" % str(err))

  return api_version
1041

    
1042

    
1043
def DiagnoseOS(top_dirs=None):
1044
  """Compute the validity for all OSes.
1045

1046
  Returns an OS object for each name in all the given top directories
1047
  (if not given defaults to constants.OS_SEARCH_PATH)
1048

1049
  Returns:
1050
    list of OS objects
1051

1052
  """
1053
  if top_dirs is None:
1054
    top_dirs = constants.OS_SEARCH_PATH
1055

    
1056
  result = []
1057
  for dir_name in top_dirs:
1058
    if os.path.isdir(dir_name):
1059
      try:
1060
        f_names = utils.ListVisibleFiles(dir_name)
1061
      except EnvironmentError, err:
1062
        logger.Error("Can't list the OS directory %s: %s" %
1063
                     (dir_name, str(err)))
1064
        break
1065
      for name in f_names:
1066
        try:
1067
          os_inst = OSFromDisk(name, base_dir=dir_name)
1068
          result.append(os_inst)
1069
        except errors.InvalidOS, err:
1070
          result.append(objects.OS.FromInvalidOS(err))
1071

    
1072
  return result
1073

    
1074

    
1075
def OSFromDisk(name, base_dir=None):
  """Create an OS instance from disk.

  This function will return an OS instance if the given name is a
  valid OS name. Otherwise, it will raise an appropriate
  `errors.InvalidOS` exception, detailing why this is not a valid
  OS.

  Args:
    name: the name of the OS to load
    base_dir: directory containing the OS scripts. Defaults to a search
              in all the OS_SEARCH_PATH directories.

  Raises:
    errors.InvalidOS: if the OS cannot be located, its API version does
      not match ours, or any mandatory script is missing/not executable

  """

  if base_dir is None:
    base_dir = _OSSearch(name)

  if base_dir is None:
    raise errors.InvalidOS(name, None, "OS dir not found in search path")

  os_dir = os.path.sep.join([base_dir, name])
  api_version = _OSOndiskVersion(name, os_dir)

  # refuse OS definitions written against a different API version
  if api_version != constants.OS_API_VERSION:
    raise errors.InvalidOS(name, os_dir, "API version mismatch"
                           " (found %s want %s)"
                           % (api_version, constants.OS_API_VERSION))

  # OS Scripts dictionary, we will populate it with the actual script names
  os_scripts = {'create': '', 'export': '', 'import': '', 'rename': ''}

  for script in os_scripts:
    os_scripts[script] = os.path.sep.join([os_dir, script])

    # each script must exist...
    try:
      st = os.stat(os_scripts[script])
    except EnvironmentError, err:
      raise errors.InvalidOS(name, os_dir, "'%s' script missing (%s)" %
                             (script, _ErrnoOrStr(err)))

    # ...be executable by its owner...
    if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
      raise errors.InvalidOS(name, os_dir, "'%s' script not executable" %
                             script)

    # ...and be a regular file (not a directory, device, etc.)
    if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
      raise errors.InvalidOS(name, os_dir, "'%s' is not a regular file" %
                             script)


  return objects.OS(name=name, path=os_dir, status=constants.OS_VALID_STATUS,
                    create_script=os_scripts['create'],
                    export_script=os_scripts['export'],
                    import_script=os_scripts['import'],
                    rename_script=os_scripts['rename'],
                    api_version=api_version)
1130

    
1131

    
1132
def SnapshotBlockDevice(disk):
  """Create a snapshot copy of a block device.

  This function is called recursively, and the snapshot is actually created
  just for the leaf lvm backend device.

  Args:
    disk: the disk to be snapshotted

  Returns:
    a config entry for the actual lvm device snapshotted, or None if no
    suitable device was found

  """
  if disk.children:
    # composite device: descend towards an LVM leaf
    if len(disk.children) == 1:
      # only one child, recurse on it directly
      return SnapshotBlockDevice(disk.children[0])
    # more than one child: pick one whose size matches the parent's
    for child in disk.children:
      if child.size == disk.size:
        return SnapshotBlockDevice(child)
    # no child of matching size found
    return None
  if disk.dev_type == constants.LD_LV:
    r_dev = _RecursiveFindBD(disk)
    if r_dev is None:
      return None
    # let's stay on the safe side and ask for the full size, for now
    return r_dev.Snapshot(disk.size)
  raise errors.ProgrammerError("Cannot snapshot non-lvm block device"
                               " '%s' of type '%s'" %
                               (disk.unique_id, disk.dev_type))
1166

    
1167

    
1168
def ExportSnapshot(disk, dest_node, instance):
  """Export a block device snapshot to a remote node.

  The snapshot is read by the OS export script, compressed with gzip
  and streamed over ssh into a file under the destination node's
  EXPORT_DIR.

  Args:
    disk: the snapshot block device
    dest_node: the node to send the image to
    instance: instance being exported

  Returns:
    True if successful, False otherwise.

  Raises:
    errors.BlockDeviceError: if the snapshot device is not assembled

  """
  inst_os = OSFromDisk(instance.os)
  export_script = inst_os.export_script

  # per-export log file, named after the OS, instance and timestamp
  logfile = "%s/exp-%s-%s-%s.log" % (constants.LOG_OS_DIR, inst_os.name,
                                     instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  real_os_dev = _RecursiveFindBD(disk)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(disk))
  real_os_dev.Open()

  # the export lands in a ".new" staging directory on the remote node
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
  destfile = disk.physical_id[1]

  # the target command is built out of three individual commands,
  # which are joined by pipes; we check each individual command for
  # valid parameters

  expcmd = utils.BuildShellCmd("cd %s; %s -i %s -b %s 2>%s", inst_os.path,
                               export_script, instance.name,
                               real_os_dev.dev_path, logfile)

  comprcmd = "gzip"

  destcmd = utils.BuildShellCmd("mkdir -p %s && cat > %s/%s",
                                destdir, destdir, destfile)
  remotecmd = ssh.BuildSSHCmd(dest_node, constants.GANETI_RUNAS, destcmd)




  # all commands have been checked, so we're safe to combine them
  command = '|'.join([expcmd, comprcmd, utils.ShellQuoteArgs(remotecmd)])

  result = utils.RunCmd(command)

  if result.failed:
    logger.Error("os snapshot export command '%s' returned error: %s"
                 " output: %s" %
                 (command, result.fail_reason, result.output))
    return False

  return True
1225

    
1226

    
1227
def FinalizeExport(instance, snap_disks):
  """Write out the export configuration information.

  Args:
    instance: instance configuration
    snap_disks: snapshot block devices

  Returns:
    False in case of error, True otherwise.

  """
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
  finaldestdir = os.path.join(constants.EXPORT_DIR, instance.name)

  config = objects.SerializableConfigParser()

  config.add_section(constants.INISECT_EXP)
  config.set(constants.INISECT_EXP, 'version', '0')
  config.set(constants.INISECT_EXP, 'timestamp', '%d' % int(time.time()))
  config.set(constants.INISECT_EXP, 'source', instance.primary_node)
  config.set(constants.INISECT_EXP, 'os', instance.os)
  config.set(constants.INISECT_EXP, 'compression', 'gzip')

  config.add_section(constants.INISECT_INS)
  config.set(constants.INISECT_INS, 'name', instance.name)
  config.set(constants.INISECT_INS, 'memory', '%d' % instance.memory)
  config.set(constants.INISECT_INS, 'vcpus', '%d' % instance.vcpus)
  config.set(constants.INISECT_INS, 'disk_template', instance.disk_template)
  # pre-initialize the counter so an instance without nics doesn't hit
  # a NameError on the 'nic_count' write below; the stored value is the
  # highest index (i.e. count - 1), kept that way for compatibility
  # with existing readers, so -1 means "no nics"
  nic_count = -1
  for nic_count, nic in enumerate(instance.nics):
    config.set(constants.INISECT_INS, 'nic%d_mac' %
               nic_count, '%s' % nic.mac)
    config.set(constants.INISECT_INS, 'nic%d_ip' % nic_count, '%s' % nic.ip)
  # TODO: redundant: on load can read nics until it doesn't exist
  config.set(constants.INISECT_INS, 'nic_count' , '%d' % nic_count)

  # same pre-initialization as for nic_count above
  disk_count = -1
  for disk_count, disk in enumerate(snap_disks):
    config.set(constants.INISECT_INS, 'disk%d_ivname' % disk_count,
               ('%s' % disk.iv_name))
    config.set(constants.INISECT_INS, 'disk%d_dump' % disk_count,
               ('%s' % disk.physical_id[1]))
    config.set(constants.INISECT_INS, 'disk%d_size' % disk_count,
               ('%d' % disk.size))
  config.set(constants.INISECT_INS, 'disk_count' , '%d' % disk_count)

  cff = os.path.join(destdir, constants.EXPORT_CONF_FILE)
  cfo = open(cff, 'w')
  try:
    config.write(cfo)
  finally:
    cfo.close()

  # replace any previous export of the same instance with the new one
  shutil.rmtree(finaldestdir, True)
  shutil.move(destdir, finaldestdir)

  return True
1282

    
1283

    
1284
def ExportInfo(dest):
  """Get export configuration information.

  Args:
    dest: directory containing the export

  Returns:
    A serializable config file containing the export info, or None if
    the mandatory sections are missing.

  """
  cff = os.path.join(dest, constants.EXPORT_CONF_FILE)

  config = objects.SerializableConfigParser()
  config.read(cff)

  # both the export and the instance sections must be present for the
  # export to be usable
  has_exp = config.has_section(constants.INISECT_EXP)
  has_ins = config.has_section(constants.INISECT_INS)
  if has_exp and has_ins:
    return config
  return None
1304

    
1305

    
1306
def ImportOSIntoInstance(instance, os_disk, swap_disk, src_node, src_image):
  """Import an os image into an instance.

  The (gzip-compressed) image is streamed from the source node over
  ssh, uncompressed and fed to the OS definition's import script.

  Args:
    instance: the instance object
    os_disk: the instance-visible name of the os device
    swap_disk: the instance-visible name of the swap device
    src_node: node holding the source image
    src_image: path to the source image on src_node

  Returns:
    False in case of error, True otherwise.

  Raises:
    errors.BlockDeviceError: if either target device is not assembled

  """
  inst_os = OSFromDisk(instance.os)
  import_script = inst_os.import_script

  # resolve the instance-visible names to configuration disk objects
  os_device = instance.FindDisk(os_disk)
  if os_device is None:
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
    return False

  swap_device = instance.FindDisk(swap_disk)
  if swap_device is None:
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
    return False

  # both block devices must be assembled on this node before importing
  real_os_dev = _RecursiveFindBD(os_device)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(os_device))
  real_os_dev.Open()

  real_swap_dev = _RecursiveFindBD(swap_device)
  if real_swap_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(swap_device))
  real_swap_dev.Open()

  # per-import log file, named after the OS, instance and timestamp
  logfile = "%s/import-%s-%s-%s.log" % (constants.LOG_OS_DIR, instance.os,
                                        instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  # fetch the image from the source node over ssh...
  destcmd = utils.BuildShellCmd('cat %s', src_image)
  remotecmd = ssh.BuildSSHCmd(src_node, constants.GANETI_RUNAS, destcmd)

  # ...and pipe it through gunzip into the OS import script
  comprcmd = "gunzip"
  impcmd = utils.BuildShellCmd("(cd %s; %s -i %s -b %s -s %s &>%s)",
                               inst_os.path, import_script, instance.name,
                               real_os_dev.dev_path, real_swap_dev.dev_path,
                               logfile)

  command = '|'.join([utils.ShellQuoteArgs(remotecmd), comprcmd, impcmd])

  result = utils.RunCmd(command)

  if result.failed:
    logger.Error("os import command '%s' returned error: %s"
                 " output: %s" %
                 (command, result.fail_reason, result.output))
    return False

  return True
1370

    
1371

    
1372
def ListExports():
  """Return a list of exports currently available on this machine.

  Returns an empty list when the export directory does not exist.

  """
  if not os.path.isdir(constants.EXPORT_DIR):
    # no export directory at all means no exports
    return []
  return utils.ListVisibleFiles(constants.EXPORT_DIR)
1380

    
1381

    
1382
def RemoveExport(export):
1383
  """Remove an existing export from the node.
1384

1385
  Args:
1386
    export: the name of the export to remove
1387

1388
  Returns:
1389
    False in case of error, True otherwise.
1390

1391
  """
1392
  target = os.path.join(constants.EXPORT_DIR, export)
1393

    
1394
  shutil.rmtree(target)
1395
  # TODO: catch some of the relevant exceptions and provide a pretty
1396
  # error message if rmtree fails.
1397

    
1398
  return True
1399

    
1400

    
1401
def RenameBlockDevices(devlist):
  """Rename a list of block devices.

  The devlist argument is a list of tuples (disk, new_unique_id). The
  return value will be a combined boolean result (True only if all
  renames succeeded).

  """
  result = True
  for disk, unique_id in devlist:
    dev = _RecursiveFindBD(disk)
    if dev is None:
      # device not found: record the failure but keep renaming the rest
      result = False
      continue
    try:
      old_rpath = dev.dev_path
      dev.Rename(unique_id)
      new_rpath = dev.dev_path
      if old_rpath != new_rpath:
        # the device path changed, so the old cache entry is stale
        DevCacheManager.RemoveCache(old_rpath)
        # FIXME: we should add the new cache information here, like:
        # DevCacheManager.UpdateCache(new_rpath, owner, ...)
        # but we don't have the owner here - maybe parse from existing
        # cache? for now, we only lose lvm data when we rename, which
        # is less critical than DRBD or MD
    except errors.BlockDeviceError, err:
      logger.Error("Can't rename device '%s' to '%s': %s" %
                   (dev, unique_id, err))
      result = False
  return result
1431

    
1432

    
1433
class HooksRunner(object):
  """Hook runner.

  This class is instantiated on the node side (ganeti-noded) and not on
  the master side.

  """
  # only files whose names match this are run as hooks; everything
  # else (editor backups, package leftovers, etc.) is skipped
  RE_MASK = re.compile("^[a-zA-Z0-9_-]+$")

  def __init__(self, hooks_base_dir=None):
    """Constructor for hooks runner.

    Args:
      - hooks_base_dir: if not None, this overrides the
        constants.HOOKS_BASE_DIR (useful for unittests)

    """
    if hooks_base_dir is None:
      hooks_base_dir = constants.HOOKS_BASE_DIR
    self._BASE_DIR = hooks_base_dir

  @staticmethod
  def ExecHook(script, env):
    """Exec one hook script.

    Args:
     - script: the full path to the script
     - env: the environment with which to exec the script

    Returns:
      a (success, output) tuple, where success is True iff the script
      exited with status zero and output is the first 4kB of its
      combined stdout/stderr

    """
    # exec the process using subprocess and log the output
    fdstdin = None
    try:
      fdstdin = open("/dev/null", "r")
      child = subprocess.Popen([script], stdin=fdstdin, stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT, close_fds=True,
                               shell=False, cwd="/", env=env)
      output = ""
      try:
        # we deliberately capture at most 4kB of output per hook
        output = child.stdout.read(4096)
        child.stdout.close()
      except EnvironmentError, err:
        output += "Hook script error: %s" % str(err)

      # retry wait() if it is interrupted by a signal
      while True:
        try:
          result = child.wait()
          break
        except EnvironmentError, err:
          if err.errno == errno.EINTR:
            continue
          raise
    finally:
      # try not to leak fds
      for fd in (fdstdin, ):
        if fd is not None:
          try:
            fd.close()
          except EnvironmentError, err:
            # just log the error
            #logger.Error("While closing fd %s: %s" % (fd, err))
            pass

    return result == 0, output

  def RunHooks(self, hpath, phase, env):
    """Run the scripts in the hooks directory.

    This method will not be usually overriden by child opcodes.

    Args:
      - hpath: the hook path (used to build the hooks subdirectory name)
      - phase: one of constants.HOOKS_PHASE_PRE/POST
      - env: the environment passed to each hook script

    Returns:
      a list of ("subdir/script", status, output) tuples, where status
      is one of the constants.HKR_* values

    Raises:
      errors.ProgrammerError: if phase is not a known hooks phase

    """
    if phase == constants.HOOKS_PHASE_PRE:
      suffix = "pre"
    elif phase == constants.HOOKS_PHASE_POST:
      suffix = "post"
    else:
      raise errors.ProgrammerError("Unknown hooks phase: '%s'" % phase)
    rr = []

    subdir = "%s-%s.d" % (hpath, suffix)
    dir_name = "%s/%s" % (self._BASE_DIR, subdir)
    try:
      dir_contents = utils.ListVisibleFiles(dir_name)
    except OSError, err:
      # a missing hooks directory simply means no hooks to run
      # must log
      return rr

    # we use the standard python sort order,
    # so 00name is the recommended naming scheme
    dir_contents.sort()
    for relname in dir_contents:
      fname = os.path.join(dir_name, relname)
      # non-files, non-executables and badly-named entries are
      # reported as skipped rather than executed
      if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
          self.RE_MASK.match(relname) is not None):
        rrval = constants.HKR_SKIP
        output = ""
      else:
        result, output = self.ExecHook(fname, env)
        if not result:
          rrval = constants.HKR_FAIL
        else:
          rrval = constants.HKR_SUCCESS
      rr.append(("%s/%s" % (subdir, relname), rrval, output))

    return rr
1542

    
1543

    
1544
class DevCacheManager(object):
1545
  """Simple class for managing a chache of block device information.
1546

1547
  """
1548
  _DEV_PREFIX = "/dev/"
1549
  _ROOT_DIR = constants.BDEV_CACHE_DIR
1550

    
1551
  @classmethod
1552
  def _ConvertPath(cls, dev_path):
1553
    """Converts a /dev/name path to the cache file name.
1554

1555
    This replaces slashes with underscores and strips the /dev
1556
    prefix. It then returns the full path to the cache file
1557

1558
    """
1559
    if dev_path.startswith(cls._DEV_PREFIX):
1560
      dev_path = dev_path[len(cls._DEV_PREFIX):]
1561
    dev_path = dev_path.replace("/", "_")
1562
    fpath = "%s/bdev_%s" % (cls._ROOT_DIR, dev_path)
1563
    return fpath
1564

    
1565
  @classmethod
1566
  def UpdateCache(cls, dev_path, owner, on_primary, iv_name):
1567
    """Updates the cache information for a given device.
1568

1569
    """
1570
    if dev_path is None:
1571
      logger.Error("DevCacheManager.UpdateCache got a None dev_path")
1572
      return
1573
    fpath = cls._ConvertPath(dev_path)
1574
    if on_primary:
1575
      state = "primary"
1576
    else:
1577
      state = "secondary"
1578
    if iv_name is None:
1579
      iv_name = "not_visible"
1580
    fdata = "%s %s %s\n" % (str(owner), state, iv_name)
1581
    try:
1582
      utils.WriteFile(fpath, data=fdata)
1583
    except EnvironmentError, err:
1584
      logger.Error("Can't update bdev cache for %s, error %s" %
1585
                   (dev_path, str(err)))
1586

    
1587
  @classmethod
1588
  def RemoveCache(cls, dev_path):
1589
    """Remove data for a dev_path.
1590

1591
    """
1592
    if dev_path is None:
1593
      logger.Error("DevCacheManager.RemoveCache got a None dev_path")
1594
      return
1595
    fpath = cls._ConvertPath(dev_path)
1596
    try:
1597
      utils.RemoveFile(fpath)
1598
    except EnvironmentError, err:
1599
      logger.Error("Can't update bdev cache for %s, error %s" %
1600
                   (dev_path, str(err)))