Statistics
| Branch: | Tag: | Revision:

root / lib / backend.py @ c99a3cc0

History | View | Annotate | Download (45.3 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Functions used by the node daemon"""
23

    
24

    
25
import os
26
import os.path
27
import shutil
28
import time
29
import tempfile
30
import stat
31
import errno
32
import re
33
import subprocess
34

    
35
from ganeti import logger
36
from ganeti import errors
37
from ganeti import utils
38
from ganeti import ssh
39
from ganeti import hypervisor
40
from ganeti import constants
41
from ganeti import bdev
42
from ganeti import objects
43
from ganeti import ssconf
44

    
45

    
46
def StartMaster():
47
  """Activate local node as master node.
48

49
  There are two needed steps for this:
50
    - run the master script
51
    - register the cron script
52

53
  """
54
  result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "start"])
55

    
56
  if result.failed:
57
    logger.Error("could not activate cluster interface with command %s,"
58
                 " error: '%s'" % (result.cmd, result.output))
59
    return False
60

    
61
  return True
62

    
63

    
64
def StopMaster():
65
  """Deactivate this node as master.
66

67
  This does two things:
68
    - run the master stop script
69
    - remove link to master cron script.
70

71
  """
72
  result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "stop"])
73

    
74
  if result.failed:
75
    logger.Error("could not deactivate cluster interface with command %s,"
76
                 " error: '%s'" % (result.cmd, result.output))
77
    return False
78

    
79
  return True
80

    
81

    
82
def AddNode(dsa, dsapub, rsa, rsapub, sshkey, sshpub):
83
  """Joins this node to the cluster.
84

85
  This does the following:
86
      - updates the hostkeys of the machine (rsa and dsa)
87
      - adds the ssh private key to the user
88
      - adds the ssh public key to the users' authorized_keys file
89

90
  """
91
  sshd_keys =  [(constants.SSH_HOST_RSA_PRIV, rsa, 0600),
92
                (constants.SSH_HOST_RSA_PUB, rsapub, 0644),
93
                (constants.SSH_HOST_DSA_PRIV, dsa, 0600),
94
                (constants.SSH_HOST_DSA_PUB, dsapub, 0644)]
95
  for name, content, mode in sshd_keys:
96
    utils.WriteFile(name, data=content, mode=mode)
97

    
98
  try:
99
    priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS,
100
                                                    mkdir=True)
101
  except errors.OpExecError, err:
102
    logger.Error("Error while processing user ssh files: %s" % err)
103
    return False
104

    
105
  for name, content in [(priv_key, sshkey), (pub_key, sshpub)]:
106
    utils.WriteFile(name, data=content, mode=0600)
107

    
108
  utils.AddAuthorizedKey(auth_keys, sshpub)
109

    
110
  utils.RunCmd([constants.SSH_INITD_SCRIPT, "restart"])
111

    
112
  return True
113

    
114

    
115
def LeaveCluster():
116
  """Cleans up the current node and prepares it to be removed from the cluster.
117

118
  """
119
  if os.path.isdir(constants.DATA_DIR):
120
    for rel_name in utils.ListVisibleFiles(constants.DATA_DIR):
121
      full_name = os.path.join(constants.DATA_DIR, rel_name)
122
      if os.path.isfile(full_name) and not os.path.islink(full_name):
123
        utils.RemoveFile(full_name)
124

    
125
  try:
126
    priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)
127
  except errors.OpExecError, err:
128
    logger.Error("Error while processing ssh files: %s" % err)
129
    return
130

    
131
  f = open(pub_key, 'r')
132
  try:
133
    utils.RemoveAuthorizedKey(auth_keys, f.read(8192))
134
  finally:
135
    f.close()
136

    
137
  utils.RemoveFile(priv_key)
138
  utils.RemoveFile(pub_key)
139

    
140

    
141
def GetNodeInfo(vgname):
142
  """Gives back a hash with different informations about the node.
143

144
  Returns:
145
    { 'vg_size' : xxx,  'vg_free' : xxx, 'memory_domain0': xxx,
146
      'memory_free' : xxx, 'memory_total' : xxx }
147
    where
148
    vg_size is the size of the configured volume group in MiB
149
    vg_free is the free size of the volume group in MiB
150
    memory_dom0 is the memory allocated for domain0 in MiB
151
    memory_free is the currently available (free) ram in MiB
152
    memory_total is the total number of ram in MiB
153

154
  """
155
  outputarray = {}
156
  vginfo = _GetVGInfo(vgname)
157
  outputarray['vg_size'] = vginfo['vg_size']
158
  outputarray['vg_free'] = vginfo['vg_free']
159

    
160
  hyper = hypervisor.GetHypervisor()
161
  hyp_info = hyper.GetNodeInfo()
162
  if hyp_info is not None:
163
    outputarray.update(hyp_info)
164

    
165
  f = open("/proc/sys/kernel/random/boot_id", 'r')
166
  try:
167
    outputarray["bootid"] = f.read(128).rstrip("\n")
168
  finally:
169
    f.close()
170

    
171
  return outputarray
172

    
173

    
174
def VerifyNode(what):
175
  """Verify the status of the local node.
176

177
  Args:
178
    what - a dictionary of things to check:
179
      'filelist' : list of files for which to compute checksums
180
      'nodelist' : list of nodes we should check communication with
181
      'hypervisor': run the hypervisor-specific verify
182

183
  Requested files on local node are checksummed and the result returned.
184

185
  The nodelist is traversed, with the following checks being made
186
  for each node:
187
  - known_hosts key correct
188
  - correct resolving of node name (target node returns its own hostname
189
    by ssh-execution of 'hostname', result compared against name in list.
190

191
  """
192
  result = {}
193

    
194
  if 'hypervisor' in what:
195
    result['hypervisor'] = hypervisor.GetHypervisor().Verify()
196

    
197
  if 'filelist' in what:
198
    result['filelist'] = utils.FingerprintFiles(what['filelist'])
199

    
200
  if 'nodelist' in what:
201
    result['nodelist'] = {}
202
    for node in what['nodelist']:
203
      success, message = ssh.VerifyNodeHostname(node)
204
      if not success:
205
        result['nodelist'][node] = message
206
  return result
207

    
208

    
209
def GetVolumeList(vg_name):
210
  """Compute list of logical volumes and their size.
211

212
  Returns:
213
    dictionary of all partions (key) with their size (in MiB), inactive
214
    and online status:
215
    {'test1': ('20.06', True, True)}
216

217
  """
218
  lvs = {}
219
  sep = '|'
220
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
221
                         "--separator=%s" % sep,
222
                         "-olv_name,lv_size,lv_attr", vg_name])
223
  if result.failed:
224
    logger.Error("Failed to list logical volumes, lvs output: %s" %
225
                 result.output)
226
    return result.output
227

    
228
  for line in result.stdout.splitlines():
229
    line = line.strip().rstrip(sep)
230
    name, size, attr = line.split(sep)
231
    if len(attr) != 6:
232
      attr = '------'
233
    inactive = attr[4] == '-'
234
    online = attr[5] == 'o'
235
    lvs[name] = (size, inactive, online)
236

    
237
  return lvs
238

    
239

    
240
def ListVolumeGroups():
241
  """List the volume groups and their size.
242

243
  Returns:
244
    Dictionary with keys volume name and values the size of the volume
245

246
  """
247
  return utils.ListVolumeGroups()
248

    
249

    
250
def NodeVolumes():
251
  """List all volumes on this node.
252

253
  """
254
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
255
                         "--separator=|",
256
                         "--options=lv_name,lv_size,devices,vg_name"])
257
  if result.failed:
258
    logger.Error("Failed to list logical volumes, lvs output: %s" %
259
                 result.output)
260
    return {}
261

    
262
  def parse_dev(dev):
263
    if '(' in dev:
264
      return dev.split('(')[0]
265
    else:
266
      return dev
267

    
268
  def map_line(line):
269
    return {
270
      'name': line[0].strip(),
271
      'size': line[1].strip(),
272
      'dev': parse_dev(line[2].strip()),
273
      'vg': line[3].strip(),
274
    }
275

    
276
  return [map_line(line.split('|')) for line in result.stdout.splitlines()]
277

    
278

    
279
def BridgesExist(bridges_list):
280
  """Check if a list of bridges exist on the current node.
281

282
  Returns:
283
    True if all of them exist, false otherwise
284

285
  """
286
  for bridge in bridges_list:
287
    if not utils.BridgeExists(bridge):
288
      return False
289

    
290
  return True
291

    
292

    
293
def GetInstanceList():
294
  """Provides a list of instances.
295

296
  Returns:
297
    A list of all running instances on the current node
298
    - instance1.example.com
299
    - instance2.example.com
300

301
  """
302
  try:
303
    names = hypervisor.GetHypervisor().ListInstances()
304
  except errors.HypervisorError, err:
305
    logger.Error("error enumerating instances: %s" % str(err))
306
    raise
307

    
308
  return names
309

    
310

    
311
def GetInstanceInfo(instance):
312
  """Gives back the informations about an instance as a dictionary.
313

314
  Args:
315
    instance: name of the instance (ex. instance1.example.com)
316

317
  Returns:
318
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
319
    where
320
    memory: memory size of instance (int)
321
    state: xen state of instance (string)
322
    time: cpu time of instance (float)
323

324
  """
325
  output = {}
326

    
327
  iinfo = hypervisor.GetHypervisor().GetInstanceInfo(instance)
328
  if iinfo is not None:
329
    output['memory'] = iinfo[2]
330
    output['state'] = iinfo[4]
331
    output['time'] = iinfo[5]
332

    
333
  return output
334

    
335

    
336
def GetAllInstancesInfo():
337
  """Gather data about all instances.
338

339
  This is the equivalent of `GetInstanceInfo()`, except that it
340
  computes data for all instances at once, thus being faster if one
341
  needs data about more than one instance.
342

343
  Returns: a dictionary of dictionaries, keys being the instance name,
344
    and with values:
345
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
346
    where
347
    memory: memory size of instance (int)
348
    state: xen state of instance (string)
349
    time: cpu time of instance (float)
350
    vcpus: the number of cpus
351

352
  """
353
  output = {}
354

    
355
  iinfo = hypervisor.GetHypervisor().GetAllInstancesInfo()
356
  if iinfo:
357
    for name, inst_id, memory, vcpus, state, times in iinfo:
358
      output[name] = {
359
        'memory': memory,
360
        'vcpus': vcpus,
361
        'state': state,
362
        'time': times,
363
        }
364

    
365
  return output
366

    
367

    
368
def AddOSToInstance(instance, os_disk, swap_disk):
369
  """Add an OS to an instance.
370

371
  Args:
372
    instance: the instance object
373
    os_disk: the instance-visible name of the os device
374
    swap_disk: the instance-visible name of the swap device
375

376
  """
377
  inst_os = OSFromDisk(instance.os)
378

    
379
  create_script = inst_os.create_script
380

    
381
  os_device = instance.FindDisk(os_disk)
382
  if os_device is None:
383
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
384
    return False
385

    
386
  swap_device = instance.FindDisk(swap_disk)
387
  if swap_device is None:
388
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
389
    return False
390

    
391
  real_os_dev = _RecursiveFindBD(os_device)
392
  if real_os_dev is None:
393
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
394
                                  str(os_device))
395
  real_os_dev.Open()
396

    
397
  real_swap_dev = _RecursiveFindBD(swap_device)
398
  if real_swap_dev is None:
399
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
400
                                  str(swap_device))
401
  real_swap_dev.Open()
402

    
403
  logfile = "%s/add-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
404
                                     instance.name, int(time.time()))
405
  if not os.path.exists(constants.LOG_OS_DIR):
406
    os.mkdir(constants.LOG_OS_DIR, 0750)
407

    
408
  command = utils.BuildShellCmd("cd %s && %s -i %s -b %s -s %s &>%s",
409
                                inst_os.path, create_script, instance.name,
410
                                real_os_dev.dev_path, real_swap_dev.dev_path,
411
                                logfile)
412

    
413
  result = utils.RunCmd(command)
414
  if result.failed:
415
    logger.Error("os create command '%s' returned error: %s, logfile: %s,"
416
                 " output: %s" %
417
                 (command, result.fail_reason, logfile, result.output))
418
    return False
419

    
420
  return True
421

    
422

    
423
def RunRenameInstance(instance, old_name, os_disk, swap_disk):
424
  """Run the OS rename script for an instance.
425

426
  Args:
427
    instance: the instance object
428
    old_name: the old name of the instance
429
    os_disk: the instance-visible name of the os device
430
    swap_disk: the instance-visible name of the swap device
431

432
  """
433
  inst_os = OSFromDisk(instance.os)
434

    
435
  script = inst_os.rename_script
436

    
437
  os_device = instance.FindDisk(os_disk)
438
  if os_device is None:
439
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
440
    return False
441

    
442
  swap_device = instance.FindDisk(swap_disk)
443
  if swap_device is None:
444
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
445
    return False
446

    
447
  real_os_dev = _RecursiveFindBD(os_device)
448
  if real_os_dev is None:
449
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
450
                                  str(os_device))
451
  real_os_dev.Open()
452

    
453
  real_swap_dev = _RecursiveFindBD(swap_device)
454
  if real_swap_dev is None:
455
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
456
                                  str(swap_device))
457
  real_swap_dev.Open()
458

    
459
  logfile = "%s/rename-%s-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
460
                                           old_name,
461
                                           instance.name, int(time.time()))
462
  if not os.path.exists(constants.LOG_OS_DIR):
463
    os.mkdir(constants.LOG_OS_DIR, 0750)
464

    
465
  command = utils.BuildShellCmd("cd %s && %s -o %s -n %s -b %s -s %s &>%s",
466
                                inst_os.path, script, old_name, instance.name,
467
                                real_os_dev.dev_path, real_swap_dev.dev_path,
468
                                logfile)
469

    
470
  result = utils.RunCmd(command)
471

    
472
  if result.failed:
473
    logger.Error("os create command '%s' returned error: %s"
474
                 " output: %s" %
475
                 (command, result.fail_reason, result.output))
476
    return False
477

    
478
  return True
479

    
480

    
481
def _GetVGInfo(vg_name):
482
  """Get informations about the volume group.
483

484
  Args:
485
    vg_name: the volume group
486

487
  Returns:
488
    { 'vg_size' : xxx, 'vg_free' : xxx, 'pv_count' : xxx }
489
    where
490
    vg_size is the total size of the volume group in MiB
491
    vg_free is the free size of the volume group in MiB
492
    pv_count are the number of physical disks in that vg
493

494
  If an error occurs during gathering of data, we return the same dict
495
  with keys all set to None.
496

497
  """
498
  retdic = dict.fromkeys(["vg_size", "vg_free", "pv_count"])
499

    
500
  retval = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings",
501
                         "--nosuffix", "--units=m", "--separator=:", vg_name])
502

    
503
  if retval.failed:
504
    errmsg = "volume group %s not present" % vg_name
505
    logger.Error(errmsg)
506
    return retdic
507
  valarr = retval.stdout.strip().rstrip(':').split(':')
508
  if len(valarr) == 3:
509
    try:
510
      retdic = {
511
        "vg_size": int(round(float(valarr[0]), 0)),
512
        "vg_free": int(round(float(valarr[1]), 0)),
513
        "pv_count": int(valarr[2]),
514
        }
515
    except ValueError, err:
516
      logger.Error("Fail to parse vgs output: %s" % str(err))
517
  else:
518
    logger.Error("vgs output has the wrong number of fields (expected"
519
                 " three): %s" % str(valarr))
520
  return retdic
521

    
522

    
523
def _GatherBlockDevs(instance):
524
  """Set up an instance's block device(s).
525

526
  This is run on the primary node at instance startup. The block
527
  devices must be already assembled.
528

529
  """
530
  block_devices = []
531
  for disk in instance.disks:
532
    device = _RecursiveFindBD(disk)
533
    if device is None:
534
      raise errors.BlockDeviceError("Block device '%s' is not set up." %
535
                                    str(disk))
536
    device.Open()
537
    block_devices.append((disk, device))
538
  return block_devices
539

    
540

    
541
def StartInstance(instance, extra_args):
542
  """Start an instance.
543

544
  Args:
545
    instance - name of instance to start.
546

547
  """
548
  running_instances = GetInstanceList()
549

    
550
  if instance.name in running_instances:
551
    return True
552

    
553
  block_devices = _GatherBlockDevs(instance)
554
  hyper = hypervisor.GetHypervisor()
555

    
556
  try:
557
    hyper.StartInstance(instance, block_devices, extra_args)
558
  except errors.HypervisorError, err:
559
    logger.Error("Failed to start instance: %s" % err)
560
    return False
561

    
562
  return True
563

    
564

    
565
def ShutdownInstance(instance):
566
  """Shut an instance down.
567

568
  Args:
569
    instance - name of instance to shutdown.
570

571
  """
572
  running_instances = GetInstanceList()
573

    
574
  if instance.name not in running_instances:
575
    return True
576

    
577
  hyper = hypervisor.GetHypervisor()
578
  try:
579
    hyper.StopInstance(instance)
580
  except errors.HypervisorError, err:
581
    logger.Error("Failed to stop instance: %s" % err)
582
    return False
583

    
584
  # test every 10secs for 2min
585
  shutdown_ok = False
586

    
587
  time.sleep(1)
588
  for dummy in range(11):
589
    if instance.name not in GetInstanceList():
590
      break
591
    time.sleep(10)
592
  else:
593
    # the shutdown did not succeed
594
    logger.Error("shutdown of '%s' unsuccessful, using destroy" % instance)
595

    
596
    try:
597
      hyper.StopInstance(instance, force=True)
598
    except errors.HypervisorError, err:
599
      logger.Error("Failed to stop instance: %s" % err)
600
      return False
601

    
602
    time.sleep(1)
603
    if instance.name in GetInstanceList():
604
      logger.Error("could not shutdown instance '%s' even by destroy")
605
      return False
606

    
607
  return True
608

    
609

    
610
def RebootInstance(instance, reboot_type, extra_args):
611
  """Reboot an instance.
612

613
  Args:
614
    instance    - name of instance to reboot
615
    reboot_type - how to reboot [soft,hard,full]
616

617
  """
618
  running_instances = GetInstanceList()
619

    
620
  if instance.name not in running_instances:
621
    logger.Error("Cannot reboot instance that is not running")
622
    return False
623

    
624
  hyper = hypervisor.GetHypervisor()
625
  if reboot_type == constants.INSTANCE_REBOOT_SOFT:
626
    try:
627
      hyper.RebootInstance(instance)
628
    except errors.HypervisorError, err:
629
      logger.Error("Failed to soft reboot instance: %s" % err)
630
      return False
631
  elif reboot_type == constants.INSTANCE_REBOOT_HARD:
632
    try:
633
      ShutdownInstance(instance)
634
      StartInstance(instance, extra_args)
635
    except errors.HypervisorError, err:
636
      logger.Error("Failed to hard reboot instance: %s" % err)
637
      return False
638
  else:
639
    raise errors.ParameterError("reboot_type invalid")
640

    
641

    
642
  return True
643

    
644

    
645
def CreateBlockDevice(disk, size, owner, on_primary, info):
646
  """Creates a block device for an instance.
647

648
  Args:
649
   disk: a ganeti.objects.Disk object
650
   size: the size of the physical underlying device
651
   owner: a string with the name of the instance
652
   on_primary: a boolean indicating if it will be created on the primary
653
     node or not
654
   info: string 
655

656
  Returns:
657
    the new unique_id of the device (this can sometime be
658
    computed only after creation), or None. On secondary nodes,
659
    it's not required to return anything.
660

661
  """
662
  clist = []
663
  if disk.children:
664
    for child in disk.children:
665
      crdev = _RecursiveAssembleBD(child, owner, on_primary)
666
      if on_primary or disk.AssembleOnSecondary():
667
        # we need the children open in case the device itself has to
668
        # be assembled
669
        crdev.Open()
670
      clist.append(crdev)
671
  try:
672
    device = bdev.FindDevice(disk.dev_type, disk.physical_id, clist)
673
    if device is not None:
674
      logger.Info("removing existing device %s" % disk)
675
      device.Remove()
676
  except errors.BlockDeviceError, err:
677
    pass
678

    
679
  device = bdev.Create(disk.dev_type, disk.physical_id,
680
                       clist, size)
681
  if device is None:
682
    raise ValueError("Can't create child device for %s, %s" %
683
                     (disk, size))
684
  if on_primary or disk.AssembleOnSecondary():
685
    if not device.Assemble():
686
      errorstring = "Can't assemble device after creation"
687
      logger.Error(errorstring)
688
      raise errors.BlockDeviceError("%s, very unusual event - check the node"
689
                                    " daemon logs" % errorstring)
690
    device.SetSyncSpeed(constants.SYNC_SPEED)
691
    if on_primary or disk.OpenOnSecondary():
692
      device.Open(force=True)
693
    DevCacheManager.UpdateCache(device.dev_path, owner,
694
                                on_primary, disk.iv_name)
695

    
696
  device.SetInfo(info)
697

    
698
  physical_id = device.unique_id
699
  return physical_id
700

    
701

    
702
def RemoveBlockDevice(disk):
703
  """Remove a block device.
704

705
  This is intended to be called recursively.
706

707
  """
708
  try:
709
    # since we are removing the device, allow a partial match
710
    # this allows removal of broken mirrors
711
    rdev = _RecursiveFindBD(disk, allow_partial=True)
712
  except errors.BlockDeviceError, err:
713
    # probably can't attach
714
    logger.Info("Can't attach to device %s in remove" % disk)
715
    rdev = None
716
  if rdev is not None:
717
    r_path = rdev.dev_path
718
    result = rdev.Remove()
719
    if result:
720
      DevCacheManager.RemoveCache(r_path)
721
  else:
722
    result = True
723
  if disk.children:
724
    for child in disk.children:
725
      result = result and RemoveBlockDevice(child)
726
  return result
727

    
728

    
729
def _RecursiveAssembleBD(disk, owner, as_primary):
730
  """Activate a block device for an instance.
731

732
  This is run on the primary and secondary nodes for an instance.
733

734
  This function is called recursively.
735

736
  Args:
737
    disk: a objects.Disk object
738
    as_primary: if we should make the block device read/write
739

740
  Returns:
741
    the assembled device or None (in case no device was assembled)
742

743
  If the assembly is not successful, an exception is raised.
744

745
  """
746
  children = []
747
  if disk.children:
748
    mcn = disk.ChildrenNeeded()
749
    if mcn == -1:
750
      mcn = 0 # max number of Nones allowed
751
    else:
752
      mcn = len(disk.children) - mcn # max number of Nones
753
    for chld_disk in disk.children:
754
      try:
755
        cdev = _RecursiveAssembleBD(chld_disk, owner, as_primary)
756
      except errors.BlockDeviceError, err:
757
        if children.count(None) >= mcn:
758
          raise
759
        cdev = None
760
        logger.Debug("Error in child activation: %s" % str(err))
761
      children.append(cdev)
762

    
763
  if as_primary or disk.AssembleOnSecondary():
764
    r_dev = bdev.AttachOrAssemble(disk.dev_type, disk.physical_id, children)
765
    r_dev.SetSyncSpeed(constants.SYNC_SPEED)
766
    result = r_dev
767
    if as_primary or disk.OpenOnSecondary():
768
      r_dev.Open()
769
    DevCacheManager.UpdateCache(r_dev.dev_path, owner,
770
                                as_primary, disk.iv_name)
771

    
772
  else:
773
    result = True
774
  return result
775

    
776

    
777
def AssembleBlockDevice(disk, owner, as_primary):
778
  """Activate a block device for an instance.
779

780
  This is a wrapper over _RecursiveAssembleBD.
781

782
  Returns:
783
    a /dev path for primary nodes
784
    True for secondary nodes
785

786
  """
787
  result = _RecursiveAssembleBD(disk, owner, as_primary)
788
  if isinstance(result, bdev.BlockDev):
789
    result = result.dev_path
790
  return result
791

    
792

    
793
def ShutdownBlockDevice(disk):
794
  """Shut down a block device.
795

796
  First, if the device is assembled (can `Attach()`), then the device
797
  is shutdown. Then the children of the device are shutdown.
798

799
  This function is called recursively. Note that we don't cache the
800
  children or such, as oppossed to assemble, shutdown of different
801
  devices doesn't require that the upper device was active.
802

803
  """
804
  r_dev = _RecursiveFindBD(disk)
805
  if r_dev is not None:
806
    r_path = r_dev.dev_path
807
    result = r_dev.Shutdown()
808
    if result:
809
      DevCacheManager.RemoveCache(r_path)
810
  else:
811
    result = True
812
  if disk.children:
813
    for child in disk.children:
814
      result = result and ShutdownBlockDevice(child)
815
  return result
816

    
817

    
818
def MirrorAddChildren(parent_cdev, new_cdevs):
819
  """Extend a mirrored block device.
820

821
  """
822
  parent_bdev = _RecursiveFindBD(parent_cdev, allow_partial=True)
823
  if parent_bdev is None:
824
    logger.Error("Can't find parent device")
825
    return False
826
  new_bdevs = [_RecursiveFindBD(disk) for disk in new_cdevs]
827
  if new_bdevs.count(None) > 0:
828
    logger.Error("Can't find new device(s) to add: %s:%s" %
829
                 (new_bdevs, new_cdevs))
830
    return False
831
  parent_bdev.AddChildren(new_bdevs)
832
  return True
833

    
834

    
835
def MirrorRemoveChildren(parent_cdev, new_cdevs):
836
  """Shrink a mirrored block device.
837

838
  """
839
  parent_bdev = _RecursiveFindBD(parent_cdev)
840
  if parent_bdev is None:
841
    logger.Error("Can't find parent in remove children: %s" % parent_cdev)
842
    return False
843
  devs = []
844
  for disk in new_cdevs:
845
    rpath = disk.StaticDevPath()
846
    if rpath is None:
847
      bd = _RecursiveFindBD(disk)
848
      if bd is None:
849
        logger.Error("Can't find dynamic device %s while removing children" %
850
                     disk)
851
        return False
852
      else:
853
        devs.append(bd.dev_path)
854
    else:
855
      devs.append(rpath)
856
  parent_bdev.RemoveChildren(devs)
857
  return True
858

    
859

    
860
def GetMirrorStatus(disks):
861
  """Get the mirroring status of a list of devices.
862

863
  Args:
864
    disks: list of `objects.Disk`
865

866
  Returns:
867
    list of (mirror_done, estimated_time) tuples, which
868
    are the result of bdev.BlockDevice.CombinedSyncStatus()
869

870
  """
871
  stats = []
872
  for dsk in disks:
873
    rbd = _RecursiveFindBD(dsk)
874
    if rbd is None:
875
      raise errors.BlockDeviceError("Can't find device %s" % str(dsk))
876
    stats.append(rbd.CombinedSyncStatus())
877
  return stats
878

    
879

    
880
def _RecursiveFindBD(disk, allow_partial=False):
881
  """Check if a device is activated.
882

883
  If so, return informations about the real device.
884

885
  Args:
886
    disk: the objects.Disk instance
887
    allow_partial: don't abort the find if a child of the
888
                   device can't be found; this is intended to be
889
                   used when repairing mirrors
890

891
  Returns:
892
    None if the device can't be found
893
    otherwise the device instance
894

895
  """
896
  children = []
897
  if disk.children:
898
    for chdisk in disk.children:
899
      children.append(_RecursiveFindBD(chdisk))
900

    
901
  return bdev.FindDevice(disk.dev_type, disk.physical_id, children)
902

    
903

    
904
def FindBlockDevice(disk):
905
  """Check if a device is activated.
906

907
  If so, return informations about the real device.
908

909
  Args:
910
    disk: the objects.Disk instance
911
  Returns:
912
    None if the device can't be found
913
    (device_path, major, minor, sync_percent, estimated_time, is_degraded)
914

915
  """
916
  rbd = _RecursiveFindBD(disk)
917
  if rbd is None:
918
    return rbd
919
  return (rbd.dev_path, rbd.major, rbd.minor) + rbd.GetSyncStatus()
920

    
921

    
922
def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
923
  """Write a file to the filesystem.
924

925
  This allows the master to overwrite(!) a file. It will only perform
926
  the operation if the file belongs to a list of configuration files.
927

928
  """
929
  if not os.path.isabs(file_name):
930
    logger.Error("Filename passed to UploadFile is not absolute: '%s'" %
931
                 file_name)
932
    return False
933

    
934
  allowed_files = [
935
    constants.CLUSTER_CONF_FILE,
936
    constants.ETC_HOSTS,
937
    constants.SSH_KNOWN_HOSTS_FILE,
938
    ]
939
  allowed_files.extend(ssconf.SimpleStore().GetFileList())
940
  if file_name not in allowed_files:
941
    logger.Error("Filename passed to UploadFile not in allowed"
942
                 " upload targets: '%s'" % file_name)
943
    return False
944

    
945
  dir_name, small_name = os.path.split(file_name)
946
  fd, new_name = tempfile.mkstemp('.new', small_name, dir_name)
947
  # here we need to make sure we remove the temp file, if any error
948
  # leaves it in place
949
  try:
950
    os.chown(new_name, uid, gid)
951
    os.chmod(new_name, mode)
952
    os.write(fd, data)
953
    os.fsync(fd)
954
    os.utime(new_name, (atime, mtime))
955
    os.rename(new_name, file_name)
956
  finally:
957
    os.close(fd)
958
    utils.RemoveFile(new_name)
959
  return True
960

    
961

    
962
def _ErrnoOrStr(err):
963
  """Format an EnvironmentError exception.
964

965
  If the `err` argument has an errno attribute, it will be looked up
966
  and converted into a textual EXXXX description. Otherwise the string
967
  representation of the error will be returned.
968

969
  """
970
  if hasattr(err, 'errno'):
971
    detail = errno.errorcode[err.errno]
972
  else:
973
    detail = str(err)
974
  return detail
975

    
976

    
977
def _OSSearch(name, search_path=None):
978
  """Search for OSes with the given name in the search_path.
979

980
  Args:
981
    name: The name of the OS to look for
982
    search_path: List of dirs to search (defaults to constants.OS_SEARCH_PATH)
983

984
  Returns:
985
    The base_dir the OS resides in
986

987
  """
988
  if search_path is None:
989
    search_path = constants.OS_SEARCH_PATH
990

    
991
  for dir_name in search_path:
992
    t_os_dir = os.path.sep.join([dir_name, name])
993
    if os.path.isdir(t_os_dir):
994
      return dir_name
995

    
996
  return None
997

    
998

    
999
def _OSOndiskVersion(name, os_dir):
1000
  """Compute and return the API version of a given OS.
1001

1002
  This function will try to read the API version of the os given by
1003
  the 'name' parameter and residing in the 'os_dir' directory.
1004

1005
  Return value will be either an integer denoting the version or None in the
1006
  case when this is not a valid OS name.
1007

1008
  """
1009
  api_file = os.path.sep.join([os_dir, "ganeti_api_version"])
1010

    
1011
  try:
1012
    st = os.stat(api_file)
1013
  except EnvironmentError, err:
1014
    raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file not"
1015
                           " found (%s)" % _ErrnoOrStr(err))
1016

    
1017
  if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
1018
    raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file is not"
1019
                           " a regular file")
1020

    
1021
  try:
1022
    f = open(api_file)
1023
    try:
1024
      api_version = f.read(256)
1025
    finally:
1026
      f.close()
1027
  except EnvironmentError, err:
1028
    raise errors.InvalidOS(name, os_dir, "error while reading the"
1029
                           " API version (%s)" % _ErrnoOrStr(err))
1030

    
1031
  api_version = api_version.strip()
1032
  try:
1033
    api_version = int(api_version)
1034
  except (TypeError, ValueError), err:
1035
    raise errors.InvalidOS(name, os_dir,
1036
                           "API version is not integer (%s)" % str(err))
1037

    
1038
  return api_version
1039

    
1040

    
1041
def DiagnoseOS(top_dirs=None):
1042
  """Compute the validity for all OSes.
1043

1044
  Returns an OS object for each name in all the given top directories
1045
  (if not given defaults to constants.OS_SEARCH_PATH)
1046

1047
  Returns:
1048
    list of OS objects
1049

1050
  """
1051
  if top_dirs is None:
1052
    top_dirs = constants.OS_SEARCH_PATH
1053

    
1054
  result = []
1055
  for dir_name in top_dirs:
1056
    if os.path.isdir(dir_name):
1057
      try:
1058
        f_names = utils.ListVisibleFiles(dir_name)
1059
      except EnvironmentError, err:
1060
        logger.Error("Can't list the OS directory %s: %s" %
1061
                     (dir_name, str(err)))
1062
        break
1063
      for name in f_names:
1064
        try:
1065
          os_inst = OSFromDisk(name, base_dir=dir_name)
1066
          result.append(os_inst)
1067
        except errors.InvalidOS, err:
1068
          result.append(objects.OS.FromInvalidOS(err))
1069

    
1070
  return result
1071

    
1072

    
1073
def OSFromDisk(name, base_dir=None):
1074
  """Create an OS instance from disk.
1075

1076
  This function will return an OS instance if the given name is a
1077
  valid OS name. Otherwise, it will raise an appropriate
1078
  `errors.InvalidOS` exception, detailing why this is not a valid
1079
  OS.
1080

1081
  Args:
1082
    os_dir: Directory containing the OS scripts. Defaults to a search
1083
            in all the OS_SEARCH_PATH directories.
1084

1085
  """
1086

    
1087
  if base_dir is None:
1088
    base_dir = _OSSearch(name)
1089

    
1090
  if base_dir is None:
1091
    raise errors.InvalidOS(name, None, "OS dir not found in search path")
1092

    
1093
  os_dir = os.path.sep.join([base_dir, name])
1094
  api_version = _OSOndiskVersion(name, os_dir)
1095

    
1096
  if api_version != constants.OS_API_VERSION:
1097
    raise errors.InvalidOS(name, os_dir, "API version mismatch"
1098
                           " (found %s want %s)"
1099
                           % (api_version, constants.OS_API_VERSION))
1100

    
1101
  # OS Scripts dictionary, we will populate it with the actual script names
1102
  os_scripts = {'create': '', 'export': '', 'import': '', 'rename': ''}
1103

    
1104
  for script in os_scripts:
1105
    os_scripts[script] = os.path.sep.join([os_dir, script])
1106

    
1107
    try:
1108
      st = os.stat(os_scripts[script])
1109
    except EnvironmentError, err:
1110
      raise errors.InvalidOS(name, os_dir, "'%s' script missing (%s)" %
1111
                             (script, _ErrnoOrStr(err)))
1112

    
1113
    if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
1114
      raise errors.InvalidOS(name, os_dir, "'%s' script not executable" %
1115
                             script)
1116

    
1117
    if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
1118
      raise errors.InvalidOS(name, os_dir, "'%s' is not a regular file" %
1119
                             script)
1120

    
1121

    
1122
  return objects.OS(name=name, path=os_dir, status=constants.OS_VALID_STATUS,
1123
                    create_script=os_scripts['create'],
1124
                    export_script=os_scripts['export'],
1125
                    import_script=os_scripts['import'],
1126
                    rename_script=os_scripts['rename'],
1127
                    api_version=api_version)
1128

    
1129

    
1130
def SnapshotBlockDevice(disk):
1131
  """Create a snapshot copy of a block device.
1132

1133
  This function is called recursively, and the snapshot is actually created
1134
  just for the leaf lvm backend device.
1135

1136
  Args:
1137
    disk: the disk to be snapshotted
1138

1139
  Returns:
1140
    a config entry for the actual lvm device snapshotted.
1141

1142
  """
1143
  if disk.children:
1144
    if len(disk.children) == 1:
1145
      # only one child, let's recurse on it
1146
      return SnapshotBlockDevice(disk.children[0])
1147
    else:
1148
      # more than one child, choose one that matches
1149
      for child in disk.children:
1150
        if child.size == disk.size:
1151
          # return implies breaking the loop
1152
          return SnapshotBlockDevice(child)
1153
  elif disk.dev_type == constants.LD_LV:
1154
    r_dev = _RecursiveFindBD(disk)
1155
    if r_dev is not None:
1156
      # let's stay on the safe side and ask for the full size, for now
1157
      return r_dev.Snapshot(disk.size)
1158
    else:
1159
      return None
1160
  else:
1161
    raise errors.ProgrammerError("Cannot snapshot non-lvm block device"
1162
                                 " '%s' of type '%s'" %
1163
                                 (disk.unique_id, disk.dev_type))
1164

    
1165

    
1166
def ExportSnapshot(disk, dest_node, instance):
1167
  """Export a block device snapshot to a remote node.
1168

1169
  Args:
1170
    disk: the snapshot block device
1171
    dest_node: the node to send the image to
1172
    instance: instance being exported
1173

1174
  Returns:
1175
    True if successful, False otherwise.
1176

1177
  """
1178
  inst_os = OSFromDisk(instance.os)
1179
  export_script = inst_os.export_script
1180

    
1181
  logfile = "%s/exp-%s-%s-%s.log" % (constants.LOG_OS_DIR, inst_os.name,
1182
                                     instance.name, int(time.time()))
1183
  if not os.path.exists(constants.LOG_OS_DIR):
1184
    os.mkdir(constants.LOG_OS_DIR, 0750)
1185

    
1186
  real_os_dev = _RecursiveFindBD(disk)
1187
  if real_os_dev is None:
1188
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
1189
                                  str(disk))
1190
  real_os_dev.Open()
1191

    
1192
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
1193
  destfile = disk.physical_id[1]
1194

    
1195
  # the target command is built out of three individual commands,
1196
  # which are joined by pipes; we check each individual command for
1197
  # valid parameters
1198

    
1199
  expcmd = utils.BuildShellCmd("cd %s; %s -i %s -b %s 2>%s", inst_os.path,
1200
                               export_script, instance.name,
1201
                               real_os_dev.dev_path, logfile)
1202

    
1203
  comprcmd = "gzip"
1204

    
1205
  destcmd = utils.BuildShellCmd("mkdir -p %s && cat > %s/%s",
1206
                                destdir, destdir, destfile)
1207
  remotecmd = ssh.BuildSSHCmd(dest_node, constants.GANETI_RUNAS, destcmd)
1208

    
1209

    
1210

    
1211
  # all commands have been checked, so we're safe to combine them
1212
  command = '|'.join([expcmd, comprcmd, utils.ShellQuoteArgs(remotecmd)])
1213

    
1214
  result = utils.RunCmd(command)
1215

    
1216
  if result.failed:
1217
    logger.Error("os snapshot export command '%s' returned error: %s"
1218
                 " output: %s" %
1219
                 (command, result.fail_reason, result.output))
1220
    return False
1221

    
1222
  return True
1223

    
1224

    
1225
def FinalizeExport(instance, snap_disks):
1226
  """Write out the export configuration information.
1227

1228
  Args:
1229
    instance: instance configuration
1230
    snap_disks: snapshot block devices
1231

1232
  Returns:
1233
    False in case of error, True otherwise.
1234

1235
  """
1236
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
1237
  finaldestdir = os.path.join(constants.EXPORT_DIR, instance.name)
1238

    
1239
  config = objects.SerializableConfigParser()
1240

    
1241
  config.add_section(constants.INISECT_EXP)
1242
  config.set(constants.INISECT_EXP, 'version', '0')
1243
  config.set(constants.INISECT_EXP, 'timestamp', '%d' % int(time.time()))
1244
  config.set(constants.INISECT_EXP, 'source', instance.primary_node)
1245
  config.set(constants.INISECT_EXP, 'os', instance.os)
1246
  config.set(constants.INISECT_EXP, 'compression', 'gzip')
1247

    
1248
  config.add_section(constants.INISECT_INS)
1249
  config.set(constants.INISECT_INS, 'name', instance.name)
1250
  config.set(constants.INISECT_INS, 'memory', '%d' % instance.memory)
1251
  config.set(constants.INISECT_INS, 'vcpus', '%d' % instance.vcpus)
1252
  config.set(constants.INISECT_INS, 'disk_template', instance.disk_template)
1253
  for nic_count, nic in enumerate(instance.nics):
1254
    config.set(constants.INISECT_INS, 'nic%d_mac' %
1255
               nic_count, '%s' % nic.mac)
1256
    config.set(constants.INISECT_INS, 'nic%d_ip' % nic_count, '%s' % nic.ip)
1257
    config.set(constants.INISECT_INS, 'nic%d_bridge' % nic_count, '%s' % nic.bridge)
1258
  # TODO: redundant: on load can read nics until it doesn't exist
1259
  config.set(constants.INISECT_INS, 'nic_count' , '%d' % nic_count)
1260

    
1261
  for disk_count, disk in enumerate(snap_disks):
1262
    config.set(constants.INISECT_INS, 'disk%d_ivname' % disk_count,
1263
               ('%s' % disk.iv_name))
1264
    config.set(constants.INISECT_INS, 'disk%d_dump' % disk_count,
1265
               ('%s' % disk.physical_id[1]))
1266
    config.set(constants.INISECT_INS, 'disk%d_size' % disk_count,
1267
               ('%d' % disk.size))
1268
  config.set(constants.INISECT_INS, 'disk_count' , '%d' % disk_count)
1269

    
1270
  cff = os.path.join(destdir, constants.EXPORT_CONF_FILE)
1271
  cfo = open(cff, 'w')
1272
  try:
1273
    config.write(cfo)
1274
  finally:
1275
    cfo.close()
1276

    
1277
  shutil.rmtree(finaldestdir, True)
1278
  shutil.move(destdir, finaldestdir)
1279

    
1280
  return True
1281

    
1282

    
1283
def ExportInfo(dest):
1284
  """Get export configuration information.
1285

1286
  Args:
1287
    dest: directory containing the export
1288

1289
  Returns:
1290
    A serializable config file containing the export info.
1291

1292
  """
1293
  cff = os.path.join(dest, constants.EXPORT_CONF_FILE)
1294

    
1295
  config = objects.SerializableConfigParser()
1296
  config.read(cff)
1297

    
1298
  if (not config.has_section(constants.INISECT_EXP) or
1299
      not config.has_section(constants.INISECT_INS)):
1300
    return None
1301

    
1302
  return config
1303

    
1304

    
1305
def ImportOSIntoInstance(instance, os_disk, swap_disk, src_node, src_image):
1306
  """Import an os image into an instance.
1307

1308
  Args:
1309
    instance: the instance object
1310
    os_disk: the instance-visible name of the os device
1311
    swap_disk: the instance-visible name of the swap device
1312
    src_node: node holding the source image
1313
    src_image: path to the source image on src_node
1314

1315
  Returns:
1316
    False in case of error, True otherwise.
1317

1318
  """
1319
  inst_os = OSFromDisk(instance.os)
1320
  import_script = inst_os.import_script
1321

    
1322
  os_device = instance.FindDisk(os_disk)
1323
  if os_device is None:
1324
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
1325
    return False
1326

    
1327
  swap_device = instance.FindDisk(swap_disk)
1328
  if swap_device is None:
1329
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
1330
    return False
1331

    
1332
  real_os_dev = _RecursiveFindBD(os_device)
1333
  if real_os_dev is None:
1334
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
1335
                                  str(os_device))
1336
  real_os_dev.Open()
1337

    
1338
  real_swap_dev = _RecursiveFindBD(swap_device)
1339
  if real_swap_dev is None:
1340
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
1341
                                  str(swap_device))
1342
  real_swap_dev.Open()
1343

    
1344
  logfile = "%s/import-%s-%s-%s.log" % (constants.LOG_OS_DIR, instance.os,
1345
                                        instance.name, int(time.time()))
1346
  if not os.path.exists(constants.LOG_OS_DIR):
1347
    os.mkdir(constants.LOG_OS_DIR, 0750)
1348

    
1349
  destcmd = utils.BuildShellCmd('cat %s', src_image)
1350
  remotecmd = ssh.BuildSSHCmd(src_node, constants.GANETI_RUNAS, destcmd)
1351

    
1352
  comprcmd = "gunzip"
1353
  impcmd = utils.BuildShellCmd("(cd %s; %s -i %s -b %s -s %s &>%s)",
1354
                               inst_os.path, import_script, instance.name,
1355
                               real_os_dev.dev_path, real_swap_dev.dev_path,
1356
                               logfile)
1357

    
1358
  command = '|'.join([utils.ShellQuoteArgs(remotecmd), comprcmd, impcmd])
1359

    
1360
  result = utils.RunCmd(command)
1361

    
1362
  if result.failed:
1363
    logger.Error("os import command '%s' returned error: %s"
1364
                 " output: %s" %
1365
                 (command, result.fail_reason, result.output))
1366
    return False
1367

    
1368
  return True
1369

    
1370

    
1371
def ListExports():
1372
  """Return a list of exports currently available on this machine.
1373

1374
  """
1375
  if os.path.isdir(constants.EXPORT_DIR):
1376
    return utils.ListVisibleFiles(constants.EXPORT_DIR)
1377
  else:
1378
    return []
1379

    
1380

    
1381
def RemoveExport(export):
1382
  """Remove an existing export from the node.
1383

1384
  Args:
1385
    export: the name of the export to remove
1386

1387
  Returns:
1388
    False in case of error, True otherwise.
1389

1390
  """
1391
  target = os.path.join(constants.EXPORT_DIR, export)
1392

    
1393
  shutil.rmtree(target)
1394
  # TODO: catch some of the relevant exceptions and provide a pretty
1395
  # error message if rmtree fails.
1396

    
1397
  return True
1398

    
1399

    
1400
def RenameBlockDevices(devlist):
1401
  """Rename a list of block devices.
1402

1403
  The devlist argument is a list of tuples (disk, new_logical,
1404
  new_physical). The return value will be a combined boolean result
1405
  (True only if all renames succeeded).
1406

1407
  """
1408
  result = True
1409
  for disk, unique_id in devlist:
1410
    dev = _RecursiveFindBD(disk)
1411
    if dev is None:
1412
      result = False
1413
      continue
1414
    try:
1415
      old_rpath = dev.dev_path
1416
      dev.Rename(unique_id)
1417
      new_rpath = dev.dev_path
1418
      if old_rpath != new_rpath:
1419
        DevCacheManager.RemoveCache(old_rpath)
1420
        # FIXME: we should add the new cache information here, like:
1421
        # DevCacheManager.UpdateCache(new_rpath, owner, ...)
1422
        # but we don't have the owner here - maybe parse from existing
1423
        # cache? for now, we only lose lvm data when we rename, which
1424
        # is less critical than DRBD or MD
1425
    except errors.BlockDeviceError, err:
1426
      logger.Error("Can't rename device '%s' to '%s': %s" %
1427
                   (dev, unique_id, err))
1428
      result = False
1429
  return result
1430

    
1431

    
1432
class HooksRunner(object):
1433
  """Hook runner.
1434

1435
  This class is instantiated on the node side (ganeti-noded) and not on
1436
  the master side.
1437

1438
  """
1439
  RE_MASK = re.compile("^[a-zA-Z0-9_-]+$")
1440

    
1441
  def __init__(self, hooks_base_dir=None):
1442
    """Constructor for hooks runner.
1443

1444
    Args:
1445
      - hooks_base_dir: if not None, this overrides the
1446
        constants.HOOKS_BASE_DIR (useful for unittests)
1447
      - logs_base_dir: if not None, this overrides the
1448
        constants.LOG_HOOKS_DIR (useful for unittests)
1449
      - logging: enable or disable logging of script output
1450

1451
    """
1452
    if hooks_base_dir is None:
1453
      hooks_base_dir = constants.HOOKS_BASE_DIR
1454
    self._BASE_DIR = hooks_base_dir
1455

    
1456
  @staticmethod
1457
  def ExecHook(script, env):
1458
    """Exec one hook script.
1459

1460
    Args:
1461
     - phase: the phase
1462
     - script: the full path to the script
1463
     - env: the environment with which to exec the script
1464

1465
    """
1466
    # exec the process using subprocess and log the output
1467
    fdstdin = None
1468
    try:
1469
      fdstdin = open("/dev/null", "r")
1470
      child = subprocess.Popen([script], stdin=fdstdin, stdout=subprocess.PIPE,
1471
                               stderr=subprocess.STDOUT, close_fds=True,
1472
                               shell=False, cwd="/", env=env)
1473
      output = ""
1474
      try:
1475
        output = child.stdout.read(4096)
1476
        child.stdout.close()
1477
      except EnvironmentError, err:
1478
        output += "Hook script error: %s" % str(err)
1479

    
1480
      while True:
1481
        try:
1482
          result = child.wait()
1483
          break
1484
        except EnvironmentError, err:
1485
          if err.errno == errno.EINTR:
1486
            continue
1487
          raise
1488
    finally:
1489
      # try not to leak fds
1490
      for fd in (fdstdin, ):
1491
        if fd is not None:
1492
          try:
1493
            fd.close()
1494
          except EnvironmentError, err:
1495
            # just log the error
1496
            #logger.Error("While closing fd %s: %s" % (fd, err))
1497
            pass
1498

    
1499
    return result == 0, output
1500

    
1501
  def RunHooks(self, hpath, phase, env):
1502
    """Run the scripts in the hooks directory.
1503

1504
    This method will not be usually overriden by child opcodes.
1505

1506
    """
1507
    if phase == constants.HOOKS_PHASE_PRE:
1508
      suffix = "pre"
1509
    elif phase == constants.HOOKS_PHASE_POST:
1510
      suffix = "post"
1511
    else:
1512
      raise errors.ProgrammerError("Unknown hooks phase: '%s'" % phase)
1513
    rr = []
1514

    
1515
    subdir = "%s-%s.d" % (hpath, suffix)
1516
    dir_name = "%s/%s" % (self._BASE_DIR, subdir)
1517
    try:
1518
      dir_contents = utils.ListVisibleFiles(dir_name)
1519
    except OSError, err:
1520
      # must log
1521
      return rr
1522

    
1523
    # we use the standard python sort order,
1524
    # so 00name is the recommended naming scheme
1525
    dir_contents.sort()
1526
    for relname in dir_contents:
1527
      fname = os.path.join(dir_name, relname)
1528
      if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
1529
          self.RE_MASK.match(relname) is not None):
1530
        rrval = constants.HKR_SKIP
1531
        output = ""
1532
      else:
1533
        result, output = self.ExecHook(fname, env)
1534
        if not result:
1535
          rrval = constants.HKR_FAIL
1536
        else:
1537
          rrval = constants.HKR_SUCCESS
1538
      rr.append(("%s/%s" % (subdir, relname), rrval, output))
1539

    
1540
    return rr
1541

    
1542

    
1543
class DevCacheManager(object):
1544
  """Simple class for managing a cache of block device information.
1545

1546
  """
1547
  _DEV_PREFIX = "/dev/"
1548
  _ROOT_DIR = constants.BDEV_CACHE_DIR
1549

    
1550
  @classmethod
1551
  def _ConvertPath(cls, dev_path):
1552
    """Converts a /dev/name path to the cache file name.
1553

1554
    This replaces slashes with underscores and strips the /dev
1555
    prefix. It then returns the full path to the cache file
1556

1557
    """
1558
    if dev_path.startswith(cls._DEV_PREFIX):
1559
      dev_path = dev_path[len(cls._DEV_PREFIX):]
1560
    dev_path = dev_path.replace("/", "_")
1561
    fpath = "%s/bdev_%s" % (cls._ROOT_DIR, dev_path)
1562
    return fpath
1563

    
1564
  @classmethod
1565
  def UpdateCache(cls, dev_path, owner, on_primary, iv_name):
1566
    """Updates the cache information for a given device.
1567

1568
    """
1569
    if dev_path is None:
1570
      logger.Error("DevCacheManager.UpdateCache got a None dev_path")
1571
      return
1572
    fpath = cls._ConvertPath(dev_path)
1573
    if on_primary:
1574
      state = "primary"
1575
    else:
1576
      state = "secondary"
1577
    if iv_name is None:
1578
      iv_name = "not_visible"
1579
    fdata = "%s %s %s\n" % (str(owner), state, iv_name)
1580
    try:
1581
      utils.WriteFile(fpath, data=fdata)
1582
    except EnvironmentError, err:
1583
      logger.Error("Can't update bdev cache for %s, error %s" %
1584
                   (dev_path, str(err)))
1585

    
1586
  @classmethod
1587
  def RemoveCache(cls, dev_path):
1588
    """Remove data for a dev_path.
1589

1590
    """
1591
    if dev_path is None:
1592
      logger.Error("DevCacheManager.RemoveCache got a None dev_path")
1593
      return
1594
    fpath = cls._ConvertPath(dev_path)
1595
    try:
1596
      utils.RemoveFile(fpath)
1597
    except EnvironmentError, err:
1598
      logger.Error("Can't update bdev cache for %s, error %s" %
1599
                   (dev_path, str(err)))