Statistics
| Branch: | Tag: | Revision:

root / lib / backend.py @ 2a10865c

History | View | Annotate | Download (50.7 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Functions used by the node daemon"""
23

    
24

    
25
import os
26
import os.path
27
import shutil
28
import time
29
import stat
30
import errno
31
import re
32
import subprocess
33
import random
34

    
35
from ganeti import logger
36
from ganeti import errors
37
from ganeti import utils
38
from ganeti import ssh
39
from ganeti import hypervisor
40
from ganeti import constants
41
from ganeti import bdev
42
from ganeti import objects
43
from ganeti import ssconf
44

    
45

    
46
def _GetSshRunner():
  """Build and return a fresh SshRunner instance."""
  runner = ssh.SshRunner()
  return runner
48

    
49

    
50
def StartMaster():
  """Activate local node as master node.

  There are two needed steps for this:
    - run the master script
    - register the cron script

  """
  result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "start"])

  # success: nothing more to do
  if not result.failed:
    return True

  logger.Error("could not activate cluster interface with command %s,"
               " error: '%s'" % (result.cmd, result.output))
  return False
66

    
67

    
68
def StopMaster():
  """Deactivate this node as master.

  This runs the master stop script.

  """
  result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "stop"])

  # success: nothing more to do
  if not result.failed:
    return True

  logger.Error("could not deactivate cluster interface with command %s,"
               " error: '%s'" % (result.cmd, result.output))
  return False
82

    
83

    
84
def AddNode(dsa, dsapub, rsa, rsapub, sshkey, sshpub):
85
  """Joins this node to the cluster.
86

87
  This does the following:
88
      - updates the hostkeys of the machine (rsa and dsa)
89
      - adds the ssh private key to the user
90
      - adds the ssh public key to the users' authorized_keys file
91

92
  """
93
  sshd_keys =  [(constants.SSH_HOST_RSA_PRIV, rsa, 0600),
94
                (constants.SSH_HOST_RSA_PUB, rsapub, 0644),
95
                (constants.SSH_HOST_DSA_PRIV, dsa, 0600),
96
                (constants.SSH_HOST_DSA_PUB, dsapub, 0644)]
97
  for name, content, mode in sshd_keys:
98
    utils.WriteFile(name, data=content, mode=mode)
99

    
100
  try:
101
    priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS,
102
                                                    mkdir=True)
103
  except errors.OpExecError, err:
104
    logger.Error("Error while processing user ssh files: %s" % err)
105
    return False
106

    
107
  for name, content in [(priv_key, sshkey), (pub_key, sshpub)]:
108
    utils.WriteFile(name, data=content, mode=0600)
109

    
110
  utils.AddAuthorizedKey(auth_keys, sshpub)
111

    
112
  utils.RunCmd([constants.SSH_INITD_SCRIPT, "restart"])
113

    
114
  return True
115

    
116

    
117
def LeaveCluster():
  """Cleans up the current node and prepares it to be removed from the cluster.

  Removes all regular files from the data directory, then removes the
  cluster ssh key pair for the Ganeti user (revoking it from
  authorized_keys first). Returns nothing; errors while reading the
  user's ssh files are logged and abort the key cleanup only.

  """
  if os.path.isdir(constants.DATA_DIR):
    for rel_name in utils.ListVisibleFiles(constants.DATA_DIR):
      full_name = os.path.join(constants.DATA_DIR, rel_name)
      # only plain files are removed; symlinks and directories are kept
      if os.path.isfile(full_name) and not os.path.islink(full_name):
        utils.RemoveFile(full_name)

  try:
    priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)
  except errors.OpExecError, err:
    logger.Error("Error while processing ssh files: %s" % err)
    return

  f = open(pub_key, 'r')
  try:
    # revoke the key from authorized_keys before deleting the key files
    utils.RemoveAuthorizedKey(auth_keys, f.read(8192))
  finally:
    f.close()

  utils.RemoveFile(priv_key)
  utils.RemoveFile(pub_key)
141

    
142

    
143
def GetNodeInfo(vgname):
  """Gives back a hash with different informations about the node.

  Returns:
    { 'vg_size' : xxx,  'vg_free' : xxx, 'memory_domain0': xxx,
      'memory_free' : xxx, 'memory_total' : xxx }
    where
    vg_size is the size of the configured volume group in MiB
    vg_free is the free size of the volume group in MiB
    memory_dom0 is the memory allocated for domain0 in MiB
    memory_free is the currently available (free) ram in MiB
    memory_total is the total number of ram in MiB

  """
  info = {}

  # volume group data
  vg_data = _GetVGInfo(vgname)
  info['vg_size'] = vg_data['vg_size']
  info['vg_free'] = vg_data['vg_free']

  # hypervisor-reported data (memory etc.)
  hyp_data = hypervisor.GetHypervisor().GetNodeInfo()
  if hyp_data is not None:
    info.update(hyp_data)

  # kernel boot id, used to detect node reboots
  boot_file = open("/proc/sys/kernel/random/boot_id", 'r')
  try:
    info["bootid"] = boot_file.read(128).rstrip("\n")
  finally:
    boot_file.close()

  return info
174

    
175

    
176
def VerifyNode(what):
  """Verify the status of the local node.

  Args:
    what - a dictionary of things to check:
      'filelist' : list of files for which to compute checksums
      'nodelist' : list of nodes we should check communication with
      'hypervisor': run the hypervisor-specific verify
      'node-net-test': list of (name, primary_ip, secondary_ip) tuples;
        connectivity to every node's daemon port is tested

  Requested files on local node are checksummed and the result returned.

  The nodelist is traversed, with the following checks being made
  for each node:
  - known_hosts key correct
  - correct resolving of node name (target node returns its own hostname
    by ssh-execution of 'hostname', result compared against name in list.

  Returns a dict with one key per requested check.

  """
  result = {}

  if 'hypervisor' in what:
    result['hypervisor'] = hypervisor.GetHypervisor().Verify()

  if 'filelist' in what:
    result['filelist'] = utils.FingerprintFiles(what['filelist'])

  if 'nodelist' in what:
    result['nodelist'] = {}
    # shuffle in place so all nodes don't contact their peers in the
    # same order, spreading the ssh load
    random.shuffle(what['nodelist'])
    for node in what['nodelist']:
      success, message = _GetSshRunner().VerifyNodeHostname(node)
      if not success:
        result['nodelist'][node] = message
  if 'node-net-test' in what:
    result['node-net-test'] = {}
    my_name = utils.HostInfo().name
    my_pip = my_sip = None
    # locate our own primary/secondary IPs in the supplied list
    for name, pip, sip in what['node-net-test']:
      if name == my_name:
        my_pip = pip
        my_sip = sip
        break
    if not my_pip:
      result['node-net-test'][my_name] = ("Can't find my own"
                                          " primary/secondary IP"
                                          " in the node list")
    else:
      port = ssconf.SimpleStore().GetNodeDaemonPort()
      for name, pip, sip in what['node-net-test']:
        fail = []
        if not utils.TcpPing(pip, port, source=my_pip):
          fail.append("primary")
        # only test the secondary interface when it differs from the
        # primary one
        if sip != pip:
          if not utils.TcpPing(sip, port, source=my_sip):
            fail.append("secondary")
        if fail:
          result['node-net-test'][name] = ("failure using the %s"
                                           " interface(s)" %
                                           " and ".join(fail))

  return result
237

    
238

    
239
def GetVolumeList(vg_name):
  """Compute list of logical volumes and their size.

  Args:
    vg_name: name of the volume group whose LVs should be listed

  Returns:
    dictionary of all partions (key) with their size (in MiB), inactive
    and online status:
    {'test1': ('20.06', True, True)}

    On command failure the raw 'lvs' output (a string) is returned
    instead; callers distinguish the cases by type.

  """
  lvs = {}
  sep = '|'
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=%s" % sep,
                         "-olv_name,lv_size,lv_attr", vg_name])
  if result.failed:
    logger.Error("Failed to list logical volumes, lvs output: %s" %
                 result.output)
    return result.output

  for line in result.stdout.splitlines():
    line = line.strip().rstrip(sep)
    fields = line.split(sep)
    if len(fields) != 3:
      # a malformed line (e.g. an LV name containing the separator)
      # previously raised ValueError on unpacking and aborted the whole
      # listing; skip it and log instead
      logger.Error("Can't parse lvs output, line '%s'" % line)
      continue
    name, size, attr = fields
    # pad/replace a short attribute string so indexing below is safe
    if len(attr) != 6:
      attr = '------'
    inactive = attr[4] == '-'
    online = attr[5] == 'o'
    lvs[name] = (size, inactive, online)

  return lvs
268

    
269

    
270
def ListVolumeGroups():
  """List the volume groups and their size.

  Returns:
    Dictionary with keys volume name and values the size of the volume

  """
  # this is a plain pass-through to the utility function
  vgs = utils.ListVolumeGroups()
  return vgs
278

    
279

    
280
def NodeVolumes():
  """List all logical volumes on this node.

  Returns:
    A list of dicts, one per logical volume, with keys 'name', 'size',
    'dev' (the first physical device backing the LV) and 'vg'; an empty
    list on error.

  """
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=|",
                         "--options=lv_name,lv_size,devices,vg_name"])
  if result.failed:
    logger.Error("Failed to list logical volumes, lvs output: %s" %
                 result.output)
    # return the same type as the success path (a list, not a dict) so
    # callers can iterate the result unconditionally
    return []

  def parse_dev(dev):
    # the 'devices' field looks like '/dev/sda(0)'; drop the extent
    # start offset and keep the device path
    if '(' in dev:
      return dev.split('(')[0]
    else:
      return dev

  def map_line(line):
    return {
      'name': line[0].strip(),
      'size': line[1].strip(),
      'dev': parse_dev(line[2].strip()),
      'vg': line[3].strip(),
    }

  return [map_line(line.split('|')) for line in result.stdout.splitlines()]
307

    
308

    
309
def BridgesExist(bridges_list):
  """Check if a list of bridges exist on the current node.

  Returns:
    True if all of them exist, false otherwise

  """
  # stop at the first missing bridge
  for bridge in bridges_list:
    if utils.BridgeExists(bridge):
      continue
    return False
  return True
321

    
322

    
323
def GetInstanceList():
324
  """Provides a list of instances.
325

326
  Returns:
327
    A list of all running instances on the current node
328
    - instance1.example.com
329
    - instance2.example.com
330

331
  """
332
  try:
333
    names = hypervisor.GetHypervisor().ListInstances()
334
  except errors.HypervisorError, err:
335
    logger.Error("error enumerating instances: %s" % str(err))
336
    raise
337

    
338
  return names
339

    
340

    
341
def GetInstanceInfo(instance):
  """Gives back the informations about an instance as a dictionary.

  Args:
    instance: name of the instance (ex. instance1.example.com)

  Returns:
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
    where
    memory: memory size of instance (int)
    state: xen state of instance (string)
    time: cpu time of instance (float)

  """
  iinfo = hypervisor.GetHypervisor().GetInstanceInfo(instance)
  # unknown instance: return an empty dict, matching the hypervisor's
  # None answer
  if iinfo is None:
    return {}
  return {
    'memory': iinfo[2],
    'state': iinfo[4],
    'time': iinfo[5],
    }
364

    
365

    
366
def GetAllInstancesInfo():
  """Gather data about all instances.

  This is the equivalent of `GetInstanceInfo()`, except that it
  computes data for all instances at once, thus being faster if one
  needs data about more than one instance.

  Returns: a dictionary of dictionaries, keys being the instance name,
    and with values:
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
    where
    memory: memory size of instance (int)
    state: xen state of instance (string)
    time: cpu time of instance (float)
    vcpus: the number of cpus

  """
  output = {}

  iinfo = hypervisor.GetHypervisor().GetAllInstancesInfo()
  # a None/empty answer yields an empty result dict
  if not iinfo:
    return output

  for (name, dummy_id, memory, vcpus, state, times) in iinfo:
    output[name] = {
      'memory': memory,
      'vcpus': vcpus,
      'state': state,
      'time': times,
      }

  return output
396

    
397

    
398
def AddOSToInstance(instance, os_disk, swap_disk):
  """Add an OS to an instance.

  Args:
    instance: the instance object
    os_disk: the instance-visible name of the os device
    swap_disk: the instance-visible name of the swap device

  Returns:
    True on success, False if a disk cannot be found or the create
    script fails; raises BlockDeviceError if a block device is not
    assembled.

  """
  inst_os = OSFromDisk(instance.os)

  create_script = inst_os.create_script

  # resolve the instance-visible names to config disk objects
  os_device = instance.FindDisk(os_disk)
  if os_device is None:
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
    return False

  swap_device = instance.FindDisk(swap_disk)
  if swap_device is None:
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
    return False

  # the block devices must already be assembled on this node
  real_os_dev = _RecursiveFindBD(os_device)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(os_device))
  real_os_dev.Open()

  real_swap_dev = _RecursiveFindBD(swap_device)
  if real_swap_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(swap_device))
  real_swap_dev.Open()

  # per-run logfile, named after os/instance/timestamp
  logfile = "%s/add-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
                                     instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  # the create script runs from the OS directory and gets the block
  # device paths; all its output is redirected into the logfile
  command = utils.BuildShellCmd("cd %s && %s -i %s -b %s -s %s &>%s",
                                inst_os.path, create_script, instance.name,
                                real_os_dev.dev_path, real_swap_dev.dev_path,
                                logfile)

  result = utils.RunCmd(command)
  if result.failed:
    logger.Error("os create command '%s' returned error: %s, logfile: %s,"
                 " output: %s" %
                 (command, result.fail_reason, logfile, result.output))
    return False

  return True
451

    
452

    
453
def RunRenameInstance(instance, old_name, os_disk, swap_disk):
454
  """Run the OS rename script for an instance.
455

456
  Args:
457
    instance: the instance object
458
    old_name: the old name of the instance
459
    os_disk: the instance-visible name of the os device
460
    swap_disk: the instance-visible name of the swap device
461

462
  """
463
  inst_os = OSFromDisk(instance.os)
464

    
465
  script = inst_os.rename_script
466

    
467
  os_device = instance.FindDisk(os_disk)
468
  if os_device is None:
469
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
470
    return False
471

    
472
  swap_device = instance.FindDisk(swap_disk)
473
  if swap_device is None:
474
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
475
    return False
476

    
477
  real_os_dev = _RecursiveFindBD(os_device)
478
  if real_os_dev is None:
479
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
480
                                  str(os_device))
481
  real_os_dev.Open()
482

    
483
  real_swap_dev = _RecursiveFindBD(swap_device)
484
  if real_swap_dev is None:
485
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
486
                                  str(swap_device))
487
  real_swap_dev.Open()
488

    
489
  logfile = "%s/rename-%s-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
490
                                           old_name,
491
                                           instance.name, int(time.time()))
492
  if not os.path.exists(constants.LOG_OS_DIR):
493
    os.mkdir(constants.LOG_OS_DIR, 0750)
494

    
495
  command = utils.BuildShellCmd("cd %s && %s -o %s -n %s -b %s -s %s &>%s",
496
                                inst_os.path, script, old_name, instance.name,
497
                                real_os_dev.dev_path, real_swap_dev.dev_path,
498
                                logfile)
499

    
500
  result = utils.RunCmd(command)
501

    
502
  if result.failed:
503
    logger.Error("os create command '%s' returned error: %s"
504
                 " output: %s" %
505
                 (command, result.fail_reason, result.output))
506
    return False
507

    
508
  return True
509

    
510

    
511
def _GetVGInfo(vg_name):
  """Get informations about the volume group.

  Args:
    vg_name: the volume group

  Returns:
    { 'vg_size' : xxx, 'vg_free' : xxx, 'pv_count' : xxx }
    where
    vg_size is the total size of the volume group in MiB
    vg_free is the free size of the volume group in MiB
    pv_count are the number of physical disks in that vg

  If an error occurs during gathering of data, we return the same dict
  with keys all set to None.

  """
  # default answer: all values None, used as the error marker
  retdic = dict.fromkeys(["vg_size", "vg_free", "pv_count"])

  retval = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings",
                         "--nosuffix", "--units=m", "--separator=:", vg_name])

  if retval.failed:
    errmsg = "volume group %s not present" % vg_name
    logger.Error(errmsg)
    return retdic
  # vgs prints one line like ' 1234.00:567.00:2'; strip whitespace and
  # any trailing separator before splitting
  valarr = retval.stdout.strip().rstrip(':').split(':')
  if len(valarr) == 3:
    try:
      retdic = {
        "vg_size": int(round(float(valarr[0]), 0)),
        "vg_free": int(round(float(valarr[1]), 0)),
        "pv_count": int(valarr[2]),
        }
    except ValueError, err:
      # unparseable numbers: keep the all-None error dict
      logger.Error("Fail to parse vgs output: %s" % str(err))
  else:
    logger.Error("vgs output has the wrong number of fields (expected"
                 " three): %s" % str(valarr))
  return retdic
551

    
552

    
553
def _GatherBlockDevs(instance):
  """Set up an instance's block device(s).

  This is run on the primary node at instance startup. The block
  devices must be already assembled.

  """
  gathered = []
  for disk in instance.disks:
    found = _RecursiveFindBD(disk)
    if found is None:
      raise errors.BlockDeviceError("Block device '%s' is not set up." %
                                    str(disk))
    found.Open()
    gathered.append((disk, found))
  return gathered
569

    
570

    
571
def StartInstance(instance, extra_args):
572
  """Start an instance.
573

574
  Args:
575
    instance - name of instance to start.
576

577
  """
578
  running_instances = GetInstanceList()
579

    
580
  if instance.name in running_instances:
581
    return True
582

    
583
  block_devices = _GatherBlockDevs(instance)
584
  hyper = hypervisor.GetHypervisor()
585

    
586
  try:
587
    hyper.StartInstance(instance, block_devices, extra_args)
588
  except errors.HypervisorError, err:
589
    logger.Error("Failed to start instance: %s" % err)
590
    return False
591

    
592
  return True
593

    
594

    
595
def ShutdownInstance(instance):
596
  """Shut an instance down.
597

598
  Args:
599
    instance - name of instance to shutdown.
600

601
  """
602
  running_instances = GetInstanceList()
603

    
604
  if instance.name not in running_instances:
605
    return True
606

    
607
  hyper = hypervisor.GetHypervisor()
608
  try:
609
    hyper.StopInstance(instance)
610
  except errors.HypervisorError, err:
611
    logger.Error("Failed to stop instance: %s" % err)
612
    return False
613

    
614
  # test every 10secs for 2min
615
  shutdown_ok = False
616

    
617
  time.sleep(1)
618
  for dummy in range(11):
619
    if instance.name not in GetInstanceList():
620
      break
621
    time.sleep(10)
622
  else:
623
    # the shutdown did not succeed
624
    logger.Error("shutdown of '%s' unsuccessful, using destroy" % instance)
625

    
626
    try:
627
      hyper.StopInstance(instance, force=True)
628
    except errors.HypervisorError, err:
629
      logger.Error("Failed to stop instance: %s" % err)
630
      return False
631

    
632
    time.sleep(1)
633
    if instance.name in GetInstanceList():
634
      logger.Error("could not shutdown instance '%s' even by destroy")
635
      return False
636

    
637
  return True
638

    
639

    
640
def RebootInstance(instance, reboot_type, extra_args):
  """Reboot an instance.

  Args:
    instance    - name of instance to reboot
    reboot_type - how to reboot [soft,hard,full]
    extra_args  - passed to StartInstance for a hard reboot

  Returns:
    True on success, False if the instance is not running or the
    reboot fails; raises ParameterError for an unhandled reboot type.

  """
  running_instances = GetInstanceList()

  if instance.name not in running_instances:
    logger.Error("Cannot reboot instance that is not running")
    return False

  hyper = hypervisor.GetHypervisor()
  if reboot_type == constants.INSTANCE_REBOOT_SOFT:
    # soft reboot: let the hypervisor restart the domain in place
    try:
      hyper.RebootInstance(instance)
    except errors.HypervisorError, err:
      logger.Error("Failed to soft reboot instance: %s" % err)
      return False
  elif reboot_type == constants.INSTANCE_REBOOT_HARD:
    # hard reboot: full stop/start cycle through this daemon
    try:
      ShutdownInstance(instance)
      StartInstance(instance, extra_args)
    except errors.HypervisorError, err:
      logger.Error("Failed to hard reboot instance: %s" % err)
      return False
  else:
    # NOTE(review): the docstring mentions a 'full' reboot type, but
    # only soft and hard are handled here; anything else raises -
    # confirm full reboots are handled by the caller
    raise errors.ParameterError("reboot_type invalid")

  return True
673

    
674

    
675
def MigrateInstance(instance, target, live):
676
  """Migrates an instance to another node.
677

678
  """
679
  hyper = hypervisor.GetHypervisor()
680

    
681
  try:
682
    hyper.MigrateInstance(instance, target, live)
683
  except errors.HypervisorError, err:
684
    msg = "Failed to migrate instance: %s" % str(err)
685
    logger.Error(msg)
686
    return (False, msg)
687
  return (True, "Migration successfull")
688

    
689

    
690
def CreateBlockDevice(disk, size, owner, on_primary, info):
  """Creates a block device for an instance.

  Args:
   disk: a ganeti.objects.Disk object
   size: the size of the physical underlying device
   owner: a string with the name of the instance
   on_primary: a boolean indicating if it is the primary node or not
   info: string that will be sent to the physical device creation

  Returns:
    the new unique_id of the device (this can sometime be
    computed only after creation), or None. On secondary nodes,
    it's not required to return anything.

  """
  # recursively assemble all children first
  clist = []
  if disk.children:
    for child in disk.children:
      crdev = _RecursiveAssembleBD(child, owner, on_primary)
      if on_primary or disk.AssembleOnSecondary():
        # we need the children open in case the device itself has to
        # be assembled
        crdev.Open()
      clist.append(crdev)
  try:
    # best-effort cleanup: if a stale device with the same id exists,
    # remove it before re-creating; errors here are deliberately ignored
    device = bdev.FindDevice(disk.dev_type, disk.physical_id, clist)
    if device is not None:
      logger.Info("removing existing device %s" % disk)
      device.Remove()
  except errors.BlockDeviceError, err:
    pass

  device = bdev.Create(disk.dev_type, disk.physical_id,
                       clist, size)
  if device is None:
    raise ValueError("Can't create child device for %s, %s" %
                     (disk, size))
  if on_primary or disk.AssembleOnSecondary():
    if not device.Assemble():
      errorstring = "Can't assemble device after creation"
      logger.Error(errorstring)
      raise errors.BlockDeviceError("%s, very unusual event - check the node"
                                    " daemon logs" % errorstring)
    device.SetSyncSpeed(constants.SYNC_SPEED)
    if on_primary or disk.OpenOnSecondary():
      device.Open(force=True)
    # record the assembled device in the run-time cache
    DevCacheManager.UpdateCache(device.dev_path, owner,
                                on_primary, disk.iv_name)

  device.SetInfo(info)

  physical_id = device.unique_id
  return physical_id
744

    
745

    
746
def RemoveBlockDevice(disk):
  """Remove a block device.

  This is intended to be called recursively.

  Args:
    disk: the objects.Disk to remove

  Returns:
    True if the device and all its children were removed, False
    otherwise.

  """
  try:
    # since we are removing the device, allow a partial match
    # this allows removal of broken mirrors
    rdev = _RecursiveFindBD(disk, allow_partial=True)
  except errors.BlockDeviceError, err:
    # probably can't attach
    logger.Info("Can't attach to device %s in remove" % disk)
    rdev = None
  if rdev is not None:
    r_path = rdev.dev_path
    result = rdev.Remove()
    if result:
      # device is gone; drop it from the run-time cache
      DevCacheManager.RemoveCache(r_path)
  else:
    # nothing attached: treat this level as already removed
    result = True
  if disk.children:
    for child in disk.children:
      # NOTE: 'and' short-circuits - once one removal fails, the
      # remaining children are not attempted
      result = result and RemoveBlockDevice(child)
  return result
771

    
772

    
773
def _RecursiveAssembleBD(disk, owner, as_primary):
  """Activate a block device for an instance.

  This is run on the primary and secondary nodes for an instance.

  This function is called recursively.

  Args:
    disk: a objects.Disk object
    owner: the name of the instance owning the disk (used for the
      device cache)
    as_primary: if we should make the block device read/write

  Returns:
    the assembled device or None (in case no device was assembled)

  If the assembly is not successful, an exception is raised.

  """
  children = []
  if disk.children:
    # compute how many child activation failures we may tolerate
    mcn = disk.ChildrenNeeded()
    if mcn == -1:
      mcn = 0 # max number of Nones allowed
    else:
      mcn = len(disk.children) - mcn # max number of Nones
    for chld_disk in disk.children:
      try:
        cdev = _RecursiveAssembleBD(chld_disk, owner, as_primary)
      except errors.BlockDeviceError, err:
        if children.count(None) >= mcn:
          # too many children already missing; give up
          raise
        cdev = None
        logger.Debug("Error in child activation: %s" % str(err))
      children.append(cdev)

  if as_primary or disk.AssembleOnSecondary():
    r_dev = bdev.AttachOrAssemble(disk.dev_type, disk.physical_id, children)
    r_dev.SetSyncSpeed(constants.SYNC_SPEED)
    result = r_dev
    if as_primary or disk.OpenOnSecondary():
      r_dev.Open()
    # remember the assembled device for later lookups
    DevCacheManager.UpdateCache(r_dev.dev_path, owner,
                                as_primary, disk.iv_name)

  else:
    # nothing to assemble at this level on a secondary node
    result = True
  return result
819

    
820

    
821
def AssembleBlockDevice(disk, owner, as_primary):
  """Activate a block device for an instance.

  This is a wrapper over _RecursiveAssembleBD.

  Returns:
    a /dev path for primary nodes
    True for secondary nodes

  """
  assembled = _RecursiveAssembleBD(disk, owner, as_primary)
  # primary nodes get a device object back; expose only its path
  if isinstance(assembled, bdev.BlockDev):
    return assembled.dev_path
  return assembled
835

    
836

    
837
def ShutdownBlockDevice(disk):
  """Shut down a block device.

  First, if the device is assembled (can `Attach()`), then the device
  is shutdown. Then the children of the device are shutdown.

  This function is called recursively. Note that we don't cache the
  children or such, as oppossed to assemble, shutdown of different
  devices doesn't require that the upper device was active.

  Args:
    disk: the objects.Disk to shut down

  Returns:
    True if this device and all its children were shut down, False
    otherwise.

  """
  r_dev = _RecursiveFindBD(disk)
  if r_dev is not None:
    r_path = r_dev.dev_path
    result = r_dev.Shutdown()
    if result:
      # device is now inactive; drop it from the run-time cache
      DevCacheManager.RemoveCache(r_path)
  else:
    # not assembled: nothing to shut down at this level
    result = True
  if disk.children:
    for child in disk.children:
      # NOTE: 'and' short-circuits - after one failure the remaining
      # children are not shut down
      result = result and ShutdownBlockDevice(child)
  return result
860

    
861

    
862
def MirrorAddChildren(parent_cdev, new_cdevs):
  """Extend a mirrored block device.

  Returns True on success, False if the parent or any new child
  device cannot be found.

  """
  # a partial match is fine: the mirror may be degraded
  parent_bdev = _RecursiveFindBD(parent_cdev, allow_partial=True)
  if parent_bdev is None:
    logger.Error("Can't find parent device")
    return False
  new_bdevs = [_RecursiveFindBD(disk) for disk in new_cdevs]
  if None in new_bdevs:
    logger.Error("Can't find new device(s) to add: %s:%s" %
                 (new_bdevs, new_cdevs))
    return False
  parent_bdev.AddChildren(new_bdevs)
  return True
877

    
878

    
879
def MirrorRemoveChildren(parent_cdev, new_cdevs):
  """Shrink a mirrored block device.

  Args:
    parent_cdev: the objects.Disk of the mirror to shrink
    new_cdevs: list of objects.Disk children to detach

  Returns:
    True on success, False if the parent or any child device cannot
    be found.

  """
  parent_bdev = _RecursiveFindBD(parent_cdev)
  if parent_bdev is None:
    logger.Error("Can't find parent in remove children: %s" % parent_cdev)
    return False
  # children are detached by device path; static paths can be computed
  # directly, dynamic ones require locating the live device
  devs = []
  for disk in new_cdevs:
    rpath = disk.StaticDevPath()
    if rpath is None:
      bd = _RecursiveFindBD(disk)
      if bd is None:
        logger.Error("Can't find dynamic device %s while removing children" %
                     disk)
        return False
      else:
        devs.append(bd.dev_path)
    else:
      devs.append(rpath)
  parent_bdev.RemoveChildren(devs)
  return True
902

    
903

    
904
def GetMirrorStatus(disks):
  """Get the mirroring status of a list of devices.

  Args:
    disks: list of `objects.Disk`

  Returns:
    list of (mirror_done, estimated_time) tuples, which
    are the result of bdev.BlockDevice.CombinedSyncStatus()

  """
  status = []
  for disk in disks:
    found = _RecursiveFindBD(disk)
    if found is None:
      raise errors.BlockDeviceError("Can't find device %s" % str(disk))
    status.append(found.CombinedSyncStatus())
  return status
922

    
923

    
924
def _RecursiveFindBD(disk, allow_partial=False):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance
    allow_partial: don't abort the find if a child of the
                   device can't be found; this is intended to be
                   used when repairing mirrors

  Returns:
    None if the device can't be found
    otherwise the device instance

  """
  # NOTE(review): allow_partial is accepted for interface compatibility
  # but not referenced in this body - confirm whether bdev.FindDevice
  # handles missing (None) children itself
  if disk.children:
    children = [_RecursiveFindBD(chdisk) for chdisk in disk.children]
  else:
    children = []

  return bdev.FindDevice(disk.dev_type, disk.physical_id, children)
946

    
947

    
948
def FindBlockDevice(disk):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance
  Returns:
    None if the device can't be found
    (device_path, major, minor, sync_percent, estimated_time, is_degraded)

  """
  found = _RecursiveFindBD(disk)
  if found is None:
    return None
  return (found.dev_path, found.major, found.minor) + found.GetSyncStatus()
964

    
965

    
966
def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
  """Write a file to the filesystem.

  This allows the master to overwrite(!) a file. It will only perform
  the operation if the file belongs to a list of configuration files.

  Args:
    file_name: absolute path of the target file
    data: contents to write
    mode, uid, gid: permissions and ownership for the file
    atime, mtime: timestamps to set on the file

  Returns:
    True on success, False when the file name is not acceptable.

  """
  # refuse relative paths outright
  if not os.path.isabs(file_name):
    logger.Error("Filename passed to UploadFile is not absolute: '%s'" %
                 file_name)
    return False

  # the whitelist is the static config files plus whatever the simple
  # store manages
  permitted = set([
    constants.CLUSTER_CONF_FILE,
    constants.ETC_HOSTS,
    constants.SSH_KNOWN_HOSTS_FILE,
    ])
  permitted.update(ssconf.SimpleStore().GetFileList())

  if file_name not in permitted:
    logger.Error("Filename passed to UploadFile not in allowed"
                 " upload targets: '%s'" % file_name)
    return False

  utils.WriteFile(file_name, data=data, mode=mode, uid=uid, gid=gid,
                  atime=atime, mtime=mtime)
  return True
992

    
993

    
994
def _ErrnoOrStr(err):
995
  """Format an EnvironmentError exception.
996

997
  If the `err` argument has an errno attribute, it will be looked up
998
  and converted into a textual EXXXX description. Otherwise the string
999
  representation of the error will be returned.
1000

1001
  """
1002
  if hasattr(err, 'errno'):
1003
    detail = errno.errorcode[err.errno]
1004
  else:
1005
    detail = str(err)
1006
  return detail
1007

    
1008

    
1009
def _OSOndiskVersion(name, os_dir):
1010
  """Compute and return the API version of a given OS.
1011

1012
  This function will try to read the API version of the os given by
1013
  the 'name' parameter and residing in the 'os_dir' directory.
1014

1015
  Return value will be either an integer denoting the version or None in the
1016
  case when this is not a valid OS name.
1017

1018
  """
1019
  api_file = os.path.sep.join([os_dir, "ganeti_api_version"])
1020

    
1021
  try:
1022
    st = os.stat(api_file)
1023
  except EnvironmentError, err:
1024
    raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file not"
1025
                           " found (%s)" % _ErrnoOrStr(err))
1026

    
1027
  if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
1028
    raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file is not"
1029
                           " a regular file")
1030

    
1031
  try:
1032
    f = open(api_file)
1033
    try:
1034
      api_version = f.read(256)
1035
    finally:
1036
      f.close()
1037
  except EnvironmentError, err:
1038
    raise errors.InvalidOS(name, os_dir, "error while reading the"
1039
                           " API version (%s)" % _ErrnoOrStr(err))
1040

    
1041
  api_version = api_version.strip()
1042
  try:
1043
    api_version = int(api_version)
1044
  except (TypeError, ValueError), err:
1045
    raise errors.InvalidOS(name, os_dir,
1046
                           "API version is not integer (%s)" % str(err))
1047

    
1048
  return api_version
1049

    
1050

    
1051
def DiagnoseOS(top_dirs=None):
1052
  """Compute the validity for all OSes.
1053

1054
  Returns an OS object for each name in all the given top directories
1055
  (if not given defaults to constants.OS_SEARCH_PATH)
1056

1057
  Returns:
1058
    list of OS objects
1059

1060
  """
1061
  if top_dirs is None:
1062
    top_dirs = constants.OS_SEARCH_PATH
1063

    
1064
  result = []
1065
  for dir_name in top_dirs:
1066
    if os.path.isdir(dir_name):
1067
      try:
1068
        f_names = utils.ListVisibleFiles(dir_name)
1069
      except EnvironmentError, err:
1070
        logger.Error("Can't list the OS directory %s: %s" %
1071
                     (dir_name, str(err)))
1072
        break
1073
      for name in f_names:
1074
        try:
1075
          os_inst = OSFromDisk(name, base_dir=dir_name)
1076
          result.append(os_inst)
1077
        except errors.InvalidOS, err:
1078
          result.append(objects.OS.FromInvalidOS(err))
1079

    
1080
  return result
1081

    
1082

    
1083
def OSFromDisk(name, base_dir=None):
1084
  """Create an OS instance from disk.
1085

1086
  This function will return an OS instance if the given name is a
1087
  valid OS name. Otherwise, it will raise an appropriate
1088
  `errors.InvalidOS` exception, detailing why this is not a valid
1089
  OS.
1090

1091
  Args:
1092
    os_dir: Directory containing the OS scripts. Defaults to a search
1093
            in all the OS_SEARCH_PATH directories.
1094

1095
  """
1096

    
1097
  if base_dir is None:
1098
    os_dir = utils.FindFile(name, constants.OS_SEARCH_PATH, os.path.isdir)
1099
    if os_dir is None:
1100
      raise errors.InvalidOS(name, None, "OS dir not found in search path")
1101
  else:
1102
    os_dir = os.path.sep.join([base_dir, name])
1103

    
1104
  api_version = _OSOndiskVersion(name, os_dir)
1105

    
1106
  if api_version != constants.OS_API_VERSION:
1107
    raise errors.InvalidOS(name, os_dir, "API version mismatch"
1108
                           " (found %s want %s)"
1109
                           % (api_version, constants.OS_API_VERSION))
1110

    
1111
  # OS Scripts dictionary, we will populate it with the actual script names
1112
  os_scripts = {'create': '', 'export': '', 'import': '', 'rename': ''}
1113

    
1114
  for script in os_scripts:
1115
    os_scripts[script] = os.path.sep.join([os_dir, script])
1116

    
1117
    try:
1118
      st = os.stat(os_scripts[script])
1119
    except EnvironmentError, err:
1120
      raise errors.InvalidOS(name, os_dir, "'%s' script missing (%s)" %
1121
                             (script, _ErrnoOrStr(err)))
1122

    
1123
    if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
1124
      raise errors.InvalidOS(name, os_dir, "'%s' script not executable" %
1125
                             script)
1126

    
1127
    if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
1128
      raise errors.InvalidOS(name, os_dir, "'%s' is not a regular file" %
1129
                             script)
1130

    
1131

    
1132
  return objects.OS(name=name, path=os_dir, status=constants.OS_VALID_STATUS,
1133
                    create_script=os_scripts['create'],
1134
                    export_script=os_scripts['export'],
1135
                    import_script=os_scripts['import'],
1136
                    rename_script=os_scripts['rename'],
1137
                    api_version=api_version)
1138

    
1139

    
1140
def SnapshotBlockDevice(disk):
  """Create a snapshot copy of a block device.

  This function is called recursively, and the snapshot is actually created
  just for the leaf lvm backend device.

  Args:
    disk: the disk to be snapshotted

  Returns:
    a config entry for the actual lvm device snapshotted, or None if
    the leaf device cannot be found (or no suitable child exists)

  Raises:
    errors.ProgrammerError: if the leaf device is not LVM-based

  """
  if disk.children:
    if len(disk.children) == 1:
      # only one child, let's recurse on it
      return SnapshotBlockDevice(disk.children[0])
    else:
      # more than one child, choose one that matches
      for child in disk.children:
        if child.size == disk.size:
          # return implies breaking the loop
          return SnapshotBlockDevice(child)
      # NOTE(review): if no child matches the parent's size we fall
      # through and implicitly return None -- confirm this is intended
  elif disk.dev_type == constants.LD_LV:
    r_dev = _RecursiveFindBD(disk)
    if r_dev is not None:
      # let's stay on the safe side and ask for the full size, for now
      return r_dev.Snapshot(disk.size)
    else:
      return None
  else:
    raise errors.ProgrammerError("Cannot snapshot non-lvm block device"
                                 " '%s' of type '%s'" %
                                 (disk.unique_id, disk.dev_type))
1174

    
1175

    
1176
def ExportSnapshot(disk, dest_node, instance):
  """Export a block device snapshot to a remote node.

  The snapshot is dumped by the instance OS's export script, piped
  through gzip, and streamed over ssh into the in-progress export
  directory on dest_node.

  Args:
    disk: the snapshot block device
    dest_node: the node to send the image to
    instance: instance being exported

  Returns:
    True if successful, False otherwise.

  Raises:
    errors.BlockDeviceError: if the snapshot device is not set up

  """
  inst_os = OSFromDisk(instance.os)
  export_script = inst_os.export_script

  # per-run log file for the OS export script's stderr
  logfile = "%s/exp-%s-%s-%s.log" % (constants.LOG_OS_DIR, inst_os.name,
                                     instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  real_os_dev = _RecursiveFindBD(disk)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(disk))
  real_os_dev.Open()

  # the image lands in <EXPORT_DIR>/<instance>.new on the destination;
  # FinalizeExport later moves that directory into its final place
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
  destfile = disk.physical_id[1]

  # the target command is built out of three individual commands,
  # which are joined by pipes; we check each individual command for
  # valid parameters

  expcmd = utils.BuildShellCmd("cd %s; %s -i %s -b %s 2>%s", inst_os.path,
                               export_script, instance.name,
                               real_os_dev.dev_path, logfile)

  comprcmd = "gzip"

  destcmd = utils.BuildShellCmd("mkdir -p %s && cat > %s/%s",
                                destdir, destdir, destfile)
  remotecmd = _GetSshRunner().BuildCmd(dest_node, constants.GANETI_RUNAS,
                                       destcmd)

  # all commands have been checked, so we're safe to combine them
  command = '|'.join([expcmd, comprcmd, utils.ShellQuoteArgs(remotecmd)])

  result = utils.RunCmd(command)

  if result.failed:
    logger.Error("os snapshot export command '%s' returned error: %s"
                 " output: %s" %
                 (command, result.fail_reason, result.output))
    return False

  return True
1232

    
1233

    
1234
def FinalizeExport(instance, snap_disks):
  """Write out the export configuration information.

  Writes the export config (an ini-style file describing the instance
  and its snapshotted disks) into the in-progress export directory,
  then replaces the final export directory with it.

  Args:
    instance: instance configuration
    snap_disks: snapshot block devices

  Returns:
    False in case of error, True otherwise.

  """
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
  finaldestdir = os.path.join(constants.EXPORT_DIR, instance.name)

  config = objects.SerializableConfigParser()

  config.add_section(constants.INISECT_EXP)
  config.set(constants.INISECT_EXP, 'version', '0')
  config.set(constants.INISECT_EXP, 'timestamp', '%d' % int(time.time()))
  config.set(constants.INISECT_EXP, 'source', instance.primary_node)
  config.set(constants.INISECT_EXP, 'os', instance.os)
  config.set(constants.INISECT_EXP, 'compression', 'gzip')

  config.add_section(constants.INISECT_INS)
  config.set(constants.INISECT_INS, 'name', instance.name)
  config.set(constants.INISECT_INS, 'memory', '%d' % instance.memory)
  config.set(constants.INISECT_INS, 'vcpus', '%d' % instance.vcpus)
  config.set(constants.INISECT_INS, 'disk_template', instance.disk_template)

  # NOTE(review): after the loop, nic_count holds the *last index* from
  # enumerate (i.e. count - 1 when nics are present), not the number of
  # nics; the TODO below suggests the reader iterates until a key is
  # missing, but confirm before relying on the stored 'nic_count' value
  nic_count = 0
  for nic_count, nic in enumerate(instance.nics):
    config.set(constants.INISECT_INS, 'nic%d_mac' %
               nic_count, '%s' % nic.mac)
    config.set(constants.INISECT_INS, 'nic%d_ip' % nic_count, '%s' % nic.ip)
    config.set(constants.INISECT_INS, 'nic%d_bridge' % nic_count, '%s' % nic.bridge)
  # TODO: redundant: on load can read nics until it doesn't exist
  config.set(constants.INISECT_INS, 'nic_count' , '%d' % nic_count)

  # NOTE(review): same off-by-one as 'nic_count' above
  disk_count = 0
  for disk_count, disk in enumerate(snap_disks):
    config.set(constants.INISECT_INS, 'disk%d_ivname' % disk_count,
               ('%s' % disk.iv_name))
    config.set(constants.INISECT_INS, 'disk%d_dump' % disk_count,
               ('%s' % disk.physical_id[1]))
    config.set(constants.INISECT_INS, 'disk%d_size' % disk_count,
               ('%d' % disk.size))
  config.set(constants.INISECT_INS, 'disk_count' , '%d' % disk_count)

  cff = os.path.join(destdir, constants.EXPORT_CONF_FILE)
  cfo = open(cff, 'w')
  try:
    config.write(cfo)
  finally:
    cfo.close()

  # remove any stale final directory, then move the new one into place
  shutil.rmtree(finaldestdir, True)
  shutil.move(destdir, finaldestdir)

  return True
1293

    
1294

    
1295
def ExportInfo(dest):
  """Get export configuration information.

  Args:
    dest: directory containing the export

  Returns:
    A serializable config file containing the export info, or None if
    the required sections are missing.

  """
  conf_file = os.path.join(dest, constants.EXPORT_CONF_FILE)

  export_conf = objects.SerializableConfigParser()
  export_conf.read(conf_file)

  # both the export and the instance sections must be present for the
  # export to be considered valid
  if (export_conf.has_section(constants.INISECT_EXP) and
      export_conf.has_section(constants.INISECT_INS)):
    return export_conf

  return None
1315

    
1316

    
1317
def ImportOSIntoInstance(instance, os_disk, swap_disk, src_node, src_image):
  """Import an os image into an instance.

  The image is fetched from src_node over ssh, piped through gunzip,
  and fed to the instance OS's import script, which writes it onto the
  instance's os and swap devices.

  Args:
    instance: the instance object
    os_disk: the instance-visible name of the os device
    swap_disk: the instance-visible name of the swap device
    src_node: node holding the source image
    src_image: path to the source image on src_node

  Returns:
    False in case of error, True otherwise.

  Raises:
    errors.BlockDeviceError: if either target device is not set up

  """
  inst_os = OSFromDisk(instance.os)
  import_script = inst_os.import_script

  # resolve the instance-visible names to disk objects
  os_device = instance.FindDisk(os_disk)
  if os_device is None:
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
    return False

  swap_device = instance.FindDisk(swap_disk)
  if swap_device is None:
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
    return False

  # both target devices must already be assembled on this node
  real_os_dev = _RecursiveFindBD(os_device)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(os_device))
  real_os_dev.Open()

  real_swap_dev = _RecursiveFindBD(swap_device)
  if real_swap_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
                                  str(swap_device))
  real_swap_dev.Open()

  # per-run log file for the OS import script
  logfile = "%s/import-%s-%s-%s.log" % (constants.LOG_OS_DIR, instance.os,
                                        instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  # each pipeline stage is quoted/checked individually before being
  # combined into a single shell command below
  destcmd = utils.BuildShellCmd('cat %s', src_image)
  remotecmd = _GetSshRunner().BuildCmd(src_node, constants.GANETI_RUNAS,
                                       destcmd)

  comprcmd = "gunzip"
  impcmd = utils.BuildShellCmd("(cd %s; %s -i %s -b %s -s %s &>%s)",
                               inst_os.path, import_script, instance.name,
                               real_os_dev.dev_path, real_swap_dev.dev_path,
                               logfile)

  command = '|'.join([utils.ShellQuoteArgs(remotecmd), comprcmd, impcmd])

  result = utils.RunCmd(command)

  if result.failed:
    logger.Error("os import command '%s' returned error: %s"
                 " output: %s" %
                 (command, result.fail_reason, result.output))
    return False

  return True
1382

    
1383

    
1384
def ListExports():
  """Return a list of exports currently available on this machine.

  Returns:
    list of visible file names in constants.EXPORT_DIR, or the empty
    list if the directory does not exist

  """
  if not os.path.isdir(constants.EXPORT_DIR):
    return []
  return utils.ListVisibleFiles(constants.EXPORT_DIR)
1392

    
1393

    
1394
def RemoveExport(export):
  """Remove an existing export from the node.

  Args:
    export: the name of the export to remove

  Returns:
    False in case of error, True otherwise.

  """
  target = os.path.join(constants.EXPORT_DIR, export)

  # TODO: catch some of the relevant exceptions and provide a pretty
  # error message if rmtree fails.
  shutil.rmtree(target)

  return True
1411

    
1412

    
1413
def RenameBlockDevices(devlist):
  """Rename a list of block devices.

  The devlist argument is a list of (disk, new_unique_id) tuples; each
  device is renamed to its new unique_id. The return value will be a
  combined boolean result (True only if all renames succeeded).

  """
  result = True
  for disk, unique_id in devlist:
    dev = _RecursiveFindBD(disk)
    if dev is None:
      # device not found: record the failure but keep renaming the rest
      result = False
      continue
    try:
      old_rpath = dev.dev_path
      dev.Rename(unique_id)
      new_rpath = dev.dev_path
      if old_rpath != new_rpath:
        # the device path changed, so the old cache entry is now stale
        DevCacheManager.RemoveCache(old_rpath)
        # FIXME: we should add the new cache information here, like:
        # DevCacheManager.UpdateCache(new_rpath, owner, ...)
        # but we don't have the owner here - maybe parse from existing
        # cache? for now, we only lose lvm data when we rename, which
        # is less critical than DRBD or MD
    except errors.BlockDeviceError, err:
      logger.Error("Can't rename device '%s' to '%s': %s" %
                   (dev, unique_id, err))
      result = False
  return result
1443

    
1444

    
1445
def _TransformFileStorageDir(file_storage_dir):
  """Checks whether given file_storage_dir is valid.

  Checks whether the given file_storage_dir is within the cluster-wide
  default file_storage_dir stored in SimpleStore. Only paths under that
  directory are allowed.

  Args:
    file_storage_dir: string with path

  Returns:
    normalized file_storage_dir (string) if valid, None otherwise

  """
  file_storage_dir = os.path.normpath(file_storage_dir)
  base_file_storage_dir = ssconf.SimpleStore().GetFileStorageDir()
  # FIX: os.path.commonprefix compares character by character, not by
  # path component, so e.g. '/srv/ganeti-evil' used to pass a base of
  # '/srv/ganeti'; compare on a path-separator boundary instead
  if (file_storage_dir != base_file_storage_dir and
      not file_storage_dir.startswith(base_file_storage_dir + os.sep)):
    logger.Error("file storage directory '%s' is not under base file"
                 " storage directory '%s'" %
                 (file_storage_dir, base_file_storage_dir))
    return None
  return file_storage_dir
1468

    
1469

    
1470
def CreateFileStorageDir(file_storage_dir):
1471
  """Create file storage directory.
1472

1473
  Args:
1474
    file_storage_dir: string containing the path
1475

1476
  Returns:
1477
    tuple with first element a boolean indicating wheter dir
1478
    creation was successful or not
1479

1480
  """
1481
  file_storage_dir = _TransformFileStorageDir(file_storage_dir)
1482
  result = True,
1483
  if not file_storage_dir:
1484
    result = False,
1485
  else:
1486
    if os.path.exists(file_storage_dir):
1487
      if not os.path.isdir(file_storage_dir):
1488
        logger.Error("'%s' is not a directory" % file_storage_dir)
1489
        result = False,
1490
    else:
1491
      try:
1492
        os.makedirs(file_storage_dir, 0750)
1493
      except OSError, err:
1494
        logger.Error("Cannot create file storage directory '%s': %s" %
1495
                     (file_storage_dir, err))
1496
        result = False,
1497
  return result
1498

    
1499

    
1500
def RemoveFileStorageDir(file_storage_dir):
1501
  """Remove file storage directory.
1502

1503
  Remove it only if it's empty. If not log an error and return.
1504

1505
  Args:
1506
    file_storage_dir: string containing the path
1507

1508
  Returns:
1509
    tuple with first element a boolean indicating wheter dir
1510
    removal was successful or not
1511

1512
  """
1513
  file_storage_dir = _TransformFileStorageDir(file_storage_dir)
1514
  result = True,
1515
  if not file_storage_dir:
1516
    result = False,
1517
  else:
1518
    if os.path.exists(file_storage_dir):
1519
      if not os.path.isdir(file_storage_dir):
1520
        logger.Error("'%s' is not a directory" % file_storage_dir)
1521
        result = False,
1522
      # deletes dir only if empty, otherwise we want to return False
1523
      try:
1524
        os.rmdir(file_storage_dir)
1525
      except OSError, err:
1526
        logger.Error("Cannot remove file storage directory '%s': %s" %
1527
                     (file_storage_dir, err))
1528
        result = False,
1529
  return result
1530

    
1531

    
1532
def RenameFileStorageDir(old_file_storage_dir, new_file_storage_dir):
1533
  """Rename the file storage directory.
1534

1535
  Args:
1536
    old_file_storage_dir: string containing the old path
1537
    new_file_storage_dir: string containing the new path
1538

1539
  Returns:
1540
    tuple with first element a boolean indicating wheter dir
1541
    rename was successful or not
1542

1543
  """
1544
  old_file_storage_dir = _TransformFileStorageDir(old_file_storage_dir)
1545
  new_file_storage_dir = _TransformFileStorageDir(new_file_storage_dir)
1546
  result = True,
1547
  if not old_file_storage_dir or not new_file_storage_dir:
1548
    result = False,
1549
  else:
1550
    if not os.path.exists(new_file_storage_dir):
1551
      if os.path.isdir(old_file_storage_dir):
1552
        try:
1553
          os.rename(old_file_storage_dir, new_file_storage_dir)
1554
        except OSError, err:
1555
          logger.Error("Cannot rename '%s' to '%s': %s"
1556
                       % (old_file_storage_dir, new_file_storage_dir, err))
1557
          result =  False,
1558
      else:
1559
        logger.Error("'%s' is not a directory" % old_file_storage_dir)
1560
        result = False,
1561
    else:
1562
      if os.path.exists(old_file_storage_dir):
1563
        logger.Error("Cannot rename '%s' to '%s'. Both locations exist." %
1564
                     old_file_storage_dir, new_file_storage_dir)
1565
        result = False,
1566
  return result
1567

    
1568

    
1569
class HooksRunner(object):
  """Hook runner.

  This class is instantiated on the node side (ganeti-noded) and not on
  the master side.

  """
  # only files whose names match this pattern are executed as hooks
  RE_MASK = re.compile("^[a-zA-Z0-9_-]+$")

  def __init__(self, hooks_base_dir=None):
    """Constructor for hooks runner.

    Args:
      - hooks_base_dir: if not None, this overrides the
        constants.HOOKS_BASE_DIR (useful for unittests)

    """
    if hooks_base_dir is None:
      hooks_base_dir = constants.HOOKS_BASE_DIR
    self._BASE_DIR = hooks_base_dir

  @staticmethod
  def ExecHook(script, env):
    """Exec one hook script.

    Args:
     - script: the full path to the script
     - env: the environment with which to exec the script

    Returns:
      tuple of (success, output) where success is True iff the script
      exited with status 0, and output holds at most the first 4096
      bytes of the script's combined stdout/stderr

    """
    # exec the process using subprocess and log the output
    fdstdin = None
    try:
      fdstdin = open("/dev/null", "r")
      child = subprocess.Popen([script], stdin=fdstdin, stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT, close_fds=True,
                               shell=False, cwd="/", env=env)
      output = ""
      try:
        # only the first 4KB of output is captured and reported
        output = child.stdout.read(4096)
        child.stdout.close()
      except EnvironmentError, err:
        output += "Hook script error: %s" % str(err)

      # retry wait() on EINTR (interrupted by a signal)
      while True:
        try:
          result = child.wait()
          break
        except EnvironmentError, err:
          if err.errno == errno.EINTR:
            continue
          raise
    finally:
      # try not to leak fds
      for fd in (fdstdin, ):
        if fd is not None:
          try:
            fd.close()
          except EnvironmentError, err:
            # just log the error
            #logger.Error("While closing fd %s: %s" % (fd, err))
            pass

    return result == 0, output

  def RunHooks(self, hpath, phase, env):
    """Run the scripts in the hooks directory.

    This method will not be usually overriden by child opcodes.

    Returns:
      list of ("subdir/script", status, output) tuples, where status is
      one of the constants.HKR_* values

    Raises:
      errors.ProgrammerError: for an unknown hooks phase

    """
    if phase == constants.HOOKS_PHASE_PRE:
      suffix = "pre"
    elif phase == constants.HOOKS_PHASE_POST:
      suffix = "post"
    else:
      raise errors.ProgrammerError("Unknown hooks phase: '%s'" % phase)
    rr = []

    subdir = "%s-%s.d" % (hpath, suffix)
    dir_name = "%s/%s" % (self._BASE_DIR, subdir)
    try:
      dir_contents = utils.ListVisibleFiles(dir_name)
    except OSError, err:
      # must log
      # NOTE(review): a missing/unreadable hooks dir is silently treated
      # as "no hooks"; the error itself is currently not logged
      return rr

    # we use the standard python sort order,
    # so 00name is the recommended naming scheme
    dir_contents.sort()
    for relname in dir_contents:
      fname = os.path.join(dir_name, relname)
      # a hook must be a regular executable file with a safe name,
      # otherwise it is reported as skipped
      if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
          self.RE_MASK.match(relname) is not None):
        rrval = constants.HKR_SKIP
        output = ""
      else:
        result, output = self.ExecHook(fname, env)
        if not result:
          rrval = constants.HKR_FAIL
        else:
          rrval = constants.HKR_SUCCESS
      rr.append(("%s/%s" % (subdir, relname), rrval, output))

    return rr
1674

    
1675

    
1676
class IAllocatorRunner(object):
  """IAllocator runner.

  This class is instantiated on the node side (ganeti-noded) and not on
  the master side.

  """
  def Run(self, name, idata):
    """Run an iallocator script.

    Args:
      name: name of the allocator script, looked up in
        constants.IALLOCATOR_SEARCH_PATH
      idata: string with the allocation request; it is written to a
        temporary file which is passed to the script as its only
        argument

    Return value: tuple of:
       - run status (one of the IARUN_ constants)
       - stdout
       - stderr
       - fail reason (as from utils.RunResult)

    """
    # FIX: 'tempfile' is used below but was never imported at module
    # level, making this method fail with NameError; import it locally
    import tempfile

    alloc_script = utils.FindFile(name, constants.IALLOCATOR_SEARCH_PATH,
                                  os.path.isfile)
    if alloc_script is None:
      return (constants.IARUN_NOTFOUND, None, None, None)

    fd, fin_name = tempfile.mkstemp(prefix="ganeti-iallocator.")
    try:
      os.write(fd, idata)
      os.close(fd)
      result = utils.RunCmd([alloc_script, fin_name])
      if result.failed:
        return (constants.IARUN_FAILURE, result.stdout, result.stderr,
                result.fail_reason)
    finally:
      # always remove the temporary request file
      os.unlink(fin_name)

    return (constants.IARUN_SUCCESS, result.stdout, result.stderr, None)
1710

    
1711

    
1712
class DevCacheManager(object):
1713
  """Simple class for managing a cache of block device information.
1714

1715
  """
1716
  _DEV_PREFIX = "/dev/"
1717
  _ROOT_DIR = constants.BDEV_CACHE_DIR
1718

    
1719
  @classmethod
1720
  def _ConvertPath(cls, dev_path):
1721
    """Converts a /dev/name path to the cache file name.
1722

1723
    This replaces slashes with underscores and strips the /dev
1724
    prefix. It then returns the full path to the cache file
1725

1726
    """
1727
    if dev_path.startswith(cls._DEV_PREFIX):
1728
      dev_path = dev_path[len(cls._DEV_PREFIX):]
1729
    dev_path = dev_path.replace("/", "_")
1730
    fpath = "%s/bdev_%s" % (cls._ROOT_DIR, dev_path)
1731
    return fpath
1732

    
1733
  @classmethod
1734
  def UpdateCache(cls, dev_path, owner, on_primary, iv_name):
1735
    """Updates the cache information for a given device.
1736

1737
    """
1738
    if dev_path is None:
1739
      logger.Error("DevCacheManager.UpdateCache got a None dev_path")
1740
      return
1741
    fpath = cls._ConvertPath(dev_path)
1742
    if on_primary:
1743
      state = "primary"
1744
    else:
1745
      state = "secondary"
1746
    if iv_name is None:
1747
      iv_name = "not_visible"
1748
    fdata = "%s %s %s\n" % (str(owner), state, iv_name)
1749
    try:
1750
      utils.WriteFile(fpath, data=fdata)
1751
    except EnvironmentError, err:
1752
      logger.Error("Can't update bdev cache for %s, error %s" %
1753
                   (dev_path, str(err)))
1754

    
1755
  @classmethod
1756
  def RemoveCache(cls, dev_path):
1757
    """Remove data for a dev_path.
1758

1759
    """
1760
    if dev_path is None:
1761
      logger.Error("DevCacheManager.RemoveCache got a None dev_path")
1762
      return
1763
    fpath = cls._ConvertPath(dev_path)
1764
    try:
1765
      utils.RemoveFile(fpath)
1766
    except EnvironmentError, err:
1767
      logger.Error("Can't update bdev cache for %s, error %s" %
1768
                   (dev_path, str(err)))