From 6b93ec9d798ed53089a06bc0ced58ef1d8a9e4b0 Mon Sep 17 00:00:00 2001 From: Iustin Pop Date: Tue, 13 Jan 2009 15:20:44 +0000 Subject: [PATCH] Forward-port DrbdNetReconfig This is a modified forward-port of DrbdNetReconfig and their associated RPCs. In Ganeti 2.0, these functions will be used for two things: - live migration (as in 1.2) - and for other network reconfiguration tasks, since DRBD8.Attach() doesn't do them anymore Because of the Attach() changes, we can now implement the AttachNet/DisconnectNet functions as independent entities, and we don't need the cache anymore. Note these functions are copies of the latest 1.2 code, and not cherry-picks of the (many) patches that went into 1.2. Reviewed-by: ultrotter --- daemons/ganeti-noded | 38 ++++++++++++++++ lib/backend.py | 119 ++++++++++++++++++++++++++++++++++++++++++++++++++ lib/bdev.py | 2 + lib/rpc.py | 29 ++++++++++++ 4 files changed, 188 insertions(+) diff --git a/daemons/ganeti-noded b/daemons/ganeti-noded index a00bf54..c146e8b 100755 --- a/daemons/ganeti-noded +++ b/daemons/ganeti-noded @@ -232,6 +232,44 @@ class NodeHttpServer(http.server.HttpServer): disks = [objects.Disk.FromDict(cf) for cf in params[1]] return backend.CloseBlockDevices(params[0], disks) + # blockdev/drbd specific methods ---------- + + @staticmethod + def perspective_drbd_disconnect_net(params): + """Disconnects the network connection of drbd disks. + + Note that this is only valid for drbd disks, so the members of the + disk list must all be drbd devices. + + """ + nodes_ip, disks = params + disks = [objects.Disk.FromDict(cf) for cf in disks] + return backend.DrbdDisconnectNet(nodes_ip, disks) + + @staticmethod + def perspective_drbd_attach_net(params): + """Attaches the network connection of drbd disks. + + Note that this is only valid for drbd disks, so the members of the + disk list must all be drbd devices. 
+ + """ + nodes_ip, disks, instance_name, multimaster = params + disks = [objects.Disk.FromDict(cf) for cf in disks] + return backend.DrbdAttachNet(nodes_ip, disks, instance_name, multimaster) + + @staticmethod + def perspective_drbd_wait_sync(params): + """Wait until DRBD disks are synched. + + Note that this is only valid for drbd disks, so the members of the + disk list must all be drbd devices. + + """ + nodes_ip, disks = params + disks = [objects.Disk.FromDict(cf) for cf in disks] + return backend.DrbdWaitSync(nodes_ip, disks) + # export/import -------------------------- @staticmethod diff --git a/lib/backend.py b/lib/backend.py index c57eeaf..f73d87e 100644 --- a/lib/backend.py +++ b/lib/backend.py @@ -2177,6 +2177,125 @@ def DemoteFromMC(): return (True, "Done") +def _FindDisks(nodes_ip, disks): + """Sets the physical ID on disks and returns the block devices. + + """ + # set the correct physical ID + my_name = utils.HostInfo().name + for cf in disks: + cf.SetPhysicalID(my_name, nodes_ip) + + bdevs = [] + + for cf in disks: + rd = _RecursiveFindBD(cf) + if rd is None: + return (False, "Can't find device %s" % cf) + bdevs.append(rd) + return (True, bdevs) + + +def DrbdDisconnectNet(nodes_ip, disks): + """Disconnects the network on a list of drbd devices. + + """ + status, bdevs = _FindDisks(nodes_ip, disks) + if not status: + return status, bdevs + + # disconnect disks + for rd in bdevs: + try: + rd.DisconnectNet() + except errors.BlockDeviceError, err: + logging.exception("Failed to go into standalone mode") + return (False, "Can't change network configuration: %s" % str(err)) + return (True, "All disks are now disconnected") + + +def DrbdAttachNet(nodes_ip, disks, instance_name, multimaster): + """Attaches the network on a list of drbd devices. 
+ + """ + status, bdevs = _FindDisks(nodes_ip, disks) + if not status: + return status, bdevs + + if multimaster: + for cf, rd in zip(disks, bdevs): + try: + _SymlinkBlockDev(instance_name, rd.dev_path, cf.iv_name) + except EnvironmentError, err: + return (False, "Can't create symlink: %s" % str(err)) + # reconnect disks, switch to new master configuration and if + # needed primary mode + for rd in bdevs: + try: + rd.AttachNet(multimaster) + except errors.BlockDeviceError, err: + return (False, "Can't change network configuration: %s" % str(err)) + # wait until the disks are connected; we need to retry the re-attach + # if the device becomes standalone, as this might happen if the one + # node disconnects and reconnects in a different mode before the + # other node reconnects; in this case, one or both of the nodes will + # decide it has wrong configuration and switch to standalone + RECONNECT_TIMEOUT = 2 * 60 + sleep_time = 0.100 # start with 100 milliseconds + timeout_limit = time.time() + RECONNECT_TIMEOUT + while time.time() < timeout_limit: + all_connected = True + for rd in bdevs: + stats = rd.GetProcStatus() + if not (stats.is_connected or stats.is_in_resync): + all_connected = False + if stats.is_standalone: + # peer had different config info and this node became + # standalone, even though this should not happen with the + # new staged way of changing disk configs + try: + rd.ReAttachNet(multimaster) + except errors.BlockDeviceError, err: + return (False, "Can't change network configuration: %s" % str(err)) + if all_connected: + break + time.sleep(sleep_time) + sleep_time = min(5, sleep_time * 1.5) + if not all_connected: + return (False, "Timeout in disk reconnecting") + if multimaster: + # change to primary mode + for rd in bdevs: + rd.Open() + if multimaster: + msg = "multi-master and primary" + else: + msg = "single-master" + return (True, "Disks are now configured as %s" % msg) + + +def DrbdWaitSync(nodes_ip, disks): + """Wait until DRBDs have 
synchronized. + + """ + status, bdevs = _FindDisks(nodes_ip, disks) + if not status: + return status, bdevs + + min_resync = 100 + alldone = True + failure = False + for rd in bdevs: + stats = rd.GetProcStatus() + if not (stats.is_connected or stats.is_in_resync): + failure = True + break + alldone = alldone and (not stats.is_in_resync) + if stats.sync_percent is not None: + min_resync = min(min_resync, stats.sync_percent) + return (not failure, (alldone, min_resync)) + + class HooksRunner(object): """Hook runner. diff --git a/lib/bdev.py b/lib/bdev.py index 14f1e5c..2824648 100644 --- a/lib/bdev.py +++ b/lib/bdev.py @@ -567,6 +567,8 @@ class DRBD8Status(object): self.is_diskless = self.ldisk == "Diskless" self.is_disk_uptodate = self.ldisk == "UpToDate" + self.is_in_resync = self.cstatus in ('SyncSource', 'SyncTarget') + m = self.SYNC_RE.match(procline) if m: self.sync_percent = float(m.group(1)) diff --git a/lib/rpc.py b/lib/rpc.py index dd7c176..467163e 100644 --- a/lib/rpc.py +++ b/lib/rpc.py @@ -736,6 +736,35 @@ class RpcRunner(object): params = [instance_name, [cf.ToDict() for cf in disks]] return self._SingleNodeCall(node, "blockdev_close", params) + def call_drbd_disconnect_net(self, node_list, nodes_ip, disks): + """Disconnects the network of the given drbd devices. + + This is a multi-node call. + + """ + return self._MultiNodeCall(node_list, "drbd_disconnect_net", + [nodes_ip, [cf.ToDict() for cf in disks]]) + + def call_drbd_attach_net(self, node_list, nodes_ip, + disks, instance_name, multimaster): + """Attaches the network of the given drbd devices. + + This is a multi-node call. + + """ + return self._MultiNodeCall(node_list, "drbd_attach_net", + [nodes_ip, [cf.ToDict() for cf in disks], + instance_name, multimaster]) + + def call_drbd_wait_sync(self, node_list, nodes_ip, disks): + """Waits until the synchronization of drbd devices is complete. + + This is a multi-node call. 
+ + """ + return self._MultiNodeCall(node_list, "drbd_wait_sync", + [nodes_ip, [cf.ToDict() for cf in disks]]) + @classmethod def call_upload_file(cls, node_list, file_name, address_list=None): """Upload a file. -- 1.7.10.4