Skip to content

Commit

Permalink
context: remove context handling from linstor plugin
Browse files Browse the repository at this point in the history
Use the default ssh/context driver, as context images are very small
and are regenerated anyway every time the VM is started.

For now it will run in a compat mode and try to delete old
Linstor context images, but new ones will be file based.
  • Loading branch information
rp- committed Jan 15, 2024
1 parent 646e973 commit 9b97da1
Show file tree
Hide file tree
Showing 5 changed files with 74 additions and 118 deletions.
19 changes: 19 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,25 @@ python setup.py install

To upgrade the driver, simply run the above installation script again.

## From 2.x to 3.x


### Linstor system datastore

In version 3.x we removed the handling of context images within the Linstor plugin
and now fall back to the `tm/ssh` implementation.

This means you have to remove the `CONTEXT_DISK_TYPE` attribute from
the Linstor system datastore, as we now use the default FILE type.

Context images are just read-only ISO files that are recreated every time
the VM is started, and there is not much benefit from having them
as DRBD resources.

The plugin will delete old context images on VM delete and undeploy actions.
If you want to clean up the context images `OpenNebula-vm-context-*`, you need to
stop and start (not reboot) each VM that is still using one.

## Configuration
Please refer to the DRBD user guide for configuration and documentation:
[Linstor User's guide](https://docs.linbit.com/docs/linstor-guide/#ch-opennebula-linstor)
Expand Down
117 changes: 18 additions & 99 deletions tm/context
Original file line number Diff line number Diff line change
Expand Up @@ -20,119 +20,38 @@ limitations under the License.
from __future__ import print_function

import sys
import os
import shutil
import subprocess
from one import util, consts
from one import util, vm
from one.datastore import Datastore
from one.extender import get_device_path, get_current_context_id, deploy
from linstor import Resource
from one.extender import delete_vm_contexts

FILES = sys.argv[1:-3]
DST = sys.argv[-3]
VM_ID = sys.argv[-2]
VM_ID = int(sys.argv[-2])
DS_ID = sys.argv[-1]


def ensure_dir(directory):
try:
os.mkdir(directory)
except OSError:
pass


def _get_correct_paths(path):
"""
If a path with a ':' is passed, we have to split this entry.
As the part before ':' is the source file and the suffix is the actual filename
:param str path: opennebula iso file path argument
:return: A tuple with the source and destination name
"""
if ':' in path:
s = path.split(':', 1)
return s[0], "'" + s[1] + "'"
return path, ""


def main():
util.log_info("Entering tm/context on {h} for files {f}.".format(h=DST, f=FILES))

disk_id = DST.split(".")[-1].strip()

dst_host = util.arg_host(DST).strip()
dst_path = util.arg_path(DST).strip()
dst_dir = os.path.dirname(dst_path).strip()
util.log_info("Entering tm/context on {h}.".format(h=DST))

datastore = Datastore(util.show_ds(DS_ID))
target_vm = vm.Vm(util.show_vm(VM_ID))

iso_dir = os.path.join(datastore.base_path, ".isofiles", str(VM_ID))

ensure_dir(datastore.base_path)
ensure_dir(os.path.join(datastore.base_path, ".isofiles"))
ensure_dir(iso_dir)

iso_file = os.path.join(iso_dir, str(VM_ID) + ".iso")

for file in FILES:
util.log_info("copy {s} {d}".format(s=file, d=os.path.join(iso_dir, os.path.basename(file))))
if file.startswith("http"):
subprocess.check_call(["wget", "-P", iso_dir, file])
else:
src, dst_name = _get_correct_paths(file)

# instead of using python copy utils, we do a bash call
# because we don't want to handle shell quoting in python
util.exec_and_log(
"cp -R {src} {dst}".format(src=src, dst=os.path.join(iso_dir, os.path.basename(dst_name))),
"Error copying {src} to {dir}".format(src=src, dir=iso_dir))

rc = util.mkiso_command(
'-o {iso_file} -V CONTEXT -J -R {iso_dir}'.format(
iso_file=iso_file, iso_dir=iso_dir)
)

if rc == 0:
iso_size = os.path.getsize(iso_file)
iso_size_mb = int(iso_size / 1024 / 1024) + 1

c_id = get_current_context_id(datastore.linstor_controllers, int(VM_ID), int(disk_id))
# del_result = delete_vm_contexts(datastore.linstor_controllers, VM_ID, disk_id)
# for res in del_result:
# if del_result[res] is not None:
# util.log_info(str(del_result[res]))

if c_id is None:
res_name = consts.CONTEXT_PREFIX + "-vm{vm_id}-disk{disk_id}-{cid}".format(
vm_id=VM_ID, disk_id=disk_id, cid=1
)
resource = deploy(
linstor_controllers=datastore.linstor_controllers,
resource_name=res_name,
vlm_size_str='{s}MiB'.format(s=iso_size_mb),
resource_group=datastore.linstor_resource_group
)
else:
res_name = consts.CONTEXT_PREFIX + "-vm{vm_id}-disk{disk_id}-{cid}".format(
vm_id=VM_ID, disk_id=disk_id, cid=c_id
)
resource = Resource(res_name, datastore.linstor_controllers)

resource.activate(dst_host)

if util.exec_and_log(
'cat {iso} | ssh {n} dd of={dev_path} status=none'.format(
iso=iso_file, n=dst_host, dev_path=get_device_path(resource)),
"Error copying iso to drbd"
) != 0:
raise RuntimeError("ERROR: Error copying iso to drbd device {dev_path} on node {n}".format(
dev_path=get_device_path(resource), n=dst_host))
# cleanup old context images no longer needed since v3.x
if target_vm.has_context:
del_result = delete_vm_contexts(datastore.linstor_controllers, VM_ID, target_vm.context_id)

# link drbd device
util.link_file(dst_host, dst_dir, dst_path, get_device_path(resource), res_name)
for res in del_result:
if del_result[res] is not None:
util.log_info(str(del_result[res]))
else:
util.log_info("No CONTEXT section in VM({vm}) XML info".format(vm=VM_ID))

shutil.rmtree(iso_dir, ignore_errors=True)
arguments = sys.argv[1:]
cmd = ["/var/lib/one/remotes/tm/ssh/context"] + arguments
process = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

util.log_info("Leaving tm/context successfully.")
util.log_info("tm/ssh/context finished with {r}.".format(r=process.returncode))
sys.exit(process.returncode)


if __name__ == "__main__":
Expand Down
16 changes: 14 additions & 2 deletions tm/mv
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ import sys
from linstor import Resource, MultiLinstor
from one import util, vm
from one.datastore import Datastore
from one.extender import get_rsc_name, get_device_path, get_satellite_nodes, deploy
from one.extender import get_rsc_name, get_device_path, get_satellite_nodes, deploy, delete_vm_contexts

SRC = sys.argv[1]
DST = sys.argv[2]
Expand Down Expand Up @@ -140,11 +140,24 @@ def main():
datastore = Datastore(util.show_ds(DS_ID))

disk_id = SRC.split(".")[-1].strip() if "disk." in SRC else None
target_vm = vm.Vm(util.show_vm(VM_ID))

if disk_id is None:
# this path is executed if used as system ds and as final mv call as the vm directory
util.ssh_make_path(" ".join([dst_host, dst_dir]))

# try to delete VM context images as we move to context file images
if True:
if target_vm.has_context:
del_result = delete_vm_contexts(datastore.linstor_controllers, VM_ID, target_vm.context_id)

for res in del_result:
if del_result[res] is not None:
util.log_info(str(del_result[res]))
else:
util.log_info("No CONTEXT section in VM({vm}) XML info".format(vm=VM_ID))
# end of context delete

# this copy command does the same as the opennebula ssh/mv
# the tar copy might fail if the src host is already down
tar_copy_cmd = " && ".join([
Expand All @@ -169,7 +182,6 @@ def main():
with MultiLinstor(MultiLinstor.controller_uri_list(datastore.linstor_controllers)) as lin:
satellite_nodes = get_satellite_nodes(lin)

target_vm = vm.Vm(util.show_vm(VM_ID))
# normal drbd image
res_name = get_rsc_name(target_vm, disk_id)

Expand Down
26 changes: 15 additions & 11 deletions tm/postmigrate
Original file line number Diff line number Diff line change
Expand Up @@ -59,17 +59,21 @@ def main():
res.deactivate(src_host)
res.allow_two_primaries = False

datastore = Datastore(util.show_ds(DS_ID))
if datastore.tm_mad == "linstor":
if target_vm.has_context:
# deactivate context image too
context_id = target_vm.context_id
res_name = get_current_context(datastore.linstor_controllers, int(VM_ID), context_id)
res = Resource(name=res_name, uri=datastore.linstor_controllers)
res.deactivate(src_host)
res.allow_two_primaries = False
else:
util.log_info("No CONTEXT section in VM({vm}) XML info".format(vm=VM_ID))
# TODO delete this if-block after context transition time is over
if True:
datastore = Datastore(util.show_ds(DS_ID))
if datastore.tm_mad == "linstor":
if target_vm.has_context:
# deactivate context image too
context_id = target_vm.context_id
res_name = get_current_context(datastore.linstor_controllers, int(VM_ID), context_id)
if res_name:
res = Resource(name=res_name, uri=datastore.linstor_controllers)
res.deactivate(src_host)
res.allow_two_primaries = False
else:
util.log_info("No CONTEXT section in VM({vm}) XML info".format(vm=VM_ID))
# end delete

args = ""
for arg in sys.argv[1:]:
Expand Down
14 changes: 8 additions & 6 deletions tm/premigrate
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ def main():

context_id = target_vm.context_id
context_path = "{}/disk.{}".format(dst_dir, context_id)
if datastore.tm_mad != "linstor":
res_name = get_current_context(datastore.linstor_controllers, VM_ID, context_id)
if datastore.tm_mad != "linstor" or res_name is None:
# [phil] I guess this does not belong here, but I want working live
# migration with ssh system store.
# scp'ing context block device from the source to the destination machine
Expand All @@ -86,11 +87,12 @@ def main():
raise RuntimeError("Error: Unable to scp {} from {} to {}".format(context_path, src_host, dst_host))
else:
# here we have for sure a linstor datastore
res_name = get_current_context(datastore.linstor_controllers, VM_ID, context_id)
res = Resource(name=res_name, uri=datastore.linstor_controllers)
res.activate(DST_HOST)
res.allow_two_primaries = True
util.link_file(dst_host, dst_dir, context_path, get_device_path(res), res.name)
# TODO remove this else branch after some minor versions; it is transition code for running VMs
if res_name:
res = Resource(name=res_name, uri=datastore.linstor_controllers)
res.activate(DST_HOST)
res.allow_two_primaries = True
util.link_file(dst_host, dst_dir, context_path, get_device_path(res), res.name)
else:
util.log_info("No CONTEXT section in VM({vm}) XML info".format(vm=VM_ID))

Expand Down

0 comments on commit 9b97da1

Please sign in to comment.