From d5a2a4802c7f3df174938112225cc88dfdf14e63 Mon Sep 17 00:00:00 2001 From: Aria Kang Date: Thu, 16 Jan 2025 08:19:02 -0800 Subject: [PATCH 1/2] {Feature} Core - Vignetting - devignetting mask download link update Summary: Update devignetting mask with gamma correction version. Differential Revision: D68227793 --- .../dataprovider_quickstart_tutorial.ipynb | 1585 ++++++++--------- .../image_utilities.mdx | 2 +- 2 files changed, 793 insertions(+), 794 deletions(-) diff --git a/core/examples/dataprovider_quickstart_tutorial.ipynb b/core/examples/dataprovider_quickstart_tutorial.ipynb index c18b82c60..db784654b 100644 --- a/core/examples/dataprovider_quickstart_tutorial.ipynb +++ b/core/examples/dataprovider_quickstart_tutorial.ipynb @@ -1,800 +1,799 @@ { - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "id": "fb140897", - "metadata": {}, - "source": [ - "# Interactive Examples on Project Aria Tools\n", - "\n", - "### Notebook stuck?\n", - "Note that because of Jupyter issues, sometimes the code may stuck at visualization. We recommend **restart the kernels** and try again to see if the issue is resolved." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "10e0572c", - "metadata": {}, - "outputs": [], - "source": [ - "# Specifics for Google Colab\n", - "google_colab_env = 'google.colab' in str(get_ipython())\n", - "if google_colab_env:\n", - " print(\"Running from Google Colab, installing projectaria_tools and getting sample data\")\n", - " !pip install projectaria-tools\n", - " !curl -O -J -L \"https://github.com/facebookresearch/projectaria_tools/raw/main/data/mps_sample/sample.vrs\"\n", - " vrsfile = \"sample.vrs\"\n", - "else:\n", - " print(\"Using a pre-existing projectaria_tool github repository\")\n", - " vrsfile = \"../../data/mps_sample/sample.vrs\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8211dc17", - "metadata": {}, - "outputs": [], - "source": [ - "import sys\n", - "import os\n", - "\n", - "# Add the current repository path to sys.path\n", - "repo_path = os.path.abspath(os.path.join(os.getcwd(), '../../'))\n", - "sys.path.insert(0, repo_path)\n", - "print(repo_path)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7e0909f2-ba66-4758-8a05-2e925574f43b", - "metadata": {}, - "outputs": [], - "source": [ - "from projectaria_tools.core import data_provider, calibration\n", - "from projectaria_tools.core.image import InterpolationMethod\n", - "from projectaria_tools.core.sensor_data import TimeDomain, TimeQueryOptions\n", - "from projectaria_tools.core.stream_id import RecordableTypeId, StreamId\n", - "import numpy as np\n", - "from matplotlib import pyplot as plt\n", - "from PIL import Image" - ] - }, - { - "cell_type": "markdown", - "id": "8196ad05", - "metadata": {}, - "source": [ - "## Create data provider" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fb04b53b", - "metadata": {}, - "outputs": [], - "source": [ - "print(f\"Creating data provider from {vrsfile}\")\n", - "provider = data_provider.create_vrs_data_provider(vrsfile)\n", - "if not provider:\n", - " print(\"Invalid vrs data provider\")" - ] - }, - { - "cell_type": "markdown", - "id": "87be2866", - "metadata": {}, - "source": [ - "# Retrieving image data\n", - "\n", - "Goals:\n", - "- Learn how to retrieve Image data for a given Image stream\n", - "\n", - "Key learnings:\n", - "- VRS contains data streams are identified with a Unique Identifier: stream_id\n", - "- Learn what are the Stream Ids used by 
Aria data (Slam, Rgb, EyeTracking)\n", - "- Learn that image data can be retrieved by using a record Index or a timestamp\n", - "- For each stream_id, index ranges from [0, get_num_data(stream_id)], and the same index for different streams could have different timestamps\n", - "- Query data from different sensors of the same timestamp can be done through `get_image_data_by_time_ns`, `get_imu_data_by_time_ns`, etc" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "023ae1a6", - "metadata": {}, - "outputs": [], - "source": [ - "stream_mappings = {\n", - " \"camera-slam-left\": StreamId(\"1201-1\"),\n", - " \"camera-slam-right\":StreamId(\"1201-2\"),\n", - " \"camera-rgb\":StreamId(\"214-1\"),\n", - " \"camera-eyetracking\":StreamId(\"211-1\"),\n", - "}\n", - "\n", - "axes = []\n", - "fig, axes = plt.subplots(1, 4, figsize=(12, 5))\n", - "fig.suptitle('Retrieving image data using Record Index')\n", - "\n", - "# Query data with index\n", - "frame_index = 1\n", - "for idx, [stream_name, stream_id] in enumerate(list(stream_mappings.items())):\n", - " image = provider.get_image_data_by_index(stream_id, frame_index)\n", - " axes[idx].imshow(image[0].to_numpy_array(), cmap=\"gray\", vmin=0, vmax=255)\n", - " axes[idx].title.set_text(stream_name)\n", - " axes[idx].tick_params(left=False, right=False, labelleft=False, labelbottom=False, bottom=False)\n", - "plt.show()\n", - "\n", - "# Same example using Time\n", - "rgb_stream_id = StreamId('214-1')\n", - "plt.figure()\n", - "fig, axes = plt.subplots(1, 4, figsize=(12, 5))\n", - "fig.suptitle('Retrieving image data using Time')\n", - "\n", - "time_domain = TimeDomain.DEVICE_TIME # query data based on host time\n", - "option = TimeQueryOptions.CLOSEST # get data whose time [in TimeDomain] is CLOSEST to query time\n", - "start_time = provider.get_first_time_ns(rgb_stream_id, time_domain)\n", - "\n", - "for idx, [stream_name, stream_id] in enumerate(list(stream_mappings.items())):\n", - " image = provider.get_image_data_by_time_ns(stream_id, start_time, time_domain, option)\n", - " axes[idx].imshow(image[0].to_numpy_array(), cmap=\"gray\", vmin=0, vmax=255)\n", - " axes[idx].title.set_text(stream_name)\n", - " axes[idx].tick_params(left=False, right=False, labelleft=False, labelbottom=False, bottom=False)\n", - "plt.show()\n" - ] - }, - { - "cell_type": "markdown", - "id": "c5033225", - "metadata": {}, - "source": [ - "# Summarize a VRS using thumbnails\n", - "\n", - "Goals:\n", - "- Summarize a VRS using 10 image side by side\n", - "\n", - "Key learnings:\n", - "- Image streams are identified with a Unique Identifier: stream_id\n", - "- PIL images can be created from Numpy array" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "933725b6", - "metadata": {}, - "outputs": [], - "source": [ - "from PIL import Image, ImageOps\n", - "from tqdm import tqdm\n", - "\n", - "rgb_stream_id = StreamId(\"214-1\")\n", - "\n", - "# Retrieve Start and End time for the given Sensor Stream Id\n", - "start_time = provider.get_first_time_ns(rgb_stream_id, time_domain)\n", - "end_time = provider.get_last_time_ns(rgb_stream_id, time_domain)\n", - "\n", - "# Retrieve image size for the RGB stream\n", - "time_domain = TimeDomain.DEVICE_TIME # query data based on host time\n", - "option = TimeQueryOptions.CLOSEST # get data whose time [in TimeDomain] is CLOSEST to query time\n", - "\n", - "image_config = provider.get_image_configuration(rgb_stream_id)\n", - "width = image_config.image_width\n", - "height = image_config.image_height\n", 
- "\n", - "sample_count = 10\n", - "resize_ratio = 10\n", - "thumbnail = newImage = Image.new(\n", - " \"RGB\", (int(width * sample_count / resize_ratio), int(height / resize_ratio))\n", - ")\n", - "current_width = 0\n", - "\n", - "\n", - "# Samples 10 timestamps\n", - "sample_timestamps = np.linspace(start_time, end_time, sample_count)\n", - "for sample in tqdm(sample_timestamps):\n", - " image_tuple = provider.get_image_data_by_time_ns(rgb_stream_id, int(sample), time_domain, option)\n", - " image_array = image_tuple[0].to_numpy_array()\n", - " image = Image.fromarray(image_array)\n", - " new_size = (\n", - " int(image.size[0] / resize_ratio),\n", - " int(image.size[1] / resize_ratio),\n", - " )\n", - " image = image.resize(new_size).rotate(-90)\n", - " thumbnail.paste(image, (current_width, 0))\n", - " current_width = int(current_width + width / resize_ratio)\n", - "\n", - "from IPython.display import Image\n", - "display(thumbnail)" - ] - }, - { - "cell_type": "markdown", - "id": "0850064e", - "metadata": {}, - "source": [ - "# Obtain mapping between stream_id and sensor label\n", - "Goals:\n", - "- In a vrs file, each sensor data is identified through stream_id\n", - "- Learn mapping between stream_id and label for each sensor\n", - "\n", - "Key learnings:\n", - "- VRS is using Unique Identifier for each stream called stream_id. \n", - "- For each sensor data, it is attached with a stream_id, which contains two parts [RecordableTypeId, InstanceId]. \n", - "- To get the actual readable name of each sensor,\n", - "we can use `get_label_from_stream_id` vise versa `get_stream_id_from_label`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "43d83a1e", - "metadata": {}, - "outputs": [], - "source": [ - "streams = provider.get_all_streams()\n", - "for stream_id in streams:\n", - " label = provider.get_label_from_stream_id(stream_id)\n", - " print(\n", - " f\"stream_id: [{stream_id}] convert to label: [{label}] and back: [{provider.get_stream_id_from_label(label)}]\"\n", - " )" - ] - }, - { - "cell_type": "markdown", - "id": "3def75bb", - "metadata": {}, - "source": [ - "# Get sensor data in a sequence based on data capture time\n", - "Goal:\n", - "- Obtain sensor data sequentially based on timestamp\n", - "\n", - "Key learnings\n", - "- Default option activates all sensors and playback the entire dataset from vrs\n", - "- Setup option to only activate certain streams, truncate start/end time, and sample rate\n", - "- Obtain data from different sensor types\n", - "- `TimeDomain` are separated into four categories: `RECORD_TIME`, `DEVICE_TIME`, `HOST_TIME`, `TIME_CODE`" - ] - }, - { - "cell_type": "markdown", - "id": "89aba5a2", - "metadata": {}, - "source": [ - "### Step 1: obtain default options that provides the whole dataset from VRS\n", - "* activates all sensor streams\n", - "* No truncation for first/last timestamp\n", - "* Subsample rate = 1 (do not skip any data per sensor)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f0124864", - "metadata": {}, - "outputs": [], - "source": [ - "options = (\n", - " provider.get_default_deliver_queued_options()\n", - ") # default options activates all streams" - ] - }, - { - "cell_type": "markdown", - "id": "fb6dca83", - "metadata": {}, - "source": [ - "### Step 2: set preferred deliver options\n", - "* truncate first/last time: `set_truncate_first_device_time_ns/set_truncate_last_device_time_ns()`\n", - "* subselect sensor streams to play: `activate_stream(stream_id)`\n", - "* skip sensor data : 
`set_subsample_rate(stream_id, rate)`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a046582c", - "metadata": {}, - "outputs": [], - "source": [ - "options.set_truncate_first_device_time_ns(int(1e8)) # 0.1 secs after vrs first timestamp\n", - "options.set_truncate_last_device_time_ns(int(1e9)) # 1 sec before vrs last timestamp\n", - "\n", - "# deactivate all sensors\n", - "options.deactivate_stream_all()\n", - "# activate only a subset of sensors\n", - "slam_stream_ids = options.get_stream_ids(RecordableTypeId.SLAM_CAMERA_DATA)\n", - "imu_stream_ids = options.get_stream_ids(RecordableTypeId.SLAM_IMU_DATA)\n", - "\n", - "for stream_id in slam_stream_ids:\n", - " options.activate_stream(stream_id) # activate slam cameras\n", - " options.set_subsample_rate(stream_id, 1) # sample every data for each slam camera\n", - "\n", - "for stream_id in imu_stream_ids:\n", - " options.activate_stream(stream_id) # activate imus\n", - " options.set_subsample_rate(stream_id, 10) # sample every 10th data for each imu" - ] - }, - { - "cell_type": "markdown", - "id": "fdff6dd3", - "metadata": {}, - "source": [ - "### Step 3: create iterator to deliver data\n", - "`TimeDomain` contains the following\n", - "* `RECORD_TIME`: timestamp stored in vrs index, fast to access, but not guaranteed which time domain\n", - "* `DEVICE_TIME`: capture time in device's timedomain, accurate\n", - "* `HOST_TIME`: arrival time in host computer's timedomain, may not be accurate\n", - "* `TIME_CODE`: capture in TimeSync server's timedomain\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "efa5aad8", - "metadata": {}, - "outputs": [], - "source": [ - "iterator = provider.deliver_queued_sensor_data(options)\n", - "for sensor_data in iterator:\n", - " label = provider.get_label_from_stream_id(sensor_data.stream_id())\n", - " sensor_type = sensor_data.sensor_data_type()\n", - " device_timestamp = sensor_data.get_time_ns(TimeDomain.DEVICE_TIME)\n", - " host_timestamp = sensor_data.get_time_ns(TimeDomain.HOST_TIME)\n", - " timecode_timestamp = sensor_data.get_time_ns(TimeDomain.TIME_CODE)\n", - " print(\n", - " f\"\"\"obtain data from {label} of type {sensor_type} with \\n\n", - " DEVICE_TIME: {device_timestamp} nanoseconds \\n\n", - " HOST_TIME: {host_timestamp} nanoseconds \\n\n", - " \"\"\"\n", - " )" - ] - }, - { - "cell_type": "markdown", - "id": "a0796407", - "metadata": {}, - "source": [ - "# Random access data\n", - "Goal\n", - "- Access data from a stream randomly using a data index or a timestamp\n", - "\n", - "Key learnings\n", - "- Sensor data can be obtained through index within the range of [0, number of data for this stream_id)\n", - "\n", - " - `get_sensor_data_by_index(stream_id, index)`\n", - " - `get_image_data_by_index(stream_id, index)`\n", - " - Access other sensor data by index interface is available in core/python/VrsDataProviderPyBind.h\n", - " \n", - "- `TimeQueryOptions` has three options: `TimeQueryOptions.BEFORE`, `TimeQueryOptions.AFTER`, `TimeQueryOptions.CLOSEST`\n", - "- Query through index will provide the exact data vs query through a timestamp that is not exact, data nearby will be omitted base on `TimeQueryOptions`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "63657c1f", - "metadata": {}, - "outputs": [], - "source": [ - "sensor_name = \"camera-slam-right\"\n", - "sensor_stream_id = provider.get_stream_id_from_label(sensor_name)\n", - "\n", - "# get all image data by index\n", - "num_data = 
provider.get_num_data(sensor_stream_id)\n", - "\n", - "for index in range(0, num_data):\n", - " image_data = provider.get_image_data_by_index(sensor_stream_id, index)\n", - " print(\n", - " f\"Get image: {index} with timestamp {image_data[1].capture_timestamp_ns}\"\n", - " )" - ] - }, - { - "cell_type": "markdown", - "id": "dca84bb5", - "metadata": {}, - "source": [ - "### Sensor data can be obtained by timestamp (nanoseconds)\n", - "* Get stream time range `get_first_time_ns` and `get_last_time_ns`\n", - "* Specify timedomain: `TimeDomain.DEVICE_TIME` (default)\n", - "* Query data by queryTime\n", - " * `TimeQueryOptions.BEFORE` (default): sensor_dataTime <= queryTime\n", - " * `TimeQueryOptions.AFTER` : sensor_dataTime >= queryTime\n", - " * `TimeQueryOptions.CLOSEST` : sensor_dataTime closest to queryTime" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7783e83f", - "metadata": {}, - "outputs": [], - "source": [ - "time_domain = TimeDomain.DEVICE_TIME # query data based on DEVICE_TIME\n", - "option = TimeQueryOptions.CLOSEST # get data whose time [in TimeDomain] is CLOSEST to query time\n", - "\n", - "start_time = provider.get_first_time_ns(sensor_stream_id, time_domain)\n", - "end_time = provider.get_last_time_ns(sensor_stream_id, time_domain)\n", - "\n", - "for time in range(start_time, end_time, int(1e7)):\n", - " image_data = provider.get_image_data_by_time_ns(\n", - " sensor_stream_id, time, time_domain, option\n", - " )\n", - " print(\n", - " f\"query time {time} and get capture image time {image_data[1].capture_timestamp_ns} within range {start_time} {end_time}\"\n", - " )" - ] - }, - { - "cell_type": "markdown", - "id": "a8be0b53", - "metadata": {}, - "source": [ - "### Get sensor data configuration" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6824e56a", - "metadata": {}, - "outputs": [], - "source": [ - "def image_config_example(config):\n", - " print(f\"device_type {config.device_type}\")\n", - " print(f\"device_version {config.device_version}\")\n", - " print(f\"device_serial {config.device_serial}\")\n", - " print(f\"sensor_serial {config.sensor_serial}\")\n", - " print(f\"nominal_rate_hz {config.nominal_rate_hz}\")\n", - " print(f\"image_width {config.image_width}\")\n", - " print(f\"image_height {config.image_height}\")\n", - " print(f\"pixel_format {config.pixel_format}\")\n", - " print(f\"gamma_factor {config.gamma_factor}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f3bf5afb", - "metadata": {}, - "outputs": [], - "source": [ - "config = provider.get_image_configuration(provider.get_stream_id_from_label(\"camera-slam-right\"))\n", - "image_config_example(config)" - ] - }, - { - "cell_type": "markdown", - "id": "ddf4af2e", - "metadata": {}, - "source": [ - "# Calibration examples\n", - "Goal:\n", - "- Obtain camera extrinsics and intrinsics\n", - "- Learn to project a 3D point to camera frame\n", - "\n", - "Key learnings\n", - "- Get calibration for different sensors using sensor labels\n", - "- Learn how to use extrinsics/intrinsics to project a 3D points to a given camera\n", - "- Reference frame convention" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c47e8e18", - "metadata": {}, - "outputs": [], - "source": [ - "device_calib = provider.get_device_calibration()\n", - "all_sensor_labels = device_calib.get_all_labels()\n", - "print(f\"device calibration contains calibrations for the following sensors \\n {all_sensor_labels}\")" - ] - }, - { - "cell_type": 
"markdown", - "id": "872040fa", - "metadata": {}, - "source": [ - "### Project a 3D point to camera frame\n", - "\n", - "In this section we will learn how to retrieve calibration data and how to use it.\n", - "Aria calibration is defined by two objects: one defining the intrinsics (`rgb_calib.project` and `rgb_calib.unproject`) and one defining the extrinsics as a SE3 pose (`device_calib.get_transform_device_sensor(sensor_label`).\n", - "\n", - "Intrinsics can be used to project a 3d point to the image plane or un-project a 2d point as a bearing vector. Extrinsics are used to set the camera in world coordinates at a given rotation and position in space.\n", - "\n", - "### Reference frame convention\n", - "\n", - "> `transform_sensor1_sensor3` = `transform_sensor1_sensor2` * `transform_sensor2_sensor3` \\\n", - "> `point_in_sensor`: 3D point measured from sensor's reference frame \\\n", - "> `point_in_sensor` = `transform_sensor1_sensor` * `point_in_sensor`\n", - "\n", - "Device Frame: `device_calib.get_origin_label() = camera-slam-left`\\\n", - "Sensor extrinsics: `device_calib.get_transform_device_sensor(sensor_label)`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f7e39f66", - "metadata": {}, - "outputs": [], - "source": [ - "camera_name = \"camera-rgb\"\n", - "transform_device_camera = device_calib.get_transform_device_sensor(camera_name).to_matrix()\n", - "transform_camera_device = np.linalg.inv(transform_device_camera)\n", - "print(f\"Device calibration origin label {device_calib.get_origin_label()}\")\n", - "print(f\"{camera_name} has extrinsics of \\n {transform_device_camera}\")\n", - "\n", - "rgb_calib = device_calib.get_camera_calib(\"camera-rgb\")\n", - "if rgb_calib is not None:\n", - " # project a 3D point in device frame [camera-slam-left] to rgb camera\n", - " point_in_device = np.array([0, 0, 10])\n", - " point_in_camera = (\n", - " np.matmul(transform_camera_device[0:3,0:3], point_in_device.transpose())\n", - " + transform_camera_device[0:3,3]\n", - " )\n", - "\n", - " maybe_pixel = rgb_calib.project(point_in_camera)\n", - " if maybe_pixel is not None:\n", - " print(\n", - " f\"Get pixel {maybe_pixel} within image of size {rgb_calib.get_image_size()}\"\n", - " )" - ] - }, - { - "cell_type": "markdown", - "id": "3ad7ddcb", - "metadata": {}, - "source": [ - "### Get calibration data for other sensors\n", - "Aria is a multimodal capture device, each sensors calibration can be retrieved using the same interface. Only EyeTracking (`get_aria_et_camera_calib()`) and Audio calibration (`get_aria_microphone_calib()`) is a bit different since we have multiple sensors that share the same stream_id." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5ecc4835", - "metadata": {}, - "outputs": [], - "source": [ - "et_calib = device_calib.get_aria_et_camera_calib()\n", - "if et_calib is not None:\n", - " print(f\"Camera {et_calib[0].get_label()} has image size {et_calib[0].get_image_size()}\")\n", - " print(f\"Camera {et_calib[1].get_label()} has image size {et_calib[1].get_image_size()}\")\n", - "\n", - "imu_calib = device_calib.get_imu_calib(\"imu-left\")\n", - "if imu_calib is not None:\n", - " print(f\"{imu_calib.get_label()} has extrinsics transform_Device_Imu:\\n {imu_calib.get_transform_device_imu().to_matrix3x4()}\")" - ] - }, - { - "cell_type": "markdown", - "id": "700e8af5", - "metadata": {}, - "source": [ - "### Undistort an image\n", - "You can remove distortions in an image in three steps. 
\n", - "\n", - "First, use the provider to access the image and the camera calibration of the stream. Then create a \"linear\" spherical camera model with `get_spherical_camera_calibration`. The function allows you to specify the image size as well as focal length of the model, assuming principal point is at the image center. Finally, apply `distort_by_calibration` function to distort the image." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e20cd362", - "metadata": {}, - "outputs": [], - "source": [ - "# input: retrieve image as a numpy array\n", - "sensor_name = \"camera-rgb\"\n", - "sensor_stream_id = provider.get_stream_id_from_label(sensor_name)\n", - "image_data = provider.get_image_data_by_index(sensor_stream_id, 0)\n", - "image_array = image_data[0].to_numpy_array()\n", - "# input: retrieve image distortion\n", - "device_calib = provider.get_device_calibration()\n", - "src_calib = device_calib.get_camera_calib(sensor_name)\n", - "\n", - "# create output calibration: a linear model of image size 512x512 and focal length 150\n", - "# Invisible pixels are shown as black.\n", - "dst_calib = calibration.get_linear_camera_calibration(512, 512, 150, camera_name)\n", - "\n", - "# distort image\n", - "rectified_array = calibration.distort_by_calibration(image_array, dst_calib, src_calib, InterpolationMethod.BILINEAR)\n", - "\n", - "# visualize input and results\n", - "plt.figure()\n", - "fig, axes = plt.subplots(1, 2, figsize=(12, 5))\n", - "fig.suptitle(f\"Image undistortion (focal length = {dst_calib.get_focal_lengths()})\")\n", - "\n", - "axes[0].imshow(image_array, cmap=\"gray\", vmin=0, vmax=255)\n", - "axes[0].title.set_text(f\"sensor image ({sensor_name})\")\n", - "axes[0].tick_params(left=False, right=False, labelleft=False, labelbottom=False, bottom=False)\n", - "axes[1].imshow(rectified_array, cmap=\"gray\", vmin=0, vmax=255)\n", - "axes[1].title.set_text(f\"undistorted image ({sensor_name})\")\n", - "axes[1].tick_params(left=False, right=False, labelleft=False, labelbottom=False, bottom=False)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "id": "a098c273", - "metadata": {}, - "source": [ - "Note the rectified image shows a circular area of visible pixels. If you want the entire rectified image to be covered by pixels, you can increase the magnification." - ] - }, - { - "cell_type": "markdown", - "id": "4d29fa4b", - "metadata": {}, - "source": [ - "# Devignetting\n", - "\n", - "Devignetting corrects uneven lighting, enhancing image uniformity and clarity. We provide devignetting for camera-rgb full size image [2880, 2880], camera-rgb half size image[1408, 1408] and slam image [640, 480].\n", - "1. Aria devignetting masks can be downloaded from [Link](https://www.projectaria.com/async/sample/download/?bucket=core&filename=aria_camera_devignetting_masks.zip). It contains the following files:\n", - "\n", - "```\n", - "aria_camera_devignetting_masks\n", - "|- slam_devignetting_mask.bin\n", - "|- rgb_half_devignetting_mask.bin\n", - "|- rgb_full_devignetting_mask.bin\n", - "```\n", - "2. set devignetting mask folder path with the local aria camera devignetting masks folder path.\n", - " `mask_folder_path = \"aria_camera_devignetting_masks\"`\n", - " `set_devignetting_mask_folder_path(mask_folder_path)`\n", - "4. load mask by passing camera label, now supporting \"camera-rgb\", \"camera-slam-left\" and \"camera-slam-right\".\n", - " `load_devignetting_mask(label)`\n", - "5. 
apply devignetting to source image.\n", - " `devignetting(src_image, devignetting_mask)`" - ] - }, - { - "cell_type": "markdown", - "id": "2b7e5d21-80d1-4acd-9ba9-c2dcb84e6380", - "metadata": {}, - "source": [ - "## Step 1: Download devignetting mask" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "747698e2-5cba-4b03-a90d-2d08b3e53456", - "metadata": {}, - "outputs": [], - "source": [ - "from urllib.request import urlretrieve\n", - "import zipfile\n", - "\n", - "# Download from url\n", - "devignetting_mask_folder_path = os.path.join(repo_path, \"devignetting_masks\")\n", - "downloaded_devignetting_mask_zip = os.path.join(devignetting_mask_folder_path, \"aria_camera_devignetting_masks.zip\")\n", - "if not os.path.exists(devignetting_mask_folder_path):\n", - " os.mkdir(devignetting_mask_folder_path)\n", - "urlretrieve(\"https://www.projectaria.com/async/sample/download/?bucket=core&filename=aria_camera_devignetting_masks.zip\", downloaded_devignetting_mask_zip)\n", - "\n", - "# unzip the mask files, with cross-platform compatibility\n", - "with zipfile.ZipFile(downloaded_devignetting_mask_zip, 'r') as zip_ref:\n", - " # Extract all files\n", - " zip_ref.extractall(devignetting_mask_folder_path)\n", - "\n", - " # Print out the filenames\n", - " print(f\"Successfully downloaded and extracted the following files for devignetting:\")\n", - " for file_info in zip_ref.infolist():\n", - " print(file_info.filename)\n" - ] - }, - { - "cell_type": "markdown", - "id": "62f31e06-a088-42ad-9500-c5157c871205", - "metadata": {}, - "source": [ - "## Step 2: Set devignetting mask folder path with the local folder path" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d373b069-46bd-4855-b402-a6f9aa61fed4", - "metadata": {}, - "outputs": [], - "source": [ - "deviceCalib = provider.get_device_calibration()\n", - "deviceCalib.set_devignetting_mask_folder_path(devignetting_mask_folder_path)" - ] - }, - { - "cell_type": "markdown", - "id": "5a69ed80-ddbb-48f5-92d5-6b06e0c393a4", - "metadata": {}, - "source": [ - "## Step 3 & 4: load mask and apply devignetting to source image." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "752d24b0-da52-4480-9373-3a4e1cbee0b9", - "metadata": {}, - "outputs": [], - "source": [ - "stream_id = provider.get_stream_id_from_label(\"camera-rgb\")\n", - "image_data = provider.get_image_data_by_index(stream_id, 0)\n", - "src_image_array = image_data[0].to_numpy_array()\n", - "\n", - "# load devignetting mask by feeding camera label\n", - "devignetting_mask = deviceCalib.load_devignetting_mask(\"camera-rgb\")\n", - "\n", - "# apply devignetting to source image\n", - "devignetted_image_array = calibration.devignetting(\n", - " src_image_array, devignetting_mask\n", - " ).astype(np.uint8)\n", - "\n", - "# visualize input and results\n", - "plt.figure()\n", - "fig, axes = plt.subplots(1, 2, figsize=(12, 5))\n", - "fig.suptitle(f\"Image devignetting (camera-rgb)\")\n", - "\n", - "axes[0].imshow(src_image_array, vmin=0, vmax=255)\n", - "axes[0].title.set_text(f\"original image\")\n", - "axes[1].imshow(devignetted_image_array, vmin=0, vmax=255)\n", - "axes[1].title.set_text(f\"devignetted image\")\n", - "\n", - "plt.show()" - ] - } - ], - "metadata": { - "custom": { - "cells": [], - "metadata": { - "fileHeader": "", - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.11" - } - }, - "nbformat": 4, - "nbformat_minor": 5 - }, - "indentAmount": 2, + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "fb140897", + "metadata": {}, + "source": [ + "# Interactive Examples on Project Aria Tools\n", + "\n", + "### Notebook stuck?\n", + "Note that due to Jupyter issues, the code may sometimes get stuck at visualization. We recommend **restarting the kernel** and trying again to see if the issue is resolved."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10e0572c", + "metadata": {}, + "outputs": [], + "source": [ + "# Specifics for Google Colab\n", + "google_colab_env = 'google.colab' in str(get_ipython())\n", + "if google_colab_env:\n", + " print(\"Running from Google Colab, installing projectaria_tools and getting sample data\")\n", + " !pip install projectaria-tools\n", + " !curl -O -J -L \"https://github.com/facebookresearch/projectaria_tools/raw/main/data/mps_sample/sample.vrs\"\n", + " vrsfile = \"sample.vrs\"\n", + "else:\n", + " print(\"Using a pre-existing projectaria_tool github repository\")\n", + " vrsfile = \"../../data/mps_sample/sample.vrs\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8211dc17", + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "import os\n", + "\n", + "# Add the current repository path to sys.path\n", + "repo_path = os.path.abspath(os.path.join(os.getcwd(), '../../'))\n", + "sys.path.insert(0, repo_path)\n", + "print(repo_path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7e0909f2-ba66-4758-8a05-2e925574f43b", + "metadata": {}, + "outputs": [], + "source": [ + "from projectaria_tools.core import data_provider, calibration\n", + "from projectaria_tools.core.image import InterpolationMethod\n", + "from projectaria_tools.core.sensor_data import TimeDomain, TimeQueryOptions\n", + "from projectaria_tools.core.stream_id import RecordableTypeId, StreamId\n", + "import numpy as np\n", + "from matplotlib import pyplot as plt\n", + "from PIL import Image" + ] + }, + { + "cell_type": "markdown", + "id": "8196ad05", + "metadata": {}, + "source": [ + "## Create data provider" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb04b53b", + "metadata": {}, + "outputs": [], + "source": [ + "print(f\"Creating data provider from {vrsfile}\")\n", + "provider = data_provider.create_vrs_data_provider(vrsfile)\n", + "if not provider:\n", + " print(\"Invalid vrs data provider\")" + ] + }, + { + "cell_type": "markdown", + "id": "87be2866", + "metadata": {}, + "source": [ + "# Retrieving image data\n", + "\n", + "Goals:\n", + "- Learn how to retrieve image data for a given image stream\n", + "\n", + "Key learnings:\n", + "- A VRS file contains data streams, each identified by a unique identifier: stream_id\n", + "- Learn the Stream Ids used by Aria data (Slam, Rgb, EyeTracking)\n", + "- Learn that image data can be retrieved using a record index or a timestamp\n", + "- For each stream_id, the index ranges over [0, get_num_data(stream_id)), and the same index in different streams can correspond to different timestamps\n", + "- Data from different sensors at the same timestamp can be queried through `get_image_data_by_time_ns`, `get_imu_data_by_time_ns`, etc. (see the sketch after the cell below)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "023ae1a6", + "metadata": {}, + "outputs": [], + "source": [ + "stream_mappings = {\n", + " \"camera-slam-left\": StreamId(\"1201-1\"),\n", + " \"camera-slam-right\": StreamId(\"1201-2\"),\n", + " \"camera-rgb\": StreamId(\"214-1\"),\n", + " \"camera-eyetracking\": StreamId(\"211-1\"),\n", + "}\n", + "\n", + "fig, axes = plt.subplots(1, 4, figsize=(12, 5))\n", + "fig.suptitle('Retrieving image data using Record Index')\n", + "\n", + "# Query data with index\n", + "frame_index = 1\n", + "for idx, [stream_name, stream_id] in enumerate(list(stream_mappings.items())):\n", + " image = provider.get_image_data_by_index(stream_id, frame_index)\n", + " axes[idx].imshow(image[0].to_numpy_array(), cmap=\"gray\", vmin=0, vmax=255)\n", + " axes[idx].title.set_text(stream_name)\n", + " axes[idx].tick_params(left=False, right=False, labelleft=False, labelbottom=False, bottom=False)\n", + "plt.show()\n", + "\n", + "# Same example using Time\n", + "rgb_stream_id = StreamId('214-1')\n", + "fig, axes = plt.subplots(1, 4, figsize=(12, 5))\n", + "fig.suptitle('Retrieving image data using Time')\n", + "\n", + "time_domain = TimeDomain.DEVICE_TIME # query data based on device time\n", + "option = TimeQueryOptions.CLOSEST # get data whose time [in TimeDomain] is CLOSEST to query time\n", + "start_time = provider.get_first_time_ns(rgb_stream_id, time_domain)\n", + "\n", + "for idx, [stream_name, stream_id] in enumerate(list(stream_mappings.items())):\n", + " image = provider.get_image_data_by_time_ns(stream_id, start_time, time_domain, option)\n", + " axes[idx].imshow(image[0].to_numpy_array(), cmap=\"gray\", vmin=0, vmax=255)\n", + " axes[idx].title.set_text(stream_name)\n", + " axes[idx].tick_params(left=False, right=False, labelleft=False, labelbottom=False, bottom=False)\n", + "plt.show()\n" + ] + },
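+ { + "cell_type": "markdown", + "id": "1f2e3d4c", + "metadata": {}, + "source": [ + "The key learnings above also mention `get_imu_data_by_time_ns`, which the cell above does not exercise. Below is a minimal sketch of the same time-based query for an IMU stream, assuming this recording exposes an `imu-right` stream; the field names `capture_timestamp_ns` and `accel_msec2` are assumptions about the motion-data record, not verified against this sample." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2a3b4c5d", + "metadata": {}, + "outputs": [], + "source": [ + "# Sketch: query the IMU sample closest in DEVICE_TIME to the first RGB frame.\n", + "# Assumes an \"imu-right\" stream exists in this recording.\n", + "imu_stream_id = provider.get_stream_id_from_label(\"imu-right\")\n", + "if imu_stream_id is not None:\n", + " imu_data = provider.get_imu_data_by_time_ns(\n", + " imu_stream_id, start_time, time_domain, option\n", + " )\n", + " print(f\"IMU sample at {imu_data.capture_timestamp_ns} ns, accel: {imu_data.accel_msec2}\")" + ] + },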
frame_index)\n", + " axes[idx].imshow(image[0].to_numpy_array(), cmap=\"gray\", vmin=0, vmax=255)\n", + " axes[idx].title.set_text(stream_name)\n", + " axes[idx].tick_params(left=False, right=False, labelleft=False, labelbottom=False, bottom=False)\n", + "plt.show()\n", + "\n", + "# Same example using Time\n", + "rgb_stream_id = StreamId('214-1')\n", + "plt.figure()\n", + "fig, axes = plt.subplots(1, 4, figsize=(12, 5))\n", + "fig.suptitle('Retrieving image data using Time')\n", + "\n", + "time_domain = TimeDomain.DEVICE_TIME # query data based on host time\n", + "option = TimeQueryOptions.CLOSEST # get data whose time [in TimeDomain] is CLOSEST to query time\n", + "start_time = provider.get_first_time_ns(rgb_stream_id, time_domain)\n", + "\n", + "for idx, [stream_name, stream_id] in enumerate(list(stream_mappings.items())):\n", + " image = provider.get_image_data_by_time_ns(stream_id, start_time, time_domain, option)\n", + " axes[idx].imshow(image[0].to_numpy_array(), cmap=\"gray\", vmin=0, vmax=255)\n", + " axes[idx].title.set_text(stream_name)\n", + " axes[idx].tick_params(left=False, right=False, labelleft=False, labelbottom=False, bottom=False)\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "id": "c5033225", + "metadata": {}, + "source": [ + "# Summarize a VRS using thumbnails\n", + "\n", + "Goals:\n", + "- Summarize a VRS using 10 image side by side\n", + "\n", + "Key learnings:\n", + "- Image streams are identified with a Unique Identifier: stream_id\n", + "- PIL images can be created from Numpy array" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "933725b6", + "metadata": {}, + "outputs": [], + "source": [ + "from PIL import Image, ImageOps\n", + "from tqdm import tqdm\n", + "\n", + "rgb_stream_id = StreamId(\"214-1\")\n", + "\n", + "# Retrieve Start and End time for the given Sensor Stream Id\n", + "start_time = provider.get_first_time_ns(rgb_stream_id, time_domain)\n", + "end_time = provider.get_last_time_ns(rgb_stream_id, time_domain)\n", + "\n", + "# Retrieve image size for the RGB stream\n", + "time_domain = TimeDomain.DEVICE_TIME # query data based on host time\n", + "option = TimeQueryOptions.CLOSEST # get data whose time [in TimeDomain] is CLOSEST to query time\n", + "\n", + "image_config = provider.get_image_configuration(rgb_stream_id)\n", + "width = image_config.image_width\n", + "height = image_config.image_height\n", + "\n", + "sample_count = 10\n", + "resize_ratio = 10\n", + "thumbnail = newImage = Image.new(\n", + " \"RGB\", (int(width * sample_count / resize_ratio), int(height / resize_ratio))\n", + ")\n", + "current_width = 0\n", + "\n", + "\n", + "# Samples 10 timestamps\n", + "sample_timestamps = np.linspace(start_time, end_time, sample_count)\n", + "for sample in tqdm(sample_timestamps):\n", + " image_tuple = provider.get_image_data_by_time_ns(rgb_stream_id, int(sample), time_domain, option)\n", + " image_array = image_tuple[0].to_numpy_array()\n", + " image = Image.fromarray(image_array)\n", + " new_size = (\n", + " int(image.size[0] / resize_ratio),\n", + " int(image.size[1] / resize_ratio),\n", + " )\n", + " image = image.resize(new_size).rotate(-90)\n", + " thumbnail.paste(image, (current_width, 0))\n", + " current_width = int(current_width + width / resize_ratio)\n", + "\n", + "from IPython.display import Image\n", + "display(thumbnail)" + ] + }, + { + "cell_type": "markdown", + "id": "0850064e", + "metadata": {}, + "source": [ + "# Obtain mapping between stream_id and sensor label\n", + "Goals:\n", + "- In a vrs 
file, each sensor's data is identified through a stream_id\n", + "- Learn the mapping between stream_id and label for each sensor\n", + "\n", + "Key learnings:\n", + "- VRS uses a unique identifier for each stream, called stream_id.\n", + "- Each sensor data record is attached to a stream_id, which contains two parts: [RecordableTypeId, InstanceId].\n", + "- To get the actual readable name of each sensor,\n", + "we can use `get_label_from_stream_id`, and vice versa `get_stream_id_from_label`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "43d83a1e", + "metadata": {}, + "outputs": [], + "source": [ + "streams = provider.get_all_streams()\n", + "for stream_id in streams:\n", + " label = provider.get_label_from_stream_id(stream_id)\n", + " print(\n", + " f\"stream_id: [{stream_id}] convert to label: [{label}] and back: [{provider.get_stream_id_from_label(label)}]\"\n", + " )" + ] + },
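+ { + "cell_type": "markdown", + "id": "3c4d5e6f", + "metadata": {}, + "source": [ + "Since later cells repeatedly look up streams by label, a small sketch caching the full label-to-stream_id mapping with the two calls shown above may be convenient (the dictionary name is illustrative):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4d5e6f7a", + "metadata": {}, + "outputs": [], + "source": [ + "# Sketch: build a reusable label -> stream_id lookup table.\n", + "label_to_stream_id = {\n", + " provider.get_label_from_stream_id(sid): sid for sid in provider.get_all_streams()\n", + "}\n", + "print(label_to_stream_id[\"camera-rgb\"])" + ] + },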
"### Step 3: create iterator to deliver data\n", + "`TimeDomain` contains the following\n", + "* `RECORD_TIME`: timestamp stored in vrs index, fast to access, but not guaranteed which time domain\n", + "* `DEVICE_TIME`: capture time in device's timedomain, accurate\n", + "* `HOST_TIME`: arrival time in host computer's timedomain, may not be accurate\n", + "* `TIME_CODE`: capture in TimeSync server's timedomain\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "efa5aad8", + "metadata": {}, + "outputs": [], + "source": [ + "iterator = provider.deliver_queued_sensor_data(options)\n", + "for sensor_data in iterator:\n", + " label = provider.get_label_from_stream_id(sensor_data.stream_id())\n", + " sensor_type = sensor_data.sensor_data_type()\n", + " device_timestamp = sensor_data.get_time_ns(TimeDomain.DEVICE_TIME)\n", + " host_timestamp = sensor_data.get_time_ns(TimeDomain.HOST_TIME)\n", + " timecode_timestamp = sensor_data.get_time_ns(TimeDomain.TIME_CODE)\n", + " print(\n", + " f\"\"\"obtain data from {label} of type {sensor_type} with \\n\n", + " DEVICE_TIME: {device_timestamp} nanoseconds \\n\n", + " HOST_TIME: {host_timestamp} nanoseconds \\n\n", + " \"\"\"\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "a0796407", + "metadata": {}, + "source": [ + "# Random access data\n", + "Goal\n", + "- Access data from a stream randomly using a data index or a timestamp\n", + "\n", + "Key learnings\n", + "- Sensor data can be obtained through index within the range of [0, number of data for this stream_id)\n", + "\n", + " - `get_sensor_data_by_index(stream_id, index)`\n", + " - `get_image_data_by_index(stream_id, index)`\n", + " - Access other sensor data by index interface is available in core/python/VrsDataProviderPyBind.h\n", + " \n", + "- `TimeQueryOptions` has three options: `TimeQueryOptions.BEFORE`, `TimeQueryOptions.AFTER`, `TimeQueryOptions.CLOSEST`\n", + "- Query through index will provide the exact data vs query through a timestamp that is not exact, data nearby will be omitted base on `TimeQueryOptions`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "63657c1f", + "metadata": {}, + "outputs": [], + "source": [ + "sensor_name = \"camera-slam-right\"\n", + "sensor_stream_id = provider.get_stream_id_from_label(sensor_name)\n", + "\n", + "# get all image data by index\n", + "num_data = provider.get_num_data(sensor_stream_id)\n", + "\n", + "for index in range(0, num_data):\n", + " image_data = provider.get_image_data_by_index(sensor_stream_id, index)\n", + " print(\n", + " f\"Get image: {index} with timestamp {image_data[1].capture_timestamp_ns}\"\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "dca84bb5", + "metadata": {}, + "source": [ + "### Sensor data can be obtained by timestamp (nanoseconds)\n", + "* Get stream time range `get_first_time_ns` and `get_last_time_ns`\n", + "* Specify timedomain: `TimeDomain.DEVICE_TIME` (default)\n", + "* Query data by queryTime\n", + " * `TimeQueryOptions.BEFORE` (default): sensor_dataTime <= queryTime\n", + " * `TimeQueryOptions.AFTER` : sensor_dataTime >= queryTime\n", + " * `TimeQueryOptions.CLOSEST` : sensor_dataTime closest to queryTime" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7783e83f", + "metadata": {}, + "outputs": [], + "source": [ + "time_domain = TimeDomain.DEVICE_TIME # query data based on DEVICE_TIME\n", + "option = TimeQueryOptions.CLOSEST # get data whose time [in TimeDomain] is CLOSEST to query time\n", + "\n", + "start_time = 
+ { + "cell_type": "markdown", + "id": "dca84bb5", + "metadata": {}, + "source": [ + "### Sensor data can be obtained by timestamp (nanoseconds)\n", + "* Get the stream time range with `get_first_time_ns` and `get_last_time_ns`\n", + "* Specify the time domain: `TimeDomain.DEVICE_TIME` (default)\n", + "* Query data by queryTime (the three options are compared in the sketch after the next cell)\n", + " * `TimeQueryOptions.BEFORE` (default): sensor_dataTime <= queryTime\n", + " * `TimeQueryOptions.AFTER` : sensor_dataTime >= queryTime\n", + " * `TimeQueryOptions.CLOSEST` : sensor_dataTime closest to queryTime" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7783e83f", + "metadata": {}, + "outputs": [], + "source": [ + "time_domain = TimeDomain.DEVICE_TIME # query data based on DEVICE_TIME\n", + "option = TimeQueryOptions.CLOSEST # get data whose time [in TimeDomain] is CLOSEST to query time\n", + "\n", + "start_time = provider.get_first_time_ns(sensor_stream_id, time_domain)\n", + "end_time = provider.get_last_time_ns(sensor_stream_id, time_domain)\n", + "\n", + "for time in range(start_time, end_time, int(1e7)):\n", + " image_data = provider.get_image_data_by_time_ns(\n", + " sensor_stream_id, time, time_domain, option\n", + " )\n", + " print(\n", + " f\"query time {time} and get capture image time {image_data[1].capture_timestamp_ns} within range {start_time} {end_time}\"\n", + " )" + ] + },
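+ { + "cell_type": "markdown", + "id": "7a8b9c0d", + "metadata": {}, + "source": [ + "To make the BEFORE/AFTER/CLOSEST semantics above concrete, a small sketch querying one off-grid timestamp with all three options (the 5 ms offset is an arbitrary choice intended to land between frames):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8b9c0d1e", + "metadata": {}, + "outputs": [], + "source": [ + "# Sketch: compare the three TimeQueryOptions for one query timestamp.\n", + "query_time_ns = start_time + int(5e6) # 5 ms past the first frame (arbitrary)\n", + "for opt in (TimeQueryOptions.BEFORE, TimeQueryOptions.AFTER, TimeQueryOptions.CLOSEST):\n", + " data = provider.get_image_data_by_time_ns(\n", + " sensor_stream_id, query_time_ns, time_domain, opt\n", + " )\n", + " print(f\"{opt}: frame at {data[1].capture_timestamp_ns} ns\")" + ] + },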
+ { + "cell_type": "markdown", + "id": "a8be0b53", + "metadata": {}, + "source": [ + "### Get sensor data configuration" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6824e56a", + "metadata": {}, + "outputs": [], + "source": [ + "def image_config_example(config):\n", + " print(f\"device_type {config.device_type}\")\n", + " print(f\"device_version {config.device_version}\")\n", + " print(f\"device_serial {config.device_serial}\")\n", + " print(f\"sensor_serial {config.sensor_serial}\")\n", + " print(f\"nominal_rate_hz {config.nominal_rate_hz}\")\n", + " print(f\"image_width {config.image_width}\")\n", + " print(f\"image_height {config.image_height}\")\n", + " print(f\"pixel_format {config.pixel_format}\")\n", + " print(f\"gamma_factor {config.gamma_factor}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f3bf5afb", + "metadata": {}, + "outputs": [], + "source": [ + "config = provider.get_image_configuration(provider.get_stream_id_from_label(\"camera-slam-right\"))\n", + "image_config_example(config)" + ] + }, + { + "cell_type": "markdown", + "id": "ddf4af2e", + "metadata": {}, + "source": [ + "# Calibration examples\n", + "Goal:\n", + "- Obtain camera extrinsics and intrinsics\n", + "- Learn to project a 3D point to the camera frame\n", + "\n", + "Key learnings\n", + "- Get calibration for different sensors using sensor labels\n", + "- Learn how to use extrinsics/intrinsics to project a 3D point into a given camera\n", + "- Reference frame convention" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c47e8e18", + "metadata": {}, + "outputs": [], + "source": [ + "device_calib = provider.get_device_calibration()\n", + "all_sensor_labels = device_calib.get_all_labels()\n", + "print(f\"device calibration contains calibrations for the following sensors \\n {all_sensor_labels}\")" + ] + }, + { + "cell_type": "markdown", + "id": "872040fa", + "metadata": {}, + "source": [ + "### Project a 3D point to camera frame\n", + "\n", + "In this section we will learn how to retrieve calibration data and how to use it.\n", + "Aria calibration is defined by two objects: one defining the intrinsics (`rgb_calib.project` and `rgb_calib.unproject`) and one defining the extrinsics as an SE3 pose (`device_calib.get_transform_device_sensor(sensor_label)`).\n", + "\n", + "Intrinsics can be used to project a 3D point to the image plane or un-project a 2D point as a bearing vector (see the sketch after the code below). Extrinsics are used to set the camera in world coordinates at a given rotation and position in space.\n", + "\n", + "### Reference frame convention\n", + "\n", + "> `transform_sensor1_sensor3` = `transform_sensor1_sensor2` * `transform_sensor2_sensor3` \\\n", + "> `point_in_sensor`: 3D point measured from the sensor's reference frame \\\n", + "> `point_in_sensor1` = `transform_sensor1_sensor` * `point_in_sensor`\n", + "\n", + "Device Frame: `device_calib.get_origin_label() = camera-slam-left`\\\n", + "Sensor extrinsics: `device_calib.get_transform_device_sensor(sensor_label)`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f7e39f66", + "metadata": {}, + "outputs": [], + "source": [ + "camera_name = \"camera-rgb\"\n", + "transform_device_camera = device_calib.get_transform_device_sensor(camera_name).to_matrix()\n", + "transform_camera_device = np.linalg.inv(transform_device_camera)\n", + "print(f\"Device calibration origin label {device_calib.get_origin_label()}\")\n", + "print(f\"{camera_name} has extrinsics of \\n {transform_device_camera}\")\n", + "\n", + "rgb_calib = device_calib.get_camera_calib(\"camera-rgb\")\n", + "if rgb_calib is not None:\n", + " # project a 3D point in device frame [camera-slam-left] to rgb camera\n", + " point_in_device = np.array([0, 0, 10])\n", + " point_in_camera = (\n", + " np.matmul(transform_camera_device[0:3,0:3], point_in_device.transpose())\n", + " + transform_camera_device[0:3,3]\n", + " )\n", + "\n", + " maybe_pixel = rgb_calib.project(point_in_camera)\n", + " if maybe_pixel is not None:\n", + " print(\n", + " f\"Get pixel {maybe_pixel} within image of size {rgb_calib.get_image_size()}\"\n", + " )" + ] + },
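+ { + "cell_type": "markdown", + "id": "9c0d1e2f", + "metadata": {}, + "source": [ + "The section above notes that intrinsics can also un-project a 2D pixel into a bearing vector, which is not otherwise demonstrated. A hedged sketch completing the round trip with `maybe_pixel` and `rgb_calib` from the cell above (the exact scale of the returned ray is not assumed, hence the normalization):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0d1e2f3a", + "metadata": {}, + "outputs": [], + "source": [ + "# Sketch: un-project the projected pixel back to a ray in the camera frame.\n", + "# (Assumes the previous cell ran and rgb_calib / maybe_pixel are defined.)\n", + "if maybe_pixel is not None:\n", + " ray_in_camera = rgb_calib.unproject(maybe_pixel)\n", + " # Normalize to a unit bearing; it should point along point_in_camera.\n", + " print(f\"bearing: {ray_in_camera / np.linalg.norm(ray_in_camera)}\")" + ] + },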
+ { + "cell_type": "markdown", + "id": "3ad7ddcb", + "metadata": {}, + "source": [ + "### Get calibration data for other sensors\n", + "Aria is a multimodal capture device; each sensor's calibration can be retrieved using the same interface. Only EyeTracking (`get_aria_et_camera_calib()`) and Audio calibration (`get_aria_microphone_calib()`) are a bit different, since multiple sensors share the same stream_id." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5ecc4835", + "metadata": {}, + "outputs": [], + "source": [ + "et_calib = device_calib.get_aria_et_camera_calib()\n", + "if et_calib is not None:\n", + " print(f\"Camera {et_calib[0].get_label()} has image size {et_calib[0].get_image_size()}\")\n", + " print(f\"Camera {et_calib[1].get_label()} has image size {et_calib[1].get_image_size()}\")\n", + "\n", + "imu_calib = device_calib.get_imu_calib(\"imu-left\")\n", + "if imu_calib is not None:\n", + " print(f\"{imu_calib.get_label()} has extrinsics transform_Device_Imu:\\n {imu_calib.get_transform_device_imu().to_matrix3x4()}\")" + ] + }, + { + "cell_type": "markdown", + "id": "700e8af5", + "metadata": {}, + "source": [ + "### Undistort an image\n", + "You can remove distortions from an image in three steps.\n", + "\n", + "First, use the provider to access the image and the camera calibration of the stream. Then create a \"linear\" (pinhole) camera model with `get_linear_camera_calibration`, as in the code below. The function allows you to specify the image size as well as the focal length of the model, assuming the principal point is at the image center. Finally, apply the `distort_by_calibration` function to undistort the image." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e20cd362", + "metadata": {}, + "outputs": [], + "source": [ + "# input: retrieve image as a numpy array\n", + "sensor_name = \"camera-rgb\"\n", + "sensor_stream_id = provider.get_stream_id_from_label(sensor_name)\n", + "image_data = provider.get_image_data_by_index(sensor_stream_id, 0)\n", + "image_array = image_data[0].to_numpy_array()\n", + "# input: retrieve image distortion\n", + "device_calib = provider.get_device_calibration()\n", + "src_calib = device_calib.get_camera_calib(sensor_name)\n", + "\n", + "# create output calibration: a linear model of image size 512x512 and focal length 150\n", + "# Invisible pixels are shown as black.\n", + "dst_calib = calibration.get_linear_camera_calibration(512, 512, 150, sensor_name)\n", + "\n", + "# undistort the image\n", + "rectified_array = calibration.distort_by_calibration(image_array, dst_calib, src_calib, InterpolationMethod.BILINEAR)\n", + "\n", + "# visualize input and results\n", + "fig, axes = plt.subplots(1, 2, figsize=(12, 5))\n", + "fig.suptitle(f\"Image undistortion (focal length = {dst_calib.get_focal_lengths()})\")\n", + "\n", + "axes[0].imshow(image_array, cmap=\"gray\", vmin=0, vmax=255)\n", + "axes[0].title.set_text(f\"sensor image ({sensor_name})\")\n", + "axes[0].tick_params(left=False, right=False, labelleft=False, labelbottom=False, bottom=False)\n", + "axes[1].imshow(rectified_array, cmap=\"gray\", vmin=0, vmax=255)\n", + "axes[1].title.set_text(f\"undistorted image ({sensor_name})\")\n", + "axes[1].tick_params(left=False, right=False, labelleft=False, labelbottom=False, bottom=False)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "a098c273", + "metadata": {}, + "source": [ + "Note the rectified image shows a circular area of visible pixels. If you want the entire rectified image to be covered by pixels, you can increase the magnification." + ] + }, + { + "cell_type": "markdown", + "id": "4d29fa4b", + "metadata": {}, + "source": [ + "# Devignetting\n", + "\n", + "Devignetting corrects uneven lighting, enhancing image uniformity and clarity. We provide devignetting masks for the camera-rgb full size image [2880, 2880], the camera-rgb half size image [1408, 1408] and the slam image [640, 480].\n", + "1. Aria devignetting masks can be downloaded from [Link](https://www.projectaria.com/async/sample/download/?bucket=core&filename=aria_camera_devignetting_masks_updated.zip). The archive contains the following files:\n", + "\n", + "```\n", + "aria_camera_devignetting_masks\n", + "|- slam_devignetting_mask.bin\n", + "|- rgb_half_devignetting_mask.bin\n", + "|- rgb_full_devignetting_mask.bin\n", + "```\n", + "2. Set the devignetting mask folder path to the local aria camera devignetting masks folder path.\n", + " `mask_folder_path = \"aria_camera_devignetting_masks\"`\n", + " `set_devignetting_mask_folder_path(mask_folder_path)`\n", + "3. Load the mask by passing a camera label, currently supporting \"camera-rgb\", \"camera-slam-left\" and \"camera-slam-right\" (a SLAM example is sketched after Step 2 below).\n", + " `load_devignetting_mask(label)`\n", + "4. 
Apply devignetting to the source image.\n", + " `devignetting(src_image, devignetting_mask)`" + ] + }, + { + "cell_type": "markdown", + "id": "2b7e5d21-80d1-4acd-9ba9-c2dcb84e6380", + "metadata": {}, + "source": [ + "## Step 1: Download devignetting mask" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "747698e2-5cba-4b03-a90d-2d08b3e53456", + "metadata": {}, + "outputs": [], + "source": [ + "from urllib.request import urlretrieve\n", + "import zipfile\n", + "\n", + "# Download from url\n", + "devignetting_mask_folder_path = os.path.join(repo_path, \"devignetting_masks\")\n", + "downloaded_devignetting_mask_zip = os.path.join(devignetting_mask_folder_path, \"aria_camera_devignetting_masks.zip\")\n", + "if not os.path.exists(devignetting_mask_folder_path):\n", + " os.mkdir(devignetting_mask_folder_path)\n", + "urlretrieve(\"https://www.projectaria.com/async/sample/download/?bucket=core&filename=aria_camera_devignetting_masks_updated.zip\", downloaded_devignetting_mask_zip)\n", + "\n", + "# unzip the mask files, with cross-platform compatibility\n", + "with zipfile.ZipFile(downloaded_devignetting_mask_zip, 'r') as zip_ref:\n", + " # Extract all files, skipping archive metadata entries (e.g. __MACOSX)\n", + " files_to_extract = [f for f in zip_ref.namelist() if not f.startswith('_')]\n", + " zip_ref.extractall(devignetting_mask_folder_path, members=files_to_extract)\n", + " # Print out the filenames\n", + " print(\"Successfully downloaded and extracted the following files for devignetting:\")\n", + " print(files_to_extract)" + ] + }, + { + "cell_type": "markdown", + "id": "62f31e06-a088-42ad-9500-c5157c871205", + "metadata": {}, + "source": [ + "## Step 2: Set devignetting mask folder path with the local folder path" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d373b069-46bd-4855-b402-a6f9aa61fed4", + "metadata": {}, + "outputs": [], + "source": [ + "deviceCalib = provider.get_device_calibration()\n", + "deviceCalib.set_devignetting_mask_folder_path(devignetting_mask_folder_path)" + ] + },
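+ { + "cell_type": "markdown", + "id": "1e2f3a4b", + "metadata": {}, + "source": [ + "Per the supported labels listed earlier, the same interface should also cover the SLAM cameras; a one-line sketch (assuming the mask files from Step 1 are in place):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2f3a4b5c", + "metadata": {}, + "outputs": [], + "source": [ + "# Sketch: SLAM-camera masks load through the same call as camera-rgb.\n", + "slam_devignetting_mask = deviceCalib.load_devignetting_mask(\"camera-slam-left\")" + ] + },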
+ { + "cell_type": "markdown", + "id": "5a69ed80-ddbb-48f5-92d5-6b06e0c393a4", + "metadata": {}, + "source": [ + "## Step 3 & 4: load mask and apply devignetting to source image." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "752d24b0-da52-4480-9373-3a4e1cbee0b9", + "metadata": {}, + "outputs": [], + "source": [ + "stream_id = provider.get_stream_id_from_label(\"camera-rgb\")\n", + "image_data = provider.get_image_data_by_index(stream_id, 0)\n", + "src_image_array = image_data[0].to_numpy_array()\n", + "\n", + "# load devignetting mask by feeding camera label\n", + "devignetting_mask = deviceCalib.load_devignetting_mask(\"camera-rgb\")\n", + "\n", + "# apply devignetting to source image\n", + "devignetted_image_array = calibration.devignetting(\n", + " src_image_array, devignetting_mask\n", + " ).astype(np.uint8)\n", + "\n", + "# visualize input and results\n", + "fig, axes = plt.subplots(1, 2, figsize=(12, 5))\n", + "fig.suptitle(\"Image devignetting (camera-rgb)\")\n", + "\n", + "axes[0].imshow(src_image_array, vmin=0, vmax=255)\n", + "axes[0].title.set_text(\"original image\")\n", + "axes[1].imshow(devignetted_image_array, vmin=0, vmax=255)\n", + "axes[1].title.set_text(\"devignetted image\")\n", + "\n", + "plt.show()" + ] + } + ], + "metadata": { + "custom": { + "cells": [], + "metadata": { + "fileHeader": "", "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" }, "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.5" + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" } + }, + "nbformat": 4, + "nbformat_minor": 5 + }, + "indentAmount": 2, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" }, - "nbformat": 4, - "nbformat_minor": 5 + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/website/docs/data_utilities/advanced_code_snippets/image_utilities.mdx b/website/docs/data_utilities/advanced_code_snippets/image_utilities.mdx index 401f69f60..a6a96b8e7 100644 --- a/website/docs/data_utilities/advanced_code_snippets/image_utilities.mdx +++ b/website/docs/data_utilities/advanced_code_snippets/image_utilities.mdx @@ -155,7 +155,7 @@ auto ray = pinholeCW90.getT_Device_Camera() * pinholeCW90.projectNoChecks(textPi ## Image devignetting -Devignetting corrects uneven lighting, enhancing image uniformity and clarity. We provide devignetting for camera-rgb full size image [2880, 2880], camera-rgb half size image[1408, 1408] and slam iamge [640, 480]. +Devignetting corrects uneven lighting, enhancing image uniformity and clarity. We provide devignetting for the camera-rgb full size image [2880, 2880], the camera-rgb half size image [1408, 1408] and the slam image [640, 480]. -1. Download devignetting masks from [Link](https://www.projectaria.com/async/sample/download/?bucket=core&filename=aria_camera_devignetting_masks.zip) +1. Download devignetting masks from [Link](https://www.projectaria.com/async/sample/download/?bucket=core&filename=aria_camera_devignetting_masks_updated.zip) 2. Unzip to local folder. 
The folder should follow the structure below: ``` From e1ad725bea945c77368b340e40d867dd7147c7be Mon Sep 17 00:00:00 2001 From: Pierre Moulon Date: Thu, 16 Jan 2025 08:19:02 -0800 Subject: [PATCH 2/2] {Build} Continuous Integration - Use Ubuntu-22.04 (#176) Summary: GitHub Runners now use Ubuntu-24.04 as the default image (it was previously Ubuntu 22.04): https://github.com/actions/runner-images/issues/10636 We continue to use Ubuntu-22.04 as the default image for our python wheel workflow (our process fails to find the JPEG library on Ubuntu-24.04 for some reason) and we will solve this problem later. - It could be because Ubuntu 24 decided not to support JPEG-XL by default Note: - We force Ubuntu 22.04 for the workflows build-and-test_projects.yml, build-and-test.yml, and build-wheels-and-deploy.yml. We don't need to change the PIXI workflow since its build is done in isolation and so is not impacted. Differential Revision: D68273681 --- .github/workflows/build-and-test.yml | 2 +- .github/workflows/build-and-test_projects.yml | 2 +- .github/workflows/build-wheels-and-deploy.yml | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index bb6d6b9a6..f43dd2678 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -16,7 +16,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-latest, macos-13, macos-14] # macos-14 is macSilicon + os: [ubuntu-22.04, macos-13, macos-14] # macos-14 is Apple silicon steps: - name : Checkout uses: actions/checkout@v4 diff --git a/.github/workflows/build-and-test_projects.yml b/.github/workflows/build-and-test_projects.yml index 4f97ef160..c0fdfc476 100644 --- a/.github/workflows/build-and-test_projects.yml +++ b/.github/workflows/build-and-test_projects.yml @@ -16,7 +16,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-latest, macos-13, macos-14] # macos-14 is macSilicon + os: [ubuntu-22.04, macos-13, macos-14] # macos-14 is Apple silicon project: [ PROJECTARIA_TOOLS_BUILD_PROJECTS_ADT, PROJECTARIA_TOOLS_BUILD_TOOLS] diff --git a/.github/workflows/build-wheels-and-deploy.yml b/.github/workflows/build-wheels-and-deploy.yml index 83935a933..a8af09bfd 100644 --- a/.github/workflows/build-wheels-and-deploy.yml +++ b/.github/workflows/build-wheels-and-deploy.yml @@ -17,7 +17,7 @@ on: jobs: build-stubs: name: Build Python stubs - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 steps: - name: Checkout repo uses: actions/checkout@v4 @@ -75,7 +75,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [macos-13, ubuntu-latest, macos-latest-xlarge, windows-2022] + os: [macos-13, ubuntu-22.04, macos-latest-xlarge, windows-2022] python-version: ["3.10", "3.11", "3.12"] # Default consider only Python >=3.10 since actions/setup-python supports only python<3.10 and mac-silicon # Other Python versions are added below as matrix entries - python-version: "3.11" cibw_build: "cp311-*" - python-version: "3.12" cibw_build: "cp312-*" # Add matrix entries: Python ["3.8", "3.9"] to [macos-13, ubuntu-latest] - - os: ubuntu-latest + - os: ubuntu-22.04 python-version: "3.8" cibw_build: "cp38-*" - - os: ubuntu-latest + - os: ubuntu-22.04 python-version: "3.9" cibw_build: "cp39-*" - os: macos-13 @@ -129,7 +129,7 @@ CIBW_SKIP: "*-manylinux_i686 *-musllinux_* *x86_64*" CIBW_ARCHS: "arm64" - name: Build wheels for CPython - if: ${{ (matrix.os == 'macos-13') || (matrix.os == 'ubuntu-latest') }} + if: ${{ (matrix.os == 'macos-13') || 
(matrix.os == 'ubuntu-22.04') }} shell: bash run: | CMAKE_BUILD_PARALLEL_LEVEL=4 python3 -m cibuildwheel --output-dir dist