diff --git a/applications/CMakeLists.txt b/applications/CMakeLists.txt index fa22887c09..f66eae7625 100644 --- a/applications/CMakeLists.txt +++ b/applications/CMakeLists.txt @@ -49,6 +49,8 @@ add_holohub_application(deltacast_receiver DEPENDS add_holohub_application(depth_anything_v2) +add_holohub_application(depth_to_point_cloud_demo DEPENDS OPERATORS depth_to_point_cloud) + add_subdirectory(distributed) add_holohub_application(endoscopy_depth_estimation) diff --git a/applications/depth_to_point_cloud_demo/CMakeLists.txt b/applications/depth_to_point_cloud_demo/CMakeLists.txt new file mode 100644 index 0000000000..261e1061f1 --- /dev/null +++ b/applications/depth_to_point_cloud_demo/CMakeLists.txt @@ -0,0 +1,33 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +project(depth_to_point_cloud_demo NONE) + +find_package(holoscan 4.0.0 REQUIRED CONFIG + PATHS "/opt/nvidia/holoscan" "/workspace/holoscan-sdk/install") + +if(BUILD_TESTING) + add_test(NAME depth_to_point_cloud_demo_python_test + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/depth_to_point_cloud_demo.py --frames 10 + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + + set_property(TEST depth_to_point_cloud_demo_python_test PROPERTY ENVIRONMENT + "PYTHONPATH=${GXF_LIB_DIR}/../python/lib:${CMAKE_BINARY_DIR}/python/lib") + + set_tests_properties(depth_to_point_cloud_demo_python_test + PROPERTIES + PASS_REGULAR_EXPRESSION "valid=" + FAIL_REGULAR_EXPRESSION "(^|[^a-z])Error;ERROR;Failed") +endif() diff --git a/applications/depth_to_point_cloud_demo/README.md b/applications/depth_to_point_cloud_demo/README.md new file mode 100644 index 0000000000..cdce9b402c --- /dev/null +++ b/applications/depth_to_point_cloud_demo/README.md @@ -0,0 +1,87 @@ +# Depth to Point Cloud Demo + +A minimal demo of the [`depth_to_point_cloud`](../../operators/depth_to_point_cloud) operator. + +Here **hardware-free** means the input is produced by an on-GPU synthetic data generator +(`SyntheticDepthGeneratorOp`) rather than a physical sensor: no depth camera, recorded dataset, or +depth-estimation network is required, so the demo runs in CI on any GPU. The generator emits a +synthetic organized depth image (a gently tilting plane) plus an aligned RGB image entirely on the +GPU; the operator deprojects it into an organized `H x W x 3` point cloud, and the demo reports the +valid-point count and Z range each frame. + +## Run + +```bash +# Synthetic source (default, hardware-free, CI-friendly): +./holohub run depth_to_point_cloud_demo +# equivalently: +./holohub run depth_to_point_cloud_demo synthetic + +# or directly: +python3 applications/depth_to_point_cloud_demo/depth_to_point_cloud_demo.py --frames 100 +``` + +Expected output (per frame): + +```text +[depth_to_point_cloud_demo] points=307200 valid=307200 z=[1.xxx, 2.xxx] m +``` + +### Sources (`--source` / run modes) + +The demo selects its input source with `--source`, exposed as HoloHub run modes: + +| Mode / `--source` | Description | +| --- | --- | +| `synthetic` (default) | On-GPU synthetic depth + RGB generator. No hardware, runs in CI. | +| `realsense` | Live Intel RealSense camera (see caveat below). | + +### Interactive 3D visualization (`--visualize`) + +By default the demo ends in a headless, CI-friendly `PointCloudStatsOp` sink that just reports +per-frame statistics. Pass `--visualize` to instead render the cloud in `HolovizOp` as 3D points: + +```bash +python3 applications/depth_to_point_cloud_demo/depth_to_point_cloud_demo.py --visualize +``` + +With `--visualize`, the organized `H x W x 3` cloud is compacted to `N x 3` with invalid (NaN) +points dropped before being handed to `HolovizOp` as a `points_3d` primitive. This path needs a +display and is therefore disabled by default (the CI mode keeps the statistics sink). + +## Pipeline + +```text +SyntheticDepthGeneratorOp --depth--> DepthToPointCloudOp --point_cloud--> PointCloudStatsOp + --color--> (or HolovizOp with --visualize) +``` + +## Using a real Intel RealSense camera + +`--source realsense` is scaffolded but **not yet runnable from this Python demo**. The +[`realsense_camera`](../../operators/realsense_camera) operator is currently C++-only (it ships no +Python bindings), and it emits its `depth_buffer` / `color_buffer` as GXF `VideoBuffer`s, whereas +`DepthToPointCloudOp` consumes a GXF `Tensor`. Wiring them from Python therefore requires: + +1. Python bindings for `realsense_camera` (add an `operators/realsense_camera/python/` module). +2. A `FormatConverterOp` between the camera and the operator to convert `VideoBuffer` → `Tensor` + (and `RGBA8` → `RGB8` for the color path). +3. Feeding intrinsics — either statically (`fx/fy/cx/cy`) or by consuming the camera's + `depth_camera_model` output through the operator's optional `intrinsics` input. + +Selecting `--source realsense` today raises a clear `NotImplementedError` pointing here. The wiring, +once the bindings exist, is: + +```python +from holohub.realsense_camera import RealsenseCameraOp # requires new Python bindings + +camera = RealsenseCameraOp(self, name="camera", allocator=...) +# camera.depth_buffer / color_buffer are VideoBuffers -> convert to Tensor via FormatConverterOp, +# then feed DepthToPointCloudOp. RealSense depth is float32 meters (units_transform applied), so +# use depth_scale=1.0 — not 0.001 (the 0.001 uint16-millimeter value is only for raw Z16 sources). +``` + +## Requirements + +- Holoscan SDK ≥ 4.0.0, CUDA, CuPy. Builds the `depth_to_point_cloud` operator (declared as a + dependency). Platforms: `x86_64`, `aarch64`. diff --git a/applications/depth_to_point_cloud_demo/depth_to_point_cloud_demo.py b/applications/depth_to_point_cloud_demo/depth_to_point_cloud_demo.py new file mode 100644 index 0000000000..6fe9d98af9 --- /dev/null +++ b/applications/depth_to_point_cloud_demo/depth_to_point_cloud_demo.py @@ -0,0 +1,268 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Demo for DepthToPointCloudOp. + +Selectable input source (``--source``): + +* ``synthetic`` (default) generates an organized depth image (a gently tilting plane) plus an + aligned RGB image entirely on the GPU. No camera or dataset is required, so the app runs in CI. +* ``realsense`` is scaffolded for a live Intel RealSense camera but is not yet runnable from Python + (the ``realsense_camera`` operator ships no Python bindings and emits ``VideoBuffer``s rather than + the ``Tensor`` this operator consumes); selecting it raises ``NotImplementedError``. See README.md. + +The deprojected organized point cloud is either summarized by ``PointCloudStatsOp`` (default, +CI-friendly) or rendered as 3D points by ``HolovizOp`` when ``--visualize`` is passed. +""" + +import argparse + +import cupy as cp +from holoscan.conditions import CountCondition +from holoscan.core import Application, Operator, OperatorSpec +from holoscan.operators import HolovizOp +from holoscan.resources import BlockMemoryPool, MemoryStorageType + +from holohub.depth_to_point_cloud import DepthToPointCloudOp + + +class SyntheticDepthGeneratorOp(Operator): + """Emit a synthetic float32 depth image (meters) and an aligned uint8 RGB image.""" + + def __init__(self, fragment, *args, width=640, height=480, **kwargs): + self.width = width + self.height = height + self.frame = 0 + ys, xs = cp.meshgrid( + cp.arange(height, dtype=cp.float32), + cp.arange(width, dtype=cp.float32), + indexing="ij", + ) + self._xs = xs + self._ys = ys + super().__init__(fragment, *args, **kwargs) + + def setup(self, spec: OperatorSpec): + spec.output("depth") + spec.output("color") + + def compute(self, op_input, op_output, context): + t = self.frame * 0.05 + # A tilted plane in meters: ~1.0 m near the top-left, increasing across the frame, + # with a slow global oscillation so successive frames differ. + depth = ( + 1.0 + 0.5 * (self._xs / self.width) + 0.4 * (self._ys / self.height) + 0.3 * cp.sin(t) + ).astype(cp.float32) + + r = (255.0 * self._xs / self.width).astype(cp.uint8) + g = (255.0 * self._ys / self.height).astype(cp.uint8) + b = cp.full_like(r, 128) + color = cp.ascontiguousarray(cp.stack([r, g, b], axis=-1)) # HxWx3 uint8 + + op_output.emit({"depth": depth}, "depth") + op_output.emit({"color": color}, "color") + self.frame += 1 + + +class PointCloudStatsOp(Operator): + """Pull the point cloud and report valid-point count and Z range (CI-friendly sink).""" + + def setup(self, spec: OperatorSpec): + spec.input("in") + + def compute(self, op_input, op_output, context): + msg = op_input.receive("in") + pc = cp.asarray(msg["point_cloud"]) # HxWx3 float32 + z = pc[..., 2] + valid = ~cp.isnan(z) + n_valid = int(valid.sum().get()) + + # The colored path is connected, so a colors tensor must accompany the cloud and + # share its H x W footprint (3 uint8 channels). + colors = cp.asarray(msg["colors"]) # HxWx3 uint8 + if colors.shape[:2] != pc.shape[:2] or colors.shape[2] != 3: + raise RuntimeError( + f"colors shape {colors.shape} does not match cloud {pc.shape[:2]} x 3" + ) + + if n_valid: + zmin = float(z[valid].min().get()) + zmax = float(z[valid].max().get()) + print( + f"[depth_to_point_cloud_demo] points={pc.shape[0] * pc.shape[1]} " + f"valid={n_valid} z=[{zmin:.3f}, {zmax:.3f}] m colors={tuple(colors.shape)}" + ) + else: + print("[depth_to_point_cloud_demo] no valid points") + + +class CloudToHolovizOp(Operator): + """Compact the organized H x W x 3 cloud to a flat N x 3 of finite points for HolovizOp. + + HolovizOp renders a ``points_3d`` primitive from an ``(N, 3)`` coordinate tensor, so the + organized cloud is flattened and the invalid (NaN) pixels are dropped first, as recommended in + the README. The compaction stays on the GPU (CuPy), keeping the path device-resident. + """ + + def setup(self, spec: OperatorSpec): + spec.input("in") + spec.output("out") + + def compute(self, op_input, op_output, context): + msg = op_input.receive("in") + pc = cp.asarray(msg["point_cloud"]).reshape(-1, 3) # (H*W, 3) float32 + valid = cp.isfinite(pc).all(axis=1) + pts = pc[valid] # (N, 3) + # Holoviz expects at least one coordinate; emit a degenerate point if the frame is empty. + if pts.shape[0] == 0: + pts = cp.zeros((1, 3), dtype=cp.float32) + coords = cp.ascontiguousarray(pts.astype(cp.float32)) # (N, 3) as HolovizOp expects + op_output.emit({"point_cloud": coords}, "out") + + +class DepthToPointCloudDemoApp(Application): + def __init__(self, frames=100, width=640, height=480, source="synthetic", visualize=False): + super().__init__() + self._frames = frames + self._width = width + self._height = height + self._source = source + self._visualize = visualize + + def compose(self): + if self._source == "realsense": + raise NotImplementedError( + "--source realsense is not yet runnable from this Python demo: the " + "realsense_camera operator ships no Python bindings and emits VideoBuffers " + "rather than the Tensor DepthToPointCloudOp consumes. See the 'Using a real " + "Intel RealSense camera' section of README.md for the required wiring " + "(Python bindings + FormatConverterOp + intrinsics)." + ) + if self._source != "synthetic": + raise ValueError(f"unknown source '{self._source}'") + + generator = SyntheticDepthGeneratorOp( + self, + CountCondition(self, count=self._frames), + name="generator", + width=self._width, + height=self._height, + ) + + # Two device tensors per frame (HxWx3 float32 point cloud + HxWx3 uint8 colors) drawn + # from this pool; size each block for the larger (float32 XYZ) output and keep enough + # blocks for both tensors plus one frame of pipelining headroom. + out_blocks = 4 + block_size = self._width * self._height * 3 * 4 # float32 XYZ is the larger output + cloud = DepthToPointCloudOp( + self, + name="point_cloud", + allocator=BlockMemoryPool( + self, + name="pool", + storage_type=MemoryStorageType.DEVICE, + block_size=block_size, + num_blocks=out_blocks, + ), + # Pinhole intrinsics for the synthetic camera: square pixels (fx == fy) with the + # principal point at the image center. A single focal length is used for both axes + # by design; the focal length is independent of the image aspect ratio. + fx=float(self._width) * 0.8, + fy=float(self._width) * 0.8, + cx=(self._width - 1) / 2.0, + cy=(self._height - 1) / 2.0, + depth_scale=1.0, # synthetic depth is already in meters + depth_min=0.1, + depth_max=10.0, + ) + + self.add_flow(generator, cloud, {("depth", "depth")}) + self.add_flow(generator, cloud, {("color", "color")}) + + if self._visualize: + # Compact the organized cloud to N x 3 finite points, then render as 3D points. + compact = CloudToHolovizOp(self, name="compact") + visualizer = HolovizOp( + self, + name="holoviz", + window_title="depth_to_point_cloud_demo", + width=1280, + height=720, + # The cloud lives in the camera optical frame (x-right, y-down, z-forward), + # centered near (0, 0, ~1.5 m). Place the camera in front of and above it (negative + # z is "in front", negative y is "up" in this frame) so the tilted plane is framed. + camera_eye=[1.5, -1.5, -1.0], + camera_look_at=[0.0, 0.0, 1.5], + camera_up=[0.0, -1.0, 0.0], + tensors=[ + dict( + name="point_cloud", + type="points_3d", + color=[0.0, 1.0, 0.0, 1.0], + point_size=3.0, + ), + ], + ) + self.add_flow(cloud, compact, {("point_cloud", "in")}) + self.add_flow(compact, visualizer, {("out", "receivers")}) + else: + sink = PointCloudStatsOp(self, name="stats") + self.add_flow(cloud, sink, {("point_cloud", "in")}) + + +def main(): + parser = argparse.ArgumentParser(description="DepthToPointCloudOp demo") + parser.add_argument( + "-s", + "--source", + choices=["synthetic", "realsense"], + default="synthetic", + help=( + "Input source. 'synthetic' (default) uses an on-GPU depth+RGB generator (hardware-free, " + "CI-friendly); 'realsense' targets a live Intel RealSense camera (see README — not yet " + "runnable from Python)." + ), + ) + parser.add_argument( + "--visualize", + action="store_true", + help="Render the cloud as 3D points in HolovizOp instead of the statistics sink " + "(requires a display).", + ) + parser.add_argument("--frames", type=int, default=100, help="Number of frames to process") + parser.add_argument("--width", type=int, default=640) + parser.add_argument("--height", type=int, default=480) + args = parser.parse_args() + + for name, value in ( + ("--frames", args.frames), + ("--width", args.width), + ("--height", args.height), + ): + if value <= 0: + parser.error(f"{name} must be a positive integer (got {value})") + + app = DepthToPointCloudDemoApp( + frames=args.frames, + width=args.width, + height=args.height, + source=args.source, + visualize=args.visualize, + ) + app.run() + + +if __name__ == "__main__": + main() diff --git a/applications/depth_to_point_cloud_demo/metadata.json b/applications/depth_to_point_cloud_demo/metadata.json new file mode 100644 index 0000000000..cc0f81f783 --- /dev/null +++ b/applications/depth_to_point_cloud_demo/metadata.json @@ -0,0 +1,55 @@ +{ + "$schema": "urn:holohub:application:v1", + "application": { + "name": "Depth to Point Cloud Demo", + "description": "Hardware-free demo of DepthToPointCloudOp: a synthetic GPU depth+RGB generator is deprojected into an organized point cloud and validated each frame.", + "authors": [ + { + "name": "Nitheesh Kumar", + "affiliation": "Zobot Lab" + } + ], + "language": "Python", + "version": "0.1.0", + "changelog": { + "0.1.0": "Initial release: synthetic depth generator -> DepthToPointCloudOp -> validation sink." + }, + "holoscan_sdk": { + "minimum_required_version": "4.0.0", + "tested_versions": [ + "4.3.0" + ] + }, + "platforms": [ + "x86_64", + "aarch64" + ], + "tags": ["Computer Vision and Perception", "Robotics", "Point Cloud", "Depth", "3D"], + "ranking": 3, + "requirements": { + "python-packages": [ + { + "name": "cupy", + "version": "13.6.0" + } + ] + }, + "default_mode": "synthetic", + "modes": { + "synthetic": { + "description": "Hardware-free on-GPU synthetic depth+RGB source (CI-friendly)", + "run": { + "command": "python3 /depth_to_point_cloud_demo.py --source synthetic", + "workdir": "holohub_bin" + } + }, + "realsense": { + "description": "Live Intel RealSense camera source (requires hardware; see README — not yet runnable from Python)", + "run": { + "command": "python3 /depth_to_point_cloud_demo.py --source realsense", + "workdir": "holohub_bin" + } + } + } + } +} diff --git a/operators/CMakeLists.txt b/operators/CMakeLists.txt index ee5ac0ae1f..9a3e09c88f 100644 --- a/operators/CMakeLists.txt +++ b/operators/CMakeLists.txt @@ -21,6 +21,7 @@ add_holohub_operator(atracsys_mode_switcher) add_holohub_operator(cvcuda_holoscan_interop) add_subdirectory(deidentification) add_subdirectory(dds) +add_holohub_operator(depth_to_point_cloud) add_holohub_operator(display_gpu_resident) add_holohub_operator(ucxx_send_receive) add_holohub_operator(emergent_source DEPENDS EXTENSIONS emergent_source) diff --git a/operators/depth_to_point_cloud/CMakeLists.txt b/operators/depth_to_point_cloud/CMakeLists.txt new file mode 100644 index 0000000000..4062e4aa7c --- /dev/null +++ b/operators/depth_to_point_cloud/CMakeLists.txt @@ -0,0 +1,54 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +cmake_minimum_required(VERSION 3.24) + +project(depth_to_point_cloud LANGUAGES CXX CUDA) + +find_package(holoscan 4.0.0 REQUIRED CONFIG + PATHS "/opt/nvidia/holoscan" "/workspace/holoscan-sdk/install") + +add_library(depth_to_point_cloud SHARED + depth_to_point_cloud.cpp + depth_to_point_cloud.hpp + deproject.cu + deproject.hpp +) + +set_target_properties(depth_to_point_cloud + PROPERTIES + # compile for the architecture of the current GPU + CUDA_ARCHITECTURES "native" +) + +target_link_libraries(depth_to_point_cloud + PUBLIC + holoscan::core +) + +target_include_directories(depth_to_point_cloud + INTERFACE + ${CMAKE_CURRENT_SOURCE_DIR} +) + +if(HOLOHUB_BUILD_PYTHON) + add_subdirectory(python) +endif() + +if(BUILD_TESTING) + add_subdirectory(test) +endif() + +install(TARGETS depth_to_point_cloud) diff --git a/operators/depth_to_point_cloud/README.md b/operators/depth_to_point_cloud/README.md new file mode 100644 index 0000000000..5e0ec07e8a --- /dev/null +++ b/operators/depth_to_point_cloud/README.md @@ -0,0 +1,101 @@ +# Depth to Point Cloud Operator + +Deproject an organized depth image into an organized point cloud on the GPU. This is the +"gateway to 3D" building block: it turns a depth image (from a depth camera, stereo matcher, +or monocular depth network) plus pinhole intrinsics into per-pixel `XYZ` points that downstream +operators (mapping, ground-plane / traversability estimation, registration, Holoviz 3D rendering, +PCL/Open3D interop) can consume — all GPU-resident, zero-copy on the hot path. + +## What it computes + +For each pixel `(u, v)` with metric depth `Z = raw_depth * depth_scale`, the output point in the +**camera optical frame** (x-right, y-down, z-forward) is: + +```text +X = (u - cx) * Z / fx +Y = (v - cy) * Z / fy +Z = Z +``` + +The kernel is a single map/elementwise pass (one CUDA thread per pixel, grid-stride, coalesced). +Pixels with `raw_depth == 0`, a non-finite value, or a metric depth outside `[depth_min, depth_max]` +are written as `(invalid_value, invalid_value, invalid_value)` (default `NaN`), keeping the output +**organized** (`H x W x 3`) so pixel neighborhoods are preserved for downstream normals/segmentation. + +> Frame note: the output is in the optical frame. For a ROS body frame (x-forward, y-left, z-up) +> apply the standard optical→body rotation downstream. + +## Ports + +| Port | Direction | Type | Notes | +| --- | --- | --- | --- | +| `depth` | in | `Entity` w/ 2D tensor | `uint16` (scaled by `depth_scale`) or `float32` (meters at `depth_scale=1.0`), shape `[H, W]` or `[H, W, 1]`, device memory | +| `intrinsics` | in (optional) | `Entity` w/ `float32[4]` | `[fx, fy, cx, cy]`; overrides the params for that frame | +| `color` | in (optional) | `Entity` w/ `uint8` image | `[H, W, 3]` or `[H, W, 4]` aligned to depth; enables colored output | +| `point_cloud` | out | `Entity` | `float32 [H, W, 3]` XYZ (and `uint8 [H, W, 3]` RGB colors when `color` is connected) | + +The emitted `colors` tensor is always 3-channel RGB; a 4-channel (RGBA) `color` input is converted to +RGB and its alpha channel is dropped. + +## Parameters + +| Parameter | Default | Description | +| --- | --- | --- | +| `fx`, `fy`, `cx`, `cy` | `0.0` | Pinhole intrinsics in pixels (used when `intrinsics` port is unconnected) | +| `depth_scale` | `0.001` | Raw-depth → meters multiplier (`0.001` for uint16 mm; `1.0` for float32 m) | +| `depth_min`, `depth_max` | `0.0`, `100.0` | Valid metric depth range (meters) | +| `invalid_value` | `NaN` | Value written to X/Y/Z for invalid pixels | +| `depth_tensor_name` / `color_tensor_name` | `""` | Input tensor names (empty = first tensor) | +| `output_tensor_name` | `"point_cloud"` | Name of the emitted XYZ tensor | +| `output_color_tensor_name` | `"colors"` | Name of the emitted colors tensor | +| `allocator` | — | Device allocator for the output tensors (e.g. `BlockMemoryPool`) | + +## Usage + +### Python + +```python +from holohub.depth_to_point_cloud import DepthToPointCloudOp + +cloud = DepthToPointCloudOp( + self, + name="point_cloud", + allocator=BlockMemoryPool(self, ...), + fx=fx, fy=fy, cx=cx, cy=cy, + depth_scale=0.001, # uint16 millimeters + depth_min=0.1, depth_max=10.0, +) +# depth_source -> cloud -> HolovizOp (3D points) +``` + +### C++ + +```cpp +auto cloud = make_operator( + "point_cloud", + Arg("allocator", make_resource(...)), + Arg("fx", fx), Arg("fy", fy), Arg("cx", cx), Arg("cy", cy), + Arg("depth_scale", 0.001f)); +``` + +## Testing + +`test/test_deproject.cu` is a standalone golden-reference unit test that depends only on the CUDA +runtime (no Holoscan SDK). It verifies the deprojection math (float32 and uint16 paths), invalid / +out-of-range handling, and color passthrough against an analytic CPU reference: + +```bash +nvcc -O2 -arch=native -o test_deproject test/test_deproject.cu deproject.cu && ./test_deproject +``` + +It is also registered with CTest (`depth_to_point_cloud_test`) when the project is built with +`BUILD_TESTING` enabled. + +## Requirements + +- Holoscan SDK ≥ 4.0.0 +- CUDA runtime +- Platforms: `x86_64`, `aarch64` (Jetson) +- No third-party dependencies beyond the CUDA runtime — the deprojection runs as a custom CUDA + kernel (no VPI/PVA/VIC fixed-function block exists for depth deprojection, and it is not a + library-shaped op). diff --git a/operators/depth_to_point_cloud/deproject.cu b/operators/depth_to_point_cloud/deproject.cu new file mode 100644 index 0000000000..4e7b93e34d --- /dev/null +++ b/operators/depth_to_point_cloud/deproject.cu @@ -0,0 +1,98 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "deproject.hpp" + +namespace holoscan::ops { + +// Pattern 1 (map / elementwise): one thread per pixel, grid-stride loop. +template +__global__ void __launch_bounds__(256) deproject_kernel( + const T* __restrict__ depth, float depth_scale, CameraIntrinsics k, float depth_min, + float depth_max, float invalid_value, const uchar3* __restrict__ color3, + const uchar4* __restrict__ color4, float3* __restrict__ out_xyz, + uchar3* __restrict__ out_color, int width, int height) { + const int n = width * height; + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) { + const int u = i % width; + const int v = i / width; + + const T raw = depth[i]; + const float z = static_cast(raw) * depth_scale; + // Use a positive-depth check (rather than `raw != 0`) so that negative and NaN depths + // are uniformly rejected for both the uint16 and float32 paths. + const bool valid = (z > 0.0f) && isfinite(z) && (z >= depth_min) && (z <= depth_max); + + out_xyz[i] = valid ? make_float3((u - k.cx) * z / k.fx, (v - k.cy) * z / k.fy, z) + : make_float3(invalid_value, invalid_value, invalid_value); + + if (out_color != nullptr) { + if (color4 != nullptr) { + const uchar4 c = color4[i]; + out_color[i] = make_uchar3(c.x, c.y, c.z); + } else if (color3 != nullptr) { + out_color[i] = color3[i]; + } + } + } +} + +cudaError_t launch_deproject(const void* depth, DepthDType dtype, float depth_scale, + CameraIntrinsics intr, float depth_min, float depth_max, + float invalid_value, const void* color, int color_channels, + float3* out_xyz, uchar3* out_color, int width, int height, + cudaStream_t stream) { + const int n = width * height; + if (n <= 0) { return cudaSuccess; } + + constexpr int kBlock = 256; + // One block per kBlock pixels. All targeted architectures allow up to 2^31-1 blocks in x, + // far beyond any image resolution, so no grid cap is needed; the grid-stride loop in the + // kernel still covers any n correctly. + const int grid = (n + kBlock - 1) / kBlock; + + const uchar3* color3 = nullptr; + const uchar4* color4 = nullptr; + if (color != nullptr && out_color != nullptr) { + if (color_channels == 4) { + color4 = static_cast(color); + } else if (color_channels == 3) { + color3 = static_cast(color); + } else { + return cudaErrorInvalidValue; // only 3- or 4-channel uint8 color is supported + } + } + + switch (dtype) { + case DepthDType::kUint16: + deproject_kernel<<>>( + static_cast(depth), depth_scale, intr, depth_min, depth_max, + invalid_value, color3, color4, out_xyz, out_color, width, height); + break; + case DepthDType::kFloat32: + deproject_kernel<<>>( + static_cast(depth), depth_scale, intr, depth_min, depth_max, invalid_value, + color3, color4, out_xyz, out_color, width, height); + break; + default: + return cudaErrorInvalidValue; // unknown depth dtype -> no kernel launched + } + + return cudaPeekAtLastError(); +} + +} // namespace holoscan::ops diff --git a/operators/depth_to_point_cloud/deproject.hpp b/operators/depth_to_point_cloud/deproject.hpp new file mode 100644 index 0000000000..5d8e9f811f --- /dev/null +++ b/operators/depth_to_point_cloud/deproject.hpp @@ -0,0 +1,77 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef HOLOSCAN_OPERATORS_DEPTH_TO_POINT_CLOUD_DEPROJECT_HPP +#define HOLOSCAN_OPERATORS_DEPTH_TO_POINT_CLOUD_DEPROJECT_HPP + +#include + +#include + +// This header is intentionally free of any Holoscan dependency so the deproject +// kernel can be unit-tested in isolation with nvcc (see test/test_deproject.cu). + +namespace holoscan::ops { + +/// Supported element types of the input depth image. +enum class DepthDType : int { kUint16 = 0, kFloat32 = 1 }; + +/// Pinhole camera intrinsics in pixels. +struct CameraIntrinsics { + float fx; + float fy; + float cx; + float cy; +}; + +/** + * @brief Deproject an organized depth image into an organized HxWx3 point cloud. + * + * One CUDA thread per pixel (grid-stride). For each pixel (u, v) with metric depth + * Z = raw_depth * depth_scale, the output point (optical frame: x-right, y-down, + * z-forward) is: + * X = (u - cx) * Z / fx + * Y = (v - cy) * Z / fy + * Z = Z + * Pixels with raw depth == 0 or metric depth outside [depth_min, depth_max] are + * written as (invalid_value, invalid_value, invalid_value). + * + * @param depth device pointer to the HxW depth image (element type = dtype) + * @param dtype depth element type (uint16 or float32) + * @param depth_scale multiply raw depth by this to get meters (e.g. 0.001 for uint16 mm) + * @param intr pinhole intrinsics in pixels + * @param depth_min minimum valid metric depth (meters), inclusive + * @param depth_max maximum valid metric depth (meters), inclusive + * @param invalid_value value written to X/Y/Z for invalid pixels (e.g. NaN or 0) + * @param color optional device pointer to HxW color (uchar3/uchar4); nullptr to disable + * @param color_channels 3 or 4; ignored when color == nullptr + * @param out_xyz device pointer to HxW float3 output (organized, AoS) + * @param out_color optional device pointer to HxW uchar3 output; nullptr to disable + * @param width image width in pixels + * @param height image height in pixels + * @param stream CUDA stream to launch on + * @return cudaPeekAtLastError() result after the launch + */ +cudaError_t launch_deproject(const void* depth, DepthDType dtype, float depth_scale, + CameraIntrinsics intr, float depth_min, float depth_max, + float invalid_value, const void* color, int color_channels, + float3* out_xyz, uchar3* out_color, int width, int height, + cudaStream_t stream); + +} // namespace holoscan::ops + +#endif /* HOLOSCAN_OPERATORS_DEPTH_TO_POINT_CLOUD_DEPROJECT_HPP */ diff --git a/operators/depth_to_point_cloud/depth_to_point_cloud.cpp b/operators/depth_to_point_cloud/depth_to_point_cloud.cpp new file mode 100644 index 0000000000..d0cf25cbf9 --- /dev/null +++ b/operators/depth_to_point_cloud/depth_to_point_cloud.cpp @@ -0,0 +1,244 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "depth_to_point_cloud.hpp" + +#include +#include +#include + +#include + +#include +#include + +#include + +#include "deproject.hpp" + +#define CUDA_TRY(stmt) \ + { \ + cudaError_t cuda_status = stmt; \ + if (cudaSuccess != cuda_status) { \ + HOLOSCAN_LOG_ERROR("CUDA runtime call {} in line {} of file {} failed with '{}' ({}).", \ + #stmt, \ + __LINE__, \ + __FILE__, \ + cudaGetErrorString(cuda_status), \ + static_cast(cuda_status)); \ + throw std::runtime_error("CUDA runtime call failed"); \ + } \ + } + +namespace holoscan::ops { + +namespace { + +// Map a DLPack dtype to the kernel's depth element type. +DepthDType to_depth_dtype(const DLDataType& dtype) { + if (dtype.code == kDLFloat && dtype.bits == 32) { return DepthDType::kFloat32; } + if (dtype.code == kDLUInt && dtype.bits == 16) { return DepthDType::kUint16; } + throw std::runtime_error( + "DepthToPointCloudOp: unsupported depth dtype (expected float32 or uint16)"); +} + +// Fetch a tensor from an entity, by name if given, otherwise the first tensor. +std::shared_ptr get_tensor(const holoscan::gxf::Entity& message, const std::string& name) { + auto maybe = name.empty() ? message.get() : message.get(name.c_str()); + if (!maybe) { + throw std::runtime_error("DepthToPointCloudOp: input tensor '" + name + "' not found"); + } + return maybe; +} + +} // namespace + +void DepthToPointCloudOp::setup(OperatorSpec& spec) { + auto& depth_in = spec.input("depth"); + // Optional inputs must not block execution when unconnected. + auto& intrinsics_in = spec.input("intrinsics").condition(ConditionType::kNone); + auto& color_in = spec.input("color").condition(ConditionType::kNone); + auto& cloud_out = spec.output("point_cloud"); + (void)depth_in; + (void)intrinsics_in; + (void)color_in; + (void)cloud_out; + + spec.param(fx_, "fx", "Focal length x", "Focal length in pixels (x).", 0.0f); + spec.param(fy_, "fy", "Focal length y", "Focal length in pixels (y).", 0.0f); + spec.param(cx_, "cx", "Principal point x", "Principal point in pixels (x).", 0.0f); + spec.param(cy_, "cy", "Principal point y", "Principal point in pixels (y).", 0.0f); + spec.param(depth_scale_, "depth_scale", "Depth scale", + "Multiplier converting raw depth to meters (e.g. 0.001 for uint16 mm).", 0.001f); + spec.param(depth_min_, "depth_min", "Min depth", "Minimum valid depth in meters.", 0.0f); + spec.param(depth_max_, "depth_max", "Max depth", "Maximum valid depth in meters.", 100.0f); + spec.param(invalid_value_, "invalid_value", "Invalid value", + "Value written to X/Y/Z for invalid pixels.", + std::numeric_limits::quiet_NaN()); + spec.param(depth_tensor_name_, "depth_tensor_name", "Depth tensor name", + "Name of the depth tensor in the input message (empty = first tensor).", + std::string("")); + spec.param(color_tensor_name_, "color_tensor_name", "Color tensor name", + "Name of the color tensor in the color message (empty = first tensor).", + std::string("")); + spec.param(output_tensor_name_, "output_tensor_name", "Output tensor name", + "Name of the emitted point-cloud tensor.", std::string("point_cloud")); + spec.param(output_color_tensor_name_, "output_color_tensor_name", "Output color tensor name", + "Name of the emitted colors tensor.", std::string("colors")); + spec.param(allocator_, "allocator", "Allocator", "Device allocator for output tensors."); +} + +void DepthToPointCloudOp::compute(InputContext& op_input, OutputContext& op_output, + ExecutionContext& context) { + auto depth_message = op_input.receive("depth").value(); + + // Acquire this operator's CUDA stream and synchronize the depth producer onto it (via events). + // The optional intrinsics/color producers are synchronized below when those ports are connected, + // so every input the kernel reads is correctly ordered even if it comes from a different stream. + const cudaStream_t stream = op_input.receive_cuda_stream("depth"); + + // --- Depth tensor: dtype, dimensions, device pointer --- + auto depth_tensor = get_tensor(depth_message, depth_tensor_name_.get()); + const DepthDType depth_dtype = to_depth_dtype(depth_tensor->dtype()); + const auto& shape = depth_tensor->shape(); + // Accept only [H, W] or [H, W, 1]; a higher-rank tensor (e.g. [H, W, 3]) would otherwise be + // silently reinterpreted as a single-channel depth buffer. + if (shape.size() < 2 || shape.size() > 3 || + (shape.size() == 3 && static_cast(shape[2]) != 1)) { + throw std::runtime_error("DepthToPointCloudOp: depth tensor must be [H, W] or [H, W, 1]"); + } + const int height = static_cast(shape[0]); + const int width = static_cast(shape[1]); + + // --- Optional per-frame intrinsics override --- + CameraIntrinsics intr{fx_.get(), fy_.get(), cx_.get(), cy_.get()}; + if (auto maybe_intr = op_input.receive("intrinsics")) { + op_input.receive_cuda_stream("intrinsics"); // sync the intrinsics producer onto `stream` + auto intr_tensor = get_tensor(maybe_intr.value(), std::string("")); + const DLDataType idt = intr_tensor->dtype(); + if (intr_tensor->size() < 4 || idt.code != kDLFloat || idt.bits != 32) { + throw std::runtime_error( + "DepthToPointCloudOp: intrinsics tensor must be float32 [fx, fy, cx, cy]"); + } + float host[4]; + // Tiny (16 B) config read; pixel data stays GPU-resident. The intrinsics tensor is + // produced by an upstream operator on `stream`, so the copy must be ordered on that + // same stream (a plain cudaMemcpy on the default stream would not wait for it). + // Known cost: this per-frame stream sync runs ONLY when the optional `intrinsics` + // port is connected; pipelines that pass static fx/fy/cx/cy via parameters (the common + // case) skip this branch entirely and incur no sync. + CUDA_TRY(cudaMemcpyAsync(host, intr_tensor->data(), sizeof(host), cudaMemcpyDefault, stream)); + CUDA_TRY(cudaStreamSynchronize(stream)); + intr = CameraIntrinsics{host[0], host[1], host[2], host[3]}; + } + + // Focal lengths must be non-zero (they divide the deprojection); the defaults are 0.0, so a + // caller that neither sets the fx/fy params nor connects the intrinsics input is rejected here + // rather than dividing by zero in the kernel. + if (intr.fx <= 0.0f || intr.fy <= 0.0f) { + throw std::runtime_error( + "DepthToPointCloudOp: fx and fy must be positive (set the fx/fy parameters or connect the " + "intrinsics input)"); + } + + // --- Optional color input --- + const void* color_ptr = nullptr; + int color_channels = 0; + if (auto maybe_color = op_input.receive("color")) { + op_input.receive_cuda_stream("color"); // sync the color producer onto `stream` + auto color_tensor = get_tensor(maybe_color.value(), color_tensor_name_.get()); + // The kernel reinterprets the color buffer as uchar3/uchar4, so the element type must be + // uint8 (a float or other-width tensor would be misread byte-for-byte). + const DLDataType cdt = color_tensor->dtype(); + if (cdt.code != kDLUInt || cdt.bits != 8) { + throw std::runtime_error("DepthToPointCloudOp: color tensor must be uint8"); + } + const auto& cshape = color_tensor->shape(); + // Require an explicit channel dimension; a 2D [H, W] tensor is rejected rather than + // silently assumed to be 3-channel (which would read past the buffer in the kernel). + if (cshape.size() < 3) { + throw std::runtime_error( + "DepthToPointCloudOp: color tensor must be H x W x 3 (uchar3) or H x W x 4 (uchar4)"); + } + color_channels = static_cast(cshape[2]); + if (color_channels != 3 && color_channels != 4) { + throw std::runtime_error( + "DepthToPointCloudOp: color tensor must be H x W x 3 (uchar3) or H x W x 4 (uchar4)"); + } + if (static_cast(cshape[0]) != height || static_cast(cshape[1]) != width) { + throw std::runtime_error( + "DepthToPointCloudOp: color image dimensions must match the depth image"); + } + color_ptr = color_tensor->data(); + } + + // --- Allocate outputs --- + auto allocator = nvidia::gxf::Handle::Create( + context.context(), allocator_.get()->gxf_cid()); + if (!allocator) { + throw std::runtime_error("DepthToPointCloudOp: failed to create allocator handle"); + } + auto out_message = nvidia::gxf::Entity::New(context.context()); + if (!out_message) { + throw std::runtime_error("DepthToPointCloudOp: failed to create output entity"); + } + + auto xyz_tensor = out_message.value().add(output_tensor_name_.get().c_str()); + if (!xyz_tensor) { + throw std::runtime_error("DepthToPointCloudOp: failed to add point_cloud tensor to message"); + } + xyz_tensor.value()->reshape(nvidia::gxf::Shape{height, width, 3}, + nvidia::gxf::MemoryStorageType::kDevice, allocator.value()); + if (!xyz_tensor.value()->pointer()) { + throw std::runtime_error("DepthToPointCloudOp: failed to allocate point_cloud tensor"); + } + + uchar3* out_color = nullptr; + if (color_ptr != nullptr) { + auto color_out = + out_message.value().add(output_color_tensor_name_.get().c_str()); + if (!color_out) { + throw std::runtime_error("DepthToPointCloudOp: failed to add colors tensor to message"); + } + color_out.value()->reshape(nvidia::gxf::Shape{height, width, 3}, + nvidia::gxf::MemoryStorageType::kDevice, allocator.value()); + if (!color_out.value()->pointer()) { + throw std::runtime_error("DepthToPointCloudOp: failed to allocate colors tensor"); + } + out_color = reinterpret_cast(color_out.value()->pointer()); + } + + CUDA_TRY(launch_deproject(depth_tensor->data(), + depth_dtype, + depth_scale_.get(), + intr, + depth_min_.get(), + depth_max_.get(), + invalid_value_.get(), + color_ptr, + color_channels, + reinterpret_cast(xyz_tensor.value()->pointer()), + out_color, + width, + height, + stream)); + + auto result = gxf::Entity(std::move(out_message.value())); + op_output.emit(result, "point_cloud"); +} + +} // namespace holoscan::ops diff --git a/operators/depth_to_point_cloud/depth_to_point_cloud.hpp b/operators/depth_to_point_cloud/depth_to_point_cloud.hpp new file mode 100644 index 0000000000..54cdf95004 --- /dev/null +++ b/operators/depth_to_point_cloud/depth_to_point_cloud.hpp @@ -0,0 +1,91 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef HOLOSCAN_OPERATORS_DEPTH_TO_POINT_CLOUD_DEPTH_TO_POINT_CLOUD_HPP +#define HOLOSCAN_OPERATORS_DEPTH_TO_POINT_CLOUD_DEPTH_TO_POINT_CLOUD_HPP + +#include +#include + +#include "holoscan/core/operator.hpp" + +namespace holoscan::ops { + +/** + * @brief Deproject an organized depth image into an organized point cloud on the GPU. + * + * ==Named Inputs== + * + * - **depth** : `nvidia::gxf::Entity` containing a 2D depth `nvidia::gxf::Tensor` + * - Element type `uint16` (raw units scaled by `depth_scale`) or `float32` (meters when + * `depth_scale == 1.0`). Shape `[H, W]` or `[H, W, 1]`, device memory. + * - **intrinsics** *(optional)* : `nvidia::gxf::Entity` containing a `float32` tensor of 4 + * values `[fx, fy, cx, cy]`. When present, overrides the `fx/fy/cx/cy` parameters for the + * current frame (e.g. for sensors that publish per-stream intrinsics). + * - **color** *(optional)* : `nvidia::gxf::Entity` containing an `H x W x 3` or `H x W x 4` + * `uint8` color image aligned to the depth image. When connected, a colored output is emitted. + * + * ==Named Outputs== + * + * - **point_cloud** : `nvidia::gxf::Entity` containing: + * - a `float32` tensor (`output_tensor_name`, default `"point_cloud"`) of shape `[H, W, 3]` + * (organized, AoS XYZ) in the camera optical frame (x-right, y-down, z-forward). Invalid + * pixels (depth == 0 or outside `[depth_min, depth_max]`) are set to `invalid_value`; and + * - *(only when `color` is connected)* a `uint8` tensor (`output_color_tensor_name`, default + * `"colors"`) of shape `[H, W, 3]`. + * + * ==Parameters== + * + * - **fx**, **fy**, **cx**, **cy**: pinhole intrinsics in pixels (used when the `intrinsics` + * port is not connected). + * - **depth_scale**: multiply raw depth by this to get meters (default `0.001`, i.e. uint16 mm). + * - **depth_min**, **depth_max**: valid metric depth range in meters (defaults `0.0` / `100.0`). + * - **invalid_value**: value written to X/Y/Z for invalid pixels (default `NaN`). + * - **depth_tensor_name**, **color_tensor_name**: input tensor names (default: first tensor). + * - **output_tensor_name**: name of the emitted point-cloud tensor (default `"point_cloud"`). + * - **output_color_tensor_name**: name of the emitted colors tensor (default `"colors"`). + * - **allocator**: device `holoscan::Allocator` for the output tensors (e.g. BlockMemoryPool). + */ +class DepthToPointCloudOp : public holoscan::Operator { + public: + HOLOSCAN_OPERATOR_FORWARD_ARGS(DepthToPointCloudOp) + + DepthToPointCloudOp() = default; + + void setup(OperatorSpec& spec) override; + void compute(InputContext& op_input, OutputContext& op_output, + ExecutionContext& context) override; + + private: + Parameter fx_; + Parameter fy_; + Parameter cx_; + Parameter cy_; + Parameter depth_scale_; + Parameter depth_min_; + Parameter depth_max_; + Parameter invalid_value_; + Parameter depth_tensor_name_; + Parameter color_tensor_name_; + Parameter output_tensor_name_; + Parameter output_color_tensor_name_; + Parameter> allocator_; +}; + +} // namespace holoscan::ops + +#endif /* HOLOSCAN_OPERATORS_DEPTH_TO_POINT_CLOUD_DEPTH_TO_POINT_CLOUD_HPP */ diff --git a/operators/depth_to_point_cloud/metadata.json b/operators/depth_to_point_cloud/metadata.json new file mode 100644 index 0000000000..e87bb3ba17 --- /dev/null +++ b/operators/depth_to_point_cloud/metadata.json @@ -0,0 +1,30 @@ +{ + "$schema": "urn:holohub:operator:v1", + "operator": { + "name": "DepthToPointCloudOp", + "authors": [ + { + "name": "Nitheesh Kumar", + "affiliation": "Zobot Lab" + } + ], + "language": ["C++", "Python"], + "version": "0.1.0", + "changelog": { + "0.1.0": "Initial release: GPU deprojection of organized depth into an organized point cloud (uint16/float32 depth, per-frame intrinsics override, optional colored output)." + }, + "holoscan_sdk": { + "minimum_required_version": "4.0.0", + "tested_versions": [ + "4.3.0" + ] + }, + "platforms": [ + "x86_64", + "aarch64" + ], + "tags": ["Computer Vision and Perception", "Robotics", "Point Cloud", "Depth", "3D", "Camera"], + "ranking": 3, + "requirements": {} + } +} diff --git a/operators/depth_to_point_cloud/python/CMakeLists.txt b/operators/depth_to_point_cloud/python/CMakeLists.txt new file mode 100644 index 0000000000..dd5a6e4ac9 --- /dev/null +++ b/operators/depth_to_point_cloud/python/CMakeLists.txt @@ -0,0 +1,21 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +include(pybind11_add_holohub_module) +pybind11_add_holohub_module( + CPP_CMAKE_TARGET depth_to_point_cloud + CLASS_NAME "DepthToPointCloudOp" + SOURCES depth_to_point_cloud.cpp +) diff --git a/operators/depth_to_point_cloud/python/depth_to_point_cloud.cpp b/operators/depth_to_point_cloud/python/depth_to_point_cloud.cpp new file mode 100644 index 0000000000..fa71002d5f --- /dev/null +++ b/operators/depth_to_point_cloud/python/depth_to_point_cloud.cpp @@ -0,0 +1,124 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../../operator_util.hpp" + +#include + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +using std::string_literals::operator""s; +using pybind11::literals::operator""_a; + +namespace py = pybind11; + +namespace holoscan::ops { + +/* Trampoline class providing a Pythonic kwarg-based constructor matching the C++ defaults. */ +class PyDepthToPointCloudOp : public DepthToPointCloudOp { + public: + using DepthToPointCloudOp::DepthToPointCloudOp; + + PyDepthToPointCloudOp(Fragment* fragment, const py::args& args, + std::shared_ptr allocator, float fx = 0.0f, float fy = 0.0f, + float cx = 0.0f, float cy = 0.0f, float depth_scale = 0.001f, + float depth_min = 0.0f, float depth_max = 100.0f, + float invalid_value = std::numeric_limits::quiet_NaN(), + const std::string& depth_tensor_name = "", + const std::string& color_tensor_name = "", + const std::string& output_tensor_name = "point_cloud", + const std::string& output_color_tensor_name = "colors", + const std::string& name = "depth_to_point_cloud") + : DepthToPointCloudOp(ArgList{Arg{"allocator", allocator}, + Arg{"fx", fx}, + Arg{"fy", fy}, + Arg{"cx", cx}, + Arg{"cy", cy}, + Arg{"depth_scale", depth_scale}, + Arg{"depth_min", depth_min}, + Arg{"depth_max", depth_max}, + Arg{"invalid_value", invalid_value}, + Arg{"depth_tensor_name", depth_tensor_name}, + Arg{"color_tensor_name", color_tensor_name}, + Arg{"output_tensor_name", output_tensor_name}, + Arg{"output_color_tensor_name", output_color_tensor_name}}) { + add_positional_condition_and_resource_args(this, args); + name_ = name; + fragment_ = fragment; + spec_ = std::make_shared(fragment); + setup(*spec_.get()); + } +}; + +PYBIND11_MODULE(_depth_to_point_cloud, m) { + m.doc() = R"pbdoc( + DepthToPointCloudOp Python Bindings + ------------------------------------- + .. currentmodule:: _depth_to_point_cloud + )pbdoc"; + + py::class_>( + m, "DepthToPointCloudOp", + "Deproject an organized depth image into an organized point cloud on the GPU.") + .def(py::init, + float, + float, + float, + float, + float, + float, + float, + float, + const std::string&, + const std::string&, + const std::string&, + const std::string&, + const std::string&>(), + "fragment"_a, + "allocator"_a, + "fx"_a = 0.0f, + "fy"_a = 0.0f, + "cx"_a = 0.0f, + "cy"_a = 0.0f, + "depth_scale"_a = 0.001f, + "depth_min"_a = 0.0f, + "depth_max"_a = 100.0f, + "invalid_value"_a = std::numeric_limits::quiet_NaN(), + "depth_tensor_name"_a = ""s, + "color_tensor_name"_a = ""s, + "output_tensor_name"_a = "point_cloud"s, + "output_color_tensor_name"_a = "colors"s, + "name"_a = "depth_to_point_cloud"s) + .def("setup", &DepthToPointCloudOp::setup, "spec"_a); +} // PYBIND11_MODULE + +} // namespace holoscan::ops diff --git a/operators/depth_to_point_cloud/test/CMakeLists.txt b/operators/depth_to_point_cloud/test/CMakeLists.txt new file mode 100644 index 0000000000..2508209812 --- /dev/null +++ b/operators/depth_to_point_cloud/test/CMakeLists.txt @@ -0,0 +1,28 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Standalone golden-reference unit test for the deproject kernel. It depends only on +# the CUDA runtime (no Holoscan SDK) and returns a non-zero exit code on failure. +add_executable(depth_to_point_cloud_test + test_deproject.cu + ${CMAKE_CURRENT_SOURCE_DIR}/../deproject.cu +) + +set_target_properties(depth_to_point_cloud_test + PROPERTIES + CUDA_ARCHITECTURES "native" +) + +add_test(NAME depth_to_point_cloud_test COMMAND depth_to_point_cloud_test) diff --git a/operators/depth_to_point_cloud/test/test_deproject.cu b/operators/depth_to_point_cloud/test/test_deproject.cu new file mode 100644 index 0000000000..6608571385 --- /dev/null +++ b/operators/depth_to_point_cloud/test/test_deproject.cu @@ -0,0 +1,180 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Standalone golden-reference unit test for the deproject kernel. Compiles with +// nvcc alone (no Holoscan SDK) so kernel correctness can be verified on any GPU: +// nvcc -O2 -arch=native -o test_deproject test/test_deproject.cu deproject.cu && ./test_deproject + +#include + +#include +#include +#include +#include + +#include "../deproject.hpp" + +using namespace holoscan::ops; + +static int g_failures = 0; +#define CHECK(cond, msg) \ + do { \ + if (!(cond)) { \ + printf("FAIL: %s\n", (msg)); \ + ++g_failures; \ + } \ + } while (0) + +// Check that a CUDA runtime call returns cudaSuccess (allocations, copies, syncs). +#define CUDA_OK(call) CHECK((call) == cudaSuccess, #call " failed") + +// CPU golden deproject for a single pixel. +static void golden(float z, int u, int v, CameraIntrinsics k, float dmin, float dmax, float invalid, + float& X, float& Y, float& Z) { + if (z <= 0.f || z < dmin || z > dmax) { + X = Y = Z = invalid; + return; + } + X = (u - k.cx) * z / k.fx; + Y = (v - k.cy) * z / k.fy; + Z = z; +} + +int main() { + const int W = 64, H = 48, N = W * H; + // cx,cy at image center: (W-1)/2, (H-1)/2 + const CameraIntrinsics k{50.f, 50.f, 31.5f, 23.5f}; + const float dmin = 0.1f, dmax = 10.f, invalid = NAN; + + // ---- Case 1: float32 constant plane Z=2.0, one hole -> NaN ---- + { + std::vector depth(N, 2.0f); + depth[10 * W + 20] = 0.0f; // hole + float* d_depth = nullptr; + float3* d_xyz = nullptr; + CUDA_OK(cudaMalloc(&d_depth, N * sizeof(float))); + CUDA_OK(cudaMalloc(&d_xyz, N * sizeof(float3))); + CUDA_OK(cudaMemcpy(d_depth, depth.data(), N * sizeof(float), cudaMemcpyHostToDevice)); + cudaError_t err = launch_deproject(d_depth, DepthDType::kFloat32, 1.0f, k, dmin, dmax, invalid, + nullptr, 0, d_xyz, nullptr, W, H, 0); + CHECK(err == cudaSuccess, "Case1 launch returned error"); + CUDA_OK(cudaDeviceSynchronize()); + std::vector out(N); + CUDA_OK(cudaMemcpy(out.data(), d_xyz, N * sizeof(float3), cudaMemcpyDeviceToHost)); + bool ok = true; + for (int v = 0; v < H && ok; ++v) { + for (int u = 0; u < W; ++u) { + float gX, gY, gZ; + golden(depth[v * W + u], u, v, k, dmin, dmax, invalid, gX, gY, gZ); + float3 o = out[v * W + u]; + if (std::isnan(gX)) { + if (!(std::isnan(o.x) && std::isnan(o.y) && std::isnan(o.z))) { ok = false; break; } + } else if (fabsf(o.x - gX) > 1e-4f || fabsf(o.y - gY) > 1e-4f || fabsf(o.z - gZ) > 1e-4f) { + ok = false; + break; + } + } + } + CHECK(ok, "Case1 float32 plane deprojection mismatch / hole not NaN"); + cudaFree(d_depth); + cudaFree(d_xyz); + } + + // ---- Case 2: uint16 millimeters with depth_scale 0.001 ---- + { + std::vector depth(N, 2000); // 2000 mm -> 2.0 m + uint16_t* d_depth = nullptr; + float3* d_xyz = nullptr; + CUDA_OK(cudaMalloc(&d_depth, N * sizeof(uint16_t))); + CUDA_OK(cudaMalloc(&d_xyz, N * sizeof(float3))); + CUDA_OK(cudaMemcpy(d_depth, depth.data(), N * sizeof(uint16_t), cudaMemcpyHostToDevice)); + cudaError_t err = launch_deproject(d_depth, DepthDType::kUint16, 0.001f, k, dmin, dmax, invalid, + nullptr, 0, d_xyz, nullptr, W, H, 0); + CHECK(err == cudaSuccess, "Case2 launch returned error"); + CUDA_OK(cudaDeviceSynchronize()); + std::vector out(N); + CUDA_OK(cudaMemcpy(out.data(), d_xyz, N * sizeof(float3), cudaMemcpyDeviceToHost)); + const int u = 40, v = 30; + float3 o = out[v * W + u]; + float gX = (u - k.cx) * 2.0f / k.fx, gY = (v - k.cy) * 2.0f / k.fy; + CHECK(fabsf(o.z - 2.0f) < 1e-4f, "Case2 uint16 depth_scale Z wrong"); + CHECK(fabsf(o.x - gX) < 1e-4f && fabsf(o.y - gY) < 1e-4f, "Case2 uint16 XY wrong"); + cudaFree(d_depth); + cudaFree(d_xyz); + } + + // ---- Case 3: depth beyond depth_max -> invalid ---- + { + std::vector depth(N, 50.0f); // > dmax + float* d_depth = nullptr; + float3* d_xyz = nullptr; + CUDA_OK(cudaMalloc(&d_depth, N * sizeof(float))); + CUDA_OK(cudaMalloc(&d_xyz, N * sizeof(float3))); + CUDA_OK(cudaMemcpy(d_depth, depth.data(), N * sizeof(float), cudaMemcpyHostToDevice)); + cudaError_t err = launch_deproject(d_depth, DepthDType::kFloat32, 1.0f, k, dmin, dmax, invalid, + nullptr, 0, d_xyz, nullptr, W, H, 0); + CHECK(err == cudaSuccess, "Case3 launch returned error"); + CUDA_OK(cudaDeviceSynchronize()); + std::vector out(N); + CUDA_OK(cudaMemcpy(out.data(), d_xyz, N * sizeof(float3), cudaMemcpyDeviceToHost)); + CHECK(std::isnan(out[0].x), "Case3 depth beyond max not invalidated"); + cudaFree(d_depth); + cudaFree(d_xyz); + } + + // ---- Case 4: color passthrough (uchar3) ---- + { + std::vector depth(N, 2.0f); + std::vector color(N); + for (int i = 0; i < N; ++i) color[i] = make_uchar3(i % 256, (i * 2) % 256, (i * 3) % 256); + float* d_depth = nullptr; + float3* d_xyz = nullptr; + uchar3* d_color = nullptr; + uchar3* d_outcolor = nullptr; + CUDA_OK(cudaMalloc(&d_depth, N * sizeof(float))); + CUDA_OK(cudaMalloc(&d_xyz, N * sizeof(float3))); + CUDA_OK(cudaMalloc(&d_color, N * sizeof(uchar3))); + CUDA_OK(cudaMalloc(&d_outcolor, N * sizeof(uchar3))); + CUDA_OK(cudaMemcpy(d_depth, depth.data(), N * sizeof(float), cudaMemcpyHostToDevice)); + CUDA_OK(cudaMemcpy(d_color, color.data(), N * sizeof(uchar3), cudaMemcpyHostToDevice)); + cudaError_t err = launch_deproject(d_depth, DepthDType::kFloat32, 1.0f, k, dmin, dmax, invalid, + d_color, 3, d_xyz, d_outcolor, W, H, 0); + CHECK(err == cudaSuccess, "Case4 launch returned error"); + CUDA_OK(cudaDeviceSynchronize()); + std::vector outc(N); + CUDA_OK(cudaMemcpy(outc.data(), d_outcolor, N * sizeof(uchar3), cudaMemcpyDeviceToHost)); + bool ok = true; + for (int i = 0; i < N; ++i) { + if (outc[i].x != color[i].x || outc[i].y != color[i].y || outc[i].z != color[i].z) { + ok = false; + break; + } + } + CHECK(ok, "Case4 color passthrough mismatch"); + cudaFree(d_depth); + cudaFree(d_xyz); + cudaFree(d_color); + cudaFree(d_outcolor); + } + + if (g_failures == 0) { + printf("ALL TESTS PASSED\n"); + return 0; + } + printf("%d FAILURE(S)\n", g_failures); + return 1; +} diff --git a/operators/depth_to_point_cloud/test_depth_to_point_cloud.py b/operators/depth_to_point_cloud/test_depth_to_point_cloud.py new file mode 100644 index 0000000000..c4ea84a576 --- /dev/null +++ b/operators/depth_to_point_cloud/test_depth_to_point_cloud.py @@ -0,0 +1,88 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Unit tests for the DepthToPointCloudOp Python bindings. + +Covers construction, port wiring, parameter handling, and error handling across +the pybind11 boundary. Numerical correctness of the deprojection kernel is +covered separately by the standalone CUDA golden-reference test +(``test/test_deproject.cu``) and the ``depth_to_point_cloud_demo`` application, +since the compute path requires real GXF entities, an allocator, and a CUDA +stream that the lightweight mock fixtures do not provide. +""" + +import pytest +from holoscan.core import Operator +from holoscan.resources import UnboundedAllocator + +from holohub.depth_to_point_cloud import DepthToPointCloudOp + +try: + from holoscan.core import BaseOperator +except ImportError: + from holoscan.core import _Operator as BaseOperator + + +def _make_op(fragment, **overrides): + """Construct a DepthToPointCloudOp with sane defaults, overridable per test.""" + params = dict( + name="depth_to_point_cloud", + allocator=UnboundedAllocator(fragment, name="alloc"), + fx=500.0, + fy=500.0, + cx=320.0, + cy=240.0, + ) + params.update(overrides) + return DepthToPointCloudOp(fragment, **params) + + +def test_init(fragment): + """Operator constructs and exposes the expected Holoscan properties.""" + name = "d2p_op" + op = _make_op(fragment, name=name) + assert isinstance(op, BaseOperator), "DepthToPointCloudOp should be a Holoscan operator" + assert op.operator_type == Operator.OperatorType.NATIVE, "Operator type should be NATIVE" + assert f"name: {name}" in repr(op), "Operator name should appear in repr()" + + +def test_ports(fragment): + """setup() wires the required depth input, optional inputs, and the output.""" + spec = _make_op(fragment).spec + assert "depth" in spec.inputs, 'required input "depth" missing' + assert "intrinsics" in spec.inputs, 'optional per-frame "intrinsics" input missing' + assert "color" in spec.inputs, 'optional "color" input missing' + assert "point_cloud" in spec.outputs, 'output "point_cloud" missing' + + +def test_requires_allocator(fragment): + """allocator is a required argument; omitting it is a TypeError.""" + with pytest.raises(TypeError): + DepthToPointCloudOp(fragment, name="no_alloc", fx=500.0, fy=500.0, cx=320.0, cy=240.0) + + +@pytest.mark.parametrize( + "overrides", + [ + {"depth_scale": 0.001, "depth_min": 0.1, "depth_max": 10.0}, + {"output_tensor_name": "xyz", "output_color_tensor_name": "rgb"}, + {"invalid_value": 0.0}, + {"depth_tensor_name": "depth", "color_tensor_name": "color"}, + ], +) +def test_param_acceptance(fragment, overrides): + """The operator accepts its documented parameters without error.""" + op = _make_op(fragment, **overrides) + assert isinstance(op, BaseOperator)