diff --git a/applications/CMakeLists.txt b/applications/CMakeLists.txt
index fa22887c09..f66eae7625 100644
--- a/applications/CMakeLists.txt
+++ b/applications/CMakeLists.txt
@@ -49,6 +49,8 @@ add_holohub_application(deltacast_receiver DEPENDS
 
 add_holohub_application(depth_anything_v2)
 
+add_holohub_application(depth_to_point_cloud_demo DEPENDS OPERATORS depth_to_point_cloud)
+
 add_subdirectory(distributed)
 
 add_holohub_application(endoscopy_depth_estimation)
diff --git a/applications/depth_to_point_cloud_demo/CMakeLists.txt b/applications/depth_to_point_cloud_demo/CMakeLists.txt
new file mode 100644
index 0000000000..261e1061f1
--- /dev/null
+++ b/applications/depth_to_point_cloud_demo/CMakeLists.txt
@@ -0,0 +1,33 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+project(depth_to_point_cloud_demo NONE)
+
+find_package(holoscan 4.0.0 REQUIRED CONFIG
+             PATHS "/opt/nvidia/holoscan" "/workspace/holoscan-sdk/install")
+
+if(BUILD_TESTING)
+  add_test(NAME depth_to_point_cloud_demo_python_test
+    COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/depth_to_point_cloud_demo.py --frames 10
+    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
+
+  set_property(TEST depth_to_point_cloud_demo_python_test PROPERTY ENVIRONMENT
+               "PYTHONPATH=${GXF_LIB_DIR}/../python/lib:${CMAKE_BINARY_DIR}/python/lib")
+
+  set_tests_properties(depth_to_point_cloud_demo_python_test
+    PROPERTIES
+      PASS_REGULAR_EXPRESSION "valid="
+      FAIL_REGULAR_EXPRESSION "(^|[^a-z])Error;ERROR;Failed")
+endif()
diff --git a/applications/depth_to_point_cloud_demo/README.md b/applications/depth_to_point_cloud_demo/README.md
new file mode 100644
index 0000000000..cdce9b402c
--- /dev/null
+++ b/applications/depth_to_point_cloud_demo/README.md
@@ -0,0 +1,87 @@
+# Depth to Point Cloud Demo
+
+A minimal demo of the [`depth_to_point_cloud`](../../operators/depth_to_point_cloud) operator.
+
+Here **hardware-free** means the input is produced by an on-GPU synthetic data generator
+(`SyntheticDepthGeneratorOp`) rather than a physical sensor: no depth camera, recorded dataset, or
+depth-estimation network is required, so the demo runs in CI on any GPU. The generator emits a
+synthetic organized depth image (a gently tilting plane) plus an aligned RGB image entirely on the
+GPU; the operator deprojects it into an organized `H x W x 3` point cloud, and the demo reports the
+valid-point count and Z range each frame.
+
+## Run
+
+```bash
+# Synthetic source (default, hardware-free, CI-friendly):
+./holohub run depth_to_point_cloud_demo
+# equivalently:
+./holohub run depth_to_point_cloud_demo synthetic
+
+# or directly:
+python3 applications/depth_to_point_cloud_demo/depth_to_point_cloud_demo.py --frames 100
+```
+
+Expected output (per frame):
+
+```text
+[depth_to_point_cloud_demo] points=307200 valid=307200 z=[1.xxx, 2.xxx] m
+```
+
+### Sources (`--source` / run modes)
+
+The demo selects its input source with `--source`, exposed as HoloHub run modes:
+
+| Mode / `--source` | Description |
+| --- | --- |
+| `synthetic` (default) | On-GPU synthetic depth + RGB generator. No hardware, runs in CI. |
+| `realsense` | Live Intel RealSense camera (see caveat below). |
+
+### Interactive 3D visualization (`--visualize`)
+
+By default the demo ends in a headless, CI-friendly `PointCloudStatsOp` sink that just reports
+per-frame statistics. Pass `--visualize` to instead render the cloud in `HolovizOp` as 3D points:
+
+```bash
+python3 applications/depth_to_point_cloud_demo/depth_to_point_cloud_demo.py --visualize
+```
+
+With `--visualize`, the organized `H x W x 3` cloud is compacted to `N x 3` with invalid (NaN)
+points dropped before being handed to `HolovizOp` as a `points_3d` primitive. This path needs a
+display and is therefore disabled by default (the CI mode keeps the statistics sink).
+
+## Pipeline
+
+```text
+SyntheticDepthGeneratorOp  --depth-->  DepthToPointCloudOp  --point_cloud-->  PointCloudStatsOp
+                           --color-->                                         (or HolovizOp with --visualize)
+```
+
+## Using a real Intel RealSense camera
+
+`--source realsense` is scaffolded but **not yet runnable from this Python demo**. The
+[`realsense_camera`](../../operators/realsense_camera) operator is currently C++-only (it ships no
+Python bindings), and it emits its `depth_buffer` / `color_buffer` as GXF `VideoBuffer`s, whereas
+`DepthToPointCloudOp` consumes a GXF `Tensor`. Wiring them from Python therefore requires:
+
+1. Python bindings for `realsense_camera` (add an `operators/realsense_camera/python/` module).
+2. A `FormatConverterOp` between the camera and the operator to convert `VideoBuffer` → `Tensor`
+   (and `RGBA8` → `RGB8` for the color path).
+3. Feeding intrinsics — either statically (`fx/fy/cx/cy`) or by consuming the camera's
+   `depth_camera_model` output through the operator's optional `intrinsics` input.
+
+Selecting `--source realsense` today raises a clear `NotImplementedError` pointing here. The wiring,
+once the bindings exist, is:
+
+```python
+from holohub.realsense_camera import RealsenseCameraOp  # requires new Python bindings
+
+camera = RealsenseCameraOp(self, name="camera", allocator=...)
+# camera.depth_buffer / color_buffer are VideoBuffers -> convert to Tensor via FormatConverterOp,
+# then feed DepthToPointCloudOp. RealSense depth is float32 meters (units_transform applied), so
+# use depth_scale=1.0 — not 0.001 (the 0.001 uint16-millimeter value is only for raw Z16 sources).
+```
+
+## Requirements
+
+- Holoscan SDK ≥ 4.0.0, CUDA, CuPy. Builds the `depth_to_point_cloud` operator (declared as a
+  dependency). Platforms: `x86_64`, `aarch64`.
diff --git a/applications/depth_to_point_cloud_demo/depth_to_point_cloud_demo.py b/applications/depth_to_point_cloud_demo/depth_to_point_cloud_demo.py
new file mode 100644
index 0000000000..6fe9d98af9
--- /dev/null
+++ b/applications/depth_to_point_cloud_demo/depth_to_point_cloud_demo.py
@@ -0,0 +1,268 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Demo for DepthToPointCloudOp.
+
+Selectable input source (``--source``):
+
+* ``synthetic`` (default) generates an organized depth image (a gently tilting plane) plus an
+  aligned RGB image entirely on the GPU. No camera or dataset is required, so the app runs in CI.
+* ``realsense`` is scaffolded for a live Intel RealSense camera but is not yet runnable from Python
+  (the ``realsense_camera`` operator ships no Python bindings and emits ``VideoBuffer``s rather than
+  the ``Tensor`` this operator consumes); selecting it raises ``NotImplementedError``. See README.md.
+
+The deprojected organized point cloud is either summarized by ``PointCloudStatsOp`` (default,
+CI-friendly) or rendered as 3D points by ``HolovizOp`` when ``--visualize`` is passed.
+"""
+
+import argparse
+
+import cupy as cp
+from holoscan.conditions import CountCondition
+from holoscan.core import Application, Operator, OperatorSpec
+from holoscan.operators import HolovizOp
+from holoscan.resources import BlockMemoryPool, MemoryStorageType
+
+from holohub.depth_to_point_cloud import DepthToPointCloudOp
+
+
+class SyntheticDepthGeneratorOp(Operator):
+    """Emit a synthetic float32 depth image (meters) and an aligned uint8 RGB image."""
+
+    def __init__(self, fragment, *args, width=640, height=480, **kwargs):
+        self.width = width
+        self.height = height
+        self.frame = 0
+        ys, xs = cp.meshgrid(
+            cp.arange(height, dtype=cp.float32),
+            cp.arange(width, dtype=cp.float32),
+            indexing="ij",
+        )
+        self._xs = xs
+        self._ys = ys
+        super().__init__(fragment, *args, **kwargs)
+
+    def setup(self, spec: OperatorSpec):
+        spec.output("depth")
+        spec.output("color")
+
+    def compute(self, op_input, op_output, context):
+        t = self.frame * 0.05
+        # A tilted plane in meters: ~1.0 m near the top-left, increasing across the frame,
+        # with a slow global oscillation so successive frames differ.
+        depth = (
+            1.0 + 0.5 * (self._xs / self.width) + 0.4 * (self._ys / self.height) + 0.3 * cp.sin(t)
+        ).astype(cp.float32)
+
+        r = (255.0 * self._xs / self.width).astype(cp.uint8)
+        g = (255.0 * self._ys / self.height).astype(cp.uint8)
+        b = cp.full_like(r, 128)
+        color = cp.ascontiguousarray(cp.stack([r, g, b], axis=-1))  # HxWx3 uint8
+
+        op_output.emit({"depth": depth}, "depth")
+        op_output.emit({"color": color}, "color")
+        self.frame += 1
+
+
+class PointCloudStatsOp(Operator):
+    """Pull the point cloud and report valid-point count and Z range (CI-friendly sink)."""
+
+    def setup(self, spec: OperatorSpec):
+        spec.input("in")
+
+    def compute(self, op_input, op_output, context):
+        msg = op_input.receive("in")
+        pc = cp.asarray(msg["point_cloud"])  # HxWx3 float32
+        z = pc[..., 2]
+        valid = ~cp.isnan(z)
+        n_valid = int(valid.sum().get())
+
+        # The colored path is connected, so a colors tensor must accompany the cloud and
+        # share its H x W footprint (3 uint8 channels).
+        colors = cp.asarray(msg["colors"])  # HxWx3 uint8
+        if colors.shape[:2] != pc.shape[:2] or colors.shape[2] != 3:
+            raise RuntimeError(
+                f"colors shape {colors.shape} does not match cloud {pc.shape[:2]} x 3"
+            )
+
+        if n_valid:
+            zmin = float(z[valid].min().get())
+            zmax = float(z[valid].max().get())
+            print(
+                f"[depth_to_point_cloud_demo] points={pc.shape[0] * pc.shape[1]} "
+                f"valid={n_valid} z=[{zmin:.3f}, {zmax:.3f}] m colors={tuple(colors.shape)}"
+            )
+        else:
+            print("[depth_to_point_cloud_demo] no valid points")
+
+
+class CloudToHolovizOp(Operator):
+    """Compact the organized H x W x 3 cloud to a flat N x 3 of finite points for HolovizOp.
+
+    HolovizOp renders a ``points_3d`` primitive from an ``(N, 3)`` coordinate tensor, so the
+    organized cloud is flattened and the invalid (NaN) pixels are dropped first, as recommended in
+    the README. The compaction stays on the GPU (CuPy), keeping the path device-resident.
+    """
+
+    def setup(self, spec: OperatorSpec):
+        spec.input("in")
+        spec.output("out")
+
+    def compute(self, op_input, op_output, context):
+        msg = op_input.receive("in")
+        pc = cp.asarray(msg["point_cloud"]).reshape(-1, 3)  # (H*W, 3) float32
+        valid = cp.isfinite(pc).all(axis=1)
+        pts = pc[valid]  # (N, 3)
+        # Holoviz expects at least one coordinate; emit a degenerate point if the frame is empty.
+        if pts.shape[0] == 0:
+            pts = cp.zeros((1, 3), dtype=cp.float32)
+        coords = cp.ascontiguousarray(pts.astype(cp.float32))  # (N, 3) as HolovizOp expects
+        op_output.emit({"point_cloud": coords}, "out")
+
+
+class DepthToPointCloudDemoApp(Application):
+    def __init__(self, frames=100, width=640, height=480, source="synthetic", visualize=False):
+        super().__init__()
+        self._frames = frames
+        self._width = width
+        self._height = height
+        self._source = source
+        self._visualize = visualize
+
+    def compose(self):
+        if self._source == "realsense":
+            raise NotImplementedError(
+                "--source realsense is not yet runnable from this Python demo: the "
+                "realsense_camera operator ships no Python bindings and emits VideoBuffers "
+                "rather than the Tensor DepthToPointCloudOp consumes. See the 'Using a real "
+                "Intel RealSense camera' section of README.md for the required wiring "
+                "(Python bindings + FormatConverterOp + intrinsics)."
+            )
+        if self._source != "synthetic":
+            raise ValueError(f"unknown source '{self._source}'")
+
+        generator = SyntheticDepthGeneratorOp(
+            self,
+            CountCondition(self, count=self._frames),
+            name="generator",
+            width=self._width,
+            height=self._height,
+        )
+
+        # Two device tensors per frame (HxWx3 float32 point cloud + HxWx3 uint8 colors) drawn
+        # from this pool; size each block for the larger (float32 XYZ) output and keep enough
+        # blocks for both tensors plus one frame of pipelining headroom.
+        out_blocks = 4
+        block_size = self._width * self._height * 3 * 4  # float32 XYZ is the larger output
+        cloud = DepthToPointCloudOp(
+            self,
+            name="point_cloud",
+            allocator=BlockMemoryPool(
+                self,
+                name="pool",
+                storage_type=MemoryStorageType.DEVICE,
+                block_size=block_size,
+                num_blocks=out_blocks,
+            ),
+            # Pinhole intrinsics for the synthetic camera: square pixels (fx == fy) with the
+            # principal point at the image center. A single focal length is used for both axes
+            # by design; the focal length is independent of the image aspect ratio.
+            fx=float(self._width) * 0.8,
+            fy=float(self._width) * 0.8,
+            cx=(self._width - 1) / 2.0,
+            cy=(self._height - 1) / 2.0,
+            depth_scale=1.0,  # synthetic depth is already in meters
+            depth_min=0.1,
+            depth_max=10.0,
+        )
+
+        self.add_flow(generator, cloud, {("depth", "depth")})
+        self.add_flow(generator, cloud, {("color", "color")})
+
+        if self._visualize:
+            # Compact the organized cloud to N x 3 finite points, then render as 3D points.
+            compact = CloudToHolovizOp(self, name="compact")
+            visualizer = HolovizOp(
+                self,
+                name="holoviz",
+                window_title="depth_to_point_cloud_demo",
+                width=1280,
+                height=720,
+                # The cloud lives in the camera optical frame (x-right, y-down, z-forward),
+                # centered near (0, 0, ~1.5 m). Place the camera in front of and above it (negative
+                # z is "in front", negative y is "up" in this frame) so the tilted plane is framed.
+                camera_eye=[1.5, -1.5, -1.0],
+                camera_look_at=[0.0, 0.0, 1.5],
+                camera_up=[0.0, -1.0, 0.0],
+                tensors=[
+                    dict(
+                        name="point_cloud",
+                        type="points_3d",
+                        color=[0.0, 1.0, 0.0, 1.0],
+                        point_size=3.0,
+                    ),
+                ],
+            )
+            self.add_flow(cloud, compact, {("point_cloud", "in")})
+            self.add_flow(compact, visualizer, {("out", "receivers")})
+        else:
+            sink = PointCloudStatsOp(self, name="stats")
+            self.add_flow(cloud, sink, {("point_cloud", "in")})
+
+
+def main():
+    parser = argparse.ArgumentParser(description="DepthToPointCloudOp demo")
+    parser.add_argument(
+        "-s",
+        "--source",
+        choices=["synthetic", "realsense"],
+        default="synthetic",
+        help=(
+            "Input source. 'synthetic' (default) uses an on-GPU depth+RGB generator (hardware-free, "
+            "CI-friendly); 'realsense' targets a live Intel RealSense camera (see README — not yet "
+            "runnable from Python)."
+        ),
+    )
+    parser.add_argument(
+        "--visualize",
+        action="store_true",
+        help="Render the cloud as 3D points in HolovizOp instead of the statistics sink "
+        "(requires a display).",
+    )
+    parser.add_argument("--frames", type=int, default=100, help="Number of frames to process")
+    parser.add_argument("--width", type=int, default=640)
+    parser.add_argument("--height", type=int, default=480)
+    args = parser.parse_args()
+
+    for name, value in (
+        ("--frames", args.frames),
+        ("--width", args.width),
+        ("--height", args.height),
+    ):
+        if value <= 0:
+            parser.error(f"{name} must be a positive integer (got {value})")
+
+    app = DepthToPointCloudDemoApp(
+        frames=args.frames,
+        width=args.width,
+        height=args.height,
+        source=args.source,
+        visualize=args.visualize,
+    )
+    app.run()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/applications/depth_to_point_cloud_demo/metadata.json b/applications/depth_to_point_cloud_demo/metadata.json
new file mode 100644
index 0000000000..cc0f81f783
--- /dev/null
+++ b/applications/depth_to_point_cloud_demo/metadata.json
@@ -0,0 +1,55 @@
+{
+	"$schema": "urn:holohub:application:v1",
+	"application": {
+		"name": "Depth to Point Cloud Demo",
+		"description": "Hardware-free demo of DepthToPointCloudOp: a synthetic GPU depth+RGB generator is deprojected into an organized point cloud and validated each frame.",
+		"authors": [
+			{
+				"name": "Nitheesh Kumar",
+				"affiliation": "Zobot Lab"
+			}
+		],
+		"language": "Python",
+		"version": "0.1.0",
+		"changelog": {
+			"0.1.0": "Initial release: synthetic depth generator -> DepthToPointCloudOp -> validation sink."
+		},
+		"holoscan_sdk": {
+			"minimum_required_version": "4.0.0",
+			"tested_versions": [
+				"4.3.0"
+			]
+		},
+		"platforms": [
+			"x86_64",
+			"aarch64"
+		],
+		"tags": ["Computer Vision and Perception", "Robotics", "Point Cloud", "Depth", "3D"],
+		"ranking": 3,
+		"requirements": {
+			"python-packages": [
+				{
+					"name": "cupy",
+					"version": "13.6.0"
+				}
+			]
+		},
+		"default_mode": "synthetic",
+		"modes": {
+			"synthetic": {
+				"description": "Hardware-free on-GPU synthetic depth+RGB source (CI-friendly)",
+				"run": {
+					"command": "python3 <holohub_app_source>/depth_to_point_cloud_demo.py --source synthetic",
+					"workdir": "holohub_bin"
+				}
+			},
+			"realsense": {
+				"description": "Live Intel RealSense camera source (requires hardware; see README — not yet runnable from Python)",
+				"run": {
+					"command": "python3 <holohub_app_source>/depth_to_point_cloud_demo.py --source realsense",
+					"workdir": "holohub_bin"
+				}
+			}
+		}
+	}
+}
diff --git a/operators/CMakeLists.txt b/operators/CMakeLists.txt
index ee5ac0ae1f..9a3e09c88f 100644
--- a/operators/CMakeLists.txt
+++ b/operators/CMakeLists.txt
@@ -21,6 +21,7 @@ add_holohub_operator(atracsys_mode_switcher)
 add_holohub_operator(cvcuda_holoscan_interop)
 add_subdirectory(deidentification)
 add_subdirectory(dds)
+add_holohub_operator(depth_to_point_cloud)
 add_holohub_operator(display_gpu_resident)
 add_holohub_operator(ucxx_send_receive)
 add_holohub_operator(emergent_source DEPENDS EXTENSIONS emergent_source)
diff --git a/operators/depth_to_point_cloud/CMakeLists.txt b/operators/depth_to_point_cloud/CMakeLists.txt
new file mode 100644
index 0000000000..4062e4aa7c
--- /dev/null
+++ b/operators/depth_to_point_cloud/CMakeLists.txt
@@ -0,0 +1,54 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+cmake_minimum_required(VERSION 3.24)
+
+project(depth_to_point_cloud LANGUAGES CXX CUDA)
+
+find_package(holoscan 4.0.0 REQUIRED CONFIG
+             PATHS "/opt/nvidia/holoscan" "/workspace/holoscan-sdk/install")
+
+add_library(depth_to_point_cloud SHARED
+  depth_to_point_cloud.cpp
+  depth_to_point_cloud.hpp
+  deproject.cu
+  deproject.hpp
+)
+
+set_target_properties(depth_to_point_cloud
+  PROPERTIES
+    # compile for the architecture of the current GPU
+    CUDA_ARCHITECTURES "native"
+)
+
+target_link_libraries(depth_to_point_cloud
+  PUBLIC
+    holoscan::core
+)
+
+target_include_directories(depth_to_point_cloud
+  INTERFACE
+    ${CMAKE_CURRENT_SOURCE_DIR}
+)
+
+if(HOLOHUB_BUILD_PYTHON)
+    add_subdirectory(python)
+endif()
+
+if(BUILD_TESTING)
+    add_subdirectory(test)
+endif()
+
+install(TARGETS depth_to_point_cloud)
diff --git a/operators/depth_to_point_cloud/README.md b/operators/depth_to_point_cloud/README.md
new file mode 100644
index 0000000000..5e0ec07e8a
--- /dev/null
+++ b/operators/depth_to_point_cloud/README.md
@@ -0,0 +1,101 @@
+# Depth to Point Cloud Operator
+
+Deproject an organized depth image into an organized point cloud on the GPU. This is the
+"gateway to 3D" building block: it turns a depth image (from a depth camera, stereo matcher,
+or monocular depth network) plus pinhole intrinsics into per-pixel `XYZ` points that downstream
+operators (mapping, ground-plane / traversability estimation, registration, Holoviz 3D rendering,
+PCL/Open3D interop) can consume — all GPU-resident, zero-copy on the hot path.
+
+## What it computes
+
+For each pixel `(u, v)` with metric depth `Z = raw_depth * depth_scale`, the output point in the
+**camera optical frame** (x-right, y-down, z-forward) is:
+
+```text
+X = (u - cx) * Z / fx
+Y = (v - cy) * Z / fy
+Z = Z
+```
+
+The kernel is a single map/elementwise pass (one CUDA thread per pixel, grid-stride, coalesced).
+Pixels with `raw_depth == 0`, a non-finite value, or a metric depth outside `[depth_min, depth_max]`
+are written as `(invalid_value, invalid_value, invalid_value)` (default `NaN`), keeping the output
+**organized** (`H x W x 3`) so pixel neighborhoods are preserved for downstream normals/segmentation.
+
+> Frame note: the output is in the optical frame. For a ROS body frame (x-forward, y-left, z-up)
+> apply the standard optical→body rotation downstream.
+
+## Ports
+
+| Port | Direction | Type | Notes |
+| --- | --- | --- | --- |
+| `depth` | in | `Entity` w/ 2D tensor | `uint16` (scaled by `depth_scale`) or `float32` (meters at `depth_scale=1.0`), shape `[H, W]` or `[H, W, 1]`, device memory |
+| `intrinsics` | in (optional) | `Entity` w/ `float32[4]` | `[fx, fy, cx, cy]`; overrides the params for that frame |
+| `color` | in (optional) | `Entity` w/ `uint8` image | `[H, W, 3]` or `[H, W, 4]` aligned to depth; enables colored output |
+| `point_cloud` | out | `Entity` | `float32 [H, W, 3]` XYZ (and `uint8 [H, W, 3]` RGB colors when `color` is connected) |
+
+The emitted `colors` tensor is always 3-channel RGB; a 4-channel (RGBA) `color` input is converted to
+RGB and its alpha channel is dropped.
+
+## Parameters
+
+| Parameter | Default | Description |
+| --- | --- | --- |
+| `fx`, `fy`, `cx`, `cy` | `0.0` | Pinhole intrinsics in pixels (used when `intrinsics` port is unconnected) |
+| `depth_scale` | `0.001` | Raw-depth → meters multiplier (`0.001` for uint16 mm; `1.0` for float32 m) |
+| `depth_min`, `depth_max` | `0.0`, `100.0` | Valid metric depth range (meters) |
+| `invalid_value` | `NaN` | Value written to X/Y/Z for invalid pixels |
+| `depth_tensor_name` / `color_tensor_name` | `""` | Input tensor names (empty = first tensor) |
+| `output_tensor_name` | `"point_cloud"` | Name of the emitted XYZ tensor |
+| `output_color_tensor_name` | `"colors"` | Name of the emitted colors tensor |
+| `allocator` | — | Device allocator for the output tensors (e.g. `BlockMemoryPool`) |
+
+## Usage
+
+### Python
+
+```python
+from holohub.depth_to_point_cloud import DepthToPointCloudOp
+
+cloud = DepthToPointCloudOp(
+    self,
+    name="point_cloud",
+    allocator=BlockMemoryPool(self, ...),
+    fx=fx, fy=fy, cx=cx, cy=cy,
+    depth_scale=0.001,          # uint16 millimeters
+    depth_min=0.1, depth_max=10.0,
+)
+# depth_source -> cloud -> HolovizOp (3D points)
+```
+
+### C++
+
+```cpp
+auto cloud = make_operator<ops::DepthToPointCloudOp>(
+    "point_cloud",
+    Arg("allocator", make_resource<BlockMemoryPool>(...)),
+    Arg("fx", fx), Arg("fy", fy), Arg("cx", cx), Arg("cy", cy),
+    Arg("depth_scale", 0.001f));
+```
+
+## Testing
+
+`test/test_deproject.cu` is a standalone golden-reference unit test that depends only on the CUDA
+runtime (no Holoscan SDK). It verifies the deprojection math (float32 and uint16 paths), invalid /
+out-of-range handling, and color passthrough against an analytic CPU reference:
+
+```bash
+nvcc -O2 -arch=native -o test_deproject test/test_deproject.cu deproject.cu && ./test_deproject
+```
+
+It is also registered with CTest (`depth_to_point_cloud_test`) when the project is built with
+`BUILD_TESTING` enabled.
+
+## Requirements
+
+- Holoscan SDK ≥ 4.0.0
+- CUDA runtime
+- Platforms: `x86_64`, `aarch64` (Jetson)
+- No third-party dependencies beyond the CUDA runtime — the deprojection runs as a custom CUDA
+  kernel (no VPI/PVA/VIC fixed-function block exists for depth deprojection, and it is not a
+  library-shaped op).
diff --git a/operators/depth_to_point_cloud/deproject.cu b/operators/depth_to_point_cloud/deproject.cu
new file mode 100644
index 0000000000..4e7b93e34d
--- /dev/null
+++ b/operators/depth_to_point_cloud/deproject.cu
@@ -0,0 +1,98 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "deproject.hpp"
+
+namespace holoscan::ops {
+
+// Pattern 1 (map / elementwise): one thread per pixel, grid-stride loop.
+template <typename T>
+__global__ void __launch_bounds__(256) deproject_kernel(
+    const T* __restrict__ depth, float depth_scale, CameraIntrinsics k, float depth_min,
+    float depth_max, float invalid_value, const uchar3* __restrict__ color3,
+    const uchar4* __restrict__ color4, float3* __restrict__ out_xyz,
+    uchar3* __restrict__ out_color, int width, int height) {
+  const int n = width * height;
+  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
+    const int u = i % width;
+    const int v = i / width;
+
+    const T raw = depth[i];
+    const float z = static_cast<float>(raw) * depth_scale;
+    // Use a positive-depth check (rather than `raw != 0`) so that negative and NaN depths
+    // are uniformly rejected for both the uint16 and float32 paths.
+    const bool valid = (z > 0.0f) && isfinite(z) && (z >= depth_min) && (z <= depth_max);
+
+    out_xyz[i] = valid ? make_float3((u - k.cx) * z / k.fx, (v - k.cy) * z / k.fy, z)
+                       : make_float3(invalid_value, invalid_value, invalid_value);
+
+    if (out_color != nullptr) {
+      if (color4 != nullptr) {
+        const uchar4 c = color4[i];
+        out_color[i] = make_uchar3(c.x, c.y, c.z);
+      } else if (color3 != nullptr) {
+        out_color[i] = color3[i];
+      }
+    }
+  }
+}
+
+cudaError_t launch_deproject(const void* depth, DepthDType dtype, float depth_scale,
+                             CameraIntrinsics intr, float depth_min, float depth_max,
+                             float invalid_value, const void* color, int color_channels,
+                             float3* out_xyz, uchar3* out_color, int width, int height,
+                             cudaStream_t stream) {
+  const int n = width * height;
+  if (n <= 0) { return cudaSuccess; }
+
+  constexpr int kBlock = 256;
+  // One block per kBlock pixels. All targeted architectures allow up to 2^31-1 blocks in x,
+  // far beyond any image resolution, so no grid cap is needed; the grid-stride loop in the
+  // kernel still covers any n correctly.
+  const int grid = (n + kBlock - 1) / kBlock;
+
+  const uchar3* color3 = nullptr;
+  const uchar4* color4 = nullptr;
+  if (color != nullptr && out_color != nullptr) {
+    if (color_channels == 4) {
+      color4 = static_cast<const uchar4*>(color);
+    } else if (color_channels == 3) {
+      color3 = static_cast<const uchar3*>(color);
+    } else {
+      return cudaErrorInvalidValue;  // only 3- or 4-channel uint8 color is supported
+    }
+  }
+
+  switch (dtype) {
+    case DepthDType::kUint16:
+      deproject_kernel<uint16_t><<<grid, kBlock, 0, stream>>>(
+          static_cast<const uint16_t*>(depth), depth_scale, intr, depth_min, depth_max,
+          invalid_value, color3, color4, out_xyz, out_color, width, height);
+      break;
+    case DepthDType::kFloat32:
+      deproject_kernel<float><<<grid, kBlock, 0, stream>>>(
+          static_cast<const float*>(depth), depth_scale, intr, depth_min, depth_max, invalid_value,
+          color3, color4, out_xyz, out_color, width, height);
+      break;
+    default:
+      return cudaErrorInvalidValue;  // unknown depth dtype -> no kernel launched
+  }
+
+  return cudaPeekAtLastError();
+}
+
+}  // namespace holoscan::ops
diff --git a/operators/depth_to_point_cloud/deproject.hpp b/operators/depth_to_point_cloud/deproject.hpp
new file mode 100644
index 0000000000..5d8e9f811f
--- /dev/null
+++ b/operators/depth_to_point_cloud/deproject.hpp
@@ -0,0 +1,77 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef HOLOSCAN_OPERATORS_DEPTH_TO_POINT_CLOUD_DEPROJECT_HPP
+#define HOLOSCAN_OPERATORS_DEPTH_TO_POINT_CLOUD_DEPROJECT_HPP
+
+#include <cuda_runtime.h>
+
+#include <cstdint>
+
+// This header is intentionally free of any Holoscan dependency so the deproject
+// kernel can be unit-tested in isolation with nvcc (see test/test_deproject.cu).
+
+namespace holoscan::ops {
+
+/// Supported element types of the input depth image.
+enum class DepthDType : int { kUint16 = 0, kFloat32 = 1 };
+
+/// Pinhole camera intrinsics in pixels.
+struct CameraIntrinsics {
+  float fx;
+  float fy;
+  float cx;
+  float cy;
+};
+
+/**
+ * @brief Deproject an organized depth image into an organized HxWx3 point cloud.
+ *
+ * One CUDA thread per pixel (grid-stride). For each pixel (u, v) with metric depth
+ * Z = raw_depth * depth_scale, the output point (optical frame: x-right, y-down,
+ * z-forward) is:
+ *     X = (u - cx) * Z / fx
+ *     Y = (v - cy) * Z / fy
+ *     Z = Z
+ * Pixels with raw depth == 0 or metric depth outside [depth_min, depth_max] are
+ * written as (invalid_value, invalid_value, invalid_value).
+ *
+ * @param depth          device pointer to the HxW depth image (element type = dtype)
+ * @param dtype          depth element type (uint16 or float32)
+ * @param depth_scale    multiply raw depth by this to get meters (e.g. 0.001 for uint16 mm)
+ * @param intr           pinhole intrinsics in pixels
+ * @param depth_min      minimum valid metric depth (meters), inclusive
+ * @param depth_max      maximum valid metric depth (meters), inclusive
+ * @param invalid_value  value written to X/Y/Z for invalid pixels (e.g. NaN or 0)
+ * @param color          optional device pointer to HxW color (uchar3/uchar4); nullptr to disable
+ * @param color_channels 3 or 4; ignored when color == nullptr
+ * @param out_xyz        device pointer to HxW float3 output (organized, AoS)
+ * @param out_color      optional device pointer to HxW uchar3 output; nullptr to disable
+ * @param width          image width in pixels
+ * @param height         image height in pixels
+ * @param stream         CUDA stream to launch on
+ * @return cudaPeekAtLastError() result after the launch
+ */
+cudaError_t launch_deproject(const void* depth, DepthDType dtype, float depth_scale,
+                             CameraIntrinsics intr, float depth_min, float depth_max,
+                             float invalid_value, const void* color, int color_channels,
+                             float3* out_xyz, uchar3* out_color, int width, int height,
+                             cudaStream_t stream);
+
+}  // namespace holoscan::ops
+
+#endif /* HOLOSCAN_OPERATORS_DEPTH_TO_POINT_CLOUD_DEPROJECT_HPP */
diff --git a/operators/depth_to_point_cloud/depth_to_point_cloud.cpp b/operators/depth_to_point_cloud/depth_to_point_cloud.cpp
new file mode 100644
index 0000000000..d0cf25cbf9
--- /dev/null
+++ b/operators/depth_to_point_cloud/depth_to_point_cloud.cpp
@@ -0,0 +1,244 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "depth_to_point_cloud.hpp"
+
+#include <limits>
+#include <memory>
+#include <string>
+
+#include <cuda_runtime.h>
+
+#include <holoscan/core/execution_context.hpp>
+#include <holoscan/core/io_context.hpp>
+
+#include <gxf/std/tensor.hpp>
+
+#include "deproject.hpp"
+
+#define CUDA_TRY(stmt)                                                                        \
+  {                                                                                           \
+    cudaError_t cuda_status = stmt;                                                           \
+    if (cudaSuccess != cuda_status) {                                                         \
+      HOLOSCAN_LOG_ERROR("CUDA runtime call {} in line {} of file {} failed with '{}' ({}).", \
+                         #stmt,                                                               \
+                         __LINE__,                                                            \
+                         __FILE__,                                                            \
+                         cudaGetErrorString(cuda_status),                                     \
+                         static_cast<int>(cuda_status));                                      \
+      throw std::runtime_error("CUDA runtime call failed");                                   \
+    }                                                                                         \
+  }
+
+namespace holoscan::ops {
+
+namespace {
+
+// Map a DLPack dtype to the kernel's depth element type.
+DepthDType to_depth_dtype(const DLDataType& dtype) {
+  if (dtype.code == kDLFloat && dtype.bits == 32) { return DepthDType::kFloat32; }
+  if (dtype.code == kDLUInt && dtype.bits == 16) { return DepthDType::kUint16; }
+  throw std::runtime_error(
+      "DepthToPointCloudOp: unsupported depth dtype (expected float32 or uint16)");
+}
+
+// Fetch a tensor from an entity, by name if given, otherwise the first tensor.
+std::shared_ptr<Tensor> get_tensor(const holoscan::gxf::Entity& message, const std::string& name) {
+  auto maybe = name.empty() ? message.get<Tensor>() : message.get<Tensor>(name.c_str());
+  if (!maybe) {
+    throw std::runtime_error("DepthToPointCloudOp: input tensor '" + name + "' not found");
+  }
+  return maybe;
+}
+
+}  // namespace
+
+void DepthToPointCloudOp::setup(OperatorSpec& spec) {
+  auto& depth_in = spec.input<gxf::Entity>("depth");
+  // Optional inputs must not block execution when unconnected.
+  auto& intrinsics_in = spec.input<gxf::Entity>("intrinsics").condition(ConditionType::kNone);
+  auto& color_in = spec.input<gxf::Entity>("color").condition(ConditionType::kNone);
+  auto& cloud_out = spec.output<gxf::Entity>("point_cloud");
+  (void)depth_in;
+  (void)intrinsics_in;
+  (void)color_in;
+  (void)cloud_out;
+
+  spec.param(fx_, "fx", "Focal length x", "Focal length in pixels (x).", 0.0f);
+  spec.param(fy_, "fy", "Focal length y", "Focal length in pixels (y).", 0.0f);
+  spec.param(cx_, "cx", "Principal point x", "Principal point in pixels (x).", 0.0f);
+  spec.param(cy_, "cy", "Principal point y", "Principal point in pixels (y).", 0.0f);
+  spec.param(depth_scale_, "depth_scale", "Depth scale",
+             "Multiplier converting raw depth to meters (e.g. 0.001 for uint16 mm).", 0.001f);
+  spec.param(depth_min_, "depth_min", "Min depth", "Minimum valid depth in meters.", 0.0f);
+  spec.param(depth_max_, "depth_max", "Max depth", "Maximum valid depth in meters.", 100.0f);
+  spec.param(invalid_value_, "invalid_value", "Invalid value",
+             "Value written to X/Y/Z for invalid pixels.",
+             std::numeric_limits<float>::quiet_NaN());
+  spec.param(depth_tensor_name_, "depth_tensor_name", "Depth tensor name",
+             "Name of the depth tensor in the input message (empty = first tensor).",
+             std::string(""));
+  spec.param(color_tensor_name_, "color_tensor_name", "Color tensor name",
+             "Name of the color tensor in the color message (empty = first tensor).",
+             std::string(""));
+  spec.param(output_tensor_name_, "output_tensor_name", "Output tensor name",
+             "Name of the emitted point-cloud tensor.", std::string("point_cloud"));
+  spec.param(output_color_tensor_name_, "output_color_tensor_name", "Output color tensor name",
+             "Name of the emitted colors tensor.", std::string("colors"));
+  spec.param(allocator_, "allocator", "Allocator", "Device allocator for output tensors.");
+}
+
+void DepthToPointCloudOp::compute(InputContext& op_input, OutputContext& op_output,
+                                    ExecutionContext& context) {
+  auto depth_message = op_input.receive<gxf::Entity>("depth").value();
+
+  // Acquire this operator's CUDA stream and synchronize the depth producer onto it (via events).
+  // The optional intrinsics/color producers are synchronized below when those ports are connected,
+  // so every input the kernel reads is correctly ordered even if it comes from a different stream.
+  const cudaStream_t stream = op_input.receive_cuda_stream("depth");
+
+  // --- Depth tensor: dtype, dimensions, device pointer ---
+  auto depth_tensor = get_tensor(depth_message, depth_tensor_name_.get());
+  const DepthDType depth_dtype = to_depth_dtype(depth_tensor->dtype());
+  const auto& shape = depth_tensor->shape();
+  // Accept only [H, W] or [H, W, 1]; a higher-rank tensor (e.g. [H, W, 3]) would otherwise be
+  // silently reinterpreted as a single-channel depth buffer.
+  if (shape.size() < 2 || shape.size() > 3 ||
+      (shape.size() == 3 && static_cast<int>(shape[2]) != 1)) {
+    throw std::runtime_error("DepthToPointCloudOp: depth tensor must be [H, W] or [H, W, 1]");
+  }
+  const int height = static_cast<int>(shape[0]);
+  const int width = static_cast<int>(shape[1]);
+
+  // --- Optional per-frame intrinsics override ---
+  CameraIntrinsics intr{fx_.get(), fy_.get(), cx_.get(), cy_.get()};
+  if (auto maybe_intr = op_input.receive<gxf::Entity>("intrinsics")) {
+    op_input.receive_cuda_stream("intrinsics");  // sync the intrinsics producer onto `stream`
+    auto intr_tensor = get_tensor(maybe_intr.value(), std::string(""));
+    const DLDataType idt = intr_tensor->dtype();
+    if (intr_tensor->size() < 4 || idt.code != kDLFloat || idt.bits != 32) {
+      throw std::runtime_error(
+          "DepthToPointCloudOp: intrinsics tensor must be float32 [fx, fy, cx, cy]");
+    }
+    float host[4];
+    // Tiny (16 B) config read; pixel data stays GPU-resident. The intrinsics tensor is
+    // produced by an upstream operator on `stream`, so the copy must be ordered on that
+    // same stream (a plain cudaMemcpy on the default stream would not wait for it).
+    // Known cost: this per-frame stream sync runs ONLY when the optional `intrinsics`
+    // port is connected; pipelines that pass static fx/fy/cx/cy via parameters (the common
+    // case) skip this branch entirely and incur no sync.
+    CUDA_TRY(cudaMemcpyAsync(host, intr_tensor->data(), sizeof(host), cudaMemcpyDefault, stream));
+    CUDA_TRY(cudaStreamSynchronize(stream));
+    intr = CameraIntrinsics{host[0], host[1], host[2], host[3]};
+  }
+
+  // Focal lengths must be non-zero (they divide the deprojection); the defaults are 0.0, so a
+  // caller that neither sets the fx/fy params nor connects the intrinsics input is rejected here
+  // rather than dividing by zero in the kernel.
+  if (intr.fx <= 0.0f || intr.fy <= 0.0f) {
+    throw std::runtime_error(
+        "DepthToPointCloudOp: fx and fy must be positive (set the fx/fy parameters or connect the "
+        "intrinsics input)");
+  }
+
+  // --- Optional color input ---
+  const void* color_ptr = nullptr;
+  int color_channels = 0;
+  if (auto maybe_color = op_input.receive<gxf::Entity>("color")) {
+    op_input.receive_cuda_stream("color");  // sync the color producer onto `stream`
+    auto color_tensor = get_tensor(maybe_color.value(), color_tensor_name_.get());
+    // The kernel reinterprets the color buffer as uchar3/uchar4, so the element type must be
+    // uint8 (a float or other-width tensor would be misread byte-for-byte).
+    const DLDataType cdt = color_tensor->dtype();
+    if (cdt.code != kDLUInt || cdt.bits != 8) {
+      throw std::runtime_error("DepthToPointCloudOp: color tensor must be uint8");
+    }
+    const auto& cshape = color_tensor->shape();
+    // Require an explicit channel dimension; a 2D [H, W] tensor is rejected rather than
+    // silently assumed to be 3-channel (which would read past the buffer in the kernel).
+    if (cshape.size() < 3) {
+      throw std::runtime_error(
+          "DepthToPointCloudOp: color tensor must be H x W x 3 (uchar3) or H x W x 4 (uchar4)");
+    }
+    color_channels = static_cast<int>(cshape[2]);
+    if (color_channels != 3 && color_channels != 4) {
+      throw std::runtime_error(
+          "DepthToPointCloudOp: color tensor must be H x W x 3 (uchar3) or H x W x 4 (uchar4)");
+    }
+    if (static_cast<int>(cshape[0]) != height || static_cast<int>(cshape[1]) != width) {
+      throw std::runtime_error(
+          "DepthToPointCloudOp: color image dimensions must match the depth image");
+    }
+    color_ptr = color_tensor->data();
+  }
+
+  // --- Allocate outputs ---
+  auto allocator = nvidia::gxf::Handle<nvidia::gxf::Allocator>::Create(
+      context.context(), allocator_.get()->gxf_cid());
+  if (!allocator) {
+    throw std::runtime_error("DepthToPointCloudOp: failed to create allocator handle");
+  }
+  auto out_message = nvidia::gxf::Entity::New(context.context());
+  if (!out_message) {
+    throw std::runtime_error("DepthToPointCloudOp: failed to create output entity");
+  }
+
+  auto xyz_tensor = out_message.value().add<nvidia::gxf::Tensor>(output_tensor_name_.get().c_str());
+  if (!xyz_tensor) {
+    throw std::runtime_error("DepthToPointCloudOp: failed to add point_cloud tensor to message");
+  }
+  xyz_tensor.value()->reshape<float>(nvidia::gxf::Shape{height, width, 3},
+                                     nvidia::gxf::MemoryStorageType::kDevice, allocator.value());
+  if (!xyz_tensor.value()->pointer()) {
+    throw std::runtime_error("DepthToPointCloudOp: failed to allocate point_cloud tensor");
+  }
+
+  uchar3* out_color = nullptr;
+  if (color_ptr != nullptr) {
+    auto color_out =
+        out_message.value().add<nvidia::gxf::Tensor>(output_color_tensor_name_.get().c_str());
+    if (!color_out) {
+      throw std::runtime_error("DepthToPointCloudOp: failed to add colors tensor to message");
+    }
+    color_out.value()->reshape<uint8_t>(nvidia::gxf::Shape{height, width, 3},
+                                        nvidia::gxf::MemoryStorageType::kDevice, allocator.value());
+    if (!color_out.value()->pointer()) {
+      throw std::runtime_error("DepthToPointCloudOp: failed to allocate colors tensor");
+    }
+    out_color = reinterpret_cast<uchar3*>(color_out.value()->pointer());
+  }
+
+  CUDA_TRY(launch_deproject(depth_tensor->data(),
+                            depth_dtype,
+                            depth_scale_.get(),
+                            intr,
+                            depth_min_.get(),
+                            depth_max_.get(),
+                            invalid_value_.get(),
+                            color_ptr,
+                            color_channels,
+                            reinterpret_cast<float3*>(xyz_tensor.value()->pointer()),
+                            out_color,
+                            width,
+                            height,
+                            stream));
+
+  auto result = gxf::Entity(std::move(out_message.value()));
+  op_output.emit(result, "point_cloud");
+}
+
+}  // namespace holoscan::ops
diff --git a/operators/depth_to_point_cloud/depth_to_point_cloud.hpp b/operators/depth_to_point_cloud/depth_to_point_cloud.hpp
new file mode 100644
index 0000000000..54cdf95004
--- /dev/null
+++ b/operators/depth_to_point_cloud/depth_to_point_cloud.hpp
@@ -0,0 +1,91 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef HOLOSCAN_OPERATORS_DEPTH_TO_POINT_CLOUD_DEPTH_TO_POINT_CLOUD_HPP
+#define HOLOSCAN_OPERATORS_DEPTH_TO_POINT_CLOUD_DEPTH_TO_POINT_CLOUD_HPP
+
+#include <memory>
+#include <string>
+
+#include "holoscan/core/operator.hpp"
+
+namespace holoscan::ops {
+
+/**
+ * @brief Deproject an organized depth image into an organized point cloud on the GPU.
+ *
+ * ==Named Inputs==
+ *
+ * - **depth** : `nvidia::gxf::Entity` containing a 2D depth `nvidia::gxf::Tensor`
+ *   - Element type `uint16` (raw units scaled by `depth_scale`) or `float32` (meters when
+ *     `depth_scale == 1.0`). Shape `[H, W]` or `[H, W, 1]`, device memory.
+ * - **intrinsics** *(optional)* : `nvidia::gxf::Entity` containing a `float32` tensor of 4
+ *   values `[fx, fy, cx, cy]`. When present, overrides the `fx/fy/cx/cy` parameters for the
+ *   current frame (e.g. for sensors that publish per-stream intrinsics).
+ * - **color** *(optional)* : `nvidia::gxf::Entity` containing an `H x W x 3` or `H x W x 4`
+ *   `uint8` color image aligned to the depth image. When connected, a colored output is emitted.
+ *
+ * ==Named Outputs==
+ *
+ * - **point_cloud** : `nvidia::gxf::Entity` containing:
+ *   - a `float32` tensor (`output_tensor_name`, default `"point_cloud"`) of shape `[H, W, 3]`
+ *     (organized, AoS XYZ) in the camera optical frame (x-right, y-down, z-forward). Invalid
+ *     pixels (depth == 0 or outside `[depth_min, depth_max]`) are set to `invalid_value`; and
+ *   - *(only when `color` is connected)* a `uint8` tensor (`output_color_tensor_name`, default
+ *     `"colors"`) of shape `[H, W, 3]`.
+ *
+ * ==Parameters==
+ *
+ * - **fx**, **fy**, **cx**, **cy**: pinhole intrinsics in pixels (used when the `intrinsics`
+ *   port is not connected).
+ * - **depth_scale**: multiply raw depth by this to get meters (default `0.001`, i.e. uint16 mm).
+ * - **depth_min**, **depth_max**: valid metric depth range in meters (defaults `0.0` / `100.0`).
+ * - **invalid_value**: value written to X/Y/Z for invalid pixels (default `NaN`).
+ * - **depth_tensor_name**, **color_tensor_name**: input tensor names (default: first tensor).
+ * - **output_tensor_name**: name of the emitted point-cloud tensor (default `"point_cloud"`).
+ * - **output_color_tensor_name**: name of the emitted colors tensor (default `"colors"`).
+ * - **allocator**: device `holoscan::Allocator` for the output tensors (e.g. BlockMemoryPool).
+ */
+class DepthToPointCloudOp : public holoscan::Operator {
+ public:
+  HOLOSCAN_OPERATOR_FORWARD_ARGS(DepthToPointCloudOp)
+
+  DepthToPointCloudOp() = default;
+
+  void setup(OperatorSpec& spec) override;
+  void compute(InputContext& op_input, OutputContext& op_output,
+               ExecutionContext& context) override;
+
+ private:
+  Parameter<float> fx_;
+  Parameter<float> fy_;
+  Parameter<float> cx_;
+  Parameter<float> cy_;
+  Parameter<float> depth_scale_;
+  Parameter<float> depth_min_;
+  Parameter<float> depth_max_;
+  Parameter<float> invalid_value_;
+  Parameter<std::string> depth_tensor_name_;
+  Parameter<std::string> color_tensor_name_;
+  Parameter<std::string> output_tensor_name_;
+  Parameter<std::string> output_color_tensor_name_;
+  Parameter<std::shared_ptr<Allocator>> allocator_;
+};
+
+}  // namespace holoscan::ops
+
+#endif /* HOLOSCAN_OPERATORS_DEPTH_TO_POINT_CLOUD_DEPTH_TO_POINT_CLOUD_HPP */
diff --git a/operators/depth_to_point_cloud/metadata.json b/operators/depth_to_point_cloud/metadata.json
new file mode 100644
index 0000000000..e87bb3ba17
--- /dev/null
+++ b/operators/depth_to_point_cloud/metadata.json
@@ -0,0 +1,30 @@
+{
+	"$schema": "urn:holohub:operator:v1",
+	"operator": {
+		"name": "DepthToPointCloudOp",
+		"authors": [
+			{
+				"name": "Nitheesh Kumar",
+				"affiliation": "Zobot Lab"
+			}
+		],
+		"language": ["C++", "Python"],
+		"version": "0.1.0",
+		"changelog": {
+			"0.1.0": "Initial release: GPU deprojection of organized depth into an organized point cloud (uint16/float32 depth, per-frame intrinsics override, optional colored output)."
+		},
+		"holoscan_sdk": {
+			"minimum_required_version": "4.0.0",
+			"tested_versions": [
+				"4.3.0"
+			]
+		},
+		"platforms": [
+			"x86_64",
+			"aarch64"
+		],
+		"tags": ["Computer Vision and Perception", "Robotics", "Point Cloud", "Depth", "3D", "Camera"],
+		"ranking": 3,
+		"requirements": {}
+	}
+}
diff --git a/operators/depth_to_point_cloud/python/CMakeLists.txt b/operators/depth_to_point_cloud/python/CMakeLists.txt
new file mode 100644
index 0000000000..dd5a6e4ac9
--- /dev/null
+++ b/operators/depth_to_point_cloud/python/CMakeLists.txt
@@ -0,0 +1,21 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+include(pybind11_add_holohub_module)
+pybind11_add_holohub_module(
+    CPP_CMAKE_TARGET depth_to_point_cloud
+    CLASS_NAME "DepthToPointCloudOp"
+    SOURCES depth_to_point_cloud.cpp
+)
diff --git a/operators/depth_to_point_cloud/python/depth_to_point_cloud.cpp b/operators/depth_to_point_cloud/python/depth_to_point_cloud.cpp
new file mode 100644
index 0000000000..fa71002d5f
--- /dev/null
+++ b/operators/depth_to_point_cloud/python/depth_to_point_cloud.cpp
@@ -0,0 +1,124 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../../operator_util.hpp"
+
+#include <depth_to_point_cloud.hpp>
+
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+
+#include <limits>
+#include <memory>
+#include <string>
+
+#include <holoscan/core/fragment.hpp>
+#include <holoscan/core/operator.hpp>
+#include <holoscan/core/operator_spec.hpp>
+#include <holoscan/core/resources/gxf/allocator.hpp>
+
+using std::string_literals::operator""s;
+using pybind11::literals::operator""_a;
+
+namespace py = pybind11;
+
+namespace holoscan::ops {
+
+/* Trampoline class providing a Pythonic kwarg-based constructor matching the C++ defaults. */
+class PyDepthToPointCloudOp : public DepthToPointCloudOp {
+ public:
+  using DepthToPointCloudOp::DepthToPointCloudOp;
+
+  PyDepthToPointCloudOp(Fragment* fragment, const py::args& args,
+                        std::shared_ptr<Allocator> allocator, float fx = 0.0f, float fy = 0.0f,
+                        float cx = 0.0f, float cy = 0.0f, float depth_scale = 0.001f,
+                        float depth_min = 0.0f, float depth_max = 100.0f,
+                        float invalid_value = std::numeric_limits<float>::quiet_NaN(),
+                        const std::string& depth_tensor_name = "",
+                        const std::string& color_tensor_name = "",
+                        const std::string& output_tensor_name = "point_cloud",
+                        const std::string& output_color_tensor_name = "colors",
+                        const std::string& name = "depth_to_point_cloud")
+      : DepthToPointCloudOp(ArgList{Arg{"allocator", allocator},
+                                      Arg{"fx", fx},
+                                      Arg{"fy", fy},
+                                      Arg{"cx", cx},
+                                      Arg{"cy", cy},
+                                      Arg{"depth_scale", depth_scale},
+                                      Arg{"depth_min", depth_min},
+                                      Arg{"depth_max", depth_max},
+                                      Arg{"invalid_value", invalid_value},
+                                      Arg{"depth_tensor_name", depth_tensor_name},
+                                      Arg{"color_tensor_name", color_tensor_name},
+                                      Arg{"output_tensor_name", output_tensor_name},
+                                      Arg{"output_color_tensor_name", output_color_tensor_name}}) {
+    add_positional_condition_and_resource_args(this, args);
+    name_ = name;
+    fragment_ = fragment;
+    spec_ = std::make_shared<OperatorSpec>(fragment);
+    setup(*spec_.get());
+  }
+};
+
+PYBIND11_MODULE(_depth_to_point_cloud, m) {
+  m.doc() = R"pbdoc(
+        DepthToPointCloudOp Python Bindings
+        -------------------------------------
+        .. currentmodule:: _depth_to_point_cloud
+    )pbdoc";
+
+  py::class_<DepthToPointCloudOp,
+             PyDepthToPointCloudOp,
+             Operator,
+             std::shared_ptr<DepthToPointCloudOp>>(
+      m, "DepthToPointCloudOp",
+      "Deproject an organized depth image into an organized point cloud on the GPU.")
+      .def(py::init<Fragment*,
+                    const py::args&,
+                    std::shared_ptr<Allocator>,
+                    float,
+                    float,
+                    float,
+                    float,
+                    float,
+                    float,
+                    float,
+                    float,
+                    const std::string&,
+                    const std::string&,
+                    const std::string&,
+                    const std::string&,
+                    const std::string&>(),
+           "fragment"_a,
+           "allocator"_a,
+           "fx"_a = 0.0f,
+           "fy"_a = 0.0f,
+           "cx"_a = 0.0f,
+           "cy"_a = 0.0f,
+           "depth_scale"_a = 0.001f,
+           "depth_min"_a = 0.0f,
+           "depth_max"_a = 100.0f,
+           "invalid_value"_a = std::numeric_limits<float>::quiet_NaN(),
+           "depth_tensor_name"_a = ""s,
+           "color_tensor_name"_a = ""s,
+           "output_tensor_name"_a = "point_cloud"s,
+           "output_color_tensor_name"_a = "colors"s,
+           "name"_a = "depth_to_point_cloud"s)
+      .def("setup", &DepthToPointCloudOp::setup, "spec"_a);
+}  // PYBIND11_MODULE
+
+}  // namespace holoscan::ops
diff --git a/operators/depth_to_point_cloud/test/CMakeLists.txt b/operators/depth_to_point_cloud/test/CMakeLists.txt
new file mode 100644
index 0000000000..2508209812
--- /dev/null
+++ b/operators/depth_to_point_cloud/test/CMakeLists.txt
@@ -0,0 +1,28 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Standalone golden-reference unit test for the deproject kernel. It depends only on
+# the CUDA runtime (no Holoscan SDK) and returns a non-zero exit code on failure.
+add_executable(depth_to_point_cloud_test
+  test_deproject.cu
+  ${CMAKE_CURRENT_SOURCE_DIR}/../deproject.cu
+)
+
+set_target_properties(depth_to_point_cloud_test
+  PROPERTIES
+    CUDA_ARCHITECTURES "native"
+)
+
+add_test(NAME depth_to_point_cloud_test COMMAND depth_to_point_cloud_test)
diff --git a/operators/depth_to_point_cloud/test/test_deproject.cu b/operators/depth_to_point_cloud/test/test_deproject.cu
new file mode 100644
index 0000000000..6608571385
--- /dev/null
+++ b/operators/depth_to_point_cloud/test/test_deproject.cu
@@ -0,0 +1,180 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Standalone golden-reference unit test for the deproject kernel. Compiles with
+// nvcc alone (no Holoscan SDK) so kernel correctness can be verified on any GPU:
+//   nvcc -O2 -arch=native -o test_deproject test/test_deproject.cu deproject.cu && ./test_deproject
+
+#include <cuda_runtime.h>
+
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <vector>
+
+#include "../deproject.hpp"
+
+using namespace holoscan::ops;
+
+static int g_failures = 0;
+#define CHECK(cond, msg)                       \
+  do {                                         \
+    if (!(cond)) {                             \
+      printf("FAIL: %s\n", (msg));             \
+      ++g_failures;                            \
+    }                                          \
+  } while (0)
+
+// Check that a CUDA runtime call returns cudaSuccess (allocations, copies, syncs).
+#define CUDA_OK(call) CHECK((call) == cudaSuccess, #call " failed")
+
+// CPU golden deproject for a single pixel.
+static void golden(float z, int u, int v, CameraIntrinsics k, float dmin, float dmax, float invalid,
+                   float& X, float& Y, float& Z) {
+  if (z <= 0.f || z < dmin || z > dmax) {
+    X = Y = Z = invalid;
+    return;
+  }
+  X = (u - k.cx) * z / k.fx;
+  Y = (v - k.cy) * z / k.fy;
+  Z = z;
+}
+
+int main() {
+  const int W = 64, H = 48, N = W * H;
+  // cx,cy at image center: (W-1)/2, (H-1)/2
+  const CameraIntrinsics k{50.f, 50.f, 31.5f, 23.5f};
+  const float dmin = 0.1f, dmax = 10.f, invalid = NAN;
+
+  // ---- Case 1: float32 constant plane Z=2.0, one hole -> NaN ----
+  {
+    std::vector<float> depth(N, 2.0f);
+    depth[10 * W + 20] = 0.0f;  // hole
+    float* d_depth = nullptr;
+    float3* d_xyz = nullptr;
+    CUDA_OK(cudaMalloc(&d_depth, N * sizeof(float)));
+    CUDA_OK(cudaMalloc(&d_xyz, N * sizeof(float3)));
+    CUDA_OK(cudaMemcpy(d_depth, depth.data(), N * sizeof(float), cudaMemcpyHostToDevice));
+    cudaError_t err = launch_deproject(d_depth, DepthDType::kFloat32, 1.0f, k, dmin, dmax, invalid,
+                                       nullptr, 0, d_xyz, nullptr, W, H, 0);
+    CHECK(err == cudaSuccess, "Case1 launch returned error");
+    CUDA_OK(cudaDeviceSynchronize());
+    std::vector<float3> out(N);
+    CUDA_OK(cudaMemcpy(out.data(), d_xyz, N * sizeof(float3), cudaMemcpyDeviceToHost));
+    bool ok = true;
+    for (int v = 0; v < H && ok; ++v) {
+      for (int u = 0; u < W; ++u) {
+        float gX, gY, gZ;
+        golden(depth[v * W + u], u, v, k, dmin, dmax, invalid, gX, gY, gZ);
+        float3 o = out[v * W + u];
+        if (std::isnan(gX)) {
+          if (!(std::isnan(o.x) && std::isnan(o.y) && std::isnan(o.z))) { ok = false; break; }
+        } else if (fabsf(o.x - gX) > 1e-4f || fabsf(o.y - gY) > 1e-4f || fabsf(o.z - gZ) > 1e-4f) {
+          ok = false;
+          break;
+        }
+      }
+    }
+    CHECK(ok, "Case1 float32 plane deprojection mismatch / hole not NaN");
+    cudaFree(d_depth);
+    cudaFree(d_xyz);
+  }
+
+  // ---- Case 2: uint16 millimeters with depth_scale 0.001 ----
+  {
+    std::vector<uint16_t> depth(N, 2000);  // 2000 mm -> 2.0 m
+    uint16_t* d_depth = nullptr;
+    float3* d_xyz = nullptr;
+    CUDA_OK(cudaMalloc(&d_depth, N * sizeof(uint16_t)));
+    CUDA_OK(cudaMalloc(&d_xyz, N * sizeof(float3)));
+    CUDA_OK(cudaMemcpy(d_depth, depth.data(), N * sizeof(uint16_t), cudaMemcpyHostToDevice));
+    cudaError_t err = launch_deproject(d_depth, DepthDType::kUint16, 0.001f, k, dmin, dmax, invalid,
+                                       nullptr, 0, d_xyz, nullptr, W, H, 0);
+    CHECK(err == cudaSuccess, "Case2 launch returned error");
+    CUDA_OK(cudaDeviceSynchronize());
+    std::vector<float3> out(N);
+    CUDA_OK(cudaMemcpy(out.data(), d_xyz, N * sizeof(float3), cudaMemcpyDeviceToHost));
+    const int u = 40, v = 30;
+    float3 o = out[v * W + u];
+    float gX = (u - k.cx) * 2.0f / k.fx, gY = (v - k.cy) * 2.0f / k.fy;
+    CHECK(fabsf(o.z - 2.0f) < 1e-4f, "Case2 uint16 depth_scale Z wrong");
+    CHECK(fabsf(o.x - gX) < 1e-4f && fabsf(o.y - gY) < 1e-4f, "Case2 uint16 XY wrong");
+    cudaFree(d_depth);
+    cudaFree(d_xyz);
+  }
+
+  // ---- Case 3: depth beyond depth_max -> invalid ----
+  {
+    std::vector<float> depth(N, 50.0f);  // > dmax
+    float* d_depth = nullptr;
+    float3* d_xyz = nullptr;
+    CUDA_OK(cudaMalloc(&d_depth, N * sizeof(float)));
+    CUDA_OK(cudaMalloc(&d_xyz, N * sizeof(float3)));
+    CUDA_OK(cudaMemcpy(d_depth, depth.data(), N * sizeof(float), cudaMemcpyHostToDevice));
+    cudaError_t err = launch_deproject(d_depth, DepthDType::kFloat32, 1.0f, k, dmin, dmax, invalid,
+                                       nullptr, 0, d_xyz, nullptr, W, H, 0);
+    CHECK(err == cudaSuccess, "Case3 launch returned error");
+    CUDA_OK(cudaDeviceSynchronize());
+    std::vector<float3> out(N);
+    CUDA_OK(cudaMemcpy(out.data(), d_xyz, N * sizeof(float3), cudaMemcpyDeviceToHost));
+    CHECK(std::isnan(out[0].x), "Case3 depth beyond max not invalidated");
+    cudaFree(d_depth);
+    cudaFree(d_xyz);
+  }
+
+  // ---- Case 4: color passthrough (uchar3) ----
+  {
+    std::vector<float> depth(N, 2.0f);
+    std::vector<uchar3> color(N);
+    for (int i = 0; i < N; ++i) color[i] = make_uchar3(i % 256, (i * 2) % 256, (i * 3) % 256);
+    float* d_depth = nullptr;
+    float3* d_xyz = nullptr;
+    uchar3* d_color = nullptr;
+    uchar3* d_outcolor = nullptr;
+    CUDA_OK(cudaMalloc(&d_depth, N * sizeof(float)));
+    CUDA_OK(cudaMalloc(&d_xyz, N * sizeof(float3)));
+    CUDA_OK(cudaMalloc(&d_color, N * sizeof(uchar3)));
+    CUDA_OK(cudaMalloc(&d_outcolor, N * sizeof(uchar3)));
+    CUDA_OK(cudaMemcpy(d_depth, depth.data(), N * sizeof(float), cudaMemcpyHostToDevice));
+    CUDA_OK(cudaMemcpy(d_color, color.data(), N * sizeof(uchar3), cudaMemcpyHostToDevice));
+    cudaError_t err = launch_deproject(d_depth, DepthDType::kFloat32, 1.0f, k, dmin, dmax, invalid,
+                                       d_color, 3, d_xyz, d_outcolor, W, H, 0);
+    CHECK(err == cudaSuccess, "Case4 launch returned error");
+    CUDA_OK(cudaDeviceSynchronize());
+    std::vector<uchar3> outc(N);
+    CUDA_OK(cudaMemcpy(outc.data(), d_outcolor, N * sizeof(uchar3), cudaMemcpyDeviceToHost));
+    bool ok = true;
+    for (int i = 0; i < N; ++i) {
+      if (outc[i].x != color[i].x || outc[i].y != color[i].y || outc[i].z != color[i].z) {
+        ok = false;
+        break;
+      }
+    }
+    CHECK(ok, "Case4 color passthrough mismatch");
+    cudaFree(d_depth);
+    cudaFree(d_xyz);
+    cudaFree(d_color);
+    cudaFree(d_outcolor);
+  }
+
+  if (g_failures == 0) {
+    printf("ALL TESTS PASSED\n");
+    return 0;
+  }
+  printf("%d FAILURE(S)\n", g_failures);
+  return 1;
+}
diff --git a/operators/depth_to_point_cloud/test_depth_to_point_cloud.py b/operators/depth_to_point_cloud/test_depth_to_point_cloud.py
new file mode 100644
index 0000000000..c4ea84a576
--- /dev/null
+++ b/operators/depth_to_point_cloud/test_depth_to_point_cloud.py
@@ -0,0 +1,88 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Unit tests for the DepthToPointCloudOp Python bindings.
+
+Covers construction, port wiring, parameter handling, and error handling across
+the pybind11 boundary. Numerical correctness of the deprojection kernel is
+covered separately by the standalone CUDA golden-reference test
+(``test/test_deproject.cu``) and the ``depth_to_point_cloud_demo`` application,
+since the compute path requires real GXF entities, an allocator, and a CUDA
+stream that the lightweight mock fixtures do not provide.
+"""
+
+import pytest
+from holoscan.core import Operator
+from holoscan.resources import UnboundedAllocator
+
+from holohub.depth_to_point_cloud import DepthToPointCloudOp
+
+try:
+    from holoscan.core import BaseOperator
+except ImportError:
+    from holoscan.core import _Operator as BaseOperator
+
+
+def _make_op(fragment, **overrides):
+    """Construct a DepthToPointCloudOp with sane defaults, overridable per test."""
+    params = dict(
+        name="depth_to_point_cloud",
+        allocator=UnboundedAllocator(fragment, name="alloc"),
+        fx=500.0,
+        fy=500.0,
+        cx=320.0,
+        cy=240.0,
+    )
+    params.update(overrides)
+    return DepthToPointCloudOp(fragment, **params)
+
+
+def test_init(fragment):
+    """Operator constructs and exposes the expected Holoscan properties."""
+    name = "d2p_op"
+    op = _make_op(fragment, name=name)
+    assert isinstance(op, BaseOperator), "DepthToPointCloudOp should be a Holoscan operator"
+    assert op.operator_type == Operator.OperatorType.NATIVE, "Operator type should be NATIVE"
+    assert f"name: {name}" in repr(op), "Operator name should appear in repr()"
+
+
+def test_ports(fragment):
+    """setup() wires the required depth input, optional inputs, and the output."""
+    spec = _make_op(fragment).spec
+    assert "depth" in spec.inputs, 'required input "depth" missing'
+    assert "intrinsics" in spec.inputs, 'optional per-frame "intrinsics" input missing'
+    assert "color" in spec.inputs, 'optional "color" input missing'
+    assert "point_cloud" in spec.outputs, 'output "point_cloud" missing'
+
+
+def test_requires_allocator(fragment):
+    """allocator is a required argument; omitting it is a TypeError."""
+    with pytest.raises(TypeError):
+        DepthToPointCloudOp(fragment, name="no_alloc", fx=500.0, fy=500.0, cx=320.0, cy=240.0)
+
+
+@pytest.mark.parametrize(
+    "overrides",
+    [
+        {"depth_scale": 0.001, "depth_min": 0.1, "depth_max": 10.0},
+        {"output_tensor_name": "xyz", "output_color_tensor_name": "rgb"},
+        {"invalid_value": 0.0},
+        {"depth_tensor_name": "depth", "color_tensor_name": "color"},
+    ],
+)
+def test_param_acceptance(fragment, overrides):
+    """The operator accepts its documented parameters without error."""
+    op = _make_op(fragment, **overrides)
+    assert isinstance(op, BaseOperator)