Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions .github/workflows/arm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ jobs:
BUILD_SHARED_LIBS:BOOL=OFF
BUILD_EXAMPLES:BOOL=OFF
ITK_WRAP_PYTHON:BOOL=ON
ITK_WRAP_CASTXML_CACHE:BOOL=ON
ITK_USE_CLANG_FORMAT:BOOL=OFF
ITK_COMPUTER_MEMORY_SIZE:STRING=11
ctest-options: "-E itkPyBufferMemoryLeakTest"
Expand All @@ -102,6 +103,7 @@ jobs:
echo "CCACHE_SLOPPINESS=pch_defines,time_macros" >> "$GITHUB_ENV"
echo "CCACHE_DIR=${{ runner.temp }}/ccache" >> "$GITHUB_ENV"
echo "CCACHE_MAXSIZE=5G" >> "$GITHUB_ENV"
echo "ITK_WRAP_CACHE=${{ runner.temp }}/itk-castxml-cache" >> "$GITHUB_ENV"
if [ "$RUNNER_OS" == "Linux" ]; then
sudo apt-get update -qq && sudo apt-get install -y ccache locales
sudo locale-gen de_DE.UTF-8
Expand All @@ -118,6 +120,16 @@ jobs:
restore-keys: |
ccache-v4-${{ runner.os }}-${{ matrix.name }}-

# Restore the CastXML cache directory before the build. The step only runs for
# matrix entries that define a python-version. The exact key is per-commit
# (github.sha); restore-keys lets a cache whose key starts with the
# "castxml-v1-<os>-arm-python-" prefix be used when there is no exact match.
- name: Restore CastXML cache
if: matrix.python-version != ''
id: restore-castxml-cache
uses: actions/cache/restore@v5
with:
path: ${{ runner.temp }}/itk-castxml-cache
key: castxml-v1-${{ runner.os }}-arm-python-${{ github.sha }}
restore-keys: |
castxml-v1-${{ runner.os }}-arm-python-

- name: Restore ExternalData object store
id: restore-externaldata
uses: actions/cache/restore@v5
Expand Down Expand Up @@ -202,6 +214,13 @@ jobs:
path: ${{ runner.temp }}/ccache
key: ccache-v4-${{ runner.os }}-${{ matrix.name }}-${{ github.sha }}

# Save the CastXML cache directory under the per-commit key used by the
# matching restore step. Skipped when the run was cancelled or when the matrix
# entry has no python-version; otherwise it runs regardless of earlier step
# outcome so a partially filled cache is still uploaded.
- name: Save CastXML cache
if: ${{ !cancelled() && matrix.python-version != '' }}
uses: actions/cache/save@v5
with:
path: ${{ runner.temp }}/itk-castxml-cache
key: castxml-v1-${{ runner.os }}-arm-python-${{ github.sha }}

# ExternalData object store is populated by
# .github/workflows/populate-externaldata-cache.yml — a dedicated
# workflow whose only job is to prefetch every CID and write the
Expand Down
58 changes: 58 additions & 0 deletions CMake/itkWrapCastXMLCacheSupport.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# User-facing switch for the CastXML result cache. It defaults to ON and is
# marked advanced so it stays out of the basic cmake-gui / ccmake view.
option(
  ITK_WRAP_CASTXML_CACHE
  "Use a content-addressed two-level cache for CastXML wrapping steps."
  ON
)
mark_as_advanced(ITK_WRAP_CASTXML_CACHE)

# In-tree location of the cache wrapper script. This is a file-local helper:
# it seeds the default of ITK_WRAP_CASTXML_CACHE_SCRIPT further down and is
# unset again at the end of this file.
set(
  _ITK_WRAP_CASTXML_CACHE_SCRIPT_DEFAULT
  "${ITK_SOURCE_DIR}/Wrapping/Generators/CastXML/itk-castxml-cache.py"
)

# Configure-time setup for the CastXML content-addressed cache.
#
# When ITK_WRAP_CASTXML_CACHE is ON this block:
#   * publishes ITK_WRAP_CASTXML_CACHE_SCRIPT (path to the wrapper script),
#   * fails the configure step if the script or the Python interpreter is
#     missing,
#   * publishes ITK_WRAP_CASTXML_CACHE_MAX_DAYS (age-based eviction threshold),
#   * prints a short summary of the cache settings.
if(ITK_WRAP_CASTXML_CACHE)
  set(
    ITK_WRAP_CASTXML_CACHE_SCRIPT
    "${_ITK_WRAP_CASTXML_CACHE_SCRIPT_DEFAULT}"
    CACHE FILEPATH
    "Path to the CastXML content-addressed cache wrapper script"
  )
  mark_as_advanced(ITK_WRAP_CASTXML_CACHE_SCRIPT)

  # EXISTS is also true for directories, so reject those explicitly: the
  # value must name a script file, otherwise the failure would only surface
  # at build time.
  if(
    NOT EXISTS "${ITK_WRAP_CASTXML_CACHE_SCRIPT}"
    OR IS_DIRECTORY "${ITK_WRAP_CASTXML_CACHE_SCRIPT}"
  )
    message(
      FATAL_ERROR
      "ITK_WRAP_CASTXML_CACHE is ON but the wrapper script was not found:\n"
      "  ${ITK_WRAP_CASTXML_CACHE_SCRIPT}\n"
      "Set ITK_WRAP_CASTXML_CACHE_SCRIPT to the correct path or turn off ITK_WRAP_CASTXML_CACHE."
    )
  endif()

  # The wrapper is a Python script, so an interpreter must already be known.
  if(NOT Python3_EXECUTABLE)
    message(
      FATAL_ERROR
      "ITK_WRAP_CASTXML_CACHE requires Python3_EXECUTABLE to be set."
    )
  endif()

  set(
    ITK_WRAP_CASTXML_CACHE_MAX_DAYS
    "13.9"
    CACHE STRING
    "Days before a CastXML cache entry is evicted by --evict after each build (default 13.9)"
  )
  mark_as_advanced(ITK_WRAP_CASTXML_CACHE_MAX_DAYS)

  # The value is a free-form cache string. Flag anything that is not a plain
  # non-negative decimal at configure time. This is a warning rather than an
  # error because the consuming script is what finally interprets the value.
  if(
    NOT "${ITK_WRAP_CASTXML_CACHE_MAX_DAYS}"
      MATCHES "^([0-9]+(\\.[0-9]*)?|\\.[0-9]+)$"
  )
    message(
      WARNING
      "ITK_WRAP_CASTXML_CACHE_MAX_DAYS='${ITK_WRAP_CASTXML_CACHE_MAX_DAYS}' "
      "does not look like a non-negative number of days; "
      "CastXML cache eviction may fail or behave unexpectedly."
    )
  endif()

  message(STATUS "CastXML content-addressed cache enabled")
  message(STATUS "  Script: ${ITK_WRAP_CASTXML_CACHE_SCRIPT}")
  message(
    STATUS
    "  Cache root: set ITK_WRAP_CACHE env var at build time (default: ~/.cache/itk-wrap)"
  )
  message(
    STATUS
    "  Eviction (after each build): entries older than ${ITK_WRAP_CASTXML_CACHE_MAX_DAYS} days, then oldest trimmed to ITK_WRAP_CACHE_MAX_SIZE GB (default 2.0)"
  )
endif()

# Drop the file-local helper variable once it has been consumed above, so it
# does not remain defined after this file has been processed.
unset(_ITK_WRAP_CASTXML_CACHE_SCRIPT_DEFAULT)
1 change: 1 addition & 0 deletions Documentation/docs/contributing/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,7 @@ CDash Dashboard <https://open.cdash.org/index.php?project=Insight>
dashboard.md
updating_third_party.md
python_packaging.md
wrapping_architecture.md
../README.md
```

Expand Down
215 changes: 215 additions & 0 deletions Documentation/docs/contributing/wrapping_architecture.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
Python Wrapping Architecture
============================

ITK's Python wrapping pipeline converts C++ template declarations into
importable Python extension modules (`.abi3.so`) and type-stub files
(`.pyi`). The pipeline runs in two distinct phases: a **configure phase**
driven by CMake and a **build phase** driven by Ninja.

## Configure phase: `.wrap` → `castxml_inputs/`

Each ITK module that supports wrapping contains a `wrapping/` subdirectory
with one `.wrap` file per submodule. A `.wrap` file is a CMake script that
calls macros such as `itk_wrap_class()` and `itk_wrap_template()` to declare
which C++ template instantiations should be exposed to Python.

CMake processes every `.wrap` file at configure time and writes three
files per submodule into `<build>/Wrapping/castxml_inputs/`:

| Generated file | How | Contents |
|---|---|---|
| `<submodule>.cxx` | `configure_file` | `#include` directives + `_wrapping_` namespace with `using` aliases for every requested template instantiation |
| `<submodule>.castxml.inc` | `file(GENERATE …)` | Compiler `-I` and `-D` flags needed to parse the `.cxx` file |
| `<submodule>SwigInterface.h.in` | `configure_file` | `#include` list used by SWIG |

CMake also registers one `add_custom_command` per submodule (for CastXML)
and one per ITK module (for `igenerator.py`). No compilation happens at
configure time; only the input files and build rules are written.

## Build phase: CastXML → igenerator → SWIG → compile → link

### Step 1 — CastXML (816 independent jobs)

Each submodule produces exactly one XML file:

```
castxml_inputs/<submodule>.cxx
castxml_inputs/<submodule>.castxml.inc ──▶ itk-castxml-cache.py ──▶ castxml_inputs/<submodule>.xml
Modules/.../include/<Class>.h (many)
```

`itk-castxml-cache.py` wraps the CastXML binary with a two-level
content-addressed cache (`~/.cache/itk-wrap` or `$ITK_WRAP_CACHE`):

- **L1** — hash of the `.cxx` file content → L2 key
- **L2** — `castxml -E` (preprocessor only) output hash → cached `output.xml.gz`

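An L1 hit avoids running CastXML entirely. On an L1 miss only the
preprocessor pass (`castxml -E`) runs to compute the L2 key, and an L2 hit
then skips the expensive XML generation. All 816 CastXML jobs are
independent and can run fully in parallel: no CastXML job reads or modifies
another submodule's `.xml` output.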

### Step 2 — `igenerator.py` (96 per-module jobs)

Each ITK module (e.g. `ITKImageIntensity`) batches all of its submodules
into a single `igenerator.py` invocation:

```
castxml_inputs/itkAbsImageFilter.xml ──┐
castxml_inputs/itkImage.xml ──┤
castxml_inputs/itkOffset.xml ──┤ igenerator.py [ITKImageIntensity]
... (all N submodule XMLs) ──┘ --submodule-order "sub1;sub2;...;subN"
┌──────────────────────────┼─────────────────────────────┐
│ per submodule (×N) │ │
▼ ▼ ▼
Typedefs/<sub>.i itk-pkl/<sub>.index.txt itk-pkl-v3.db (SQLite)
Typedefs/<sub>.idx (lists DB keys; byproduct) (class metadata; WAL mode)
Typedefs/<sub>SwigInterface.h
+ per module (×1):
itk-pkl/<Module>.stamp
itk/Configuration/<Module>_snake_case.py
```

`igenerator.py` uses `pygccxml` to parse each `.xml` file and emit SWIG
interface (`.i`) and index (`.idx`) files, class-metadata rows in a
build-local SQLite database consumed later by `pyi_generator.py`, and a
`.index.txt` manifest listing the DB keys for each submodule.

The SQLite database (`itk-pkl-v3.db`) is written to the `itk-pkl/` directory
inside the build tree. It is intermediate handoff state keyed by bare class
name, so it is always build-tree-local and is never shared via
`ITK_WRAP_CACHE`.

**Ninja scheduling**: an `igenerator.py` job for module A starts as soon
as all of A's CastXML jobs are complete, even while CastXML is still
running for module B. There is no global barrier between the CastXML and
`igenerator.py` layers.

### Step 3 — SWIG, compile, link (per submodule)

```
Typedefs/<sub>.i
Typedefs/<sub>SwigInterface.h ──▶ swig ──▶ Modules/.../<sub>Python.cpp
Generators/Python/itk/<sub>Python.py

Modules/.../<sub>Python.cpp ──▶ ccache + g++ ──▶ .o ──▶ link ──▶ _<Module>Python.abi3.so
```

### Step 4 — `pyi_generator.py` (one global job)

After **all** 816 `.index.txt` files exist, `pyi_generator.py` reads every
`.index.txt`, queries the SQLite database for each key, and writes the
type-stub files used by IDEs:

```
itk-pkl/<sub>.index.txt (×816) ──▶ pyi_generator.py ──▶ _proxies.pyi
itk-pkl-v3.db (SQLite) (queries DB via keys in .index.txt) __init__.pyi
```

## Key file reference

| Path pattern | Written by | Read by |
|---|---|---|
| `Wrapping/castxml_inputs/<sub>.cxx` | CMake `configure_file` | CastXML |
| `Wrapping/castxml_inputs/<sub>.castxml.inc` | CMake `file(GENERATE)` | `itk-castxml-cache.py` |
| `Wrapping/castxml_inputs/<sub>.xml` | `itk-castxml-cache.py` / CastXML | `igenerator.py` |
| `Wrapping/Typedefs/<sub>.i` | `igenerator.py` | SWIG |
| `Wrapping/Typedefs/<sub>.idx` | `igenerator.py` | SWIG |
| `Wrapping/Generators/Python/itk-pkl/<sub>.index.txt` | `igenerator.py` | `pyi_generator.py` |
| `Wrapping/Generators/Python/itk-pkl/itk-pkl-v3.db` | `igenerator.py` | `pyi_generator.py` |
| `Wrapping/Generators/Python/itk-pkl/<Module>.stamp` | `igenerator.py` | ninja (tracks DB write completeness) |
| `Wrapping/Generators/Python/itk/_<Module>Python.abi3.so` | linker | Python `import itk` |
| `Wrapping/Generators/Python/itk/_proxies.pyi` | `pyi_generator.py` | IDEs |

## Caches

### CastXML cache (`itk-castxml-cache.py`)

| Variable | Default | Purpose |
|---|---|---|
| `ITK_WRAP_CACHE` | `~/.cache/itk-wrap` | Cache root for CastXML `.xml.gz` files |
| `ITK_WRAP_CACHE_VERBOSE` | unset | Set to `1` to log HIT/MISS per file |

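The CastXML cache is content-addressed and generator-version-stamped
(`_KEY_VERSION` in `itk-castxml-cache.py`). It is shared across build
directories; a fresh configure reuses XML from a previous build on the same
machine.

The cache is trimmed after each build: entries older than
`ITK_WRAP_CASTXML_CACHE_MAX_DAYS` days (a CMake cache variable, default
`13.9`) are evicted first, then the oldest remaining entries are removed
until the cache fits within `ITK_WRAP_CACHE_MAX_SIZE` GB (default `2.0`).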

### pkl SQLite database (`igenerator.py` / `pyi_generator.py`)

The pkl database (`itk-pkl-v3.db`) lives in the build tree's `itk-pkl/`
directory and is a build artifact, not a user-level cache. Unlike the
content-addressed CastXML cache, it is keyed by bare `class.submodule`
strings, so it is **always build-tree-local and never honors
`ITK_WRAP_CACHE`** — sharing it across build trees would collide same-named
classes from different ITK sources.

`igenerator.py` writes the rows; `pyi_generator.py` reads them and prunes any
row whose key is absent from the current build's `.index.txt` manifests
(exact keyset cleanup). The schema version in the filename means a database
from an older schema is ignored rather than migrated.

### ccache

For the C++ compilation steps (Step 3), ccache caches the compiled `.o`
files, keyed on the source content being compiled. The CastXML cache
addresses a different cost: CastXML re-runs produce identical `.xml` files
(the content is deterministic) but are slow, and the CastXML cache
eliminates those re-runs. The two caches are independent and complement
each other.

### Build-phase timing on 2-core CI runners

On a 2-core CI runner (cold caches throughout):

| Phase | Approx. time | Notes |
|---|---|---|
| CastXML (816 jobs, 2 cores) | ~32 min | Eliminated on warm-cache runs |
| igenerator + SWIG + C++ compile | ~225 min | Reduced by ccache on subsequent runs |
| Tests | ~44 min | |

CastXML is ~10 % of the cold-cache total. The C++ compilation phase
dominates; `ccache` is the primary lever there.

## Ninja dependency graph summary

```
.wrap files (configure time, not in graph)
│ cmake configure_file / file(GENERATE)
castxml_inputs/<sub>.cxx + .castxml.inc + .h headers
│ itk-castxml-cache.py [816 parallel jobs]
castxml_inputs/<sub>.xml (write-once; never mutated after creation)
│ igenerator.py [96 per-module jobs; starts per-module, not globally gated]
Typedefs/<sub>.i + .idx + SwigInterface.h
itk-pkl/<sub>.index.txt (DB keys) + itk-pkl-v3.db (SQLite) + <Module>.stamp
│ swig + ccache compile + link [parallel per submodule]
_<Module>Python.abi3.so
│ pyi_generator.py [1 global job; needs all .index.txt]
_proxies.pyi + __init__.pyi
```

## Troubleshooting

**CastXML cache not being used**
: Set `ITK_WRAP_CACHE_VERBOSE=1` and rebuild one module to confirm HIT or
MISS log lines. Ensure `ITK_WRAP_CASTXML_CACHE=ON` is set in CMake.
A version bump to `_KEY_VERSION` in `itk-castxml-cache.py` forces a cold
cache for all entries.

**`No pkl keys were found in index files in itk-pkl`**
: The `.index.txt` manifests exist (so ninja considers `igenerator.py`
up-to-date) but the pkl database is absent or stale. Delete the
`.index.txt` manifests and the `.stamp` files to force `igenerator.py` to
re-run and repopulate the DB:
```bash
find <build>/Wrapping/Generators/Python/itk-pkl -name "*.index.txt" -o -name "*.stamp" | xargs rm -f
ninja -C <build>
```

**Adding a new wrapped class**
: Add `itk_wrap_class()` / `itk_wrap_template()` calls to the relevant
`.wrap` file. Re-run CMake to regenerate the `.cxx` and `.castxml.inc`
files, then build normally. CMake will automatically include the new
submodule in the `--submodule-order` passed to `igenerator.py`.
11 changes: 11 additions & 0 deletions Testing/ContinuousIntegration/AzurePipelinesLinuxPython.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ variables:
CCACHE_NOHASHDIR: 'true'
CCACHE_SLOPPINESS: pch_defines,time_macros
CCACHE_MAXSIZE: 8G
ITK_WRAP_CACHE: $(Pipeline.Workspace)/.castxml-cache
jobs:
- job: Linux
timeoutInMinutes: 0
Expand All @@ -55,6 +56,7 @@ jobs:
df -h /
displayName: 'Free preinstalled software'


- checkout: self
clean: true
fetchDepth: 5
Expand Down Expand Up @@ -102,6 +104,14 @@ jobs:
path: $(CCACHE_DIR)
displayName: 'Restore ccache'

# CastXML cache for the Linux Python job. The key is per-commit
# (Build.SourceVersion); restoreKeys allows a cache from another commit with
# the same "castxml-v1" | OS | "LinuxPython" prefix to be used instead.
# The cached path is the ITK_WRAP_CACHE directory.
# NOTE(review): displayName says "Restore", but Cache@2 presumably also saves
# the path at job end — confirm against the task documentation.
- task: Cache@2
inputs:
key: '"castxml-v1" | "$(Agent.OS)" | "LinuxPython" | "$(Build.SourceVersion)"'
restoreKeys: |
"castxml-v1" | "$(Agent.OS)" | "LinuxPython"
path: $(ITK_WRAP_CACHE)
displayName: 'Restore CastXML cache'

- bash: |
set -x
ccache --zero-stats
Expand All @@ -123,6 +133,7 @@ jobs:
BUILD_SHARED_LIBS:BOOL=OFF
BUILD_EXAMPLES:BOOL=OFF
ITK_WRAP_PYTHON:BOOL=ON
ITK_WRAP_CASTXML_CACHE:BOOL=ON
CMAKE_C_COMPILER_LAUNCHER:STRING=ccache
CMAKE_CXX_COMPILER_LAUNCHER:STRING=ccache
ITK_COMPUTER_MEMORY_SIZE:STRING=4.5
Expand Down
10 changes: 10 additions & 0 deletions Testing/ContinuousIntegration/AzurePipelinesMacOSPython.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ variables:
CCACHE_NOHASHDIR: 'true'
CCACHE_SLOPPINESS: pch_defines,time_macros
CCACHE_MAXSIZE: 8G
ITK_WRAP_CACHE: $(Pipeline.Workspace)/.castxml-cache
jobs:
- job: macOS
timeoutInMinutes: 0
Expand Down Expand Up @@ -106,6 +107,14 @@ jobs:
path: $(CCACHE_DIR)
displayName: 'Restore ccache'

# CastXML cache for the macOS Python job. The key is per-commit
# (Build.SourceVersion); restoreKeys allows a cache from another commit with
# the same "castxml-v1" | OS | "macOSPython" prefix to be used instead.
# The cached path is the ITK_WRAP_CACHE directory.
# NOTE(review): displayName says "Restore", but Cache@2 presumably also saves
# the path at job end — confirm against the task documentation.
- task: Cache@2
inputs:
key: '"castxml-v1" | "$(Agent.OS)" | "macOSPython" | "$(Build.SourceVersion)"'
restoreKeys: |
"castxml-v1" | "$(Agent.OS)" | "macOSPython"
path: $(ITK_WRAP_CACHE)
displayName: 'Restore CastXML cache'

- bash: |
set -x
ccache --zero-stats
Expand All @@ -127,6 +136,7 @@ jobs:
BUILD_SHARED_LIBS:BOOL=ON
BUILD_EXAMPLES:BOOL=OFF
ITK_WRAP_PYTHON:BOOL=ON
ITK_WRAP_CASTXML_CACHE:BOOL=ON
CMAKE_C_COMPILER_LAUNCHER:STRING=ccache
CMAKE_CXX_COMPILER_LAUNCHER:STRING=ccache
ITK_COMPUTER_MEMORY_SIZE:STRING=11
Expand Down
Loading
Loading