From 44b67a83191aae00f431a0d6aa502bf2a005dad2 Mon Sep 17 00:00:00 2001 From: Jacky Date: Mon, 22 Jun 2026 16:19:44 +0800 Subject: [PATCH] [VL] Unify native-build component isolation via a single resolver (macOS + Linux) Native-build path policy was duplicated across three shell entry points (builddeps-veloxbe.sh, build-helper-functions.sh, build-velox.sh), each independently hardcoding `-DCMAKE_IGNORE_PREFIX_PATH=/usr/local` on macOS only. This left Linux without first-class isolation and, importantly, left Velox's own dependency builds (folly, bundled Arrow, ...) unprotected from /usr/local. Introduce dev/build-isolation.sh as a single source of truth. It normalizes all path inputs, decides isolation on/off, and emits a CMake toolchain fragment + path-policy.env + machine-readable resolved_{trusted,ignored,runtime_ignored}_roots under the already-gitignored ep/_ep working dir. Every build layer consumes them. Default behavior (user-facing contract): * macOS and Linux both default-on (GLUTEN_BUILD_ISOLATION=auto -> on); vcpkg forces off; explicit on+vcpkg fails fast (only one toolchain slot). * macOS default: local prefix ${VELOX_HOME}/deps-install; /usr/local ignored. * Linux default: setup still installs to system dirs (trusted-managed, Docker/CI behavior and artifact locations unchanged); only ambient residue (stray Conda, user CMake registry) is filtered -- effectively a no-op unless you opt into a separate install. * Either platform + explicit INSTALL_PREFIX (separate install): /usr/local and /usr flip to ignored, with GLUTEN_ALLOW_IGNORED_ROOTS / GLUTEN_TRUST_PREFIX escape hatches. GLUTEN_BUILD_ISOLATION=off is a full kill-switch. Two-level isolation: * CMake find policy: ignore roots + NO_SYSTEM_FROM_IMPORTED + package-registry off, propagated to every nested cmake (incl. Velox's own dependency setup) via the exported CMAKE_TOOLCHAIN_FILE. 
The toolchain carries only the ignore policy -- it does NOT prepend trusted prefixes globally, which would wrongly redirect Velox's/Arrow's self-contained bundled builds to deps-install. * Compiler include search: CMAKE_IGNORE_* doesn't govern the compiler, and on macOS clang searches /usr/local/include ahead of -isystem, so a stale header there (e.g. an old gtest/fmt) shadows the bundled copy. The resolver exports CFLAGS/CXXFLAGS with `-idirafter {root}/include` to demote those roots below every -I/-isystem dir; child cmake processes inherit it. build-arrow.sh: guard the destructive download-dir removal (never wipe a user-provided ARROW_PREFIX) and resolve a sane default install prefix for standalone runs instead of silently targeting /usr/local. Verified end-to-end by a complete native macOS build (arm64): valid libgluten.dylib + libvelox.dylib with zero /usr/local linkage (otool -L). The resolver supports GLUTEN_ISOLATION_DRYRUN=1 to emit the policy without building. Linux is a no-op by default, preserving existing Docker/CI behavior. Co-Authored-By: Claude Opus 4.8 (1M context) --- dev/build-arrow.sh | 21 +- dev/build-helper-functions.sh | 24 +- dev/build-isolation.sh | 486 ++++++++++++++++++++++++++++++ dev/builddeps-veloxbe.sh | 33 +- ep/build-velox/src/build-velox.sh | 35 ++- 5 files changed, 570 insertions(+), 29 deletions(-) create mode 100755 dev/build-isolation.sh diff --git a/dev/build-arrow.sh b/dev/build-arrow.sh index 8c9a4686e66..a207a825d07 100755 --- a/dev/build-arrow.sh +++ b/dev/build-arrow.sh @@ -22,11 +22,30 @@ SUDO="${SUDO:-""}" source ${CURRENT_DIR}/build-helper-functions.sh VELOX_ARROW_BUILD_VERSION=15.0.0 ARROW_PREFIX=$CURRENT_DIR/../ep/_ep/arrow_ep +ARROW_MANAGED_PREFIX="$CURRENT_DIR/../ep/_ep/arrow_ep" BUILD_TYPE=Release +# When invoked via builddeps-veloxbe.sh, INSTALL_PREFIX is already resolved and +# exported (isolated local prefix on macOS, system on Linux) and is respected +# here.
For a standalone run, consult the isolation resolver so we do not +# silently target /usr/local on an isolated platform. +if [ -z "${INSTALL_PREFIX:-}" ] && [ -f "${CURRENT_DIR}/build-isolation.sh" ]; then + source "${CURRENT_DIR}/build-isolation.sh" + # Fatal on resolver rejection (e.g. isolation=on with vcpkg, or an invalid + # mode): falling through would silently target /usr/local and mask the + # misconfiguration. Consistent with the other native build entrypoints. + resolve_build_isolation || exit 1 +fi INSTALL_PREFIX=${INSTALL_PREFIX:-"/usr/local"} function prepare_arrow_build() { - mkdir -p ${ARROW_PREFIX}/../ && pushd ${ARROW_PREFIX}/../ && ${SUDO} rm -rf arrow_ep/ + mkdir -p ${ARROW_PREFIX}/../ && pushd ${ARROW_PREFIX}/../ + # Only auto-remove Gluten's managed download dir; never wipe a user-provided + # Arrow source tree pointed to by an overridden ARROW_PREFIX. + if [ "${ARROW_PREFIX}" = "${ARROW_MANAGED_PREFIX}" ]; then + ${SUDO} rm -rf arrow_ep/ + else + echo "INFO: ARROW_PREFIX=${ARROW_PREFIX} is user-provided; not auto-removing it." >&2 + fi wget_and_untar https://github.com/apache/arrow/archive/refs/tags/apache-arrow-${VELOX_ARROW_BUILD_VERSION}.tar.gz arrow_ep #wget_and_untar https://archive.apache.org/dist/arrow/arrow-${VELOX_ARROW_BUILD_VERSION}/apache-arrow-${VELOX_ARROW_BUILD_VERSION}.tar.gz arrow_ep cd arrow_ep diff --git a/dev/build-helper-functions.sh b/dev/build-helper-functions.sh index c90e68b2086..631fb98c351 100644 --- a/dev/build-helper-functions.sh +++ b/dev/build-helper-functions.sh @@ -176,16 +176,18 @@ function cmake_install { CPU_TARGET="${CPU_TARGET:-unknown}" COMPILER_FLAGS=$(get_cxx_flags $CPU_TARGET) - local MACOS_ISOLATION_FLAGS="" - if [[ "$(uname)" == "Darwin" ]]; then - if [[ "${INSTALL_PREFIX:-}" == "/usr/local" || "${INSTALL_PREFIX:-}" == /usr/local/* ]]; then - echo "INFO: INSTALL_PREFIX=${INSTALL_PREFIX} is under /usr/local; keeping /usr/local visible to CMake." 
>&2 - else - MACOS_ISOLATION_FLAGS="-DCMAKE_NO_SYSTEM_FROM_IMPORTED=ON \ - -DCMAKE_IGNORE_PREFIX_PATH=/usr/local \ - -DCMAKE_IGNORE_PATH=/usr/local;/usr/local/include;/usr/local/lib;/usr/local/lib/cmake \ - -DCMAKE_SYSTEM_IGNORE_PATH=/usr/local;/usr/local/include;/usr/local/lib;/usr/local/lib/cmake" - fi + # Component isolation: a single resolver computes the ignore/prefix policy for + # both macOS and Linux (dev/build-isolation.sh). macOS default reproduces the + # previous /usr/local ignore flags; Linux default is a no-op; vcpkg disables it. + local _gi="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/build-isolation.sh" + local ISOLATION_FLAGS="" + if [ -f "$_gi" ]; then + source "$_gi" + resolve_build_isolation || exit 1 + ISOLATION_FLAGS="${GLUTEN_ISOLATION_CMAKE_FLAGS:-}" + # Demote ignored roots' /include below -I/-isystem so a stale /usr/local + # header can't shadow bundled/deps-install headers (compiler isolation). + COMPILER_FLAGS="$COMPILER_FLAGS ${GLUTEN_ISOLATION_CXXFLAGS:-}" fi # CMAKE_POSITION_INDEPENDENT_CODE is required so that Velox can be built into dynamic libraries \ @@ -197,7 +199,7 @@ function cmake_install { "${INSTALL_PREFIX+-DCMAKE_INSTALL_PREFIX=}${INSTALL_PREFIX-}" \ -DCMAKE_CXX_FLAGS="$COMPILER_FLAGS" \ -DBUILD_TESTING=OFF \ - $MACOS_ISOLATION_FLAGS \ + $ISOLATION_FLAGS \ "$@" cmake --build "${BINARY_DIR}" diff --git a/dev/build-isolation.sh b/dev/build-isolation.sh new file mode 100755 index 00000000000..c55efdad775 --- /dev/null +++ b/dev/build-isolation.sh @@ -0,0 +1,486 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# ============================================================================ +# Gluten native build component-isolation & dependency-discovery resolver. +# +# This file is the SINGLE source of truth for native-build path policy. It is +# meant to be `source`d (not executed standalone) and exposes one entry point: +# +# resolve_build_isolation +# +# which normalizes all path inputs, classifies them into roles +# (source / build / install / discovery / ignore / runtime), and emits two +# generated artifacts that every build layer consumes: +# +# * build-isolation.cmake : a CMake toolchain fragment carrying the ignore / +# prefix policy (CMAKE_IGNORE_PREFIX_PATH, ...). +# * path-policy.env : resolved role variables + machine-readable +# resolved_{trusted,ignored,runtime_ignored}_roots +# for shell-only stages (pkg-config, runtime gate). +# +# Design goals (see design doc): +# - User-explicit paths always win over ambient environment residue. +# - macOS AND Linux are first-class default-on platforms; the only platform +# difference is the DEFAULT trusted/ignored root set. +# - Safe to source under `set -euo pipefail`; all env reads are guarded; +# functions are local-scoped; only documented variables are exported. +# ============================================================================ + +# --- internal helpers ------------------------------------------------------- + +# Canonicalize a path for classification. 
Existing paths use realpath/pwd -P; +# a not-yet-existing install root is canonicalized via its nearest existing +# parent plus the remaining (non-existent) segments. Never fails the caller. +_gi_canonical() { + local raw="${1:-}" + [ -z "$raw" ] && { printf '%s' ""; return 0; } + if [ -e "$raw" ]; then + ( cd "$raw" 2>/dev/null && pwd -P ) 2>/dev/null && return 0 + # Not a directory we can cd into (e.g. a file): resolve its dirname. + local d b + d=$(dirname -- "$raw"); b=$(basename -- "$raw") + if [ -d "$d" ]; then printf '%s/%s' "$(cd "$d" && pwd -P)" "$b"; return 0; fi + fi + # Walk up to the nearest existing ancestor, then re-append the tail. + local cur="$raw" tail="" + while [ -n "$cur" ] && [ "$cur" != "/" ] && [ ! -d "$cur" ]; do + tail="$(basename -- "$cur")${tail:+/$tail}" + cur=$(dirname -- "$cur") + done + if [ -d "$cur" ]; then + cur=$(cd "$cur" && pwd -P) + printf '%s%s' "$cur" "${tail:+/$tail}" + else + printf '%s' "$raw" + fi +} + +# True if CHILD is PARENT or lives under PARENT (both already canonical). +_gi_is_under() { + local child="${1:-}" parent="${2:-}" + [ -z "$child" ] && return 1 + [ -z "$parent" ] && return 1 + [ "$child" = "$parent" ] && return 0 + case "$child" in + "$parent"/*) return 0 ;; + *) return 1 ;; + esac +} + +# Append a value to a newline-accumulator variable if not already present. 
+_gi_add_unique() { + # $1 = accumulator var name, $2 = value + local __name="$1" __val="$2" __cur + [ -z "$__val" ] && return 0 + eval "__cur=\${$__name:-}" + case " +$__cur +" in + *" +$__val +"*) return 0 ;; + esac + eval "$__name=\"\${$__name:+\$$__name +}$__val\"" +} + +# --- main entry point ------------------------------------------------------- +# +# Usage: resolve_build_isolation +# Honors (all optional, guarded): +# GLUTEN_BUILD_ISOLATION auto|on|off (default auto) +# INSTALL_PREFIX component install root +# VELOX_HOME / VELOX_BUILD_PATH / ARROW_HOME / ARROW_INSTALL_DIR +# ENABLE_VCPKG / GLUTEN_VCPKG_ENABLED / VCPKG_ROOT +# CONDA_PREFIX +# GLUTEN_TRUST_PREFIX (path; promote to trusted discovery root) +# GLUTEN_ALLOW_IGNORED_ROOTS (path; re-allow an otherwise-ignored root) +# GLUTEN_ISOLATION_DRYRUN=1 (write artifacts to GLUTEN_ISOLATION_OUT, print, do not touch build) +# Exports (documented): +# GLUTEN_ISOLATION on|off (resolved) +# GLUTEN_ISOLATION_DIR directory holding generated artifacts +# GLUTEN_ISOLATION_TOOLCHAIN path to build-isolation.cmake (when on) +# GLUTEN_ISOLATION_ENV path to path-policy.env +# INSTALL_PREFIX resolved install prefix (may be newly set on macOS default) +resolve_build_isolation() { + # GLUTEN_ISOLATION_FAKE_OS lets the test harness exercise the other platform's + # default trusted/ignored derivation without a second host. Never set in real builds. + local os; os="${GLUTEN_ISOLATION_FAKE_OS:-$(uname)}" + + # 1. Locate Gluten root from this script's location. + local _src="${BASH_SOURCE[0]:-$0}" + local gluten_dir; gluten_dir=$(cd "$(dirname -- "$_src")/.." && pwd -P) + + # 2. Normalize vcpkg signals into a single boolean. + local vcpkg_enabled=false + if [ "${ENABLE_VCPKG:-}" = "ON" ] || [ "${GLUTEN_VCPKG_ENABLED:-}" = "ON" ] || \ + [ "${GLUTEN_VCPKG_ENABLED:-}" = "1" ] || [ -n "${VCPKG_ROOT:-}" ]; then + vcpkg_enabled=true + fi + + # 3. Decide isolation on/off. Never silently downgrade an explicit `on`. 
+ local mode="${GLUTEN_BUILD_ISOLATION:-auto}" + local isolation="off" + case "$mode" in + on) + if [ "$vcpkg_enabled" = true ]; then + echo "FATAL: GLUTEN_BUILD_ISOLATION=on is incompatible with vcpkg (vcpkg owns the CMake toolchain)." >&2 + echo " Use GLUTEN_BUILD_ISOLATION=auto or =off to take the vcpkg path." >&2 + return 1 + fi + isolation="on" ;; + off) + isolation="off" ;; + auto|"") + if [ "$vcpkg_enabled" = true ]; then + isolation="off" # vcpkg keeps its own discovery + else + isolation="on" # macOS AND Linux default-on (user decision U1) + fi ;; + *) + echo "FATAL: invalid GLUTEN_BUILD_ISOLATION='$mode' (expected auto|on|off)." >&2 + return 1 ;; + esac + + # 4. Resolve install prefix + provenance. + # GLUTEN_INSTALL_PREFIX_EXPLICIT (when exported by a parent entry) is the + # authoritative "was this user-requested vs platform-default" signal, so a + # parent that defaulted INSTALL_PREFIX to deps-install does not look like a + # separate-install request to child entries. + local install_prefix_explicit=false + local install_prefix="${INSTALL_PREFIX:-}" + if [ -n "${GLUTEN_INSTALL_PREFIX_EXPLICIT:-}" ]; then + install_prefix_explicit="$GLUTEN_INSTALL_PREFIX_EXPLICIT" + elif [ -n "$install_prefix" ]; then + install_prefix_explicit=true + fi + if [ -z "$install_prefix" ] && [ "$isolation" = "on" ] && [ "$os" = "Darwin" ]; then + # macOS default: local prefix under the Velox tree (never system). + local velox_home="${VELOX_HOME:-$gluten_dir/ep/build-velox/build/velox_ep}" + install_prefix="${velox_home}/deps-install" + fi + # Linux default (no explicit prefix): install_prefix stays empty => setup + # installs to its system location, which is trusted-managed (Docker/CI intact). + + local canon_install; canon_install=$(_gi_canonical "$install_prefix") + + # 5. Build trusted / ignored / neutral root sets (canonical). 
+ local trusted_roots="" ignored_roots="" runtime_ignored_roots="" + + if [ "$isolation" = "on" ]; then + # Trusted: explicit install prefix + Velox local prefixes. + [ -n "$canon_install" ] && _gi_add_unique trusted_roots "$canon_install" + [ -n "${VELOX_HOME:-}" ] && _gi_add_unique trusted_roots "$(_gi_canonical "$VELOX_HOME")" + [ -n "${VELOX_BUILD_PATH:-}" ] && _gi_add_unique trusted_roots "$(_gi_canonical "$VELOX_BUILD_PATH")" + [ -n "${ARROW_HOME:-}" ] && _gi_add_unique trusted_roots "$(_gi_canonical "$ARROW_HOME")" + [ -n "${ARROW_INSTALL_DIR:-}" ]&& _gi_add_unique trusted_roots "$(_gi_canonical "$ARROW_INSTALL_DIR")" + if [ -n "${GLUTEN_TRUST_PREFIX:-}" ]; then + local _p; for _p in ${GLUTEN_TRUST_PREFIX//:/ }; do + _gi_add_unique trusted_roots "$(_gi_canonical "$_p")"; done + fi + + # Decide whether the system dirs (/usr/local, /usr) are ignored by default. + # macOS default -> ignore /usr/local (unless prefix under it) + # Linux default -> system trusted (do NOT ignore /usr) + # any platform + explicit non-system INSTALL_PREFIX -> ignore /usr/local + /usr + local want_ignore_system=false + local sys_under_prefix=false + if [ -n "$canon_install" ] && { _gi_is_under "$canon_install" "/usr/local" || _gi_is_under "$canon_install" "/usr"; }; then + sys_under_prefix=true # user chose a system-dir prefix (system mode) + fi + if [ "$install_prefix_explicit" = true ] && [ "$sys_under_prefix" = false ]; then + want_ignore_system=true # separate install: keep system dirs out (U1) + elif [ "$install_prefix_explicit" = false ] && [ "$os" = "Darwin" ]; then + want_ignore_system=true # macOS default isolation + fi + + if [ "$want_ignore_system" = true ]; then + _gi_add_unique ignored_roots "/usr/local" + # On a separate-install request, also keep the broader /usr out of implicit discovery. 
+ if [ "$install_prefix_explicit" = true ]; then + _gi_add_unique ignored_roots "/usr" + fi + fi + + # Ambient pollution that is filtered on BOTH platforms when isolated: + # - unrelated Conda prefix (unless the install prefix lives under it) + if [ -n "${CONDA_PREFIX:-}" ]; then + local canon_conda; canon_conda=$(_gi_canonical "$CONDA_PREFIX") + if [ -z "$canon_install" ] || ! _gi_is_under "$canon_install" "$canon_conda"; then + _gi_add_unique ignored_roots "$canon_conda" + fi + fi + + # Honor the allow-list: anything explicitly re-allowed is removed from + # ignored and recorded as trusted (trusted wins, with a stable marker). + local overlap_markers="" + if [ -n "${GLUTEN_ALLOW_IGNORED_ROOTS:-}" ]; then + local _a; for _a in ${GLUTEN_ALLOW_IGNORED_ROOTS//:/ }; do + local canon_a; canon_a=$(_gi_canonical "$_a") + _gi_add_unique trusted_roots "$canon_a" + done + fi + + # Remove from ignored any root that is also trusted (trusted precedence). + local filtered_ignored="" _ir + local _t_block=" +$trusted_roots +" + while IFS= read -r _ir; do + [ -z "$_ir" ] && continue + case "$_t_block" in + *" +$_ir +"*) + overlap_markers="${overlap_markers:+$overlap_markers +}# resolved-overlap: $_ir" ;; + *) + filtered_ignored="${filtered_ignored:+$filtered_ignored +}$_ir" ;; + esac + done </include` demotes each + # ignored root below all -I/-isystem dirs so a stale header there cannot + # shadow bundled/deps-install headers (see long note at step 8b). Computed + # here so both the toolchain (step 8) and the export (step 10) can use it. + local idirafter_flags="" _ira + if [ "$isolation" = "on" ] && [ -n "$ignored_roots" ]; then + while IFS= read -r _ira; do + [ -z "$_ira" ] && continue + idirafter_flags="${idirafter_flags:+$idirafter_flags }-idirafter $_ira/include" + done <_FLAGS_INIT -- CMake's platform/compiler modules run after + # the toolchain file and overwrite *_FLAGS_INIT, dropping it. 
Instead the + # resolver exports CFLAGS/CXXFLAGS (which CMake uses to seed the + # compile flags and which child cmake processes inherit), and entries that + # set -DCMAKE_CXX_FLAGS fold GLUTEN_ISOLATION_CXXFLAGS in directly. + # NOTE: we deliberately do NOT prepend trusted roots to CMAKE_PREFIX_PATH + # here. This toolchain file is inherited by EVERY nested cmake build, + # including Velox's and Arrow's self-contained bundled-dependency builds. + # Prepending deps-install globally redirects those bundled builds to the + # wrong artifacts (e.g. Velox's bundled Arrow picking up a partial Arrow + # or a build-relative Thrift config from deps-install). The toolchain + # carries ONLY the ignore policy (the universally-safe part); each entry + # point passes -DCMAKE_PREFIX_PATH for its own top-level configure where + # discovering deps-install is actually wanted. trusted_roots is still + # recorded in path-policy.env for the runtime link gate. + } > "$tmp" + mv -f "$tmp" "$toolchain_file" + else + rm -f "$tmp" + rm -f "$toolchain_file" + fi + + # 8b. Build the equivalent -D flag string for shell entry points that splice + # flags directly into their cmake invocation. Emitted ONLY when there are + # ignored roots, so Linux-default (system trusted) stays a no-op and + # macOS-default matches the legacy hardcoded flags (plus a lib64 entry). + local cmake_flags="" + # Compiler-level isolation: clang/gcc search an ignored root's /include (e.g. + # /usr/local/include) AHEAD of command-line -isystem dirs, so a stale header + # there (old gtest/fmt/...) shadows the bundled/deps-install copy even when + # CMake points -isystem at the right one. CMAKE_IGNORE_* only governs CMake's + # find_*, not the compiler's include search. `-idirafter {root}/include` + # demotes that root below every -I/-isystem dir (the compiler de-dups it from + # its default-early slot), so the correct headers win while the root stays + # available as a last resort.
Applied to every build via the exported + # CFLAGS/CXXFLAGS (not *_FLAGS_INIT) and GLUTEN_ISOLATION_CXXFLAGS for entries that set their own flags. + if [ "$isolation" = "on" ] && [ -n "$ignored_roots" ]; then + local _prefixlist2="" _cmlist2 _ir4 + local _cmlist2="" + while IFS= read -r _ir4; do + [ -z "$_ir4" ] && continue + _prefixlist2="${_prefixlist2:+$_prefixlist2;}$_ir4" + _cmlist2="${_cmlist2:+$_cmlist2;}$_ir4;$_ir4/include;$_ir4/lib;$_ir4/lib64;$_ir4/lib/cmake" + done < "$tmp" + mv -f "$tmp" "$env_file" + + # Also drop the flat machine-readable root files the runtime gate reads. + printf '%s\n' "$trusted_roots" | grep -v '^$' > "$out_dir/resolved_trusted_roots" 2>/dev/null || : > "$out_dir/resolved_trusted_roots" + printf '%s\n' "$ignored_roots" | grep -v '^$' > "$out_dir/resolved_ignored_roots" 2>/dev/null || : > "$out_dir/resolved_ignored_roots" + printf '%s\n' "$runtime_ignored_roots"| grep -v '^$' > "$out_dir/resolved_runtime_ignored_roots" 2>/dev/null || : > "$out_dir/resolved_runtime_ignored_roots" + + # 10. Export for the calling shell + one-time human-readable summary. + export GLUTEN_ISOLATION="$isolation" + export GLUTEN_ISOLATION_DIR="$out_dir" + export GLUTEN_ISOLATION_ENV="$env_file" + export GLUTEN_ISOLATION_CMAKE_FLAGS="$cmake_flags" + export GLUTEN_ISOLATION_CXXFLAGS="$idirafter_flags" + export GLUTEN_INSTALL_PREFIX_EXPLICIT="$install_prefix_explicit" + # Export CFLAGS/CXXFLAGS so the -idirafter demotion reaches EVERY nested cmake + # (CMake seeds the compile flags from $ENV{CFLAGS}/$ENV{CXXFLAGS}, and child cmake + # processes -- e.g. Velox's bundled Arrow and its FetchContent googletest -- + # inherit it). Guarded so repeated resolver calls don't duplicate the flags.
+ if [ -n "$idirafter_flags" ]; then + case " ${CFLAGS:-} " in *" $idirafter_flags "*) ;; *) export CFLAGS="${CFLAGS:+$CFLAGS }$idirafter_flags" ;; esac + case " ${CXXFLAGS:-} " in *" $idirafter_flags "*) ;; *) export CXXFLAGS="${CXXFLAGS:+$CXXFLAGS }$idirafter_flags" ;; esac + fi + if [ "$isolation" = "on" ]; then + export GLUTEN_ISOLATION_TOOLCHAIN="$toolchain_file" + # Propagate the ignore/prefix policy to EVERY nested cmake build by exporting + # CMAKE_TOOLCHAIN_FILE (CMake >=3.21 reads it as the default toolchain). This + # is what makes isolation comprehensive: Velox builds its own dependencies + # (folly, etc.) via its own scripts that never receive our -D flags, so + # without this they would still discover /usr/local. Any pre-existing user + # toolchain is preserved once in GLUTEN_ORIGINAL_CMAKE_TOOLCHAIN_FILE and + # re-included by the generated file (no self-recursion: the generated file + # never references CMAKE_TOOLCHAIN_FILE). + if [ -n "${CMAKE_TOOLCHAIN_FILE:-}" ] && \ + [ "${CMAKE_TOOLCHAIN_FILE}" != "$toolchain_file" ] && \ + [ -z "${GLUTEN_ORIGINAL_CMAKE_TOOLCHAIN_FILE:-}" ]; then + export GLUTEN_ORIGINAL_CMAKE_TOOLCHAIN_FILE="$CMAKE_TOOLCHAIN_FILE" + fi + export CMAKE_TOOLCHAIN_FILE="$toolchain_file" + [ -n "$install_prefix" ] && export INSTALL_PREFIX="$install_prefix" + else + # Isolation resolved to off, but a prior resolver call in this same shell may + # have pointed CMAKE_TOOLCHAIN_FILE at our (now-removed) generated toolchain. + # Restore the user's original toolchain (captured on that earlier on-call) or + # unset it -- otherwise toggling modes within one shell leaves the variable + # referencing a deleted file. 
+ if [ -n "${CMAKE_TOOLCHAIN_FILE:-}" ] && [ "${CMAKE_TOOLCHAIN_FILE}" = "$toolchain_file" ]; then + if [ -n "${GLUTEN_ORIGINAL_CMAKE_TOOLCHAIN_FILE:-}" ]; then + export CMAKE_TOOLCHAIN_FILE="$GLUTEN_ORIGINAL_CMAKE_TOOLCHAIN_FILE" + else + unset CMAKE_TOOLCHAIN_FILE + fi + fi + fi + + echo "================ Gluten build isolation ================" >&2 + echo " mode=$mode -> isolation=$isolation (os=$os, vcpkg=$vcpkg_enabled)" >&2 + echo " install_prefix=${canon_install:-} (explicit=$install_prefix_explicit)" >&2 + if [ "$isolation" = "on" ]; then + echo " trusted roots:" >&2; printf '%s\n' "$trusted_roots" | sed 's/^/ + /' >&2 + echo " ignored roots:" >&2; printf '%s\n' "$ignored_roots" | sed 's/^/ - /' >&2 + echo " toolchain: $toolchain_file" >&2 + fi + echo " policy env: $env_file" >&2 + echo "=======================================================" >&2 + return 0 +} diff --git a/dev/builddeps-veloxbe.sh b/dev/builddeps-veloxbe.sh index 3fcd42660fc..85503ae047e 100755 --- a/dev/builddeps-veloxbe.sh +++ b/dev/builddeps-veloxbe.sh @@ -158,11 +158,17 @@ do esac done +# Record whether INSTALL_PREFIX was user-provided BEFORE we apply the platform +# default, so the isolation resolver (dev/build-isolation.sh) can distinguish a +# separate-install request from the platform default. See the native build +# component-isolation design. +if [ -n "${INSTALL_PREFIX:-}" ]; then + export GLUTEN_INSTALL_PREFIX_EXPLICIT=true +else + export GLUTEN_INSTALL_PREFIX_EXPLICIT=false +fi if [[ "$(uname)" == "Darwin" ]]; then export INSTALL_PREFIX=${INSTALL_PREFIX:-${VELOX_HOME}/deps-install} - if [[ "$INSTALL_PREFIX" == "/usr/local" || "$INSTALL_PREFIX" == /usr/local/* ]]; then - echo "INFO: INSTALL_PREFIX=$INSTALL_PREFIX is under /usr/local; keeping /usr/local visible to CMake." 
>&2 - fi fi function concat_velox_param { @@ -260,14 +266,17 @@ function build_gluten_cpp { -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ -DENABLE_ENHANCED_FEATURES=$ENABLE_ENHANCED_FEATURES" + # Component isolation: a single resolver computes the ignore/prefix policy for + # both macOS and Linux (dev/build-isolation.sh). On macOS default this yields + # the same /usr/local ignore flags as before; on Linux default it is a no-op + # (system stays trusted); under vcpkg it is disabled. + source $GLUTEN_DIR/dev/build-isolation.sh + resolve_build_isolation || exit 1 + if [ -n "${GLUTEN_ISOLATION_CMAKE_FLAGS:-}" ]; then + GLUTEN_CMAKE_OPTIONS+=" ${GLUTEN_ISOLATION_CMAKE_FLAGS}" + fi if [ $OS == 'Darwin' ]; then GLUTEN_CMAKE_OPTIONS+=" -DCMAKE_PREFIX_PATH=$INSTALL_PREFIX" - if [[ "$INSTALL_PREFIX" != "/usr/local" && "$INSTALL_PREFIX" != /usr/local/* ]]; then - GLUTEN_CMAKE_OPTIONS+=" -DCMAKE_NO_SYSTEM_FROM_IMPORTED=ON" - GLUTEN_CMAKE_OPTIONS+=" -DCMAKE_IGNORE_PREFIX_PATH=/usr/local" - GLUTEN_CMAKE_OPTIONS+=" -DCMAKE_IGNORE_PATH=/usr/local;/usr/local/include;/usr/local/lib;/usr/local/lib/cmake" - GLUTEN_CMAKE_OPTIONS+=" -DCMAKE_SYSTEM_IGNORE_PATH=/usr/local;/usr/local/include;/usr/local/lib;/usr/local/lib/cmake" - fi GLUTEN_CMAKE_OPTIONS+=" -DCMAKE_CXX_FLAGS=-Wno-inconsistent-missing-override -Wno-macro-redefined" fi @@ -327,6 +336,12 @@ function setup_dependencies_arrow { OS=`uname -s` ARCH=`uname -m` +# Establish the component-isolation policy once, up front, and propagate it to +# EVERY nested cmake build (including Velox's own dependency setup) via the +# exported CMAKE_TOOLCHAIN_FILE. Without this, /usr/local would still leak into +# Velox-driven dependency builds. No-op under vcpkg / isolation off. 
+source $GLUTEN_DIR/dev/build-isolation.sh +resolve_build_isolation || exit 1 commands_to_run=(${OTHER_ARGUMENTS[@]:-}) ( if [[ ${#commands_to_run[@]} -eq 0 ]]; then diff --git a/ep/build-velox/src/build-velox.sh b/ep/build-velox/src/build-velox.sh index cf7c97c455f..98fdb9ffa3b 100755 --- a/ep/build-velox/src/build-velox.sh +++ b/ep/build-velox/src/build-velox.sh @@ -110,20 +110,32 @@ function compile { # -Wno-unknown-warning-option is a Clang-originated flag. GCC ignores unrecognized -Wno- flags to # maintain compatibility, but it prints a diagnostic note about the unknown flag if a true warning # or error occurs. + # Component isolation: a single resolver computes the ignore/prefix policy for + # both macOS and Linux (dev/build-isolation.sh). macOS default reproduces the + # previous /usr/local ignore flags; Linux default is a no-op; vcpkg disables it. + # Velox's Makefile re-expands EXTRA_CMAKE_FLAGS through the shell, so list + # separators must be escaped (`;` -> `\;`). Source it first so the compiler + # -idirafter flags can be folded into CXX_FLAGS below. + local _gi_root; _gi_root="$(cd "$CURRENT_DIR/../../.." && pwd)" + if [ -f "$_gi_root/dev/build-isolation.sh" ]; then + source "$_gi_root/dev/build-isolation.sh" + resolve_build_isolation || exit 1 + fi + CXX_FLAGS='-Wno-error=stringop-overflow -Wno-error=cpp -Wno-missing-field-initializers \ -Wno-error=uninitialized -Wno-unknown-warning-option -Wno-deprecated-declarations' if [[ "$(uname)" == "Darwin" ]]; then CXX_FLAGS="$CXX_FLAGS -Wno-inconsistent-missing-override -Wno-macro-redefined" fi + # Demote ignored roots' /include below -I/-isystem so a stale /usr/local + # header can't shadow Velox's bundled/deps-install headers (compiler isolation). 
+ CXX_FLAGS="$CXX_FLAGS ${GLUTEN_ISOLATION_CXXFLAGS:-}" COMPILE_OPTION="-DCMAKE_CXX_FLAGS=\"$CXX_FLAGS\" -DVELOX_ENABLE_PARQUET=ON -DVELOX_BUILD_TESTING=OFF \ -DVELOX_MONO_LIBRARY=ON -DVELOX_BUILD_RUNNER=OFF -DVELOX_SIMDJSON_SKIPUTF8VALIDATION=ON \ -DVELOX_ENABLE_GEO=OFF" - if [[ "$(uname)" == "Darwin" && "$INSTALL_PREFIX" != "/usr/local" && "$INSTALL_PREFIX" != /usr/local/* ]]; then - COMPILE_OPTION="$COMPILE_OPTION -DCMAKE_NO_SYSTEM_FROM_IMPORTED=ON" - COMPILE_OPTION="$COMPILE_OPTION -DCMAKE_IGNORE_PREFIX_PATH=/usr/local" - COMPILE_OPTION="$COMPILE_OPTION -DCMAKE_IGNORE_PATH=/usr/local\;/usr/local/include\;/usr/local/lib\;/usr/local/lib/cmake" - COMPILE_OPTION="$COMPILE_OPTION -DCMAKE_SYSTEM_IGNORE_PATH=/usr/local\;/usr/local/include\;/usr/local/lib\;/usr/local/lib/cmake" + if [ -n "${GLUTEN_ISOLATION_CMAKE_FLAGS:-}" ]; then + COMPILE_OPTION="$COMPILE_OPTION ${GLUTEN_ISOLATION_CMAKE_FLAGS//;/\\;}" fi if [ $BUILD_TEST_UTILS == "ON" ]; then COMPILE_OPTION="$COMPILE_OPTION -DVELOX_BUILD_TEST_UTILS=ON" @@ -208,11 +220,18 @@ if [ "$VELOX_HOME" == "" ]; then VELOX_HOME="$CURRENT_DIR/../build/velox_ep" fi +# Record explicit-vs-default INSTALL_PREFIX for the isolation resolver, unless a +# parent entry (builddeps-veloxbe.sh) already established it. See the native +# build component-isolation design. +if [ -z "${GLUTEN_INSTALL_PREFIX_EXPLICIT:-}" ]; then + if [ -n "${INSTALL_PREFIX:-}" ]; then + export GLUTEN_INSTALL_PREFIX_EXPLICIT=true + else + export GLUTEN_INSTALL_PREFIX_EXPLICIT=false + fi +fi if [ "$OS" == 'Darwin' ]; then export INSTALL_PREFIX="${INSTALL_PREFIX:-${VELOX_HOME}/deps-install}" - if [[ "$INSTALL_PREFIX" == "/usr/local" || "$INSTALL_PREFIX" == /usr/local/* ]]; then - echo "INFO: INSTALL_PREFIX=$INSTALL_PREFIX is under /usr/local; keeping /usr/local visible to CMake." >&2 - fi fi echo "Start building Velox..."