diff --git a/.github/workflows/install-deps-macos.sh b/.github/workflows/install-deps-macos.sh
new file mode 100755
index 0000000000..596c2e0a5c
--- /dev/null
+++ b/.github/workflows/install-deps-macos.sh
@@ -0,0 +1,126 @@
+#!/bin/bash
+#
+# Install the macOS CI dependencies and export the toolchain settings
+# (PATH entries, SDK and compiler variables) for the following steps.
+#
+# The workflow runs this file as 'sh <script>', so everything outside the
+# generated intercept-build wrapper is limited to POSIX sh constructs.
+
+set -u
+
+warn() { printf 'install-deps-macos: warning: %s\n' "$*" >&2; }
+die() { printf 'install-deps-macos: error: %s\n' "$*" >&2; exit 1; }
+
+# Add a directory to the PATH of all subsequent workflow steps.
+add_to_github_path() {
+  if [ -n "${GITHUB_PATH:-}" ]; then
+    printf '%s\n' "$1" >> "$GITHUB_PATH" || die "cannot write $GITHUB_PATH"
+  else
+    warn "GITHUB_PATH is not set, not exporting $1"
+  fi
+}
+
+# Export NAME=VALUE to the environment of all subsequent workflow steps.
+set_github_env() {
+  printf '%s=%s\n' "$1" "$2" >> "$GITHUB_ENV" || die "cannot write $GITHUB_ENV"
+}
+
+# The exit status of brew alone is not treated as fatal; what matters is
+# that the tools used below are really there, which is verified explicitly.
+brew install llvm@14 gcc@13 cppcheck openldap bear \
+  || warn "brew install exited with status $?"
+
+LLVM_BIN="$(brew --prefix llvm@14)/bin"
+GCC_BIN="$(brew --prefix gcc@13)/bin"
+[ -x "$LLVM_BIN/clang" ] || die "llvm@14 is missing: $LLVM_BIN/clang"
+[ -x "$GCC_BIN/gcc-13" ] || die "gcc@13 is missing: $GCC_BIN/gcc-13"
+[ -x "$GCC_BIN/g++-13" ] || die "gcc@13 is missing: $GCC_BIN/g++-13"
+command -v bear > /dev/null || die "bear is missing from PATH"
+
+add_to_github_path "$LLVM_BIN"
+add_to_github_path "$GCC_BIN"
+
+# Create g++/gcc symlinks matching Linux CI naming
+ln -sf "$GCC_BIN/g++-13" "$GCC_BIN/g++" || die "cannot create g++ symlink"
+ln -sf "$GCC_BIN/gcc-13" "$GCC_BIN/gcc" || die "cannot create gcc symlink"
+
+# Create intercept-build wrapper using bear.
+# The LLVM intercept-build is broken on macOS ARM64 (libear.dylib arch
+# mismatch with SIP). Bear provides equivalent functionality.
+WRAPPER_DIR="$(pwd)/build/intercept-build-wrapper"
+mkdir -p "$WRAPPER_DIR" || die "cannot create $WRAPPER_DIR"
+cat > "$WRAPPER_DIR/intercept-build" << 'EOF'
+#!/bin/bash
+# Usage: intercept-build [--cdb FILE] [--] COMMAND [ARG...]
+CDB=""
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --cdb)
+      if [[ $# -lt 2 ]]; then
+        echo "intercept-build: --cdb requires an argument" >&2
+        exit 2
+      fi
+      CDB="$2"
+      shift 2
+      ;;
+    --help)
+      echo "intercept-build wrapper using bear"
+      exit 0
+      ;;
+    --)
+      shift
+      break
+      ;;
+    *)
+      # The build command starts here; its own arguments (which may look
+      # like the options above) must reach the command untouched.
+      break
+      ;;
+  esac
+done
+[[ -z "$CDB" ]] && CDB="compile_commands.json"
+if [[ $# -eq 0 ]]; then
+  echo "intercept-build: no build command given" >&2
+  exit 2
+fi
+exec bear --output "$CDB" -- "$@"
+EOF
+chmod +x "$WRAPPER_DIR/intercept-build" || die "cannot create the wrapper"
+add_to_github_path "$WRAPPER_DIR"
+
+# Homebrew's llvm@14 does not know where the macOS SDK lives, so libc++
+# headers that use '#include_next <ctype.h>' (and other platform C headers)
+# fail with "file not found". Apple's own clang resolves this via xcrun, but
+# the standalone llvm@14 needs SDKROOT to be set explicitly. Recent runner
+# images removed the implicit header path clang@14 used to fall back on,
+# which is why analyzer and web tests started failing with:
+#   fatal error: 'ctype.h' file not found
+# Pin SDKROOT to the active SDK so subsequent build and test steps can compile.
+if [ -n "${GITHUB_ENV:-}" ]; then
+  SDK_PATH="$(xcrun --show-sdk-path)" || die "xcrun cannot find the SDK path"
+  SDK_VERSION="$(xcrun --show-sdk-version)" \
+    || die "xcrun cannot find the SDK version"
+  set_github_env SDKROOT "$SDK_PATH"
+  # The runner is Apple Silicon (arm64). pip builds C extensions as a
+  # universal2 binary (-arch arm64 -arch x86_64) by default, but clang@14 does
+  # not support '_Float16' for the x86_64 target, so compiling against the
+  # macOS 15 SDK's headers fails (e.g. building python-ldap). Restrict native
+  # builds to the host arch; arm64-only objects are correct for the runner.
+  set_github_env ARCHFLAGS "-arch arm64"
+  # gcc@13 defaults to an older deployment target than the installed SDK, so
+  # the Xcode toolchain's (clang-based) assembler prints
+  #   "clang: warning: overriding deployment version ... [-Woverriding-deployment-version]"
+  # to stderr. The GCC analyzer uses '-fdiagnostics-format=sarif-stderr', so
+  # this warning is appended to the SARIF stream and makes it invalid JSON
+  # (breaking analyze_and_parse's gcc tests). Pin the deployment target to the
+  # SDK version so no override (and no warning) is emitted.
+  set_github_env MACOSX_DEPLOYMENT_TARGET "$SDK_VERSION"
+  # Build pip C extensions (e.g. python-ldap) with Apple's clang, not the
+  # Homebrew llvm@14 clang that this script puts on PATH for the analyzer.
+  # clang@14 cannot link against the current macOS SDK's TBD libraries
+  # (e.g. 'ld: library ldap_r not found'), whereas Apple clang handles the
+  # SDK natively. CodeChecker selects its analyzer binary independently of
+  # $CC, so this does not affect which compiler is analyzed/used as analyzer.
+  set_github_env CC /usr/bin/clang
+  set_github_env CXX /usr/bin/clang++
+fi
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 41055c714f..6a2ddf1921 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -46,7 +46,12 @@ jobs:
 
   tools:
     name: Tools (report-converter, etc.)
- runs-on: ubuntu-24.04 + runs-on: ${{ matrix.os }} + continue-on-error: ${{ matrix.os == 'macos-latest' }} + + strategy: + matrix: + os: [ubuntu-24.04, macos-latest] steps: - uses: actions/checkout@v2 @@ -54,15 +59,18 @@ jobs: with: python-version: '3.10' - name: Setup Bazel + if: runner.os == 'Linux' uses: abhinavsingh/setup-bazel@v3 with: version: 4.0.0 - name: Install common dependencies + if: runner.os == 'Linux' run: | sudo apt-get update -q sudo apt-get install gcc-multilib - name: Run build-logger tests + if: runner.os == 'Linux' working-directory: analyzer/tools/build-logger run: | pip install -r requirements_py/dev/requirements.txt @@ -95,6 +103,7 @@ jobs: make test - name: Run bazel-compile-commands tests + if: runner.os == 'Linux' working-directory: tools/bazel run: | pip install -r requirements_py/dev/requirements.txt @@ -102,7 +111,12 @@ jobs: analyzer: name: Analyzer - runs-on: ubuntu-24.04 + runs-on: ${{ matrix.os }} + continue-on-error: ${{ matrix.os == 'macos-latest' }} + + strategy: + matrix: + os: [ubuntu-24.04, macos-latest] steps: - uses: actions/checkout@v2 @@ -110,9 +124,14 @@ jobs: with: python-version: '3.10' - - name: Install dependencies + - name: Install dependencies (Linux) + if: runner.os == 'Linux' run: sh .github/workflows/install-deps.sh + - name: Install dependencies (macOS) + if: runner.os == 'macOS' + run: sh .github/workflows/install-deps-macos.sh + - name: Build the package run: | make pip_dev_deps @@ -181,6 +200,37 @@ jobs: working-directory: web run: make test_unit_cov + web-macos: + name: Web (macOS) + runs-on: macos-latest + continue-on-error: true + + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v4 + with: + python-version: '3.10' + + - name: Install dependencies + run: sh .github/workflows/install-deps-macos.sh + + - name: Run tests + run: | + make pip_dev_deps + pip3 install -r web/requirements_py/auth/requirements.txt + BUILD_UI_DIST=NO make package + + # Run full functional test suite. 
+ cd web + make test_matrix_sqlite + env: + CC_TEST_API_WORKERS: "1" + CC_TEST_TASK_WORKERS: "1" + + - name: Run unit tests coverage + working-directory: web + run: make test_unit_cov + gui: name: GUI runs-on: ubuntu-24.04 diff --git a/analyzer/codechecker_analyzer/analysis_manager.py b/analyzer/codechecker_analyzer/analysis_manager.py index 36aad1188a..7d8c11115b 100644 --- a/analyzer/codechecker_analyzer/analysis_manager.py +++ b/analyzer/codechecker_analyzer/analysis_manager.py @@ -8,6 +8,7 @@ import glob +import logging import os import shlex import shutil @@ -118,10 +119,14 @@ def worker_result_handler(results, metadata_tool, output_path): PROGRESS_ACTIONS = None -def init_worker(checked_num, action_num): +def init_worker(checked_num, action_num, log_level=None): global PROGRESS_CHECKED_NUM, PROGRESS_ACTIONS PROGRESS_CHECKED_NUM = checked_num PROGRESS_ACTIONS = action_num + # With spawn, workers need explicit logger setup (no fork inheritance). + if log_level: + from codechecker_common.logger import setup_logger + setup_logger(log_level) def save_output(base_file_name, out, err): @@ -701,9 +706,16 @@ def signal_handler(signum, _): # Start checking parallel. checked_var = multiprocess.Value('i', 1) actions_num = multiprocess.Value('i', len(actions)) - pool = multiprocess.Pool(jobs, + # Spawned workers (macOS/Windows) do not inherit the parent's logging + # configuration and must set it up explicitly. Forked workers (Linux) + # already inherit it; re-running the logging setup in every worker is + # unnecessary and can cause intermittent analysis failures. + log_level = None + if multiprocess.get_start_method() != 'fork': + log_level = logging.getLevelName(LOG.getEffectiveLevel()) + pool = multiprocess.Pool(jobs, # pylint: disable=not-callable initializer=init_worker, - initargs=(checked_var, actions_num)) + initargs=(checked_var, actions_num, log_level)) signal.signal(signal.SIGINT, signal_handler) # If the analysis has failed, we help debugging. 
diff --git a/analyzer/codechecker_analyzer/buildlog/log_parser.py b/analyzer/codechecker_analyzer/buildlog/log_parser.py index a18d6bfa7c..9bc2501165 100644 --- a/analyzer/codechecker_analyzer/buildlog/log_parser.py +++ b/analyzer/codechecker_analyzer/buildlog/log_parser.py @@ -26,7 +26,9 @@ from codechecker_analyzer.analyzers.clangsa.analyzer import ClangSA -from codechecker_common.compatibility import multiprocessing +import multiprocess +from multiprocess.managers import SyncManager + from codechecker_common.logger import get_logger from codechecker_common.util import load_json @@ -1241,6 +1243,11 @@ class CompileActionUniqueingType(Enum): # recognizing symlink and remove duplication +def _init_log_parser_worker(compiler_info_dict): + """Set shared manager dict in spawn workers.""" + ImplicitCompilerInfo.compiler_info = compiler_info_dict + + def _process_entry_worker(args): """ Worker function for processing compilation database entries in parallel. @@ -1339,7 +1346,7 @@ def parse_unique_log(compilation_database, __contains_no_intrinsic_headers.cache_clear() if jobs is None: - jobs = multiprocessing.cpu_count() + jobs = multiprocess.cpu_count() # Prepare entries for parallel processing entries = extend_compilation_database_entries(compilation_database) @@ -1352,12 +1359,15 @@ def parse_unique_log(compilation_database, # Here we overwrite ImplicitCompilerInfo.compiker_info with a dict type # that can be used in multiprocess environment, since the next section # is executed in a process pool. 
- manager = multiprocessing.SyncManager() + manager = SyncManager() manager.start() ImplicitCompilerInfo.compiler_info = manager.dict() # Process entries in parallel using imap_unordered with chunk size 1024 - with multiprocessing.Pool(jobs) as pool: + with multiprocess.Pool( # pylint: disable=not-callable + jobs, + initializer=_init_log_parser_worker, + initargs=(ImplicitCompilerInfo.compiler_info,)) as pool: # Convert generator to list for map function worker_args_list = list(worker_args) results = pool.map(_process_entry_worker, worker_args_list) diff --git a/analyzer/codechecker_analyzer/cli/analyze.py b/analyzer/codechecker_analyzer/cli/analyze.py index c805149d38..03a64da4d5 100644 --- a/analyzer/codechecker_analyzer/cli/analyze.py +++ b/analyzer/codechecker_analyzer/cli/analyze.py @@ -20,6 +20,7 @@ from functools import partial from tu_collector import tu_collector +from multiprocess import cpu_count # type: ignore from codechecker_analyzer import analyzer, analyzer_context, \ compilation_database @@ -31,7 +32,6 @@ from codechecker_analyzer.buildlog import log_parser from codechecker_common import arg, logger, cmd_config, review_status_handler -from codechecker_common.compatibility.multiprocessing import cpu_count from codechecker_common.skiplist_handler import SkipListHandler, \ SkipListHandlers from codechecker_common.util import load_json diff --git a/analyzer/codechecker_analyzer/cli/check.py b/analyzer/codechecker_analyzer/cli/check.py index 7fec064675..0281e666af 100644 --- a/analyzer/codechecker_analyzer/cli/check.py +++ b/analyzer/codechecker_analyzer/cli/check.py @@ -17,6 +17,8 @@ import sys import tempfile +from multiprocess import cpu_count # type: ignore + from codechecker_analyzer.analyzers import analyzer_types from codechecker_analyzer.arg import \ OrderedCheckersAction, OrderedConfigAction, \ @@ -33,7 +35,6 @@ EPILOG_ENV_VAR as parse_epilog_env_var from codechecker_common import arg, cmd_config, logger -from 
codechecker_common.compatibility.multiprocessing import cpu_count from codechecker_common.source_code_comment_handler import \ REVIEW_STATUS_VALUES diff --git a/analyzer/codechecker_analyzer/pre_analysis_manager.py b/analyzer/codechecker_analyzer/pre_analysis_manager.py index 0f164abd03..9023e54b8c 100644 --- a/analyzer/codechecker_analyzer/pre_analysis_manager.py +++ b/analyzer/codechecker_analyzer/pre_analysis_manager.py @@ -9,6 +9,7 @@ Run pre analysis, collect statistics or CTU data. """ +import logging import os import shlex import shutil @@ -76,10 +77,13 @@ def collect_statistics(action, source, clangsa_config, statistics_data): PROGRESS_ACTIONS = None -def init_worker(checked_num, action_num): +def init_worker(checked_num, action_num, log_level=None): global PROGRESS_CHECKED_NUM, PROGRESS_ACTIONS PROGRESS_CHECKED_NUM = checked_num PROGRESS_ACTIONS = action_num + if log_level: + from codechecker_common.logger import setup_logger + setup_logger(log_level) def pre_analyze(params): @@ -167,9 +171,16 @@ def signal_handler(signum, _): processed_var = multiprocess.Value('i', 0) actions_num = multiprocess.Value('i', len(actions)) - pool = multiprocess.Pool(jobs, + # Spawned workers (macOS/Windows) do not inherit the parent's logging + # configuration and must set it up explicitly. Forked workers (Linux) + # already inherit it; re-running the logging setup in every worker is + # unnecessary and can cause intermittent analysis failures. 
+ log_level = None + if multiprocess.get_start_method() != 'fork': + log_level = logging.getLevelName(LOG.getEffectiveLevel()) + pool = multiprocess.Pool(jobs, # pylint: disable=not-callable initializer=init_worker, - initargs=(processed_var, actions_num)) + initargs=(processed_var, actions_num, log_level)) if statistics_data: # Statistics collection is enabled setup temporary diff --git a/analyzer/tests/functional/analyze/test_analyze.py b/analyzer/tests/functional/analyze/test_analyze.py index 0ca34ceb92..35a65733e9 100644 --- a/analyzer/tests/functional/analyze/test_analyze.py +++ b/analyzer/tests/functional/analyze/test_analyze.py @@ -13,6 +13,7 @@ import glob import json +import sys import os import pathlib import re @@ -1283,8 +1284,11 @@ def test_disable_all_checkers(self): errors="ignore") out, _ = process.communicate() - # Checkers of all 3 analyzers are disabled. - self.assertEqual(out.count("No checkers enabled for"), 5) + # Checkers of all available analyzers are disabled. + # Linux has 5 (clangsa, clang-tidy, cppcheck, gcc, infer), + # macOS has 4 (no infer binary available). + expected_count = 4 if sys.platform == "darwin" else 5 + self.assertEqual(out.count("No checkers enabled for"), expected_count) def test_analyzer_and_checker_config(self): """Test analyzer configuration through command line flags.""" diff --git a/analyzer/tests/functional/analyze_and_parse/test_analyze_and_parse.py b/analyzer/tests/functional/analyze_and_parse/test_analyze_and_parse.py index 7957f51cb6..5421d75785 100644 --- a/analyzer/tests/functional/analyze_and_parse/test_analyze_and_parse.py +++ b/analyzer/tests/functional/analyze_and_parse/test_analyze_and_parse.py @@ -18,6 +18,7 @@ import subprocess import tempfile import unittest +import sys from subprocess import CalledProcessError @@ -154,17 +155,11 @@ def __force_j1(self, cmd): def check_one_file(self, path, mode): """ Test 'analyze' and 'parse' output on a ".output" file. 
- - The '.output' file is formatted as follows: - * >= 1 lines of CodeChecker commands to execute, prefixed by a 'mode' - usually containing commands to build, log, analyze and parse the - corresponding test file. - * A single line containing some - (dashes) - * The lines of the output which is expected to be produced by the - commands in the lines above the -------------. - - mode specifies which command prefixes to execute. """ + # Infer has no macOS binary release. + if sys.platform == "darwin" and "infer" in os.path.basename(path): + self.skipTest("infer not available on macOS") + with open(path, 'r', encoding="utf-8", errors="ignore") as ofile: lines = ofile.readlines() @@ -219,6 +214,7 @@ def check_one_file(self, path, mode): skip_prefixes = ["[] - Analysis length:", "[] - Previous analysis results", "[] - Skipping input file", + "[] - Failed to get analyzer version", # Enabled checkers are listed in the beginning of # analysis. "[] - Enabled checker", @@ -252,8 +248,18 @@ def check_one_file(self, path, mode): r'[] - \2', line) if not any(line.startswith(prefix) for prefix in skip_prefixes): + # Normalize build logger name for cross-platform comparison. + line = line.replace("Using intercept-build.", + "Using CodeChecker ld-logger.") post_processed_output.append(line) + # gcc quote style varies by platform/locale: Unicode curly + # quotes on Linux, backslash-escaped on macOS. Normalize both + # actual and expected to plain ASCII single quotes. 
+ def normalize_quotes(s): + return s.replace("\u2018", "'").replace( + "\u2019", "'").replace("\\'", "'") + print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Actual output below:") print(''.join(post_processed_output)) print("<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< Expected output below:") @@ -261,7 +267,8 @@ def check_one_file(self, path, mode): print("Test output file: " + path) self.maxDiff = None # pylint: disable=invalid-name - self.assertEqual(''.join(post_processed_output), correct_output) + self.assertEqual(normalize_quotes(''.join(post_processed_output)), + normalize_quotes(correct_output)) def test_json_output_for_macros(self): """ Test parse json output for macros. """ @@ -789,6 +796,8 @@ def test_html_checker_url(self): content = f.read() self.assertTrue(re.search('"url": ""', content)) + @unittest.skipIf(sys.platform == "darwin", + "gcc -m32 not available on macOS") def test_mixed_architecture_logging(self): """ Test if CodeChecker can properly log compilation commands when the @@ -886,6 +895,8 @@ def test_mixed_architecture_logging(self): {logged_commands}" ) + @unittest.skipIf(sys.platform == "darwin", + "LD_LIBRARY_PATH not applicable on macOS") def test_use_absolute_paths_flag(self): """ Test if CodeChecker can properly log compilation commands when using @@ -1022,6 +1033,8 @@ def test_use_absolute_paths_flag(self): "Did not find success message for absolute path mode", ) + @unittest.skipIf(sys.platform == "darwin", + "LD_PRELOAD not applicable on macOS") def test_ld_preload(self): """ Test the stripping of LD_PRELOAD if set but has no value """ environ = self.env.copy() diff --git a/analyzer/tests/functional/skip/test_skip.py b/analyzer/tests/functional/skip/test_skip.py index 7e8fb14a1c..3471a80873 100644 --- a/analyzer/tests/functional/skip/test_skip.py +++ b/analyzer/tests/functional/skip/test_skip.py @@ -267,6 +267,7 @@ def test_analyze_only_header(self): except subprocess.CalledProcessError as cerr: print("Failed to run: " + ' '.join(cerr.cmd)) 
print(cerr.output) + self.fail(f"tu_collector failed: {cerr}") skip_file = os.path.join(self.test_workspace, "skipfile") with open(skip_file, 'w', encoding="utf-8", errors="ignore") as skip_f: @@ -440,8 +441,9 @@ def test_analyze_header_with_file_option_and_intercept_json(self): with open(build_json, encoding='utf-8') as f: build_actions = json.load(f) for ba in build_actions: - ba['arguments'] = shlex.split(ba['command']) - del ba['command'] + if 'command' in ba: + ba['arguments'] = shlex.split(ba['command']) + del ba['command'] build_json = os.path.join(self.test_workspace, "build_intercept.json") diff --git a/analyzer/tests/unit/test_buildcmd_escaping.py b/analyzer/tests/unit/test_buildcmd_escaping.py index b3d3f9342f..6778bb5ad9 100644 --- a/analyzer/tests/unit/test_buildcmd_escaping.py +++ b/analyzer/tests/unit/test_buildcmd_escaping.py @@ -11,6 +11,7 @@ import os import shutil +import sys import tempfile import unittest @@ -79,6 +80,8 @@ def test_buildmgr(self): ret_val = build_manager.execute_buildcmd(cmd) self.assertEqual(ret_val, 0) + @unittest.skipIf(sys.platform == 'win32', + "Unix shell quoting not applicable on Windows") def test_analyzer_exec_double_quote(self): """ Test the process execution by the analyzer, @@ -91,7 +94,12 @@ def test_analyzer_exec_double_quote(self): parse_unique_log(self.__get_cmp_json(compile_cmd)) for comp_action in comp_actions: - cmd = [self.compiler] + # The test only verifies that the (escaped) command compiles; + # linking is irrelevant here and is not always possible in CI + # (e.g. clang cannot link against the host SDK on some macOS + # runners). '-fsyntax-only' keeps the check faithful to the + # docstring ("the source file will not compile") on every platform. 
+ cmd = [self.compiler, '-fsyntax-only'] cmd.extend(comp_action.analyzer_options) cmd.append(str(comp_action.source)) cwd = comp_action.directory @@ -106,6 +114,8 @@ def test_analyzer_exec_double_quote(self): print(stderr) self.assertEqual(ret_val, 0) + @unittest.skipIf(sys.platform == 'win32', + "Unix shell quoting not applicable on Windows") def test_analyzer_ansic_double_quote(self): """ Test the process execution by the analyzer with ansi-C like @@ -117,7 +127,12 @@ def test_analyzer_ansic_double_quote(self): parse_unique_log(self.__get_cmp_json(compile_cmd)) for comp_action in comp_actions: - cmd = [self.compiler] + # The test only verifies that the (escaped) command compiles; + # linking is irrelevant here and is not always possible in CI + # (e.g. clang cannot link against the host SDK on some macOS + # runners). '-fsyntax-only' keeps the check faithful to the + # docstring ("the source file will not compile") on every platform. + cmd = [self.compiler, '-fsyntax-only'] cmd.extend(comp_action.analyzer_options) cmd.append(str(comp_action.source)) cwd = comp_action.directory diff --git a/analyzer/tests/unit/test_checker_handling.py b/analyzer/tests/unit/test_checker_handling.py index e62569dd1b..bf4cb70277 100644 --- a/analyzer/tests/unit/test_checker_handling.py +++ b/analyzer/tests/unit/test_checker_handling.py @@ -14,6 +14,7 @@ from codechecker_common.util import strtobool import os import re +import shutil import tempfile import unittest from argparse import Namespace @@ -658,6 +659,10 @@ def test_disable_clangsa_checkers(self): self.assertNotIn("Wreserved-id-macro", analyzer.config_handler.checks().keys()) + @unittest.skipIf( + not shutil.which('g++') or + 'clang' in os.popen('g++ --version 2>&1').read().lower(), + "gcc analyzer requires real g++, not Apple clang shim") def test_analyze_correct_analyzer_not_enabled(self): """ This test checks if an analyzer is not enabled but a config diff --git a/analyzer/tests/unit/test_env_var.py 
b/analyzer/tests/unit/test_env_var.py index 3967da3f3e..551be206bd 100644 --- a/analyzer/tests/unit/test_env_var.py +++ b/analyzer/tests/unit/test_env_var.py @@ -11,6 +11,7 @@ """ +import sys import unittest import tempfile import os @@ -95,6 +96,9 @@ def test_cc_analyzer_bin_overrides_cc_analyzers_from_path(self): self.assertNotEqual(bin_gcc_var, bin_gpp_var) + @unittest.skipIf( + sys.platform == "darwin", + "LD_LIBRARY_PATH is not used on macOS") def test_cc_analyzer_internal_env(self): """ Check whether the ld_library_path is extended with the internal diff --git a/analyzer/tests/unit/test_log_parser.py b/analyzer/tests/unit/test_log_parser.py index 78794dc37b..58148f1d7a 100644 --- a/analyzer/tests/unit/test_log_parser.py +++ b/analyzer/tests/unit/test_log_parser.py @@ -185,7 +185,8 @@ def test_new_intercept_build(self): build_actions, _ = log_parser.parse_unique_log(load_json(logfile)) build_action = build_actions[0] - self.assertEqual(build_action.source, r'/tmp/a.cpp') + self.assertEqual(build_action.source, + os.path.realpath(r'/tmp/a.cpp')) self.assertEqual(len(build_action.analyzer_options), 1) self.assertTrue(len(build_action.target) > 0) self.assertEqual(build_action.analyzer_options[0], @@ -197,7 +198,8 @@ def test_new_intercept_build(self): build_actions, _ = log_parser.parse_unique_log(load_json(logfile)) build_action = build_actions[0] - self.assertEqual(build_action.source, '/tmp/a b.cpp') + self.assertEqual(build_action.source, + os.path.realpath('/tmp/a b.cpp')) self.assertEqual(build_action.lang, 'c++') def test_omit_preproc(self): @@ -357,25 +359,27 @@ def test_skip_everything_from_parse_relative_path(self): Same skip file for pre analysis and analysis. Skip everything. Source file contains relative path. """ + # Use realpath to handle macOS /tmp -> /private/tmp symlink. 
+ tmp = os.path.realpath('/tmp') cmp_cmd_json = [ - {"directory": "/tmp/lib1/Debug", + {"directory": f"{tmp}/lib1/Debug", "command": "g++ ../a.cpp", "file": "../a.cpp"}, - {"directory": "/tmp/lib1/Debug/rel", + {"directory": f"{tmp}/lib1/Debug/rel", "command": "g++ ../../b.cpp", "file": "../../b.cpp"}, - {"directory": "/tmp/lib1/Debug", + {"directory": f"{tmp}/lib1/Debug", "command": "g++ ../d.cpp", "file": "../d.cpp"}, - {"directory": "/tmp/lib2/Debug", + {"directory": f"{tmp}/lib2/Debug", "command": "g++ ../a.cpp", "file": "../a.cpp"}] - skip_list = """ - +/tmp/lib1/d.cpp - -*/lib1/Debug/rel/../../* - -*/lib1/a.cpp - -/tmp/lib2/a.cpp + skip_list = f""" + +{tmp}/lib1/d.cpp + -*lib1/Debug/rel/../../* + -*lib1/a.cpp + -{tmp}/lib2/a.cpp """ analysis_skip = SkipListHandlers([SkipListHandler(skip_list)]) pre_analysis_skip = SkipListHandlers([SkipListHandler(skip_list)]) @@ -386,7 +390,8 @@ def test_skip_everything_from_parse_relative_path(self): pre_analysis_skip_handlers=pre_analysis_skip) self.assertEqual(len(build_actions), 1) - self.assertEqual(build_actions[0].source, '/tmp/lib1/d.cpp') + self.assertEqual(build_actions[0].source, + f'{tmp}/lib1/d.cpp') def test_skip_all_in_pre_from_parse(self): """Pre analysis skips everything but keep build action for analysis.""" @@ -419,7 +424,8 @@ def test_skip_all_in_pre_from_parse(self): self.assertEqual(len(build_actions), 1) - source_file = os.path.join(keep['directory'], keep['file']) + source_file = os.path.realpath( + os.path.join(keep['directory'], keep['file'])) self.assertEqual(build_actions[0].source, source_file) self.assertEqual(build_actions[0].original_command, keep['command']) @@ -629,7 +635,8 @@ def test_source_file_path_starts_with_at_sign(self): self.assertEqual(len(build_actions), 1) build_action = build_actions[0] - self.assertEqual(build_action.source, src_file_path) + self.assertEqual(build_action.source, + os.path.realpath(src_file_path)) def test_symlink(self): """ diff --git 
a/analyzer/tests/unit/test_option_parser.py b/analyzer/tests/unit/test_option_parser.py index 264cc2cd93..f70d429520 100644 --- a/analyzer/tests/unit/test_option_parser.py +++ b/analyzer/tests/unit/test_option_parser.py @@ -63,7 +63,7 @@ def test_build_multiplefiles(self): res = log_parser.parse_options(action) print(res) - self.assertTrue('/tmp/main.cpp' == res.source) + self.assertEqual(os.path.realpath('/tmp/main.cpp'), res.source) self.assertEqual(BuildAction.COMPILE, res.action_type) def test_compile_onefile(self): @@ -77,7 +77,7 @@ def test_compile_onefile(self): res = log_parser.parse_options(action) print(res) - self.assertTrue('/tmp/main.cpp' == res.source) + self.assertEqual(os.path.realpath('/tmp/main.cpp'), res.source) self.assertEqual(BuildAction.COMPILE, res.action_type) def test_nasm_action(self): @@ -92,7 +92,7 @@ def test_nasm_action(self): res = log_parser.parse_options(action) print(res) self.assertIsNone(res.lang) - self.assertEqual(res.source, '/tmp/main.asm') + self.assertEqual(res.source, os.path.realpath('/tmp/main.asm')) self.assertEqual(res.analyzer_type, -1) def test_preprocess_onefile(self): @@ -107,7 +107,7 @@ def test_preprocess_onefile(self): res = log_parser.parse_options(action) print(res) - self.assertTrue('/tmp/main.c' == res.source) + self.assertEqual(os.path.realpath('/tmp/main.c'), res.source) self.assertEqual(BuildAction.PREPROCESS, res.action_type) def test_compile_lang(self): @@ -123,7 +123,7 @@ def test_compile_lang(self): res = log_parser.parse_options(action) print(res) - self.assertTrue('/tmp/main.c' == res.source) + self.assertEqual(os.path.realpath('/tmp/main.c'), res.source) self.assertEqual('c', res.lang) self.assertEqual(BuildAction.COMPILE, res.action_type) @@ -149,7 +149,7 @@ def test_compile_arch(self): res = log_parser.parse_options(action) print(res) - self.assertTrue('/tmp/main.c' == res.source) + self.assertEqual(os.path.realpath('/tmp/main.c'), res.source) self.assertEqual(arch['c'], res.arch) 
self.assertEqual(BuildAction.COMPILE, res.action_type) @@ -285,7 +285,7 @@ def test_preprocess_and_compile_with_extra_file(self): res = log_parser.parse_options(action) print(res) self.assertEqual(res.analyzer_options, []) - self.assertEqual(res.source, '/tmp/main.cpp') + self.assertEqual(res.source, os.path.realpath('/tmp/main.cpp')) self.assertEqual(BuildAction.COMPILE, res.action_type) @unittest.skipUnless( @@ -392,7 +392,7 @@ class FakeClangVersion: res = log_parser.parse_options(action) print(res) self.assertEqual(res.analyzer_options, []) - self.assertEqual(res.source, '/tmp/main.cpp') + self.assertEqual(res.source, os.path.realpath('/tmp/main.cpp')) self.assertEqual(BuildAction.COMPILE, res.action_type) def test_keep_clang_flags(self): diff --git a/codechecker_common/cli.py b/codechecker_common/cli.py index acffa32604..be4493935b 100755 --- a/codechecker_common/cli.py +++ b/codechecker_common/cli.py @@ -149,6 +149,11 @@ def main(): """ CodeChecker main command line. """ + # Use spawn on macOS/Windows. Fork is unsafe on macOS (Obj-C runtime + # crashes in child processes) and unavailable on Windows. + if sys.platform != "linux": + import multiprocess # type: ignore + multiprocess.set_start_method("spawn") configure_utf8_output() if not os.environ.get('CC_LIB_DIR'): diff --git a/codechecker_common/compatibility/multiprocessing.py b/codechecker_common/compatibility/multiprocessing.py deleted file mode 100644 index 74ccfa1d4a..0000000000 --- a/codechecker_common/compatibility/multiprocessing.py +++ /dev/null @@ -1,32 +0,0 @@ -# ------------------------------------------------------------------------- -# -# Part of the CodeChecker project, under the Apache License v2.0 with -# LLVM Exceptions. See LICENSE for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -# ------------------------------------------------------------------------- -""" -Multiprocessing compatibility module. 
-""" -import sys - -# pylint: disable=no-name-in-module -# pylint: disable=unused-import -if sys.platform in ["darwin", "win32"]: - from multiprocess import ( # type: ignore - Pipe, Pool, Process, - Queue, - Value, - cpu_count - ) - from multiprocess.managers import SyncManager # type: ignore -else: - from concurrent.futures import ProcessPoolExecutor as Pool - from multiprocessing import ( - Pipe, - Process, - Queue, - Value, - cpu_count - ) - from multiprocessing.managers import SyncManager diff --git a/pyproject.toml b/pyproject.toml index 593799bf48..6b42dafaf5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,3 +33,7 @@ exclude = [ # Ignore build directories "/build/", ] + +[[tool.mypy.overrides]] +module = "multiprocess.*" +ignore_missing_imports = true diff --git a/scripts/labels/doc_url/verify_tool/__main__.py b/scripts/labels/doc_url/verify_tool/__main__.py index becb29bd42..98d04e489e 100755 --- a/scripts/labels/doc_url/verify_tool/__main__.py +++ b/scripts/labels/doc_url/verify_tool/__main__.py @@ -17,7 +17,8 @@ from tabulate import tabulate -from codechecker_common.compatibility.multiprocessing import cpu_count +from multiprocess import cpu_count # type: ignore + from codechecker_common.util import clamp from ...checker_labels import SingleLabels, SkipDirectiveRespectStyle, \ diff --git a/scripts/labels/doc_url/verify_tool/action.py b/scripts/labels/doc_url/verify_tool/action.py index 99c3684345..766c25988f 100644 --- a/scripts/labels/doc_url/verify_tool/action.py +++ b/scripts/labels/doc_url/verify_tool/action.py @@ -9,7 +9,7 @@ import sys from typing import List, Optional, Tuple, Type -from codechecker_common.compatibility.multiprocessing import Pool +from concurrent.futures import ProcessPoolExecutor as Pool from ...checker_labels import SingleLabels from ...output import Settings as GlobalOutputSettings, log, emoji, coloured diff --git a/scripts/labels/doc_url/verify_tool/tool.py b/scripts/labels/doc_url/verify_tool/tool.py index 
cb47c41234..9831b7dcf1 100644 --- a/scripts/labels/doc_url/verify_tool/tool.py +++ b/scripts/labels/doc_url/verify_tool/tool.py @@ -9,7 +9,7 @@ from enum import IntFlag, auto as Enumerator from typing import NamedTuple, Optional, Tuple, Type, cast -from codechecker_common.compatibility.multiprocessing import Pool +from concurrent.futures import ProcessPoolExecutor as Pool from ...checker_labels import SingleLabels from ...output import trace diff --git a/tools/tu_collector/tu_collector/tu_collector.py b/tools/tu_collector/tu_collector/tu_collector.py index a5cbfdbfad..04ac017176 100755 --- a/tools/tu_collector/tu_collector/tu_collector.py +++ b/tools/tu_collector/tu_collector/tu_collector.py @@ -246,7 +246,9 @@ def __analyzer_action_hash(build_action: CompileAction) -> str: source_file = os.path.normpath( os.path.join(build_action['directory'], build_action['file'])) - args = shlex.split(build_action['command']) + cmd = build_action.get('command') or shlex.join( + build_action.get('arguments') or []) # type: ignore[arg-type] + args = shlex.split(cmd) indices = [idx for idx, v in enumerate(args) if v.startswith('-o')] for idx in reversed(indices): @@ -417,6 +419,12 @@ def zip_tu_files( else: compilation_database = compilation_db + # Normalize: compile_commands.json may have 'arguments' (list) instead + # of 'command' (string). Ensure 'command' exists for all entries. + for entry in compilation_database: + if 'command' not in entry and 'arguments' in entry: + entry['command'] = shlex.join(entry['arguments']) + no_sources = 'no-sources' tu_files: Set[str] = set() error_messages = '' @@ -479,8 +487,11 @@ def get_dependent_sources( """ Get dependencies for each files in each translation unit. 
""" dependencies = collections.defaultdict(set) for build_action in compilation_db: + command = build_action.get('command') or shlex.join( + build_action.get('arguments') # type: ignore[arg-type] + or []) files, _ = get_dependent_headers( - build_action['command'], + command, build_action['directory']) source_file = os.path.join(build_action['directory'], @@ -585,6 +596,9 @@ def main(): if args.logfile: with open(args.logfile, encoding="utf-8", errors="ignore") as f: compilation_db = json.load(f) + for entry in compilation_db: + if 'command' not in entry and 'arguments' in entry: + entry['command'] = shlex.join(entry['arguments']) else: compilation_db = [{ 'file': '', diff --git a/web/client/codechecker_client/blame_info.py b/web/client/codechecker_client/blame_info.py index 32d46731f4..7008a0b3f5 100644 --- a/web/client/codechecker_client/blame_info.py +++ b/web/client/codechecker_client/blame_info.py @@ -6,7 +6,7 @@ from git.exc import InvalidGitRepositoryError, GitCommandError from typing import Dict, Iterable, Optional -from codechecker_common.compatibility.multiprocessing import Pool +from concurrent.futures import ProcessPoolExecutor as Pool from codechecker_common.logger import get_logger LOG = get_logger('system') diff --git a/web/client/codechecker_client/cli/store.py b/web/client/codechecker_client/cli/store.py index a027527765..0882c687bb 100644 --- a/web/client/codechecker_client/cli/store.py +++ b/web/client/codechecker_client/cli/store.py @@ -41,6 +41,9 @@ get_report_path_hash from codechecker_report_converter.report.parser.base import AnalyzerInfo +from concurrent.futures import ProcessPoolExecutor as Pool +from multiprocess import cpu_count # type: ignore + try: from codechecker_client.blame_info import assemble_blame_info except ImportError: @@ -55,7 +58,6 @@ def assemble_blame_info(_, __) -> int: from codechecker_client.task_client import await_task_termination from codechecker_common import arg, logger, cmd_config from 
codechecker_common.checker_labels import CheckerLabels -from codechecker_common.compatibility.multiprocessing import Pool, cpu_count from codechecker_common.source_code_comment_handler import \ SourceCodeCommentHandler from codechecker_common.util import format_size, load_json, strtobool diff --git a/web/server/codechecker_server/cli/server.py b/web/server/codechecker_server/cli/server.py index ebb9328eb0..8d2370a9eb 100644 --- a/web/server/codechecker_server/cli/server.py +++ b/web/server/codechecker_server/cli/server.py @@ -30,8 +30,10 @@ from codechecker_report_converter import twodim +from concurrent.futures import ProcessPoolExecutor as Pool +from multiprocess import cpu_count # type: ignore + from codechecker_common import arg, cmd_config, logger, process, util -from codechecker_common.compatibility.multiprocessing import Pool, cpu_count from codechecker_server import instance_manager, server from codechecker_server.database import database diff --git a/web/server/codechecker_server/server.py b/web/server/codechecker_server/server.py index b06ef3a005..7809254cad 100644 --- a/web/server/codechecker_server/server.py +++ b/web/server/codechecker_server/server.py @@ -47,9 +47,11 @@ from codechecker_api.codeCheckerServersideTasks_v6 import \ codeCheckerServersideTaskService as TaskAPI_v6 +from concurrent.futures import ProcessPoolExecutor as Pool +from multiprocess import Process, Queue, Value, cpu_count # type: ignore +from multiprocess.managers import SyncManager # type: ignore + from codechecker_common import util -from codechecker_common.compatibility.multiprocessing import \ - Pool, Process, Queue, Value, cpu_count, SyncManager from codechecker_common.logger import get_logger, signal_log from codechecker_web.shared import database_status @@ -575,7 +577,8 @@ def _do_db_cleanup(context, check_env, return False, str(e) -def _do_db_cleanups(config_database, context, check_env) \ +def _do_db_cleanups(config_database, context, check_env, + workspace: str = "") \ -> 
Tuple[bool, List[Tuple[str, str]]]: """ Performs on-demand start-up database cleanup on all the products present @@ -601,6 +604,12 @@ def _get_products() -> List[Product]: if not products: return True, [] + # Ensure cwd is valid before spawning worker processes. On macOS the + # default 'spawn' start method calls os.getcwd() during process creation + # which fails if the inherited cwd was deleted. + if workspace and os.path.isdir(workspace): + os.chdir(workspace) + thr_count = util.clamp(1, len(products), cpu_count()) overall_result, failures = True, [] with Pool(max_workers=thr_count) as executor: @@ -651,7 +660,8 @@ def __init__(self, machine_id: str, task_queue: Queue, task_pipes, - server_shutdown_flag: Value): + server_shutdown_flag: Value, + existing_socket=None): LOG.debug("Initializing HTTP server...") @@ -697,9 +707,17 @@ def __init__(self, cfg_sess.close() try: - HTTPServer.__init__(self, (self.address, self.port), - RequestHandlerClass, - bind_and_activate=True) + if existing_socket: + # Spawn worker: use pre-bound socket from main process. + HTTPServer.__init__(self, (self.address, self.port), + RequestHandlerClass, + bind_and_activate=False) + self.socket.close() + self.socket = existing_socket + else: + HTTPServer.__init__(self, (self.address, self.port), + RequestHandlerClass, + bind_and_activate=True) ssl_key_file = os.path.join(config_directory, "key.pem") ssl_cert_file = os.path.join(config_directory, "cert.pem") @@ -988,6 +1006,66 @@ def formatted_address(self) -> str: return f"[{str(self.address)}]:{self.port}" +def _api_worker_main(http_server=None, *, server_init_args=None): + """Entry point for an API worker process. + + On Linux (fork): receives http_server via inheritance. + On macOS/Windows (spawn): receives server_init_args dict and + reconstructs the server in the child process. + """ + if http_server is None: + # Spawn path: reconstruct server in child. 
+ http_server = _build_worker_server(server_init_args) + http_server.serve_forever_with_shutdown_handler() + + +def _build_worker_server(args): + """Create server in a spawned worker from serializable config.""" + from codechecker_server.database.database import SQLServer + from codechecker_server.database.config_db_model \ + import IDENTIFIER as CONFIG_META + + product_db_sql_server = SQLServer.from_connection_string( + args['db_connection_string'], + "config", + CONFIG_META, + args['migration_root']) + + mgr = session_manager.SessionManager( + args['server_cfg_file'], + args['server_secrets_file'], + args['force_auth'], + args['api_handler_processes'], + args['task_worker_processes']) + + # Recreate listening socket from transferred file descriptor. + fd = args['socket_dupfd'].detach() + sock = socket.socket( + args['socket_family'], socket.SOCK_STREAM, fileno=fd) + + server_clazz = CCSimpleHttpServerIPv6 \ + if ':' in args['listen_address'] else CCSimpleHttpServer + + # Spawn workers detect shutdown via SIGINT, not shared flag. + local_shutdown_flag = Value('B', False) + + return server_clazz( + (args['listen_address'], args['port']), + RequestHandler, + args['config_directory'], + args['workspace_directory'], + product_db_sql_server, + args['package_data'], + args['context'], + args['check_env'], + mgr, + args['machine_id'], + args['task_queue'], + args['task_pipes'], + local_shutdown_flag, + existing_socket=sock) + + def start_server(config_directory: str, workspace_directory: str, package_data, port: int, config_sql_server, listen_address: str, force_auth: bool, @@ -1054,7 +1132,8 @@ def start_server(config_directory: str, workspace_directory: str, if not skip_db_cleanup: all_success, fails = _do_db_cleanups(config_sql_server, context, - check_env) + check_env, + workspace_directory) if not all_success: LOG.error("Failed to perform automatic cleanup on %d products! 
" "Earlier logs might contain additional detailed " @@ -1075,11 +1154,14 @@ def start_server(config_directory: str, workspace_directory: str, # Note that Queue under the hood uses OS-level primitives such as a socket # or a pipe, where the read-write buffers have a **LIMITED** capacity, and # are usually **NOT** backed by the full amount of available system memory. - bg_task_queue: Queue = Queue() - is_server_shutting_down = Value('B', False) - sync_manager = SyncManager() sync_manager.start() + + # Manager proxy for task queue (picklable for spawn workers). + bg_task_queue = sync_manager.Queue() + # Direct Value for shutdown flag - used in signal handlers (IPC-unsafe). + # Spawn workers don't need this: they detect shutdown via signals. + is_server_shutting_down = Value('B', False) task_pipes = sync_manager.dict() def _cleanup_incomplete_tasks(action: str) -> int: @@ -1131,6 +1213,32 @@ def _cleanup_incomplete_tasks(action: str) -> int: task_pipes, is_server_shutting_down) + # Config needed by spawn workers to reconstruct the server. 
+ _use_spawn = sys.platform != "linux" + server_init_args = None + if _use_spawn: + server_init_args = { + 'socket_family': http_server.socket.family, + 'listen_address': listen_address, + 'port': http_server.port, + 'db_connection_string': + config_sql_server.get_connection_string(), + 'migration_root': config_sql_server.migration_root, + 'config_directory': config_directory, + 'workspace_directory': workspace_directory, + 'package_data': package_data, + 'context': context, + 'check_env': check_env, + 'server_cfg_file': server_cfg_file, + 'server_secrets_file': server_secrets_file, + 'force_auth': force_auth, + 'api_handler_processes': api_handler_processes, + 'task_worker_processes': task_worker_processes, + 'machine_id': machine_id, + 'task_queue': bg_task_queue, + 'task_pipes': task_pipes, + } + try: instance_manager.register(os.getpid(), os.path.abspath( @@ -1192,9 +1300,20 @@ def spawn_api_process(): nonlocal spawned_api_proc_count spawned_api_proc_count += 1 - p = _start_process_with_no_signal_handling( - target=http_server.serve_forever_with_shutdown_handler, - name=f"CodeChecker-API-{spawned_api_proc_count}") + if _use_spawn: + from multiprocess.reduction import DupFd # type: ignore + worker_args = dict(server_init_args) + worker_args['socket_dupfd'] = DupFd( + http_server.socket.fileno()) + p = _start_process_with_no_signal_handling( + target=_api_worker_main, + kwargs={'server_init_args': worker_args}, + name=f"CodeChecker-API-{spawned_api_proc_count}") + else: + p = _start_process_with_no_signal_handling( + target=_api_worker_main, + args=(http_server,), + name=f"CodeChecker-API-{spawned_api_proc_count}") api_processes[cast(int, p.pid)] = p signal_log(LOG, "DEBUG", f"API handler child process {p.pid} started!") return p @@ -1317,8 +1436,6 @@ def termination_signal_handler(signum: int, _frame): finally: del api_processes[pid] - bg_task_queue.close() - bg_task_queue.join_thread() for pid in bg_processes: try: signal_log(LOG, "DEBUG", f"SIGHUP! 
Task child PID: {pid} ...") @@ -1336,6 +1453,9 @@ def termination_signal_handler(signum: int, _frame): finally: del bg_processes[pid] + bg_task_queue.close() + bg_task_queue.join_thread() + def reload_signal_handler(signum: int, _frame): """ Handle SIGHUP (1) to reload the server's configuration file to memory. diff --git a/web/server/codechecker_server/session_manager.py b/web/server/codechecker_server/session_manager.py index 82f28e9e47..1db80996db 100644 --- a/web/server/codechecker_server/session_manager.py +++ b/web/server/codechecker_server/session_manager.py @@ -18,7 +18,8 @@ import hashlib from typing import Optional -from codechecker_common.compatibility.multiprocessing import cpu_count +from multiprocess import cpu_count # type: ignore + from codechecker_common.logger import get_logger from codechecker_common.util import generate_random_token, load_json diff --git a/web/server/codechecker_server/task_executors/main.py b/web/server/codechecker_server/task_executors/main.py index dda81577c2..7376453b16 100644 --- a/web/server/codechecker_server/task_executors/main.py +++ b/web/server/codechecker_server/task_executors/main.py @@ -16,7 +16,8 @@ from sqlalchemy.orm import sessionmaker -from codechecker_common.compatibility.multiprocessing import Queue, Value +from multiprocess import Queue, Value # type: ignore + from codechecker_common.logger import get_logger, signal_log from ..database.config_db_model import BackgroundTask as DBTask diff --git a/web/server/codechecker_server/task_executors/task_manager.py b/web/server/codechecker_server/task_executors/task_manager.py index 4db887b224..1f071289b6 100644 --- a/web/server/codechecker_server/task_executors/task_manager.py +++ b/web/server/codechecker_server/task_executors/task_manager.py @@ -18,7 +18,8 @@ import sqlalchemy -from codechecker_common.compatibility.multiprocessing import Pipe, Queue, Value +from multiprocess import Pipe, Queue, Value # type: ignore + from codechecker_common.logger import 
get_logger, signal_log from codechecker_common.util import generate_random_token diff --git a/web/tests/Makefile b/web/tests/Makefile index d5c0a33dab..55ebb13a93 100644 --- a/web/tests/Makefile +++ b/web/tests/Makefile @@ -15,7 +15,7 @@ TEST_PROJECT ?= TEST_PROJ=$(CURRENT_DIR)/tests/projects PYTHON_BIN ?= python3 REPO_ROOT ?= REPO_ROOT=$(ROOT) -CC_TEST_WORKSPACE_ROOT ?= $(BUILD_DIR)/workspace +CC_TEST_WORKSPACE_ROOT ?= $(ROOT)/build/workspace WOKSPACE_GLOBAL_AUTH_SERVER = $(CC_TEST_WORKSPACE_ROOT)/global_auth_server WOKSPACE_GLOBAL_SIMPLE_SERVER = $(CC_TEST_WORKSPACE_ROOT)/global_simple_server @@ -56,7 +56,7 @@ pylint: pylint_in_env: $(ACTIVATE_DEV_VENV) && $(PYLINT_TEST_CMD) -CODECHECKER_CMD = $(BUILD_DIR)/CodeChecker/bin/CodeChecker +CODECHECKER_CMD = $(ROOT)/build/CodeChecker/bin/CodeChecker SHUTDOWN_GLOBAL_SERVERS_CMD = \ for TEST_ROOT in ${WOKSPACE_GLOBAL_AUTH_SERVER} ${WOKSPACE_GLOBAL_SIMPLE_SERVER}; do \ if [ -d "$${TEST_ROOT}" ]; then \ diff --git a/web/tests/functional/authentication/__init__.py b/web/tests/functional/authentication/__init__.py index fd5100b3c7..67479f4b39 100644 --- a/web/tests/functional/authentication/__init__.py +++ b/web/tests/functional/authentication/__init__.py @@ -13,6 +13,7 @@ import os import shutil import subprocess +import sys from libtest import codechecker from libtest import env @@ -22,6 +23,9 @@ # Stopping event for CodeChecker server. __STOP_SERVER = multiprocess.Event() +# OAuth mock server process. +__OAUTH_SERVER = None + # Test workspace initialized at setup for authentication tests. 
TEST_WORKSPACE = None @@ -66,9 +70,45 @@ def setup_class_common(): codechecker.add_test_package_product(host_port_cfg, TEST_WORKSPACE) - subprocess.Popen(["python3", "oauth_server.py"], - cwd="tests/functional/authentication") - sleep(5) + subprocess.run(["pkill", "-f", "oauth_server.py"], + capture_output=True, check=False) + sleep(1) + + global __OAUTH_SERVER + oauth_log = os.path.join(TEST_WORKSPACE, "oauth_server.log") + oauth_out = open(oauth_log, "w", encoding="utf-8") + __OAUTH_SERVER = subprocess.Popen( + [sys.executable, "oauth_server.py"], + cwd="tests/functional/authentication", + stdout=oauth_out, + stderr=oauth_out) + + # Wait for mock server to be ready (port 3000 open). + import socket + ready = False + for i in range(30): + try: + s = socket.create_connection(("127.0.0.1", 3000), timeout=1) + s.close() + ready = True + print(f"OAuth mock server ready after {i+1}s") + break + except (ConnectionRefusedError, OSError): + if __OAUTH_SERVER.poll() is not None: + oauth_out.flush() + with open(oauth_log, encoding="utf-8") as f: + print(f"OAuth mock server DIED " + f"(rc={__OAUTH_SERVER.returncode}): " + f"{f.read()}") + break + sleep(1) + + if not ready: + oauth_out.flush() + with open(oauth_log, encoding="utf-8") as f: + print(f"OAuth mock server NOT ready after 30s. " + f"Log: {f.read()}") + print(f"OAuth server poll: {__OAUTH_SERVER.poll()}") def teardown_class_common(): @@ -76,6 +116,12 @@ def teardown_class_common(): # TODO If environment variable is set keep the workspace # and print out the path. global TEST_WORKSPACE + global __OAUTH_SERVER + + if __OAUTH_SERVER: + __OAUTH_SERVER.terminate() + __OAUTH_SERVER.wait() + __OAUTH_SERVER = None # Removing the product through this server requires credentials. 
codechecker_cfg = env.import_test_cfg(TEST_WORKSPACE)['codechecker_cfg'] diff --git a/web/tests/functional/authentication/oauth_server.py b/web/tests/functional/authentication/oauth_server.py index 031867317f..b95c3ac2df 100644 --- a/web/tests/functional/authentication/oauth_server.py +++ b/web/tests/functional/authentication/oauth_server.py @@ -18,7 +18,7 @@ from http.server import BaseHTTPRequestHandler, HTTPServer # Server config -HOSTNAME = "0.0.0.0" +HOSTNAME = "127.0.0.1" SERVERPORT = int(os.getenv("PORT")) if os.getenv("PORT") else 3000 @@ -220,7 +220,8 @@ def do_POST(self): webServer = HTTPServer((HOSTNAME, SERVERPORT), OauthServer) webServer.allow_reuse_address = True -# print(f"OAuth mock server started on http://{HOSTNAME}:{SERVERPORT}") +print(f"OAuth mock server started on http://{HOSTNAME}:{SERVERPORT}", + flush=True) webServer.serve_forever() webServer.server_close() diff --git a/web/tests/functional/blame/test_blame_info.py b/web/tests/functional/blame/test_blame_info.py index 287f728dc8..f1562b280f 100644 --- a/web/tests/functional/blame/test_blame_info.py +++ b/web/tests/functional/blame/test_blame_info.py @@ -154,63 +154,65 @@ def test_update_blame_info(self): # easily. old_pwd = os.getcwd() os.chdir(proj_dir) - - run_name = "update_blame_info" - codechecker.check_and_store( - self._codechecker_cfg, run_name, proj_dir) - - run_filter = RunFilter(names=[run_name], exactMatch=True) - runs = self._cc_client.getRunData(run_filter, None, 0, None) - run_id = runs[0].runId - - report_filter = ReportFilter( - checkerName=['*'], - filepath=[f'*{source_file_name}']) - - run_results = get_all_run_results( - self._cc_client, run_id, [], report_filter) - self.assertIsNotNone(run_results) - - report = run_results[0] - - # Get source file data. 
- file_data = self._cc_client.getSourceFileData( - report.fileId, True, None) - self.assertIsNotNone(file_data) - self.assertFalse(file_data.hasBlameInfo) - self.assertFalse(file_data.remoteUrl) - self.assertFalse(file_data.trackingBranch) - - # Get blame information - blame_info = self._cc_client.getBlameInfo(report.fileId) - self.assertIsNotNone(blame_info) - self.assertFalse(blame_info.commits) - self.assertFalse(blame_info.blame) - - # Create a .git structure that is as bare as possible, without - # getting interference from the user's configuration. - subprocess.Popen(['git', 'init', proj_dir, - "--template", "/usr/share/git-core/templates" - ]).communicate() - - subprocess.Popen([ - 'git', - 'remote', - 'add', - 'origin', - 'https://myurl.com']).communicate() - subprocess.Popen(['git', 'add', src_file]).communicate() - subprocess.Popen([ - 'git', - '-c', 'user.name=hello', - '-c', 'user.email=world', - 'commit', - '--no-verify', - '--message', 'message']).communicate() - - codechecker.store(self._codechecker_cfg, run_name) - - os.chdir(old_pwd) + try: + run_name = "update_blame_info" + codechecker.check_and_store( + self._codechecker_cfg, run_name, proj_dir) + + run_filter = RunFilter(names=[run_name], exactMatch=True) + runs = self._cc_client.getRunData(run_filter, None, 0, None) + run_id = runs[0].runId + + report_filter = ReportFilter( + checkerName=['*'], + filepath=[f'*{source_file_name}']) + + run_results = get_all_run_results( + self._cc_client, run_id, [], report_filter) + self.assertIsNotNone(run_results) + + report = run_results[0] + + # Get source file data. 
+ file_data = self._cc_client.getSourceFileData( + report.fileId, True, None) + self.assertIsNotNone(file_data) + self.assertFalse(file_data.hasBlameInfo) + self.assertFalse(file_data.remoteUrl) + self.assertFalse(file_data.trackingBranch) + + # Get blame information + blame_info = self._cc_client.getBlameInfo(report.fileId) + self.assertIsNotNone(blame_info) + self.assertFalse(blame_info.commits) + self.assertFalse(blame_info.blame) + + # Create a .git structure that is as bare as possible, + # without getting interference from the user's + # configuration. + subprocess.Popen(['git', 'init', proj_dir, + "--template", + "/usr/share/git-core/templates" + ]).communicate() + + subprocess.Popen([ + 'git', + 'remote', + 'add', + 'origin', + 'https://myurl.com']).communicate() + subprocess.Popen(['git', 'add', src_file]).communicate() + subprocess.Popen([ + 'git', + '-c', 'user.name=hello', + '-c', 'user.email=world', + 'commit', + '--no-verify', + '--message', 'message']).communicate() + + codechecker.store(self._codechecker_cfg, run_name) + finally: + os.chdir(old_pwd) # Get source file data. file_data = self._cc_client.getSourceFileData( @@ -250,12 +252,12 @@ def test_no_blame_info(self): # easily. 
old_pwd = os.getcwd() os.chdir(proj_dir) - - run_name = "no_blame_info" - codechecker.check_and_store( - self._codechecker_cfg, run_name, proj_dir) - - os.chdir(old_pwd) + try: + run_name = "no_blame_info" + codechecker.check_and_store( + self._codechecker_cfg, run_name, proj_dir) + finally: + os.chdir(old_pwd) run_filter = RunFilter(names=[run_name], exactMatch=True) runs = self._cc_client.getRunData(run_filter, None, 0, None) diff --git a/web/tests/functional/tasks/test_task_management.py b/web/tests/functional/tasks/test_task_management.py index 611641cd76..35b31b126c 100644 --- a/web/tests/functional/tasks/test_task_management.py +++ b/web/tests/functional/tasks/test_task_management.py @@ -13,18 +13,23 @@ from datetime import datetime, timezone import os import shutil -import unittest +import sys import time +import unittest from typing import List, Optional, cast import multiprocess from codechecker_api_shared.ttypes import RequestFailed, Ternary from codechecker_api.codeCheckerServersideTasks_v6.ttypes import \ - AdministratorTaskInfo, TaskFilter, TaskInfo, TaskStatus + AdministratorTaskInfo, TaskFilter, TaskStatus from libtest import codechecker, env +# Timeout for polling task state transitions. On macOS CI, spawn workers +# take ~42s to import before they can process tasks. +_POLL_TIMEOUT = 120 if sys.platform == "darwin" else 30 + # Stop events for the CodeChecker servers. 
STOP_SERVER = multiprocess.Event() @@ -131,24 +136,41 @@ def setup_method(self, _): auth_server["viewer_host"], auth_server["viewer_port"], session_token=root_token) + def _poll_status(self, client, token, status_name, timeout=_POLL_TIMEOUT): + """Poll until task reaches expected status or timeout.""" + expected = TaskStatus._NAMES_TO_VALUES[status_name] + info = None + for _ in range(timeout): + info = client.getTaskInfo(token) + if info and info.status == expected: + return info + time.sleep(1) + actual = TaskStatus._VALUES_TO_NAMES.get(info.status) if info else None + self.fail(f"Task did not reach {status_name} within {timeout}s " + f"(last: {actual})") + return None # Unreachable, but satisfies pylint R1710. + def test_task_1_query_status(self): - task_token = self._anonymous_task_client.createDummyTask(2, False) + # Use a long task to reliably observe RUNNING state. + task_token = self._anonymous_task_client.createDummyTask(600, False) - time.sleep(1) - task_info: TaskInfo = self._anonymous_task_client.getTaskInfo( - task_token) + task_info = self._poll_status( + self._anonymous_task_client, task_token, "RUNNING") self.assertEqual(task_info.token, task_token) - self.assertEqual(task_info.status, - TaskStatus._NAMES_TO_VALUES["RUNNING"]) self.assertEqual(task_info.productId, 0) self.assertIsNone(task_info.actorUsername) self.assertIn("Dummy task", task_info.summary) self.assertEqual(task_info.cancelFlagSet, False) - time.sleep(2) # A bit more than exactly what remains of 2 seconds! - task_info = self._anonymous_task_client.getTaskInfo(task_token) - self.assertEqual(task_info.status, - TaskStatus._NAMES_TO_VALUES["COMPLETED"]) + # Cancel the long task — we'll use a short task for COMPLETED check. + self._privileged_task_client.cancelTask(task_token) + self._poll_status( + self._anonymous_task_client, task_token, "CANCELLED") + + # Worker is warm now. Short task to verify COMPLETED state. 
+ task_token = self._anonymous_task_client.createDummyTask(1, False) + task_info = self._poll_status( + self._anonymous_task_client, task_token, "COMPLETED", timeout=10) self.assertEqual(task_info.cancelFlagSet, False) self.assertIsNotNone(task_info.enqueuedAtEpoch) self.assertIsNotNone(task_info.startedAtEpoch) @@ -156,44 +178,30 @@ def test_task_1_query_status(self): task_info.startedAtEpoch) self.assertIsNotNone(task_info.completedAtEpoch) self.assertLess(task_info.startedAtEpoch, task_info.completedAtEpoch) - self.assertEqual(task_info.cancelFlagSet, False) def test_task_2_query_status_of_failed(self): - task_token = self._anonymous_task_client.createDummyTask(2, True) - - time.sleep(1) - task_info: TaskInfo = self._anonymous_task_client.getTaskInfo( - task_token) + task_token = self._anonymous_task_client.createDummyTask(1, True) + task_info = self._poll_status( + self._anonymous_task_client, task_token, "FAILED", timeout=10) self.assertEqual(task_info.token, task_token) - self.assertEqual(task_info.status, - TaskStatus._NAMES_TO_VALUES["RUNNING"]) - self.assertEqual(task_info.cancelFlagSet, False) - - time.sleep(2) # A bit more than exactly what remains of 2 seconds! - task_info = self._anonymous_task_client.getTaskInfo(task_token) - self.assertEqual(task_info.status, - TaskStatus._NAMES_TO_VALUES["FAILED"]) self.assertEqual(task_info.cancelFlagSet, False) def test_task_3_cancel(self): - task_token = self._anonymous_task_client.createDummyTask(3, False) + task_token = self._anonymous_task_client.createDummyTask(600, False) - time.sleep(1) + # Wait until running, then cancel. + self._poll_status( + self._anonymous_task_client, task_token, "RUNNING", timeout=10) cancel_req: bool = self._privileged_task_client.cancelTask(task_token) self.assertTrue(cancel_req) time.sleep(0.5) cancel_req_2: bool = self._privileged_task_client.cancelTask( task_token) - # The task was already cancelled, so cancel_req_2 is not the API call - # that cancelled the task. 
self.assertFalse(cancel_req_2) - time.sleep(0.5) # A bit more than exactly what remains of 10 seconds! - task_info: TaskInfo = self._anonymous_task_client.getTaskInfo( - task_token) - self.assertEqual(task_info.status, - TaskStatus._NAMES_TO_VALUES["CANCELLED"]) + task_info = self._poll_status( + self._anonymous_task_client, task_token, "CANCELLED", timeout=10) self.assertEqual(task_info.cancelFlagSet, True) self.assertIn("root", task_info.comments) self.assertIn("User requested cancellation.", task_info.comments) @@ -225,7 +233,7 @@ def test_task_4_get_tasks_as_admin(self): self.assertEqual(len(task_infos), 0) task_infos = self._privileged_task_client.getTasks(TaskFilter()) - self.assertEqual(len(task_infos), 3) + self.assertEqual(len(task_infos), 4) self.assertEqual(sum(1 for t in task_infos if t.normalInfo.status == @@ -235,7 +243,7 @@ def test_task_4_get_tasks_as_admin(self): TaskStatus._NAMES_TO_VALUES["FAILED"]), 1) self.assertEqual(sum(1 for t in task_infos if t.normalInfo.status == - TaskStatus._NAMES_TO_VALUES["CANCELLED"]), 1) + TaskStatus._NAMES_TO_VALUES["CANCELLED"]), 2) def test_task_5_info_query_filters(self): current_time_epoch = int(datetime.now(timezone.utc).timestamp()) @@ -249,7 +257,7 @@ def test_task_5_info_query_filters(self): task_infos = self._privileged_task_client.getTasks(TaskFilter( machineIDs=["unprivileged"] )) - self.assertEqual(len(task_infos), 3) + self.assertEqual(len(task_infos), 4) tokens_from_previous_test = [t.normalInfo.token for t in task_infos] @@ -268,7 +276,7 @@ def test_task_5_info_query_filters(self): task_infos = self._privileged_task_client.getTasks(TaskFilter( startedBeforeEpoch=current_time_epoch )) - self.assertEqual(len(task_infos), 3) + self.assertEqual(len(task_infos), 4) task_infos = self._privileged_task_client.getTasks(TaskFilter( startedAfterEpoch=current_time_epoch @@ -278,7 +286,7 @@ def test_task_5_info_query_filters(self): task_infos = self._privileged_task_client.getTasks(TaskFilter( 
cancelFlag=Ternary._NAMES_TO_VALUES["ON"] )) - self.assertEqual(len(task_infos), 1) + self.assertEqual(len(task_infos), 2) task_infos = self._privileged_task_client.getTasks(TaskFilter( cancelFlag=Ternary._NAMES_TO_VALUES["OFF"] @@ -288,7 +296,7 @@ def test_task_5_info_query_filters(self): task_infos = self._privileged_task_client.getTasks(TaskFilter( consumedFlag=Ternary._NAMES_TO_VALUES["ON"] )) - self.assertEqual(len(task_infos), 3) + self.assertEqual(len(task_infos), 4) task_infos = self._privileged_task_client.getTasks(TaskFilter( consumedFlag=Ternary._NAMES_TO_VALUES["OFF"] @@ -306,7 +314,7 @@ def test_task_5_info_query_filters(self): target_api.createDummyTask(1, bool(j % 2 == 0)) task_infos = self._privileged_task_client.getTasks(TaskFilter()) - self.assertEqual(len(task_infos), 7) + self.assertEqual(len(task_infos), 8) task_infos = self._privileged_task_client.getTasks(TaskFilter( enqueuedAfterEpoch=current_time_epoch, @@ -358,18 +366,18 @@ def test_task_5_info_query_filters(self): # Some tasks ought to have also finished at least. self.assertGreater(len(task_infos), 0) - # Let every task terminate. We should only need 1 second per task, - # running likely in a multithreaded environment. - # Let's have some leeway, though... - time.sleep(2) - - task_infos = self._privileged_task_client.getTasks(TaskFilter( - enqueuedAfterEpoch=current_time_epoch, - startedAfterEpoch=current_time_epoch, - completedAfterEpoch=current_time_epoch - )) - # All tasks should have finished. - self.assertEqual(len(task_infos), 4) + # Wait until all 4 new tasks have completed. 
+ for _ in range(_POLL_TIMEOUT): + task_infos = self._privileged_task_client.getTasks(TaskFilter( + enqueuedAfterEpoch=current_time_epoch, + startedAfterEpoch=current_time_epoch, + completedAfterEpoch=current_time_epoch + )) + if len(task_infos) == 4: + break + time.sleep(1) + else: + self.fail("Not all tasks completed within timeout") task_infos = self._privileged_task_client.getTasks(TaskFilter( enqueuedAfterEpoch=current_time_epoch, @@ -406,12 +414,12 @@ def test_task_5_info_query_filters(self): task_infos = self._privileged_task_client.getTasks(TaskFilter( machineIDs=["*privileged"] )) - self.assertEqual(len(task_infos), 7) + self.assertEqual(len(task_infos), 8) task_infos = self._privileged_task_client.getTasks(TaskFilter( kinds=["*Dummy*"] )) - self.assertEqual(len(task_infos), 7) + self.assertEqual(len(task_infos), 8) # Try to consume the task status from the wrong user! task_infos = self._privileged_task_client.getTasks(TaskFilter( diff --git a/web/tests/libtest/codechecker.py b/web/tests/libtest/codechecker.py index 35655e1225..3fbdc28f31 100644 --- a/web/tests/libtest/codechecker.py +++ b/web/tests/libtest/codechecker.py @@ -568,6 +568,14 @@ def serv_cmd(workspace_dir, port, pg_config=None, serv_args=None): server_cmd.extend(['--host', 'localhost', '--port', str(port)]) + # Allow CI to override worker counts via env vars. 
+ api_procs = os.environ.get('CC_TEST_API_WORKERS') + task_procs = os.environ.get('CC_TEST_TASK_WORKERS') + if api_procs: + server_cmd.extend(['--api-handler-processes', api_procs]) + if task_procs: + server_cmd.extend(['--task-worker-processes', task_procs]) + server_cmd.extend(serv_args or []) # server_cmd.extend(['--verbose', 'debug']) @@ -633,7 +641,7 @@ def start_or_get_server(auth_required=False): encoding="utf-8", errors="ignore") - wait_for_server_start(server_stdout) + wait_for_server_start(server_stdout, port=port) if pg_config: # The behaviour is that CodeChecker servers only configure a @@ -656,7 +664,7 @@ def start_or_get_server(auth_required=False): } -def wait_for_server_start(stdoutfile): +def wait_for_server_start(stdoutfile, port=None): print("Waiting for server start reading file " + stdoutfile) n = 0 server_start_timeout = timedelta(minutes=5) @@ -672,6 +680,26 @@ def wait_for_server_start(stdoutfile): if "usage: CodeChecker" in out: return + # Fail fast if server crashed during startup. + if "Config database initialization failed" in out \ + or "Failed to create schema" in out: + print(f"[DIAG] Server FATAL error after " + f"{n}s. Output:") + print(out[-2000:]) + + # Fallback: check if server can handle HTTP requests. + if port and n > 3: + import urllib.request + try: + urllib.request.urlopen( + f"http://localhost:{port}/", timeout=1) + except urllib.error.HTTPError: + # Any HTTP response (even 404) means server is ready. + print(f"Server responding on port {port} after {n}s") + return + except (ConnectionRefusedError, OSError, urllib.error.URLError): + pass + if n > server_start_timeout.total_seconds(): print("[FATAL!] 
Server failed to start after " f"'{str(server_start_timeout)}' " @@ -732,7 +760,8 @@ def start_server_proc(event, server_cmd, checking_env): server_proc.start() server_output_file = os.path.join(codechecker_cfg['workspace'], str(server_proc.pid) + ".out") - wait_for_server_start(server_output_file) + wait_for_server_start(server_output_file, + port=codechecker_cfg['viewer_port']) return { 'viewer_host': 'localhost', diff --git a/web/tests/libtest/env.py b/web/tests/libtest/env.py index 225f571340..1b18da59fd 100644 --- a/web/tests/libtest/env.py +++ b/web/tests/libtest/env.py @@ -353,6 +353,7 @@ def test_env(test_workspace): base_env['PATH'] = os.path.join(codechecker_package(), 'bin') + \ ':' + base_env['PATH'] base_env['HOME'] = test_workspace + return base_env @@ -406,7 +407,7 @@ def enable_auth(workspace): "enabled": True, "shared_variables": { "host": "http://localhost:8080", - "oauth_host": "http://localhost:3000" + "oauth_host": "http://127.0.0.1:3000" }, "providers": { "github": {