diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 57f81f7fd51ee..6907dbf81dd97 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -170,6 +170,12 @@ jobs: echo 'true' return fi + + # ...same for project branch + if [[ $BRANCH == "lworld" ]]; then + echo 'true' + return + fi fi echo 'false' diff --git a/doc/testing.html b/doc/testing.html index c8d0b928bb05c..648bc8baa6aee 100644 --- a/doc/testing.html +++ b/doc/testing.html @@ -465,6 +465,44 @@

TEST_THREAD_FACTORY

test/jtreg_test_thread_factory/ directory. This class gets compiled during the test image build. The implementation of the Virtual class creates a new virtual thread for executing each test class.

+

VALUE_CLASS_PLUGIN

+

Enables the ValueClassPlugin javac plugin when compiling +and running JTReg tests. This is a temporary mode +intended for use while value classes (JEP 401) are a preview feature. +The long-term plan is to replace classes annotated with +@jdk.test.lib.valueclass.AsValueClass with plain +value class declarations once value classes are +finalized.

+

In the meantime, this mode allows test sources to compile and run as +either value classes or regular identity classes without source-level +changes.

+

When set to any non-empty value, the following options are appended to +every JTReg invocation:

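+
+ * -cpa:<path-to-plugin-jar> — appends the plugin JAR to the compile-time classpath (only when the JAR is present in the test image under jtreg_value_class_plugin/valueClassPlugin.jar).
+ * -vmoption:--enable-preview — enables JVM preview features at runtime.
+ * -javacoption:-XDaccessInternalAPI — grants the compiler access to internal APIs required by the plugin.
+ * -javacoption:--source <version> --enable-preview — enables preview language features at compile time.
+ * -javacoption:-Xplugin:ValueClassPlugin — activates the plugin.
+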

The plugin scans each compilation unit after parsing and converts any +class annotated with +@jdk.test.lib.valueclass.AsValueClass into a value class by +setting the internal VALUE_CLASS modifier flag and clearing +the IDENTITY_TYPE flag. This transformation only takes +effect when --enable-preview is active; without it the +annotation is a no-op and the class compiles as an ordinary identity +class, so the same test source can exercise both code paths.

+

Example:

+
$ make test TEST=jdk_lang JTREG="VALUE_CLASS_PLUGIN=true"
+

JVMTI_STRESS_AGENT

Executes JTReg tests with JVM TI stress agent. The stress agent is the part of test library and located in diff --git a/doc/testing.md b/doc/testing.md index 5f70f2796adc6..1de6c94b679f9 100644 --- a/doc/testing.md +++ b/doc/testing.md @@ -403,6 +403,42 @@ the `test/jtreg_test_thread_factory/` directory. This class gets compiled during the test image build. The implementation of the Virtual class creates a new virtual thread for executing each test class. +#### VALUE_CLASS_PLUGIN + +Enables the `ValueClassPlugin` javac plugin when compiling and running JTReg +tests. This is a **temporary mode** intended for use while value classes +(JEP 401) are a preview feature. The long-term plan is to replace classes +annotated with `@jdk.test.lib.valueclass.AsValueClass` with plain +`value class` declarations once value classes are finalized. + +In the meantime, this mode allows test sources to compile and run as either +value classes or regular identity classes without source-level changes. + +When set to any non-empty value, the following options are appended to every +JTReg invocation: + +* `-cpa:<path-to-plugin-jar>` — appends the plugin JAR to the compile-time + classpath (only when the JAR is present in the test image under + `jtreg_value_class_plugin/valueClassPlugin.jar`). +* `-vmoption:--enable-preview` — enables JVM preview features at runtime. +* `-javacoption:-XDaccessInternalAPI` — grants the compiler access to internal + APIs required by the plugin. +* `-javacoption:--source <version> --enable-preview` — enables preview language + features at compile time. +* `-javacoption:-Xplugin:ValueClassPlugin` — activates the plugin. + +The plugin scans each compilation unit after parsing and converts any class +annotated with `@jdk.test.lib.valueclass.AsValueClass` into a value class by +setting the internal `VALUE_CLASS` modifier flag and clearing the +`IDENTITY_TYPE` flag. 
This transformation only takes effect when +`--enable-preview` is active; without it the annotation is a no-op and the +class compiles as an ordinary identity class, so the same test source can +exercise both code paths. + +Example: + + $ make test TEST=jdk_lang JTREG="VALUE_CLASS_PLUGIN=true" + #### JVMTI_STRESS_AGENT Executes JTReg tests with JVM TI stress agent. The stress agent is the part of diff --git a/make/CompileJavaModules.gmk b/make/CompileJavaModules.gmk index 54d063a7a7182..d8e0de29b702a 100644 --- a/make/CompileJavaModules.gmk +++ b/make/CompileJavaModules.gmk @@ -1,5 +1,5 @@ # -# Copyright (c) 2014, 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2014, 2026, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -30,6 +30,8 @@ include MakeFileStart.gmk include JavaCompilation.gmk include Modules.gmk +include CopyFiles.gmk + ################################################################################ # If this is an imported module that has prebuilt classes, only compile # module-info.java. 
@@ -98,13 +100,15 @@ endif ################################################################################ # Setup the main compilation +COMPILATION_OUTPUTDIR := $(if $($(MODULE)_BIN), $($(MODULE)_BIN), $(JDK_OUTPUTDIR)/modules) + $(eval $(call SetupJavaCompilation, $(MODULE), \ SMALL_JAVA := false, \ MODULE := $(MODULE), \ SRC := $(wildcard $(MODULE_SRC_DIRS)), \ INCLUDES := $(JDK_USER_DEFINED_FILTER), \ FAIL_NO_SRC := $(FAIL_NO_SRC), \ - BIN := $(if $($(MODULE)_BIN), $($(MODULE)_BIN), $(JDK_OUTPUTDIR)/modules), \ + BIN := $(COMPILATION_OUTPUTDIR), \ HEADERS := $(SUPPORT_OUTPUTDIR)/headers, \ CREATE_API_DIGEST := true, \ CLEAN := $(CLEAN), \ @@ -126,6 +130,66 @@ $(eval $(call SetupJavaCompilation, $(MODULE), \ TARGETS += $($(MODULE)) +################################################################################ +# Setup compilation for preview classes in the module +# TBD: When $(DOCLINT) was included there was an NPE in JavacTypes.getOverriddenMethods + +# Directory and file name suffix for jar file containing preview classes/resources. +PREVIEW_CLASSES_LABEL := preview +# Module relative path in which preview classes/resources are placed. +PREVIEW_PATH := META-INF/preview + +MODULE_PREVIEW_SRC_DIRS := $(call FindModulePreviewSrcDirs, $(MODULE)) +MODULE_PREVIEW_SOURCEPATH := $(call GetModulePreviewSrcPath) +ifneq ($(MODULE_PREVIEW_SRC_DIRS),) + # Compile preview classes into a separate directory, and then copy into the + # correct output path location. We cannot compile directly into the desired + # directory because it's the compiler which creates the original + # '//...' hierarchy. 
+ PREVIEW_OUTPUTDIR := $(SUPPORT_OUTPUTDIR)/$(PREVIEW_CLASSES_LABEL) + PATCH_COMMAND := $(MODULE)=$(call strip, $(COMPILATION_OUTPUTDIR)/$(MODULE)) + + $(eval $(call SetupJavaCompilation, $(MODULE)-$(PREVIEW_CLASSES_LABEL), \ + SMALL_JAVA := false, \ + MODULE := $(MODULE), \ + SRC := $(wildcard $(MODULE_PREVIEW_SRC_DIRS)), \ + INCLUDES := $(JDK_USER_DEFINED_FILTER), \ + FAIL_NO_SRC := $(FAIL_NO_SRC), \ + BIN := $(PREVIEW_OUTPUTDIR)/, \ + DISABLED_WARNINGS := $(DISABLED_WARNINGS_java) preview, \ + EXCLUDES := $(EXCLUDES), \ + EXCLUDE_FILES := $(EXCLUDE_FILES), \ + KEEP_ALL_TRANSLATIONS := $(KEEP_ALL_TRANSLATIONS), \ + DEPENDS := $($(MODULE)), \ + JAVAC_FLAGS := \ + $(JAVAC_FLAGS) \ + --module-source-path $(MODULE_PREVIEW_SOURCEPATH) \ + --module-path $(MODULEPATH) \ + --patch-module $(PATCH_COMMAND) \ + --system none \ + --enable-preview -source $(JDK_SOURCE_TARGET_VERSION), \ + )) + + # Don't add '$($(MODULE)-$(PREVIEW_CLASSES_LABEL))' to TARGETS (it's transient). + # The 'preview' target below depends on it, and that's the non-transient + # result we care about. + + # Copy compiled output from "$(PREVIEW_OUTPUTDIR)/$(MODULE)//..." + # to "$(COMPILATION_OUTPUTDIR)/$(MODULE)/$(PREVIEW_PATH)//...". + MOD_SRC := $(PREVIEW_OUTPUTDIR)/$(MODULE) + MOD_DST := $(COMPILATION_OUTPUTDIR)/$(MODULE) + + # NOTE: We cannot use '$(CP) -R $(MOD_SRC)/*/ ...' to select sub-directories (it + # does not work on MacOS/BSD). Use 'filter-out' to explicitly exclude marker files. + $(MOD_DST)/_the.$(MODULE).preview: $($(MODULE)-$(PREVIEW_CLASSES_LABEL)) + $(RM) -r $(@D)/$(PREVIEW_PATH) + $(MKDIR) -p $(@D)/$(PREVIEW_PATH) + $(CP) -R $(filter-out $(MOD_SRC)/_%, $(wildcard $(MOD_SRC)/*)) $(@D)/$(PREVIEW_PATH) + $(TOUCH) $@ + + TARGETS += $(MOD_DST)/_the.$(MODULE).preview +endif + # Declare dependencies between java compilations of different modules. # Since the other modules are declared in different invocations of this file, # use the macro to find the correct target file to depend on. 
diff --git a/make/Docs.gmk b/make/Docs.gmk index 9cee8cd40c1b2..a4a4bc60363e3 100644 --- a/make/Docs.gmk +++ b/make/Docs.gmk @@ -93,9 +93,10 @@ JAVADOC_DISABLED_DOCLINT_WARNINGS := missing JAVADOC_DISABLED_DOCLINT_PACKAGES := org.w3c.* javax.smartcardio # The initial set of options for javadoc -JAVADOC_OPTIONS := -use -keywords -notimestamp \ +JAVADOC_OPTIONS := -XDignore.symbol.file=true -use -keywords -notimestamp \ -serialwarn -encoding utf-8 -docencoding utf-8 -breakiterator \ -splitIndex --system none -javafx --expand-requires transitive \ + --enable-preview -source $(JDK_SOURCE_TARGET_VERSION) \ --override-methods=summary # The reference options must stay stable to allow for comparisons across the diff --git a/make/Images.gmk b/make/Images.gmk index 8008cfa677912..5d6f63e69658d 100644 --- a/make/Images.gmk +++ b/make/Images.gmk @@ -138,6 +138,7 @@ CDS_DUMP_FLAGS = -Xmx128M -Xms128M # # Param1 - VM variant (e.g., server, client, zero, ...) # Param2 - _nocoops, _nocoh, _nocoops_nocoh, or empty +# Param3 - _preview, or empty define CreateCDSArchive $1_$2_COOPS_OPTION := $(if $(findstring _nocoops, $2),-XX:-UseCompressedOops) # enable and also explicitly disable coh as needed. 
@@ -145,15 +146,15 @@ define CreateCDSArchive $1_$2_NOCOH_OPTION := -XX:+UnlockExperimentalVMOptions \ $(if $(findstring _nocoh, $2),-XX:-UseCompactObjectHeaders,-XX:+UseCompactObjectHeaders) endif - $1_$2_DUMP_EXTRA_ARG := $$($1_$2_COOPS_OPTION) $$($1_$2_NOCOH_OPTION) - $1_$2_DUMP_TYPE := $(if $(findstring _nocoops, $2),-NOCOOPS,)$(if $(findstring _nocoh, $2),-NOCOH,) + $1_$2_$3_DUMP_EXTRA_ARG := $$($1_$2_COOPS_OPTION) $$($1_$2_NOCOH_OPTION) $(if $(findstring _preview, $3), --enable-preview,) + $1_$2_$3_DUMP_TYPE := $(if $(findstring _nocoops, $2),-NOCOOPS,)$(if $(findstring _nocoh, $2),-NOCOH,)$(if $(findstring _preview, $3),-PREVIEW,) - $1_$2_CDS_DUMP_FLAGS := $(CDS_DUMP_FLAGS) $(if $(filter g1gc, $(JVM_FEATURES_$1)), -XX:+UseG1GC) + $1_$2_$3_CDS_DUMP_FLAGS := $(CDS_DUMP_FLAGS) $(if $(filter g1gc, $(JVM_FEATURES_$1)), -XX:+UseG1GC) ifeq ($(OPENJDK_TARGET_OS), windows) - $1_$2_CDS_ARCHIVE := bin/$1/classes$2.jsa + $1_$2_$3_CDS_ARCHIVE := bin/$1/classes$2$3.jsa else - $1_$2_CDS_ARCHIVE := lib/$1/classes$2.jsa + $1_$2_$3_CDS_ARCHIVE := lib/$1/classes$2$3.jsa endif ifneq ($(COMPARE_BUILD), ) @@ -161,51 +162,55 @@ define CreateCDSArchive endif ifeq ($(DEBUG_CDS_ARCHIVE), true) - $1_$2_CDS_DUMP_FLAGS += -Xlog:aot+map*=trace:file=$$(JDK_IMAGE_DIR)/$$($1_$2_CDS_ARCHIVE).cdsmap:none:filesize=0 + $1_$2_$3_CDS_DUMP_FLAGS += -Xlog:aot+map*=trace:file=$$(JDK_IMAGE_DIR)/$$($1_$2_$3_CDS_ARCHIVE).cdsmap:none:filesize=0 endif - $$(eval $$(call SetupExecute, $1_$2_gen_cds_archive_jdk, \ - WARN := Creating CDS$$($1_$2_DUMP_TYPE) archive for jdk image for $1, \ - INFO := Using CDS flags for $1: $$($1_$2_CDS_DUMP_FLAGS), \ + $$(eval $$(call SetupExecute, $1_$2_$3_gen_cds_archive_jdk, \ + WARN := Creating CDS$$($1_$2_$3_DUMP_TYPE) archive for jdk image for $1, \ + INFO := Using CDS flags for $1: $$($1_$2_$3_CDS_DUMP_FLAGS), \ DEPS := $$(jlink_jdk), \ - OUTPUT_FILE := $$(JDK_IMAGE_DIR)/$$($1_$2_CDS_ARCHIVE), \ + OUTPUT_FILE := $$(JDK_IMAGE_DIR)/$$($1_$2_$3_CDS_ARCHIVE), \ SUPPORT_DIR 
:= $$(JDK_IMAGE_SUPPORT_DIR), \ COMMAND := $$(FIXPATH) $$(JDK_IMAGE_DIR)/bin/java -Xshare:dump \ - -XX:SharedArchiveFile=$$(JDK_IMAGE_DIR)/$$($1_$2_CDS_ARCHIVE) \ - -$1 $$($1_$2_DUMP_EXTRA_ARG) $$($1_$2_CDS_DUMP_FLAGS) $$(LOG_INFO), \ + -XX:SharedArchiveFile=$$(JDK_IMAGE_DIR)/$$($1_$2_$3_CDS_ARCHIVE) \ + -$1 $$($1_$2_$3_DUMP_EXTRA_ARG) $$($1_$2_$3_CDS_DUMP_FLAGS) $$(LOG_INFO), \ )) - JDK_TARGETS += $$($1_$2_gen_cds_archive_jdk) + JDK_TARGETS += $$($1_$2_$3_gen_cds_archive_jdk) - $$(eval $$(call SetupExecute, $1_$2_gen_cds_archive_jre, \ - WARN := Creating CDS$$($1_$2_DUMP_TYPE) archive for jre image for $1, \ - INFO := Using CDS flags for $1: $$($1_$2_CDS_DUMP_FLAGS), \ + $$(eval $$(call SetupExecute, $1_$2_$3_gen_cds_archive_jre, \ + WARN := Creating CDS$$($1_$2_$3_DUMP_TYPE) archive for jre image for $1, \ + INFO := Using CDS flags for $1: $$($1_$2_$3_CDS_DUMP_FLAGS), \ DEPS := $$(jlink_jre), \ - OUTPUT_FILE := $$(JRE_IMAGE_DIR)/$$($1_$2_CDS_ARCHIVE), \ + OUTPUT_FILE := $$(JRE_IMAGE_DIR)/$$($1_$2_$3_CDS_ARCHIVE), \ SUPPORT_DIR := $$(JRE_IMAGE_SUPPORT_DIR), \ COMMAND := $$(FIXPATH) $$(JRE_IMAGE_DIR)/bin/java -Xshare:dump \ - -XX:SharedArchiveFile=$$(JRE_IMAGE_DIR)/$$($1_$2_CDS_ARCHIVE) \ - -$1 $$($1_$2_DUMP_EXTRA_ARG) $$($1_$2_CDS_DUMP_FLAGS) $$(LOG_INFO), \ + -XX:SharedArchiveFile=$$(JRE_IMAGE_DIR)/$$($1_$2_$3_CDS_ARCHIVE) \ + -$1 $$($1_$2_$3_DUMP_EXTRA_ARG) $$($1_$2_$3_CDS_DUMP_FLAGS) $$(LOG_INFO), \ )) - JRE_TARGETS += $$($1_$2_gen_cds_archive_jre) + JRE_TARGETS += $$($1_$2_$3_gen_cds_archive_jre) endef ifeq ($(BUILD_CDS_ARCHIVE), true) $(foreach v, $(JVM_VARIANTS), \ - $(eval $(call CreateCDSArchive,$v,)) \ + $(eval $(call CreateCDSArchive,$v,,)) \ + $(eval $(call CreateCDSArchive,$v,,_preview)) \ ) ifeq ($(call isTargetCpuBits, 64), true) $(foreach v, $(JVM_VARIANTS), \ - $(eval $(call CreateCDSArchive,$v,_nocoops)) \ + $(eval $(call CreateCDSArchive,$v,_nocoops,)) \ + $(eval $(call CreateCDSArchive,$v,_nocoops,_preview)) \ ) ifeq ($(BUILD_CDS_ARCHIVE_NOCOH), 
true) $(foreach v, $(JVM_VARIANTS), \ $(eval $(call CreateCDSArchive,$v,_nocoh)) \ + $(eval $(call CreateCDSArchive,$v,_nocoh,_preview)) \ ) $(foreach v, $(JVM_VARIANTS), \ $(eval $(call CreateCDSArchive,$v,_nocoops_nocoh)) \ + $(eval $(call CreateCDSArchive,$v,_nocoops_nocoh,_preview)) \ ) endif endif diff --git a/make/Main.gmk b/make/Main.gmk index 172cee697ad38..198403844c313 100644 --- a/make/Main.gmk +++ b/make/Main.gmk @@ -794,6 +794,20 @@ ifeq ($(BUILD_JTREG_TEST_THREAD_FACTORY), true) )) endif +# Builds the value class plugin jtreg extension (JEP 401) +$(eval $(call SetupTarget, build-test-value-class-plugin, \ + MAKEFILE := test/BuildJtregValueClassPlugin, \ + TARGET := build, \ + DEPS := interim-langtools exploded-image, \ +)) + +# Copies the value class plugin into the test image +$(eval $(call SetupTarget, test-image-value-class-plugin, \ + MAKEFILE := test/BuildJtregValueClassPlugin, \ + TARGET := images, \ + DEPS := build-test-value-class-plugin, \ +)) + $(eval $(call SetupTarget, build-microbenchmark, \ MAKEFILE := test/BuildMicrobenchmark, \ DEPS := interim-langtools exploded-image build-test-lib, \ @@ -1310,6 +1324,8 @@ ifeq ($(BUILD_JTREG_TEST_THREAD_FACTORY), true) test-image: test-image-test-thread-factory endif +test-image: test-image-value-class-plugin + ifneq ($(JMH_CORE_JAR), ) test-image: build-microbenchmark endif diff --git a/make/MainSupport.gmk b/make/MainSupport.gmk index ee5bb324f8f0a..6025cc74a9420 100644 --- a/make/MainSupport.gmk +++ b/make/MainSupport.gmk @@ -1,5 +1,5 @@ # -# Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -107,6 +107,7 @@ define Clean-java @$(PRINTF) "Cleaning java %s..." 
"$(if $1,for $(strip $1) )" @$(ECHO) "" $(LOG_DEBUG) $(RM) -r $(JDK_OUTPUTDIR)/modules/$(strip $1) + $(RM) -r $(SUPPORT_OUTPUTDIR)/preview/$(strip $1) $(RM) -r $(SUPPORT_OUTPUTDIR)/special_classes/$(strip $1) $(ECHO) " done" $(PRINTF) "Cleaning headers %s..." "$(if $1,for $(strip $1) )" diff --git a/make/RunTests.gmk b/make/RunTests.gmk index 1eb81499505cb..1ae49298785eb 100644 --- a/make/RunTests.gmk +++ b/make/RunTests.gmk @@ -93,6 +93,9 @@ JTREG_FAILURE_HANDLER := $(JTREG_FAILURE_HANDLER_DIR)/jtregFailureHandler.jar JTREG_TEST_THREAD_FACTORY_DIR := $(TEST_IMAGE_DIR)/jtreg_test_thread_factory JTREG_TEST_THREAD_FACTORY_JAR := $(JTREG_TEST_THREAD_FACTORY_DIR)/jtregTestThreadFactory.jar +JTREG_VALUE_CLASS_PLUGIN_DIR := $(TEST_IMAGE_DIR)/jtreg_value_class_plugin +JTREG_VALUE_CLASS_PLUGIN_JAR := $(JTREG_VALUE_CLASS_PLUGIN_DIR)/valueClassPlugin.jar + JTREG_FAILURE_HANDLER_TIMEOUT ?= 0 ifneq ($(wildcard $(JTREG_FAILURE_HANDLER)), ) @@ -206,7 +209,7 @@ $(eval $(call ParseKeywordVariable, JTREG, \ SINGLE_KEYWORDS := JOBS TIMEOUT_FACTOR FAILURE_HANDLER_TIMEOUT \ TEST_MODE ASSERT VERBOSE RETAIN TEST_THREAD_FACTORY JVMTI_STRESS_AGENT \ MAX_MEM RUN_PROBLEM_LISTS RETRY_COUNT REPEAT_COUNT MAX_OUTPUT REPORT \ - AOT_JDK MANUAL $(CUSTOM_JTREG_SINGLE_KEYWORDS), \ + AOT_JDK MANUAL VALUE_CLASS_PLUGIN $(CUSTOM_JTREG_SINGLE_KEYWORDS), \ STRING_KEYWORDS := OPTIONS JAVA_OPTIONS VM_OPTIONS KEYWORDS \ EXTRA_PROBLEM_LISTS LAUNCHER_OPTIONS \ $(CUSTOM_JTREG_STRING_KEYWORDS), \ @@ -877,6 +880,20 @@ define SetupRunJtregTestBody )) endif + ifneq ($$(JTREG_VALUE_CLASS_PLUGIN), ) + ifneq ($$(wildcard $$(JTREG_VALUE_CLASS_PLUGIN_JAR)), ) + $1_JTREG_BASIC_OPTIONS += -cpa:$$(JTREG_VALUE_CLASS_PLUGIN_JAR) + endif + $1_JTREG_BASIC_OPTIONS += -vmoption:--enable-preview + $1_JTREG_BASIC_OPTIONS += -javacoption:-XDaccessInternalAPI + $1_JTREG_BASIC_OPTIONS += -javacoption:--source -javacoption:$(VERSION_FEATURE) + $1_JTREG_BASIC_OPTIONS += -javacoption:--enable-preview + $1_JTREG_BASIC_OPTIONS += 
-javacoption:-Xplugin:ValueClassPlugin + $1_JTREG_BASIC_OPTIONS += $$(addprefix $$(JTREG_PROBLEM_LIST_PREFIX), $$(wildcard \ + $$(addprefix $$($1_TEST_ROOT)/, ProblemList-ValueClass.txt) \ + )) + endif + ifneq ($$(JTREG_JVMTI_STRESS_AGENT), ) AGENT := $$(LIBRARY_PREFIX)JvmtiStressAgent$$(SHARED_LIBRARY_SUFFIX)=$$(JTREG_JVMTI_STRESS_AGENT) $1_JTREG_BASIC_OPTIONS += -javaoption:'-agentpath:$(TEST_IMAGE_DIR)/hotspot/jtreg/native/$$(AGENT)' diff --git a/make/RunTestsPrebuiltSpec.gmk b/make/RunTestsPrebuiltSpec.gmk index 568f69da5a519..55801489cbf8d 100644 --- a/make/RunTestsPrebuiltSpec.gmk +++ b/make/RunTestsPrebuiltSpec.gmk @@ -53,6 +53,9 @@ $(eval $(call VerifyVariable,TEST_IMAGE_DIR)) $(eval $(call VerifyVariable,MAKE)) $(eval $(call VerifyVariable,BASH)) +include $(TOPDIR)/make/conf/version-numbers.conf +VERSION_FEATURE := $(DEFAULT_VERSION_FEATURE) + ################################################################################ # The "human readable" name of this configuration CONF_NAME := run-test-prebuilt diff --git a/make/ZipSource.gmk b/make/ZipSource.gmk index 519339f207886..35c80400bf0d9 100644 --- a/make/ZipSource.gmk +++ b/make/ZipSource.gmk @@ -35,12 +35,35 @@ $(if $(filter $(TOPDIR)/%, $(SUPPORT_OUTPUTDIR)), $(eval SRC_ZIP_BASE := $(TOPDI ################################################################################ # Create the directory structure for src.zip using symlinks. -# <module>/<package>/<file>.java +# <module>/<package>/<file>.java +# or: +# <module>/META-INF/preview/<package>/<file>.java +# for preview enabled sources. +# +# Generate the src dirs in the first make invocation and then call this makefile +# again to create src.zip. ALL_MODULES := $(FindAllModules) -# Generate the src dirs in the first make invocation and then call this makefile -# again to create src.zip. +# Module source directories ($d) are things like (but not limited to): +# - $TOPDIR/src/<module>/share/classes +# - $SUPPORT_OUTPUTDIR/gensrc/<module> +# +# A symbolic link is created for each directory. 
For example: +# - $TOPDIR/src/<module>/share/classes +# is linked from: +# - $SRC_ZIP_WORK_DIR/src/<module>/share/classes/<module> +# +# Each link has the form: +# - $SRC_ZIP_WORK_DIR/<src-dir>/<module> +# and always has the name of its module (even if the module already appears in <src-dir>). +# +# Then the contents of all link parent directories are given as ZIP includes: +# - $SRC_ZIP_WORK_DIR/<src-dir> +# results in ZIP file entries starting: +# - <module>/... +# where multiple source links/directories can contribute to the same module's sources. +# $(foreach m, $(ALL_MODULES), \ $(foreach d, $(call FindModuleSrcDirs, $m), \ $(eval $d_TARGET := $(SRC_ZIP_WORK_DIR)/$(patsubst $(TOPDIR)/%,%,$(patsubst $(SUPPORT_OUTPUTDIR)/%,%,$d))/$m) \ @@ -54,6 +77,36 @@ $(foreach m, $(ALL_MODULES), \ ) \ ) +# Preview source directories are currently limited to: +# - $SUPPORT_OUTPUTDIR/gensrc-valueclasses/<module> +# If this is changed, the 'patsubst' rewriting will need updating. +# +# The difficulty with this case is that we need to have a directory of the form: +# - $SRC_ZIP_WORK_DIR/<src-dir>/<module> +# containing (either directly or via symbolic linking) a `META-INF/preview` +# subdirectory which then contains the sources. +# +# The easiest way to achieve this is to symbolically link: +# - $SRC_ZIP_WORK_DIR/<src-dir>/<module>/META-INF/preview +# and then use the parent directory: +# - $SRC_ZIP_WORK_DIR/<src-dir>/<module> +# as the actual target for the ZIP file generation. However, this requires +# an extra rule to associate the symbolic link and the parent directory. 
+# +$(foreach m, $(ALL_MODULES), \ + $(foreach d, $(call FindModulePreviewSrcDirs, $m), \ + $(eval $d_TARGET := $(SRC_ZIP_WORK_DIR)/$(patsubst $(SUPPORT_OUTPUTDIR)/%,%,$d)/$m) \ + $(if $(SRC_GENERATED), , \ + $(eval $$($d_TARGET)/META-INF/preview: $d ; \ + $$(if $(filter $(SRC_ZIP_BASE)/%, $d), $$(link-file-relative), $$(link-file-absolute)) \ + ) \ + $(eval $$($d_TARGET): $$($d_TARGET)/META-INF/preview) \ + ) \ + $(eval SRC_ZIP_SRCS += $$($d_TARGET)) \ + $(eval SRC_ZIP_SRCS_$m += $$($d_TARGET)) \ + ) \ +) + TARGETS += $(SRC_ZIP_SRCS) ################################################################################ diff --git a/make/common/Modules.gmk b/make/common/Modules.gmk index 1ec7bef6f98ad..f40921e243474 100644 --- a/make/common/Modules.gmk +++ b/make/common/Modules.gmk @@ -68,6 +68,12 @@ GENERATED_SRC_DIRS += \ $(SUPPORT_OUTPUTDIR)/gensrc \ # +# Directories in which generated preview classes may exist. +# Currently this is restricted to generated value classes, but can be extended. +GENERATED_PREVIEW_SUBDIRS += \ + $(SUPPORT_OUTPUTDIR)/gensrc-valueclasses \ + # + TOP_SRC_DIRS += \ $(TOPDIR)/src \ # @@ -137,6 +143,13 @@ FindModuleSrcDirs = \ $(addsuffix /$(strip $1), $(GENERATED_SRC_DIRS) $(IMPORT_MODULES_SRC)) \ $(foreach sub, $(SRC_SUBDIRS), $(addsuffix /$(strip $1)/$(sub), $(TOP_SRC_DIRS))))) +# Find preview class source dirs for a particular module. +# Currently this is restricted to generated value classes, but can be extended. +# $1 - Module to find source dirs for +FindModulePreviewSrcDirs = \ + $(strip $(wildcard \ + $(addsuffix /$(strip $1), $(GENERATED_PREVIEW_SUBDIRS)))) + # Find all specs dirs for a particular module # $1 - Module to find specs dirs for FindModuleSpecsDirs = \ @@ -159,6 +172,13 @@ GetModuleSrcPath = \ $(addsuffix /*, $(GENERATED_SRC_DIRS) $(IMPORT_MODULES_SRC)) \ $(foreach sub, $(SRC_SUBDIRS), $(addsuffix /*/$(sub), $(TOP_SRC_DIRS)))) +# Construct the complete module source path for preview classes. 
+# Currently this is restricted to generated value classes, but can be extended. +GetModulePreviewSrcPath = \ + $(call PathList, \ + $(addsuffix /*, $(GENERATED_PREVIEW_SUBDIRS) $(GENERATED_SRC_DIRS) $(IMPORT_MODULES_SRC)) \ + $(foreach sub, $(SRC_SUBDIRS), $(addsuffix /*/$(sub), $(TOP_SRC_DIRS)))) + ################################################################################ # Extract module dependencies from module-info.java files, both normal # dependencies ("requires"), and indirect exports ("requires transitive"). diff --git a/make/common/modules/GensrcStreamPreProcessing.gmk b/make/common/modules/GensrcStreamPreProcessing.gmk index a48e3c98d4b20..7302cf1a58b5c 100644 --- a/make/common/modules/GensrcStreamPreProcessing.gmk +++ b/make/common/modules/GensrcStreamPreProcessing.gmk @@ -1,5 +1,5 @@ # -# Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025, 2026, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
# # This code is free software; you can redistribute it and/or modify it @@ -33,9 +33,12 @@ ifeq ($(INCLUDE), true) include Execute.gmk include $(TOPDIR)/make/ToolsJdk.gmk -NON_BYTE_NUMBER_TYPES := char short int long float double +NON_BYTE_INTEGER_TYPES := char short int long +NON_BYTE_NUMBER_TYPES := $(NON_BYTE_INTEGER_TYPES) float double NUMBER_TYPES := byte $(NON_BYTE_NUMBER_TYPES) +INTEGER_NUMBER_TYPES := byte $(NON_BYTE_INTEGER_TYPES) PRIMITIVE_TYPES := boolean $(NUMBER_TYPES) +BITWISE_PRIMITIVE_TYPES := boolean $(INTEGER_NUMBER_TYPES) ################################################################################ # The Conv function converts a type given as first argument (as a normal Java diff --git a/make/hotspot/lib/JvmFeatures.gmk b/make/hotspot/lib/JvmFeatures.gmk index 7dc5fd676a113..ced4f1f63504d 100644 --- a/make/hotspot/lib/JvmFeatures.gmk +++ b/make/hotspot/lib/JvmFeatures.gmk @@ -52,7 +52,8 @@ ifeq ($(call check-jvm-feature, zero), true) JVM_EXCLUDES += opto libadt JVM_EXCLUDE_PATTERNS += c1_ c1/ c2_ runtime_ /c2/ JVM_EXCLUDE_FILES += templateInterpreter.cpp \ - templateInterpreterGenerator.cpp bcEscapeAnalyzer.cpp ciTypeFlow.cpp + templateInterpreterGenerator.cpp bcEscapeAnalyzer.cpp ciTypeFlow.cpp \ + macroAssembler_common.cpp JVM_CFLAGS_FEATURES += -DZERO \ -DZERO_LIBARCH='"$(OPENJDK_TARGET_CPU_LEGACY_LIB)"' $(LIBFFI_CFLAGS) JVM_LIBS_FEATURES += $(LIBFFI_LIBS) diff --git a/make/langtools/src/classes/build/tools/symbolgenerator/CreateSymbols.java b/make/langtools/src/classes/build/tools/symbolgenerator/CreateSymbols.java index fe5938ce0e34e..14d6e17ec05ad 100644 --- a/make/langtools/src/classes/build/tools/symbolgenerator/CreateSymbols.java +++ b/make/langtools/src/classes/build/tools/symbolgenerator/CreateSymbols.java @@ -183,6 +183,18 @@ */ public class CreateSymbols { + /** + *

Support for a "preview version" of classfiles when running with preview + * mode. This is modeled as a new version (@) and since preview mode is only + * supported for the current version, a single identifier token is sufficient. + * + *

For example, inside ct.sym, 27 will be modeled as 'R', and the preview + * for 27 will be '@'. Classfiles unchanged between 27 and 27-preview will + * not be duplicated (in the same way classfiles that are common between 26 + * and 27 are shared). + */ + private static final String PREVIEW_VERSION = "@"; + // /**Create sig files for ct.sym reading the classes description from the directory that contains * {@code ctDescriptionFile}, using the file as a recipe to create the sigfiles. @@ -212,12 +224,17 @@ public void createSymbols(String ctDescriptionFileExtra, String ctDescriptionFil loadVersionClassesFromDirectory(data.classes, data.modules, moduleClassPath, includedModules, currentVersion, previousVersion); + loadVersionClassesFromDirectory(data.classes, data.modules, moduleClassPath, + includedModules, PREVIEW_VERSION, currentVersion); + stripNonExistentAnnotations(data); splitHeaders(data.classes); Map> package2Version2Module = new HashMap<>(); Map> directory2FileData = new TreeMap<>(); + String currentVersionFin = currentVersion; + for (ModuleDescription md : data.modules.values()) { for (ModuleHeaderDescription mhd : md.header) { writeModulesForVersions(directory2FileData, @@ -226,6 +243,9 @@ public void createSymbols(String ctDescriptionFileExtra, String ctDescriptionFil mhd.versions, version -> { String versionString = Character.toString(version); + if (PREVIEW_VERSION.equals(versionString)) { + versionString = currentVersionFin; + } int versionNumber = Integer.parseInt(versionString, Character.MAX_RADIX); versionString = Integer.toString(versionNumber); if (versionNumber == currentVersionParsed && !preReleaseTag.isEmpty()) { @@ -809,6 +829,9 @@ void writeClass(Map> directory2FileData, String module, String version) throws IOException { var classFile = ClassFile.of().build(ClassDesc.ofInternalName(classDescription.name), clb -> { + if (header.preview) { + clb.withVersion(ClassFile.latestMajorVersion(), ClassFile.PREVIEW_MINOR_VERSION); + } if 
(header.extendsAttr != null) clb.withSuperclass(ClassDesc.ofInternalName(header.extendsAttr)); clb.withInterfaceSymbols(header.implementsAttr.stream().map(ClassDesc::ofInternalName).collect(Collectors.toList())) @@ -1305,8 +1328,9 @@ private void loadVersionClassesFromDirectory(ClassList classes, Collections.emptySet()); try { + record ExportedDir(Path modulePath, Path exportedDir) {} Map modulePath2Header = new HashMap<>(); - List pendingExportedDirectories = new ArrayList<>(); + List pendingExportedDirectories = new ArrayList<>(); try (DirectoryStream ds = Files.newDirectoryStream(modulesDirectory)) { for (Path p : ds) { @@ -1314,7 +1338,7 @@ private void loadVersionClassesFromDirectory(ClassList classes, continue; } - Path moduleInfo = p.resolve("module-info.class"); + Path moduleInfo = resolvePossiblyPreviewClassFile(version, p, p.resolve("module-info.class")); if (Files.isReadable(moduleInfo)) { ModuleDescription md = inspectModuleInfoClassFile(Files.readAllBytes(moduleInfo), @@ -1333,7 +1357,7 @@ private void loadVersionClassesFromDirectory(ClassList classes, for (String dir : currentModuleExports) { includes.add(dir); - pendingExportedDirectories.add(p.resolve(dir)); + pendingExportedDirectories.add(new ExportedDir(p, p.resolve(dir))); } } else { throw new IllegalArgumentException("Included module: " + @@ -1345,13 +1369,15 @@ private void loadVersionClassesFromDirectory(ClassList classes, List pendingExtraClasses = new ArrayList<>(); - for (Path exported : pendingExportedDirectories) { - try (DirectoryStream ds = Files.newDirectoryStream(exported)) { + for (ExportedDir exported : pendingExportedDirectories) { + try (DirectoryStream ds = Files.newDirectoryStream(exported.exportedDir())) { for (Path p2 : ds) { if (!Files.isRegularFile(p2) || !p2.getFileName().toString().endsWith(".class")) { continue; } + p2 = resolvePossiblyPreviewClassFile(version, exported.modulePath(), p2); + loadFromDirectoryHandleClassFile(p2, currentVersionClasses, currentEIList, 
version, pendingExtraClasses); @@ -1370,6 +1396,7 @@ private void loadVersionClassesFromDirectory(ClassList classes, Path currentPath = e.getKey().resolve(current + ".class"); if (Files.isReadable(currentPath)) { + currentPath = resolvePossiblyPreviewClassFile(version, e.getKey(), currentPath); String pack = current.substring(0, current.lastIndexOf('/')); e.getValue().extraModulePackages.add(pack); @@ -1402,6 +1429,21 @@ private void loadFromDirectoryHandleClassFile(Path path, ClassList currentVersio } } + private Path resolvePossiblyPreviewClassFile(String version, Path moduleClassDir, Path classfile) { + if (!PREVIEW_VERSION.equals(version)) { + return classfile; + } + + Path relativePath = moduleClassDir.relativize(classfile); + Path previewCandidate = moduleClassDir.resolve("META-INF").resolve("preview").resolve(relativePath); + + if (Files.exists(previewCandidate)) { + return previewCandidate; + } + + return classfile; + } + private void finishClassLoading(ClassList classes, Map modules, Map currentVersionModules, ClassList currentVersionClasses, ExcludeIncludeList currentEIList, String version, String baseline) { ModuleDescription unsupported = @@ -1930,6 +1972,7 @@ private void inspectClassFile(InputStream in, ClassList classes, ExcludeIncludeL ClassHeaderDescription headerDesc = new ClassHeaderDescription(); headerDesc.flags = cm.flags().flagsMask(); + headerDesc.preview = cm.minorVersion() == ClassFile.PREVIEW_MINOR_VERSION; if (cm.superclass().isPresent()) { headerDesc.extendsAttr = cm.superclass().get().asInternalName(); @@ -1996,6 +2039,7 @@ private ModuleDescription inspectModuleInfoClassFile(byte[] data, headerDesc.versions = version; headerDesc.flags = cm.flags().flagsMask(); + headerDesc.preview = cm.minorVersion() == ClassFile.PREVIEW_MINOR_VERSION; for (var attr : cm.attributes()) { if (!readAttribute(headerDesc, attr)) @@ -2263,7 +2307,13 @@ private boolean readAttribute(FeatureDescription feature, Attribute attr) { feature.classTypeAnnotations = 
typeAnnotations2Descriptions(a.annotations()); case RuntimeVisibleTypeAnnotationsAttribute a -> feature.runtimeTypeAnnotations = typeAnnotations2Descriptions(a.annotations()); - default -> throw new IllegalArgumentException("Unhandled attribute: " + attr.attributeName()); // Do nothing + default -> { + if (attr.attributeName().equalsString("LoadableDescriptors")) { + //OK, do nothing + } else { + throw new IllegalArgumentException("Unhandled attribute: " + attr.attributeName()); + } + } } return true; @@ -3308,11 +3358,13 @@ protected void readRecordComponents(LineBasedReader reader) throws IOException { static abstract class HeaderDescription extends FeatureDescription { List innerClasses; + boolean preview; @Override public int hashCode() { int hash = super.hashCode(); hash = 19 * hash + Objects.hashCode(this.innerClasses); + hash = 19 * hash + Objects.hashCode(this.preview); return hash; } @@ -3328,6 +3380,9 @@ public boolean equals(Object obj) { if (!listEquals(this.innerClasses, other.innerClasses)) { return false; } + if (this.preview != other.preview) { + return false; + } return true; } @@ -3366,6 +3421,22 @@ protected void readInnerClasses(LineBasedReader reader) throws IOException { } } + @Override + protected void writeAttributes(Appendable output) throws IOException { + super.writeAttributes(output); + if (preview) { + output.append(" preview true"); + } + } + + @Override + protected void readAttributes(LineBasedReader reader) { + super.readAttributes(reader); + String inPreview = reader.attributes.get("preview"); + if ("true".equals(inPreview)) { + preview = true; + } + } } static class MethodDescription extends FeatureDescription { diff --git a/make/modules/java.base/Gensrc.gmk b/make/modules/java.base/Gensrc.gmk index 675038c8fd573..7de79e016a1a3 100644 --- a/make/modules/java.base/Gensrc.gmk +++ b/make/modules/java.base/Gensrc.gmk @@ -37,6 +37,7 @@ include gensrc/GensrcMisc.gmk include gensrc/GensrcModuleLoaderMap.gmk include gensrc/GensrcRegex.gmk 
include gensrc/GensrcScopedMemoryAccess.gmk +include gensrc/GensrcValueClasses.gmk include gensrc/GensrcVarHandles.gmk ################################################################################ diff --git a/make/modules/java.base/gensrc/GensrcValueClasses.gmk b/make/modules/java.base/gensrc/GensrcValueClasses.gmk new file mode 100644 index 0000000000000..2bbd9ca1eb2e0 --- /dev/null +++ b/make/modules/java.base/gensrc/GensrcValueClasses.gmk @@ -0,0 +1,75 @@ +# +# Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# This code is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 only, as +# published by the Free Software Foundation. Oracle designates this +# particular file as subject to the "Classpath" exception as provided +# by Oracle in the LICENSE file that accompanied this code. +# +# This code is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# version 2 for more details (a copy is included in the LICENSE file that +# accompanied this code). +# +# You should have received a copy of the GNU General Public License version +# 2 along with this work; if not, write to the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +# or visit www.oracle.com if you need additional information or have any +# questions. 
+# + +################################################################################ +# Generate the value class replacements for selected java.base source files + +JAVA_BASE_VALUE_CLASS_REPLACEMENTS := \ + java/lang/Byte.java \ + java/lang/Short.java \ + java/lang/Integer.java \ + java/lang/Long.java \ + java/lang/Float.java \ + java/lang/Double.java \ + java/lang/Boolean.java \ + java/lang/Character.java \ + java/lang/Number.java \ + java/lang/Record.java \ + java/util/Optional.java \ + java/util/OptionalInt.java \ + java/util/OptionalLong.java \ + java/util/OptionalDouble.java \ + java/time/LocalDate.java \ + java/time/LocalDateTime.java \ + java/time/LocalTime.java \ + java/time/Duration.java \ + java/time/Instant.java \ + java/time/MonthDay.java \ + java/time/ZonedDateTime.java \ + java/time/OffsetDateTime.java \ + java/time/OffsetTime.java \ + java/time/YearMonth.java \ + java/time/Year.java \ + java/time/Period.java \ + java/time/chrono/ChronoLocalDateImpl.java \ + java/time/chrono/MinguoDate.java \ + java/time/chrono/HijrahDate.java \ + java/time/chrono/JapaneseDate.java \ + java/time/chrono/ThaiBuddhistDate.java \ + # + +JAVA_BASE_VALUE_CLASS_SRC_PATHS := \ + $(foreach f, $(JAVA_BASE_VALUE_CLASS_REPLACEMENTS), $(addprefix $(TOPDIR)/src/java.base/share/classes/, $(f))) + +$(eval $(call SetupTextFileProcessing, JAVA_BASE_VALUE_CLASS_TARGETS, \ + SOURCE_FILES := $(JAVA_BASE_VALUE_CLASS_SRC_PATHS), \ + SOURCE_BASE_DIR := $(TOPDIR)/src/java.base/share/classes, \ + OUTPUT_DIR := $(SUPPORT_OUTPUTDIR)/gensrc-valueclasses/java.base/, \ + REPLACEMENTS := \ + /\*value\*/ class => value class ; \ + /\*value\*/ record => value record ; \ +)) + +TARGETS += $(JAVA_BASE_VALUE_CLASS_TARGETS) diff --git a/make/modules/java.base/gensrc/GensrcVarHandles.gmk b/make/modules/java.base/gensrc/GensrcVarHandles.gmk index 341a8c9dc2c79..d30267f67d99a 100644 --- a/make/modules/java.base/gensrc/GensrcVarHandles.gmk +++ b/make/modules/java.base/gensrc/GensrcVarHandles.gmk @@ -1,5 
+1,5 @@ # -# Copyright (c) 2015, 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2015, 2026, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -30,35 +30,59 @@ ifeq ($(INCLUDE), true) VARHANDLES_INPUT_DIR := $(MODULE_SRC)/share/classes/java/lang/invoke VARHANDLES_OUTPUT_DIR := $(SUPPORT_OUTPUTDIR)/gensrc/java.base/java/lang/invoke +# These guys are cased because make cannot preserve case... +VARHANDLE_OBJECT_TYPES := Reference FlatValue NonAtomicReference NonAtomicFlatValue ################################################################################ # Setup a rule for generating a VarHandle java class # # arg $1: type for this varhandle define GenerateVarHandle - VARHANDLE_$1_type := $$(strip $$(if $$(filter reference, $1), Object, $1)) - VARHANDLE_$1_Type := $$(call Conv, $1, Type) + # Underlying erased signature type, Object or a primitive type + VARHANDLE_$1_type := \ + $$(strip $$(if $$(filter $(VARHANDLE_OBJECT_TYPES), $1), Object, $1)) + VARHANDLE_$1_InputType := \ + $$(strip $$(if $$(filter $(VARHANDLE_OBJECT_TYPES), $1), $1, \ + $$(call Conv, $1, Type))) + VARHANDLE_$1_Type := \ + $$(strip $$(subst NonAtomicReference, Reference, \ + $$(subst NonAtomicFlatValue, FlatValue, $$(VARHANDLE_$1_InputType)))) $1_KEYS := $$(VARHANDLE_$1_type) CAS + ifneq ($$(filter $(PRIMITIVE_TYPES), $1),) + # Reference types use ArrayVarHandle class + $1_KEYS += Array Static + else ifneq ($$(filter Reference NonAtomicReference, $1),) + $1_KEYS += Reference Static + else ifneq ($$(filter FlatValue NonAtomicFlatValue, $1),) + # No static field is flat in Hotspot + $1_KEYS += FlatValue + endif + ifneq ($$(filter byte short char, $1),) $1_KEYS += ShorterThanInt endif - ifeq ($$(filter boolean reference, $1),) + ifneq ($$(filter $(NUMBER_TYPES), $1),) $1_KEYS += AtomicAdd endif - ifeq ($$(filter float double 
reference, $1),) + ifneq ($$(filter $(BITWISE_PRIMITIVE_TYPES), $1),) $1_KEYS += Bitwise endif + ifeq ($$(filter NonAtomicReference NonAtomicFlatValue, $1),) + # Everyone except NonAtomicXxx have non-plain access + $1_KEYS += NonPlainAccess + endif $$(eval $$(call SetupStreamPreProcessing, GEN_VARHANDLE_$1, \ SOURCE_FILE := $$(VARHANDLES_INPUT_DIR)/X-VarHandle.java.template, \ - OUTPUT_FILE := $$(VARHANDLES_OUTPUT_DIR)/VarHandle$$(VARHANDLE_$1_Type)s.java, \ + OUTPUT_FILE := $$(VARHANDLES_OUTPUT_DIR)/VarHandle$$(VARHANDLE_$1_InputType)s.java, \ INFO := Generating VarHandle class for $1, \ SUBST_EMPTY_LINES := false, \ KEYS := $$($1_KEYS), \ REPLACEMENTS := \ type=$$(VARHANDLE_$1_type) \ - Type=$$(VARHANDLE_$1_Type), \ + Type=$$(VARHANDLE_$1_Type) \ + InputType=$$(VARHANDLE_$1_InputType), \ )) TARGETS += $$(GEN_VARHANDLE_$1) endef @@ -143,7 +167,7 @@ endef ################################################################################ # Generate all VarHandle related classes -$(foreach t, $(PRIMITIVE_TYPES) reference, \ +$(foreach t, $(PRIMITIVE_TYPES) $(VARHANDLE_OBJECT_TYPES), \ $(eval $(call GenerateVarHandle,$t)) \ ) diff --git a/make/test/BuildJtregValueClassPlugin.gmk b/make/test/BuildJtregValueClassPlugin.gmk new file mode 100644 index 0000000000000..125af5c2f5492 --- /dev/null +++ b/make/test/BuildJtregValueClassPlugin.gmk @@ -0,0 +1,88 @@ +# +# Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# This code is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 only, as +# published by the Free Software Foundation. Oracle designates this +# particular file as subject to the "Classpath" exception as provided +# by Oracle in the LICENSE file that accompanied this code. 
+# +# This code is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# version 2 for more details (a copy is included in the LICENSE file that +# accompanied this code). +# +# You should have received a copy of the GNU General Public License version +# 2 along with this work; if not, write to the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +# or visit www.oracle.com if you need additional information or have any +# questions. +# + +include MakeFileStart.gmk + +################################################################################ +# Builds one JAR used by jtreg to test value classes (JEP 401): +# +# valueClassPlugin.jar -- Contains ValueClassPlugin (a javac Plugin that +# rewrites @AsValueClass classes to value classes at parse time) together +# with @AsValueClass and the META-INF service descriptor, compiled WITH +# --enable-preview and the required internal-API exports. +# +# Usage in test runs (via RunTests.gmk): +# make test TEST=... JTREG=VALUE_CLASS_PLUGIN=true +# +# The plugin is also enabled automatically when --enable-preview is passed via +# VM_OPTIONS or JAVA_OPTIONS and the plugin JARs are present in the test image: +# make test TEST=... 
JTREG=VM_OPTIONS=--enable-preview +# +################################################################################ + +include CopyFiles.gmk +include JavaCompilation.gmk + +VCP_BASEDIR := $(TOPDIR)/test/jtreg_value_class_plugin +VCP_SUPPORT := $(SUPPORT_OUTPUTDIR)/test/jtreg_value_class_plugin + +# Compile ValueClassPlugin WITH --enable-preview +$(eval $(call SetupJavaCompilation, BUILD_VCP_PLUGIN, \ + TARGET_RELEASE := $(TARGET_RELEASE_NEWJDK_UPGRADED), \ + SRC := $(VCP_BASEDIR)/plugin, \ + BIN := $(VCP_SUPPORT)/plugin_classes, \ + JAR := $(VCP_SUPPORT)/valueClassPlugin.jar, \ + JAVAC_FLAGS := \ + --add-exports jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED \ + --add-exports jdk.compiler/com.sun.tools.javac.code=ALL-UNNAMED \ + --add-exports jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED \ + --add-exports jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED \ + --enable-preview, \ + DISABLED_WARNINGS := preview, \ +)) + +TARGETS += $(BUILD_VCP_PLUGIN) + +################################################################################ +# Targets for building test-image. +################################################################################ + +$(eval $(call SetupCopyFiles, COPY_VCP, \ + SRC := $(VCP_SUPPORT), \ + DEST := $(TEST_IMAGE_DIR)/jtreg_value_class_plugin, \ + FILES := \ + $(VCP_SUPPORT)/valueClassPlugin.jar, \ +)) + +IMAGES_TARGETS += $(COPY_VCP) + +build: $(TARGETS) +images: $(IMAGES_TARGETS) + +.PHONY: images + +################################################################################ + +include MakeFileEnd.gmk diff --git a/make/test/BuildMicrobenchmark.gmk b/make/test/BuildMicrobenchmark.gmk index c71ef7c61af6e..cdbdab6f68f19 100644 --- a/make/test/BuildMicrobenchmark.gmk +++ b/make/test/BuildMicrobenchmark.gmk @@ -78,13 +78,14 @@ MICROBENCHMARK_MANIFEST := Build: $(FULL_VERSION)\n\ # requires the use of -processor option during benchmark compilation. 
# Build microbenchmark suite for the current JDK +# Need to patch java.base to include preview classes not found in interim javac $(eval $(call SetupJavaCompilation, BUILD_JDK_MICROBENCHMARK, \ TARGET_RELEASE := $(TARGET_RELEASE_NEWJDK_UPGRADED), \ SMALL_JAVA := false, \ CLASSPATH := $(JMH_COMPILE_JARS), \ CREATE_API_DIGEST := true, \ - DISABLED_WARNINGS := restricted this-escape rawtypes removal cast \ - serial preview, \ + DISABLED_WARNINGS := restricted this-escape processing rawtypes removal cast \ + serial preview unchecked deprecation dangling-doc-comments, \ SRC := $(MICROBENCHMARK_SRC), \ BIN := $(MICROBENCHMARK_CLASSES), \ JAVAC_FLAGS := \ @@ -95,12 +96,15 @@ $(eval $(call SetupJavaCompilation, BUILD_JDK_MICROBENCHMARK, \ --add-exports java.base/jdk.internal.jimage=ALL-UNNAMED \ --add-exports java.base/jdk.internal.misc=ALL-UNNAMED \ --add-exports java.base/jdk.internal.util=ALL-UNNAMED \ + --add-exports java.base/jdk.internal.value=ALL-UNNAMED \ --add-exports java.base/jdk.internal.vm=ALL-UNNAMED \ + --add-exports java.base/jdk.internal.vm.annotation=ALL-UNNAMED \ --add-exports java.base/sun.invoke.util=ALL-UNNAMED \ --add-exports java.base/sun.security.util=ALL-UNNAMED \ --add-exports java.base/sun.security.util.math=ALL-UNNAMED \ --add-exports java.base/sun.security.util.math.intpoly=ALL-UNNAMED \ --enable-preview \ + --patch-module java.base=$(SUPPORT_OUTPUTDIR)/preview/java.base \ -XDsuppressNotes \ -processor org.openjdk.jmh.generators.BenchmarkProcessor \ -s $(MICROBENCHMARK_GENSRC), \ diff --git a/make/test/BuildTestLib.gmk b/make/test/BuildTestLib.gmk index 3668a918ab1b9..010b910e697b4 100644 --- a/make/test/BuildTestLib.gmk +++ b/make/test/BuildTestLib.gmk @@ -46,6 +46,7 @@ $(eval $(call SetupJavaCompilation, BUILD_WB_JAR, \ SRC := $(TEST_LIB_SOURCE_DIR)/jdk/test/whitebox/, \ BIN := $(TEST_LIB_SUPPORT)/wb_classes, \ JAR := $(TEST_LIB_SUPPORT)/wb.jar, \ + DISABLED_WARNINGS := preview, \ JAVAC_FLAGS := --enable-preview, \ )) @@ -58,10 +59,11 @@ 
endif $(eval $(call SetupJavaCompilation, BUILD_TEST_LIB_JAR, \ TARGET_RELEASE := $(TARGET_RELEASE_NEWJDK_UPGRADED), \ SRC := $(TEST_LIB_SOURCE_DIR), \ - EXCLUDES := $(BUILD_TEST_LIB_JAR_EXCLUDES), \ + EXCLUDES := $(BUILD_TEST_LIB_JAR_EXCLUDES) org, \ BIN := $(TEST_LIB_SUPPORT)/test-lib_classes, \ HEADERS := $(TEST_LIB_SUPPORT)/test-lib_headers, \ JAR := $(TEST_LIB_SUPPORT)/test-lib.jar, \ + DISABLED_WARNINGS := preview, \ JAVAC_FLAGS := --add-exports java.base/sun.security.util=ALL-UNNAMED \ --add-exports java.base/jdk.internal.classfile=ALL-UNNAMED \ --add-exports java.base/jdk.internal.classfile.attribute=ALL-UNNAMED \ @@ -72,6 +74,12 @@ $(eval $(call SetupJavaCompilation, BUILD_TEST_LIB_JAR, \ --add-exports java.base/sun.security.provider.certpath=ALL-UNNAMED \ --add-exports java.base/sun.security.tools.keytool=ALL-UNNAMED \ --add-exports java.base/sun.security.x509=ALL-UNNAMED \ + --add-exports jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED \ + --add-exports jdk.compiler/com.sun.tools.javac.code=ALL-UNNAMED \ + --add-exports jdk.compiler/com.sun.tools.javac.comp=ALL-UNNAMED \ + --add-exports jdk.compiler/com.sun.tools.javac.main=ALL-UNNAMED \ + --add-exports jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED \ + --add-exports jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED \ --enable-preview, \ )) diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad index 49f3419dfb635..5ba199e5c73f2 100644 --- a/src/hotspot/cpu/aarch64/aarch64.ad +++ b/src/hotspot/cpu/aarch64/aarch64.ad @@ -1684,6 +1684,9 @@ int MachCallRuntimeNode::ret_addr_offset() { CodeBlob *cb = CodeCache::find_blob(_entry_point); if (cb) { return 1 * NativeInstruction::instruction_size; + } else if (_entry_point == nullptr) { + // See CallLeafNoFPIndirect + return 1 * NativeInstruction::instruction_size; } else { return 6 * NativeInstruction::instruction_size; } @@ -1792,49 +1795,15 @@ void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const { void 
MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { Compile* C = ra_->C; - // n.b. frame size includes space for return pc and rfp - const int framesize = C->output()->frame_size_in_bytes(); - - if (C->clinit_barrier_on_entry()) { - assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started"); - Label L_skip_barrier; + __ verified_entry(C, 0); - __ mov_metadata(rscratch2, C->method()->holder()->constant_encoding()); - __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier); - __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); - __ bind(L_skip_barrier); + if (C->stub_function() == nullptr) { + __ entry_barrier(); } - if (C->max_vector_size() > 0) { - __ reinitialize_ptrue(); - } - - int bangsize = C->output()->bang_size_in_bytes(); - if (C->output()->need_stack_bang(bangsize)) - __ generate_stack_overflow_check(bangsize); - - __ build_frame(framesize); - - if (C->stub_function() == nullptr) { - BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); - // Dummy labels for just measuring the code size - Label dummy_slow_path; - Label dummy_continuation; - Label dummy_guard; - Label* slow_path = &dummy_slow_path; - Label* continuation = &dummy_continuation; - Label* guard = &dummy_guard; - if (!Compile::current()->output()->in_scratch_emit_size()) { - // Use real labels from actual stub when not emitting code for the purpose of measuring its size - C2EntryBarrierStub* stub = new (Compile::current()->comp_arena()) C2EntryBarrierStub(); - Compile::current()->output()->add_stub(stub); - slow_path = &stub->entry(); - continuation = &stub->continuation(); - guard = &stub->guard(); - } - // In the C2 code, we move the non-hot part of nmethod entry barriers out-of-line to a stub. 
- bs->nmethod_entry_barrier(masm, slow_path, continuation, guard); + if (!Compile::current()->output()->in_scratch_emit_size()) { + __ bind(*_verified_entry); } if (VerifyStackAtCalls) { @@ -1851,12 +1820,6 @@ void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { } } -uint MachPrologNode::size(PhaseRegAlloc* ra_) const -{ - return MachNode::size(ra_); // too many variables; just compute it - // the hard way -} - int MachPrologNode::reloc() const { return 0; @@ -1899,7 +1862,7 @@ void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { Compile* C = ra_->C; int framesize = C->output()->frame_slots() << LogBytesPerInt; - __ remove_frame(framesize); + __ remove_frame(framesize, C->needs_stack_repair()); if (StackReservedPages > 0 && C->has_reserved_stack_access()) { __ reserved_stack_check(); @@ -1918,11 +1881,6 @@ void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { } } -uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { - // Variable size. Determine dynamically. - return MachNode::size(ra_); -} - int MachEpilogNode::reloc() const { // Return number of relocatable values contained in this instruction. return 1; // 1 for polling page. 
@@ -2221,8 +2179,47 @@ uint BoxLockNode::size(PhaseRegAlloc *ra_) const { } } -//============================================================================= +///============================================================================= +#ifndef PRODUCT +void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const +{ + st->print_cr("# MachVEPNode"); + if (!_verified) { + st->print_cr("\t load_class"); + } else { + st->print_cr("\t unpack_inline_arg"); + } +} +#endif +void MachVEPNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const +{ + if (!_verified) { + __ ic_check(1); + } else { + if (ra_->C->stub_function() == nullptr) { + // Emit the entry barrier in a temporary frame before unpacking because + // it can deopt, which would require packing the scalarized args again. + __ verified_entry(ra_->C, 0); + __ entry_barrier(); + int framesize = ra_->C->output()->frame_slots() << LogBytesPerInt; + __ remove_frame(framesize, false); + } + // Unpack inline type args passed as oop and then jump to + // the verified entry point (skipping the unverified entry). 
+ int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only); + // Emit code for verified entry and save increment for stack repair on return + __ verified_entry(ra_->C, sp_inc); + if (Compile::current()->output()->in_scratch_emit_size()) { + Label dummy_verified_entry; + __ b(dummy_verified_entry); + } else { + __ b(*_verified_entry); + } + } +} + +//============================================================================= #ifndef PRODUCT void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const { @@ -2239,11 +2236,6 @@ void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const __ ic_check(InteriorEntryAlignment); } -uint MachUEPNode::size(PhaseRegAlloc* ra_) const -{ - return MachNode::size(ra_); -} - // REQUIRED EMIT CODE //============================================================================= @@ -3687,6 +3679,37 @@ encode %{ // Check that stack depth is unchanged: find majik cookie on stack __ call_Unimplemented(); } + if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) { + // The last return value is not set by the callee but used to pass the null marker to compiled code. + // Search for the corresponding projection, get the register and emit code that initializes it. + uint con = (tf()->range_cc()->cnt() - 1); + for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) { + ProjNode* proj = fast_out(i)->as_Proj(); + if (proj->_con == con) { + // Set null marker if r0 is non-null (a non-null value is returned buffered or scalarized) + OptoReg::Name optoReg = ra_->get_reg_first(proj); + VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP)); + Register toReg = reg->is_reg() ? 
reg->as_Register() : rscratch1; + __ cmp(r0, zr); + __ cset(toReg, Assembler::NE); + if (reg->is_stack()) { + int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size; + __ str(toReg, Address(sp, st_off)); + } + break; + } + } + if (return_value_is_used()) { + // An inline type is returned as fields in multiple registers. + // R0 either contains an oop if the inline type is buffered or a pointer + // to the corresponding InlineKlass with the lowest bit set to 1. Zero r0 + // if the lowest bit is set to allow C2 to use the oop after null checking. + // r0 &= (r0 & 1) - 1 + __ andr(rscratch1, r0, 0x1); + __ sub(rscratch1, rscratch1, 0x1); + __ andr(r0, r0, rscratch1); + } + } %} enc_class aarch64_enc_java_to_runtime(method meth) %{ @@ -3976,6 +3999,16 @@ operand immI_le_4() interface(CONST_INTER); %} +operand immI_4() +%{ + predicate(n->get_int() == 4); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + operand immI_16() %{ predicate(n->get_int() == 16); @@ -8112,6 +8145,36 @@ instruct castX2P(iRegPNoSp dst, iRegL src) %{ ins_pipe(ialu_reg); %} +instruct castI2N(iRegNNoSp dst, iRegI src) %{ + match(Set dst (CastI2N src)); + + ins_cost(INSN_COST); + format %{ "mov $dst, $src\t# int -> narrow ptr" %} + + ins_encode %{ + if ($dst$$reg != $src$$reg) { + __ mov(as_Register($dst$$reg), as_Register($src$$reg)); + } + %} + + ins_pipe(ialu_reg); +%} + +instruct castN2X(iRegLNoSp dst, iRegN src) %{ + match(Set dst (CastP2X src)); + + ins_cost(INSN_COST); + format %{ "mov $dst, $src\t# ptr -> long" %} + + ins_encode %{ + if ($dst$$reg != $src$$reg) { + __ mov(as_Register($dst$$reg), as_Register($src$$reg)); + } + %} + + ins_pipe(ialu_reg); +%} + instruct castP2X(iRegLNoSp dst, iRegP src) %{ match(Set dst (CastP2X src)); @@ -14128,9 +14191,9 @@ instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{ // ============================================================================ // clearing of an array -instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 
base, Universe dummy, rFlagsReg cr) +instruct clearArray_reg_reg_immL0(iRegL_R11 cnt, iRegP_R10 base, immL0 zero, Universe dummy, rFlagsReg cr) %{ - match(Set dummy (ClearArray cnt base)); + match(Set dummy (ClearArray (Binary cnt base) zero)); effect(USE_KILL cnt, USE_KILL base, KILL cr); ins_cost(4 * INSN_COST); @@ -14147,11 +14210,28 @@ instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlag ins_pipe(pipe_class_memory); %} -instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr) +instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, iRegL val, Universe dummy, rFlagsReg cr) %{ - predicate((uint64_t)n->in(2)->get_long() - < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)); - match(Set dummy (ClearArray cnt base)); + predicate(((ClearArrayNode*)n)->word_copy_only()); + match(Set dummy (ClearArray (Binary cnt base) val)); + effect(USE_KILL cnt, USE_KILL base, KILL cr); + + ins_cost(4 * INSN_COST); + format %{ "ClearArray $cnt, $base, $val" %} + + ins_encode %{ + __ fill_words($base$$Register, $cnt$$Register, $val$$Register); + %} + + ins_pipe(pipe_class_memory); +%} + +instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, immL0 zero, Universe dummy, rFlagsReg cr) +%{ + predicate((uint64_t)n->in(2)->in(1)->get_long() + < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord) + && !((ClearArrayNode*)n)->word_copy_only()); + match(Set dummy (ClearArray (Binary cnt base) zero)); effect(TEMP temp, USE_KILL base, KILL cr); ins_cost(4 * INSN_COST); @@ -15467,8 +15547,28 @@ instruct CallLeafDirectVector(method meth) // Call Runtime Instruction +// entry point is null, target holds the address to call +instruct CallLeafNoFPIndirect(iRegP target) +%{ + predicate(n->as_Call()->entry_point() == nullptr); + + match(CallLeafNoFP target); + + ins_cost(CALL_COST); + + format %{ "CALL, runtime leaf nofp indirect $target" %} + + ins_encode %{ + __ blr($target$$Register); + %} + + 
ins_pipe(pipe_class_call); +%} + instruct CallLeafNoFPDirect(method meth) %{ + predicate(n->as_Call()->entry_point() != nullptr); + match(CallLeafNoFP); effect(USE meth); diff --git a/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp index 9bf466785352c..25b0625d60edc 100644 --- a/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -119,6 +119,72 @@ void DivByZeroStub::emit_code(LIR_Assembler* ce) { #endif } +// Implementation of LoadFlattenedArrayStub + +LoadFlattenedArrayStub::LoadFlattenedArrayStub(LIR_Opr array, LIR_Opr index, LIR_Opr result, CodeEmitInfo* info) { + _array = array; + _index = index; + _result = result; + _scratch_reg = FrameMap::r0_oop_opr; + _info = new CodeEmitInfo(info); +} + +void LoadFlattenedArrayStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + ce->store_parameter(_array->as_register(), 1); + ce->store_parameter(_index->as_register(), 0); + __ far_call(RuntimeAddress(Runtime1::entry_for(StubId::c1_load_flat_array_id))); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + if (_result->as_register() != r0) { + __ mov(_result->as_register(), r0); + } + __ b(_continuation); +} + + +// Implementation of StoreFlattenedArrayStub + +StoreFlattenedArrayStub::StoreFlattenedArrayStub(LIR_Opr array, LIR_Opr index, LIR_Opr value, CodeEmitInfo* info) { + _array = array; + _index = index; + _value = value; + _scratch_reg = FrameMap::r0_oop_opr; + _info = new CodeEmitInfo(info); +} + + +void StoreFlattenedArrayStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() 
== 0, "frame size should be fixed"); + __ bind(_entry); + ce->store_parameter(_array->as_register(), 2); + ce->store_parameter(_index->as_register(), 1); + ce->store_parameter(_value->as_register(), 0); + __ far_call(RuntimeAddress(Runtime1::entry_for(StubId::c1_store_flat_array_id))); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + __ b(_continuation); +} + +// Implementation of SubstitutabilityCheckStub +SubstitutabilityCheckStub::SubstitutabilityCheckStub(LIR_Opr left, LIR_Opr right, CodeEmitInfo* info) { + _left = left; + _right = right; + _scratch_reg = FrameMap::r0_oop_opr; + _info = new CodeEmitInfo(info); +} + +void SubstitutabilityCheckStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + ce->store_parameter(_left->as_register(), 1); + ce->store_parameter(_right->as_register(), 0); + __ far_call(RuntimeAddress(Runtime1::entry_for(StubId::c1_substitutability_check_id))); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + __ b(_continuation); +} // Implementation of NewInstanceStub @@ -176,11 +242,13 @@ void NewTypeArrayStub::emit_code(LIR_Assembler* ce) { // Implementation of NewObjectArrayStub -NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) { +NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, + CodeEmitInfo* info, bool is_null_free) { _klass_reg = klass_reg; _result = result; _length = length; _info = new CodeEmitInfo(info); + _is_null_free = is_null_free; } @@ -189,7 +257,13 @@ void NewObjectArrayStub::emit_code(LIR_Assembler* ce) { __ bind(_entry); assert(_length->as_register() == r19, "length must in r19,"); assert(_klass_reg->as_register() == r3, "klass_reg must in r3"); - __ far_call(RuntimeAddress(Runtime1::entry_for(StubId::c1_new_object_array_id))); + + if (_is_null_free) { + __ 
far_call(RuntimeAddress(Runtime1::entry_for(StubId::c1_new_null_free_array_id))); + } else { + __ far_call(RuntimeAddress(Runtime1::entry_for(StubId::c1_new_object_array_id))); + } + ce->add_call_info_here(_info); ce->verify_oop_map(_info); assert(_result->as_register() == r0, "result must in r0"); @@ -199,6 +273,16 @@ void NewObjectArrayStub::emit_code(LIR_Assembler* ce) { void MonitorEnterStub::emit_code(LIR_Assembler* ce) { assert(__ rsp_offset() == 0, "frame size should be fixed"); __ bind(_entry); + if (_throw_ie_stub != nullptr) { + // When we come here, _obj_reg has already been checked to be non-null. + __ ldr(rscratch1, Address(_obj_reg->as_register(), oopDesc::mark_offset_in_bytes())); + __ mov(rscratch2, markWord::inline_type_pattern); + __ andr(rscratch1, rscratch1, rscratch2); + + __ cmp(rscratch1, rscratch2); + __ br(Assembler::EQ, *_throw_ie_stub->entry()); + } + ce->store_parameter(_obj_reg->as_register(), 1); ce->store_parameter(_lock_reg->as_register(), 0); StubId enter_id; diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp index 87451b5a07a36..1fc0f43ed6caf 100644 --- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp @@ -33,13 +33,16 @@ #include "c1/c1_Runtime1.hpp" #include "c1/c1_ValueStack.hpp" #include "ci/ciArrayKlass.hpp" +#include "ci/ciInlineKlass.hpp" #include "ci/ciInstance.hpp" +#include "ci/ciObjArrayKlass.hpp" #include "code/aotCodeCache.hpp" #include "code/compiledIC.hpp" #include "gc/shared/collectedHeap.hpp" #include "gc/shared/gc_globals.hpp" #include "nativeInst_aarch64.hpp" #include "oops/objArrayKlass.hpp" +#include "oops/oop.inline.hpp" #include "runtime/frame.inline.hpp" #include "runtime/sharedRuntime.hpp" #include "runtime/stubRoutines.hpp" @@ -413,7 +416,7 @@ int LIR_Assembler::emit_unwind_handler() { // remove the activation and dispatch to the unwind handler __ block_comment("remove_frame and 
dispatch to the unwind handler"); - __ remove_frame(initial_frame_size_in_bytes()); + __ remove_frame(initial_frame_size_in_bytes(), needs_stack_repair()); __ far_jump(RuntimeAddress(Runtime1::entry_for(StubId::c1_unwind_exception_id))); // Emit the slow path assembly @@ -465,8 +468,50 @@ void LIR_Assembler::add_debug_info_for_branch(address adr, CodeEmitInfo* info) { void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) { assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == r0, "word returns are in r0,"); + if (InlineTypeReturnedAsFields) { + // Check if we are returning a non-null inline type and load its fields into registers + ciType* return_type = compilation()->method()->return_type(); + if (return_type->is_inlinetype()) { + ciInlineKlass* vk = return_type->as_inline_klass(); + if (vk->can_be_returned_as_fields()) { + address unpack_handler = vk->unpack_handler(); + assert(unpack_handler != nullptr, "must be"); + __ far_call(RuntimeAddress(unpack_handler)); + } + } else if (return_type->is_instance_klass() && (!return_type->is_loaded() || StressCallingConvention)) { + Label skip; + Label not_null; + __ cbnz(r0, not_null); + // Returned value is null, zero all return registers because they may belong to oop fields + __ mov(j_rarg1, zr); + __ mov(j_rarg2, zr); + __ mov(j_rarg3, zr); + __ mov(j_rarg4, zr); + __ mov(j_rarg5, zr); + __ mov(j_rarg6, zr); + __ mov(j_rarg7, zr); + __ b(skip); + __ bind(not_null); + + // Check if we are returning a non-null inline type and load its fields into registers + __ test_oop_is_not_inline_type(r0, rscratch2, skip, /* can_be_null= */ false); + + // Load fields from a buffered value with an inline class specific handler + __ load_klass(rscratch1 /*dst*/, r0 /*src*/); + __ ldr(rscratch1, Address(rscratch1, InlineKlass::adr_members_offset())); + __ ldr(rscratch1, Address(rscratch1, InlineKlass::unpack_handler_offset())); + // Unpack handler can be null if inline type is not 
scalarizable in returns + __ cbz(rscratch1, skip); + __ blr(rscratch1); + + __ bind(skip); + } + // At this point, r0 points to the value object (for interpreter or C1 caller). + // The fields of the object are copied into registers (for C2 caller). + } + // Pop the stack before the safepoint code - __ remove_frame(initial_frame_size_in_bytes()); + __ remove_frame(initial_frame_size_in_bytes(), needs_stack_repair()); if (StackReservedPages > 0 && compilation()->has_reserved_stack_access()) { __ reserved_stack_check(); @@ -478,6 +523,10 @@ void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) { __ ret(lr); } +int LIR_Assembler::store_inline_type_fields_to_buf(ciInlineKlass* vk) { + return (__ store_inline_type_fields_to_buf(vk, false)); +} + int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) { guarantee(info != nullptr, "Shouldn't be null"); __ get_polling_page(rscratch1, relocInfo::poll_type); @@ -537,10 +586,10 @@ void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_cod } case T_OBJECT: { - if (patch_code == lir_patch_none) { - jobject2reg(c->as_jobject(), dest->as_register()); - } else { + if (patch_code != lir_patch_none) { jobject2reg_with_patching(dest->as_register(), info); + } else { + jobject2reg(c->as_jobject(), dest->as_register()); } break; } @@ -1013,6 +1062,21 @@ void LIR_Assembler::load_unordered(LIR_Address *from_addr, LIR_Opr dest, } } +void LIR_Assembler::move(LIR_Opr src, LIR_Opr dst) { + assert(dst->is_cpu_register(), "must be"); + assert(dst->type() == src->type(), "must be"); + + if (src->is_cpu_register()) { + reg2reg(src, dst); + } else if (src->is_stack()) { + stack2reg(src, dst, dst->type()); + } else if (src->is_constant()) { + const2reg(src, dst, lir_patch_none, nullptr); + } else { + ShouldNotReachHere(); + } +} + void LIR_Assembler::load_volatile(LIR_Address *from_addr, LIR_Opr dest, BasicType type, CodeEmitInfo* info) { __ lea(rscratch1, as_Address(from_addr)); @@ -1240,7 
+1304,7 @@ void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) { Register len = op->len()->as_register(); __ uxtw(len, len); - if (UseSlowPath || + if (UseSlowPath || op->always_slow_path() || (!UseFastNewObjectArray && is_reference_type(op->type())) || (!UseFastNewTypeArray && !is_reference_type(op->type()))) { __ b(*op->stub()->entry()); @@ -1317,27 +1381,29 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L assert_different_registers(obj, k_RInfo, klass_RInfo); - if (should_profile) { - Register mdo = klass_RInfo; - __ mov_metadata(mdo, md->constant_encoding()); - Label not_null; - __ cbnz(obj, not_null); - // Object is null; update MDO and exit - Address data_addr - = __ form_address(rscratch2, mdo, - md->byte_offset_of_slot(data, DataLayout::flags_offset()), - 0); - __ ldrb(rscratch1, data_addr); - __ orr(rscratch1, rscratch1, BitData::null_seen_byte_constant()); - __ strb(rscratch1, data_addr); - __ b(*obj_is_null); - __ bind(not_null); - - Register recv = k_RInfo; - __ load_klass(recv, obj); - type_profile_helper(mdo, md, data, recv); - } else { - __ cbz(obj, *obj_is_null); + if (op->need_null_check()) { + if (should_profile) { + Register mdo = klass_RInfo; + __ mov_metadata(mdo, md->constant_encoding()); + Label not_null; + __ cbnz(obj, not_null); + // Object is null; update MDO and exit + Address data_addr + = __ form_address(rscratch2, mdo, + md->byte_offset_of_slot(data, DataLayout::flags_offset()), + 0); + __ ldrb(rscratch1, data_addr); + __ orr(rscratch1, rscratch1, BitData::null_seen_byte_constant()); + __ strb(rscratch1, data_addr); + __ b(*obj_is_null); + __ bind(not_null); + + Register recv = k_RInfo; + __ load_klass(recv, obj); + type_profile_helper(mdo, md, data, recv); + } else { + __ cbz(obj, *obj_is_null); + } } if (!k->is_loaded()) { @@ -1348,6 +1414,7 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L __ verify_oop(obj); if (op->fast_check()) { + assert(!k->is_loaded() 
|| !k->is_obj_array_klass(), "Use refined array for a direct pointer comparison"); // get object class // not a safepoint as obj null check happens earlier __ load_klass(rscratch1, obj); @@ -1370,7 +1437,18 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L // See if we get an immediate positive hit __ br(Assembler::EQ, *success_target); // check for self - __ cmp(klass_RInfo, k_RInfo); + if (k->is_loaded() && k->is_obj_array_klass()) { + // For a direct pointer comparison, we need the refined array klass pointer + ciKlass* k_refined = ciObjArrayKlass::make(k->as_obj_array_klass()->element_klass()); + if (!k_refined->is_loaded()) { + bailout("encountered unloaded_ciobjarrayklass due to out of memory error"); + return; + } + __ mov_metadata(rscratch1, k_refined->constant_encoding()); + __ cmp(klass_RInfo, rscratch1); + } else { + __ cmp(klass_RInfo, k_RInfo); + } __ br(Assembler::EQ, *success_target); __ stp(klass_RInfo, k_RInfo, Address(__ pre(sp, -2 * wordSize))); @@ -1491,6 +1569,90 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { } } +void LIR_Assembler::emit_opFlattenedArrayCheck(LIR_OpFlattenedArrayCheck* op) { + // We are loading/storing from/to an array that *may* be a flat array (the + // declared type is Object[], abstract[], interface[] or VT.ref[]). + // If this array is a flat array, take the slow path. + __ test_flat_array_oop(op->array()->as_register(), op->tmp()->as_register(), *op->stub()->entry()); +} + +void LIR_Assembler::emit_opNullFreeArrayCheck(LIR_OpNullFreeArrayCheck* op) { + // We are storing into an array that *may* be null-free (the declared type is + // Object[], abstract[], interface[] or VT.ref[]). 
+ Label test_mark_word; + Register tmp = op->tmp()->as_register(); + __ ldr(tmp, Address(op->array()->as_register(), oopDesc::mark_offset_in_bytes())); + __ tst(tmp, markWord::unlocked_value); + __ br(Assembler::NE, test_mark_word); + __ load_prototype_header(tmp, op->array()->as_register()); + __ bind(test_mark_word); + __ tst(tmp, markWord::null_free_array_bit_in_place); +} + +void LIR_Assembler::emit_opSubstitutabilityCheck(LIR_OpSubstitutabilityCheck* op) { + Label L_oops_equal; + Label L_oops_not_equal; + Label L_end; + + Register left = op->left()->as_register(); + Register right = op->right()->as_register(); + + __ cmp(left, right); + __ br(Assembler::EQ, L_oops_equal); + + // (1) Null check -- if one of the operands is null, the other must not be null (because + // the two references are not equal), so they are not substitutable, + __ cbz(left, L_oops_not_equal); + __ cbz(right, L_oops_not_equal); + + ciKlass* left_klass = op->left_klass(); + ciKlass* right_klass = op->right_klass(); + + // (2) Inline type check -- if either of the operands is not an inline type, + // they are not substitutable. We do this only if we are not sure that the + // operands are inline type + if ((left_klass == nullptr || right_klass == nullptr) ||// The klass is still unloaded, or came from a Phi node. + !left_klass->is_inlinetype() || !right_klass->is_inlinetype()) { + Register tmp1 = op->tmp1()->as_register(); + Register tmp2 = op->tmp2()->as_register(); + __ mov(tmp1, markWord::inline_type_pattern); + __ ldr(tmp2, Address(left, oopDesc::mark_offset_in_bytes())); + __ andr(tmp1, tmp1, tmp2); + __ ldr(tmp2, Address(right, oopDesc::mark_offset_in_bytes())); + __ andr(tmp1, tmp1, tmp2); + __ cmp(tmp1, (u1)markWord::inline_type_pattern); + __ br(Assembler::NE, L_oops_not_equal); + } + + // (3) Same klass check: if the operands are of different klasses, they are not substitutable. 
+ if (left_klass != nullptr && left_klass->is_inlinetype() && left_klass == right_klass) { + // No need to load klass -- the operands are statically known to be the same inline klass. + __ b(*op->stub()->entry()); + } else { + Register tmp1 = op->tmp1()->as_register(); + Register tmp2 = op->tmp2()->as_register(); + __ cmp_klasses_from_objects(left, right, tmp1, tmp2); + __ br(Assembler::EQ, *op->stub()->entry()); // same klass -> do slow check + // fall through to L_oops_not_equal + } + + __ bind(L_oops_not_equal); + move(op->not_equal_result(), op->result_opr()); + __ b(L_end); + + // We've returned from the stub. R0 contains 0x0 IFF the two + // operands are not substitutable. (Don't compare against 0x1 in case the + // C compiler is naughty) + __ bind(*op->stub()->continuation()); + __ cbz(r0, L_oops_not_equal); // (call_stub() == 0x0) -> not_equal + + __ bind(L_oops_equal); + move(op->equal_result(), op->result_opr()); // (call_stub() != 0x0) -> equal + // fall-through + __ bind(L_end); +} + + void LIR_Assembler::casw(Register addr, Register newval, Register cmpval) { __ cmpxchg(addr, cmpval, newval, Assembler::word, /* acquire*/ true, /* release*/ true, /* weak*/ false, rscratch1); __ cset(rscratch1, Assembler::NE); @@ -2002,7 +2164,7 @@ void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { bailout("trampoline stub overflow"); return; } - add_call_info(code_offset(), op->info()); + add_call_info(code_offset(), op->info(), op->maybe_return_as_fields()); __ post_call_nop(); } @@ -2013,7 +2175,7 @@ void LIR_Assembler::ic_call(LIR_OpJavaCall* op) { bailout("trampoline stub overflow"); return; } - add_call_info(code_offset(), op->info()); + add_call_info(code_offset(), op->info(), op->maybe_return_as_fields()); __ post_call_nop(); } @@ -2176,6 +2338,17 @@ void LIR_Assembler::store_parameter(jobject o, int offset_from_rsp_in_words) { __ str(rscratch1, Address(sp, offset_from_rsp_in_bytes)); } +void 
LIR_Assembler::arraycopy_inlinetype_check(Register obj, Register tmp, CodeStub* slow_path, bool is_dest, bool null_check) { + if (null_check) { + __ cbz(obj, *slow_path->entry()); + } + if (is_dest) { + __ test_null_free_array_oop(obj, tmp, *slow_path->entry()); + __ test_flat_array_oop(obj, tmp, *slow_path->entry()); + } else { + __ test_flat_array_oop(obj, tmp, *slow_path->entry()); + } +} // This code replaces a call to arraycopy; no exception may // be thrown in this code, they must be thrown in the System.arraycopy @@ -2194,6 +2367,12 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { BasicType basic_type = default_type != nullptr ? default_type->element_type()->basic_type() : T_ILLEGAL; if (is_reference_type(basic_type)) basic_type = T_OBJECT; + if (flags & LIR_OpArrayCopy::always_slow_path) { + __ b(*stub->entry()); + __ bind(*stub->continuation()); + return; + } + // if we don't know anything, just go through the generic arraycopy if (default_type == nullptr // || basic_type == T_OBJECT ) { @@ -2247,6 +2426,14 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { return; } + // Handle inline type arrays + if (flags & LIR_OpArrayCopy::src_inlinetype_check) { + arraycopy_inlinetype_check(src, tmp, stub, false, (flags & LIR_OpArrayCopy::src_null_check)); + } + if (flags & LIR_OpArrayCopy::dst_inlinetype_check) { + arraycopy_inlinetype_check(dst, tmp, stub, true, (flags & LIR_OpArrayCopy::dst_null_check)); + } + assert(default_type != nullptr && default_type->is_array_klass() && default_type->is_loaded(), "must be true at this point"); int elem_size = type2aelembytes(basic_type); @@ -2761,6 +2948,26 @@ void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { COMMENT("} emit_profile_type"); } +void LIR_Assembler::emit_profile_inline_type(LIR_OpProfileInlineType* op) { + Register obj = op->obj()->as_register(); + Register tmp = op->tmp()->as_pointer_register(); + bool not_null = op->not_null(); + int flag = op->flag(); + + Label 
not_inline_type; + if (!not_null) { + __ cbz(obj, not_inline_type); + } + + __ test_oop_is_not_inline_type(obj, tmp, not_inline_type); + + Address mdo_addr = as_Address(op->mdp()->as_address_ptr(), rscratch2); + __ ldrb(rscratch1, mdo_addr); + __ orr(rscratch1, rscratch1, flag); + __ strb(rscratch1, mdo_addr); + + __ bind(not_inline_type); +} void LIR_Assembler::align_backward_branch_target() { } @@ -2902,6 +3109,10 @@ void LIR_Assembler::get_thread(LIR_Opr result_reg) { __ mov(result_reg->as_register(), rthread); } +void LIR_Assembler::check_orig_pc() { + __ ldr(rscratch2, frame_map()->address_for_orig_pc_addr()); + __ cmp(rscratch2, (u1)NULL_WORD); +} void LIR_Assembler::peephole(LIR_List *lir) { #if 0 diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.hpp index 367256d2f696f..d48126e22bb8c 100644 --- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.hpp @@ -79,6 +79,9 @@ friend class ArrayCopyStub; _deopt_handler_size = 4 * NativeInstruction::instruction_size }; + void arraycopy_inlinetype_check(Register obj, Register tmp, CodeStub* slow_path, bool is_dest, bool null_check); + void move(LIR_Opr src, LIR_Opr dst); + public: void store_parameter(Register r, int offset_from_esp_in_words); diff --git a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp index 7e82f410a950d..959c61dbf07d7 100644 --- a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp @@ -32,6 +32,7 @@ #include "c1/c1_Runtime1.hpp" #include "c1/c1_ValueStack.hpp" #include "ci/ciArray.hpp" +#include "ci/ciInlineKlass.hpp" #include "ci/ciObjArrayKlass.hpp" #include "ci/ciTypeArrayKlass.hpp" #include "compiler/compilerDefinitions.inline.hpp" @@ -323,11 +324,17 @@ void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { if (x->needs_null_check()) { info_for_exception = 
state_for(x); } + + CodeStub* throw_ie_stub = + x->maybe_inlinetype() ? + new SimpleExceptionStub(StubId::c1_throw_identity_exception_id, obj.result(), state_for(x)) : + nullptr; + // this CodeEmitInfo must not have the xhandlers because here the // object is already locked (xhandlers expect object to be unlocked) CodeEmitInfo* info = state_for(x, x->state(), true); monitor_enter(obj.result(), lock, syncTempOpr(), scratch, - x->monitor_no(), info_for_exception, info); + x->monitor_no(), info_for_exception, info, throw_ie_stub); } @@ -1127,14 +1134,15 @@ void LIRGenerator::do_NewInstance(NewInstance* x) { tty->print_cr(" ###class not loaded at new bci %d", x->printable_bci()); } #endif - CodeEmitInfo* info = state_for(x, x->state()); + CodeEmitInfo* info = state_for(x, x->needs_state_before() ? x->state_before() : x->state()); LIR_Opr reg = result_register_for(x->type()); new_instance(reg, x->klass(), x->is_unresolved(), - FrameMap::r10_oop_opr, - FrameMap::r11_oop_opr, - FrameMap::r4_oop_opr, - LIR_OprFact::illegalOpr, - FrameMap::r3_metadata_opr, info); + !x->is_unresolved() && x->klass()->is_inlinetype(), + FrameMap::r10_oop_opr, + FrameMap::r11_oop_opr, + FrameMap::r4_oop_opr, + LIR_OprFact::illegalOpr, + FrameMap::r3_metadata_opr, info); LIR_Opr result = rlock_result(x); __ move(reg, result); } @@ -1190,13 +1198,19 @@ void LIRGenerator::do_NewObjectArray(NewObjectArray* x) { length.load_item_force(FrameMap::r19_opr); LIR_Opr len = length.result(); - CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info); - ciKlass* obj = (ciKlass*) ciObjArrayKlass::make(x->klass()); + ciKlass* obj = ciObjArrayKlass::make(x->klass()); + + // TODO 8265122 Implement a fast path for this + bool is_flat = obj->is_loaded() && obj->is_flat_array_klass(); + bool is_null_free = obj->is_loaded() && obj->as_array_klass()->is_elem_null_free(); + + CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info, is_null_free); if (obj == 
ciEnv::unloaded_ciobjarrayklass()) { BAILOUT("encountered unloaded_ciobjarrayklass due to out of memory error"); } + klass2reg_with_patching(klass_reg, obj, patching_info); - __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path); + __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path, true, is_null_free || is_flat); LIR_Opr result = rlock_result(x); __ move(reg, result); @@ -1291,7 +1305,7 @@ void LIRGenerator::do_CheckCast(CheckCast* x) { __ checkcast(reg, obj.result(), x->klass(), new_register(objectType), new_register(objectType), tmp3, x->direct_compare(), info_for_exception, patching_info, stub, - x->profiled_method(), x->profiled_bci()); + x->profiled_method(), x->profiled_bci(), x->is_null_free()); } void LIRGenerator::do_InstanceOf(InstanceOf* x) { @@ -1372,7 +1386,12 @@ void LIRGenerator::do_If(If* x) { __ safepoint(LIR_OprFact::illegalOpr, state_for(x, x->state_before())); } - __ cmp(lir_cond(cond), left, right); + if (x->substitutability_check()) { + substitutability_check(x, *xin, *yin); + } else { + __ cmp(lir_cond(cond), left, right); + } + // Generate branch profiling. Profiling code doesn't kill flags. 
profile_branch(x, cond); move_to_phi(x->state()); diff --git a/src/hotspot/cpu/aarch64/c1_LIR_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIR_aarch64.cpp index 5d2890251d7d4..ccafc6c2542ba 100644 --- a/src/hotspot/cpu/aarch64/c1_LIR_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIR_aarch64.cpp @@ -49,5 +49,6 @@ void LIR_Address::verify() const { assert(index()->is_illegal() || index()->is_double_cpu() || index()->is_single_cpu(), "wrong index operand"); assert(base()->type() == T_ADDRESS || base()->type() == T_OBJECT || base()->type() == T_LONG || base()->type() == T_METADATA, "wrong type for addresses"); + assert(index()->is_illegal() || disp() == 0, "cannot set both index and displacement"); } #endif // PRODUCT diff --git a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp index 89a9422ea4888..8088c4bb40e40 100644 --- a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp @@ -27,10 +27,13 @@ #include "c1/c1_Runtime1.hpp" #include "gc/shared/barrierSetAssembler.hpp" #include "gc/shared/collectedHeap.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" #include "gc/shared/tlab_globals.hpp" #include "interpreter/interpreter.hpp" #include "oops/arrayOop.hpp" #include "oops/markWord.hpp" +#include "runtime/arguments.hpp" #include "runtime/basicLock.hpp" #include "runtime/os.hpp" #include "runtime/sharedRuntime.hpp" @@ -99,12 +102,21 @@ void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, i void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register t1, Register t2) { assert_different_registers(obj, klass, len); - if (UseCompactObjectHeaders) { + if (UseCompactObjectHeaders || Arguments::is_valhalla_enabled()) { + // COH: Markword contains class pointer which is only known at runtime. 
+ // Valhalla: Could have value class which has a different prototype header to a normal object. + // In both cases, we need to fetch dynamically. ldr(t1, Address(klass, Klass::prototype_header_offset())); str(t1, Address(obj, oopDesc::mark_offset_in_bytes())); } else { + // Otherwise: Can use the statically computed prototype header which is the same for every object. mov(t1, checked_cast(markWord::prototype().value())); str(t1, Address(obj, oopDesc::mark_offset_in_bytes())); + } + + if (!UseCompactObjectHeaders) { + // COH: Markword already contains class pointer. Nothing else to do. + // Otherwise: Store encoded klass pointer following the markword encode_klass_not_null(t1, klass); // Take care not to kill klass strw(t1, Address(obj, oopDesc::klass_offset_in_bytes())); } @@ -237,20 +249,37 @@ void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1, verify_oop(obj); } -void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { - assert(bang_size_in_bytes >= framesize, "stack bang size incorrect"); +void C1_MacroAssembler::build_frame_helper(int frame_size_in_bytes, int sp_offset_for_orig_pc, int sp_inc, bool reset_orig_pc, bool needs_stack_repair) { + MacroAssembler::build_frame(frame_size_in_bytes); + + if (needs_stack_repair) { + save_stack_increment(sp_inc, frame_size_in_bytes); + } + if (reset_orig_pc) { + // Zero orig_pc to detect deoptimization during buffering in the entry points + str(zr, Address(sp, sp_offset_for_orig_pc)); + } +} + +void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_bytes, + int sp_offset_for_orig_pc, + bool needs_stack_repair, bool has_scalarized_args, + Label* verified_inline_entry_label) { // Make sure there is enough stack space for this method's activation. // Note that we do this before creating a frame. 
+ assert(bang_size_in_bytes >= frame_size_in_bytes, "stack bang size incorrect"); generate_stack_overflow_check(bang_size_in_bytes); - MacroAssembler::build_frame(framesize); + + build_frame_helper(frame_size_in_bytes, sp_offset_for_orig_pc, 0, has_scalarized_args, needs_stack_repair); // Insert nmethod entry barrier into frame. BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); bs->nmethod_entry_barrier(this, nullptr /* slow_path */, nullptr /* continuation */, nullptr /* guard */); -} -void C1_MacroAssembler::remove_frame(int framesize) { - MacroAssembler::remove_frame(framesize); + if (verified_inline_entry_label != nullptr) { + // Jump here from the scalarized entry points that already created the frame. + bind(*verified_inline_entry_label); + } } @@ -260,8 +289,70 @@ void C1_MacroAssembler::verified_entry(bool breakAtEntry) { // must ensure that this first instruction is a B, BL, NOP, BKPT, // SVC, HVC, or SMC. Make it a NOP. nop(); + if (C1Breakpoint) brk(1); +} + +int C1_MacroAssembler::scalarized_entry(const CompiledEntrySignature* ces, int frame_size_in_bytes, int bang_size_in_bytes, int sp_offset_for_orig_pc, Label& verified_inline_entry_label, bool is_inline_ro_entry) { + assert(InlineTypePassFieldsAsArgs, "sanity"); + // Make sure there is enough stack space for this method's activation. + assert(bang_size_in_bytes >= frame_size_in_bytes, "stack bang size incorrect"); + generate_stack_overflow_check(bang_size_in_bytes); + + GrowableArray* sig = ces->sig(); + GrowableArray* sig_cc = is_inline_ro_entry ? ces->sig_cc_ro() : ces->sig_cc(); + VMRegPair* regs = ces->regs(); + VMRegPair* regs_cc = is_inline_ro_entry ? ces->regs_cc_ro() : ces->regs_cc(); + int args_on_stack = ces->args_on_stack(); + int args_on_stack_cc = is_inline_ro_entry ? 
ces->args_on_stack_cc_ro() : ces->args_on_stack_cc(); + + assert(sig->length() <= sig_cc->length(), "Zero-sized inline class not allowed!"); + BasicType* sig_bt = NEW_RESOURCE_ARRAY(BasicType, sig_cc->length()); + int args_passed = sig->length(); + int args_passed_cc = SigEntry::fill_sig_bt(sig_cc, sig_bt); + + // Create a temp frame so we can call into the runtime. It must be properly set up to accommodate GC. + build_frame_helper(frame_size_in_bytes, sp_offset_for_orig_pc, 0, true, ces->c1_needs_stack_repair()); + + // The runtime call might safepoint, make sure nmethod entry barrier is executed + BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + // C1 code is not hot enough to micro optimize the nmethod entry barrier with an out-of-line stub + bs->nmethod_entry_barrier(this, nullptr /* slow_path */, nullptr /* continuation */, nullptr /* guard */); + + mov(r19, (intptr_t) ces->method()); + if (is_inline_ro_entry) { + far_call(RuntimeAddress(Runtime1::entry_for(StubId::c1_buffer_inline_args_no_receiver_id))); + } else { + far_call(RuntimeAddress(Runtime1::entry_for(StubId::c1_buffer_inline_args_id))); + } + int rt_call_offset = offset(); + + // The runtime call returns the new array in r20 instead of the usual r0 + // because r0 is also j_rarg7 which may be holding a live argument here. + Register val_array = r20; + + // Remove the temp frame + MacroAssembler::remove_frame(frame_size_in_bytes); + + // Check if we need to extend the stack for packing + int sp_inc = 0; + if (args_on_stack > args_on_stack_cc) { + sp_inc = extend_stack_for_inline_args(args_on_stack); + } + + shuffle_inline_args(true, is_inline_ro_entry, sig_cc, + args_passed_cc, args_on_stack_cc, regs_cc, // from + args_passed, args_on_stack, regs, // to + sp_inc, val_array); + + // Create the real frame. Below jump will then skip over the stack banging and frame + // setup code in the verified_inline_entry (which has a different real_frame_size). 
+ build_frame_helper(frame_size_in_bytes, sp_offset_for_orig_pc, sp_inc, false, ces->c1_needs_stack_repair()); + + b(verified_inline_entry_label); + return rt_call_offset; } + void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { // rfp, + 0: link // + 1: return address diff --git a/src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp index 449ad4f8a4c55..d603d134a379b 100644 --- a/src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -741,6 +741,7 @@ OopMapSet* Runtime1::generate_code_for(StubId id, StubAssembler* sasm) { case StubId::c1_new_type_array_id: case StubId::c1_new_object_array_id: + case StubId::c1_new_null_free_array_id: { Register length = r19; // Incoming Register klass = r3; // Incoming @@ -748,8 +749,10 @@ OopMapSet* Runtime1::generate_code_for(StubId id, StubAssembler* sasm) { if (id == StubId::c1_new_type_array_id) { __ set_info("new_type_array", dont_gc_arguments); - } else { + } else if (id == StubId::c1_new_object_array_id) { __ set_info("new_object_array", dont_gc_arguments); + } else { + __ set_info("new_null_free_array", dont_gc_arguments); } #ifdef ASSERT @@ -759,13 +762,28 @@ OopMapSet* Runtime1::generate_code_for(StubId id, StubAssembler* sasm) { Register t0 = obj; __ ldrw(t0, Address(klass, Klass::layout_helper_offset())); __ asrw(t0, t0, Klass::_lh_array_tag_shift); - int tag = ((id == StubId::c1_new_type_array_id) - ? 
Klass::_lh_array_tag_type_value - : Klass::_lh_array_tag_obj_value); - __ mov(rscratch1, tag); - __ cmpw(t0, rscratch1); - __ br(Assembler::EQ, ok); - __ stop("assert(is an array klass)"); + switch (id) { + case StubId::c1_new_type_array_id: + __ cmpw(t0, Klass::_lh_array_tag_type_value); + __ br(Assembler::EQ, ok); + __ stop("assert(is a type array klass)"); + break; + case StubId::c1_new_object_array_id: + __ cmpw(t0, Klass::_lh_array_tag_ref_value); // new "[Ljava/lang/Object;" + __ br(Assembler::EQ, ok); + __ cmpw(t0, Klass::_lh_array_tag_flat_value); // new "[LVT;" + __ br(Assembler::EQ, ok); + __ stop("assert(is an object or inline type array klass)"); + break; + case StubId::c1_new_null_free_array_id: + __ cmpw(t0, Klass::_lh_array_tag_flat_value); // the array can be a flat array. + __ br(Assembler::EQ, ok); + __ cmpw(t0, Klass::_lh_array_tag_ref_value); // the array cannot be a flat array (due to the InlineArrayElementMaxFlatSize, etc.) + __ br(Assembler::EQ, ok); + __ stop("assert(is an object or inline type array klass)"); + break; + default: ShouldNotReachHere(); + } __ should_not_reach_here(); __ bind(ok); } @@ -776,8 +794,11 @@ OopMapSet* Runtime1::generate_code_for(StubId id, StubAssembler* sasm) { int call_offset; if (id == StubId::c1_new_type_array_id) { call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_type_array), klass, length); - } else { + } else if (id == StubId::c1_new_object_array_id) { call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_object_array), klass, length); + } else { + assert(id == StubId::c1_new_null_free_array_id, "must be"); + call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_null_free_array), klass, length); } oop_maps = new OopMapSet(); @@ -812,6 +833,93 @@ OopMapSet* Runtime1::generate_code_for(StubId id, StubAssembler* sasm) { } break; + case StubId::c1_buffer_inline_args_id: + case StubId::c1_buffer_inline_args_no_receiver_id: + { + const char* name = (id == 
StubId::c1_buffer_inline_args_id) ? + "buffer_inline_args" : "buffer_inline_args_no_receiver"; + StubFrame f(sasm, name, dont_gc_arguments); + OopMap* map = save_live_registers(sasm); + Register method = r19; // Incoming + address entry = (id == StubId::c1_buffer_inline_args_id) ? + CAST_FROM_FN_PTR(address, buffer_inline_args) : + CAST_FROM_FN_PTR(address, buffer_inline_args_no_receiver); + // This is called from a C1 method's scalarized entry point + // where r0-r7 may be holding live argument values so we can't + // return the result in r0 as the other stubs do. LR is used as + // a temporary below to avoid the result being clobbered by + // restore_live_registers. It's saved and restored by + // StubAssembler::prologue and epilogue anyway. + int call_offset = __ call_RT(lr, noreg, entry, method); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers(sasm); + __ mov(r20, lr); + __ verify_oop(r20); // r20: an array of buffered value objects + } + break; + + case StubId::c1_load_flat_array_id: + { + StubFrame f(sasm, "load_flat_array", dont_gc_arguments); + OopMap* map = save_live_registers(sasm); + + // Called with store_parameter and not C abi + + f.load_argument(1, r0); // r0,: array + f.load_argument(0, r1); // r1,: index + int call_offset = __ call_RT(r0, noreg, CAST_FROM_FN_PTR(address, load_flat_array), r0, r1); + + // Ensure the stores that initialize the buffer are visible + // before any subsequent store that publishes this reference. 
+ __ membar(Assembler::StoreStore); + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_r0(sasm); + + // r0: loaded element at array[index] + __ verify_oop(r0); + } + break; + + case StubId::c1_store_flat_array_id: + { + StubFrame f(sasm, "store_flat_array", dont_gc_arguments); + OopMap* map = save_live_registers(sasm); + + // Called with store_parameter and not C abi + + f.load_argument(2, r0); // r0: array + f.load_argument(1, r1); // r1: index + f.load_argument(0, r2); // r2: value + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, store_flat_array), r0, r1, r2); + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_r0(sasm); + } + break; + + case StubId::c1_substitutability_check_id: + { + StubFrame f(sasm, "substitutability_check", dont_gc_arguments); + OopMap* map = save_live_registers(sasm); + + // Called with store_parameter and not C abi + + f.load_argument(1, r1); // r1,: left + f.load_argument(0, r2); // r2,: right + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, substitutability_check), r1, r2); + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_r0(sasm); + + // r0,: are the two operands substitutable + } + break; + case StubId::c1_register_finalizer_id: { __ set_info("register_finalizer", dont_gc_arguments); @@ -851,11 +959,23 @@ OopMapSet* Runtime1::generate_code_for(StubId id, StubAssembler* sasm) { break; case StubId::c1_throw_incompatible_class_change_error_id: - { StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments, does_not_return); + { StubFrame f(sasm, "throw_incompatible_class_change_error", dont_gc_arguments, does_not_return); oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); } break; + case StubId::c1_throw_illegal_monitor_state_exception_id: + { 
StubFrame f(sasm, "throw_illegal_monitor_state_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_illegal_monitor_state_exception), false); + } + break; + + case StubId::c1_throw_identity_exception_id: + { StubFrame f(sasm, "throw_identity_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_identity_exception), true); + } + break; + case StubId::c1_slow_subtype_check_id: { // Typical calling sequence: diff --git a/src/hotspot/cpu/aarch64/c2_CodeStubs_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_CodeStubs_aarch64.cpp index 5d8d1fbd9cb44..00edf6d1958e7 100644 --- a/src/hotspot/cpu/aarch64/c2_CodeStubs_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c2_CodeStubs_aarch64.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -59,7 +59,6 @@ void C2EntryBarrierStub::emit(C2_MacroAssembler& masm) { __ b(continuation()); __ bind(guard()); - __ relocate(entry_guard_Relocation::spec()); __ emit_int32(0); // nmethod guard value } diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp index cb9e308197e59..d7776adf1e377 100644 --- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp @@ -49,6 +49,27 @@ typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr); +void C2_MacroAssembler::entry_barrier() { + BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + // Dummy labels for just measuring the code size + Label dummy_slow_path; + Label dummy_continuation; + Label dummy_guard; + Label* slow_path = &dummy_slow_path; + Label* continuation = &dummy_continuation; + Label* guard = &dummy_guard; + if (!Compile::current()->output()->in_scratch_emit_size()) { + // Use real labels from actual stub when not emitting code for the purpose of measuring its size + C2EntryBarrierStub* stub = new (Compile::current()->comp_arena()) C2EntryBarrierStub(); + Compile::current()->output()->add_stub(stub); + slow_path = &stub->entry(); + continuation = &stub->continuation(); + guard = &stub->guard(); + } + // In the C2 code, we move the non-hot part of nmethod entry barriers out-of-line to a stub. 
+ bs->nmethod_entry_barrier(this, slow_path, continuation, guard); +} + // jdk.internal.util.ArraysSupport.vectorizedHashCode address C2_MacroAssembler::arrays_hashcode(Register ary, Register cnt, Register result, FloatRegister vdata0, FloatRegister vdata1, diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp index f96d3ffb86351..bf5af8437502b 100644 --- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp @@ -77,6 +77,8 @@ public: using Assembler::sve_cpy; + void entry_barrier(); + // jdk.internal.util.ArraysSupport.vectorizedHashCode address arrays_hashcode(Register ary, Register cnt, Register result, FloatRegister vdata0, FloatRegister vdata1, FloatRegister vdata2, FloatRegister vdata3, diff --git a/src/hotspot/cpu/aarch64/continuationFreezeThaw_aarch64.inline.hpp b/src/hotspot/cpu/aarch64/continuationFreezeThaw_aarch64.inline.hpp index a1a5209de7ab2..1a7a51dc6c5f3 100644 --- a/src/hotspot/cpu/aarch64/continuationFreezeThaw_aarch64.inline.hpp +++ b/src/hotspot/cpu/aarch64/continuationFreezeThaw_aarch64.inline.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2019, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -59,23 +59,20 @@ inline frame FreezeBase::sender(const frame& f) { if (FKind::interpreted) { return frame(f.sender_sp(), f.interpreter_frame_sender_sp(), f.link(), f.sender_pc()); } - intptr_t** link_addr = link_address(f); - intptr_t* sender_sp = (intptr_t*)(link_addr + frame::sender_sp_offset); // f.unextended_sp() + (fsize/wordSize); // - address sender_pc = ContinuationHelper::return_address_at(sender_sp - 1); - assert(sender_sp != f.sp(), "must have changed"); + frame::CompiledFramePointers cfp = f.compiled_frame_details(); int slot = 0; - CodeBlob* sender_cb = CodeCache::find_blob_and_oopmap(sender_pc, slot); + CodeBlob* sender_cb = CodeCache::find_blob_and_oopmap(*cfp.sender_pc_addr, slot); + return sender_cb != nullptr - ? frame(sender_sp, sender_sp, *link_addr, sender_pc, sender_cb, - slot == -1 ? nullptr : sender_cb->oop_map_for_slot(slot, sender_pc), - false /* on_heap ? */) - : frame(sender_sp, sender_sp, *link_addr, sender_pc); + ? frame(cfp.sender_sp, cfp.sender_sp, *cfp.saved_fp_addr, *cfp.sender_pc_addr, sender_cb, + slot == -1 ? nullptr : sender_cb->oop_map_for_slot(slot, *cfp.sender_pc_addr), false) + : frame(cfp.sender_sp, cfp.sender_sp, *cfp.saved_fp_addr, *cfp.sender_pc_addr); } template -frame FreezeBase::new_heap_frame(frame& f, frame& caller) { +frame FreezeBase::new_heap_frame(frame& f, frame& caller, int size_adjust) { assert(FKind::is_instance(f), ""); assert(!caller.is_interpreted_frame() || caller.unextended_sp() == (intptr_t*)caller.at(frame::interpreter_frame_last_sp_offset), ""); @@ -109,14 +106,14 @@ frame FreezeBase::new_heap_frame(frame& f, frame& caller) { fp = FKind::compiled ? 
*(intptr_t**)(f.sp() - frame::sender_sp_offset) : (intptr_t*)badAddressVal; int fsize = FKind::size(f); - sp = caller.unextended_sp() - fsize; - if (caller.is_interpreted_frame()) { + sp = caller.unextended_sp() - fsize - size_adjust; + if (caller.is_interpreted_frame() && size_adjust == 0) { // If the caller is interpreted, our stackargs are not supposed to overlap with it // so we make more room by moving sp down by argsize int argsize = FKind::stack_argsize(f); sp -= argsize; + caller.set_sp(sp + fsize); } - caller.set_sp(sp + fsize); assert(_cont.tail()->is_in_chunk(sp), ""); @@ -183,11 +180,12 @@ inline void FreezeBase::set_top_frame_metadata_pd(const frame& hf) { : (intptr_t)hf.fp(); } -inline void FreezeBase::patch_pd(frame& hf, const frame& caller) { +inline void FreezeBase::patch_pd(frame& hf, const frame& caller, bool is_bottom_frame) { if (caller.is_interpreted_frame()) { assert(!caller.is_empty(), ""); patch_callee_link_relative(caller, caller.fp()); - } else { + } else if (is_bottom_frame && caller.pc() != nullptr) { + assert(caller.is_compiled_frame(), ""); // If we're the bottom-most frame frozen in this freeze, the caller might have stayed frozen in the chunk, // and its oop-containing fp fixed. We've now just overwritten it, so we must patch it back to its value // as read from the chunk. @@ -258,7 +256,7 @@ inline frame ThawBase::new_entry_frame() { return frame(sp, sp, _cont.entryFP(), _cont.entryPC()); // TODO PERF: This finds code blob and computes deopt state } -template frame ThawBase::new_stack_frame(const frame& hf, frame& caller, bool bottom) { +template frame ThawBase::new_stack_frame(const frame& hf, frame& caller, bool bottom, int size_adjust) { assert(FKind::is_instance(hf), ""); // The values in the returned frame object will be written into the callee's stack in patch. 
@@ -286,24 +284,23 @@ template frame ThawBase::new_stack_frame(const frame& hf, frame& return f; } else { int fsize = FKind::size(hf); - intptr_t* frame_sp = caller.unextended_sp() - fsize; + intptr_t* frame_sp = caller.unextended_sp() - fsize - size_adjust; if (bottom || caller.is_interpreted_frame()) { - int argsize = FKind::stack_argsize(hf); - - fsize += argsize; - frame_sp -= argsize; - caller.set_sp(caller.sp() - argsize); - assert(caller.sp() == frame_sp + (fsize-argsize), ""); - + if (size_adjust == 0) { + int argsize = FKind::stack_argsize(hf); + frame_sp -= argsize; + } frame_sp = align(hf, frame_sp, caller, bottom); + caller.set_sp(frame_sp + fsize + size_adjust); } + assert(is_aligned(frame_sp, frame::frame_alignment), ""); assert(hf.cb() != nullptr, ""); assert(hf.oop_map() != nullptr, ""); intptr_t* fp; if (PreserveFramePointer) { // we need to recreate a "real" frame pointer, pointing into the stack - fp = frame_sp + FKind::size(hf) - frame::sender_sp_offset; + fp = frame_sp + fsize - frame::sender_sp_offset; } else { fp = FKind::stub || FKind::native ? frame_sp + fsize - frame::sender_sp_offset // fp always points to the address below the pushed return pc. We need correct address. 
@@ -318,7 +315,6 @@ inline intptr_t* ThawBase::align(const frame& hf, intptr_t* frame_sp, frame& cal if (((intptr_t)frame_sp & 0xf) != 0) { assert(caller.is_interpreted_frame() || (bottom && hf.compiled_frame_stack_argsize() % 2 != 0), ""); frame_sp--; - caller.set_sp(caller.sp() - 1); } assert(is_aligned(frame_sp, frame::frame_alignment), ""); #endif @@ -327,7 +323,9 @@ inline intptr_t* ThawBase::align(const frame& hf, intptr_t* frame_sp, frame& cal } inline void ThawBase::patch_pd(frame& f, const frame& caller) { - patch_callee_link(caller, caller.fp()); + if (caller.is_interpreted_frame() || PreserveFramePointer) { + patch_callee_link(caller, caller.fp()); + } } inline void ThawBase::patch_pd(frame& f, intptr_t* caller_sp) { diff --git a/src/hotspot/cpu/aarch64/continuationHelper_aarch64.inline.hpp b/src/hotspot/cpu/aarch64/continuationHelper_aarch64.inline.hpp index 04a2d4e2bd575..9946926af68bb 100644 --- a/src/hotspot/cpu/aarch64/continuationHelper_aarch64.inline.hpp +++ b/src/hotspot/cpu/aarch64/continuationHelper_aarch64.inline.hpp @@ -124,7 +124,8 @@ inline intptr_t** ContinuationHelper::Frame::callee_link_address(const frame& f) } inline address* ContinuationHelper::Frame::return_pc_address(const frame& f) { - return (address*)(f.real_fp() - 1); + frame::CompiledFramePointers cfp = f.compiled_frame_details(); + return cfp.sender_pc_addr; } inline address* ContinuationHelper::InterpretedFrame::return_pc_address(const frame& f) { diff --git a/src/hotspot/cpu/aarch64/foreignGlobals_aarch64.cpp b/src/hotspot/cpu/aarch64/foreignGlobals_aarch64.cpp index 1ed5e6f312f76..b9fcb447d65f7 100644 --- a/src/hotspot/cpu/aarch64/foreignGlobals_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/foreignGlobals_aarch64.cpp @@ -50,15 +50,15 @@ const ABIDescriptor ForeignGlobals::parse_abi_descriptor(jobject jabi) { oop abi_oop = JNIHandles::resolve_non_null(jabi); ABIDescriptor abi; - objArrayOop inputStorage = jdk_internal_foreign_abi_ABIDescriptor::inputStorage(abi_oop); + 
refArrayOop inputStorage = jdk_internal_foreign_abi_ABIDescriptor::inputStorage(abi_oop); parse_register_array(inputStorage, StorageType::INTEGER, abi._integer_argument_registers, as_Register); parse_register_array(inputStorage, StorageType::VECTOR, abi._vector_argument_registers, as_FloatRegister); - objArrayOop outputStorage = jdk_internal_foreign_abi_ABIDescriptor::outputStorage(abi_oop); + refArrayOop outputStorage = jdk_internal_foreign_abi_ABIDescriptor::outputStorage(abi_oop); parse_register_array(outputStorage, StorageType::INTEGER, abi._integer_return_registers, as_Register); parse_register_array(outputStorage, StorageType::VECTOR, abi._vector_return_registers, as_FloatRegister); - objArrayOop volatileStorage = jdk_internal_foreign_abi_ABIDescriptor::volatileStorage(abi_oop); + refArrayOop volatileStorage = jdk_internal_foreign_abi_ABIDescriptor::volatileStorage(abi_oop); parse_register_array(volatileStorage, StorageType::INTEGER, abi._integer_additional_volatile_registers, as_Register); parse_register_array(volatileStorage, StorageType::VECTOR, abi._vector_additional_volatile_registers, as_FloatRegister); diff --git a/src/hotspot/cpu/aarch64/frame_aarch64.cpp b/src/hotspot/cpu/aarch64/frame_aarch64.cpp index c3bbc540ed441..f9e15bec2986a 100644 --- a/src/hotspot/cpu/aarch64/frame_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/frame_aarch64.cpp @@ -153,14 +153,17 @@ bool frame::safe_for_sender(JavaThread *thread) { if (!thread->is_in_full_stack_checked((address)sender_sp)) { return false; } - sender_unextended_sp = sender_sp; // Note: frame::sender_sp_offset is only valid for compiled frame - saved_fp = (intptr_t*) *(sender_sp - frame::sender_sp_offset); + intptr_t **saved_fp_addr = (intptr_t**) (sender_sp - frame::sender_sp_offset); + saved_fp = *saved_fp_addr; // Note: PAC authentication may fail in case broken frame is passed in. // Just strip it for now. 
sender_pc = pauth_strip_pointer((address) *(sender_sp - 1)); - } + // Repair the sender sp if this is a method with scalarized inline type args + sender_sp = repair_sender_sp(sender_sp, saved_fp_addr); + sender_unextended_sp = sender_sp; + } if (Continuation::is_return_barrier_entry(sender_pc)) { // sender_pc might be invalid so check that the frame // actually belongs to a Continuation. @@ -622,13 +625,24 @@ void frame::describe_pd(FrameValues& values, int frame_no) { ret_pc_loc = fp() + return_addr_offset; fp_loc = fp(); } else { - ret_pc_loc = real_fp() - return_addr_offset; - fp_loc = real_fp() - sender_sp_offset; + if (cb()->is_nmethod() && cb()->as_nmethod_or_null()->needs_stack_repair()) { + values.describe(frame_no, real_fp() - sender_sp_offset - 1, err_msg("fsize for #%d", frame_no), 1); + } + frame::CompiledFramePointers cfp = compiled_frame_details(); + ret_pc_loc = (intptr_t*)cfp.sender_pc_addr; + fp_loc = (intptr_t*)cfp.saved_fp_addr; } address ret_pc = *(address*)ret_pc_loc; values.describe(frame_no, ret_pc_loc, Continuation::is_return_barrier_entry(ret_pc) ? "return address (return barrier)" : "return address"); values.describe(-1, fp_loc, "saved fp", 0); // "unowned" as value belongs to sender + + intptr_t* ret_pc_loc2 = real_fp() - return_addr_offset; + if (ret_pc_loc2 != ret_pc_loc) { + intptr_t* fp_loc2 = real_fp() - sender_sp_offset; + values.describe(frame_no, ret_pc_loc2, "return address copy #2"); + values.describe(-1, fp_loc2, "saved fp copy #2", 0); + } } } #endif @@ -779,6 +793,78 @@ frame::frame(void* sp, void* fp, void* pc) { #endif +// Check for a method with scalarized inline type arguments that needs +// a stack repair and return the repaired sender stack pointer. 
+intptr_t* frame::repair_sender_sp(intptr_t* sender_sp, intptr_t** saved_fp_addr) const { + nmethod* nm = _cb->as_nmethod_or_null(); + if (nm != nullptr && nm->needs_stack_repair()) { + // The stack increment resides just below the saved FP on the stack and + // records the total frame size excluding the two words for saving FP and LR + // (see MacroAssembler::remove_frame). + intptr_t* sp_inc_addr = (intptr_t*) (saved_fp_addr - 1); + assert(*sp_inc_addr % StackAlignmentInBytes == 0, "sp_inc not aligned"); + int real_frame_size = (*sp_inc_addr / wordSize) + metadata_words_at_bottom; + assert(real_frame_size >= _cb->frame_size() && real_frame_size <= 1000000, "invalid frame size"); + sender_sp = unextended_sp() + real_frame_size; + } + return sender_sp; +} + +// See comment in MacroAssembler::remove_frame +frame::CompiledFramePointers frame::compiled_frame_details() const { + // we cannot rely upon the last fp having been saved to the thread + // in C2 code but it will have been pushed onto the stack. so we + // have to find it relative to the unextended sp + + assert(_cb->frame_size() > 0, "must have non-zero frame size"); + + // if need stack repair: the bottom of the fake frame, under LR #2 + // else the bottom of the frame + intptr_t* l_sender_sp = (!PreserveFramePointer || _sp_is_trusted) + ? unextended_sp() + _cb->frame_size() + : sender_sp(); + + assert(!_sp_is_trusted || l_sender_sp == real_fp(), ""); + + // the actual bottom of the frame. 
This actually changes something if the frame needs stack repair + l_sender_sp = repair_sender_sp(l_sender_sp, (intptr_t**)(l_sender_sp - frame::sender_sp_offset)); + + // From the sender's sp, we can locate the real saved lr (x30) and rfp (x29): they are + // immediately above, no matter if the stack was extended or not + CompiledFramePointers cfp; + cfp.sender_sp = l_sender_sp; + cfp.saved_fp_addr = (intptr_t**)(l_sender_sp - frame::sender_sp_offset); + cfp.sender_pc_addr = (address*)(l_sender_sp - frame::return_addr_offset); + + return cfp; +} + +intptr_t* frame::repair_sender_sp(nmethod* nm, intptr_t* sp, intptr_t** saved_fp_addr) { + assert(nm != nullptr && nm->needs_stack_repair(), ""); + // The stack increment resides just below the saved FP on the stack and + // records the total frame size excluding the two words for saving FP and LR + // (see MacroAssembler::remove_frame). + intptr_t* real_frame_size_addr = (intptr_t*) (saved_fp_addr - 1); + int real_frame_size = (*real_frame_size_addr / wordSize) + metadata_words_at_bottom; + assert(real_frame_size >= nm->frame_size() && real_frame_size <= 1000000, "invalid frame size"); + return sp + real_frame_size; +} + +bool frame::was_augmented_on_entry(int& real_size) const { + assert(is_compiled_frame(), ""); + if (_cb->as_nmethod_or_null()->needs_stack_repair()) { + // The stack increment resides just below the saved FP on the stack and + // records the total frame size excluding the two words for saving FP and LR + // (see MacroAssembler::remove_frame). + intptr_t* real_frame_size_addr = unextended_sp() + _cb->frame_size() - sender_sp_offset - 1; + log_trace(continuations)("real_frame_size is addr is " INTPTR_FORMAT, p2i(real_frame_size_addr)); + real_size = (*real_frame_size_addr / wordSize) + metadata_words_at_bottom; + return real_size != _cb->frame_size(); + } + real_size = _cb->frame_size(); + return false; +} + void JavaFrameAnchor::make_walkable() { // last frame set? 
if (last_Java_sp() == nullptr) return; diff --git a/src/hotspot/cpu/aarch64/frame_aarch64.hpp b/src/hotspot/cpu/aarch64/frame_aarch64.hpp index 231710df7d749..27685f0a4d392 100644 --- a/src/hotspot/cpu/aarch64/frame_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/frame_aarch64.hpp @@ -151,6 +151,17 @@ } public: + // Support for scalarized inline type calling convention + intptr_t* repair_sender_sp(intptr_t* sender_sp, intptr_t** saved_fp_addr) const; + struct CompiledFramePointers { + intptr_t* sender_sp; // The top of the stack of the sender + intptr_t** saved_fp_addr; // Where rfp (x29) is saved on the stack (FP #1 in remove_frame's comment) + address* sender_pc_addr; // Where lr (x30) is saved on the stack (LR #1) + }; + CompiledFramePointers compiled_frame_details() const; + static intptr_t* repair_sender_sp(nmethod* nm, intptr_t* sp, intptr_t** saved_fp_addr); + bool was_augmented_on_entry(int& real_size) const; + // Constructors frame(intptr_t* sp, intptr_t* fp, address pc); diff --git a/src/hotspot/cpu/aarch64/frame_aarch64.inline.hpp b/src/hotspot/cpu/aarch64/frame_aarch64.inline.hpp index 748ab0e0e2bbc..36f6a88cb2f1b 100644 --- a/src/hotspot/cpu/aarch64/frame_aarch64.inline.hpp +++ b/src/hotspot/cpu/aarch64/frame_aarch64.inline.hpp @@ -32,6 +32,9 @@ #include "interpreter/interpreter.hpp" #include "runtime/sharedRuntime.hpp" #include "pauth_aarch64.hpp" +#ifdef COMPILER1 +#include "c1/c1_Runtime1.hpp" +#endif // Inline functions for AArch64 frames: @@ -444,28 +447,31 @@ inline frame frame::sender_raw(RegisterMap* map) const { } inline frame frame::sender_for_compiled_frame(RegisterMap* map) const { - // we cannot rely upon the last fp having been saved to the thread - // in C2 code but it will have been pushed onto the stack. so we - // have to find it relative to the unextended sp - - assert(_cb->frame_size() > 0, "must have non-zero frame size"); - intptr_t* l_sender_sp = (!PreserveFramePointer || _sp_is_trusted) ? 
unextended_sp() + _cb->frame_size() - : sender_sp(); - assert(!_sp_is_trusted || l_sender_sp == real_fp(), ""); + CompiledFramePointers cfp = compiled_frame_details(); // The return_address is always the word on the stack. // For ROP protection, C1/C2 will have signed the sender_pc, // but there is no requirement to authenticate it here. - address sender_pc = pauth_strip_verifiable((address) *(l_sender_sp - 1)); - - intptr_t** saved_fp_addr = (intptr_t**) (l_sender_sp - frame::sender_sp_offset); + address sender_pc = pauth_strip_verifiable(*cfp.sender_pc_addr); if (map->update_map()) { // Tell GC to use argument oopmaps for some runtime stubs that need it. // For C1, the runtime stub might not have oop maps, so set this flag // outside of update_register_map. - if (!_cb->is_nmethod()) { // compiled frames do not use callee-saved registers - map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); + bool c1_buffering = false; +#ifdef COMPILER1 + nmethod* nm = _cb->as_nmethod_or_null(); + if (nm != nullptr && nm->is_compiled_by_c1() && nm->method()->has_scalarized_args() && + pc() < nm->verified_inline_entry_point()) { + // The VEP and VIEP(RO) of C1-compiled methods call buffer_inline_args_xxx + // before doing any argument shuffling, so we need to scan the oops + // as the caller passes them. + c1_buffering = true; + } +#endif + if (!_cb->is_nmethod() || c1_buffering) { // compiled frames do not use callee-saved registers + bool caller_args = _cb->caller_must_gc_arguments(map->thread()) || c1_buffering; + map->set_include_argument_oops(caller_args); if (oop_map() != nullptr) { _oop_map->update_register_map(this, map); } @@ -478,19 +484,19 @@ inline frame frame::sender_for_compiled_frame(RegisterMap* map) const { // Since the prolog does the save and restore of FP there is no oopmap // for it so we must fill in its location as if there was an oopmap entry // since if our caller was compiled code there could be live jvm state in it. 
- update_map_with_saved_link(map, saved_fp_addr); + update_map_with_saved_link(map, cfp.saved_fp_addr); } if (Continuation::is_return_barrier_entry(sender_pc)) { if (map->walk_cont()) { // about to walk into an h-stack return Continuation::top_frame(*this, map); } else { - return Continuation::continuation_bottom_sender(map->thread(), *this, l_sender_sp); + return Continuation::continuation_bottom_sender(map->thread(), *this, cfp.sender_sp); } } - intptr_t* unextended_sp = l_sender_sp; - return frame(l_sender_sp, unextended_sp, *saved_fp_addr, sender_pc); + intptr_t* unextended_sp = cfp.sender_sp; + return frame(cfp.sender_sp, unextended_sp, *cfp.saved_fp_addr, sender_pc); } template diff --git a/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp index 682917202088e..f942970742832 100644 --- a/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -132,6 +132,7 @@ void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* mas static void generate_queue_test_and_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime, const Register thread, const Register value, const Register temp1, const Register temp2) { + assert_different_registers(value, temp1, temp2); // Can we store a value in the given thread's buffer? // (The index field is typed as size_t.) 
__ ldr(temp1, Address(thread, in_bytes(index_offset))); // temp1 := *(index address) @@ -205,6 +206,7 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, __ bind(runtime); + assert_different_registers(rscratch1, pre_val); // push_call_clobbered_registers trashes rscratch1 __ push_call_clobbered_registers(); // Calling the runtime using the regular call_VM_leaf mechanism generates @@ -383,6 +385,16 @@ void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorator void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { + + bool in_heap = (decorators & IN_HEAP) != 0; + bool as_normal = (decorators & AS_NORMAL) != 0; + bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; + + bool needs_pre_barrier = as_normal && !dest_uninitialized; + bool needs_post_barrier = (val != noreg && in_heap); + + assert_different_registers(val, tmp1, tmp2, tmp3); + // flatten object address if needed if (dst.index() == noreg && dst.offset() == 0) { if (dst.base() != tmp3) { @@ -392,31 +404,38 @@ void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet deco __ lea(tmp3, dst); } - g1_write_barrier_pre(masm, - tmp3 /* obj */, - tmp2 /* pre_val */, - rthread /* thread */, - tmp1 /* tmp1 */, - rscratch2 /* tmp2 */, - val != noreg /* tosca_live */, - false /* expand_call */); + if (needs_pre_barrier) { + g1_write_barrier_pre(masm, + tmp3 /* obj */, + tmp2 /* pre_val */, + rthread /* thread */, + tmp1 /* tmp1 */, + rscratch2 /* tmp2 */, + val != noreg /* tosca_live */, + false /* expand_call */); + } if (val == noreg) { BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), noreg, noreg, noreg, noreg); } else { // G1 barrier needs uncompressed oop for region cross check. 
Register new_val = val; - if (UseCompressedOops) { - new_val = rscratch2; - __ mov(new_val, val); + if (needs_post_barrier) { + if (UseCompressedOops) { + new_val = rscratch2; + __ mov(new_val, val); + } } + BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg, noreg); - g1_write_barrier_post(masm, - tmp3 /* store_adr */, - new_val /* new_val */, - rthread /* thread */, - tmp1 /* tmp1 */, - tmp2 /* tmp2 */); + if (needs_post_barrier) { + g1_write_barrier_post(masm, + tmp3 /* store_adr */, + new_val /* new_val */, + rthread /* thread */, + tmp1 /* tmp1 */, + tmp2 /* tmp2 */); + } } } diff --git a/src/hotspot/cpu/aarch64/gc/g1/g1_aarch64.ad b/src/hotspot/cpu/aarch64/gc/g1/g1_aarch64.ad index 18fc27a4af4ca..7bd5abbb06045 100644 --- a/src/hotspot/cpu/aarch64/gc/g1/g1_aarch64.ad +++ b/src/hotspot/cpu/aarch64/gc/g1/g1_aarch64.ad @@ -1,5 +1,5 @@ // -// Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2024, 2026, Oracle and/or its affiliates. All rights reserved. // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. // // This code is free software; you can redistribute it and/or modify it @@ -73,6 +73,121 @@ static void write_barrier_post(MacroAssembler* masm, %} +// TODO 8350865 (same applies to g1StoreLSpecialTwoOops) +// - Do not set/overwrite barrier data here, also handle G1C2BarrierPostNotNull +// - Move this into the .m4? 
+instruct g1StoreLSpecialOneOopOff0(indirect mem, iRegLNoSp src, immI0 off, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr) +%{ + predicate(UseG1GC); + match(Set mem (StoreLSpecial mem (Binary src off))); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(INSN_COST); + format %{ "str $src, $mem\t# g1StoreLSpecialOneOopOff0" %} + ins_encode %{ + ((MachNode*)this)->set_barrier_data(G1C2BarrierPre | G1C2BarrierPost); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $src$$Register) /* preserve */); + + __ str($src$$Register, $mem$$Register); + + // Extract the narrow oop field value + __ ubfm($tmp1$$Register, $src$$Register, 0, 31); + __ decode_heap_oop($tmp1$$Register, $tmp1$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(istore_reg_mem); +%} + +instruct g1StoreLSpecialOneOopOff4(indirect mem, iRegLNoSp src, immI_4 off, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegPNoSp tmp4, rFlagsReg cr) +%{ + predicate(UseG1GC); + match(Set mem (StoreLSpecial mem (Binary src off))); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + ins_cost(INSN_COST); + format %{ "str $src, $mem\t# g1StoreLSpecialOneOopOff4" %} + ins_encode %{ + ((MachNode*)this)->set_barrier_data(G1C2BarrierPre | G1C2BarrierPost); + + // Adjust address to point to narrow oop + __ add($tmp4$$Register, $mem$$Register, 4); + write_barrier_pre(masm, this, + $tmp4$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $src$$Register, $tmp4$$Register) /* preserve */); + + __ str($src$$Register, $mem$$Register); + + // Shift long value to extract the narrow oop field value + __ lsr($tmp1$$Register, $src$$Register, 
32); + __ decode_heap_oop($tmp1$$Register, $tmp1$$Register); + write_barrier_post(masm, this, + $tmp4$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(istore_reg_mem); +%} + +instruct g1StoreLSpecialTwoOops(indirect mem, iRegLNoSp src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegPNoSp tmp4, rFlagsReg cr) +%{ + predicate(UseG1GC); + match(Set mem (StoreLSpecial mem src)); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + ins_cost(INSN_COST); + format %{ "str $src, $mem\t# g1StoreLSpecialTwoOops" %} + ins_encode %{ + ((MachNode*)this)->set_barrier_data(G1C2BarrierPre | G1C2BarrierPost); + + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $src$$Register) /* preserve */); + // Adjust address to point to the second narrow oop in the long value + __ add($tmp4$$Register, $mem$$Register, 4); + write_barrier_pre(masm, this, + $tmp4$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $src$$Register, $tmp4$$Register) /* preserve */); + + __ str($src$$Register, $mem$$Register); + + // Zero-extend first narrow oop to long + __ ubfm($tmp1$$Register, $src$$Register, 0, 31); + __ decode_heap_oop($tmp1$$Register, $tmp1$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + + // Shift long value to extract the second narrow oop field value + __ lsr($tmp1$$Register, $src$$Register, 32); + __ decode_heap_oop($tmp1$$Register, $tmp1$$Register); + write_barrier_post(masm, this, + $tmp4$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(istore_reg_mem); 
+%} + + // BEGIN This section of the file is automatically generated. Do not edit -------------- // This section is generated from g1_aarch64.m4 diff --git a/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.cpp index 38efcf80650c2..3c48f13800783 100644 --- a/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.cpp @@ -26,6 +26,7 @@ #include "gc/shared/barrierSet.hpp" #include "gc/shared/barrierSetAssembler.hpp" #include "gc/shared/barrierSetNMethod.hpp" +#include "gc/shared/barrierSetRuntime.hpp" #include "gc/shared/collectedHeap.hpp" #include "interpreter/interp_masm.hpp" #include "memory/universe.hpp" @@ -86,22 +87,35 @@ void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { bool in_heap = (decorators & IN_HEAP) != 0; bool in_native = (decorators & IN_NATIVE) != 0; + bool is_not_null = (decorators & IS_NOT_NULL) != 0; + switch (type) { case T_OBJECT: case T_ARRAY: { - val = val == noreg ? 
zr : val; if (in_heap) { - if (UseCompressedOops) { - assert(!dst.uses(val), "not enough registers"); - if (val != zr) { - __ encode_heap_oop(val); + if (val == noreg) { + assert(!is_not_null, "inconsistent access"); + if (UseCompressedOops) { + __ strw(zr, dst); + } else { + __ str(zr, dst); } - __ strw(val, dst); } else { - __ str(val, dst); + if (UseCompressedOops) { + assert(!dst.uses(val), "not enough registers"); + if (is_not_null) { + __ encode_heap_oop_not_null(val); + } else { + __ encode_heap_oop(val); + } + __ strw(val, dst); + } else { + __ str(val, dst); + } } } else { assert(in_native, "why else?"); + assert(val != noreg, "not supported"); __ str(val, dst); } break; @@ -122,6 +136,19 @@ void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators } } +void BarrierSetAssembler::flat_field_copy(MacroAssembler* masm, DecoratorSet decorators, + Register src, Register dst, Register inline_layout_info) { + // flat_field_copy implementation is fairly complex, and there are not any + // "short-cuts" to be made from asm. What there is, appears to have the same + // cost in C++, so just "call_VM_leaf" for now rather than maintain hundreds + // of hand-rolled instructions... 
+ if (decorators & IS_DEST_UNINITIALIZED) { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetRuntime::value_copy_is_dest_uninitialized), src, dst, inline_layout_info); + } else { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetRuntime::value_copy), src, dst, inline_layout_info); + } +} + void BarrierSetAssembler::copy_load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, diff --git a/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.hpp index c8ed794198339..d2fcc6568ec4e 100644 --- a/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.hpp @@ -99,6 +99,9 @@ class BarrierSetAssembler: public CHeapObj { virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, Address dst, Register val, Register tmp1, Register tmp2, Register tmp3); + virtual void flat_field_copy(MacroAssembler* masm, DecoratorSet decorators, + Register src, Register dst, Register inline_layout_info); + virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, Register obj, Register tmp, Label& slowpath); diff --git a/src/hotspot/cpu/aarch64/gc/shared/barrierSetNMethod_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/shared/barrierSetNMethod_aarch64.cpp index 89f33cf452930..1ebef02241fda 100644 --- a/src/hotspot/cpu/aarch64/gc/shared/barrierSetNMethod_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/gc/shared/barrierSetNMethod_aarch64.cpp @@ -22,6 +22,7 @@ * */ +#include "asm/macroAssembler.hpp" #include "code/codeCache.hpp" #include "code/nativeInst.hpp" #include "gc/shared/barrierSet.hpp" @@ -60,83 +61,160 @@ static int entry_barrier_offset(nmethod* nm) { return 0; } +static int* decode_guard_from_instruction(nmethod* nm, address& instruction) { + int* result = reinterpret_cast(MacroAssembler::target_addr_for_insn(instruction)); + assert(nm->insts_contains(reinterpret_cast

<address>(result)) || + nm->stub_contains(reinterpret_cast<address>
(result)), + "guard must be in nmethod code"); + return result; +} + +// The NativeNMethodBarrier class encapsulates up to three entrypoints and handles their +// arming/verification. +// An entrypoint is defined as a tuple of : +// * The instr. address corresponds to the ldr of the guard value of that entrypoint. +// * The guard address is the address where the guard value of that entrypoint resides. +// +// Each nmethod has at least one entrypoint. The default must always be well-defined +// (neither instruction nor guard are nullptr). +// +// When using the scalarized calling convention, up to two additional (verified) entrypoints, +// alt1 and alt2 can be present. The meaning of these depends on who compiled the nmethod. +// +// The mapping of C1-compiled methods (scalarization used) looks as follows: +// * alt1: verified entry point +// * alt2 (optional): verified inline ro entry point +// +// The mapping of C2-compiled methods (scalarization used) looks as follows: +// * alt1: verified inline entry point +// * alt2 (optional): verified inline ro entry point +// +// In other scenarios, neither alt1 nor alt2 are defined. class NativeNMethodBarrier { - address _instruction_address; - int* _guard_addr; - nmethod* _nm; + private: + // The addresses of the instructions that act as the guards. + address _default_entry_instruction; + address _verified_alt1_instruction; + address _verified_alt2_instruction; + // Pointers representing the actual guard values themselves. + int* _default_entry_guard; + int* _verified_alt1_guard; + int* _verified_alt2_guard; + + public: + NativeNMethodBarrier(nmethod* nm) : + _default_entry_instruction(nullptr), + _verified_alt1_instruction(nullptr), + _verified_alt2_instruction(nullptr), + _default_entry_guard(nullptr), + _verified_alt1_guard(nullptr), + _verified_alt2_guard(nullptr) + { + // The default entry point has a known address. The guard address can be + // decoded from the literal in the instruction. 
Verification will confirm + // that this instruction corresponds to a load. + _default_entry_instruction = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset(nm); + _default_entry_guard = decode_guard_from_instruction(nm, _default_entry_instruction); - address instruction_address() const { return _instruction_address; } + // If the nmethod has scalarized arguments, then there are more entry + // points, each with their own nmethod entry barrier. + if (!nm->is_osr_method() && nm->method()->has_scalarized_args()) { + assert(nm->verified_entry_point() != nm->verified_inline_entry_point(), "scalarized entry point not found"); + address method_body = nm->is_compiled_by_c1() ? nm->verified_inline_entry_point() : nm->verified_entry_point(); + int barrier_offset = _default_entry_instruction - method_body; - int *guard_addr() { - return _guard_addr; + // Set the first alternative entry point. + address entry_point2 = nm->is_compiled_by_c1() ? nm->verified_entry_point() : nm->verified_inline_entry_point(); + _verified_alt1_instruction = entry_point2 + barrier_offset; + assert(_default_entry_instruction != _verified_alt1_instruction, "sanity"); + _verified_alt1_guard = decode_guard_from_instruction(nm, _verified_alt1_instruction); + + // If there is a second alternative entry point, set it too. + if (method_body != nm->verified_inline_ro_entry_point() && entry_point2 != nm->verified_inline_ro_entry_point()) { + _verified_alt2_instruction = nm->verified_inline_ro_entry_point() + barrier_offset; + _verified_alt2_guard = decode_guard_from_instruction(nm, _verified_alt2_instruction); + assert(_default_entry_instruction != _verified_alt2_instruction && + _verified_alt1_instruction != _verified_alt2_instruction, + "sanity"); + } + } + // Perform the checking as verification. 
+ err_msg msg("%s", ""); + assert(check_barriers(msg), "%s", msg.buffer()); } - int local_guard_offset(nmethod* nm) { - // It's the last instruction - return (-entry_barrier_offset(nm)) - 4; + // Gets the value of the default entry guard. + // This does not consider the alternative entrypoints, as these should + // all be consistent. It is up to the caller to enforce this. + int get_default_guard_value() { + return AtomicAccess::load_acquire(_default_entry_guard); } -public: - NativeNMethodBarrier(nmethod* nm): _nm(nm) { - _instruction_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset(nm); - if (nm->is_compiled_by_c2()) { - // With c2 compiled code, the guard is out-of-line in a stub - // We find it using the RelocIterator. - RelocIterator iter(nm); - while (iter.next()) { - if (iter.type() == relocInfo::entry_guard_type) { - entry_guard_Relocation* const reloc = iter.entry_guard_reloc(); - _guard_addr = reinterpret_cast(reloc->addr()); - return; - } - } - ShouldNotReachHere(); - } - _guard_addr = reinterpret_cast(instruction_address() + local_guard_offset(nm)); + // Sets the value for all barriers. + void set_values(int value, int bit_mask) { + set_value_impl(_default_entry_guard, value, bit_mask); + if (_verified_alt1_guard != nullptr) { + set_value_impl(_verified_alt1_guard, value, bit_mask); + } + if (_verified_alt2_guard != nullptr) { + set_value_impl(_verified_alt2_guard, value, bit_mask); + } } - int get_value() { - return AtomicAccess::load_acquire(guard_addr()); + // Verifies that all potential barriers are correct. + bool check_barriers(err_msg& msg) { + // The default entry barrier should always be checked. + if (!check_barrier_impl(_default_entry_instruction, msg)) { + return false; + } + // Check the alternative entry barriers only if they are specified. + // Note that the guard values are already validated at construction time, + // if they fall out of the nmethod range, this will be caught earlier. 
+ if (_verified_alt1_instruction != nullptr && + !check_barrier_impl(_verified_alt1_instruction, msg)) { + return false; + } + if (_verified_alt2_instruction != nullptr && + !check_barrier_impl(_verified_alt2_instruction, msg)) { + return false; + } + return true; } - void set_value(int value, int bit_mask) { +private: + // Sets the value for a single barrier. + void set_value_impl(int* guard, int value, int bit_mask) { if (bit_mask == ~0) { - AtomicAccess::release_store(guard_addr(), value); + AtomicAccess::release_store(guard, value); return; } assert((value & ~bit_mask) == 0, "trying to set bits outside the mask"); value &= bit_mask; - int old_value = AtomicAccess::load(guard_addr()); + int old_value = AtomicAccess::load(guard); while (true) { // Only bits in the mask are changed int new_value = value | (old_value & ~bit_mask); if (new_value == old_value) break; - int v = AtomicAccess::cmpxchg(guard_addr(), old_value, new_value, memory_order_release); + int v = AtomicAccess::cmpxchg(guard, old_value, new_value, memory_order_release); if (v == old_value) break; old_value = v; } } - bool check_barrier(err_msg& msg) const; - void verify() const { - err_msg msg("%s", ""); - assert(check_barrier(msg), "%s", msg.buffer()); + // Checks the validity of a single barrier. + // The first instruction of the nmethod entry barrier is an ldrw (literal) + // instruction. Verify that it's really there, so the offsets are not skewed. + bool check_barrier_impl(address& instruction, err_msg& msg) { + NativeInstruction* ni = nativeInstruction_at(instruction); + if (!ni->is_ldrw_gpr_literal()) { + msg.print("Nmethod entry barrier did not start with ldrw (literal) as expected. " + "Addr: " PTR_FORMAT " Code: " UINT32_FORMAT, p2i(instruction), ni->encoding()); + return false; + } + return true; } }; -// The first instruction of the nmethod entry barrier is an ldrw (literal) -// instruction. Verify that it's really there, so the offsets are not skewed. 
-bool NativeNMethodBarrier::check_barrier(err_msg& msg) const { - NativeInstruction* ni = nativeInstruction_at(instruction_address()); - if (!ni->is_ldrw_gpr_literal()) { - msg.print("Nmethod entry barrier did not start with ldrw (literal) as expected. " - "Addr: " PTR_FORMAT " Code: " UINT32_FORMAT, p2i(instruction_address()), ni->encoding()); - return false; - } - return true; -} - - /* We're called from an nmethod when we need to deoptimize it. We do this by throwing away the nmethod's frame and jumping to the ic_miss stub. This looks like there has been an IC miss at the @@ -197,7 +275,7 @@ void BarrierSetNMethod::set_guard_value(nmethod* nm, int value, int bit_mask) { MACOS_AARCH64_ONLY(ThreadWXEnable wx(WXWrite, Thread::current())); NativeNMethodBarrier barrier(nm); - barrier.set_value(value, bit_mask); + barrier.set_values(value, bit_mask); } int BarrierSetNMethod::guard_value(nmethod* nm) { @@ -206,5 +284,5 @@ int BarrierSetNMethod::guard_value(nmethod* nm) { } NativeNMethodBarrier barrier(nm); - return barrier.get_value(); + return barrier.get_default_guard_value(); } diff --git a/src/hotspot/cpu/aarch64/globals_aarch64.hpp b/src/hotspot/cpu/aarch64/globals_aarch64.hpp index 59c7e44b0e50e..1945936940267 100644 --- a/src/hotspot/cpu/aarch64/globals_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/globals_aarch64.hpp @@ -67,6 +67,9 @@ define_pd_global(bool, RewriteFrequentPairs, true); define_pd_global(bool, PreserveFramePointer, false); +define_pd_global(bool, InlineTypePassFieldsAsArgs, true); +define_pd_global(bool, InlineTypeReturnedAsFields, true); + define_pd_global(uintx, TypeProfileLevel, 111); define_pd_global(bool, CompactStrings, true); diff --git a/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp b/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp index 22c2383816cd6..1b16ec25ca7f9 100644 --- a/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp @@ -32,9 +32,11 @@ #include 
"interpreter/interpreterRuntime.hpp" #include "logging/log.hpp" #include "oops/arrayOop.hpp" +#include "oops/constMethodFlags.hpp" #include "oops/markWord.hpp" #include "oops/method.hpp" #include "oops/methodData.hpp" +#include "oops/inlineKlass.hpp" #include "oops/resolvedFieldEntry.hpp" #include "oops/resolvedIndyEntry.hpp" #include "oops/resolvedMethodEntry.hpp" @@ -208,6 +210,40 @@ void InterpreterMacroAssembler::get_method_counters(Register method, bind(has_counters); } +void InterpreterMacroAssembler::read_flat_field(Register entry, Register obj) { + call_VM(obj, CAST_FROM_FN_PTR(address, InterpreterRuntime::read_flat_field), obj, entry); + membar(Assembler::StoreStore); +} + +void InterpreterMacroAssembler::write_flat_field(Register entry, Register field_offset, + Register tmp1, Register tmp2, + Register obj) { + assert_different_registers(entry, field_offset, tmp1, tmp2, obj); + Label slow_path, done; + + load_unsigned_byte(tmp1, Address(entry, in_bytes(ResolvedFieldEntry::flags_offset()))); + test_field_is_not_null_free_inline_type(tmp1, noreg /* temp */, slow_path); + + null_check(r0); // FIXME JDK-8341120 + + add(obj, obj, field_offset); + + load_klass(tmp1, r0); + payload_address(r0, r0, tmp1); + + Register layout_info = field_offset; + load_unsigned_short(tmp1, Address(entry, in_bytes(ResolvedFieldEntry::field_index_offset()))); + ldr(tmp2, Address(entry, in_bytes(ResolvedFieldEntry::field_holder_offset()))); + inline_layout_info(tmp2, tmp1, layout_info); + + flat_field_copy(IN_HEAP, r0, obj, layout_info); + b(done); + + bind(slow_path); + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::write_flat_field), obj, r0, entry); + bind(done); +} + // Load object from cpool->resolved_references(index) void InterpreterMacroAssembler::load_resolved_reference_at_index( Register result, Register index, Register tmp) { @@ -242,13 +278,15 @@ void InterpreterMacroAssembler::load_resolved_klass_at_offset( // Kills: // r2 void 
InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass, - Label& ok_is_subtype) { + Label& ok_is_subtype, + bool profile) { assert(Rsub_klass != r0, "r0 holds superklass"); assert(Rsub_klass != r2, "r2 holds 2ndary super array length"); // Profile the not-null value's klass. - profile_typecheck(r2, Rsub_klass); // blows r2 - + if (profile) { + profile_typecheck(r2, Rsub_klass); // blows r2 + } // Do the check. check_klass_subtype(Rsub_klass, r0, r2, ok_is_subtype); // blows r2 } @@ -658,6 +696,37 @@ void InterpreterMacroAssembler::remove_activation(TosState state, bind(no_reserved_zone_enabling); } + if (state == atos && InlineTypeReturnedAsFields) { + Label skip; + Label not_null; + cbnz(r0, not_null); + // Returned value is null, zero all return registers because they may belong to oop fields + mov(j_rarg1, zr); + mov(j_rarg2, zr); + mov(j_rarg3, zr); + mov(j_rarg4, zr); + mov(j_rarg5, zr); + mov(j_rarg6, zr); + mov(j_rarg7, zr); + b(skip); + bind(not_null); + + // Check if we are returning a non-null inline type and load its fields into registers + test_oop_is_not_inline_type(r0, rscratch2, skip, /* can_be_null= */ false); + + // Load fields from a buffered value with an inline class specific handler + load_klass(rscratch1 /*dst*/, r0 /*src*/); + ldr(rscratch1, Address(rscratch1, InlineKlass::adr_members_offset())); + ldr(rscratch1, Address(rscratch1, InlineKlass::unpack_handler_offset())); + // Unpack handler can be null if inline type is not scalarizable in returns + cbz(rscratch1, skip); + + blr(rscratch1); + bind(skip); + // Check above kills sender esp in rscratch2. Reload it. 
+ ldr(rscratch2, Address(rfp, frame::interpreter_frame_sender_sp_offset * wordSize)); + } + // remove frame anchor leave(); @@ -935,7 +1004,7 @@ void InterpreterMacroAssembler::profile_taken_branch(Register mdp) { } -void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { +void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp, bool acmp) { if (ProfileInterpreter) { Label profile_continue; @@ -947,7 +1016,7 @@ void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { // The method data pointer needs to be updated to correspond to // the next bytecode - update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); + update_mdp_by_constant(mdp, acmp ? in_bytes(ACmpData::acmp_data_size()) : in_bytes(BranchData::branch_data_size())); bind(profile_continue); } } @@ -1133,6 +1202,120 @@ void InterpreterMacroAssembler::profile_switch_case(Register index, } } +template void InterpreterMacroAssembler::profile_array_type(Register mdp, + Register array, + Register tmp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. 
+ test_method_data_pointer(mdp, profile_continue); + + mov(tmp, array); + profile_obj_type(tmp, Address(mdp, in_bytes(ArrayData::array_offset()))); + + Label not_flat; + test_non_flat_array_oop(array, tmp, not_flat); + + set_mdp_flag_at(mdp, ArrayData::flat_array_byte_constant()); + + bind(not_flat); + + Label not_null_free; + test_non_null_free_array_oop(array, tmp, not_null_free); + + set_mdp_flag_at(mdp, ArrayData::null_free_array_byte_constant()); + + bind(not_null_free); + + bind(profile_continue); + } +} + +template void InterpreterMacroAssembler::profile_array_type(Register mdp, + Register array, + Register tmp); +template void InterpreterMacroAssembler::profile_array_type(Register mdp, + Register array, + Register tmp); + +void InterpreterMacroAssembler::profile_multiple_element_types(Register mdp, Register element, Register tmp, const Register tmp2) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + Label done, update; + cbnz(element, update); + set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); + b(done); + + bind(update); + load_klass(tmp, element); + + // Record the object type. + profile_receiver_type(tmp, mdp, 0); + + bind(done); + + // The method data pointer needs to be updated. + update_mdp_by_constant(mdp, in_bytes(ArrayStoreData::array_store_data_size())); + + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_element_type(Register mdp, + Register element, + Register tmp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + mov(tmp, element); + profile_obj_type(tmp, Address(mdp, in_bytes(ArrayLoadData::element_offset()))); + + // The method data pointer needs to be updated. 
+ update_mdp_by_constant(mdp, in_bytes(ArrayLoadData::array_load_data_size())); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_acmp(Register mdp, + Register left, + Register right, + Register tmp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + mov(tmp, left); + profile_obj_type(tmp, Address(mdp, in_bytes(ACmpData::left_offset()))); + + Label left_not_inline_type; + test_oop_is_not_inline_type(left, tmp, left_not_inline_type); + set_mdp_flag_at(mdp, ACmpData::left_inline_type_byte_constant()); + bind(left_not_inline_type); + + mov(tmp, right); + profile_obj_type(tmp, Address(mdp, in_bytes(ACmpData::right_offset()))); + + Label right_not_inline_type; + test_oop_is_not_inline_type(right, tmp, right_not_inline_type); + set_mdp_flag_at(mdp, ACmpData::right_inline_type_byte_constant()); + bind(right_not_inline_type); + + bind(profile_continue); + } +} + void InterpreterMacroAssembler::_interp_verify_oop(Register reg, TosState state, const char* file, int line) { if (state == atos) { MacroAssembler::_verify_oop_checked(reg, "broken oop", file, line); @@ -1503,7 +1686,7 @@ void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register ca // argument. tmp is the number of cells left in the // CallTypeData/VirtualCallTypeData to reach its end. Non null // if there's a return to profile. 
- assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); + assert(SingleTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); add(mdp, mdp, tmp, LSL, exact_log2(DataLayout::cell_size)); } str(mdp, Address(rfp, frame::interpreter_frame_mdp_offset * wordSize)); @@ -1549,7 +1732,7 @@ void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, bind(do_profile); } - Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); + Address mdo_ret_addr(mdp, -in_bytes(SingleTypeEntry::size())); mov(tmp, ret); profile_obj_type(tmp, mdo_ret_addr); diff --git a/src/hotspot/cpu/aarch64/interp_masm_aarch64.hpp b/src/hotspot/cpu/aarch64/interp_masm_aarch64.hpp index 9a074f1ce69d9..d72137a09448c 100644 --- a/src/hotspot/cpu/aarch64/interp_masm_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/interp_masm_aarch64.hpp @@ -158,6 +158,16 @@ class InterpreterMacroAssembler: public MacroAssembler { void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2)); void get_method_counters(Register method, Register mcs, Label& skip); + // Allocate instance in "obj" and read in the content of the inline field + // NOTES: + // - input holder object via "obj", which must be r0, + // will return new instance via the same reg + void read_flat_field(Register entry, Register obj); + + void write_flat_field(Register entry, Register field_offset, + Register tmp1, Register tmp2, + Register obj); + // load cpool->resolved_references(index); void load_resolved_reference_at_index(Register result, Register index, Register tmp = r5); @@ -202,7 +212,7 @@ class InterpreterMacroAssembler: public MacroAssembler { // Generate a subtype check: branch to ok_is_subtype if sub_klass is // a subtype of super_klass. 
- void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); + void gen_subtype_check( Register sub_klass, Label &ok_is_subtype, bool profile = true); // Dispatching void dispatch_prolog(TosState state, int step = 0); @@ -282,7 +292,7 @@ class InterpreterMacroAssembler: public MacroAssembler { void narrow(Register result); void profile_taken_branch(Register mdp); - void profile_not_taken_branch(Register mdp); + void profile_not_taken_branch(Register mdp, bool acmp = false); void profile_call(Register mdp); void profile_final_call(Register mdp); void profile_virtual_call(Register receiver, Register mdp); @@ -293,6 +303,10 @@ class InterpreterMacroAssembler: public MacroAssembler { void profile_switch_default(Register mdp); void profile_switch_case(Register index_in_scratch, Register mdp, Register scratch2); + template void profile_array_type(Register mdp, Register array, Register tmp); + void profile_multiple_element_types(Register mdp, Register element, Register tmp, Register tmp2); + void profile_element_type(Register mdp, Register element, Register tmp); + void profile_acmp(Register mdp, Register left, Register right, Register tmp); void profile_obj_type(Register obj, const Address& mdo_addr); void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); diff --git a/src/hotspot/cpu/aarch64/interpreterRT_aarch64.cpp b/src/hotspot/cpu/aarch64/interpreterRT_aarch64.cpp index d6310a2d326c9..5369f211381d5 100644 --- a/src/hotspot/cpu/aarch64/interpreterRT_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/interpreterRT_aarch64.cpp @@ -173,6 +173,10 @@ void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { } } +void InterpreterRuntime::SignatureHandlerGenerator::pass_valuetype() { + pass_object(); +} + void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { // generate code to handle arguments iterate(fingerprint); @@ -257,6 +261,11 @@ class SlowSignatureHandler } } + virtual void pass_valuetype() 
{ + // values are handled with oops, like objects + pass_object(); + } + virtual void pass_long() { intptr_t value = *double_slot_addr(); if (pass_gpr(value) < 0) { diff --git a/src/hotspot/cpu/aarch64/interpreterRT_aarch64.hpp b/src/hotspot/cpu/aarch64/interpreterRT_aarch64.hpp index 8f84ebbe11494..e08291c5db694 100644 --- a/src/hotspot/cpu/aarch64/interpreterRT_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/interpreterRT_aarch64.hpp @@ -46,6 +46,7 @@ class SignatureHandlerGenerator: public NativeSignatureIterator { void pass_float(); void pass_double(); void pass_object(); + void pass_valuetype(); Register next_gpr(); FloatRegister next_fpr(); diff --git a/src/hotspot/cpu/aarch64/jniFastGetField_aarch64.cpp b/src/hotspot/cpu/aarch64/jniFastGetField_aarch64.cpp index 8bec45b4b479a..cffdcf494296d 100644 --- a/src/hotspot/cpu/aarch64/jniFastGetField_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/jniFastGetField_aarch64.cpp @@ -30,6 +30,7 @@ #include "prims/jniFastGetField.hpp" #include "prims/jvm_misc.hpp" #include "prims/jvmtiExport.hpp" +#include "runtime/jfieldIDWorkaround.hpp" #include "runtime/javaThread.inline.hpp" #include "runtime/safepoint.hpp" #include "runtime/threadWXSetters.inline.hpp" @@ -152,7 +153,7 @@ address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); bs->try_resolve_jobject_in_native(masm, c_rarg0, robj, rscratch1, slow); - __ lsr(roffset, c_rarg2, 2); // offset + __ lsr(roffset, c_rarg2, jfieldIDWorkaround::offset_shift); // offset __ add(result, robj, roffset); assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp index 1c052b67503d9..4a7ebeae5041f 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp @@ -26,6 +26,7 @@ #include "asm/assembler.hpp" #include 
"asm/assembler.inline.hpp" #include "ci/ciEnv.hpp" +#include "ci/ciInlineKlass.hpp" #include "code/compiledIC.hpp" #include "compiler/compileTask.hpp" #include "compiler/disassembler.hpp" @@ -47,16 +48,21 @@ #include "oops/compressedKlass.inline.hpp" #include "oops/compressedOops.inline.hpp" #include "oops/klass.inline.hpp" +#include "oops/resolvedFieldEntry.hpp" +#include "runtime/arguments.hpp" #include "runtime/continuation.hpp" +#include "runtime/globals.hpp" #include "runtime/icache.hpp" #include "runtime/interfaceSupport.inline.hpp" #include "runtime/javaThread.hpp" #include "runtime/jniHandles.inline.hpp" #include "runtime/sharedRuntime.hpp" +#include "runtime/signature_cc.hpp" #include "runtime/stubRoutines.hpp" #include "utilities/globalDefinitions.hpp" #include "utilities/integerCast.hpp" #include "utilities/powerOfTwo.hpp" +#include "vmreg_aarch64.inline.hpp" #ifdef COMPILER1 #include "c1/c1_LIRAssembler.hpp" #endif @@ -2008,7 +2014,11 @@ void MacroAssembler::clinit_barrier(Register klass, Register scratch, Label* L_f } void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) { - if (!VerifyOops) return; + if (!VerifyOops || VerifyAdapterSharing) { + // Below address of the code string confuses VerifyAdapterSharing + // because it may differ between otherwise equivalent adapters. + return; + } // Pass register number to verify_oop_subroutine const char* b = nullptr; @@ -2041,7 +2051,11 @@ void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, } void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) { - if (!VerifyOops) return; + if (!VerifyOops || VerifyAdapterSharing) { + // Below address of the code string confuses VerifyAdapterSharing + // because it may differ between otherwise equivalent adapters. 
+ return; + } const char* b = nullptr; { @@ -2291,6 +2305,10 @@ void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, call_VM_leaf_base(entry_point, 3); } +void MacroAssembler::super_call_VM_leaf(address entry_point) { + MacroAssembler::call_VM_leaf_base(entry_point, 1); +} + void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { pass_arg0(this, arg_0); MacroAssembler::call_VM_leaf_base(entry_point, 1); @@ -2336,6 +2354,84 @@ void MacroAssembler::null_check(Register reg, int offset) { } } +void MacroAssembler::test_markword_is_inline_type(Register markword, Label& is_inline_type) { + assert_different_registers(markword, rscratch2); + mov(rscratch2, markWord::inline_type_pattern_mask); + andr(markword, markword, rscratch2); + mov(rscratch2, markWord::inline_type_pattern); + cmp(markword, rscratch2); + br(Assembler::EQ, is_inline_type); +} + +void MacroAssembler::test_oop_is_not_inline_type(Register object, Register tmp, Label& not_inline_type, bool can_be_null) { + assert_different_registers(tmp, rscratch1); + if (can_be_null) { + cbz(object, not_inline_type); + } + const int is_inline_type_mask = markWord::inline_type_pattern; + ldr(tmp, Address(object, oopDesc::mark_offset_in_bytes())); + mov(rscratch1, is_inline_type_mask); + andr(tmp, tmp, rscratch1); + cmp(tmp, rscratch1); + br(Assembler::NE, not_inline_type); +} + +void MacroAssembler::test_field_is_null_free_inline_type(Register flags, Register temp_reg, Label& is_null_free_inline_type) { + assert(temp_reg == noreg, "not needed"); // keep signature uniform with x86 + tbnz(flags, ResolvedFieldEntry::is_null_free_inline_type_shift, is_null_free_inline_type); +} + +void MacroAssembler::test_field_is_not_null_free_inline_type(Register flags, Register temp_reg, Label& not_null_free_inline_type) { + assert(temp_reg == noreg, "not needed"); // keep signature uniform with x86 + tbz(flags, ResolvedFieldEntry::is_null_free_inline_type_shift, not_null_free_inline_type); +} + 
+void MacroAssembler::test_field_is_flat(Register flags, Register temp_reg, Label& is_flat) { + assert(temp_reg == noreg, "not needed"); // keep signature uniform with x86 + tbnz(flags, ResolvedFieldEntry::is_flat_shift, is_flat); +} + +void MacroAssembler::test_oop_prototype_bit(Register oop, Register temp_reg, int32_t test_bit, bool jmp_set, Label& jmp_label) { + Label test_mark_word; + // load mark word + ldr(temp_reg, Address(oop, oopDesc::mark_offset_in_bytes())); + // check displaced + tst(temp_reg, markWord::unlocked_value); + br(Assembler::NE, test_mark_word); + // slow path use klass prototype + load_prototype_header(temp_reg, oop); + + bind(test_mark_word); + andr(temp_reg, temp_reg, test_bit); + if (jmp_set) { + cbnz(temp_reg, jmp_label); + } else { + cbz(temp_reg, jmp_label); + } +} + +void MacroAssembler::test_flat_array_oop(Register oop, Register temp_reg, Label& is_flat_array) { + test_oop_prototype_bit(oop, temp_reg, markWord::flat_array_bit_in_place, true, is_flat_array); +} + +void MacroAssembler::test_non_flat_array_oop(Register oop, Register temp_reg, + Label&is_non_flat_array) { + test_oop_prototype_bit(oop, temp_reg, markWord::flat_array_bit_in_place, false, is_non_flat_array); +} + +void MacroAssembler::test_null_free_array_oop(Register oop, Register temp_reg, Label& is_null_free_array) { + test_oop_prototype_bit(oop, temp_reg, markWord::null_free_array_bit_in_place, true, is_null_free_array); +} + +void MacroAssembler::test_non_null_free_array_oop(Register oop, Register temp_reg, Label&is_non_null_free_array) { + test_oop_prototype_bit(oop, temp_reg, markWord::null_free_array_bit_in_place, false, is_non_null_free_array); +} + +void MacroAssembler::test_flat_array_layout(Register lh, Label& is_flat_array) { + tst(lh, Klass::_lh_array_tag_flat_value_bit_inplace); + br(Assembler::NE, is_flat_array); +} + // MacroAssembler protected routines needed to implement // public methods @@ -5074,6 +5170,14 @@ void 
MacroAssembler::load_method_holder(Register holder, Register method) { ldr(holder, Address(holder, ConstantPool::pool_holder_offset())); // InstanceKlass* } +void MacroAssembler::load_metadata(Register dst, Register src) { + if (UseCompactObjectHeaders) { + load_narrow_klass_compact(dst, src); + } else { + ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes())); + } +} + // Loads the obj's Klass* into dst. // Preserves all registers (incl src, rscratch1 and rscratch2). // Input: @@ -5165,7 +5269,7 @@ void MacroAssembler::cmp_klass(Register obj, Register klass, Register tmp) { void MacroAssembler::cmp_klasses_from_objects(Register obj1, Register obj2, Register tmp1, Register tmp2) { if (UseCompactObjectHeaders) { load_narrow_klass_compact(tmp1, obj1); - load_narrow_klass_compact(tmp2, obj2); + load_narrow_klass_compact(tmp2, obj2); } else { ldrw(tmp1, Address(obj1, oopDesc::klass_offset_in_bytes())); ldrw(tmp2, Address(obj2, oopDesc::klass_offset_in_bytes())); @@ -5173,6 +5277,11 @@ void MacroAssembler::cmp_klasses_from_objects(Register obj1, Register obj2, Regi cmpw(tmp1, tmp2); } +void MacroAssembler::load_prototype_header(Register dst, Register src) { + load_klass(dst, src); + ldr(dst, Address(dst, Klass::prototype_header_offset())); +} + void MacroAssembler::store_klass(Register dst, Register src) { // FIXME: Should this be a store release? concurrent gcs assumes // klass length is valid if klass field is not null. 
@@ -5562,6 +5671,28 @@ void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, } } +void MacroAssembler::flat_field_copy(DecoratorSet decorators, Register src, Register dst, + Register inline_layout_info) { + BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->flat_field_copy(this, decorators, src, dst, inline_layout_info); +} + +void MacroAssembler::payload_offset(Register inline_klass, Register offset) { + ldr(offset, Address(inline_klass, InlineKlass::adr_members_offset())); + ldrw(offset, Address(offset, InlineKlass::payload_offset_offset())); +} + +void MacroAssembler::payload_address(Register oop, Register data, Register inline_klass) { + // ((address) (void*) o) + vk->payload_offset(); + Register offset = (data == oop) ? rscratch1 : data; + payload_offset(inline_klass, offset); + if (data == oop) { + add(data, data, offset); + } else { + lea(data, Address(oop, offset)); + } +} + void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, Register tmp2, DecoratorSet decorators) { access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2); @@ -5676,6 +5807,21 @@ void MacroAssembler::verify_tlab() { #endif } +void MacroAssembler::inline_layout_info(Register holder_klass, Register index, Register layout_info) { + assert_different_registers(holder_klass, index, layout_info); + InlineLayoutInfo array[2]; + int size = (char*)&array[1] - (char*)&array[0]; // computing size of array elements + if (is_power_of_2(size)) { + lsl(index, index, log2i_exact(size)); // Scale index by power of 2 + } else { + mov(layout_info, size); + mul(index, index, layout_info); // Scale the index to be the entry index * array_element_size + } + ldr(layout_info, Address(holder_klass, InstanceKlass::inline_layout_info_array_offset())); + add(layout_info, layout_info, Array<InlineLayoutInfo>::base_offset_in_bytes()); + lea(layout_info, Address(layout_info, index)); +} + // Writes to stack successive pages until offset reached to
check for // stack overflow + shadow pages. This clobbers tmp. void MacroAssembler::bang_stack_size(Register size, Register tmp) { @@ -5782,16 +5928,32 @@ void MacroAssembler::load_aotrc_address(Register reg, address a) { #endif } +#ifdef ASSERT void MacroAssembler::build_frame(int framesize) { + build_frame(framesize, false); +} +#endif + +void MacroAssembler::build_frame(int framesize DEBUG_ONLY(COMMA bool zap_rfp_lr_spills)) { assert(framesize >= 2 * wordSize, "framesize must include space for FP/LR"); assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); protect_return_address(); if (framesize < ((1 << 9) + 2 * wordSize)) { sub(sp, sp, framesize); - stp(rfp, lr, Address(sp, framesize - 2 * wordSize)); + if (DEBUG_ONLY(zap_rfp_lr_spills ||) false) { + mov_immediate64(rscratch1, ((uint64_t)badRegWordVal) << 32 | (uint64_t)badRegWordVal); + stp(rscratch1, rscratch1, Address(sp, framesize - 2 * wordSize)); + } else { + stp(rfp, lr, Address(sp, framesize - 2 * wordSize)); + } if (PreserveFramePointer) add(rfp, sp, framesize - 2 * wordSize); } else { - stp(rfp, lr, Address(pre(sp, -2 * wordSize))); + if (DEBUG_ONLY(zap_rfp_lr_spills ||) false) { + mov_immediate64(rscratch1, ((uint64_t)badRegWordVal) << 32 | (uint64_t)badRegWordVal); + stp(rscratch1, rscratch1, Address(pre(sp, -2 * wordSize))); + } else { + stp(rfp, lr, Address(pre(sp, -2 * wordSize))); + } if (PreserveFramePointer) mov(rfp, sp); if (framesize < ((1 << 12) + 2 * wordSize)) sub(sp, sp, framesize - 2 * wordSize); @@ -5821,6 +5983,88 @@ void MacroAssembler::remove_frame(int framesize) { authenticate_return_address(); } +void MacroAssembler::remove_frame(int initial_framesize, bool needs_stack_repair) { + if (needs_stack_repair) { + // The method has a scalarized entry point (where fields of value object arguments + // are passed through registers and stack), and a non-scalarized entry point (where + // value object arguments are given as oops). 
The non-scalarized entry point will + // first load each field of value object arguments and store them in registers and on + // the stack in a way compatible with the scalarized entry point. To do so, some extra + // stack space might be reserved (if argument registers are not enough). On leaving the + // method, this space must be freed. + // + // In case we used the non-scalarized entry point the stack looks like this: + // + // | Arguments from caller | + // |---------------------------| <-- caller's SP + // | Saved LR #1 | + // | Saved FP #1 | + // |---------------------------| + // | Extension space for | + // | inline arg (un)packing | + // |---------------------------| <-- start of this method's frame + // | Saved LR #2 | + // | Saved FP #2 | + // |---------------------------| <-- FP (with -XX:+PreserveFramePointer) + // | sp_inc | + // | method locals | + // |---------------------------| <-- SP + // + // There are two copies of FP and LR on the stack. They will be identical at + // first, but that can change. + // If the caller has been deoptimized, LR #1 will be patched to point at the + // deopt blob, and LR #2 will still point into the old method. + // If the saved FP (x29) was not used as the frame pointer, but to store an + // oop, the GC will be aware only of FP #1 as the spilled location of x29 and + // will fix only this one. Overall, FP/LR #2 are not reliable and are simply + // needed to add space between the extension space and the locals, as there + // would be between the real arguments and the locals if we don't need to + // do unpacking (from the scalarized entry point). + // + // When restoring, one must then load FP #1 into x29, and LR #1 into x30, + // while keeping in mind that from the scalarized entry point, there will be + // only one copy of each. 
Indeed, in the case we used the scalarized calling + // convention, the stack looks like this: + // + // | Arguments from caller | + // |---------------------------| <-- caller's SP / start of this method's frame + // | Saved LR | + // | Saved FP | + // |---------------------------| <-- FP (with -XX:+PreserveFramePointer) + // | sp_inc | + // | method locals | + // |---------------------------| <-- SP + // + // The sp_inc stack slot holds the total size of the frame including the + // extension space minus two words for the saved FP and LR. That is how to + // find FP/LR #1. This size is expressed in bytes. Be careful when using it + // from C++ in pointer arithmetic; you might need to divide it by wordSize. + // + // One can find sp_inc since the start of the method's frame is SP + initial_framesize. + + int sp_inc_offset = initial_framesize - 3 * wordSize; // Immediately below saved LR and FP + + ldr(rscratch1, Address(sp, sp_inc_offset)); + add(sp, sp, rscratch1); + ldp(rfp, lr, Address(post(sp, 2 * wordSize))); + } else { + remove_frame(initial_framesize); + } +} + +void MacroAssembler::save_stack_increment(int sp_inc, int frame_size) { + int real_frame_size = frame_size + sp_inc; + assert(sp_inc == 0 || sp_inc > 2*wordSize, "invalid sp_inc value"); + assert(real_frame_size >= 2*wordSize, "frame size must include FP/LR space"); + assert((real_frame_size & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); + + int sp_inc_offset = frame_size - 3 * wordSize; // Immediately below saved LR and FP + + // Subtract two words for the saved FP and LR as these will be popped + // separately. See remove_frame above.
+ mov(rscratch1, real_frame_size - 2*wordSize); + str(rscratch1, Address(sp, sp_inc_offset)); +} // This method counts leading positive bytes (highest bit not set) in provided byte array address MacroAssembler::count_positives(Register ary1, Register len, Register result) { @@ -6744,6 +6988,482 @@ void MacroAssembler::get_thread(Register dst) { authenticate_return_address(); } +#ifdef COMPILER2 +// C2 compiled method's prolog code +// Moved here from aarch64.ad to support Valhalla code below +void MacroAssembler::verified_entry(Compile* C, int sp_inc) { + if (C->clinit_barrier_on_entry()) { + assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started"); + + Label L_skip_barrier; + + mov_metadata(rscratch2, C->method()->holder()->constant_encoding()); + clinit_barrier(rscratch2, rscratch1, &L_skip_barrier); + far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); + bind(L_skip_barrier); + } + + if (C->max_vector_size() > 0) { + reinitialize_ptrue(); + } + + int bangsize = C->output()->bang_size_in_bytes(); + if (C->output()->need_stack_bang(bangsize)) + generate_stack_overflow_check(bangsize); + + // n.b. frame size includes space for return pc and rfp + const long framesize = C->output()->frame_size_in_bytes(); + build_frame(framesize DEBUG_ONLY(COMMA sp_inc != 0)); + + if (C->needs_stack_repair()) { + save_stack_increment(sp_inc, framesize); + } + + if (VerifyStackAtCalls) { + Unimplemented(); + } +} +#endif // COMPILER2 + +int MacroAssembler::store_inline_type_fields_to_buf(ciInlineKlass* vk, bool from_interpreter) { + assert(InlineTypeReturnedAsFields, "Inline types should never be returned as fields"); + // An inline type might be returned. If fields are in registers we + // need to allocate an inline type instance and initialize it with + // the value of the fields. 
+ Label skip; + // We only need a new buffered inline type if a new one is not returned + tbz(r0, 0, skip); + int call_offset = -1; + + // Be careful not to clobber r1-7 which hold returned fields + // Also do not use callee-saved registers as these may be live in the interpreter + Register tmp1 = r13, tmp2 = r14, klass = r15, r0_preserved = r12; + + // The following code is similar to the instance allocation code in TemplateTable::_new + // but has some slight differences, + // e.g. object size is always not zero, sometimes it's constant; storing klass ptr after + // allocating is not necessary if vk != nullptr, etc. + Label slow_case; + // 1. Try to allocate a new buffered inline instance either from TLAB or eden space + mov(r0_preserved, r0); // save r0 for slow_case since *_allocate may corrupt it when allocation failed + + if (vk != nullptr) { + // Called from C1, where the return type is statically known. + movptr(klass, (intptr_t)vk->get_InlineKlass()); + jint lh = vk->layout_helper(); + assert(lh != Klass::_lh_neutral_value, "inline class in return type must have been resolved"); + if (UseTLAB && !Klass::layout_helper_needs_slow_path(lh)) { + tlab_allocate(r0, noreg, lh, tmp1, tmp2, slow_case); + } else { + b(slow_case); + } + } else { + // Call from interpreter. R0 contains ((the InlineKlass* of the return type) | 0x01) + andr(klass, r0, -2); + if (UseTLAB) { + ldrw(tmp2, Address(klass, Klass::layout_helper_offset())); + tst(tmp2, Klass::_lh_instance_slow_path_bit); + br(Assembler::NE, slow_case); + tlab_allocate(r0, tmp2, 0, tmp1, tmp2, slow_case); + } else { + b(slow_case); + } + } + if (UseTLAB) { + // 2. 
Initialize buffered inline instance header + Register buffer_obj = r0; + if (UseCompactObjectHeaders) { + ldr(rscratch1, Address(klass, Klass::prototype_header_offset())); + str(rscratch1, Address(buffer_obj, oopDesc::mark_offset_in_bytes())); + } else { + mov(rscratch1, (intptr_t)markWord::inline_type_prototype().value()); + str(rscratch1, Address(buffer_obj, oopDesc::mark_offset_in_bytes())); + store_klass_gap(buffer_obj, zr); + if (vk == nullptr) { + // store_klass corrupts klass, so save it for later use (interpreter case only). + mov(tmp1, klass); + } + store_klass(buffer_obj, klass); + klass = tmp1; + } + // 3. Initialize its fields with an inline class specific handler + if (vk != nullptr) { + far_call(RuntimeAddress(vk->pack_handler())); // no need for call info as this will not safepoint. + } else { + ldr(tmp1, Address(klass, InlineKlass::adr_members_offset())); + ldr(tmp1, Address(tmp1, InlineKlass::pack_handler_offset())); + blr(tmp1); + } + + membar(Assembler::StoreStore); + b(skip); + } else { + // Must have already branched to slow_case above. + DEBUG_ONLY(should_not_reach_here()); + } + bind(slow_case); + // We failed to allocate a new inline type, fall back to a runtime + // call. Some oop field may be live in some registers but we can't + // tell. That runtime call will take care of preserving them + // across a GC if there's one. 
+ mov(r0, r0_preserved); + + if (from_interpreter) { + super_call_VM_leaf(StubRoutines::store_inline_type_fields_to_buf()); + } else { + far_call(RuntimeAddress(StubRoutines::store_inline_type_fields_to_buf())); + call_offset = offset(); + } + membar(Assembler::StoreStore); + + bind(skip); + return call_offset; +} + +// Move a value between registers/stack slots and update the reg_state +bool MacroAssembler::move_helper(VMReg from, VMReg to, BasicType bt, RegState reg_state[]) { + assert(from->is_valid() && to->is_valid(), "source and destination must be valid"); + if (reg_state[to->value()] == reg_written) { + return true; // Already written + } + + if (from != to && bt != T_VOID) { + if (reg_state[to->value()] == reg_readonly) { + return false; // Not yet writable + } + if (from->is_reg()) { + if (to->is_reg()) { + if (from->is_Register() && to->is_Register()) { + mov(to->as_Register(), from->as_Register()); + } else if (from->is_FloatRegister() && to->is_FloatRegister()) { + fmovd(to->as_FloatRegister(), from->as_FloatRegister()); + } else { + ShouldNotReachHere(); + } + } else { + int st_off = to->reg2stack() * VMRegImpl::stack_slot_size; + Address to_addr = Address(sp, st_off); + if (from->is_FloatRegister()) { + if (bt == T_DOUBLE) { + strd(from->as_FloatRegister(), to_addr); + } else { + assert(bt == T_FLOAT, "must be float"); + strs(from->as_FloatRegister(), to_addr); + } + } else { + str(from->as_Register(), to_addr); + } + } + } else { + Address from_addr = Address(sp, from->reg2stack() * VMRegImpl::stack_slot_size); + if (to->is_reg()) { + if (to->is_FloatRegister()) { + if (bt == T_DOUBLE) { + ldrd(to->as_FloatRegister(), from_addr); + } else { + assert(bt == T_FLOAT, "must be float"); + ldrs(to->as_FloatRegister(), from_addr); + } + } else { + ldr(to->as_Register(), from_addr); + } + } else { + int st_off = to->reg2stack() * VMRegImpl::stack_slot_size; + ldr(rscratch1, from_addr); + str(rscratch1, Address(sp, st_off)); + } + } + } + + // Update 
register states + reg_state[from->value()] = reg_writable; + reg_state[to->value()] = reg_written; + return true; +} + +// Calculate the extra stack space required for packing or unpacking inline +// args and adjust the stack pointer +int MacroAssembler::extend_stack_for_inline_args(int args_on_stack) { + int sp_inc = args_on_stack * VMRegImpl::stack_slot_size; + sp_inc = align_up(sp_inc, StackAlignmentInBytes); + assert(sp_inc > 0, "sanity"); + + // Save a copy of the FP and LR here for deoptimization patching and frame walking + stp(rfp, lr, Address(pre(sp, -2 * wordSize))); + + // Adjust the stack pointer. This will be repaired on return by MacroAssembler::remove_frame + if (sp_inc < (1 << 9)) { + sub(sp, sp, sp_inc); // Fits in an immediate + } else { + mov(rscratch1, sp_inc); + sub(sp, sp, rscratch1); + } + + return sp_inc + 2 * wordSize; // Account for the FP/LR space +} + +// Read all fields from an inline type oop and store the values in registers/stack slots +bool MacroAssembler::unpack_inline_helper(const GrowableArray<SigEntry>* sig, int& sig_index, + VMReg from, int& from_index, VMRegPair* to, int to_count, int& to_index, + RegState reg_state[]) { + assert(sig->at(sig_index)._bt == T_VOID, "should be at end delimiter"); + assert(from->is_valid(), "source must be valid"); + bool progress = false; +#ifdef ASSERT + const int start_offset = offset(); +#endif + + Label L_null, L_notNull; + // Don't use r14 as tmp because it's used for spilling (see MacroAssembler::spill_reg_for) + Register tmp1 = r10; + Register tmp2 = r11; + +#ifdef ASSERT + RegSet clobbered_gp_regs = MacroAssembler::call_clobbered_gp_registers(); + assert(clobbered_gp_regs.contains(tmp1), "tmp1 must be saved explicitly if it's not a clobber"); + assert(clobbered_gp_regs.contains(tmp2), "tmp2 must be saved explicitly if it's not a clobber"); + assert(clobbered_gp_regs.contains(r14), "r14 must be saved explicitly if it's not a clobber"); +#endif + + Register fromReg = noreg; +
ScalarizedInlineArgsStream stream(sig, sig_index, to, to_count, to_index, true); + bool done = true; + bool mark_done = true; + VMReg toReg; + BasicType bt; + // Check if argument requires a null check + bool null_check = false; + VMReg nullCheckReg; + while (stream.next(nullCheckReg, bt)) { + if (sig->at(stream.sig_index())._offset == -1) { + null_check = true; + break; + } + } + stream.reset(sig_index, to_index); + while (stream.next(toReg, bt)) { + assert(toReg->is_valid(), "destination must be valid"); + int idx = (int)toReg->value(); + if (reg_state[idx] == reg_readonly) { + if (idx != from->value()) { + mark_done = false; + } + done = false; + continue; + } else if (reg_state[idx] == reg_written) { + continue; + } + assert(reg_state[idx] == reg_writable, "must be writable"); + reg_state[idx] = reg_written; + progress = true; + + if (fromReg == noreg) { + if (from->is_reg()) { + fromReg = from->as_Register(); + } else { + int st_off = from->reg2stack() * VMRegImpl::stack_slot_size; + ldr(tmp1, Address(sp, st_off)); + fromReg = tmp1; + } + if (null_check) { + // Nullable inline type argument, emit null check + cbz(fromReg, L_null); + } + } + int off = sig->at(stream.sig_index())._offset; + if (off == -1) { + assert(null_check, "Missing null check at"); + if (toReg->is_stack()) { + int st_off = toReg->reg2stack() * VMRegImpl::stack_slot_size; + mov(tmp2, 1); + str(tmp2, Address(sp, st_off)); + } else { + mov(toReg->as_Register(), 1); + } + continue; + } + if (sig->at(stream.sig_index())._vt_oop) { + if (toReg->is_stack()) { + int st_off = toReg->reg2stack() * VMRegImpl::stack_slot_size; + str(fromReg, Address(sp, st_off)); + } else { + mov(toReg->as_Register(), fromReg); + } + continue; + } + assert(off > 0, "offset in object should be positive"); + Address fromAddr = Address(fromReg, off); + if (!toReg->is_FloatRegister()) { + Register dst = toReg->is_stack() ? 
tmp2 : toReg->as_Register(); + if (is_reference_type(bt)) { + load_heap_oop(dst, fromAddr, rscratch1, rscratch2); + } else { + bool is_signed = (bt != T_CHAR) && (bt != T_BOOLEAN); + load_sized_value(dst, fromAddr, type2aelembytes(bt), is_signed); + } + if (toReg->is_stack()) { + int st_off = toReg->reg2stack() * VMRegImpl::stack_slot_size; + str(dst, Address(sp, st_off)); + } + } else if (bt == T_DOUBLE) { + ldrd(toReg->as_FloatRegister(), fromAddr); + } else { + assert(bt == T_FLOAT, "must be float"); + ldrs(toReg->as_FloatRegister(), fromAddr); + } + } + if (progress && null_check) { + if (done) { + b(L_notNull); + bind(L_null); + // Set null marker to zero to signal that the argument is null. + // Also set all fields to zero since the runtime requires a canonical + // representation of a flat null. + stream.reset(sig_index, to_index); + while (stream.next(toReg, bt)) { + if (toReg->is_stack()) { + int st_off = toReg->reg2stack() * VMRegImpl::stack_slot_size; + str(zr, Address(sp, st_off)); + } else if (toReg->is_FloatRegister()) { + mov(toReg->as_FloatRegister(), T2S, 0); + } else { + mov(toReg->as_Register(), zr); + } + } + bind(L_notNull); + } else { + bind(L_null); + } + } + + sig_index = stream.sig_index(); + to_index = stream.regs_index(); + + if (mark_done && reg_state[from->value()] != reg_written) { + // This is okay because no one else will write to that slot + reg_state[from->value()] = reg_writable; + } + from_index--; + assert(progress || (start_offset == offset()), "should not emit code"); + return done; +} + +// Pack fields back into an inline type oop +bool MacroAssembler::pack_inline_helper(const GrowableArray<SigEntry>* sig, int& sig_index, int vtarg_index, + VMRegPair* from, int from_count, int& from_index, VMReg to, + RegState reg_state[], Register val_array) { + assert(sig->at(sig_index)._bt == T_METADATA, "should be at delimiter"); + assert(to->is_valid(), "destination must be valid"); + + if (reg_state[to->value()] == reg_written) { +
skip_unpacked_fields(sig, sig_index, from, from_count, from_index); + return true; // Already written + } + + // The GC barrier expanded by store_heap_oop below may call into the + // runtime so use callee-saved registers for any values that need to be + // preserved. The GC barrier assembler should take care of saving the + // Java argument registers. + // Be careful with r14 because it's used for spilling (see MacroAssembler::spill_reg_for). + Register val_obj_tmp = r21; + Register from_reg_tmp = r22; + Register tmp1 = r14; + Register tmp2 = r13; + Register tmp3 = r12; + Register val_obj = to->is_stack() ? val_obj_tmp : to->as_Register(); + + assert_different_registers(val_obj_tmp, from_reg_tmp, tmp1, tmp2, tmp3, val_array); + + if (reg_state[to->value()] == reg_readonly) { + if (!is_reg_in_unpacked_fields(sig, sig_index, to, from, from_count, from_index)) { + skip_unpacked_fields(sig, sig_index, from, from_count, from_index); + return false; // Not yet writable + } + val_obj = val_obj_tmp; + } + + ScalarizedInlineArgsStream stream(sig, sig_index, from, from_count, from_index); + VMReg fromReg; + BasicType bt; + Label L_null; + while (stream.next(fromReg, bt)) { + assert(fromReg->is_valid(), "source must be valid"); + reg_state[fromReg->value()] = reg_writable; + + int off = sig->at(stream.sig_index())._offset; + if (off == -1) { + // Nullable inline type argument, emit null check + Label L_notNull; + if (fromReg->is_stack()) { + int ld_off = fromReg->reg2stack() * VMRegImpl::stack_slot_size; + ldrb(tmp2, Address(sp, ld_off)); + cbnz(tmp2, L_notNull); + } else { + cbnz(fromReg->as_Register(), L_notNull); + } + mov(val_obj, 0); + b(L_null); + bind(L_notNull); + continue; + } + if (sig->at(stream.sig_index())._vt_oop) { + if (fromReg->is_stack()) { + int ld_off = fromReg->reg2stack() * VMRegImpl::stack_slot_size; + ldr(val_obj, Address(sp, ld_off)); + } else { + mov(val_obj, fromReg->as_Register()); + } + cbnz(val_obj, L_null); + // get the buffer from the just 
allocated pool of buffers + int index = arrayOopDesc::base_offset_in_bytes(T_OBJECT) + vtarg_index * type2aelembytes(T_OBJECT); + load_heap_oop(val_obj, Address(val_array, index), rscratch1, rscratch2); + continue; + } + + assert(off > 0, "offset in object should be positive"); + size_t size_in_bytes = is_java_primitive(bt) ? type2aelembytes(bt) : wordSize; + + // Pack the scalarized field into the value object. + Address dst(val_obj, off); + if (!fromReg->is_FloatRegister()) { + Register src; + if (fromReg->is_stack()) { + src = from_reg_tmp; + int ld_off = fromReg->reg2stack() * VMRegImpl::stack_slot_size; + load_sized_value(src, Address(sp, ld_off), size_in_bytes, /* is_signed */ false); + } else { + src = fromReg->as_Register(); + } + assert_different_registers(dst.base(), src, tmp1, tmp2, tmp3, val_array); + if (is_reference_type(bt)) { + // store_heap_oop transitively calls oop_store_at which corrupts to.base(). We need to keep val_obj valid. + mov(tmp3, val_obj); + Address dst_with_tmp3(tmp3, off); + store_heap_oop(dst_with_tmp3, src, tmp1, tmp2, tmp3, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED); + } else { + store_sized_value(dst, src, size_in_bytes); + } + } else if (bt == T_DOUBLE) { + strd(fromReg->as_FloatRegister(), dst); + } else { + assert(bt == T_FLOAT, "must be float"); + strs(fromReg->as_FloatRegister(), dst); + } + } + bind(L_null); + sig_index = stream.sig_index(); + from_index = stream.regs_index(); + + assert(reg_state[to->value()] == reg_writable, "must have already been read"); + bool success = move_helper(val_obj->as_VMReg(), to, T_OBJECT, reg_state); + assert(success, "to register must be writable"); + return true; +} + +VMReg MacroAssembler::spill_reg_for(VMReg reg) { + return (reg->is_FloatRegister()) ? 
v8->as_VMReg() : r14->as_VMReg(); +} + void MacroAssembler::cache_wb(Address line) { assert(line.getMode() == Address::base_plus_offset, "mode should be base_plus_offset"); assert(line.index() == noreg, "index should be noreg"); @@ -7179,6 +7899,9 @@ void MacroAssembler::fast_lock(Register basic_lock, Register obj, Register t1, R // Try to lock. Transition lock bits 0b01 => 0b00 assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid lea"); orr(mark, mark, markWord::unlocked_value); + // Mask inline_type bit such that we go to the slow path if object is an inline type + andr(mark, mark, ~((int) markWord::inline_type_bit_in_place)); + eor(t, mark, markWord::unlocked_value); cmpxchg(/*addr*/ obj, /*expected*/ mark, /*new*/ t, Assembler::xword, /*acquire*/ true, /*release*/ false, /*weak*/ false, noreg); diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp index 8f1e662765efd..d122591e2ea15 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp @@ -34,7 +34,12 @@ #include "oops/compressedKlass.hpp" #include "runtime/vm_version.hpp" #include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" #include "utilities/powerOfTwo.hpp" +#include "runtime/signature.hpp" + + +class ciInlineKlass; class OopMap; @@ -185,7 +190,8 @@ class MacroAssembler: public Assembler { void strw(Register Rx, const Address &adr); // Frame creation and destruction shared between JITs. 
- void build_frame(int framesize); + DEBUG_ONLY(void build_frame(int framesize);) + void build_frame(int framesize DEBUG_ONLY(COMMA bool zap_rfp_lr_spills)); void remove_frame(int framesize); virtual void _call_Unimplemented(address call_site) { @@ -696,6 +702,26 @@ class MacroAssembler: public Assembler { static bool needs_explicit_null_check(intptr_t offset); static bool uses_implicit_null_check(void* address); + // markWord tests, kills markWord reg + void test_markword_is_inline_type(Register markword, Label& is_inline_type); + + // inlineKlass queries, kills temp_reg + void test_oop_is_not_inline_type(Register object, Register tmp, Label& not_inline_type, bool can_be_null = true); + + void test_field_is_null_free_inline_type(Register flags, Register temp_reg, Label& is_null_free); + void test_field_is_not_null_free_inline_type(Register flags, Register temp_reg, Label& not_null_free); + void test_field_is_flat(Register flags, Register temp_reg, Label& is_flat); + + // Check oops for special arrays, i.e. flat arrays and/or null-free arrays + void test_oop_prototype_bit(Register oop, Register temp_reg, int32_t test_bit, bool jmp_set, Label& jmp_label); + void test_flat_array_oop(Register klass, Register temp_reg, Label& is_flat_array); + void test_non_flat_array_oop(Register oop, Register temp_reg, Label&is_non_flat_array); + void test_null_free_array_oop(Register oop, Register temp_reg, Label& is_null_free_array); + void test_non_null_free_array_oop(Register oop, Register temp_reg, Label&is_non_null_free_array); + + // Check array klass layout helper for flat or null-free arrays... + void test_flat_array_layout(Register lh, Label& is_flat_array); + static address target_addr_for_insn(address insn_addr); // Required platform-specific helpers for Label::patch_instructions. 
@@ -922,6 +948,8 @@ class MacroAssembler: public Assembler { void load_method_holder(Register holder, Register method); // oop manipulations + void load_metadata(Register dst, Register src); + void load_narrow_klass_compact(Register dst, Register src); void load_klass(Register dst, Register src); void store_klass(Register dst, Register src); @@ -938,6 +966,12 @@ class MacroAssembler: public Assembler { void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register val, Register tmp1, Register tmp2, Register tmp3); + void flat_field_copy(DecoratorSet decorators, Register src, Register dst, Register inline_layout_info); + + // inline type data payload offsets... + void payload_offset(Register inline_klass, Register offset); + void payload_address(Register oop, Register data, Register inline_klass); + void load_heap_oop(Register dst, Address src, Register tmp1, Register tmp2, DecoratorSet decorators = 0); @@ -951,6 +985,8 @@ class MacroAssembler: public Assembler { // stored using routines that take a jobject. 
void store_heap_oop_null(Address dst); + void load_prototype_header(Register dst, Register src); + void store_klass_gap(Register dst, Register src); // This dummy is to prevent a call to store_heap_oop from @@ -1000,6 +1036,7 @@ class MacroAssembler: public Assembler { void java_round_float(Register dst, FloatRegister src, FloatRegister ftmp); // allocation + void tlab_allocate( Register obj, // result: pointer to object after successful allocation Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise @@ -1010,6 +1047,8 @@ class MacroAssembler: public Assembler { ); void verify_tlab(); + void inline_layout_info(Register holder_klass, Register index, Register layout_info); + // interface method calling void lookup_interface_method(Register recv_klass, Register intf_klass, @@ -1467,6 +1506,13 @@ class MacroAssembler: public Assembler { void adrp(Register reg1, const Address &dest, uint64_t &byte_offset); + void verified_entry(Compile* C, int sp_inc); + + // Inline type specific methods + #include "asm/macroAssembler_common.hpp" + + void save_stack_increment(int sp_inc, int frame_size); + void tableswitch(Register index, jint lowbound, jint highbound, Label &jumptable, Label &jumptable_end, int stride = 1) { adr(rscratch1, jumptable); @@ -1541,6 +1587,8 @@ class MacroAssembler: public Assembler { void string_equals(Register a1, Register a2, Register result, Register cnt1); void fill_words(Register base, Register cnt, Register value); + void fill_words(Register base, uint64_t cnt, Register value); + address zero_words(Register base, uint64_t cnt); address zero_words(Register ptr, Register cnt); void zero_dcache_blocks(Register base, Register cnt); diff --git a/src/hotspot/cpu/aarch64/methodHandles_aarch64.cpp b/src/hotspot/cpu/aarch64/methodHandles_aarch64.cpp index cdf67e3423f66..4051cbc003b2c 100644 --- a/src/hotspot/cpu/aarch64/methodHandles_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/methodHandles_aarch64.cpp @@ -161,7 
+161,11 @@ void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register meth __ BIND(run_compiled_code); } - const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : + // The following jump might pass an inline type argument that was erased to Object as oop to a + // callee that expects inline type arguments to be passed as fields. We need to call the compiled + // value entry (_code->inline_entry_point() or _adapter->c2i_inline_entry()) which will take care + // of translating between the calling conventions. + const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_inline_offset() : Method::from_interpreted_offset(); __ ldr(rscratch1,Address(method, entry_offset)); __ br(rscratch1); diff --git a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp index 0e3d9d76b9436..005ab86e2420e 100644 --- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp @@ -26,6 +26,7 @@ #include "asm/macroAssembler.hpp" #include "asm/macroAssembler.inline.hpp" +#include "classfile/symbolTable.hpp" #include "code/aotCodeCache.hpp" #include "code/codeCache.hpp" #include "code/compiledIC.hpp" @@ -201,7 +202,6 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ // will allow deoptimization at this safepoint to find all possible // debug-info recordings, as well as let GC find all oops. 
- OopMapSet *oop_maps = new OopMapSet(); OopMap* oop_map = new OopMap(frame_size_in_slots, 0); for (int i = 0; i < Register::number_of_registers; i++) { @@ -351,6 +351,85 @@ int SharedRuntime::java_calling_convention(const BasicType *sig_bt, return stk_args; } + +const uint SharedRuntime::java_return_convention_max_int = Argument::n_int_register_parameters_j; +const uint SharedRuntime::java_return_convention_max_float = Argument::n_float_register_parameters_j; + +int SharedRuntime::java_return_convention(const BasicType *sig_bt, VMRegPair *regs, int total_args_passed) { + + // Create the mapping between argument positions and registers. + + static const Register INT_ArgReg[java_return_convention_max_int] = { + r0 /* j_rarg7 */, j_rarg6, j_rarg5, j_rarg4, j_rarg3, j_rarg2, j_rarg1, j_rarg0 + }; + + static const FloatRegister FP_ArgReg[java_return_convention_max_float] = { + j_farg0, j_farg1, j_farg2, j_farg3, j_farg4, j_farg5, j_farg6, j_farg7 + }; + + uint int_args = 0; + uint fp_args = 0; + + for (int i = 0; i < total_args_passed; i++) { + switch (sig_bt[i]) { + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + if (int_args < SharedRuntime::java_return_convention_max_int) { + regs[i].set1(INT_ArgReg[int_args]->as_VMReg()); + int_args ++; + } else { + return -1; + } + break; + case T_VOID: + // halves of T_LONG or T_DOUBLE + assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); + regs[i].set_bad(); + break; + case T_LONG: + assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); + // fall through + case T_OBJECT: + case T_ARRAY: + case T_ADDRESS: + // Should T_METADATA be added to java_calling_convention as well ? 
+ case T_METADATA: + if (int_args < SharedRuntime::java_return_convention_max_int) { + regs[i].set2(INT_ArgReg[int_args]->as_VMReg()); + int_args ++; + } else { + return -1; + } + break; + case T_FLOAT: + if (fp_args < SharedRuntime::java_return_convention_max_float) { + regs[i].set1(FP_ArgReg[fp_args]->as_VMReg()); + fp_args ++; + } else { + return -1; + } + break; + case T_DOUBLE: + assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); + if (fp_args < SharedRuntime::java_return_convention_max_float) { + regs[i].set2(FP_ArgReg[fp_args]->as_VMReg()); + fp_args ++; + } else { + return -1; + } + break; + default: + ShouldNotReachHere(); + break; + } + } + + return int_args + fp_args; +} + // Patch the callers callsite with entry to compiled code if it exists. static void patch_callers_callsite(MacroAssembler *masm) { Label L; @@ -385,12 +464,148 @@ static void patch_callers_callsite(MacroAssembler *masm) { __ bind(L); } +// For each inline type argument, sig includes the list of fields of +// the inline type. This utility function computes the number of +// arguments for the call if inline types are passed by reference (the +// calling convention the interpreter expects). +static int compute_total_args_passed_int(const GrowableArray* sig_extended) { + int total_args_passed = 0; + if (InlineTypePassFieldsAsArgs) { + for (int i = 0; i < sig_extended->length(); i++) { + BasicType bt = sig_extended->at(i)._bt; + if (bt == T_METADATA) { + // In sig_extended, an inline type argument starts with: + // T_METADATA, followed by the types of the fields of the + // inline type and T_VOID to mark the end of the value + // type. 
Inline types are flattened so, for instance, in the + // case of an inline type with an int field and an inline type + // field that itself has 2 fields, an int and a long: + // T_METADATA T_INT T_METADATA T_INT T_LONG T_VOID (second + // slot for the T_LONG) T_VOID (inner inline type) T_VOID + // (outer inline type) + total_args_passed++; + int vt = 1; + do { + i++; + BasicType bt = sig_extended->at(i)._bt; + BasicType prev_bt = sig_extended->at(i-1)._bt; + if (bt == T_METADATA) { + vt++; + } else if (bt == T_VOID && + prev_bt != T_LONG && + prev_bt != T_DOUBLE) { + vt--; + } + } while (vt != 0); + } else { + total_args_passed++; + } + } + } else { + total_args_passed = sig_extended->length(); + } + return total_args_passed; +} + + +static void gen_c2i_adapter_helper(MacroAssembler* masm, + BasicType bt, + BasicType prev_bt, + size_t size_in_bytes, + const VMRegPair& reg_pair, + const Address& to, + Register tmp1, + Register tmp2, + Register tmp3, + int extraspace, + bool is_oop) { + if (bt == T_VOID) { + assert(prev_bt == T_LONG || prev_bt == T_DOUBLE, "missing half"); + return; + } + + // Say 4 args: + // i st_off + // 0 32 T_LONG + // 1 24 T_VOID + // 2 16 T_OBJECT + // 3 8 T_BOOL + // - 0 return address + // + // However to make thing extra confusing. Because we can fit a Java long/double in + // a single slot on a 64 bt vm and it would be silly to break them up, the interpreter + // leaves one slot empty and only stores to a single slot. In this case the + // slot that is occupied is the T_VOID slot. See I said it was confusing. 
+ + bool wide = (size_in_bytes == wordSize); + VMReg r_1 = reg_pair.first(); + VMReg r_2 = reg_pair.second(); + assert(r_2->is_valid() == wide, "invalid size"); + if (!r_1->is_valid()) { + assert(!r_2->is_valid(), ""); + return; + } + + if (!r_1->is_FloatRegister()) { + Register val = r25; + if (r_1->is_stack()) { + // memory to memory use r25 (scratch registers is used by store_heap_oop) + int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; + __ load_sized_value(val, Address(sp, ld_off), size_in_bytes, /* is_signed */ false); + } else { + val = r_1->as_Register(); + } + assert_different_registers(to.base(), val, tmp1, tmp2, tmp3); + if (is_oop) { + // store_heap_oop transitively calls oop_store_at which corrupts to.base(). We need to keep it valid. + __ push(to.base(), sp); + __ store_heap_oop(to, val, tmp1, tmp2, tmp3, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED); + __ pop(to.base(), sp); + } else { + __ store_sized_value(to, val, size_in_bytes); + } + } else { + if (wide) { + __ strd(r_1->as_FloatRegister(), to); + } else { + // only a float use just part of the slot + __ strs(r_1->as_FloatRegister(), to); + } + } +} + static void gen_c2i_adapter(MacroAssembler *masm, - int total_args_passed, - int comp_args_on_stack, - const BasicType *sig_bt, + const GrowableArray* sig_extended, const VMRegPair *regs, - Label& skip_fixup) { + bool requires_clinit_barrier, + address& c2i_no_clinit_check_entry, + Label& skip_fixup, + address start, + OopMapSet* oop_maps, + int& frame_complete, + int& frame_size_in_words, + bool alloc_inline_receiver) { + if (requires_clinit_barrier) { + assert(VM_Version::supports_fast_class_init_checks(), "sanity"); + Label L_skip_barrier; + + { // Bypass the barrier for non-static methods + __ ldrh(rscratch1, Address(rmethod, Method::access_flags_offset())); + __ andsw(zr, rscratch1, JVM_ACC_STATIC); + __ br(Assembler::EQ, L_skip_barrier); // non-static + } + + __ load_method_holder(rscratch2, rmethod); + __ 
clinit_barrier(rscratch2, rscratch1, &L_skip_barrier); + __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); + + __ bind(L_skip_barrier); + c2i_no_clinit_check_entry = __ pc(); + } + + BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->c2i_entry_barrier(masm); + // Before we get into the guts of the C2I adapter, see if we should be here // at all. We've come from compiled code and are attempting to jump to the // interpreter, which means the caller made a static call to get here @@ -400,114 +615,184 @@ static void gen_c2i_adapter(MacroAssembler *masm, __ bind(skip_fixup); - int words_pushed = 0; + // Name some registers to be used in the following code. We can use + // anything except r0-r7 which are arguments in the Java calling + // convention, rmethod (r12), and r19 which holds the outgoing sender + // SP for the interpreter. + Register buf_array = r10; // Array of buffered inline types + Register buf_oop = r11; // Buffered inline type oop + Register tmp1 = r15; + Register tmp2 = r16; + Register tmp3 = r17; - // Since all args are passed on the stack, total_args_passed * - // Interpreter::stackElementSize is the space we need. +#ifdef ASSERT + RegSet clobbered_gp_regs = MacroAssembler::call_clobbered_gp_registers(); + assert(clobbered_gp_regs.contains(buf_array), "buf_array must be saved explicitly if it's not a clobber"); + assert(clobbered_gp_regs.contains(buf_oop), "buf_oop must be saved explicitly if it's not a clobber"); + assert(clobbered_gp_regs.contains(tmp1), "tmp1 must be saved explicitly if it's not a clobber"); + assert(clobbered_gp_regs.contains(tmp2), "tmp2 must be saved explicitly if it's not a clobber"); + assert(clobbered_gp_regs.contains(tmp3), "tmp3 must be saved explicitly if it's not a clobber"); +#endif - int extraspace = total_args_passed * Interpreter::stackElementSize; + if (InlineTypePassFieldsAsArgs) { + // Is there an inline type argument? 
+ bool has_inline_argument = false; + for (int i = 0; i < sig_extended->length() && !has_inline_argument; i++) { + has_inline_argument = (sig_extended->at(i)._bt == T_METADATA); + } + if (has_inline_argument) { + // There is at least a value type argument: we're coming from + // compiled code so we may not have buffers to back the value + // objects. Allocate the buffers here with a runtime call for + // the value arguments that needs a buffer. + RegisterSaver reg_save(true /* save_vectors */); + OopMap* map = reg_save.save_live_registers(masm, 0, &frame_size_in_words); - __ mov(r19_sender_sp, sp); + frame_complete = __ offset(); + address the_pc = __ pc(); - // stack is aligned, keep it that way - extraspace = align_up(extraspace, 2*wordSize); + Label retaddr; + __ set_last_Java_frame(sp, noreg, retaddr, rscratch1); - if (extraspace) - __ sub(sp, sp, extraspace); + __ mov(c_rarg0, rthread); + __ mov(c_rarg1, rmethod); + __ mov(c_rarg2, (int64_t)alloc_inline_receiver); - // Now write the args into the outgoing interpreter space - for (int i = 0; i < total_args_passed; i++) { - if (sig_bt[i] == T_VOID) { - assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); - continue; - } + __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::allocate_inline_types))); + __ blr(rscratch1); + __ bind(retaddr); - // offset to start parameters - int st_off = (total_args_passed - i - 1) * Interpreter::stackElementSize; - int next_off = st_off - Interpreter::stackElementSize; - - // Say 4 args: - // i st_off - // 0 32 T_LONG - // 1 24 T_VOID - // 2 16 T_OBJECT - // 3 8 T_BOOL - // - 0 return address - // - // However to make thing extra confusing. Because we can fit a Java long/double in - // a single slot on a 64 bt vm and it would be silly to break them up, the interpreter - // leaves one slot empty and only stores to a single slot. In this case the - // slot that is occupied is the T_VOID slot. See I said it was confusing. 
+ oop_maps->add_gc_map(__ pc() - start, map); + __ reset_last_Java_frame(false); - VMReg r_1 = regs[i].first(); - VMReg r_2 = regs[i].second(); - if (!r_1->is_valid()) { - assert(!r_2->is_valid(), ""); - continue; + reg_save.restore_live_registers(masm); + + Label no_exception; + __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset())); + __ cbz(rscratch1, no_exception); + + __ str(zr, Address(rthread, JavaThread::vm_result_oop_offset())); + __ ldr(r0, Address(rthread, Thread::pending_exception_offset())); + __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + + __ bind(no_exception); + + // We get an array of objects from the runtime call + __ get_vm_result_oop(buf_array, rthread); } - if (r_1->is_stack()) { - // memory to memory use rscratch1 - int ld_off = (r_1->reg2stack() * VMRegImpl::stack_slot_size - + extraspace - + words_pushed * wordSize); - if (!r_2->is_valid()) { - // sign extend?? - __ ldrw(rscratch1, Address(sp, ld_off)); - __ str(rscratch1, Address(sp, st_off)); + } - } else { + // Since all args are passed on the stack, total_args_passed * + // Interpreter::stackElementSize is the space we need. 
+ + int total_args_passed = compute_total_args_passed_int(sig_extended); + int extraspace = total_args_passed * Interpreter::stackElementSize; - __ ldr(rscratch1, Address(sp, ld_off)); + // stack is aligned, keep it that way + extraspace = align_up(extraspace, StackAlignmentInBytes); - // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG - // T_DOUBLE and T_LONG use two slots in the interpreter - if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { - // ld_off == LSW, ld_off+wordSize == MSW - // st_off == MSW, next_off == LSW - __ str(rscratch1, Address(sp, next_off)); + // set senderSP value + __ mov(r19_sender_sp, sp); + + __ sub(sp, sp, extraspace); + + // Now write the args into the outgoing interpreter space + + // next_arg_comp is the next argument from the compiler point of + // view (inline type fields are passed in registers/on the stack). In + // sig_extended, an inline type argument starts with: T_METADATA, + // followed by the types of the fields of the inline type and T_VOID + // to mark the end of the inline type. ignored counts the number of + // T_METADATA/T_VOID. next_vt_arg is the next inline type argument: + // used to get the buffer for that argument from the pool of buffers + // we allocated above and want to pass to the + // interpreter. next_arg_int is the next argument from the + // interpreter point of view (inline types are passed by reference). 
+ for (int next_arg_comp = 0, ignored = 0, next_vt_arg = 0, next_arg_int = 0; + next_arg_comp < sig_extended->length(); next_arg_comp++) { + assert(ignored <= next_arg_comp, "shouldn't skip over more slots than there are arguments"); + assert(next_arg_int <= total_args_passed, "more arguments for the interpreter than expected?"); + BasicType bt = sig_extended->at(next_arg_comp)._bt; + int st_off = (total_args_passed - next_arg_int - 1) * Interpreter::stackElementSize; + if (!InlineTypePassFieldsAsArgs || bt != T_METADATA) { + int next_off = st_off - Interpreter::stackElementSize; + const int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : st_off; + const VMRegPair reg_pair = regs[next_arg_comp-ignored]; + size_t size_in_bytes = reg_pair.second()->is_valid() ? 8 : 4; + gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended->at(next_arg_comp-1)._bt : T_ILLEGAL, + size_in_bytes, reg_pair, Address(sp, offset), tmp1, tmp2, tmp3, extraspace, false); + next_arg_int++; #ifdef ASSERT - // Overwrite the unused slot with known junk - __ mov(rscratch1, (uint64_t)0xdeadffffdeadaaaaull); - __ str(rscratch1, Address(sp, st_off)); -#endif /* ASSERT */ - } else { - __ str(rscratch1, Address(sp, st_off)); - } + if (bt == T_LONG || bt == T_DOUBLE) { + // Overwrite the unused slot with known junk + __ mov(rscratch1, CONST64(0xdeadffffdeadaaaa)); + __ str(rscratch1, Address(sp, st_off)); } - } else if (r_1->is_Register()) { - Register r = r_1->as_Register(); - if (!r_2->is_valid()) { - // must be only an int (or less ) so move only 32bits to slot - // why not sign extend?? 
- __ str(r, Address(sp, st_off)); - } else { - // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG - // T_DOUBLE and T_LONG use two slots in the interpreter - if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { - // jlong/double in gpr -#ifdef ASSERT - // Overwrite the unused slot with known junk - __ mov(rscratch1, (uint64_t)0xdeadffffdeadaaabull); - __ str(rscratch1, Address(sp, st_off)); #endif /* ASSERT */ - __ str(r, Address(sp, next_off)); + } else { + ignored++; + next_arg_int++; + int vt = 1; + // write fields we get from compiled code in registers/stack + // slots to the buffer: we know we are done with that inline type + // argument when we hit the T_VOID that acts as an end of inline + // type delimiter for this inline type. Inline types are flattened + // so we might encounter embedded inline types. Each entry in + // sig_extended contains a field offset in the buffer. + Label L_null; + Label not_null_buffer; + do { + next_arg_comp++; + BasicType bt = sig_extended->at(next_arg_comp)._bt; + BasicType prev_bt = sig_extended->at(next_arg_comp - 1)._bt; + if (bt == T_METADATA) { + vt++; + ignored++; + } else if (bt == T_VOID && prev_bt != T_LONG && prev_bt != T_DOUBLE) { + vt--; + ignored++; + } else if (sig_extended->at(next_arg_comp)._vt_oop) { + VMReg buffer = regs[next_arg_comp-ignored].first(); + if (buffer->is_stack()) { + int ld_off = buffer->reg2stack() * VMRegImpl::stack_slot_size + extraspace; + __ ldr(buf_oop, Address(sp, ld_off)); + } else { + __ mov(buf_oop, buffer->as_Register()); + } + __ cbnz(buf_oop, not_null_buffer); + // get the buffer from the just allocated pool of buffers + int index = arrayOopDesc::base_offset_in_bytes(T_OBJECT) + next_vt_arg * type2aelembytes(T_OBJECT); + __ load_heap_oop(buf_oop, Address(buf_array, index), rscratch1, tmp2); + next_vt_arg++; } else { - __ str(r, Address(sp, st_off)); + int off = sig_extended->at(next_arg_comp)._offset; + if (off == -1) { + // Nullable inline type argument, emit null 
check + VMReg reg = regs[next_arg_comp-ignored].first(); + Label L_notNull; + if (reg->is_stack()) { + int ld_off = reg->reg2stack() * VMRegImpl::stack_slot_size + extraspace; + __ ldrb(tmp1, Address(sp, ld_off)); + __ cbnz(tmp1, L_notNull); + } else { + __ cbnz(reg->as_Register(), L_notNull); + } + __ str(zr, Address(sp, st_off)); + __ b(L_null); + __ bind(L_notNull); + continue; + } + assert(off > 0, "offset in object should be positive"); + size_t size_in_bytes = is_java_primitive(bt) ? type2aelembytes(bt) : wordSize; + bool is_oop = is_reference_type(bt); + gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended->at(next_arg_comp-1)._bt : T_ILLEGAL, + size_in_bytes, regs[next_arg_comp-ignored], Address(buf_oop, off), tmp1, tmp2, tmp3, extraspace, is_oop); } - } - } else { - assert(r_1->is_FloatRegister(), ""); - if (!r_2->is_valid()) { - // only a float use just part of the slot - __ strs(r_1->as_FloatRegister(), Address(sp, st_off)); - } else { -#ifdef ASSERT - // Overwrite the unused slot with known junk - __ mov(rscratch1, (uint64_t)0xdeadffffdeadaaacull); - __ str(rscratch1, Address(sp, st_off)); -#endif /* ASSERT */ - __ strd(r_1->as_FloatRegister(), Address(sp, next_off)); - } + } while (vt != 0); + // pass the buffer to the interpreter + __ bind(not_null_buffer); + __ str(buf_oop, Address(sp, st_off)); + __ bind(L_null); } } @@ -517,12 +802,8 @@ static void gen_c2i_adapter(MacroAssembler *masm, __ br(rscratch1); } +void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, int comp_args_on_stack, const GrowableArray* sig, const VMRegPair *regs) { -void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, - int total_args_passed, - int comp_args_on_stack, - const BasicType *sig_bt, - const VMRegPair *regs) { // Note: r19_sender_sp contains the senderSP on entry. 
We must // preserve it since we may do a i2c -> c2i transition if we lose a @@ -551,29 +832,32 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, // caller, but with an uncorrected stack, causing delayed havoc. // Cut-out for having no stack args. - int comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord; + int comp_words_on_stack = 0; if (comp_args_on_stack) { - __ sub(rscratch1, sp, comp_words_on_stack * wordSize); - __ andr(sp, rscratch1, -16); + comp_words_on_stack = align_up(comp_args_on_stack * VMRegImpl::stack_slot_size, wordSize) >> LogBytesPerWord; + __ sub(rscratch1, sp, comp_words_on_stack * wordSize); + __ andr(sp, rscratch1, -16); } // Will jump to the compiled code just as if compiled code was doing it. // Pre-load the register-jump target early, to schedule it better. - __ ldr(rscratch1, Address(rmethod, in_bytes(Method::from_compiled_offset()))); + __ ldr(rscratch1, Address(rmethod, in_bytes(Method::from_compiled_inline_offset()))); + + int total_args_passed = sig->length(); // Now generate the shuffle code. for (int i = 0; i < total_args_passed; i++) { - if (sig_bt[i] == T_VOID) { - assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); + BasicType bt = sig->at(i)._bt; + if (bt == T_VOID) { + assert(i > 0 && (sig->at(i - 1)._bt == T_LONG || sig->at(i - 1)._bt == T_DOUBLE), "missing half"); continue; } // Pick up 0, 1 or 2 words from SP+offset. + assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?"); - assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), - "scrambled load targets?"); // Load in argument order going down. - int ld_off = (total_args_passed - i - 1)*Interpreter::stackElementSize; + int ld_off = (total_args_passed - i - 1) * Interpreter::stackElementSize; // Point to interpreter value (vs. 
tag) int next_off = ld_off - Interpreter::stackElementSize; // @@ -587,7 +871,7 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, } if (r_1->is_stack()) { // Convert stack slot to an SP offset (+ wordSize to account for return address ) - int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size; + int st_off = regs[i].first()->reg2stack() * VMRegImpl::stack_slot_size; if (!r_2->is_valid()) { // sign extend??? __ ldrsw(rscratch2, Address(esp, ld_off)); @@ -604,39 +888,37 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, // are accessed as negative so LSW is at LOW address // ld_off is MSW so get LSW - const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)? - next_off : ld_off; + const int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : ld_off; __ ldr(rscratch2, Address(esp, offset)); // st_off is LSW (i.e. reg.first()) - __ str(rscratch2, Address(sp, st_off)); - } - } else if (r_1->is_Register()) { // Register argument - Register r = r_1->as_Register(); - if (r_2->is_valid()) { - // - // We are using two VMRegs. This can be either T_OBJECT, - // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates - // two slots but only uses one for thr T_LONG or T_DOUBLE case - // So we must adjust where to pick up the data to match the - // interpreter. - - const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)? - next_off : ld_off; - - // this can be a misaligned move - __ ldr(r, Address(esp, offset)); - } else { - // sign extend and use a full word? - __ ldrw(r, Address(esp, ld_off)); - } - } else { - if (!r_2->is_valid()) { - __ ldrs(r_1->as_FloatRegister(), Address(esp, ld_off)); - } else { - __ ldrd(r_1->as_FloatRegister(), Address(esp, next_off)); - } - } - } + __ str(rscratch2, Address(sp, st_off)); + } + } else if (r_1->is_Register()) { // Register argument + Register r = r_1->as_Register(); + if (r_2->is_valid()) { + // + // We are using two VMRegs. 
This can be either T_OBJECT, + // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates + // two slots but only uses one for thr T_LONG or T_DOUBLE case + // So we must adjust where to pick up the data to match the + // interpreter. + + const int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : ld_off; + + // this can be a misaligned move + __ ldr(r, Address(esp, offset)); + } else { + // sign extend and use a full word? + __ ldrw(r, Address(esp, ld_off)); + } + } else { + if (!r_2->is_valid()) { + __ ldrs(r_1->as_FloatRegister(), Address(esp, ld_off)); + } else { + __ ldrd(r_1->as_FloatRegister(), Address(esp, next_off)); + } + } + } __ mov(rscratch2, rscratch1); __ push_cont_fastpath(rthread); // Set JavaThread::_cont_fastpath to the sp of the oldest interpreted frame we know about; kills rscratch1 @@ -657,23 +939,34 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, __ br(rscratch1); } -// --------------------------------------------------------------- -void SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, - int total_args_passed, - int comp_args_on_stack, - const BasicType *sig_bt, - const VMRegPair *regs, - address entry_address[AdapterBlob::ENTRY_COUNT]) { - entry_address[AdapterBlob::I2C] = __ pc(); +static void gen_inline_cache_check(MacroAssembler *masm, Label& skip_fixup) { + Register data = rscratch2; + __ ic_check(1 /* end_alignment */); + __ ldr(rmethod, Address(data, CompiledICData::speculated_method_offset())); - gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); + // Method might have been compiled since the call site was patched to + // interpreted; if that is the case treat it as a miss so we can get + // the call site corrected. 
+ __ ldr(rscratch1, Address(rmethod, in_bytes(Method::code_offset()))); + __ cbz(rscratch1, skip_fixup); + __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); +} - entry_address[AdapterBlob::C2I_Unverified] = __ pc(); - Label skip_fixup; +// --------------------------------------------------------------- +void SharedRuntime::generate_i2c2i_adapters(MacroAssembler* masm, + int comp_args_on_stack, + const GrowableArray* sig, + const VMRegPair* regs, + const GrowableArray* sig_cc, + const VMRegPair* regs_cc, + const GrowableArray* sig_cc_ro, + const VMRegPair* regs_cc_ro, + address entry_address[AdapterBlob::ENTRY_COUNT], + AdapterBlob*& new_adapter, + bool allocate_code_blob) { - Register data = rscratch2; - Register receiver = j_rarg0; - Register tmp = r10; // A call-clobbered register not used for arg passing + entry_address[AdapterBlob::I2C] = __ pc(); + gen_i2c_adapter(masm, comp_args_on_stack, sig, regs); // ------------------------------------------------------------------------- // Generate a C2I adapter. On entry we know rmethod holds the Method* during calls @@ -684,44 +977,52 @@ void SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, // On exit from the interpreter, the interpreter will restore our SP (lest the // compiled code, which relies solely on SP and not FP, get sick). - { - __ block_comment("c2i_unverified_entry {"); - // Method might have been compiled since the call site was patched to - // interpreted; if that is the case treat it as a miss so we can get - // the call site corrected. 
- __ ic_check(1 /* end_alignment */); - __ ldr(rmethod, Address(data, CompiledICData::speculated_method_offset())); + entry_address[AdapterBlob::C2I_Unverified] = __ pc(); + entry_address[AdapterBlob::C2I_Unverified_Inline] = __ pc(); + Label skip_fixup; - __ ldr(rscratch1, Address(rmethod, in_bytes(Method::code_offset()))); - __ cbz(rscratch1, skip_fixup); - __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); - __ block_comment("} c2i_unverified_entry"); - } + gen_inline_cache_check(masm, skip_fixup); - entry_address[AdapterBlob::C2I] = __ pc(); + OopMapSet* oop_maps = new OopMapSet(); + int frame_complete = CodeOffsets::frame_never_safe; + int frame_size_in_words = 0; - // Class initialization barrier for static methods + // Scalarized c2i adapter with non-scalarized receiver (i.e., don't pack receiver) entry_address[AdapterBlob::C2I_No_Clinit_Check] = nullptr; - assert(VM_Version::supports_fast_class_init_checks(), "sanity"); - Label L_skip_barrier; - - // Bypass the barrier for non-static methods - __ ldrh(rscratch1, Address(rmethod, Method::access_flags_offset())); - __ andsw(zr, rscratch1, JVM_ACC_STATIC); - __ br(Assembler::EQ, L_skip_barrier); // non-static - - __ load_method_holder(rscratch2, rmethod); - __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier); - __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); - - __ bind(L_skip_barrier); - entry_address[AdapterBlob::C2I_No_Clinit_Check] = __ pc(); + entry_address[AdapterBlob::C2I_Inline_RO] = __ pc(); + if (regs_cc != regs_cc_ro) { + // No class init barrier needed because method is guaranteed to be non-static + gen_c2i_adapter(masm, sig_cc_ro, regs_cc_ro, /* requires_clinit_barrier = */ false, entry_address[AdapterBlob::C2I_No_Clinit_Check], + skip_fixup, entry_address[AdapterBlob::I2C], oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ false); + skip_fixup.reset(); + } - BarrierSetAssembler* bs = 
BarrierSet::barrier_set()->barrier_set_assembler(); - bs->c2i_entry_barrier(masm); + // Scalarized c2i adapter + entry_address[AdapterBlob::C2I] = __ pc(); + entry_address[AdapterBlob::C2I_Inline] = __ pc(); + gen_c2i_adapter(masm, sig_cc, regs_cc, /* requires_clinit_barrier = */ true, entry_address[AdapterBlob::C2I_No_Clinit_Check], + skip_fixup, entry_address[AdapterBlob::I2C], oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ true); + + // Non-scalarized c2i adapter + if (regs != regs_cc) { + entry_address[AdapterBlob::C2I_Unverified_Inline] = __ pc(); + Label inline_entry_skip_fixup; + gen_inline_cache_check(masm, inline_entry_skip_fixup); + + entry_address[AdapterBlob::C2I_Inline] = __ pc(); + gen_c2i_adapter(masm, sig, regs, /* requires_clinit_barrier = */ true, entry_address[AdapterBlob::C2I_No_Clinit_Check], + inline_entry_skip_fixup, entry_address[AdapterBlob::I2C], oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ false); + } - gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); - return; + // The c2i adapters might safepoint and trigger a GC. The caller must make sure that + // the GC knows about the location of oop argument locations passed to the c2i adapter. 
+ if (allocate_code_blob) { + bool caller_must_gc_arguments = (regs != regs_cc); + int entry_offset[AdapterHandlerEntry::ENTRIES_COUNT]; + assert(AdapterHandlerEntry::ENTRIES_COUNT == 7, "sanity"); + AdapterHandlerLibrary::address_to_offset(entry_address, entry_offset); + new_adapter = AdapterBlob::create(masm->code(), entry_offset, frame_complete, frame_size_in_words, oop_maps, caller_must_gc_arguments); + } } static int c_calling_convention_priv(const BasicType *sig_bt, @@ -2620,6 +2921,156 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(StubId id, address destination return rs_blob; } +BufferedInlineTypeBlob* SharedRuntime::generate_buffered_inline_type_adapter(const InlineKlass* vk) { + BufferBlob* buf = BufferBlob::create("inline types pack/unpack", 16 * K); + if (buf == nullptr) { + return nullptr; + } + CodeBuffer buffer(buf); + short buffer_locs[20]; + buffer.insts()->initialize_shared_locs((relocInfo*)buffer_locs, + sizeof(buffer_locs)/sizeof(relocInfo)); + + MacroAssembler _masm(&buffer); + MacroAssembler* masm = &_masm; + + const Array* sig_vk = vk->extended_sig(); + const Array* regs = vk->return_regs(); + + int pack_fields_jobject_off = __ offset(); + // Resolve pre-allocated buffer from JNI handle. + // We cannot do this in generate_call_stub() because it requires GC code to be initialized. + Register Rresult = r14; // See StubGenerator::generate_call_stub(). 
+ __ ldr(r0, Address(Rresult)); + __ resolve_jobject(r0 /* value */, + rthread /* thread */, + r12 /* tmp */); + __ str(r0, Address(Rresult)); + + int pack_fields_off = __ offset(); + + int j = 1; + for (int i = 0; i < sig_vk->length(); i++) { + BasicType bt = sig_vk->at(i)._bt; + if (bt == T_METADATA) { + continue; + } + if (bt == T_VOID) { + if (sig_vk->at(i-1)._bt == T_LONG || + sig_vk->at(i-1)._bt == T_DOUBLE) { + j++; + } + continue; + } + int off = sig_vk->at(i)._offset; + VMRegPair pair = regs->at(j); + VMReg r_1 = pair.first(); + VMReg r_2 = pair.second(); + Address to(r0, off); + if (bt == T_FLOAT) { + __ strs(r_1->as_FloatRegister(), to); + } else if (bt == T_DOUBLE) { + __ strd(r_1->as_FloatRegister(), to); + } else { + Register val = r_1->as_Register(); + assert_different_registers(to.base(), val, r15, r16, r17); + if (is_reference_type(bt)) { + // store_heap_oop transitively calls oop_store_at which corrupts to.base(). We need to keep r0 valid. + __ mov(r17, r0); + Address to_with_r17(r17, off); + __ store_heap_oop(to_with_r17, val, r15, r16, r17, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED); + } else { + __ store_sized_value(to, r_1->as_Register(), type2aelembytes(bt)); + } + } + j++; + } + assert(j == regs->length(), "missed a field?"); + if (vk->supports_nullable_layouts()) { + // Zero the null marker (setting it to 1 would be better but would require an additional register) + __ strb(zr, Address(r0, vk->null_marker_offset())); + } + __ ret(lr); + + int unpack_fields_off = __ offset(); + + Label skip; + Label not_null; + __ cbnz(r0, not_null); + + // Return value is null. Zero all registers because the runtime requires a canonical + // representation of a flat null. 
+ j = 1; + for (int i = 0; i < sig_vk->length(); i++) { + BasicType bt = sig_vk->at(i)._bt; + if (bt == T_METADATA) { + continue; + } + if (bt == T_VOID) { + if (sig_vk->at(i-1)._bt == T_LONG || + sig_vk->at(i-1)._bt == T_DOUBLE) { + j++; + } + continue; + } + + VMRegPair pair = regs->at(j); + VMReg r_1 = pair.first(); + if (r_1->is_FloatRegister()) { + __ mov(r_1->as_FloatRegister(), Assembler::T2S, 0); + } else { + __ mov(r_1->as_Register(), zr); + } + j++; + } + __ b(skip); + __ bind(not_null); + + j = 1; + for (int i = 0; i < sig_vk->length(); i++) { + BasicType bt = sig_vk->at(i)._bt; + if (bt == T_METADATA) { + continue; + } + if (bt == T_VOID) { + if (sig_vk->at(i-1)._bt == T_LONG || + sig_vk->at(i-1)._bt == T_DOUBLE) { + j++; + } + continue; + } + int off = sig_vk->at(i)._offset; + assert(off > 0, "offset in object should be positive"); + VMRegPair pair = regs->at(j); + VMReg r_1 = pair.first(); + VMReg r_2 = pair.second(); + Address from(r0, off); + if (bt == T_FLOAT) { + __ ldrs(r_1->as_FloatRegister(), from); + } else if (bt == T_DOUBLE) { + __ ldrd(r_1->as_FloatRegister(), from); + } else if (bt == T_OBJECT || bt == T_ARRAY) { + assert_different_registers(r0, r_1->as_Register()); + __ load_heap_oop(r_1->as_Register(), from, rscratch1, rscratch2); + } else { + assert(is_java_primitive(bt), "unexpected basic type"); + assert_different_registers(r0, r_1->as_Register()); + size_t size_in_bytes = type2aelembytes(bt); + __ load_sized_value(r_1->as_Register(), from, size_in_bytes, bt != T_CHAR && bt != T_BOOLEAN); + } + j++; + } + assert(j == regs->length(), "missed a field?"); + + __ bind(skip); + + __ ret(lr); + + __ flush(); + + return BufferedInlineTypeBlob::create(&buffer, pack_fields_off, pack_fields_jobject_off, unpack_fields_off); +} + // Continuation point for throwing of implicit exceptions that are // not handled in the current activation. 
Fabricates an exception // oop and initiates normal exception dispatching in this diff --git a/src/hotspot/cpu/aarch64/stackChunkFrameStream_aarch64.inline.hpp b/src/hotspot/cpu/aarch64/stackChunkFrameStream_aarch64.inline.hpp index 18c24ad054300..6c68d177b294a 100644 --- a/src/hotspot/cpu/aarch64/stackChunkFrameStream_aarch64.inline.hpp +++ b/src/hotspot/cpu/aarch64/stackChunkFrameStream_aarch64.inline.hpp @@ -35,8 +35,30 @@ template inline bool StackChunkFrameStream::is_in_frame(void* p0) const { assert(!is_done(), ""); intptr_t* p = (intptr_t*)p0; - int argsize = is_compiled() ? (_cb->as_nmethod()->num_stack_arg_slots() * VMRegImpl::stack_slot_size) >> LogBytesPerWord : 0; - int frame_size = _cb->frame_size() + argsize; + int frame_size = _cb->frame_size(); + if (is_compiled()) { + nmethod* nm = _cb->as_nmethod_or_null(); + if (nm->needs_stack_repair() && nm->is_compiled_by_c2()) { + frame f = to_frame(); + bool augmented = f.was_augmented_on_entry(frame_size); + if (!augmented) { + // Fix: C2 caller, so frame was not extended and thus the + // size read from the frame does not include the arguments. + // Ideally we have to count the arg size for the scalarized + // convention. For now we include the size of the caller frame + // which would at least be equal to that. 
+ RegisterMap map(nullptr, + RegisterMap::UpdateMap::skip, + RegisterMap::ProcessFrames::skip, + RegisterMap::WalkContinuation::skip); + frame caller = to_frame().sender(&map); + assert(caller.is_compiled_frame() && caller.cb()->as_nmethod()->is_compiled_by_c2(), "needs stack repair but was not extended with c1/interpreter caller"); + frame_size += (caller.real_fp() - caller.sp()); + } + } else { + frame_size += _cb->as_nmethod()->num_stack_arg_slots() * VMRegImpl::stack_slot_size >> LogBytesPerWord; + } + } return p == sp() - frame::sender_sp_offset || ((p - unextended_sp()) >= 0 && (p - unextended_sp()) < frame_size); } #endif @@ -46,7 +68,13 @@ inline frame StackChunkFrameStream::to_frame() const { if (is_done()) { return frame(_sp, _sp, nullptr, nullptr, nullptr, nullptr, true); } else { - return frame(sp(), unextended_sp(), fp(), pc(), cb(), _oopmap, true); + frame f = frame(sp(), unextended_sp(), fp(), pc(), cb(), _oopmap, true); + // If caller tries to get the sender of this frame and PreserveFramePointer + // is set, fp() will be used which contains the old value at the time of + // freeze (fp is reconstructed again during thaw). Setting sp as trusted + // causes the sender code to use _unextended_sp instead (see sender_for_compiled_frame()). + f.set_sp_is_trusted(); + return f; } } diff --git a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp index f89b6e2d579c2..0ed1f896c0120 100644 --- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp @@ -504,20 +504,25 @@ class StubGenerator: public StubCodeGenerator { // T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) // n.b. 
this assumes Java returns an integral result in r0 // and a floating result in j_farg0 - __ ldr(j_rarg2, result); - Label is_long, is_float, is_double, exit; - __ ldr(j_rarg1, result_type); - __ cmp(j_rarg1, (u1)T_OBJECT); + // All of j_rargN may be used to return inline type fields so be careful + // not to clobber those. + // SharedRuntime::generate_buffered_inline_type_adapter() knows the register + // assignment of Rresult below. + Register Rresult = r14, Rresult_type = r15; + __ ldr(Rresult, result); + Label is_long, is_float, is_double, check_prim, exit; + __ ldr(Rresult_type, result_type); + __ cmp(Rresult_type, (u1)T_OBJECT); + __ br(Assembler::EQ, check_prim); + __ cmp(Rresult_type, (u1)T_LONG); __ br(Assembler::EQ, is_long); - __ cmp(j_rarg1, (u1)T_LONG); - __ br(Assembler::EQ, is_long); - __ cmp(j_rarg1, (u1)T_FLOAT); + __ cmp(Rresult_type, (u1)T_FLOAT); __ br(Assembler::EQ, is_float); - __ cmp(j_rarg1, (u1)T_DOUBLE); + __ cmp(Rresult_type, (u1)T_DOUBLE); __ br(Assembler::EQ, is_double); // handle T_INT case - __ strw(r0, Address(j_rarg2)); + __ strw(r0, Address(Rresult)); __ BIND(exit); @@ -569,17 +574,28 @@ class StubGenerator: public StubCodeGenerator { __ ret(lr); // handle return types different from T_INT + __ BIND(check_prim); + if (InlineTypeReturnedAsFields) { + // Check for scalarized return value + __ tbz(r0, 0, is_long); + // Load pack handler address + __ andr(rscratch1, r0, -2); + __ ldr(rscratch1, Address(rscratch1, InlineKlass::adr_members_offset())); + __ ldr(rscratch1, Address(rscratch1, InlineKlass::pack_handler_jobject_offset())); + __ blr(rscratch1); + __ b(exit); + } __ BIND(is_long); - __ str(r0, Address(j_rarg2, 0)); + __ str(r0, Address(Rresult, 0)); __ br(Assembler::AL, exit); __ BIND(is_float); - __ strs(j_farg0, Address(j_rarg2, 0)); + __ strs(j_farg0, Address(Rresult, 0)); __ br(Assembler::AL, exit); __ BIND(is_double); - __ strd(j_farg0, Address(j_rarg2, 0)); + __ strd(j_farg0, Address(Rresult, 0)); __ br(Assembler::AL, 
exit); // record the stub entry and end plus the auxiliary entry @@ -2606,6 +2622,12 @@ class StubGenerator: public StubCodeGenerator { __ eor(rscratch2, rscratch2, scratch_src_klass); __ cbnz(rscratch2, L_failed); + // Check for flat inline type array -> return -1 + __ test_flat_array_oop(src, rscratch2, L_failed); + + // Check for null-free (non-flat) inline type array -> handle as object array + __ test_null_free_array_oop(src, rscratch2, L_objArray); + // if (!src->is_Array()) return -1; __ tbz(lh, 31, L_failed); // i.e. (lh >= 0) @@ -12276,6 +12298,30 @@ class StubGenerator: public StubCodeGenerator { } #endif // LINUX + static void save_return_registers(MacroAssembler* masm) { + if (InlineTypeReturnedAsFields) { + masm->push(RegSet::range(r0, r7), sp); + masm->sub(sp, sp, 4 * wordSize); + masm->st1(v0, v1, v2, v3, masm->T1D, Address(sp)); + masm->sub(sp, sp, 4 * wordSize); + masm->st1(v4, v5, v6, v7, masm->T1D, Address(sp)); + } else { + masm->fmovd(rscratch1, v0); + masm->stp(rscratch1, r0, Address(masm->pre(sp, -2 * wordSize))); + } + } + + static void restore_return_registers(MacroAssembler* masm) { + if (InlineTypeReturnedAsFields) { + masm->ld1(v4, v5, v6, v7, masm->T1D, Address(masm->post(sp, 4 * wordSize))); + masm->ld1(v0, v1, v2, v3, masm->T1D, Address(masm->post(sp, 4 * wordSize))); + masm->pop(RegSet::range(r0, r7), sp); + } else { + masm->ldp(rscratch1, r0, Address(masm->post(sp, 2 * wordSize))); + masm->fmovd(v0, rscratch1); + } + } + address generate_cont_thaw(Continuation::thaw_kind kind) { bool return_barrier = Continuation::is_thaw_return_barrier(kind); bool return_barrier_exception = Continuation::is_thaw_return_barrier_exception(kind); @@ -12290,8 +12336,7 @@ class StubGenerator: public StubCodeGenerator { if (return_barrier) { // preserve possible return value from a method returning to the return barrier - __ fmovd(rscratch1, v0); - __ stp(rscratch1, r0, Address(__ pre(sp, -2 * wordSize))); + save_return_registers(_masm); } __ 
movw(c_rarg1, (return_barrier ? 1 : 0)); @@ -12300,8 +12345,7 @@ class StubGenerator: public StubCodeGenerator { if (return_barrier) { // restore return value (no safepoint in the call to thaw, so even an oop return value should be OK) - __ ldp(rscratch1, r0, Address(__ post(sp, 2 * wordSize))); - __ fmovd(v0, rscratch1); + restore_return_registers(_masm); } assert_asm(_masm, (__ ldr(rscratch1, Address(rthread, JavaThread::cont_entry_offset())), __ cmp(sp, rscratch1)), Assembler::EQ, "incorrect sp"); @@ -12320,8 +12364,7 @@ class StubGenerator: public StubCodeGenerator { if (return_barrier) { // save original return value -- again - __ fmovd(rscratch1, v0); - __ stp(rscratch1, r0, Address(__ pre(sp, -2 * wordSize))); + save_return_registers(_masm); } // If we want, we can templatize thaw by kind, and have three different entries @@ -12332,8 +12375,7 @@ class StubGenerator: public StubCodeGenerator { if (return_barrier) { // restore return value (no safepoint in the call to thaw, so even an oop return value should be OK) - __ ldp(rscratch1, r0, Address(__ post(sp, 2 * wordSize))); - __ fmovd(v0, rscratch1); + restore_return_registers(_masm); } else { __ mov(r0, zr); // return 0 (success) from doYield } @@ -13538,6 +13580,144 @@ class StubGenerator: public StubCodeGenerator { // } }; + // Call here from the interpreter or compiled code to either load + // multiple returned values from the inline type instance being + // returned to registers or to store returned values to a newly + // allocated inline type instance. + address generate_return_value_stub(address destination, const char* name, bool has_res) { + // We need to save all registers the calling convention may use so + // the runtime calls read or update those registers. This needs to + // be in sync with SharedRuntime::java_return_convention(). + // n.b. 
aarch64 asserts that frame::arg_reg_save_area_bytes == 0 + enum layout { + j_rarg7_off = 0, j_rarg7_2, // j_rarg7 is r0 + j_rarg6_off, j_rarg6_2, + j_rarg5_off, j_rarg5_2, + j_rarg4_off, j_rarg4_2, + j_rarg3_off, j_rarg3_2, + j_rarg2_off, j_rarg2_2, + j_rarg1_off, j_rarg1_2, + j_rarg0_off, j_rarg0_2, + + j_farg7_off, j_farg7_2, + j_farg6_off, j_farg6_2, + j_farg5_off, j_farg5_2, + j_farg4_off, j_farg4_2, + j_farg3_off, j_farg3_2, + j_farg2_off, j_farg2_2, + j_farg1_off, j_farg1_2, + j_farg0_off, j_farg0_2, + + rfp_off, rfp_off2, + return_off, return_off2, + + framesize // inclusive of return address + }; + + CodeBuffer code(name, 512, 64); + MacroAssembler* masm = new MacroAssembler(&code); + + int frame_size_in_bytes = align_up(framesize*BytesPerInt, 16); + assert(frame_size_in_bytes == framesize*BytesPerInt, "misaligned"); + int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; + int frame_size_in_words = frame_size_in_bytes / wordSize; + + OopMapSet* oop_maps = new OopMapSet(); + OopMap* map = new OopMap(frame_size_in_slots, 0); + + map->set_callee_saved(VMRegImpl::stack2reg(j_rarg7_off), j_rarg7->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_rarg6_off), j_rarg6->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_rarg5_off), j_rarg5->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_rarg4_off), j_rarg4->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_rarg3_off), j_rarg3->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_rarg2_off), j_rarg2->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_rarg1_off), j_rarg1->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_rarg0_off), j_rarg0->as_VMReg()); + + map->set_callee_saved(VMRegImpl::stack2reg(j_farg0_off), j_farg0->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_farg1_off), j_farg1->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_farg2_off), j_farg2->as_VMReg()); + 
map->set_callee_saved(VMRegImpl::stack2reg(j_farg3_off), j_farg3->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_farg4_off), j_farg4->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_farg5_off), j_farg5->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_farg6_off), j_farg6->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_farg7_off), j_farg7->as_VMReg()); + + address start = __ pc(); + + __ enter(); // Save FP and LR before call + + __ stpd(j_farg1, j_farg0, Address(__ pre(sp, -2 * wordSize))); + __ stpd(j_farg3, j_farg2, Address(__ pre(sp, -2 * wordSize))); + __ stpd(j_farg5, j_farg4, Address(__ pre(sp, -2 * wordSize))); + __ stpd(j_farg7, j_farg6, Address(__ pre(sp, -2 * wordSize))); + + __ stp(j_rarg1, j_rarg0, Address(__ pre(sp, -2 * wordSize))); + __ stp(j_rarg3, j_rarg2, Address(__ pre(sp, -2 * wordSize))); + __ stp(j_rarg5, j_rarg4, Address(__ pre(sp, -2 * wordSize))); + __ stp(j_rarg7, j_rarg6, Address(__ pre(sp, -2 * wordSize))); + + int frame_complete = __ offset(); + + // Set up last_Java_sp and last_Java_fp + address the_pc = __ pc(); + __ set_last_Java_frame(sp, noreg, the_pc, rscratch1); + + // Call runtime + __ mov(c_rarg1, r0); + __ mov(c_rarg0, rthread); + + __ mov(rscratch1, destination); + __ blr(rscratch1); + + oop_maps->add_gc_map(the_pc - start, map); + + __ reset_last_Java_frame(false); + + __ ldp(j_rarg7, j_rarg6, Address(__ post(sp, 2 * wordSize))); + __ ldp(j_rarg5, j_rarg4, Address(__ post(sp, 2 * wordSize))); + __ ldp(j_rarg3, j_rarg2, Address(__ post(sp, 2 * wordSize))); + __ ldp(j_rarg1, j_rarg0, Address(__ post(sp, 2 * wordSize))); + + __ ldpd(j_farg7, j_farg6, Address(__ post(sp, 2 * wordSize))); + __ ldpd(j_farg5, j_farg4, Address(__ post(sp, 2 * wordSize))); + __ ldpd(j_farg3, j_farg2, Address(__ post(sp, 2 * wordSize))); + __ ldpd(j_farg1, j_farg0, Address(__ post(sp, 2 * wordSize))); + + // check for pending exceptions + Label pending; + __ ldr(rscratch1, Address(rthread, 
in_bytes(Thread::pending_exception_offset()))); + __ cbnz(rscratch1, pending); + + if (has_res) { + // We just called SharedRuntime::store_inline_type_fields_to_buf. Check if we still + // need to initialize the buffer and if so, call the inline class specific pack handler. + Label skip_pack; + __ get_vm_result_oop(r0, rthread); + __ get_vm_result_metadata(rscratch1, rthread); + __ cbz(rscratch1, skip_pack); + __ ldr(rscratch1, Address(rscratch1, InlineKlass::adr_members_offset())); + __ ldr(rscratch1, Address(rscratch1, InlineKlass::pack_handler_offset())); + __ blr(rscratch1); + __ membar(Assembler::StoreStore); + __ bind(skip_pack); + } + + __ leave(); + __ ret(lr); + + __ bind(pending); + __ leave(); + __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + + // ------------- + // make sure all code is generated + masm->flush(); + + RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code, frame_complete, frame_size_in_words, oop_maps, false); + return stub->entry_point(); + } + // Initialization void generate_preuniverse_stubs() { // preuniverse stubs are not needed for aarch64 @@ -13586,6 +13766,14 @@ class StubGenerator: public StubCodeGenerator { StubRoutines::_hf2f = generate_float16ToFloat(); StubRoutines::_f2hf = generate_floatToFloat16(); } + + if (InlineTypeReturnedAsFields) { + StubRoutines::_load_inline_type_fields_in_regs = + generate_return_value_stub(CAST_FROM_FN_PTR(address, SharedRuntime::load_inline_type_fields_in_regs), "load_inline_type_fields_in_regs", false); + StubRoutines::_store_inline_type_fields_to_buf = + generate_return_value_stub(CAST_FROM_FN_PTR(address, SharedRuntime::store_inline_type_fields_to_buf), "store_inline_type_fields_to_buf", true); + } + } void generate_continuation_stubs() { diff --git a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp index fd6247bf362ca..dacb47d159448 100644 --- 
a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp @@ -41,6 +41,7 @@ #include "oops/methodCounters.hpp" #include "oops/methodData.hpp" #include "oops/oop.inline.hpp" +#include "oops/inlineKlass.hpp" #include "oops/resolvedIndyEntry.hpp" #include "oops/resolvedMethodEntry.hpp" #include "prims/jvmtiExport.hpp" @@ -467,6 +468,11 @@ address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, __ lea(esp, Address(rfp, rscratch1, Address::lsl(Interpreter::logStackElementSize))); // and null it as marker that esp is now tos until next java call __ str(zr, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); + + if (state == atos && InlineTypeReturnedAsFields) { + __ store_inline_type_fields_to_buf(nullptr, true); + } + __ restore_bcp(); __ restore_locals(); __ restore_constant_pool_cache(); @@ -1633,7 +1639,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { // // Generic interpreted method entry to (asm) interpreter // -address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { +address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized, bool object_init) { // determine code generation flags bool inc_counter = UseCompiler || CountCompiledCalls; @@ -1760,6 +1766,12 @@ address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { #endif } + // Issue a StoreStore barrier on entry to Object_init if the + // class has strict fields. Be lazy, always do it.
+ if (object_init) { + __ membar(MacroAssembler::StoreStore); + } + // start execution #ifdef ASSERT { diff --git a/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp b/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp index b6cf58d6062f7..698ca7fd67a0e 100644 --- a/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp @@ -43,6 +43,7 @@ #include "oops/resolvedMethodEntry.hpp" #include "prims/jvmtiExport.hpp" #include "prims/methodHandles.hpp" +#include "runtime/arguments.hpp" #include "runtime/frame.inline.hpp" #include "runtime/sharedRuntime.hpp" #include "runtime/stubRoutines.hpp" @@ -169,6 +170,7 @@ void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, Label L_patch_done; switch (bc) { + case Bytecodes::_fast_vputfield: case Bytecodes::_fast_aputfield: case Bytecodes::_fast_bputfield: case Bytecodes::_fast_zputfield: @@ -753,7 +755,7 @@ void TemplateTable::index_check(Register array, Register index) } Label ok; __ br(Assembler::LO, ok); - // ??? convention: move array into r3 for exception message + // ??? 
convention: move array into r3 for exception message __ mov(r3, array); __ mov(rscratch1, Interpreter::_throw_ArrayIndexOutOfBoundsException_entry); __ br(rscratch1); @@ -816,11 +818,23 @@ void TemplateTable::aaload() // r0: array // r1: index index_check(r0, r1); // leaves index in r1, kills rscratch1 - __ add(r1, r1, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); - do_oop_load(_masm, - Address(r0, r1, Address::uxtw(LogBytesPerHeapOop)), - r0, - IS_ARRAY); + __ profile_array_type(r2, r0, r4); + if (UseArrayFlattening) { + Label is_flat_array, done; + + __ test_flat_array_oop(r0, rscratch1 /*temp*/, is_flat_array); + __ add(r1, r1, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); + do_oop_load(_masm, Address(r0, r1, Address::uxtw(LogBytesPerHeapOop)), r0, IS_ARRAY); + + __ b(done); + __ bind(is_flat_array); + __ call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::flat_array_load), r0, r1); + __ bind(done); + } else { + __ add(r1, r1, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); + do_oop_load(_masm, Address(r0, r1, Address::uxtw(LogBytesPerHeapOop)), r0, IS_ARRAY); + } + __ profile_element_type(r2, r0, r4); } void TemplateTable::baload() @@ -1107,32 +1121,45 @@ void TemplateTable::dastore() { } void TemplateTable::aastore() { - Label is_null, ok_is_subtype, done; + Label is_null, is_flat_array, ok_is_subtype, done; transition(vtos, vtos); // stack: ..., array, index, value __ ldr(r0, at_tos()); // value __ ldr(r2, at_tos_p1()); // index __ ldr(r3, at_tos_p2()); // array - Address element_address(r3, r4, Address::uxtw(LogBytesPerHeapOop)); - index_check(r3, r2); // kills r1 + + __ profile_array_type(r4, r3, r5); + __ profile_multiple_element_types(r4, r0, r5, r6); + __ add(r4, r2, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); + Address element_address(r3, r4, Address::uxtw(LogBytesPerHeapOop)); + // Be careful not to clobber r4 below // do array store check - check for null 
value first __ cbz(r0, is_null); + // Move array class to r5 + __ load_klass(r5, r3); + + if (UseArrayFlattening) { + __ ldrw(r6, Address(r5, Klass::layout_helper_offset())); + __ test_flat_array_layout(r6, is_flat_array); + } + // Move subklass into r1 __ load_klass(r1, r0); - // Move superklass into r0 - __ load_klass(r0, r3); - __ ldr(r0, Address(r0, - ObjArrayKlass::element_klass_offset())); + + // Move array element superklass into r0 + __ ldr(r0, Address(r5, ObjArrayKlass::element_klass_offset())); // Compress array + index*oopSize + 12 into a single register. Frees r2. // Generate subtype check. Blows r2, r5 // Superklass in r0. Subklass in r1. - __ gen_subtype_check(r1, ok_is_subtype); + + // is "r1 <: r0" ? (value subclass <: array element superclass) + __ gen_subtype_check(r1, ok_is_subtype, false); // Come here on failure // object is at TOS @@ -1150,11 +1177,37 @@ void TemplateTable::aastore() { // Have a null in r0, r3=array, r2=index. Store null at ary[idx] __ bind(is_null); - __ profile_null_seen(r2); + if (Arguments::is_valhalla_enabled()) { + Label is_null_into_value_array_npe, store_null; + + if (UseArrayFlattening) { + __ test_flat_array_oop(r3, rscratch1, is_flat_array); + } + + // No way to store null in a null-free array + __ test_null_free_array_oop(r3, rscratch1, is_null_into_value_array_npe); + __ b(store_null); + + __ bind(is_null_into_value_array_npe); + __ b(ExternalAddress(Interpreter::_throw_NullPointerException_entry)); + + __ bind(store_null); + } // Store a null // Clobbers: r10, r11, r3 do_oop_store(_masm, element_address, noreg, IS_ARRAY); + __ b(done); + + if (UseArrayFlattening) { + Label is_type_ok; + __ bind(is_flat_array); // Store non-null value to flat + + __ ldr(r0, at_tos()); // value + __ ldr(r3, at_tos_p1()); // index + __ ldr(r2, at_tos_p2()); // array + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::flat_array_store), r0, r2, r3); + } // Pop stack arguments __ bind(done); @@ -1961,19 +2014,68 @@ void 
TemplateTable::if_nullcmp(Condition cc) __ profile_not_taken_branch(r0); } -void TemplateTable::if_acmp(Condition cc) -{ +void TemplateTable::if_acmp(Condition cc) { transition(atos, vtos); // assume branch is more often taken than not (loops use backward branches) - Label not_taken; + Label taken, not_taken; __ pop_ptr(r1); + + __ profile_acmp(r2, r1, r0, r4); + + Register is_inline_type_mask = rscratch1; + __ mov(is_inline_type_mask, markWord::inline_type_pattern); + + if (Arguments::is_valhalla_enabled()) { + // The substitutability test is only necessary if r1 and r0 are not the same... + __ cmp(r1, r0); + __ br(Assembler::EQ, (cc == equal) ? taken : not_taken); + + // ... neither are null... + __ cbz(r1, (cc == equal) ? not_taken : taken); + __ cbz(r0, (cc == equal) ? not_taken : taken); + + // ...and both are values... + __ ldr(r2, Address(r1, oopDesc::mark_offset_in_bytes())); + __ andr(r2, r2, is_inline_type_mask); + __ ldr(r4, Address(r0, oopDesc::mark_offset_in_bytes())); + __ andr(r4, r4, is_inline_type_mask); + __ andr(r2, r2, r4); + __ cmp(r2, is_inline_type_mask); + __ br(Assembler::NE, (cc == equal) ? not_taken : taken); + + // ...with the same value klass + __ load_metadata(r2, r1); + __ load_metadata(r4, r0); + __ cmp(r2, r4); + __ br(Assembler::NE, (cc == equal) ? not_taken : taken); + + // Know both are the same type, let's test for substitutability... + if (cc == equal) { + invoke_is_substitutable(r0, r1, taken, not_taken); + } else { + invoke_is_substitutable(r0, r1, not_taken, taken); + } + __ stop("Not reachable"); + } + __ cmpoop(r1, r0); __ br(j_not(cc), not_taken); + __ bind(taken); branch(false, false); __ bind(not_taken); - __ profile_not_taken_branch(r0); + __ profile_not_taken_branch(r0, true); } +void TemplateTable::invoke_is_substitutable(Register aobj, Register bobj, + Label& is_subst, Label& not_subst) { + + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::is_substitutable), aobj, bobj); + // Restored... 
r0 answer, jmp to outcome... + __ cbz(r0, not_subst); + __ b(is_subst); +} + + void TemplateTable::ret() { transition(vtos, vtos); locals_index(r1); @@ -2207,7 +2309,8 @@ void TemplateTable::_return(TosState state) // Issue a StoreStore barrier after all stores but before return // from any constructor for any class with a final field. We don't // know if this is a finalizer, so we always do so. - if (_desc->bytecode() == Bytecodes::_return) + if (_desc->bytecode() == Bytecodes::_return + || _desc->bytecode() == Bytecodes::_return_register_finalizer) __ membar(MacroAssembler::StoreStore); if (_desc->bytecode() != Bytecodes::_return_register_finalizer) { @@ -2599,7 +2702,7 @@ void TemplateTable::pop_and_check_object(Register r) void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) { - const Register cache = r4; + const Register cache = r2; const Register obj = r4; const Register index = r3; const Register tos_state = r3; @@ -2609,6 +2712,7 @@ void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteContr resolve_cache_and_index_for_field(byte_no, cache, index); jvmti_post_field_access(cache, index, is_static, false); + load_resolved_field_entry(obj, cache, tos_state, off, flags, is_static); if (!is_static) { @@ -2667,12 +2771,39 @@ void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteContr __ cmp(tos_state, (u1)atos); __ br(Assembler::NE, notObj); // atos - do_oop_load(_masm, field, r0, IN_HEAP); - __ push(atos); - if (rc == may_rewrite) { - patch_bytecode(Bytecodes::_fast_agetfield, bc, r1); + if (!Arguments::is_valhalla_enabled()) { + do_oop_load(_masm, field, r0, IN_HEAP); + __ push(atos); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_agetfield, bc, r1); + } + __ b(Done); + } else { // Valhalla + if (is_static) { + __ load_heap_oop(r0, field, rscratch1, rscratch2); + __ push(atos); + __ b(Done); + } else { + Label is_flat; + __ test_field_is_flat(flags, noreg /* temp */, 
is_flat); + __ load_heap_oop(r0, field, rscratch1, rscratch2); + __ push(atos); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_agetfield, bc, r1); + } + __ b(Done); + __ bind(is_flat); + // field is flat (null-free or nullable with a null-marker) + __ mov(r0, obj); + __ read_flat_field(cache, r0); + __ verify_oop(r0); + __ push(atos); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_vgetfield, bc, r1); + } + __ b(Done); + } } - __ b(Done); __ bind(notObj); __ cmp(tos_state, (u1)itos); @@ -2833,7 +2964,7 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteContr const Register tos_state = r3; const Register obj = r2; const Register off = r19; - const Register flags = r0; + const Register flags = r6; const Register bc = r4; resolve_cache_and_index_for_field(byte_no, cache, index); @@ -2841,11 +2972,9 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteContr load_resolved_field_entry(obj, cache, tos_state, off, flags, is_static); Label Done; - __ mov(r5, flags); - { Label notVolatile; - __ tbz(r5, ResolvedFieldEntry::is_volatile_shift, notVolatile); + __ tbz(flags, ResolvedFieldEntry::is_volatile_shift, notVolatile); __ membar(MacroAssembler::StoreStore | MacroAssembler::LoadStore); __ bind(notVolatile); } @@ -2894,15 +3023,55 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteContr // atos { - __ pop(atos); - if (!is_static) pop_and_check_object(obj); - // Store into the field - // Clobbers: r10, r11, r3 - do_oop_store(_masm, field, r0, IN_HEAP); - if (rc == may_rewrite) { - patch_bytecode(Bytecodes::_fast_aputfield, bc, r1, true, byte_no); - } - __ b(Done); + if (!Arguments::is_valhalla_enabled()) { + __ pop(atos); + if (!is_static) pop_and_check_object(obj); + // Store into the field + // Clobbers: r10, r11, r3 + do_oop_store(_masm, field, r0, IN_HEAP); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_aputfield, bc, r1, true, byte_no); + } + __ 
b(Done); + } else { // Valhalla + __ pop(atos); + if (is_static) { + Label is_nullable; + __ test_field_is_not_null_free_inline_type(flags, noreg /* temp */, is_nullable); + __ null_check(r0); // FIXME JDK-8341120 + __ bind(is_nullable); + do_oop_store(_masm, field, r0, IN_HEAP); + __ b(Done); + } else { + Label null_free_reference, is_flat, rewrite_inline; + __ test_field_is_flat(flags, noreg /* temp */, is_flat); + __ test_field_is_null_free_inline_type(flags, noreg /* temp */, null_free_reference); + pop_and_check_object(obj); + // Store into the field + // Clobbers: r10, r11, r3 + do_oop_store(_masm, field, r0, IN_HEAP); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_aputfield, bc, r19, true, byte_no); + } + __ b(Done); + // Implementation of the inline type semantic + __ bind(null_free_reference); + __ null_check(r0); // FIXME JDK-8341120 + pop_and_check_object(obj); + // Store into the field + // Clobbers: r10, r11, r3 + do_oop_store(_masm, field, r0, IN_HEAP); + __ b(rewrite_inline); + __ bind(is_flat); + pop_and_check_object(r7); + __ write_flat_field(cache, off, index, flags, r7); + __ bind(rewrite_inline); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_vputfield, bc, r19, true, byte_no); + } + __ b(Done); + } + } // Valhalla } __ bind(notObj); @@ -3007,7 +3176,7 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteContr { Label notVolatile; - __ tbz(r5, ResolvedFieldEntry::is_volatile_shift, notVolatile); + __ tbz(flags, ResolvedFieldEntry::is_volatile_shift, notVolatile); __ membar(MacroAssembler::StoreLoad | MacroAssembler::StoreStore); __ bind(notVolatile); } @@ -3041,6 +3210,7 @@ void TemplateTable::jvmti_post_fast_field_mod() { // to do it for every data type, we use the saved values as the // jvalue object. 
switch (bytecode()) { // load values into the jvalue object + case Bytecodes::_fast_vputfield: // fall through case Bytecodes::_fast_aputfield: __ push_ptr(r0); break; case Bytecodes::_fast_bputfield: // fall through case Bytecodes::_fast_zputfield: // fall through @@ -3067,6 +3237,7 @@ void TemplateTable::jvmti_post_fast_field_mod() { r19, c_rarg2, c_rarg3); switch (bytecode()) { // restore tos values + case Bytecodes::_fast_vputfield: // fall through case Bytecodes::_fast_aputfield: __ pop_ptr(r0); break; case Bytecodes::_fast_bputfield: // fall through case Bytecodes::_fast_zputfield: // fall through @@ -3114,6 +3285,22 @@ void TemplateTable::fast_storefield(TosState state) // access field switch (bytecode()) { + case Bytecodes::_fast_vputfield: + { + Label is_flat, done; + __ test_field_is_flat(r5, noreg /* temp */, is_flat); + __ null_check(r0); + do_oop_store(_masm, field, r0, IN_HEAP); + __ b(done); + __ bind(is_flat); + __ load_field_entry(r4, r5); + // Re-shuffle registers because of VM calls calling convention + __ mov(r19, r1); + __ mov(r7, r2); + __ write_flat_field(r4, r19, r6, r8, r7); + __ bind(done); + } + break; case Bytecodes::_fast_aputfield: // Clobbers: r10, r11, r3 do_oop_store(_masm, field, r0, IN_HEAP); @@ -3209,6 +3396,13 @@ void TemplateTable::fast_accessfield(TosState state) // access field switch (bytecode()) { + case Bytecodes::_fast_vgetfield: + { + // field is flat + __ read_flat_field(r2, r0); + __ verify_oop(r0); + } + break; case Bytecodes::_fast_agetfield: do_oop_load(_masm, field, r0, IN_HEAP); __ verify_oop(r0); @@ -3659,6 +3853,16 @@ void TemplateTable::_new() { __ sub(r3, r3, header_size); __ cbz(r3, initialize_header); + #ifdef ASSERT + // make sure instance_size was multiple of 8 + Label L; + __ tst(r3, 7); + __ br(Assembler::EQ, L); + __ stop("object size is not multiple of 8 - adjust this code"); + __ bind(L); + // must be > 0, no extra check needed here + #endif + // Initialize object fields { __ add(r2, r0, header_size); 
@@ -3671,12 +3875,14 @@ void TemplateTable::_new() { // initialize object header only. __ bind(initialize_header); - if (UseCompactObjectHeaders) { + if (UseCompactObjectHeaders || Arguments::is_valhalla_enabled()) { __ ldr(rscratch1, Address(r4, Klass::prototype_header_offset())); __ str(rscratch1, Address(r0, oopDesc::mark_offset_in_bytes())); } else { __ mov(rscratch1, (intptr_t)markWord::prototype().value()); __ str(rscratch1, Address(r0, oopDesc::mark_offset_in_bytes())); + } + if (!UseCompactObjectHeaders) { __ store_klass_gap(r0, zr); // zero klass gap for compressed oops __ store_klass(r0, r4); // store klass last } @@ -3774,13 +3980,12 @@ void TemplateTable::checkcast() __ bind(ok_is_subtype); __ mov(r0, r3); // Restore object in r3 + __ b(done); + __ bind(is_null); + // Collect counts on whether this test sees nulls a lot or not. if (ProfileInterpreter) { - __ b(done); - __ bind(is_null); __ profile_null_seen(r2); - } else { - __ bind(is_null); // same as 'done' } __ bind(done); } @@ -3899,6 +4104,10 @@ void TemplateTable::monitorenter() // check for null object __ null_check(r0); + Label is_inline_type; + __ ldr(rscratch1, Address(r0, oopDesc::mark_offset_in_bytes())); + __ test_markword_is_inline_type(rscratch1, is_inline_type); + const Address monitor_block_top( rfp, frame::interpreter_frame_monitor_block_top_offset * wordSize); const Address monitor_block_bot( @@ -4000,6 +4209,11 @@ void TemplateTable::monitorenter() // The bcp has already been incremented. Just need to dispatch to // next instruction. 
__ dispatch_next(vtos); + + __ bind(is_inline_type); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_identity_exception), r0); + __ should_not_reach_here(); } @@ -4010,6 +4224,18 @@ void TemplateTable::monitorexit() // check for null object __ null_check(r0); + const int is_inline_type_mask = markWord::inline_type_pattern; + Label has_identity; + __ ldr(rscratch1, Address(r0, oopDesc::mark_offset_in_bytes())); + __ mov(rscratch2, is_inline_type_mask); + __ andr(rscratch1, rscratch1, rscratch2); + __ cmp(rscratch1, rscratch2); + __ br(Assembler::NE, has_identity); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + __ should_not_reach_here(); + __ bind(has_identity); + const Address monitor_block_top( rfp, frame::interpreter_frame_monitor_block_top_offset * wordSize); const Address monitor_block_bot( diff --git a/src/hotspot/cpu/aarch64/templateTable_aarch64.hpp b/src/hotspot/cpu/aarch64/templateTable_aarch64.hpp index c51c111a6f89e..1f82613f4af4a 100644 --- a/src/hotspot/cpu/aarch64/templateTable_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/templateTable_aarch64.hpp @@ -34,4 +34,6 @@ static void index_check(Register array, Register index); static void index_check_without_pop(Register array, Register index); + static void invoke_is_substitutable(Register aobj, Register bobj, Label& is_subst, Label& not_subst); + #endif // CPU_AARCH64_TEMPLATETABLE_AARCH64_HPP diff --git a/src/hotspot/cpu/aarch64/vtableStubs_aarch64.cpp b/src/hotspot/cpu/aarch64/vtableStubs_aarch64.cpp index 714904ab3df46..5f3112d424fd3 100644 --- a/src/hotspot/cpu/aarch64/vtableStubs_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/vtableStubs_aarch64.cpp @@ -47,10 +47,10 @@ extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index); #endif -VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { +VtableStub* VtableStubs::create_vtable_stub(int vtable_index, bool caller_is_c1) { // Read "A 
word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. const int stub_code_length = code_size_limit(true); - VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); + VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index, caller_is_c1); // Can be null if there is no free space in the code cache. if (s == nullptr) { return nullptr; @@ -63,6 +63,8 @@ VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { int slop_bytes = 0; int slop_delta = 0; + ByteSize entry_offset = caller_is_c1 ? Method::from_compiled_inline_offset() : Method::from_compiled_inline_ro_offset(); + ResourceMark rm; CodeBuffer cb(s->entry_point(), stub_code_length); MacroAssembler* masm = new MacroAssembler(&cb); @@ -116,7 +118,7 @@ VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { if (DebugVtables) { Label L; __ cbz(rmethod, L); - __ ldr(rscratch1, Address(rmethod, Method::from_compiled_offset())); + __ ldr(rscratch1, Address(rmethod, entry_offset)); __ cbnz(rscratch1, L); __ stop("Vtable entry is null"); __ bind(L); @@ -127,7 +129,7 @@ VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { // rmethod: Method* // r2: receiver address ame_addr = __ pc(); - __ ldr(rscratch1, Address(rmethod, Method::from_compiled_offset())); + __ ldr(rscratch1, Address(rmethod, entry_offset)); __ br(rscratch1); masm->flush(); @@ -137,10 +139,10 @@ VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { } -VtableStub* VtableStubs::create_itable_stub(int itable_index) { +VtableStub* VtableStubs::create_itable_stub(int itable_index, bool caller_is_c1) { // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. const int stub_code_length = code_size_limit(false); - VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); + VtableStub* s = new(stub_code_length) VtableStub(false, itable_index, caller_is_c1); // Can be null if there is no free space in the code cache. 
if (s == nullptr) { return nullptr; @@ -153,6 +155,8 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) { int slop_bytes = 0; int slop_delta = 0; + ByteSize entry_offset = caller_is_c1 ? Method::from_compiled_inline_offset() : Method::from_compiled_inline_ro_offset(); + ResourceMark rm; CodeBuffer cb(s->entry_point(), stub_code_length); MacroAssembler* masm = new MacroAssembler(&cb); @@ -207,7 +211,7 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) { if (DebugVtables) { Label L2; __ cbz(rmethod, L2); - __ ldr(rscratch1, Address(rmethod, Method::from_compiled_offset())); + __ ldr(rscratch1, Address(rmethod, entry_offset)); __ cbnz(rscratch1, L2); __ stop("compiler entrypoint is null"); __ bind(L2); @@ -217,7 +221,7 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) { // rmethod: Method* // j_rarg0: receiver address ame_addr = __ pc(); - __ ldr(rscratch1, Address(rmethod, Method::from_compiled_offset())); + __ ldr(rscratch1, Address(rmethod, entry_offset)); __ br(rscratch1); __ bind(L_no_such_interface); diff --git a/src/hotspot/cpu/arm/arm.ad b/src/hotspot/cpu/arm/arm.ad index 45ae283e05a20..4ad3599ef4530 100644 --- a/src/hotspot/cpu/arm/arm.ad +++ b/src/hotspot/cpu/arm/arm.ad @@ -339,10 +339,6 @@ void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { } } -uint MachPrologNode::size(PhaseRegAlloc *ra_) const { - return MachNode::size(ra_); -} - int MachPrologNode::reloc() const { return 10; // a large enough number } @@ -384,10 +380,6 @@ void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { } } -uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { - return MachNode::size(ra_); -} - int MachEpilogNode::reloc() const { return 16; // a large enough number } @@ -849,6 +841,18 @@ uint BoxLockNode::size(PhaseRegAlloc *ra_) const { } //============================================================================= +#ifndef PRODUCT +void MachVEPNode::format(PhaseRegAlloc* ra_, 
outputStream* st) const +{ + Unimplemented(); +} +#endif + +void MachVEPNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const +{ + Unimplemented(); +} + #ifndef PRODUCT #define R_RTEMP "R_R12" void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream *st ) const { @@ -863,11 +867,6 @@ void MachUEPNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { __ ic_check(InteriorEntryAlignment); } -uint MachUEPNode::size(PhaseRegAlloc *ra_) const { - return MachNode::size(ra_); -} - - //============================================================================= int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) { diff --git a/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp b/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp index 3ef02e44b6596..4b1f08ffa8bf0 100644 --- a/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp +++ b/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2008, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -156,11 +156,13 @@ void NewTypeArrayStub::emit_code(LIR_Assembler* ce) { // Implementation of NewObjectArrayStub -NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) { +NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, + CodeEmitInfo* info, bool is_null_free) { _klass_reg = klass_reg; _result = result; _length = length; _info = new CodeEmitInfo(info); + _is_null_free = is_null_free; // unimplemented } @@ -424,4 +426,31 @@ void ArrayCopyStub::emit_code(LIR_Assembler* ce) { __ b(_continuation); } +// Implementation of SubstitutabilityCheckStub +SubstitutabilityCheckStub::SubstitutabilityCheckStub(LIR_Opr left, LIR_Opr right, CodeEmitInfo* info) { + Unimplemented(); +} + +void SubstitutabilityCheckStub::emit_code(LIR_Assembler* ce) { + Unimplemented(); +} + +LoadFlattenedArrayStub::LoadFlattenedArrayStub(LIR_Opr array, LIR_Opr index, LIR_Opr result, CodeEmitInfo* info) { + Unimplemented(); +} + +void LoadFlattenedArrayStub::emit_code(LIR_Assembler* ce) { + Unimplemented(); +} + +// Implementation of StoreFlattenedArrayStub + +StoreFlattenedArrayStub::StoreFlattenedArrayStub(LIR_Opr array, LIR_Opr index, LIR_Opr value, CodeEmitInfo* info) { + Unimplemented(); +} + +void StoreFlattenedArrayStub::emit_code(LIR_Assembler* ce) { + Unimplemented(); +} + #undef __ diff --git a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp index b314577c2c865..62424d9df22fc 100644 --- a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp +++ b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2008, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -2559,6 +2559,10 @@ void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { fatal("Type profiling not implemented on this platform"); } +void LIR_Assembler::emit_profile_inline_type(LIR_OpProfileInlineType* op) { + Unimplemented(); +} + void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst) { Address mon_addr = frame_map()->address_for_monitor_lock(monitor_no); __ add_slow(dst->as_pointer_register(), mon_addr.base(), mon_addr.disp()); @@ -2858,4 +2862,27 @@ void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr } +// Valhalla support + +void LIR_Assembler::check_orig_pc() { + Unimplemented(); +} + +int LIR_Assembler::store_inline_type_fields_to_buf(ciInlineKlass* vk) { + Unimplemented(); + return 0; +} + +void LIR_Assembler::emit_opFlattenedArrayCheck(LIR_OpFlattenedArrayCheck* op) { + Unimplemented(); +} + +void LIR_Assembler::emit_opNullFreeArrayCheck(LIR_OpNullFreeArrayCheck* op) { + Unimplemented(); +} + +void LIR_Assembler::emit_opSubstitutabilityCheck(LIR_OpSubstitutabilityCheck* op) { + Unimplemented(); +} + #undef __ diff --git a/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp b/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp index 46ec87290ae3b..f93eea15464c3 100644 --- a/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp +++ b/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2008, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -944,7 +944,7 @@ void LIRGenerator::do_NewInstance(NewInstance* x) { LIR_Opr tmp2 = new_register(objectType); LIR_Opr tmp3 = FrameMap::LR_oop_opr; - new_instance(reg, x->klass(), x->is_unresolved(), tmp1, tmp2, tmp3, + new_instance(reg, x->klass(), x->is_unresolved(), /* allow_inline */ false, tmp1, tmp2, tmp3, LIR_OprFact::illegalOpr, klass_reg, info); LIR_Opr result = rlock_result(x); @@ -1104,7 +1104,8 @@ void LIRGenerator::do_CheckCast(CheckCast* x) { LIR_Opr tmp3 = LIR_OprFact::illegalOpr; __ checkcast(out_reg, obj.result(), x->klass(), tmp1, tmp2, tmp3, x->direct_compare(), - info_for_exception, patching_info, stub, x->profiled_method(), x->profiled_bci()); + info_for_exception, patching_info, stub, + x->profiled_method(), x->profiled_bci(), /*is_null_free*/ false); } diff --git a/src/hotspot/cpu/arm/c1_MacroAssembler_arm.cpp b/src/hotspot/cpu/arm/c1_MacroAssembler_arm.cpp index ad6c56186df55..943a8fc5e3d7c 100644 --- a/src/hotspot/cpu/arm/c1_MacroAssembler_arm.cpp +++ b/src/hotspot/cpu/arm/c1_MacroAssembler_arm.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2008, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -42,7 +42,10 @@ // arm [macro]assembler) and used with care in the other C1 specific // files. 
-void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_bytes) { +void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_bytes, + int sp_offset_for_orig_pc, + bool needs_stack_repair, bool has_scalarized_args, + Label* verified_inline_entry_label) { assert(bang_size_in_bytes >= frame_size_in_bytes, "stack bang size incorrect"); assert((frame_size_in_bytes % StackAlignmentInBytes) == 0, "frame size should be aligned"); @@ -59,11 +62,6 @@ void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_by bs->nmethod_entry_barrier(this); } -void C1_MacroAssembler::remove_frame(int frame_size_in_bytes) { - add_slow(SP, SP, frame_size_in_bytes); - raw_pop(FP, LR); -} - void C1_MacroAssembler::verified_entry(bool breakAtEntry) { if (breakAtEntry) { breakpoint(); @@ -238,4 +236,8 @@ void C1_MacroAssembler::verify_not_null_oop(Register r) { verify_oop(r); } +int C1_MacroAssembler::scalarized_entry(const CompiledEntrySignature* ces, int frame_size_in_bytes, int bang_size_in_bytes, int sp_offset_for_orig_pc, Label& verified_inline_entry_label, bool is_inline_ro_entry) { + Unimplemented(); +} + #endif // !PRODUCT diff --git a/src/hotspot/cpu/arm/continuationFreezeThaw_arm.inline.hpp b/src/hotspot/cpu/arm/continuationFreezeThaw_arm.inline.hpp index 6728f716caaa3..1868d61a831f2 100644 --- a/src/hotspot/cpu/arm/continuationFreezeThaw_arm.inline.hpp +++ b/src/hotspot/cpu/arm/continuationFreezeThaw_arm.inline.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2019, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -39,7 +39,7 @@ inline frame FreezeBase::sender(const frame& f) { return frame(); } -template frame FreezeBase::new_heap_frame(frame& f, frame& caller) { +template frame FreezeBase::new_heap_frame(frame& f, frame& caller, int size_adjust) { Unimplemented(); return frame(); } @@ -56,7 +56,7 @@ inline void FreezeBase::relativize_interpreted_frame_metadata(const frame& f, co Unimplemented(); } -inline void FreezeBase::patch_pd(frame& hf, const frame& caller) { +inline void FreezeBase::patch_pd(frame& hf, const frame& caller, bool is_bottom_frame) { Unimplemented(); } @@ -82,7 +82,7 @@ inline frame ThawBase::new_entry_frame() { return frame(); } -template frame ThawBase::new_stack_frame(const frame& hf, frame& caller, bool bottom) { +template frame ThawBase::new_stack_frame(const frame& hf, frame& caller, bool bottom, int size_adjust) { Unimplemented(); return frame(); } diff --git a/src/hotspot/cpu/arm/frame_arm.cpp b/src/hotspot/cpu/arm/frame_arm.cpp index f791fae7bd735..2a7db876e7edf 100644 --- a/src/hotspot/cpu/arm/frame_arm.cpp +++ b/src/hotspot/cpu/arm/frame_arm.cpp @@ -510,3 +510,18 @@ intptr_t* frame::real_fp() const { assert(! is_compiled_frame(), "unknown compiled frame size"); return fp(); } + +intptr_t* frame::repair_sender_sp(intptr_t* sender_sp, intptr_t** saved_fp_addr) const { + Unimplemented(); + return nullptr; +} + +intptr_t* frame::repair_sender_sp(nmethod* nm, intptr_t* sp, intptr_t** saved_fp_addr) { + Unimplemented(); + return nullptr; +} + +bool frame::was_augmented_on_entry(int& real_size) const { + Unimplemented(); + return false; +} diff --git a/src/hotspot/cpu/arm/frame_arm.hpp b/src/hotspot/cpu/arm/frame_arm.hpp index 026bd993981c1..f0a7fe3d55511 100644 --- a/src/hotspot/cpu/arm/frame_arm.hpp +++ b/src/hotspot/cpu/arm/frame_arm.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2025, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2008, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -125,4 +125,8 @@ static jint interpreter_frame_expression_stack_direction() { return -1; } + intptr_t* repair_sender_sp(intptr_t* sender_sp, intptr_t** saved_fp_addr) const; + static intptr_t* repair_sender_sp(nmethod* nm, intptr_t* sp, intptr_t** saved_fp_addr); + bool was_augmented_on_entry(int& real_size) const; + #endif // CPU_ARM_FRAME_ARM_HPP diff --git a/src/hotspot/cpu/arm/globals_arm.hpp b/src/hotspot/cpu/arm/globals_arm.hpp index c568ea041220e..ca5f013d04472 100644 --- a/src/hotspot/cpu/arm/globals_arm.hpp +++ b/src/hotspot/cpu/arm/globals_arm.hpp @@ -73,6 +73,9 @@ define_pd_global(bool, CompactStrings, false); define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong); +define_pd_global(bool, InlineTypePassFieldsAsArgs, false); +define_pd_global(bool, InlineTypeReturnedAsFields, false); + #define ARCH_FLAGS(develop, \ product, \ range, \ diff --git a/src/hotspot/cpu/arm/macroAssembler_arm.cpp b/src/hotspot/cpu/arm/macroAssembler_arm.cpp index 935c954462072..6715effa68fd1 100644 --- a/src/hotspot/cpu/arm/macroAssembler_arm.cpp +++ b/src/hotspot/cpu/arm/macroAssembler_arm.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2008, 2026, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2023, Red Hat, Inc. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -1888,3 +1888,37 @@ int MacroAssembler::ic_check(int end_alignment) { bind(dont); return uep_offset; } + +void MacroAssembler::remove_frame(int frame_size_in_bytes) { + add_slow(SP, SP, frame_size_in_bytes); + raw_pop(FP, LR); +} + +// Unimplemented methods for inline types. 
+int MacroAssembler::store_inline_type_fields_to_buf(ciInlineKlass* vk, bool from_interpreter) { + Unimplemented(); +} + +bool MacroAssembler::move_helper(VMReg from, VMReg to, BasicType bt, RegState reg_state[]) { + Unimplemented(); +} + +bool MacroAssembler::unpack_inline_helper(const GrowableArray* sig, int& sig_index, + VMReg from, int& from_index, VMRegPair* to, int to_count, int& to_index, + RegState reg_state[]) { + Unimplemented(); +} + +bool MacroAssembler::pack_inline_helper(const GrowableArray* sig, int& sig_index, int vtarg_index, + VMRegPair* from, int from_count, int& from_index, VMReg to, + RegState reg_state[], Register val_array) { + Unimplemented(); +} + +int MacroAssembler::extend_stack_for_inline_args(int args_on_stack) { + Unimplemented(); +} + +VMReg MacroAssembler::spill_reg_for(VMReg reg) { + Unimplemented(); +} diff --git a/src/hotspot/cpu/arm/macroAssembler_arm.hpp b/src/hotspot/cpu/arm/macroAssembler_arm.hpp index 8e80c5bcc6ecd..3119c7141b1fa 100644 --- a/src/hotspot/cpu/arm/macroAssembler_arm.hpp +++ b/src/hotspot/cpu/arm/macroAssembler_arm.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2008, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -28,6 +28,10 @@ #include "code/relocInfo.hpp" #include "utilities/powerOfTwo.hpp" +class ciInlineKlass; +class SigEntry; +class VMRegPair; + // Introduced AddressLiteral and its subclasses to ease portability from // x86 and avoid relocation issues class AddressLiteral { @@ -1081,6 +1085,11 @@ class MacroAssembler: public Assembler { static int ic_check_size(); int ic_check(int end_alignment); + + // Inline type specific methods + #include "asm/macroAssembler_common.hpp" + + void remove_frame(int initial_framesize); }; diff --git a/src/hotspot/cpu/arm/sharedRuntime_arm.cpp b/src/hotspot/cpu/arm/sharedRuntime_arm.cpp index 13e1f4493ffa2..9f92e1923d92e 100644 --- a/src/hotspot/cpu/arm/sharedRuntime_arm.cpp +++ b/src/hotspot/cpu/arm/sharedRuntime_arm.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2008, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -464,9 +464,8 @@ static void patch_callers_callsite(MacroAssembler *masm) { __ bind(skip); } -void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, - int total_args_passed, int comp_args_on_stack, - const BasicType *sig_bt, const VMRegPair *regs) { +void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, int comp_args_on_stack, const GrowableArray* sig, const VMRegPair *regs) { + // TODO: ARM - May be can use ldm to load arguments const Register tmp = Rtemp; // avoid erasing R5_mh @@ -501,9 +500,11 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, } __ bic(SP, SP, StackAlignmentInBytes - 1); + int total_args_passed = sig->length(); for (int i = 0; i < total_args_passed; i++) { - if (sig_bt[i] == T_VOID) { - assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); + BasicType bt = sig->at(i)._bt; + if (bt == T_VOID) { + assert(i > 0 && (sig->at(i - 1)._bt == T_LONG || sig->at(i - 1)._bt == T_DOUBLE), "missing half"); continue; } assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "must be ordered"); @@ -549,9 +550,7 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, } -static void gen_c2i_adapter(MacroAssembler *masm, - int total_args_passed, int comp_args_on_stack, - const BasicType *sig_bt, const VMRegPair *regs, +static void gen_c2i_adapter(MacroAssembler *masm, int comp_args_on_stack, const GrowableArray* sig, const VMRegPair *regs, Label& skip_fixup) { // TODO: ARM - May be can use stm to deoptimize arguments const Register tmp = Rtemp; @@ -562,14 +561,16 @@ static void gen_c2i_adapter(MacroAssembler *masm, __ mov(Rsender_sp, SP); // not yet saved + int total_args_passed = sig->length(); int extraspace = total_args_passed * Interpreter::stackElementSize; if (extraspace) { __ sub_slow(SP, SP, extraspace); } for (int i = 0; i < total_args_passed; i++) { - if (sig_bt[i] == T_VOID) { - assert(i > 0 && 
(sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); + BasicType bt = sig->at(i)._bt; + if (bt == T_VOID) { + assert(i > 0 && (sig->at(i - 1)._bt == T_LONG || sig->at(i - 1)._bt == T_DOUBLE), "missing half"); continue; } int stack_offset = (total_args_passed - 1 - i) * Interpreter::stackElementSize; @@ -612,14 +613,20 @@ static void gen_c2i_adapter(MacroAssembler *masm, } -void SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, - int total_args_passed, +void SharedRuntime::generate_i2c2i_adapters(MacroAssembler* masm, int comp_args_on_stack, - const BasicType *sig_bt, - const VMRegPair *regs, - address entry_address[AdapterBlob::ENTRY_COUNT]) { + const GrowableArray* sig, + const VMRegPair* regs, + const GrowableArray* sig_cc, + const VMRegPair* regs_cc, + const GrowableArray* sig_cc_ro, + const VMRegPair* regs_cc_ro, + address entry_address[AdapterBlob::ENTRY_COUNT], + AdapterBlob*& new_adapter, + bool allocate_code_blob) { + entry_address[AdapterBlob::I2C] = __ pc(); - gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); + gen_i2c_adapter(masm, comp_args_on_stack, sig, regs); entry_address[AdapterBlob::C2I_Unverified] = __ pc(); Label skip_fixup; @@ -636,7 +643,7 @@ void SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, entry_address[AdapterBlob::C2I] = __ pc(); entry_address[AdapterBlob::C2I_No_Clinit_Check] = nullptr; - gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); + gen_c2i_adapter(masm, comp_args_on_stack, sig, regs, skip_fixup); return; } @@ -1843,3 +1850,16 @@ RuntimeStub* SharedRuntime::generate_jfr_return_lease() { } #endif // INCLUDE_JFR + +const uint SharedRuntime::java_return_convention_max_int = 0; // Argument::n_int_register_parameters_j; +const uint SharedRuntime::java_return_convention_max_float = 0; // Argument::n_float_register_parameters_j; + +int SharedRuntime::java_return_convention(const BasicType *sig_bt, VMRegPair *regs, int 
total_args_passed) { + Unimplemented(); + return 0; +} + +BufferedInlineTypeBlob* SharedRuntime::generate_buffered_inline_type_adapter(const InlineKlass* vk) { + Unimplemented(); + return nullptr; +} diff --git a/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp b/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp index 8abefe39b2de2..6ecdc29cf4518 100644 --- a/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp +++ b/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp @@ -1139,7 +1139,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { // // Generic interpreted method entry to (asm) interpreter // -address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { +address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized, bool object_init) { // determine code generation flags bool inc_counter = UseCompiler || CountCompiledCalls; @@ -1254,6 +1254,12 @@ address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { #endif } + // Issue a StoreStore barrier on entry to Object_init if the + // class has strict field fields. Be lazy, always do it. + if (object_init) { + __ membar(MacroAssembler::StoreStore, R1_tmp); + } + // start execution #ifdef ASSERT { Label L; diff --git a/src/hotspot/cpu/arm/vtableStubs_arm.cpp b/src/hotspot/cpu/arm/vtableStubs_arm.cpp index 2d7ccd1969b0d..80b3cb3a400fa 100644 --- a/src/hotspot/cpu/arm/vtableStubs_arm.cpp +++ b/src/hotspot/cpu/arm/vtableStubs_arm.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2008, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -46,10 +46,10 @@ extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index); #endif -VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { +VtableStub* VtableStubs::create_vtable_stub(int vtable_index, bool caller_is_c1) { // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. const int stub_code_length = code_size_limit(true); - VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); + VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index, caller_is_c1); // Can be null if there is no free space in the code cache. if (s == nullptr) { return nullptr; @@ -116,10 +116,10 @@ VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { return s; } -VtableStub* VtableStubs::create_itable_stub(int itable_index) { +VtableStub* VtableStubs::create_itable_stub(int itable_index, bool caller_is_c1) { // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. const int stub_code_length = code_size_limit(false); - VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); + VtableStub* s = new(stub_code_length) VtableStub(false, itable_index, caller_is_c1); // Can be null if there is no free space in the code cache. 
if (s == nullptr) { return nullptr; diff --git a/src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp b/src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp index 65e9505c812a1..f86b0a9d4fc98 100644 --- a/src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp +++ b/src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp @@ -143,6 +143,85 @@ void DivByZeroStub::emit_code(LIR_Assembler* ce) { } +// Implementation of LoadFlattenedArrayStub + +LoadFlattenedArrayStub::LoadFlattenedArrayStub(LIR_Opr array, LIR_Opr index, LIR_Opr result, CodeEmitInfo* info) { + _array = array; + _index = index; + _result = result; + _scratch_reg = FrameMap::R3_oop_opr; + _info = new CodeEmitInfo(info); +} + +void LoadFlattenedArrayStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + // Pass arguments on stack. + __ std(_array->as_register(), -16, R1_SP); + __ std(_index->as_register(), -8, R1_SP); + address stub = Runtime1::entry_for(StubId::c1_load_flat_array_id); + //__ load_const_optimized(R0, stub); + __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub)); + __ mtctr(R0); + __ bctrl(); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + __ mr_if_needed(_result->as_register(), R3_RET); + __ b(_continuation); +} + + +// Implementation of StoreFlattenedArrayStub + +StoreFlattenedArrayStub::StoreFlattenedArrayStub(LIR_Opr array, LIR_Opr index, LIR_Opr value, CodeEmitInfo* info) { + _array = array; + _index = index; + _value = value; + _scratch_reg = LIR_OprFact::illegalOpr; + _info = new CodeEmitInfo(info); +} + +void StoreFlattenedArrayStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + // Pass arguments on stack. 
+ __ std(_array->as_register(), -24, R1_SP); + __ std(_index->as_register(), -16, R1_SP); + __ std(_value->as_register(), -8, R1_SP); + address stub = Runtime1::entry_for(StubId::c1_store_flat_array_id); + //__ load_const_optimized(R0, stub); + __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub)); + __ mtctr(R0); + __ bctrl(); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + __ b(_continuation); +} + + +// Implementation of SubstitutabilityCheckStub +SubstitutabilityCheckStub::SubstitutabilityCheckStub(LIR_Opr left, LIR_Opr right, CodeEmitInfo* info) { + _left = left; + _right = right; + _scratch_reg = FrameMap::R3_oop_opr; + _info = new CodeEmitInfo(info); +} + +void SubstitutabilityCheckStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + // Pass arguments on stack. + __ std(_left->as_register(), -16, R1_SP); + __ std(_right->as_register(), -8, R1_SP); + address stub = Runtime1::entry_for(StubId::c1_substitutability_check_id); + //__ load_const_optimized(R0, stub); + __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub)); + __ mtctr(R0); + __ bctrl(); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + // Result is in R3_RET (_scratch_reg) + __ b(_continuation); +} + + void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) { address a; if (_info->deoptimize_on_exception()) { @@ -231,17 +310,20 @@ void NewTypeArrayStub::emit_code(LIR_Assembler* ce) { // Implementation of NewObjectArrayStub -NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) { +NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, + CodeEmitInfo* info, bool is_null_free) { _klass_reg = klass_reg; _length = length; _result = result; _info = new CodeEmitInfo(info); + _is_null_free = is_null_free; } void NewObjectArrayStub::emit_code(LIR_Assembler* ce) { __ bind(_entry); - address entry = 
Runtime1::entry_for(StubId::c1_new_object_array_id); + address entry = _is_null_free ? Runtime1::entry_for(StubId::c1_new_null_free_array_id) + : Runtime1::entry_for(StubId::c1_new_object_array_id); //__ load_const_optimized(R0, entry); __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(entry)); __ mr_if_needed(/*op->tmp1()->as_register()*/ R5_ARG3, _length->as_register()); // already sign-extended @@ -254,6 +336,15 @@ void NewObjectArrayStub::emit_code(LIR_Assembler* ce) { void MonitorEnterStub::emit_code(LIR_Assembler* ce) { __ bind(_entry); + if (_throw_ie_stub != nullptr) { + // When we come here, _obj_reg has already been checked to be non-null. + const int is_value_mask = markWord::inline_type_pattern; + __ ld(R0, oopDesc::mark_offset_in_bytes(), _obj_reg->as_register()); + __ andi(R0, R0, is_value_mask); + __ cmpdi(CR0, R0, is_value_mask); + __ bc_far_optimized(Assembler::bcondCRbiIs1, __ bi0(CR0, Assembler::equal), *_throw_ie_stub->entry()); + } + address stub = Runtime1::entry_for(ce->compilation()->has_fpu_code() ? StubId::c1_monitorenter_id : StubId::c1_monitorenter_nofpu_id); //__ load_const_optimized(R0, stub); __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub)); diff --git a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp index 78fae5c267753..440743044272a 100644 --- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp @@ -1246,6 +1246,8 @@ void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) { const Register return_pc = R31; // Must survive C-call to enable_stack_reserved_zone(). const Register temp = R12; + assert(!InlineTypeReturnedAsFields, "unimplemented"); + // Pop the stack before the safepoint code. 
int frame_size = initial_frame_size_in_bytes(); if (Assembler::is_simm(frame_size, 16)) { @@ -1761,6 +1763,20 @@ void LIR_Assembler::unwind_op(LIR_Opr exceptionOop) { } +void LIR_Assembler::arraycopy_inlinetype_check(Register obj, Register tmp, CodeStub* slow_path, bool is_dest, bool null_check) { + if (null_check) { + __ cmpdi(CR0, obj, 0); + __ bc_far_optimized(Assembler::bcondCRbiIs1, __ bi0(CR0, Assembler::equal), *slow_path->entry()); + } + if (is_dest) { + __ test_null_free_array_oop(obj, tmp, *slow_path->entry(), true); + __ test_flat_array_oop(obj, tmp, *slow_path->entry(), true); + } else { + __ test_flat_array_oop(obj, tmp, *slow_path->entry(), true); + } +} + + void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { Register src = op->src()->as_register(); Register dst = op->dst()->as_register(); @@ -1778,6 +1794,12 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { // Set up the arraycopy stub information. ArrayCopyStub* stub = op->stub(); + if (flags & LIR_OpArrayCopy::always_slow_path) { + __ b(*stub->entry()); + __ bind(*stub->continuation()); + return; + } + // Always do stub if no type information is available. 
It's ok if // the known type isn't loaded since the code sanity checks // in debug mode and the type isn't required when we know the exact type @@ -1817,6 +1839,14 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { return; } + // Handle inline type arrays + if (flags & LIR_OpArrayCopy::src_inlinetype_check) { + arraycopy_inlinetype_check(src, tmp, stub, false, (flags & LIR_OpArrayCopy::src_null_check)); + } + if (flags & LIR_OpArrayCopy::dst_inlinetype_check) { + arraycopy_inlinetype_check(dst, tmp, stub, true, (flags & LIR_OpArrayCopy::dst_null_check)); + } + assert(default_type != nullptr && default_type->is_array_klass() && default_type->is_loaded(), "must be true at this point"); Label cont, slow, copyfunc; @@ -2311,6 +2341,7 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L } if (op->fast_check()) { + assert(!k->is_loaded() || !k->is_obj_array_klass(), "Use refined array for a direct pointer comparison"); assert_different_registers(klass_RInfo, k_RInfo); __ cmpd(CR0, k_RInfo, klass_RInfo); __ beq(CR0, *success); @@ -2380,26 +2411,28 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { Label done; - if (should_profile) { - Label not_null; - Register mdo = k_RInfo; - Register data_val = Rtmp1; - metadata2reg(md->constant_encoding(), mdo); - __ add_const_optimized(mdo, mdo, mdo_offset_bias, R0); - __ cmpdi(CR0, value, 0); - __ bne(CR0, not_null); - __ lbz(data_val, md->byte_offset_of_slot(data, DataLayout::flags_offset()) - mdo_offset_bias, mdo); - __ ori(data_val, data_val, BitData::null_seen_byte_constant()); - __ stb(data_val, md->byte_offset_of_slot(data, DataLayout::flags_offset()) - mdo_offset_bias, mdo); - __ b(done); - __ bind(not_null); - - Register recv = klass_RInfo; - __ load_klass(recv, value); - type_profile_helper(mdo, mdo_offset_bias, md, data, recv, Rtmp1); // kills recv - } else { - __ cmpdi(CR0, value, 0); - __ beq(CR0, done); + if (op->need_null_check()) { + if (should_profile) { + Label 
not_null; + Register mdo = k_RInfo; + Register data_val = Rtmp1; + metadata2reg(md->constant_encoding(), mdo); + __ add_const_optimized(mdo, mdo, mdo_offset_bias, R0); + __ cmpdi(CR0, value, 0); + __ bne(CR0, not_null); + __ lbz(data_val, md->byte_offset_of_slot(data, DataLayout::flags_offset()) - mdo_offset_bias, mdo); + __ ori(data_val, data_val, BitData::null_seen_byte_constant()); + __ stb(data_val, md->byte_offset_of_slot(data, DataLayout::flags_offset()) - mdo_offset_bias, mdo); + __ b(done); + __ bind(not_null); + + Register recv = klass_RInfo; + __ load_klass(recv, value); + type_profile_helper(mdo, mdo_offset_bias, md, data, recv, Rtmp1); // kills recv + } else { + __ cmpdi(CR0, value, 0); + __ beq(CR0, done); + } } if (!os::zero_page_read_protected() || !ImplicitNullChecks) { explicit_null_check(array, op->info_for_exception()); @@ -3023,6 +3056,25 @@ void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { __ bind(Ldone); } +void LIR_Assembler::emit_profile_inline_type(LIR_OpProfileInlineType* op) { + Register obj = op->obj()->as_register(); + //Register tmp = op->tmp()->as_pointer_register(); not needed! 
+ LIR_Address* mdo_addr = op->mdp()->as_address_ptr(); + assert(!mdo_addr->index()->is_valid(), "index unsupported"); + Register mdo_base = mdo_addr->base()->as_pointer_register(); + int mdo_offs = mdo_addr->disp(); + bool not_null = op->not_null(); + int flag = op->flag(); + + Label not_inline_type; + __ test_oop_is_not_inline_type(obj, not_inline_type, !not_null); + + __ lbz(R0, mdo_offs, mdo_base); + __ ori(R0, R0, flag); + __ stb(R0, mdo_offs, mdo_base); + + __ bind(not_inline_type); +} void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) { assert(op->crc()->is_single_cpu(), "crc must be register"); @@ -3039,4 +3091,105 @@ void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) { __ mr(res, crc); } +// Valhalla support + +void LIR_Assembler::check_orig_pc() { + Address address_for_orig_pc_addr = frame_map()->address_for_orig_pc_addr(); + __ ld(R0, address_for_orig_pc_addr); + __ cmpdi(BOOL_RESULT, R0, (u1)NULL_WORD); +} + +int LIR_Assembler::store_inline_type_fields_to_buf(ciInlineKlass* vk) { + return (__ store_inline_type_fields_to_buf(vk, false)); +} + +void LIR_Assembler::emit_opFlattenedArrayCheck(LIR_OpFlattenedArrayCheck* op) { + // We are loading/storing from/to an array that *may* be a flat array (the + // declared type is Object[], abstract[], interface[] or VT.ref[]). + // If this array is a flat array, take the slow path. + __ test_flat_array_oop(op->array()->as_register(), op->tmp()->as_register(), *op->stub()->entry(), true); +} + +void LIR_Assembler::emit_opNullFreeArrayCheck(LIR_OpNullFreeArrayCheck* op) { + // We are storing into an array that *may* be null-free (the declared type is + // Object[], abstract[], interface[] or VT.ref[]). 
+ Label test_mark_word; + Register tmp = op->tmp()->as_register(); + __ ld(tmp, oopDesc::mark_offset_in_bytes(), op->array()->as_register()); + __ andi_(R0, tmp, markWord::unlocked_value); + __ bne(CR0, test_mark_word); + __ load_prototype_header(tmp, op->array()->as_register()); + __ bind(test_mark_word); + __ andi(R0, tmp, markWord::null_free_array_bit_in_place); + __ cmpwi(BOOL_RESULT, R0, 0); +} + +void LIR_Assembler::emit_opSubstitutabilityCheck(LIR_OpSubstitutabilityCheck* op) { + Label L_oops_equal; + Label L_oops_not_equal; + Label L_end; + + Register left = op->left()->as_register(); + Register right = op->right()->as_register(); + + __ cmpd(CR0, left, right); + __ beq(CR0, L_oops_equal); + + // (1) Null check -- if one of the operands is null, the other must not be null (because + // the two references are not equal), so they are not substitutable. + __ cmpdi(CR0, left, 0); + __ cmpdi(CR1, right, 0); + __ cror(CR0, Assembler::equal, CR1, Assembler::equal); + __ beq(CR0, L_oops_not_equal); + + ciKlass* left_klass = op->left_klass(); + ciKlass* right_klass = op->right_klass(); + + // (2) Inline type check -- if either of the operands is not an inline type, + // they are not substitutable. We do this only if we are not sure that the + // operands are inline types. + if ((left_klass == nullptr || right_klass == nullptr) ||// The klass is still unloaded, or came from a Phi node. + !left_klass->is_inlinetype() || !right_klass->is_inlinetype()) { + Register tmp = op->tmp1()->as_register(); + __ ld(tmp, oopDesc::mark_offset_in_bytes(), left); + __ ld(R0, oopDesc::mark_offset_in_bytes(), right); + __ andi(tmp, tmp, (intptr_t)markWord::inline_type_pattern); + __ andr(tmp, tmp, R0); + __ cmpdi(CR0, tmp, (intptr_t)markWord::inline_type_pattern); + __ bne(CR0, L_oops_not_equal); + } + + // (3) Same klass check: if the operands are of different klasses, they are not substitutable. 
+ if (left_klass != nullptr && left_klass->is_inlinetype() && left_klass == right_klass) { + // No need to load klass -- the operands are statically known to be the same inline klass. + __ b(*op->stub()->entry()); + } else { + Register tmp1 = op->tmp1()->as_register(); + Register tmp2 = op->tmp2()->as_register(); + if (left == right) { // same operand, so clearly the same klasses, let's save the check + __ b(*op->stub()->entry()); // -> do slow check + } else { + __ cmp_klasses_from_objects(CR0, left, right, tmp1, tmp2); + __ bc_far_optimized(Assembler::bcondCRbiIs1, __ bi0(CR0, Assembler::equal), + *op->stub()->entry()); // same klass -> do slow check + } + // fall through to L_oops_not_equal + } + + __ bind(L_oops_not_equal); + load_to_reg(this, op->not_equal_result(), op->result_opr()); + __ b(L_end); + + // We've returned from the stub. R3_RET (stub's _scratch_reg) contains 0x0 IFF the two + // operands are not substitutable. (Don't compare against 0x1 in case the + // C compiler is naughty) + __ bind(*op->stub()->continuation()); + __ cmpdi(CR0, R3_RET, 0); + __ beq(CR0, L_oops_not_equal); + + __ bind(L_oops_equal); + load_to_reg(this, op->equal_result(), op->result_opr()); // (call_stub() != 0x0) -> equal + // fall-through + __ bind(L_end); +} #undef __ diff --git a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.hpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.hpp index 5a065d364b207..e7f7960092012 100644 --- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.hpp +++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.hpp @@ -66,6 +66,8 @@ enum { _deopt_handler_size = MacroAssembler::bl64_patchable_size + BytesPerInstWord }; + void arraycopy_inlinetype_check(Register obj, Register tmp, CodeStub* slow_path, bool is_dest, bool null_check); + // '_static_call_stub_size' is only used on ppc (see LIR_Assembler::emit_static_call_stub() // in c1_LIRAssembler_ppc.cpp. 
The other, shared getters are defined in c1_LIRAssembler.hpp static int static_call_stub_size() { diff --git a/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp index a652a155f62b9..293a6472748eb 100644 --- a/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp +++ b/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2005, 2025, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2024 SAP SE. All rights reserved. + * Copyright (c) 2005, 2026, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2026 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -32,6 +32,7 @@ #include "c1/c1_Runtime1.hpp" #include "c1/c1_ValueStack.hpp" #include "ci/ciArray.hpp" +#include "ci/ciInlineKlass.hpp" #include "ci/ciObjArrayKlass.hpp" #include "ci/ciTypeArrayKlass.hpp" #include "runtime/sharedRuntime.hpp" @@ -351,10 +352,15 @@ void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { info_for_exception = state_for(x); } + CodeStub* throw_ie_stub = + x->maybe_inlinetype() ? + new SimpleExceptionStub(StubId::c1_throw_identity_exception_id, obj.result(), state_for(x)) : + nullptr; + // This CodeEmitInfo must not have the xhandlers because here the // object is already locked (xhandlers expects object to be unlocked). CodeEmitInfo* info = state_for(x, x->state(), true); - monitor_enter(obj.result(), lock, hdr, scratch, x->monitor_no(), info_for_exception, info); + monitor_enter(obj.result(), lock, hdr, scratch, x->monitor_no(), info_for_exception, info, throw_ie_stub); } @@ -837,13 +843,14 @@ void LIRGenerator::do_NewInstance(NewInstance* x) { tty->print_cr(" ###class not loaded at new bci %d", x->printable_bci()); } #endif - CodeEmitInfo* info = state_for(x, x->state()); + CodeEmitInfo* info = state_for(x, x->needs_state_before() ? 
x->state_before() : x->state()); LIR_Opr klass_reg = FrameMap::R4_metadata_opr; // Used by slow path (NewInstanceStub). LIR_Opr tmp1 = FrameMap::R5_oop_opr; LIR_Opr tmp2 = FrameMap::R6_oop_opr; LIR_Opr tmp3 = FrameMap::R7_oop_opr; LIR_Opr tmp4 = FrameMap::R8_oop_opr; - new_instance(reg, x->klass(), x->is_unresolved(), tmp1, tmp2, tmp3, tmp4, klass_reg, info); + new_instance(reg, x->klass(), x->is_unresolved(), !x->is_unresolved() && x->klass()->is_inlinetype(), + tmp1, tmp2, tmp3, tmp4, klass_reg, info); // Must prevent reordering of stores for object initialization // with stores that publish the new object. @@ -912,13 +919,18 @@ void LIRGenerator::do_NewObjectArray(NewObjectArray* x) { LIR_Opr tmp4 = FrameMap::R8_oop_opr; LIR_Opr len = length.result(); - CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info); - ciMetadata* obj = ciObjArrayKlass::make(x->klass()); + ciKlass* obj = ciObjArrayKlass::make(x->klass()); + + // TODO 8265122 Implement a fast path for this + bool is_flat = obj->is_loaded() && obj->is_flat_array_klass(); + bool is_null_free = obj->is_loaded() && obj->as_array_klass()->is_elem_null_free(); + + CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info, is_null_free); if (obj == ciEnv::unloaded_ciobjarrayklass()) { BAILOUT("encountered unloaded_ciobjarrayklass due to out of memory error"); } klass2reg_with_patching(klass_reg, obj, patching_info); - __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path); + __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path, true, is_null_free || is_flat); // Must prevent reordering of stores for object initialization // with stores that publish the new object. 
@@ -1027,7 +1039,7 @@ void LIRGenerator::do_CheckCast(CheckCast* x) { LIR_Opr tmp3 = FrameMap::R6_oop_opr; // temp __ checkcast(out_reg, obj.result(), x->klass(), tmp1, tmp2, tmp3, x->direct_compare(), info_for_exception, patching_info, stub, - x->profiled_method(), x->profiled_bci()); + x->profiled_method(), x->profiled_bci(), x->is_null_free()); } @@ -1072,17 +1084,21 @@ void LIRGenerator::do_If(If* x) { xin->load_item(); left = xin->result(); - if (yin->result()->is_constant() && yin->result()->type() == T_INT && - Assembler::is_simm16(yin->result()->as_constant_ptr()->as_jint())) { - // Inline int constants which are small enough to be immediate operands. - right = LIR_OprFact::value_type(yin->value()->type()); - } else if (tag == longTag && yin->is_constant() && yin->get_jlong_constant() == 0 && - (cond == If::eql || cond == If::neq)) { - // Inline long zero. - right = LIR_OprFact::value_type(yin->value()->type()); - } else if (tag == objectTag && yin->is_constant() && (yin->get_jobject_constant()->is_null_object())) { - right = LIR_OprFact::value_type(yin->value()->type()); - } else { + if (yin->result()->is_constant() && !x->substitutability_check()) { + if (yin->result()->type() == T_INT && + Assembler::is_simm16(yin->result()->as_constant_ptr()->as_jint())) { + // Inline int constants which are small enough to be immediate operands. + right = LIR_OprFact::value_type(yin->value()->type()); + } else if (tag == longTag && yin->get_jlong_constant() == 0 && + (cond == If::eql || cond == If::neq)) { + // Inline long zero. 
+ right = LIR_OprFact::value_type(yin->value()->type()); + } else if (tag == objectTag && (yin->get_jobject_constant()->is_null_object())) { + right = LIR_OprFact::value_type(yin->value()->type()); + } + } + + if (right == LIR_OprFact::illegalOpr) { yin->load_item(); right = yin->result(); } @@ -1096,7 +1112,12 @@ void LIRGenerator::do_If(If* x) { __ safepoint(safepoint_poll_register(), state_for(x, x->state_before())); } - __ cmp(lir_cond(cond), left, right); + if (x->substitutability_check()) { + substitutability_check(x, *xin, *yin); + } else { + __ cmp(lir_cond(cond), left, right); + } + // Generate branch profiling. Profiling code doesn't kill flags. profile_branch(x, cond); move_to_phi(x->state()); diff --git a/src/hotspot/cpu/ppc/c1_MacroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_MacroAssembler_ppc.cpp index 359c7cf22ad69..2286cb6eaf19d 100644 --- a/src/hotspot/cpu/ppc/c1_MacroAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/c1_MacroAssembler_ppc.cpp @@ -1,6 +1,6 @@ /* * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2025 SAP SE. All rights reserved. + * Copyright (c) 2012, 2026 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -27,10 +27,13 @@ #include "c1/c1_MacroAssembler.hpp" #include "c1/c1_Runtime1.hpp" #include "gc/shared/collectedHeap.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" #include "gc/shared/tlab_globals.hpp" #include "interpreter/interpreter.hpp" #include "oops/arrayOop.hpp" #include "oops/markWord.hpp" +#include "runtime/arguments.hpp" #include "runtime/basicLock.hpp" #include "runtime/os.hpp" #include "runtime/sharedRuntime.hpp" @@ -45,19 +48,42 @@ void C1_MacroAssembler::explicit_null_check(Register base) { } -void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_bytes) { +void C1_MacroAssembler::build_frame_helper(int frame_size_in_bytes, int sp_offset_for_orig_pc, int sp_inc, bool reset_orig_pc, bool needs_stack_repair) { const Register return_pc = R20; mflr(return_pc); + std(return_pc, _abi0(lr), R1_SP); // SP->lr = return_pc + push_frame(frame_size_in_bytes, R0); // SP -= frame_size_in_bytes + + if (needs_stack_repair) { + // Save stack increment (also account for fixed framesize and rbp) + Unimplemented(); + } + if (reset_orig_pc) { + // Zero orig_pc to detect deoptimization during buffering in the entry points + li(R0, 0); + untested("build_frame_helper reset_orig_pc"); + std(R0, sp_offset_for_orig_pc, R1_SP); + } +} + +void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_bytes, + int sp_offset_for_orig_pc, + bool needs_stack_repair, bool has_scalarized_args, + Label* verified_inline_entry_label) { // Make sure there is enough stack space for this method's activation. 
assert(bang_size_in_bytes >= frame_size_in_bytes, "stack bang size incorrect"); generate_stack_overflow_check(bang_size_in_bytes); - std(return_pc, _abi0(lr), R1_SP); // SP->lr = return_pc - push_frame(frame_size_in_bytes, R0); // SP -= frame_size_in_bytes + build_frame_helper(frame_size_in_bytes, sp_offset_for_orig_pc, 0, has_scalarized_args, needs_stack_repair); BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); bs->nmethod_entry_barrier(this, R20); + + if (verified_inline_entry_label != nullptr) { + // Jump here from the scalarized entry points that already created the frame. + bind(*verified_inline_entry_label); + } } @@ -133,12 +159,20 @@ void C1_MacroAssembler::try_allocate( void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register t1, Register t2) { assert_different_registers(obj, klass, len, t1, t2); - if (UseCompactObjectHeaders) { + if (UseCompactObjectHeaders || Arguments::is_valhalla_enabled()) { + // COH: Markword contains class pointer which is only known at runtime. + // Valhalla: Could have value class which has a different prototype header to a normal object. + // In both cases, we need to fetch dynamically. ld(t1, in_bytes(Klass::prototype_header_offset()), klass); - std(t1, oopDesc::mark_offset_in_bytes(), obj); } else { + // Otherwise: Can use the statically computed prototype header which is the same for every object. load_const_optimized(t1, (intx)markWord::prototype().value()); - std(t1, oopDesc::mark_offset_in_bytes(), obj); + } + std(t1, oopDesc::mark_offset_in_bytes(), obj); + + if (!UseCompactObjectHeaders) { + // COH: Markword already contains class pointer. Nothing else to do. 
+ // Otherwise: Store encoded klass pointer following the markword store_klass(obj, klass); } @@ -332,3 +366,8 @@ void C1_MacroAssembler::null_check(Register r, Label* Lnull) { bc_far_optimized(Assembler::bcondCRbiIs1, bi0(CR0, Assembler::equal), *Lnull); } } + +int C1_MacroAssembler::scalarized_entry(const CompiledEntrySignature* ces, int frame_size_in_bytes, int bang_size_in_bytes, int sp_offset_for_orig_pc, Label& verified_inline_entry_label, bool is_inline_ro_entry) { + Unimplemented(); +} + diff --git a/src/hotspot/cpu/ppc/c1_Runtime1_ppc.cpp b/src/hotspot/cpu/ppc/c1_Runtime1_ppc.cpp index 5649f23856c5c..b79491f0636ef 100644 --- a/src/hotspot/cpu/ppc/c1_Runtime1_ppc.cpp +++ b/src/hotspot/cpu/ppc/c1_Runtime1_ppc.cpp @@ -329,8 +329,8 @@ OopMapSet* Runtime1::generate_stub_call(StubAssembler* sasm, Register result, ad return oop_maps; } -static OopMapSet* stub_call_with_stack_parms(StubAssembler* sasm, Register result, address target, - int stack_parms, bool do_return = true) { +static OopMapSet* stub_call_with_stack_parms(StubAssembler* sasm, Register oop_result, address target, + int stack_parms, bool do_return = true, Register result2 = noreg) { // Make a frame and preserve the caller's caller-save registers. 
const int parm_size_in_bytes = align_up(stack_parms << LogBytesPerWord, frame::alignment_in_bytes); const int padding = parm_size_in_bytes - (stack_parms << LogBytesPerWord); @@ -345,14 +345,14 @@ static OopMapSet* stub_call_with_stack_parms(StubAssembler* sasm, Register resul case 1: __ ld(R4_ARG2, frame_size_in_bytes + padding + 0, R1_SP); case 0: - call_offset = __ call_RT(result, noreg, target); + call_offset = __ call_RT(oop_result, noreg, target); break; default: Unimplemented(); break; } OopMapSet* oop_maps = new OopMapSet(); oop_maps->add_gc_map(call_offset, oop_map); - restore_live_registers(sasm, result, noreg); + restore_live_registers(sasm, oop_result, result2); if (do_return) __ blr(); return oop_maps; } @@ -432,25 +432,38 @@ OopMapSet* Runtime1::generate_code_for(StubId id, StubAssembler* sasm) { case StubId::c1_new_type_array_id: case StubId::c1_new_object_array_id: + case StubId::c1_new_null_free_array_id: { if (id == StubId::c1_new_type_array_id) { __ set_info("new_type_array", dont_gc_arguments); - } else { + } else if (id == StubId::c1_new_object_array_id) { __ set_info("new_object_array", dont_gc_arguments); + } else { + __ set_info("new_null_free_array", dont_gc_arguments); } #ifdef ASSERT // Assert object type is really an array of the proper kind. - { - int tag = (id == StubId::c1_new_type_array_id) ? 
Klass::_lh_array_tag_type_value : Klass::_lh_array_tag_obj_value; - Label ok; - __ lwz(R0, in_bytes(Klass::layout_helper_offset()), R4_ARG2); - __ srawi(R0, R0, Klass::_lh_array_tag_shift); - __ cmpwi(CR0, R0, tag); - __ beq(CR0, ok); - __ stop("assert(is an array klass)"); - __ should_not_reach_here(); - __ bind(ok); + __ lwz(R0, in_bytes(Klass::layout_helper_offset()), R4_ARG2); + __ srawi(R0, R0, Klass::_lh_array_tag_shift); + switch (id) { + case StubId::c1_new_type_array_id: + __ cmpwi(CR0, R0, Klass::_lh_array_tag_type_value); + __ asm_assert_eq("assert(is a type array klass)"); + break; + case StubId::c1_new_object_array_id: + __ cmpwi(CR0, R0, Klass::_lh_array_tag_ref_value); // new "[Ljava/lang/Object;" + __ cmpwi(CR1, R0, Klass::_lh_array_tag_flat_value); // new "[LVT;" + __ cror(CR0, Assembler::equal, CR1, Assembler::equal); + __ asm_assert_eq("assert(is an object or inline type array klass)"); + break; + case StubId::c1_new_null_free_array_id: + __ cmpwi(CR0, R0, Klass::_lh_array_tag_flat_value); // the array can be a flat array. + __ cmpwi(CR1, R0, Klass::_lh_array_tag_ref_value); // the array cannot be a flat array (due to the InlineArrayElementMaxFlatSize, etc.) 
+ __ cror(CR0, Assembler::equal, CR1, Assembler::equal); + __ asm_assert_eq("assert(is an object or inline type array klass)"); + break; + default: ShouldNotReachHere(); } #endif // ASSERT @@ -458,8 +471,11 @@ OopMapSet* Runtime1::generate_code_for(StubId id, StubAssembler* sasm) { if (id == StubId::c1_new_type_array_id) { oop_maps = generate_stub_call(sasm, R3_RET, CAST_FROM_FN_PTR(address, new_type_array), R4_ARG2, R5_ARG3); - } else { + } else if (id == StubId::c1_new_object_array_id) { oop_maps = generate_stub_call(sasm, R3_RET, CAST_FROM_FN_PTR(address, new_object_array), R4_ARG2, R5_ARG3); + } else { + assert(id == StubId::c1_new_null_free_array_id, "must be"); + oop_maps = generate_stub_call(sasm, R3_RET, CAST_FROM_FN_PTR(address, new_null_free_array), R4_ARG2, R5_ARG3); } } break; @@ -474,6 +490,36 @@ OopMapSet* Runtime1::generate_code_for(StubId id, StubAssembler* sasm) { } break; + case StubId::c1_buffer_inline_args_id: + case StubId::c1_buffer_inline_args_no_receiver_id: + { + address entry = (id == StubId::c1_buffer_inline_args_id) ? 
+ CAST_FROM_FN_PTR(address, buffer_inline_args) : + CAST_FROM_FN_PTR(address, buffer_inline_args_no_receiver); + + __ unimplemented("c1_buffer_inline_args"); // TODO: handle arguments and return value + OopMap* oop_map = save_live_registers(sasm); + int call_offset = __ call_RT(noreg, noreg, entry, R3_ARG1); + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, oop_map); + restore_live_registers(sasm, R3_RET, noreg); + __ blr(); + } + break; + + case StubId::c1_load_flat_array_id: + oop_maps = stub_call_with_stack_parms(sasm, R3_RET, CAST_FROM_FN_PTR(address, load_flat_array), 2); + break; + + case StubId::c1_store_flat_array_id: + oop_maps = stub_call_with_stack_parms(sasm, noreg, CAST_FROM_FN_PTR(address, store_flat_array), 3); + break; + + case StubId::c1_substitutability_check_id: + oop_maps = stub_call_with_stack_parms(sasm, noreg, CAST_FROM_FN_PTR(address, substitutability_check), 2, true, R3_RET); + break; + case StubId::c1_register_finalizer_id: { __ set_info("register_finalizer", dont_gc_arguments); @@ -591,11 +637,25 @@ OopMapSet* Runtime1::generate_code_for(StubId id, StubAssembler* sasm) { case StubId::c1_throw_incompatible_class_change_error_id: { - __ set_info("throw_incompatible_class_cast_exception", dont_gc_arguments); + __ set_info("throw_incompatible_class_change_error", dont_gc_arguments); oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); } break; + case StubId::c1_throw_illegal_monitor_state_exception_id: + { + __ set_info("c1_throw_illegal_monitor_state_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_illegal_monitor_state_exception), false); + } + break; + + case StubId::c1_throw_identity_exception_id: + { + __ set_info("throw_identity_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_identity_exception), true); + } + break; + case 
StubId::c1_slow_subtype_check_id: { // Support for uint StubRoutine::partial_subtype_check( Klass sub, Klass super ); const Register sub_klass = R5, diff --git a/src/hotspot/cpu/ppc/continuationEntry_ppc.inline.hpp b/src/hotspot/cpu/ppc/continuationEntry_ppc.inline.hpp index 4af637b2988d7..0b2f043b2fb5f 100644 --- a/src/hotspot/cpu/ppc/continuationEntry_ppc.inline.hpp +++ b/src/hotspot/cpu/ppc/continuationEntry_ppc.inline.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2022, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -27,6 +27,7 @@ #include "runtime/continuationEntry.hpp" +#include "code/codeCache.hpp" #include "oops/method.inline.hpp" #include "runtime/frame.inline.hpp" #include "runtime/registerMap.hpp" diff --git a/src/hotspot/cpu/ppc/continuationFreezeThaw_ppc.inline.hpp b/src/hotspot/cpu/ppc/continuationFreezeThaw_ppc.inline.hpp index 82167949065cd..3f81b1f482ed4 100644 --- a/src/hotspot/cpu/ppc/continuationFreezeThaw_ppc.inline.hpp +++ b/src/hotspot/cpu/ppc/continuationFreezeThaw_ppc.inline.hpp @@ -1,5 +1,6 @@ /* - * Copyright (c) 2019, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2019, 2026, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2026 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -56,6 +57,9 @@ inline frame FreezeBase::sender(const frame& f) { return frame(f.sender_sp(), f.sender_pc(), f.interpreter_frame_sender_sp()); } + assert(f.cb() == nullptr || !f.cb()->is_nmethod() || !f.cb()->as_nmethod()->needs_stack_repair(), + "unsupported"); + intptr_t* sender_sp = f.sender_sp(); address sender_pc = f.sender_pc(); assert(sender_sp != f.sp(), "must have changed"); @@ -269,7 +273,8 @@ inline void FreezeBase::set_top_frame_metadata_pd(const frame& hf) { // See also StackChunkFrameStream::frame_size(). ========================== // template -frame FreezeBase::new_heap_frame(frame& f, frame& caller) { +frame FreezeBase::new_heap_frame(frame& f, frame& caller, int size_adjust) { + assert(size_adjust == 0, "unsupported"); assert(FKind::is_instance(f), ""); intptr_t *sp, *fp; @@ -317,7 +322,7 @@ frame FreezeBase::new_heap_frame(frame& f, frame& caller) { } } -inline void FreezeBase::patch_pd(frame& hf, const frame& caller) { +inline void FreezeBase::patch_pd(frame& hf, const frame& caller, bool is_bottom_frame) { if (caller.is_interpreted_frame()) { assert(!caller.is_empty(), ""); patch_callee_link_relative(caller, caller.fp()); @@ -503,7 +508,8 @@ inline frame ThawBase::new_entry_frame() { // limited/known. In contrast to the interpreted caller case the abi overlaps with the caller // if there are no stackargs. This is to comply with shared code (see e.g. 
StackChunkFrameStream::frame_size()) // -template frame ThawBase::new_stack_frame(const frame& hf, frame& caller, bool bottom) { +template frame ThawBase::new_stack_frame(const frame& hf, frame& caller, bool bottom, int size_adjust) { + assert(size_adjust == 0, "unsupported"); assert(FKind::is_instance(hf), ""); assert(is_aligned(caller.fp(), frame::frame_alignment), PTR_FORMAT, p2i(caller.fp())); diff --git a/src/hotspot/cpu/ppc/continuationHelper_ppc.inline.hpp b/src/hotspot/cpu/ppc/continuationHelper_ppc.inline.hpp index 1471a6299ee79..d1bc6ee620ae6 100644 --- a/src/hotspot/cpu/ppc/continuationHelper_ppc.inline.hpp +++ b/src/hotspot/cpu/ppc/continuationHelper_ppc.inline.hpp @@ -106,6 +106,8 @@ inline void ContinuationHelper::InterpretedFrame::patch_sender_sp(frame& f, cons } inline address* ContinuationHelper::Frame::return_pc_address(const frame& f) { + assert(f.cb() == nullptr || !f.cb()->is_nmethod() || !f.cb()->as_nmethod()->needs_stack_repair(), + "unsupported"); return (address*)&f.callers_abi()->lr; } diff --git a/src/hotspot/cpu/ppc/foreignGlobals_ppc.cpp b/src/hotspot/cpu/ppc/foreignGlobals_ppc.cpp index c74cd3781a2ea..5069030ce51e5 100644 --- a/src/hotspot/cpu/ppc/foreignGlobals_ppc.cpp +++ b/src/hotspot/cpu/ppc/foreignGlobals_ppc.cpp @@ -44,11 +44,11 @@ const ABIDescriptor ForeignGlobals::parse_abi_descriptor(jobject jabi) { oop abi_oop = JNIHandles::resolve_non_null(jabi); ABIDescriptor abi; - objArrayOop inputStorage = jdk_internal_foreign_abi_ABIDescriptor::inputStorage(abi_oop); + refArrayOop inputStorage = jdk_internal_foreign_abi_ABIDescriptor::inputStorage(abi_oop); parse_register_array(inputStorage, StorageType::INTEGER, abi._integer_argument_registers, as_Register); parse_register_array(inputStorage, StorageType::FLOAT, abi._float_argument_registers, as_FloatRegister); - objArrayOop outputStorage = jdk_internal_foreign_abi_ABIDescriptor::outputStorage(abi_oop); + refArrayOop outputStorage = 
jdk_internal_foreign_abi_ABIDescriptor::outputStorage(abi_oop); parse_register_array(outputStorage, StorageType::INTEGER, abi._integer_return_registers, as_Register); parse_register_array(outputStorage, StorageType::FLOAT, abi._float_return_registers, as_FloatRegister); diff --git a/src/hotspot/cpu/ppc/frame_ppc.cpp b/src/hotspot/cpu/ppc/frame_ppc.cpp index 7d2e22b5965dd..3803be5fddb1b 100644 --- a/src/hotspot/cpu/ppc/frame_ppc.cpp +++ b/src/hotspot/cpu/ppc/frame_ppc.cpp @@ -120,6 +120,9 @@ bool frame::safe_for_sender(JavaThread *thread) { intptr_t* sender_sp = (intptr_t*) fp; address sender_pc = (address) sender_abi->lr; + DEBUG_ONLY(nmethod* nm = _cb->as_nmethod_or_null()); + assert(nm == nullptr || !nm->needs_stack_repair(), "unsupported"); + if (Continuation::is_return_barrier_entry(sender_pc)) { // sender_pc might be invalid so check that the frame // actually belongs to a Continuation. @@ -458,6 +461,8 @@ void frame::describe_pd(FrameValues& values, int frame_no) { } if (is_java_frame() || Continuation::is_continuation_enterSpecial(*this)) { + DEBUG_ONLY(nmethod* nm = _cb->as_nmethod_or_null()); + assert(nm == nullptr || !nm->needs_stack_repair(), "unsupported"); intptr_t* ret_pc_loc = (intptr_t*)&own_abi()->lr; address ret_pc = *(address*)ret_pc_loc; values.describe(frame_no, ret_pc_loc, @@ -492,3 +497,18 @@ BasicObjectLock* frame::interpreter_frame_monitor_end() const { intptr_t* frame::interpreter_frame_tos_at(jint offset) const { return &interpreter_frame_tos_address()[offset]; } + +intptr_t* frame::repair_sender_sp(nmethod* nm, intptr_t* sp, intptr_t** saved_fp_addr) { + assert(nm != nullptr && nm->needs_stack_repair(), ""); + Unimplemented(); + return nullptr; +} + +bool frame::was_augmented_on_entry(int& real_size) const { + assert(is_compiled_frame(), ""); + if (_cb->as_nmethod_or_null()->needs_stack_repair()) { + Unimplemented(); + } + real_size = _cb->frame_size(); + return false; +} diff --git a/src/hotspot/cpu/ppc/frame_ppc.hpp 
b/src/hotspot/cpu/ppc/frame_ppc.hpp index 14743c7d75aba..b244d727584e9 100644 --- a/src/hotspot/cpu/ppc/frame_ppc.hpp +++ b/src/hotspot/cpu/ppc/frame_ppc.hpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2025 SAP SE. All rights reserved. + * Copyright (c) 2000, 2026, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2026 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -406,7 +406,9 @@ frame_alignment = 16, frame_alignment_in_words = frame_alignment >> LogBytesPerWord, // size, in words, of maximum shift in frame position due to alignment - align_wiggle = 1 + align_wiggle = 1, + // This is wrong and unimplemented + sender_sp_offset = 0 }; static jint interpreter_frame_expression_stack_direction() { return -1; } @@ -414,4 +416,7 @@ // returns the sending frame, without applying any barriers inline frame sender_raw(RegisterMap* map) const; + static intptr_t* repair_sender_sp(nmethod* nm, intptr_t* sp, intptr_t** saved_fp_addr); + bool was_augmented_on_entry(int& real_size) const; + #endif // CPU_PPC_FRAME_PPC_HPP diff --git a/src/hotspot/cpu/ppc/frame_ppc.inline.hpp b/src/hotspot/cpu/ppc/frame_ppc.inline.hpp index 123e6d8a0b1d9..fb4213e9b5386 100644 --- a/src/hotspot/cpu/ppc/frame_ppc.inline.hpp +++ b/src/hotspot/cpu/ppc/frame_ppc.inline.hpp @@ -327,6 +327,7 @@ inline frame frame::sender(RegisterMap* map) const { inline frame frame::sender_for_compiled_frame(RegisterMap *map) const { assert(map != nullptr, "map must be set"); + assert(!_cb->is_nmethod() || !_cb->as_nmethod()->needs_stack_repair(), "unsupported"); intptr_t* sender_sp = this->sender_sp(); address sender_pc = this->sender_pc(); @@ -335,6 +336,11 @@ inline frame frame::sender_for_compiled_frame(RegisterMap *map) const { // Tell GC to use argument oopmaps for some runtime stubs that 
need it. // For C1, the runtime stub might not have oop maps, so set this flag // outside of update_register_map. +#ifdef COMPILER1 + DEBUG_ONLY(nmethod* nm = _cb->as_nmethod_or_null()); + assert(nm == nullptr || !nm->is_compiled_by_c1() || !nm->method()->has_scalarized_args() || + pc() >= nm->verified_inline_entry_point(), "unsupported"); +#endif if (!_cb->is_nmethod()) { // compiled frames do not use callee-saved registers map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); if (oop_map() != nullptr) { diff --git a/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.cpp b/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.cpp index 5c3e1302ed38b..e27390203fbb4 100644 --- a/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.cpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2018, 2025 SAP SE. All rights reserved. + * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2026 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -281,14 +281,22 @@ void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet deco Register base, RegisterOrConstant ind_or_offs, Register val, Register tmp1, Register tmp2, Register tmp3, MacroAssembler::PreservationLevel preservation_level) { + bool in_heap = (decorators & IN_HEAP) != 0; + bool as_normal = (decorators & AS_NORMAL) != 0; + bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; + bool needs_pre_barrier = as_normal && !dest_uninitialized; + bool needs_post_barrier = (val != noreg && in_heap); bool is_array = (decorators & IS_ARRAY) != 0; bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; bool precise = is_array || on_anonymous; + // Load and record the previous value. 
- g1_write_barrier_pre(masm, decorators, - base, ind_or_offs, - tmp1, tmp2, tmp3, - preservation_level); + if (needs_pre_barrier) { + g1_write_barrier_pre(masm, decorators, + base, ind_or_offs, + tmp1, tmp2, tmp3, + preservation_level); + } BarrierSetAssembler::store_at(masm, decorators, type, base, ind_or_offs, val, @@ -296,7 +304,7 @@ void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet deco preservation_level); // No need for post barrier if storing null - if (val != noreg) { + if (needs_post_barrier) { if (precise) { if (ind_or_offs.is_constant()) { __ add_const_optimized(base, base, ind_or_offs.as_constant(), tmp1); diff --git a/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.cpp b/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.cpp index 7a9e29191b95d..3ec6cb5c4a5a7 100644 --- a/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.cpp @@ -27,6 +27,7 @@ #include "classfile/classLoaderData.hpp" #include "gc/shared/barrierSetAssembler.hpp" #include "gc/shared/barrierSetNMethod.hpp" +#include "gc/shared/barrierSetRuntime.hpp" #include "interpreter/interp_masm.hpp" #include "oops/compressedOops.hpp" #include "runtime/jniHandles.hpp" @@ -55,6 +56,7 @@ void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators if (UseCompressedOops && in_heap) { Register co = tmp1; if (val == noreg) { + assert(!not_null, "inconsistent access"); __ li(co, 0); } else { co = not_null ? 
__ encode_heap_oop_not_null(tmp1, val) : __ encode_heap_oop(tmp1, val); @@ -62,6 +64,7 @@ void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators __ stw(co, ind_or_offs, base, tmp2); } else { if (val == noreg) { + assert(!not_null, "inconsistent access"); val = tmp1; __ li(val, 0); } @@ -113,6 +116,19 @@ void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, } } +void BarrierSetAssembler::flat_field_copy(MacroAssembler* masm, DecoratorSet decorators, + Register src, Register dst, Register inline_layout_info) { + // flat_field_copy implementation is fairly complex, and there are not any + // "short-cuts" to be made from asm. What there is, appears to have the same + // cost in C++, so just "call_VM_leaf" for now rather than maintain hundreds + // of hand-rolled instructions... + if (decorators & IS_DEST_UNINITIALIZED) { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetRuntime::value_copy_is_dest_uninitialized), src, dst, inline_layout_info); + } else { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetRuntime::value_copy), src, dst, inline_layout_info); + } +} + // Generic implementation. GCs can provide an optimized one. 
void BarrierSetAssembler::resolve_jobject(MacroAssembler* masm, Register value, Register tmp1, Register tmp2, diff --git a/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.hpp b/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.hpp index 8e8c4d41c3b7f..27383be3cf856 100644 --- a/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.hpp +++ b/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.hpp @@ -60,6 +60,9 @@ class BarrierSetAssembler: public CHeapObj { Register tmp1, Register tmp2, MacroAssembler::PreservationLevel preservation_level, Label *L_handle_null = nullptr); + virtual void flat_field_copy(MacroAssembler* masm, DecoratorSet decorators, + Register src, Register dst, Register inline_layout_info); + virtual void resolve_jobject(MacroAssembler* masm, Register value, Register tmp1, Register tmp2, MacroAssembler::PreservationLevel preservation_level); diff --git a/src/hotspot/cpu/ppc/gc/shared/barrierSetNMethod_ppc.cpp b/src/hotspot/cpu/ppc/gc/shared/barrierSetNMethod_ppc.cpp index 96fa03df51949..02117f59c1c2d 100644 --- a/src/hotspot/cpu/ppc/gc/shared/barrierSetNMethod_ppc.cpp +++ b/src/hotspot/cpu/ppc/gc/shared/barrierSetNMethod_ppc.cpp @@ -156,6 +156,8 @@ void BarrierSetNMethod::set_guard_value(nmethod* nm, int value, int bit_mask) { return; } + assert(nm->is_osr_method() || !nm->method()->has_scalarized_args(), "unsupported"); + NativeNMethodBarrier* barrier = get_nmethod_barrier(nm); barrier->release_set_guard_value(value, bit_mask); } diff --git a/src/hotspot/cpu/ppc/globals_ppc.hpp b/src/hotspot/cpu/ppc/globals_ppc.hpp index aba3a99b0d845..e64794dc16610 100644 --- a/src/hotspot/cpu/ppc/globals_ppc.hpp +++ b/src/hotspot/cpu/ppc/globals_ppc.hpp @@ -77,6 +77,9 @@ define_pd_global(bool, CompactStrings, true); // 2x unrolled loop is shorter with more than 9 HeapWords. 
define_pd_global(intx, InitArrayShortSize, 9*BytesPerLong); +define_pd_global(bool, InlineTypePassFieldsAsArgs, false); +define_pd_global(bool, InlineTypeReturnedAsFields, false); + // Platform dependent flag handling: flags only defined on this platform. #define ARCH_FLAGS(develop, \ product, \ diff --git a/src/hotspot/cpu/ppc/interp_masm_ppc.hpp b/src/hotspot/cpu/ppc/interp_masm_ppc.hpp index 45af9bfc252ed..043eda1a068ab 100644 --- a/src/hotspot/cpu/ppc/interp_masm_ppc.hpp +++ b/src/hotspot/cpu/ppc/interp_masm_ppc.hpp @@ -85,7 +85,7 @@ class InterpreterMacroAssembler: public MacroAssembler { // Generate a subtype check: branch to ok_is_subtype if sub_klass is // a subtype of super_klass. Blows registers tmp1, tmp2 and tmp3. void gen_subtype_check(Register sub_klass, Register super_klass, - Register tmp1, Register tmp2, Register tmp3, Label &ok_is_subtype); + Register tmp1, Register tmp2, Register tmp3, Label &ok_is_subtype, bool profile = true); // Load object from cpool->resolved_references(index). 
void load_resolved_reference_at_index(Register result, Register index, Register tmp1, Register tmp2, @@ -255,7 +255,7 @@ class InterpreterMacroAssembler: public MacroAssembler { void update_mdp_for_ret(TosState state, Register return_bci); void profile_taken_branch(Register scratch, Register bumped_count); - void profile_not_taken_branch(Register scratch1, Register scratch2); + void profile_not_taken_branch(Register scratch1, Register scratch2, bool acmp = false); void profile_call(Register scratch1, Register scratch2); void profile_final_call(Register scratch1, Register scratch2); void profile_virtual_call(Register Rreceiver, Register Rscratch1, Register Rscratch2); @@ -265,6 +265,12 @@ class InterpreterMacroAssembler: public MacroAssembler { void profile_switch_case(Register index, Register scratch1,Register scratch2, Register scratch3); void profile_null_seen(Register Rscratch1, Register Rscratch2); + template void profile_array_type(Register array, Register tmp1, Register tmp2); + + void profile_multiple_element_types(Register element, Register tmp1, Register tmp2, Register tmp3); + void profile_element_type(Register element, Register tmp1, Register tmp2); + void profile_acmp(Register left, Register right, Register tmp1, Register tmp2); + // Argument and return type profiling. 
void profile_obj_type(Register obj, Register mdo_addr_base, RegisterOrConstant mdo_addr_offs, Register tmp, Register tmp2); void profile_arguments_type(Register callee, Register tmp1, Register tmp2, bool is_virtual); @@ -281,6 +287,13 @@ class InterpreterMacroAssembler: public MacroAssembler { void notify_method_entry(); void notify_method_exit(bool is_native_method, TosState state, NotifyMethodExitMode mode, bool check_exceptions); + + // Allocate instance in "obj" and read in the content of the inline field + // NOTES: + // - input holder object via "obj" (NOTE(review): the PPC implementation asserts no specific register - confirm), + // will return new instance via the same reg + void read_flat_field(Register entry, Register obj); + void write_flat_field(Register entry, Register tmp1, Register tmp2, Register obj, Register field_offset, Register value); }; #endif // CPU_PPC_INTERP_MASM_PPC_HPP diff --git a/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp b/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp index 789f8da957456..0c0c90384290b 100644 --- a/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp +++ b/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp @@ -555,9 +555,11 @@ void InterpreterMacroAssembler::load_resolved_klass_at_offset(Register Rcpool, R // Generate a subtype check: branch to ok_is_subtype if sub_klass is // a subtype of super_klass. Blows registers Rsub_klass, tmp1, tmp2. void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass, Register Rsuper_klass, Register Rtmp1, - Register Rtmp2, Register Rtmp3, Label &ok_is_subtype) { + Register Rtmp2, Register Rtmp3, Label &ok_is_subtype, bool profile) { // Profile the not-null value's klass.
- profile_typecheck(Rsub_klass, Rtmp1, Rtmp2); + if (profile) { + profile_typecheck(Rsub_klass, Rtmp1, Rtmp2); + } check_klass_subtype(Rsub_klass, Rsuper_klass, Rtmp1, Rtmp2, ok_is_subtype); } @@ -921,6 +923,41 @@ void InterpreterMacroAssembler::remove_activation(TosState state, bind(no_reserved_zone_enabling); } + if (state == atos && InlineTypeReturnedAsFields) { + Label skip, not_null; + cmpdi(CR0, R17_tos, 0); + bne(CR0, not_null); + + untested("remove_activation InlineTypeReturnedAsFields null"); + // Returned value is null, zero all return registers because they may belong to oop fields + li(R3_ARG1, 0); + li(R4_ARG2, 0); + li(R5_ARG3, 0); + li(R6_ARG4, 0); + li(R7_ARG5, 0); + li(R8_ARG6, 0); + li(R9_ARG7, 0); + li(R10_ARG8, 0); + b(skip); + + bind(not_null); + + // Check if we are returning a non-null inline type and load its fields into registers + test_oop_is_not_inline_type(R17_tos, skip, /* can_be_null= */ false); + + // Load fields from a buffered value with an inline class specific handler + load_klass(R11_scratch1, R17_tos); + ld(R11_scratch1, InlineKlass::adr_members_offset(), R11_scratch1); + ld(R11_scratch1, InlineKlass::unpack_handler_offset(), R11_scratch1); + // Unpack handler can be null if inline type is not scalarizable in returns + cmpdi(CR0, R11_scratch1, 0); + beq(CR0, skip); + mtctr(R11_scratch1); + bctrl(); + + bind(skip); + } + verify_oop(R17_tos, state); remove_top_frame_given_fp(fp, R21_sender_SP, R23_tmp3, /*return_pc*/ R0, R11_scratch1); @@ -1286,7 +1323,7 @@ void InterpreterMacroAssembler::profile_taken_branch(Register scratch, Register } // Count a not-taken branch in the bytecodes.
-void InterpreterMacroAssembler::profile_not_taken_branch(Register scratch1, Register scratch2) { +void InterpreterMacroAssembler::profile_not_taken_branch(Register scratch1, Register scratch2, bool acmp) { if (ProfileInterpreter) { Label profile_continue; @@ -1298,7 +1335,7 @@ void InterpreterMacroAssembler::profile_not_taken_branch(Register scratch1, Regi // The method data pointer needs to be updated to correspond to the // next bytecode. - update_mdp_by_constant(in_bytes(BranchData::branch_data_size())); + update_mdp_by_constant(acmp ? in_bytes(ACmpData::acmp_data_size()) : in_bytes(BranchData::branch_data_size())); bind (profile_continue); } } @@ -1460,6 +1497,111 @@ void InterpreterMacroAssembler::profile_switch_case(Register index, } } +template void InterpreterMacroAssembler::profile_array_type(Register array, + Register tmp1, + Register tmp2) { + if (ProfileInterpreter) { + Label profile_continue; + assert_different_registers(array, tmp1, tmp2); + + // If no method data exists, go to profile_continue. + test_method_data_pointer(profile_continue); + + profile_obj_type(array, R28_mdx, in_bytes(ArrayData::array_offset()), tmp1, tmp2); + + Label not_flat; + test_non_flat_array_oop(array, tmp1, not_flat); + set_mdp_flag_at(ArrayData::flat_array_byte_constant(), tmp1); + bind(not_flat); + + Label not_null_free; + test_non_null_free_array_oop(array, tmp1, not_null_free); + set_mdp_flag_at(ArrayData::null_free_array_byte_constant(), tmp1); + bind(not_null_free); + + bind(profile_continue); + } +} + +template void InterpreterMacroAssembler::profile_array_type(Register array, + Register tmp1, + Register tmp2); +template void InterpreterMacroAssembler::profile_array_type(Register array, + Register tmp1, + Register tmp2); + +void InterpreterMacroAssembler::profile_multiple_element_types(Register element, Register tmp1, Register tmp2, Register tmp3) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. 
+ test_method_data_pointer(profile_continue); + + Label done, update; + cmpdi(CR0, element, 0); + bne(CR0, update); + set_mdp_flag_at(BitData::null_seen_byte_constant(), tmp1); + b(done); + + bind(update); + load_klass(tmp1, element); + + // Record the object type. + profile_receiver_type(tmp1, R28_mdx, 0, tmp2, tmp3); + + bind(done); + + // The method data pointer needs to be updated. + update_mdp_by_constant(in_bytes(ArrayStoreData::array_store_data_size())); + + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_element_type(Register element, Register tmp1, Register tmp2) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(profile_continue); + + profile_obj_type(element, R28_mdx, in_bytes(ArrayLoadData::element_offset()), tmp1, tmp2); + + // The method data pointer needs to be updated. + update_mdp_by_constant(in_bytes(ArrayLoadData::array_load_data_size())); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_acmp(Register left, + Register right, + Register tmp1, + Register tmp2) { + if (ProfileInterpreter) { + Label profile_continue; + assert_different_registers(left, right, tmp1, tmp2); + + // If no method data exists, go to profile_continue. 
+ test_method_data_pointer(profile_continue); + + profile_obj_type(left, R28_mdx, in_bytes(ACmpData::left_offset()), tmp1, tmp2); + + Label left_not_inline_type; + test_oop_is_not_inline_type(left, left_not_inline_type); + set_mdp_flag_at(ACmpData::left_inline_type_byte_constant(), tmp1); + bind(left_not_inline_type); + + profile_obj_type(right, R28_mdx, in_bytes(ACmpData::right_offset()), tmp1, tmp2); + + test_oop_is_not_inline_type(right, profile_continue); + set_mdp_flag_at(ACmpData::right_inline_type_byte_constant(), tmp1); + + bind(profile_continue); + } +} + void InterpreterMacroAssembler::profile_null_seen(Register Rscratch1, Register Rscratch2) { if (ProfileInterpreter) { assert_different_registers(Rscratch1, Rscratch2); @@ -1597,7 +1739,7 @@ void InterpreterMacroAssembler::profile_arguments_type(Register callee, // argument. tmp1 is the number of cells left in the // CallTypeData/VirtualCallTypeData to reach its end. Non null // if there's a return to profile. - assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), + assert(SingleTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); sldi(tmp1, tmp1, exact_log2(DataLayout::cell_size)); add(R28_mdx, tmp1, R28_mdx); @@ -1638,7 +1780,7 @@ void InterpreterMacroAssembler::profile_return_type(Register ret, Register tmp1, bne(CR0, profile_continue); } - profile_obj_type(ret, R28_mdx, -in_bytes(ReturnTypeEntry::size()), tmp1, tmp2); + profile_obj_type(ret, R28_mdx, -in_bytes(SingleTypeEntry::size()), tmp1, tmp2); align(32, 12); bind(profile_continue); @@ -2287,3 +2429,36 @@ void InterpreterMacroAssembler::notify_method_exit(bool is_native_method, TosSta // Dtrace support not implemented. 
+ +void InterpreterMacroAssembler::read_flat_field(Register entry, Register obj) { + call_VM(obj, CAST_FROM_FN_PTR(address, InterpreterRuntime::read_flat_field), obj, entry); + membar(Assembler::StoreStore | Assembler::LoadLoad); // for allocation and volatile load +} + +void InterpreterMacroAssembler::write_flat_field(Register entry, Register tmp1, Register tmp2, + Register obj, Register field_offset, Register value) { + assert_different_registers(entry, field_offset, tmp1, tmp2, obj, value); + Label slow_path, done; + + lbz(tmp1, in_bytes(ResolvedFieldEntry::flags_offset()), entry); + test_field_is_not_null_free_inline_type(tmp1, slow_path); + + null_check_throw(value, -1, tmp1); + + add(obj, obj, field_offset); + + load_klass(tmp1, value); + payload_address(value, value, tmp1, tmp2); + + Register layout_info = field_offset; + lhz(tmp1, in_bytes(ResolvedFieldEntry::field_index_offset()), entry); // field index is a 16-bit (u2) value: load the whole halfword, a byte load would truncate it + ld(tmp2, in_bytes(ResolvedFieldEntry::field_holder_offset()), entry); + inline_layout_info(tmp2, tmp1, layout_info); + + flat_field_copy(IN_HEAP, value, obj, layout_info); + b(done); + + bind(slow_path); + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::write_flat_field), obj, value, entry); + bind(done); +} diff --git a/src/hotspot/cpu/ppc/jniFastGetField_ppc.cpp b/src/hotspot/cpu/ppc/jniFastGetField_ppc.cpp index 1c48ee4412ec7..5f83651b5bf4b 100644 --- a/src/hotspot/cpu/ppc/jniFastGetField_ppc.cpp +++ b/src/hotspot/cpu/ppc/jniFastGetField_ppc.cpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2025 SAP SE. All rights reserved. + * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2026 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* * This code is free software; you can redistribute it and/or modify it @@ -30,6 +30,7 @@ #include "prims/jniFastGetField.hpp" #include "prims/jvm_misc.hpp" #include "prims/jvmtiExport.hpp" +#include "runtime/jfieldIDWorkaround.hpp" #include "runtime/safepoint.hpp" #define __ masm-> @@ -98,7 +99,7 @@ address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); bs->try_resolve_jobject_in_native(masm, Robj, R3_ARG1, R4_ARG2, Rtmp, slow); - __ srwi(Rtmp, R5_ARG3, 2); // offset + __ srwi(Rtmp, R5_ARG3, jfieldIDWorkaround::offset_shift); // offset assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); speculative_load_pclist[count] = __ pc(); // Used by the segfault handler diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp index 1501934d48f05..595794eb7b742 100644 --- a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp @@ -3318,6 +3318,129 @@ void MacroAssembler::load_method_holder(Register holder, Register method) { ld(holder, ConstantPool::pool_holder_offset(), holder); } +void MacroAssembler::test_markword_is_inline_type(Register markword, Label& is_inline_type) { + assert_different_registers(markword, R0); + andi(R0, markword, markWord::inline_type_pattern_mask); + cmpwi(CR0, R0, markWord::inline_type_pattern); + beq(CR0, is_inline_type); +} + +void MacroAssembler::test_oop_is_not_inline_type(Register object, Label& not_inline_type, bool can_be_null) { + if (can_be_null) { + cmpdi(CR0, object, 0); + beq(CR0, not_inline_type); + } + ld(R0, oopDesc::mark_offset_in_bytes(), object); + andi(R0, R0, markWord::inline_type_pattern_mask); + cmpwi(CR0, R0, markWord::inline_type_pattern); + bne(CR0, not_inline_type); +} + +void MacroAssembler::test_field_is_null_free_inline_type(Register flags, Label& is_null_free_inline_type) { + testbitdi(CR0, R0, flags, 
ResolvedFieldEntry::is_null_free_inline_type_shift); + bne(CR0, is_null_free_inline_type); +} + +void MacroAssembler::test_field_is_not_null_free_inline_type(Register flags, Label& not_null_free_inline_type) { + testbitdi(CR0, R0, flags, ResolvedFieldEntry::is_null_free_inline_type_shift); + beq(CR0, not_null_free_inline_type); +} + +void MacroAssembler::test_field_is_flat(Register flags, Label& is_flat) { + testbitdi(CR0, R0, flags, ResolvedFieldEntry::is_flat_shift); + bne(CR0, is_flat); +} + +void MacroAssembler::test_oop_prototype_bit(Register oop, Register temp_reg, int32_t test_bit, bool jmp_set, + Label& jmp_label, bool maybe_far) { + Label test_mark_word; + // load mark word + ld(temp_reg, oopDesc::mark_offset_in_bytes(), oop); + // if unlocked bit is set we can directly use the mark word + andi_(R0, temp_reg, markWord::unlocked_value); + bne(CR0, test_mark_word); + // slow path use klass prototype + load_prototype_header(temp_reg, oop); + + bind(test_mark_word); + andi_(R0, temp_reg, test_bit); + if (maybe_far) { + bc_far_optimized(jmp_set ? 
Assembler::bcondCRbiIs0 : Assembler::bcondCRbiIs1, + bi0(CR0, Assembler::equal), jmp_label); + } else { + if (jmp_set) { + bne(CR0, jmp_label); + } else { + beq(CR0, jmp_label); + } + } +} + +void MacroAssembler::test_flat_array_oop(Register oop, Register temp_reg, Label& is_flat_array, bool maybe_far) { + test_oop_prototype_bit(oop, temp_reg, markWord::flat_array_bit_in_place, true, is_flat_array, maybe_far); +} + +void MacroAssembler::test_non_flat_array_oop(Register oop, Register temp_reg, Label& is_non_flat_array) { + test_oop_prototype_bit(oop, temp_reg, markWord::flat_array_bit_in_place, false, is_non_flat_array); +} + +void MacroAssembler::test_null_free_array_oop(Register oop, Register temp_reg, Label& is_null_free_array, bool maybe_far) { + test_oop_prototype_bit(oop, temp_reg, markWord::null_free_array_bit_in_place, true, is_null_free_array, maybe_far); +} + +void MacroAssembler::test_non_null_free_array_oop(Register oop, Register temp_reg, Label& is_non_null_free_array) { + test_oop_prototype_bit(oop, temp_reg, markWord::null_free_array_bit_in_place, false, is_non_null_free_array); +} + +void MacroAssembler::test_flat_array_layout(Register lh, Label& is_flat_array) { + testbitdi(CR0, R0, lh, exact_log2(Klass::_lh_array_tag_flat_value_bit_inplace)); + bne(CR0, is_flat_array); +} + +void MacroAssembler::load_metadata(Register dst, Register src) { + if (UseCompactObjectHeaders) { + load_narrow_klass_compact(dst, src); + } else { + lwz(dst, oopDesc::klass_offset_in_bytes(), src); + } +} + +void MacroAssembler::load_prototype_header(Register dst, Register src) { + load_klass(dst, src); + ld(dst, Klass::prototype_header_offset(), dst); +} + +void MacroAssembler::flat_field_copy(DecoratorSet decorators, Register src, Register dst, Register inline_layout_info) { + BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->flat_field_copy(this, decorators, src, dst, inline_layout_info); +} + +void MacroAssembler::payload_offset(Register 
inline_klass, Register offset) { + ld(offset, in_bytes(InlineKlass::adr_members_offset()), inline_klass); + lwz(offset, in_bytes(InlineKlass::payload_offset_offset()), offset); +} + +void MacroAssembler::payload_address(Register oop, Register data, Register inline_klass, Register t1) { + // ((address) (void*) o) + vk->payload_offset(); + payload_offset(inline_klass, t1); + add(data, oop, t1); +} + +void MacroAssembler::inline_layout_info(Register holder_klass, Register index, Register layout_info) { + assert_different_registers(holder_klass, index, layout_info); + InlineLayoutInfo array[2]; + int size = (char*)&array[1] - (char*)&array[0]; // computing size of array elements + if (is_power_of_2(size)) { + sldi(index, index, log2i_exact(size)); // Scale index by power of 2 + } else { + mulli(index, index, size); // Scale the index to be the entry index * array_element_size; size is an immediate, so multiply-immediate is required (mulld expects a register operand) + } + ld(layout_info, InstanceKlass::inline_layout_info_array_offset(), holder_klass); + addi(layout_info, layout_info, Array::base_offset_in_bytes()); + add(layout_info, layout_info, index); +} + + // Clear Array // For very short arrays. tmp == R0 is allowed. void MacroAssembler::clear_memory_unrolled(Register base_ptr, int cnt_dwords, Register tmp, int offset) { @@ -3406,6 +3529,31 @@ void MacroAssembler::clear_memory_doubleword(Register base_ptr, Register cnt_dwo bind(done); } +// base: Address of a buffer to be filled, 8 bytes aligned. Killed. +// cnt: Count in 8-byte unit. +// value: Value to be filled with.
+void MacroAssembler::fill_words(Register base, Register cnt, Register value) { + Label loop, loop_end, done; + + // 2x unrolled loop + srdi_(R0, cnt, 1); + beq(CR0, loop_end); // less than 2 elements + mtctr(R0); + + bind(loop); + std(value, 0, base); + std(value, 8, base); + addi(base, base, 16); + bdnz(loop); + + bind(loop_end); + andi_(R0, cnt, 1); + beq(CR0, done); + std(value, 0, base); // last element + + bind(done); +} + /////////////////////////////////////////// String intrinsics //////////////////////////////////////////// // Helpers for Intrinsic Emitters @@ -4725,8 +4873,8 @@ void MacroAssembler::atomically_flip_locked_state(bool is_unlock, Register obj, if (!is_unlock) { ldarx(tmp, obj, MacroAssembler::cmpxchgx_hint_acquire_lock()); xori(tmp, tmp, markWord::unlocked_value); // flip unlocked bit - andi_(R0, tmp, markWord::lock_mask_in_place); - bne(CR0, failed); // failed if new header doesn't contain locked_value (which is 0) + andi_(R0, tmp, markWord::lock_mask_in_place | markWord::inline_type_bit_in_place); + bne(CR0, failed); // failed if new header doesn't contain locked_value (which is 0) or belongs to an inline type } else { ldarx(tmp, obj, MacroAssembler::cmpxchgx_hint_release_lock()); andi_(R0, tmp, markWord::lock_mask_in_place); @@ -4880,3 +5028,32 @@ void MacroAssembler::fast_unlock(Register obj, Register t1, Label& slow) { bind(unlocked); } + +// Unimplemented methods for inline types. 
+int MacroAssembler::store_inline_type_fields_to_buf(ciInlineKlass* vk, bool from_interpreter) { + Unimplemented(); +} + +bool MacroAssembler::move_helper(VMReg from, VMReg to, BasicType bt, RegState reg_state[]) { + Unimplemented(); +} + +bool MacroAssembler::unpack_inline_helper(const GrowableArray* sig, int& sig_index, + VMReg from, int& from_index, VMRegPair* to, int to_count, int& to_index, + RegState reg_state[]) { + Unimplemented(); +} + +bool MacroAssembler::pack_inline_helper(const GrowableArray* sig, int& sig_index, int vtarg_index, + VMRegPair* from, int from_count, int& from_index, VMReg to, + RegState reg_state[], Register val_array) { + Unimplemented(); +} + +int MacroAssembler::extend_stack_for_inline_args(int args_on_stack) { + Unimplemented(); +} + +VMReg MacroAssembler::spill_reg_for(VMReg reg) { + Unimplemented(); +} diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp index bbfa75f515150..533d03230a4d8 100644 --- a/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp +++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp @@ -28,12 +28,16 @@ #include "asm/assembler.hpp" #include "oops/accessDecorators.hpp" +#include "runtime/signature.hpp" #include "utilities/macros.hpp" // MacroAssembler extends Assembler by a few frequently used macros. 
class ciTypeArray; class OopMap; +class ciInlineKlass; +class SigEntry; +class VMRegPair; class MacroAssembler: public Assembler { public: @@ -394,6 +398,11 @@ class MacroAssembler: public Assembler { Register Rmethod_toc = noreg, bool scratch_emit = false); + // Inline type specific methods +#include "asm/macroAssembler_common.hpp" + + void save_stack_increment(int sp_inc, int frame_size); + protected: // It is imperative that all calls into the VM are handled via the @@ -813,6 +822,38 @@ class MacroAssembler: public Assembler { void decode_klass_not_null(Register dst, Register src = noreg); Register encode_klass_not_null(Register dst, Register src = noreg); + // markWord tests, kills markWord reg + void test_markword_is_inline_type(Register markword, Label& is_inline_type); + + // inlineKlass queries, kills temp_reg + void test_oop_is_not_inline_type(Register object, Label& not_inline_type, bool can_be_null = true); + + void test_field_is_null_free_inline_type(Register flags, Label& is_null_free); + void test_field_is_not_null_free_inline_type(Register flags, Label& not_null_free); + void test_field_is_flat(Register flags, Label& is_flat); + + // Check oops for special arrays, i.e. flat arrays and/or null-free arrays + void test_oop_prototype_bit(Register oop, Register temp_reg, int32_t test_bit, bool jmp_set, Label& jmp_label, bool maybe_far = false); + void test_flat_array_oop(Register oop, Register temp_reg, Label& is_flat_array, bool maybe_far = false); + void test_non_flat_array_oop(Register oop, Register temp_reg, Label& is_non_flat_array); + void test_null_free_array_oop(Register oop, Register temp_reg, Label& is_null_free_array, bool maybe_far = false); + void test_non_null_free_array_oop(Register oop, Register temp_reg, Label& is_non_null_free_array); + + // Check array klass layout helper for flat or null-free arrays... 
+ void test_flat_array_layout(Register lh, Label& is_flat_array); + + void load_metadata(Register dst, Register src); + + void flat_field_copy(DecoratorSet decorators, Register src, Register dst, Register inline_layout_info); + + void load_prototype_header(Register dst, Register src); + + void inline_layout_info(Register holder_klass, Register index, Register layout_info); + + // inline type data payload offsets... + void payload_offset(Register inline_klass, Register offset); + void payload_address(Register oop, Register data, Register inline_klass, Register t1); + // SIGTRAP-based range checks for arrays. inline void trap_range_check_l(Register a, Register b); inline void trap_range_check_l(Register a, int si16); @@ -842,6 +883,7 @@ class MacroAssembler: public Assembler { void clear_memory_unrolled(Register base_ptr, int cnt_dwords, Register tmp = R0, int offset = 0); void clear_memory_constlen(Register base_ptr, int cnt_dwords, Register tmp = R0); void clear_memory_doubleword(Register base_ptr, Register cnt_dwords, Register tmp = R0, long const_cnt = -1); + void fill_words(Register base, Register cnt, Register value); // Emitters for BigInteger.multiplyToLen intrinsic. 
inline void multiply64(Register dest_hi, Register dest_lo, @@ -1010,6 +1052,9 @@ class MacroAssembler: public Assembler { void should_not_reach_here(const char* msg = nullptr) { stop(stop_shouldnotreachhere, msg); } void zap_from_to(Register low, int before, Register high, int after, Register val, Register addr) PRODUCT_RETURN; + + // Inline type specific methods + #include "asm/macroAssembler_common.hpp" }; #endif // CPU_PPC_MACROASSEMBLER_PPC_HPP diff --git a/src/hotspot/cpu/ppc/methodHandles_ppc.cpp b/src/hotspot/cpu/ppc/methodHandles_ppc.cpp index ae94a9618b57b..9341fc50904a1 100644 --- a/src/hotspot/cpu/ppc/methodHandles_ppc.cpp +++ b/src/hotspot/cpu/ppc/methodHandles_ppc.cpp @@ -144,7 +144,11 @@ void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register meth __ cmplwi(CR0, R19_method, 0); __ beq(CR0, L_no_such_method); - const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : + // The following jump might pass an inline type argument that was erased to Object as oop to a + // callee that expects inline type arguments to be passed as fields. We need to call the compiled + // value entry (_code->inline_entry_point() or _adapter->c2i_inline_entry()) which will take care + // of translating between the calling conventions. + const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_inline_offset() : Method::from_interpreted_offset(); __ ld(target, in_bytes(entry_offset), R19_method); __ mtctr(target); diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad index 9bec99e90cc88..055e882d3d59b 100644 --- a/src/hotspot/cpu/ppc/ppc.ad +++ b/src/hotspot/cpu/ppc/ppc.ad @@ -1542,11 +1542,6 @@ void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { C->output()->set_frame_complete(__ offset()); } -uint MachPrologNode::size(PhaseRegAlloc *ra_) const { - // Variable size. determine dynamically. 
- return MachNode::size(ra_); -} - int MachPrologNode::reloc() const { // Return number of relocatable values contained in this instruction. return 1; // 1 reloc entry for load_const(toc). @@ -1605,11 +1600,6 @@ void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { } } -uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { - // Variable size. Determine dynamically. - return MachNode::size(ra_); -} - int MachEpilogNode::reloc() const { // Return number of relocatable values contained in this instruction. return 1; // 1 for load_from_polling_page. @@ -1959,6 +1949,18 @@ uint BoxLockNode::size(PhaseRegAlloc *ra_) const { return 4; } +#ifndef PRODUCT +void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const +{ + Unimplemented(); +} +#endif + +void MachVEPNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const +{ + Unimplemented(); +} + #ifndef PRODUCT void MachUEPNode::format(PhaseRegAlloc *ra_, outputStream *st) const { st->print_cr("---- MachUEPNode ----"); @@ -1972,11 +1974,6 @@ void MachUEPNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { // Argument is valid and klass is as expected, continue. } -uint MachUEPNode::size(PhaseRegAlloc *ra_) const { - // Variable size. Determine dynamically. - return MachNode::size(ra_); -} - //============================================================================= %} // interrupt source @@ -9711,6 +9708,17 @@ instruct castP2X(iRegLdst dst, iRegP_N2P src) %{ ins_pipe(pipe_class_default); %} +instruct castN2X(iRegLdst dst, iRegNsrc src) %{ + match(Set dst (CastP2X src)); + + format %{ "MR $dst, $src \t// Ptr->Long" %} + // variable size, 0 or 4. 
+ ins_encode %{ + __ mr_if_needed($dst$$Register, $src$$Register); + %} + ins_pipe(pipe_class_default); +%} + instruct castPP(iRegPdst dst) %{ match(Set dst (CastPP dst)); format %{ " -- \t// castPP of $dst" %} @@ -11427,8 +11435,8 @@ instruct array_size(iRegLdst dst, iRegPsrc end, iRegPsrc start) %{ %} // Clear-array with constant short array length. The versions below can use dcbz with cnt > 30. -instruct inlineCallClearArrayShort(immLmax30 cnt, rarg2RegP base, Universe dummy, regCTR ctr) %{ - match(Set dummy (ClearArray cnt base)); +instruct inlineCallClearArrayShort(immLmax30 cnt, rarg2RegP base, immL_0 zero, Universe dummy, regCTR ctr) %{ + match(Set dummy (ClearArray (Binary cnt base) zero)); effect(USE_KILL base, KILL ctr); ins_cost(2 * MEMORY_REF_COST); @@ -11440,8 +11448,8 @@ instruct inlineCallClearArrayShort(immLmax30 cnt, rarg2RegP base, Universe dummy %} // Clear-array with constant large array length. -instruct inlineCallClearArrayLarge(immL cnt, rarg2RegP base, Universe dummy, iRegLdst tmp, regCTR ctr) %{ - match(Set dummy (ClearArray cnt base)); +instruct inlineCallClearArrayLarge(immL cnt, rarg2RegP base, immL_0 zero, Universe dummy, iRegLdst tmp, regCTR ctr) %{ + match(Set dummy (ClearArray (Binary cnt base) zero)); effect(USE_KILL base, TEMP tmp, KILL ctr); ins_cost(3 * MEMORY_REF_COST); @@ -11453,8 +11461,8 @@ instruct inlineCallClearArrayLarge(immL cnt, rarg2RegP base, Universe dummy, iRe %} // Clear-array with dynamic array length. 
-instruct inlineCallClearArray(rarg1RegL cnt, rarg2RegP base, Universe dummy, regCTR ctr) %{ - match(Set dummy (ClearArray cnt base)); +instruct inlineCallClearArray(rarg1RegL cnt, rarg2RegP base, immL_0 zero, Universe dummy, regCTR ctr) %{ + match(Set dummy (ClearArray (Binary cnt base) zero)); effect(USE_KILL cnt, USE_KILL base, KILL ctr); ins_cost(4 * MEMORY_REF_COST); @@ -11465,6 +11473,20 @@ instruct inlineCallClearArray(rarg1RegL cnt, rarg2RegP base, Universe dummy, reg ins_pipe(pipe_class_default); %} +// Clear-array with dynamic array length and non-zero value. +instruct inlineCallClearArrayWordCopy(rarg1RegL cnt, rarg2RegP base, iRegLdst val, Universe dummy, regCTR ctr) %{ + predicate(((ClearArrayNode*)n)->word_copy_only()); + match(Set dummy (ClearArray (Binary cnt base) val)); + effect(USE_KILL base, KILL ctr); + ins_cost(8 * MEMORY_REF_COST); + + format %{ "ClearArray $cnt, $base, $val" %} + ins_encode %{ + __ fill_words($base$$Register, $cnt$$Register, $val$$Register); + %} + ins_pipe(pipe_class_default); +%} + instruct string_compareL(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt1, rarg4RegI cnt2, iRegIdst result, iRegIdst tmp, regCTR ctr, flagsRegCR0 cr0) %{ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); diff --git a/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp b/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp index a1446f4979798..b26dee3d56755 100644 --- a/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp +++ b/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp @@ -627,11 +627,8 @@ static int reg2offset(VMReg r) { // either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit // units regardless of build. Of course for i486 there is no 64 bit build -// The Java calling convention is a "shifted" version of the C ABI. -// By skipping the first C ABI register we can call non-static jni methods -// with small numbers of arguments without having to shuffle the arguments -// at all. 
Since we control the java ABI we ought to at least get some -// advantage out of it. +// In contrast to other platforms the Java calling convention is *NOT* a +// "shifted" version of the C ABI. const VMReg java_iarg_reg[8] = { R3->as_VMReg(), @@ -771,6 +768,87 @@ int SharedRuntime::java_calling_convention(const BasicType *sig_bt, return stk; } +// Similar to java_calling_convention() but for multiple return +// values. There's no way to store them on the stack so if we don't +// have enough registers, multiple values can't be returned. +const uint SharedRuntime::java_return_convention_max_int = Argument::n_int_register_parameters_j; +const uint SharedRuntime::java_return_convention_max_float = Argument::n_float_register_parameters_j; +int SharedRuntime::java_return_convention(const BasicType *sig_bt, + VMRegPair *regs, + int total_args_passed) { + // Create the mapping between argument positions and + // registers. + static const Register INT_ArgReg[java_return_convention_max_int] = { + R3_RET, R10_ARG8, R9_ARG7, R8_ARG6, R7_ARG5, R6_ARG4, R5_ARG3, R4_ARG2 + }; + static const FloatRegister FP_ArgReg[java_return_convention_max_float] = { + F1_RET, F2_ARG2, F3_ARG3, F4_ARG4, F5_ARG5, F6_ARG6, F7_ARG7, F8_ARG8, + F9_ARG9, F10_ARG10, F11_ARG11, F12_ARG12, F13_ARG13 + }; + + + uint int_args = 0; + uint fp_args = 0; + + for (int i = 0; i < total_args_passed; i++) { + switch (sig_bt[i]) { + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + if (int_args < java_return_convention_max_int) { + regs[i].set1(INT_ArgReg[int_args]->as_VMReg()); + int_args++; + } else { + return -1; + } + break; + case T_VOID: + // halves of T_LONG or T_DOUBLE + assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); + regs[i].set_bad(); + break; + case T_LONG: + assert(sig_bt[i + 1] == T_VOID, "expecting half"); + // fall through + case T_OBJECT: + case T_ARRAY: + case T_ADDRESS: + case T_METADATA: + if (int_args < 
java_return_convention_max_int) { + regs[i].set2(INT_ArgReg[int_args]->as_VMReg()); + int_args++; + } else { + return -1; + } + break; + case T_FLOAT: + if (fp_args < java_return_convention_max_float) { + regs[i].set1(FP_ArgReg[fp_args]->as_VMReg()); + fp_args++; + } else { + return -1; + } + break; + case T_DOUBLE: + assert(sig_bt[i + 1] == T_VOID, "expecting half"); + if (fp_args < java_return_convention_max_float) { + regs[i].set2(FP_ArgReg[fp_args]->as_VMReg()); + fp_args++; + } else { + return -1; + } + break; + default: + ShouldNotReachHere(); + break; + } + } + + return int_args + fp_args; +} + // Calling convention for calling C code. int SharedRuntime::c_calling_convention(const BasicType *sig_bt, VMRegPair *regs, @@ -916,44 +994,17 @@ int SharedRuntime::vector_calling_convention(VMRegPair *regs, return 0; } -static address gen_c2i_adapter(MacroAssembler *masm, - int total_args_passed, - int comp_args_on_stack, - const BasicType *sig_bt, - const VMRegPair *regs, - Label& call_interpreter, - const Register& ientry) { - - address c2i_entrypoint; - - const Register sender_SP = R21_sender_SP; // == R21_tmp1 - const Register code = R22_tmp2; - //const Register ientry = R23_tmp3; - const Register value_regs[] = { R24_tmp4, R25_tmp5, R26_tmp6 }; - const int num_value_regs = sizeof(value_regs) / sizeof(Register); - int value_regs_index = 0; - - const Register return_pc = R27_tmp7; - const Register tmp = R28_tmp8; - - assert_different_registers(sender_SP, code, ientry, return_pc, tmp); - - // Adapter needs TOP_IJAVA_FRAME_ABI. - const int adapter_size = frame::top_ijava_frame_abi_size + - align_up(total_args_passed * wordSize, frame::alignment_in_bytes); - - // regular (verified) c2i entry point - c2i_entrypoint = __ pc(); - - // Does compiled code exists? If yes, patch the caller's callsite. 
- __ ld(code, method_(code)); - __ cmpdi(CR0, code, 0); - __ ld(ientry, method_(interpreter_entry)); // preloaded - __ beq(CR0, call_interpreter); - +// Patch the callers callsite with entry to compiled code if it exists. +static void patch_callers_callsite(MacroAssembler *masm, int adapter_size, int total_args_passed, const VMRegPair *regs) { + Label L; + __ ld(R0, in_bytes(Method::code_offset()), R19_method); + __ cmpdi(CR0, R0, 0); + __ beq(CR0, L); // Patch caller's callsite, method_(code) was not null which means that // compiled code exists. + const Register return_pc = R11_scratch1; + const Register tmp = R12_scratch2; __ mflr(return_pc); __ std(return_pc, _abi0(lr), R1_SP); RegisterSaver::push_frame_and_save_argument_registers(masm, tmp, adapter_size, total_args_passed, regs); @@ -962,16 +1013,131 @@ static address gen_c2i_adapter(MacroAssembler *masm, RegisterSaver::restore_argument_registers_and_pop_frame(masm, adapter_size, total_args_passed, regs); __ ld(return_pc, _abi0(lr), R1_SP); - __ ld(ientry, method_(interpreter_entry)); // preloaded __ mtlr(return_pc); + // callsite->set_to_clean() uses icache flush including isync + + __ bind(L); +} + +// For each inline type argument, sig includes the list of fields of +// the inline type. This utility function computes the number of +// arguments for the call if inline types are passed by reference (the +// calling convention the interpreter expects). +static int compute_total_args_passed_int(const GrowableArray* sig_extended) { + int total_args_passed = 0; + if (InlineTypePassFieldsAsArgs) { + for (int i = 0; i < sig_extended->length(); i++) { + BasicType bt = sig_extended->at(i)._bt; + if (bt == T_METADATA) { + // In sig_extended, an inline type argument starts with: + // T_METADATA, followed by the types of the fields of the + // inline type and T_VOID to mark the end of the value + // type. 
Inline types are flattened so, for instance, in the + // case of an inline type with an int field and an inline type + // field that itself has 2 fields, an int and a long: + // T_METADATA T_INT T_METADATA T_INT T_LONG T_VOID (second + // slot for the T_LONG) T_VOID (inner inline type) T_VOID + // (outer inline type) + total_args_passed++; + int vt = 1; + do { + i++; + BasicType bt = sig_extended->at(i)._bt; + BasicType prev_bt = sig_extended->at(i-1)._bt; + if (bt == T_METADATA) { + vt++; + } else if (bt == T_VOID && + prev_bt != T_LONG && + prev_bt != T_DOUBLE) { + vt--; + } + } while (vt != 0); + } else { + total_args_passed++; + } + } + } else { + total_args_passed = sig_extended->length(); + } + return total_args_passed; +} + +static void gen_c2i_adapter(MacroAssembler *masm, + const GrowableArray* sig_extended, + const VMRegPair *regs, + bool requires_clinit_barrier, + address& c2i_no_clinit_check_entry, + Label& skip_fixup, + address start, + OopMapSet* oop_maps, + int& frame_complete, + int& frame_size_in_words, + bool alloc_inline_receiver) { + if (requires_clinit_barrier) { + assert(VM_Version::supports_fast_class_init_checks(), "sanity"); + Label L_skip_barrier; + + // Bypass the barrier for non-static methods + __ lhz(R0, in_bytes(Method::access_flags_offset()), R19_method); + __ andi_(R0, R0, JVM_ACC_STATIC); + __ beq(CR0, L_skip_barrier); // non-static + + Register klass = R11_scratch1; + __ load_method_holder(klass, R19_method); + __ clinit_barrier(klass, R16_thread, &L_skip_barrier /*L_fast_path*/); + + __ load_const_optimized(klass, SharedRuntime::get_handle_wrong_method_stub(), R0); + __ mtctr(klass); + __ bctr(); + + __ bind(L_skip_barrier); + c2i_no_clinit_check_entry = __ pc(); + } + + BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->c2i_entry_barrier(masm, R11_scratch1, R12_scratch2, R21_tmp1); + + // Since all args are passed on the stack, total_args_passed * + // Interpreter::stackElementSize is the space 
we need. + int total_args_passed = compute_total_args_passed_int(sig_extended); + assert(total_args_passed >= 0, "total_args_passed is %d", total_args_passed); + + // Adapter needs TOP_IJAVA_FRAME_ABI. + const int adapter_size = frame::top_ijava_frame_abi_size + + align_up(total_args_passed * wordSize, frame::alignment_in_bytes); + + // Before we get into the guts of the C2I adapter, see if we should be here + // at all. We've come from compiled code and are attempting to jump to the + // interpreter, which means the caller made a static call to get here + // (vcalls always get a compiled target if there is one). Check for a + // compiled target. If there is one, we need to patch the caller's call. + patch_callers_callsite(masm, adapter_size, total_args_passed, regs); + + __ bind(skip_fixup); + + if (InlineTypePassFieldsAsArgs) { + // Is there an inline type argument? + bool has_inline_argument = false; + for (int i = 0; i < sig_extended->length() && !has_inline_argument; i++) { + has_inline_argument = (sig_extended->at(i)._bt == T_METADATA); + } + if (has_inline_argument) { + __ unimplemented("c2i has_inline_argument"); + } + } // Call the interpreter. - __ BIND(call_interpreter); + const Register tmp = R22_tmp2, ientry = R23_tmp3; + const Register value_regs[] = { R24_tmp4, R25_tmp5, R26_tmp6 }; + const int num_value_regs = sizeof(value_regs) / sizeof(Register); + int value_regs_index = 0; + + __ ld(ientry, method_(interpreter_entry)); // preloaded __ mtctr(ientry); // Get a copy of the current SP for loading caller's arguments. - __ mr(sender_SP, R1_SP); + __ mr(R21_sender_SP, R1_SP); // Add space for the adapter. __ resize_frame(-adapter_size, R12_scratch2); @@ -979,7 +1145,10 @@ static address gen_c2i_adapter(MacroAssembler *masm, int st_off = adapter_size - wordSize; // Write the args into the outgoing interpreter space. 
+ // TODO: support for InlineTypePassFieldsAsArgs for (int i = 0; i < total_args_passed; i++) { + BasicType bt = sig_extended->at(i)._bt; + VMReg r_1 = regs[i].first(); VMReg r_2 = regs[i].second(); if (!r_1->is_valid()) { @@ -993,9 +1162,9 @@ static address gen_c2i_adapter(MacroAssembler *masm, // preserve area (JIT's ABI). We must account for it here. int ld_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; if (!r_2->is_valid()) { - __ lwz(tmp_reg, ld_off, sender_SP); + __ lwz(tmp_reg, ld_off, R21_sender_SP); } else { - __ ld(tmp_reg, ld_off, sender_SP); + __ ld(tmp_reg, ld_off, R21_sender_SP); } // Pretend stack targets were loaded into tmp_reg. r_1 = tmp_reg->as_VMReg(); @@ -1009,7 +1178,7 @@ static address gen_c2i_adapter(MacroAssembler *masm, } else { // Longs are given 2 64-bit slots in the interpreter, but the // data is passed in only 1 slot. - if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { + if (bt == T_LONG || bt == T_DOUBLE) { DEBUG_ONLY( __ li(tmp, 0); __ std(tmp, st_off, R1_SP); ) st_off-=wordSize; } @@ -1041,17 +1210,12 @@ static address gen_c2i_adapter(MacroAssembler *masm, // load TOS __ addi(R15_esp, R1_SP, st_off); - // Frame_manager expects initial_caller_sp (= SP without resize by c2i) in R21_tmp1. - assert(sender_SP == R21_sender_SP, "passing initial caller's SP in wrong register"); __ bctr(); - - return c2i_entrypoint; } void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, - int total_args_passed, int comp_args_on_stack, - const BasicType *sig_bt, + const GrowableArray* sig, const VMRegPair *regs) { // Load method's entry-point from method. @@ -1078,6 +1242,7 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, const int num_value_regs = sizeof(value_regs) / sizeof(Register); int value_regs_index = 0; + int total_args_passed = sig->length(); int ld_offset = total_args_passed*wordSize; // Cut-out for having no stack args. 
Since up to 2 int/oop args are passed @@ -1098,9 +1263,11 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, // Now generate the shuffle code. Pick up all register args and move the // rest through register value=Z_R12. BLOCK_COMMENT("Shuffle arguments"); + for (int i = 0; i < total_args_passed; i++) { - if (sig_bt[i] == T_VOID) { - assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); + BasicType bt = sig->at(i)._bt; + if (bt == T_VOID) { + assert(i > 0 && (sig->at(i - 1)._bt == T_LONG || sig->at(i - 1)._bt == T_DOUBLE), "missing half"); continue; } @@ -1133,7 +1300,7 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, } if (!r_2->is_valid()) { // Not sure we need to do this but it shouldn't hurt. - if (is_reference_type(sig_bt[i]) || sig_bt[i] == T_ADDRESS) { + if (is_reference_type(bt) || bt == T_ADDRESS) { __ ld(r, ld_offset, ld_ptr); ld_offset-=wordSize; } else { @@ -1143,7 +1310,7 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, } else { // In 64bit, longs are given 2 64-bit slots in the interpreter, but the // data is passed in only 1 slot. 
- if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { + if (bt == T_LONG || bt == T_DOUBLE) { ld_offset-=wordSize; } __ ld(r, ld_offset, ld_ptr); @@ -1154,8 +1321,8 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, // Now store value where the compiler expects it int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots())*VMRegImpl::stack_slot_size; - if (sig_bt[i] == T_INT || sig_bt[i] == T_FLOAT ||sig_bt[i] == T_BOOLEAN || - sig_bt[i] == T_SHORT || sig_bt[i] == T_CHAR || sig_bt[i] == T_BYTE) { + if (bt == T_INT || bt == T_FLOAT || bt == T_BOOLEAN || + bt == T_SHORT || bt == T_CHAR || bt == T_BYTE) { __ stw(r, st_off, R1_SP); } else { __ std(r, st_off, R1_SP); @@ -1182,79 +1349,99 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, __ bctr(); } -void SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, - int total_args_passed, - int comp_args_on_stack, - const BasicType *sig_bt, - const VMRegPair *regs, - address entry_address[AdapterBlob::ENTRY_COUNT]) { - // entry: i2c +static void gen_inline_cache_check(MacroAssembler *masm, Label& skip_fixup) { + __ ic_check(BytesPerInstWord /* end_alignment */); + __ ld(R19_method, CompiledICData::speculated_method_offset(), R19_inline_cache_reg); - __ align(CodeEntryAlignment); - entry_address[AdapterBlob::I2C] = __ pc(); - gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); - - - // entry: c2i unverified - - __ align(CodeEntryAlignment); - BLOCK_COMMENT("c2i unverified entry"); - entry_address[AdapterBlob::C2I_Unverified] = __ pc(); - - // inline_cache contains a CompiledICData - const Register ic = R19_inline_cache_reg; - const Register ic_klass = R11_scratch1; - const Register receiver_klass = R12_scratch2; - const Register code = R21_tmp1; - const Register ientry = R23_tmp3; - - assert_different_registers(ic, ic_klass, receiver_klass, R3_ARG1, code, ientry); - assert(R11_scratch1 == R11, "need prologue scratch register"); - - Label call_interpreter; - - 
__ ic_check(4 /* end_alignment */); - __ ld(R19_method, CompiledICData::speculated_method_offset(), ic); - // Argument is valid and klass is as expected, continue. - - __ ld(code, method_(code)); - __ cmpdi(CR0, code, 0); - __ ld(ientry, method_(interpreter_entry)); // preloaded - __ beq_predict_taken(CR0, call_interpreter); + // Method might have been compiled since the call site was patched to + // interpreted; if that is the case treat it as a miss so we can get + // the call site corrected. + __ ld(R0, method_(code)); + __ cmpdi(CR0, R0, 0); + __ beq_predict_taken(CR0, skip_fixup); // Branch to ic_miss_stub. __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type); +} - // entry: c2i +void SharedRuntime::generate_i2c2i_adapters(MacroAssembler* masm, + int comp_args_on_stack, + const GrowableArray* sig, + const VMRegPair* regs, + const GrowableArray* sig_cc, + const VMRegPair* regs_cc, + const GrowableArray* sig_cc_ro, + const VMRegPair* regs_cc_ro, + address entry_address[AdapterBlob::ENTRY_COUNT], + AdapterBlob*& new_adapter, + bool allocate_code_blob) { - entry_address[AdapterBlob::C2I] = __ pc(); + entry_address[AdapterBlob::I2C] = __ pc(); + gen_i2c_adapter(masm, comp_args_on_stack, sig, regs); - // Class initialization barrier for static methods - entry_address[AdapterBlob::C2I_No_Clinit_Check] = nullptr; - assert(VM_Version::supports_fast_class_init_checks(), "sanity"); - Label L_skip_barrier; + // ------------------------------------------------------------------------- + // Generate a C2I adapter. On entry we know rmethod holds the Method* during calls + // to the interpreter. The args start out packed in the compiled layout. They + // need to be unpacked into the interpreter layout. This will almost always + // require some stack space. We grow the current (compiled) stack, then repack + // the args. We finally end in a jump to the generic interpreter entry point. 
+ // On exit from the interpreter, the interpreter will restore our SP (lest the + // compiled code, which relies solely on SP and not FP, get sick). - // Bypass the barrier for non-static methods - __ lhz(R0, in_bytes(Method::access_flags_offset()), R19_method); - __ andi_(R0, R0, JVM_ACC_STATIC); - __ beq(CR0, L_skip_barrier); // non-static + entry_address[AdapterBlob::C2I_Unverified] = __ pc(); + entry_address[AdapterBlob::C2I_Unverified_Inline] = __ pc(); + Label skip_fixup; - Register klass = R11_scratch1; - __ load_method_holder(klass, R19_method); - __ clinit_barrier(klass, R16_thread, &L_skip_barrier /*L_fast_path*/); + gen_inline_cache_check(masm, skip_fixup); - __ load_const_optimized(klass, SharedRuntime::get_handle_wrong_method_stub(), R0); - __ mtctr(klass); - __ bctr(); + OopMapSet* oop_maps = new OopMapSet(); + int frame_complete = CodeOffsets::frame_never_safe; + int frame_size_in_words = 0; - __ bind(L_skip_barrier); - entry_address[AdapterBlob::C2I_No_Clinit_Check] = __ pc(); + // Scalarized c2i adapter with non-scalarized receiver (i.e., don't pack receiver) + entry_address[AdapterBlob::C2I_No_Clinit_Check] = nullptr; + entry_address[AdapterBlob::C2I_Inline_RO] = __ pc(); + if (regs_cc != regs_cc_ro) { + // No class init barrier needed because method is guaranteed to be non-static + __ unimplemented("C2I_Inline_RO"); +#if 0 + gen_c2i_adapter(masm, sig_cc_ro, regs_cc_ro, /* requires_clinit_barrier = */ false, entry_address[AdapterBlob::C2I_No_Clinit_Check], + skip_fixup, entry_address[AdapterBlob::I2C], oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ false); +#endif + skip_fixup.reset(); + } - BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); - bs->c2i_entry_barrier(masm, /* tmp register*/ ic_klass, /* tmp register*/ receiver_klass, /* tmp register*/ code); + // Scalarized c2i adapter + entry_address[AdapterBlob::C2I] = __ pc(); + entry_address[AdapterBlob::C2I_Inline] = __ pc(); + 
gen_c2i_adapter(masm, sig_cc, regs_cc, /* requires_clinit_barrier = */ true, entry_address[AdapterBlob::C2I_No_Clinit_Check], + skip_fixup, entry_address[AdapterBlob::I2C], oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ true); + + // Non-scalarized c2i adapter + if (regs != regs_cc) { + entry_address[AdapterBlob::C2I_Unverified_Inline] = __ pc(); + Label inline_entry_skip_fixup; + __ unimplemented("C2I_Unverified_Inline"); +#if 0 + gen_inline_cache_check(masm, inline_entry_skip_fixup); +#endif - gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, call_interpreter, ientry); - return; + entry_address[AdapterBlob::C2I_Inline] = __ pc(); + __ unimplemented("C2I_Inline2"); +#if 0 + gen_c2i_adapter(masm, sig, regs, /* requires_clinit_barrier = */ true, entry_address[AdapterBlob::C2I_No_Clinit_Check], + inline_entry_skip_fixup, entry_address[AdapterBlob::I2C], oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ false); +#endif + } + // The c2i adapters might safepoint and trigger a GC. The caller must make sure that + // the GC knows about the location of oop argument locations passed to the c2i adapter. + if (allocate_code_blob) { + bool caller_must_gc_arguments = (regs != regs_cc); + int entry_offset[AdapterHandlerEntry::ENTRIES_COUNT]; + assert(AdapterHandlerEntry::ENTRIES_COUNT == 7, "sanity"); + AdapterHandlerLibrary::address_to_offset(entry_address, entry_offset); + new_adapter = AdapterBlob::create(masm->code(), entry_offset, frame_complete, frame_size_in_words, oop_maps, caller_must_gc_arguments); + } } // An oop arg. Must pass a handle not the oop itself. 
@@ -3709,6 +3896,11 @@ void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints, reverse_words(m, (unsigned long *)m_ints, longwords); } +BufferedInlineTypeBlob* SharedRuntime::generate_buffered_inline_type_adapter(const InlineKlass* vk) { + Unimplemented(); + return nullptr; +} + #if INCLUDE_JFR // For c2: c_rarg0 is junk, call to runtime to write a checkpoint. @@ -3784,5 +3976,4 @@ RuntimeStub* SharedRuntime::generate_jfr_return_lease() { oop_maps, false); return stub; } - #endif // INCLUDE_JFR diff --git a/src/hotspot/cpu/ppc/stackChunkFrameStream_ppc.inline.hpp b/src/hotspot/cpu/ppc/stackChunkFrameStream_ppc.inline.hpp index 785f7e4b2871d..41ad7765c2323 100644 --- a/src/hotspot/cpu/ppc/stackChunkFrameStream_ppc.inline.hpp +++ b/src/hotspot/cpu/ppc/stackChunkFrameStream_ppc.inline.hpp @@ -34,6 +34,7 @@ template inline bool StackChunkFrameStream::is_in_frame(void* p0) const { assert(!is_done(), ""); assert(is_compiled(), ""); + assert(!_cb->as_nmethod()->needs_stack_repair(), "unsupported"); intptr_t* p = (intptr_t*)p0; int argsize = (_cb->as_nmethod()->num_stack_arg_slots() * VMRegImpl::stack_slot_size) >> LogBytesPerWord; int frame_size = _cb->frame_size() + (argsize > 0 ? argsize + frame::metadata_words_at_top : 0); diff --git a/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp b/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp index 4d3b99afa1cee..c1a6b54df0bfd 100644 --- a/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp +++ b/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp @@ -1,6 +1,6 @@ /* * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2025 SAP SE. All rights reserved. + * Copyright (c) 2012, 2026 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -339,8 +339,21 @@ class StubGenerator: public StubCodeGenerator { __ blr(); // return to caller // case T_OBJECT: - // case T_LONG: __ bind(ret_is_object); + if (InlineTypeReturnedAsFields) { + // Check for scalarized return value + __ cmpdi(CR0, R3_RET, 0); + __ beq(CR0, ret_is_long); + // Load pack handler address + __ untested("call stub InlineTypeReturnedAsFields"); // TODO: check return registers usage + __ andi(R12_scratch2, R3_RET, -2); + __ ld(R12_scratch2, InlineKlass::adr_members_offset(), R12_scratch2); + __ ld(R12_scratch2, InlineKlass::pack_handler_jobject_offset(), R12_scratch2); + __ mtctr(R12_scratch2); + __ bctr(); // tail call + } // else fall through + + // case T_LONG: __ bind(ret_is_long); __ std(R3_RET, 0, r_arg_result_addr); __ blr(); // return to caller @@ -2606,10 +2619,16 @@ class StubGenerator: public StubCodeGenerator { __ beq(CR0, L_objArray); __ cmpd(CR5, src_klass, dst_klass); // if (src->klass() != dst->klass()) return -1; - __ cmpwi(CR6, lh, Klass::_lh_neutral_value); // if (!src->is_Array()) return -1; + __ bne(CR5, L_failed); - __ crnand(CR5, Assembler::equal, CR6, Assembler::less); - __ beq(CR5, L_failed); + // Check for flat inline type array -> return -1 + __ test_flat_array_oop(src, temp, L_failed); + + // Check for null-free (non-flat) inline type array -> handle as object array + __ test_null_free_array_oop(src, temp, L_objArray); + + __ cmpwi(CR6, lh, Klass::_lh_neutral_value); // if (!src->is_Array()) return -1; + __ bge(CR6, L_failed); // At this point, it is known to be a typeArray (array_tag 0x3). 
#ifdef ASSERT @@ -4801,6 +4820,7 @@ void generate_lookup_secondary_supers_table_stub() { } if (return_barrier) { + assert(!InlineTypeReturnedAsFields, "unsupported"); __ mr(nvtmp, R3_RET); __ fmr(nvftmp, F1_RET); // preserve possible return value from a method returning to the return barrier DEBUG_ONLY(__ ld_ptr(tmp1, _abi0(callers_sp), R1_SP);) __ ld_ptr(R1_SP, JavaThread::cont_entry_offset(), R16_thread); @@ -4845,6 +4865,7 @@ void generate_lookup_secondary_supers_table_stub() { __ mr(R1_SP, R3_RET); // R3_RET contains the SP of the thawed top frame if (return_barrier) { + assert(!InlineTypeReturnedAsFields, "unsupported"); // we're now in the caller of the frame that returned to the barrier __ mr(R3_RET, nvtmp); __ fmr(F1_RET, nvftmp); // restore return value (no safepoint in the call to thaw, so even an oop return value should be OK) } else { diff --git a/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp b/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp index 3fe7d35396235..0fee343928470 100644 --- a/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp +++ b/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2014, 2025, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2015, 2025 SAP SE. All rights reserved. + * Copyright (c) 2014, 2026, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2026 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -619,6 +619,11 @@ address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, default : ShouldNotReachHere(); } + if (state == atos && InlineTypeReturnedAsFields) { + __ unimplemented("return entry InlineTypeReturnedAsFields"); + //__ store_inline_type_fields_to_buf(nullptr, true); + } + __ restore_interpreter_state(R11_scratch1, false /*bcp_and_mdx_only*/, true /*restore_top_frame_sp*/); // Compiled code destroys templateTableBase, reload. @@ -1711,7 +1716,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { // Generic interpreted method entry to (asm) interpreter. // -address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { +address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized, bool object_init) { bool inc_counter = UseCompiler || CountCompiledCalls; address entry = __ pc(); // Generate the code to allocate the interpreter stack frame. @@ -1792,6 +1797,7 @@ address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { if (synchronized) { lock_method(R3_ARG1, R4_ARG2, R5_ARG3); } + #ifdef ASSERT else { Label Lok; @@ -1802,6 +1808,12 @@ address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { } #endif // ASSERT + // Issue a StoreStore barrier on entry to Object_init if the + // class has strict fields. Be lazy, always do it. + if (object_init) { + __ membar(MacroAssembler::StoreStore); + } + // -------------------------------------------------------------------------- // JVMTI support __ notify_method_entry(); diff --git a/src/hotspot/cpu/ppc/templateTable_ppc.hpp b/src/hotspot/cpu/ppc/templateTable_ppc.hpp index 3396acb192643..fcc2175ae2541 100644 --- a/src/hotspot/cpu/ppc/templateTable_ppc.hpp +++ b/src/hotspot/cpu/ppc/templateTable_ppc.hpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2014, 2023, Oracle and/or its affiliates. All rights reserved. 
- * Copyright (c) 2013, 2023 SAP SE. All rights reserved. + * Copyright (c) 2014, 2026, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2026 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -31,8 +31,10 @@ static void generate_vtable_call(Register Rrecv_klass, Register Rindex, Register Rret, Register Rtemp); static void invokeinterface_object_method(Register Rrecv_klass, Register Rret, Register Rflags, Register Rcache, Register Rtemp, Register Rtemp2); + static void invoke_is_substitutable(Register aobj, Register bobj, Label& is_subst, Label& not_subst); + // Branch_conditional which takes TemplateTable::Condition. static void branch_conditional(ConditionRegister crx, TemplateTable::Condition cc, Label& L, bool invert = false); - static void if_cmp_common(Register Rfirst, Register Rsecond, Register Rscratch1, Register Rscratch2, Condition cc, bool is_jint, bool cmp0); + static void if_cmp_common(Register Rfirst, Register Rsecond, Register Rscratch1, Register Rscratch2, Condition cc, bool is_jint, bool is_acmp = false); #endif // CPU_PPC_TEMPLATETABLE_PPC_HPP diff --git a/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp b/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp index 252425fb10452..76829a09255de 100644 --- a/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp +++ b/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp @@ -1,6 +1,6 @@ /* * Copyright (c) 2014, 2026, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013, 2025 SAP SE. All rights reserved. + * Copyright (c) 2013, 2026 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -102,6 +102,7 @@ void TemplateTable::patch_bytecode(Bytecodes::Code new_bc, Register Rnew_bc, Reg Label L_patch_done; switch (new_bc) { + case Bytecodes::_fast_vputfield: case Bytecodes::_fast_aputfield: case Bytecodes::_fast_bputfield: case Bytecodes::_fast_zputfield: @@ -684,9 +685,26 @@ void TemplateTable::aaload() { Rtemp = R5_ARG3, Rtemp2 = R31; __ index_check(Rarray, R17_tos /* index */, UseCompressedOops ? 2 : LogBytesPerWord, Rtemp, Rload_addr); - do_oop_load(_masm, Rload_addr, arrayOopDesc::base_offset_in_bytes(T_OBJECT), R17_tos, Rtemp, Rtemp2, - IS_ARRAY); - __ verify_oop(R17_tos); + __ profile_array_type(Rarray, R11_scratch1, R12_scratch2); + if (UseArrayFlattening) { + Label is_flat_array, cont; + + __ test_flat_array_oop(Rarray, Rtemp, is_flat_array); + do_oop_load(_masm, Rload_addr, arrayOopDesc::base_offset_in_bytes(T_OBJECT), R17_tos, Rtemp, Rtemp2, + IS_ARRAY); + __ verify_oop(R17_tos); + __ b(cont); + + __ bind(is_flat_array); + __ call_VM(R17_tos, CAST_FROM_FN_PTR(address, InterpreterRuntime::flat_array_load), Rarray, R17_tos); + __ bind(cont); + } else { + do_oop_load(_masm, Rload_addr, arrayOopDesc::base_offset_in_bytes(T_OBJECT), R17_tos, Rtemp, Rtemp2, + IS_ARRAY); + __ verify_oop(R17_tos); + } + __ profile_element_type(R17_tos, Rtemp, Rtemp2); + //__ dcbt(R17_tos); // prefetch } @@ -973,14 +991,14 @@ void TemplateTable::dastore() { void TemplateTable::aastore() { transition(vtos, vtos); - Label Lstore_ok, Lis_null, Ldone; - const Register Rindex = R3_ARG1, - Rarray = R4_ARG2, + Label Lstore_ok, Lis_null, Lis_flat_array, Lwrite_null_to_null_free_array, Ldone; + const Register Rindex = R6_ARG4, + Rarray = R5_ARG3, Rscratch = R11_scratch1, Rscratch2 = R12_scratch2, - Rarray_klass = R5_ARG3, + Rarray_klass = R4_ARG2, Rarray_element_klass = Rarray_klass, - Rvalue_klass = R6_ARG4, + Rvalue_klass = R3_ARG1, Rstore_addr = R31; // Use register which survives VM call. 
__ ld(R17_tos, Interpreter::expr_offset_in_bytes(0), R15_esp); // Get value to store. @@ -989,34 +1007,64 @@ void TemplateTable::aastore() { __ verify_oop(R17_tos); __ index_check_without_pop(Rarray, Rindex, UseCompressedOops ? 2 : LogBytesPerWord, Rscratch, Rstore_addr); - // Rindex is dead! - Register Rscratch3 = Rindex; + + __ profile_array_type(Rarray, Rscratch, Rscratch2); + __ profile_multiple_element_types(R17_tos, Rscratch, Rscratch2, /* temp */ Rarray_klass); + + if (UseArrayFlattening) { + __ load_klass(Rarray_klass, Rarray); + __ lwz(Rscratch, in_bytes(Klass::layout_helper_offset()), Rarray_klass); + __ test_flat_array_layout(Rscratch, Lis_flat_array); + } // Do array store check - check for null value first. __ cmpdi(CR0, R17_tos, 0); __ beq(CR0, Lis_null); - __ load_klass(Rarray_klass, Rarray); + // Rindex is dead! + Register Rscratch3 = Rindex; + + if (!UseArrayFlattening) { + __ load_klass(Rarray_klass, Rarray); // haven't done this above + } __ load_klass(Rvalue_klass, R17_tos); // Do fast instanceof cache test. __ ld(Rarray_element_klass, in_bytes(ObjArrayKlass::element_klass_offset()), Rarray_klass); // Generate a fast subtype check. Branch to store_ok if no failure. Throw if failure. - __ gen_subtype_check(Rvalue_klass /*subklass*/, Rarray_element_klass /*superklass*/, Rscratch, Rscratch2, Rscratch3, Lstore_ok); + __ gen_subtype_check(Rvalue_klass /*subklass*/, Rarray_element_klass /*superklass*/, + Rscratch, Rscratch2, Rscratch3, Lstore_ok, false); // Fell through: subtype check failed => throw an exception. 
__ load_dispatch_table(R11_scratch1, (address*)Interpreter::_throw_ArrayStoreException_entry); __ mtctr(R11_scratch1); __ bctr(); + if (UseArrayFlattening) { + __ bind(Lis_flat_array); // Store non-null value to flat + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::flat_array_store), R17_tos, Rarray, Rindex); + __ b(Ldone); + } + __ bind(Lis_null); + if (Arguments::is_valhalla_enabled()) { + // No way to store null in null-free array + __ test_null_free_array_oop(Rarray, Rscratch, Lwrite_null_to_null_free_array); + } do_oop_store(_masm, Rstore_addr, arrayOopDesc::base_offset_in_bytes(T_OBJECT), noreg /* 0 */, Rscratch, Rscratch2, Rscratch3, IS_ARRAY); - __ profile_null_seen(Rscratch, Rscratch2); __ b(Ldone); + if (Arguments::is_valhalla_enabled()) { + __ bind(Lwrite_null_to_null_free_array); + __ load_dispatch_table(Rscratch, (address*)Interpreter::_throw_NullPointerException_entry); + __ mtctr(Rscratch); + __ bctr(); + } + // Store is OK. + __ align(32, 12); __ bind(Lstore_ok); do_oop_store(_masm, Rstore_addr, arrayOopDesc::base_offset_in_bytes(T_OBJECT), R17_tos /* value */, Rscratch, Rscratch2, Rscratch3, IS_ARRAY | IS_NOT_NULL); @@ -1753,19 +1801,19 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) { // Helper function for if_cmp* methods below. // Factored out common compare and branch code. -void TemplateTable::if_cmp_common(Register Rfirst, Register Rsecond, Register Rscratch1, Register Rscratch2, Condition cc, bool is_jint, bool cmp0) { +void TemplateTable::if_cmp_common(Register Rfirst, Register Rsecond, Register Rscratch1, Register Rscratch2, Condition cc, bool is_jint, bool is_acmp) { Label Lnot_taken; // Note: The condition code we get is the condition under which we // *fall through*! So we have to inverse the CC here. 
if (is_jint) { - if (cmp0) { + if (Rsecond == noreg) { __ cmpwi(CR0, Rfirst, 0); } else { __ cmpw(CR0, Rfirst, Rsecond); } } else { - if (cmp0) { + if (Rsecond == noreg) { __ cmpdi(CR0, Rfirst, 0); } else { __ cmpd(CR0, Rfirst, Rsecond); @@ -1779,14 +1827,14 @@ void TemplateTable::if_cmp_common(Register Rfirst, Register Rsecond, Register Rs // Condition is not true => Continue. __ align(32, 12); __ bind(Lnot_taken); - __ profile_not_taken_branch(Rscratch1, Rscratch2); + __ profile_not_taken_branch(Rscratch1, Rscratch2, is_acmp); } // Compare integer values with zero and fall through if CC holds, branch away otherwise. void TemplateTable::if_0cmp(Condition cc) { transition(itos, vtos); - if_cmp_common(R17_tos, noreg, R11_scratch1, R12_scratch2, cc, true, true); + if_cmp_common(R17_tos, noreg, R11_scratch1, R12_scratch2, cc, true); } // Compare integer values and fall through if CC holds, branch away otherwise. @@ -1801,23 +1849,79 @@ void TemplateTable::if_icmp(Condition cc) { Rsecond = R17_tos; __ pop_i(Rfirst); - if_cmp_common(Rfirst, Rsecond, R11_scratch1, R12_scratch2, cc, true, false); + if_cmp_common(Rfirst, Rsecond, R11_scratch1, R12_scratch2, cc, true); } void TemplateTable::if_nullcmp(Condition cc) { transition(atos, vtos); - if_cmp_common(R17_tos, noreg, R11_scratch1, R12_scratch2, cc, false, true); + if_cmp_common(R17_tos, noreg, R11_scratch1, R12_scratch2, cc, false); } void TemplateTable::if_acmp(Condition cc) { transition(atos, vtos); - const Register Rfirst = R0, + const Register Rfirst = R31, Rsecond = R17_tos; __ pop_ptr(Rfirst); - if_cmp_common(Rfirst, Rsecond, R11_scratch1, R12_scratch2, cc, false, false); + + __ profile_acmp(Rsecond, Rfirst, R11_scratch1, R12_scratch2); + + const int is_inline_type_mask = markWord::inline_type_pattern; + if (Arguments::is_valhalla_enabled()) { + Label taken, not_taken; + __ cmpd(CR0, Rfirst, Rsecond); + __ beq(CR0, (cc == equal) ? 
taken : not_taken); + + // test if any input is null + __ cmpdi(CR0, Rfirst, 0); + __ cmpdi(CR1, Rsecond, 0); + __ cror(CR0, Assembler::equal, CR1, Assembler::equal); + __ beq(CR0, (cc == equal) ? not_taken : taken); + + // and both are values ? + __ ld(R11_scratch1, oopDesc::mark_offset_in_bytes(), Rfirst); + __ ld(R12_scratch2, oopDesc::mark_offset_in_bytes(), Rsecond); + __ andr(R11_scratch1, R11_scratch1, R12_scratch2); + __ andi(R11_scratch1, R11_scratch1, is_inline_type_mask); + __ cmpdi(CR0, R11_scratch1, is_inline_type_mask); + __ bne(CR0, (cc == equal) ? not_taken : taken); + + // same value klass ? + __ load_metadata(R11_scratch1, Rfirst); + __ load_metadata(R12_scratch2, Rsecond); + __ cmpd(CR0, R11_scratch1, R12_scratch2); + __ bne(CR0, (cc == equal) ? not_taken : taken); + + // Know both are the same type, let's test for substitutability... + if (cc == equal) { + invoke_is_substitutable(Rfirst, Rsecond, taken, not_taken); + } else { + invoke_is_substitutable(Rfirst, Rsecond, not_taken, taken); + } + DEBUG_ONLY( __ stop("Not reachable"); ) + + // Condition is false => Jump! + __ align(32, 12); + __ bind(taken); + branch(false, false); + + // Condition is not true => Continue. + __ align(32, 12); + __ bind(not_taken); + __ profile_not_taken_branch(R11_scratch1, R12_scratch2, true); + + } else { + if_cmp_common(Rfirst, Rsecond, R11_scratch1, R12_scratch2, cc, false, true); + } +} + +void TemplateTable::invoke_is_substitutable(Register aobj, Register bobj, Label& is_subst, Label& not_subst) { + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::is_substitutable), aobj, bobj); + __ cmpwi(CR0, R3_RET, 0); + __ beq(CR0, not_subst); + __ b(is_subst); } void TemplateTable::ret() { @@ -2737,12 +2841,41 @@ void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteContr __ fence(); // Volatile entry point (one instruction before non-volatile_entry point). 
assert(branch_table[atos] == nullptr, "can't compute twice"); branch_table[atos] = __ pc(); // non-volatile_entry point - do_oop_load(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, /* nv temp */ Rflags, IN_HEAP); - __ verify_oop(R17_tos); - __ push(atos); - //__ dcbt(R17_tos); // prefetch - if (!is_static && rc == may_rewrite) { - patch_bytecode(Bytecodes::_fast_agetfield, Rbc, Rscratch); + if (!Arguments::is_valhalla_enabled()) { + do_oop_load(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, /* nv temp */ Rflags, IN_HEAP); + __ verify_oop(R17_tos); + __ push(atos); + //__ dcbt(R17_tos); // prefetch + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_agetfield, Rbc, Rscratch); + } + } else { // Valhalla + if (is_static) { + do_oop_load(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, /* nv temp */ Rflags, IN_HEAP); + __ verify_oop(R17_tos); + __ push(atos); + } else { + Label is_flat; + __ test_field_is_flat(Rflags, is_flat); + do_oop_load(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, /* nv temp */ Rflags, IN_HEAP); + __ verify_oop(R17_tos); + __ push(atos); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_agetfield, Rbc, Rscratch); + } + __ beq(CR2, Lacquire); // Volatile? + __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode())); + + __ bind(is_flat); + // field is flat (null-free or nullable with a null-marker) + __ mr(R17_tos, Rclass_or_obj); + __ read_flat_field(Rcache, R17_tos); + __ verify_oop(R17_tos); + __ push(atos); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_vgetfield, Rbc, Rscratch); + } + } } __ beq(CR2, Lacquire); // Volatile? __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode())); @@ -2778,7 +2911,7 @@ void TemplateTable::getstatic(int byte_no) { // The function may destroy various registers, just not the cache and index registers. 
void TemplateTable::jvmti_post_field_mod(Register Rcache, Register Rscratch, bool is_static) { - assert_different_registers(Rcache, Rscratch, R6_ARG4); + assert_different_registers(Rcache, Rscratch); if (JvmtiExport::can_post_field_modification()) { Label Lno_field_mod_post; @@ -2801,6 +2934,7 @@ void TemplateTable::jvmti_post_field_mod(Register Rcache, Register Rscratch, boo int offs = Interpreter::expr_offset_in_bytes(0); Register base = R15_esp; switch(bytecode()) { + case Bytecodes::_fast_vputfield: // fall through case Bytecodes::_fast_aputfield: __ push_ptr(); offs+= Interpreter::stackElementSize; break; case Bytecodes::_fast_iputfield: // Fall through case Bytecodes::_fast_bputfield: // Fall through @@ -2835,12 +2969,17 @@ void TemplateTable::jvmti_post_field_mod(Register Rcache, Register Rscratch, boo __ verify_oop(Robj); } - __ addi(R6_ARG4, R15_esp, Interpreter::expr_offset_in_bytes(0)); - __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_modification), Robj, Rcache, R6_ARG4); + // Pass arguments without register clashes (R16_thread passed by call_VM) + __ mr_if_needed(R4_ARG2, Robj); + assert(Rcache != R4_ARG2, "smashed argument"); + __ mr_if_needed(R5_ARG3, Rcache); + __ addi(R6_ARG4, R15_esp, Interpreter::expr_offset_in_bytes(0)); // set R6_ARG4 last (may use same reg as other args) + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_modification)); __ load_field_entry(Rcache, Rscratch); // In case of the fast versions, value lives in registers => put it back on tos. 
switch(bytecode()) { + case Bytecodes::_fast_vputfield: // fall through case Bytecodes::_fast_aputfield: __ pop_ptr(); break; case Bytecodes::_fast_iputfield: // Fall through case Bytecodes::_fast_bputfield: // Fall through @@ -2862,7 +3001,7 @@ void TemplateTable::jvmti_post_field_mod(Register Rcache, Register Rscratch, boo void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) { Label Lvolatile; - const Register Rcache = R5_ARG3, // Do not use ARG1/2 (causes trouble in jvmti_post_field_mod). + const Register Rcache = R6_ARG4, // Do not use ARG1-3 (causes trouble in jvmti_post_field_mod or write_flat_field). Rclass_or_obj = R31, // Needs to survive C call. Roffset = R22_tmp2, // Needs to survive C call. Rtos_state = R23_tmp3, // Needs to survive C call. @@ -2870,7 +3009,7 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteContr Rbtable = R4_ARG2, Rscratch = R11_scratch1, // used by load_field_cp_cache_entry Rscratch2 = R12_scratch2, // used by load_field_cp_cache_entry - Rscratch3 = R6_ARG4, + Rscratch3 = R10_ARG8, Rbc = Rscratch3; const ConditionRegister CR_is_vol = CR2; // Non-volatile condition register (survives runtime call in do_oop_store). 
@@ -3064,11 +3203,52 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteContr assert(branch_table[atos] == nullptr, "can't compute twice"); branch_table[atos] = __ pc(); // non-volatile_entry point __ pop(atos); - if (!is_static) { pop_and_check_object(Rclass_or_obj); } // kills R11_scratch1 - do_oop_store(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, Rscratch2, Rscratch3, IN_HEAP); - if (!is_static && rc == may_rewrite) { - patch_bytecode(Bytecodes::_fast_aputfield, Rbc, Rscratch, true, byte_no); - } + if (!Arguments::is_valhalla_enabled()) { + if (!is_static) { pop_and_check_object(Rclass_or_obj); } // kills R11_scratch1 + do_oop_store(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, Rscratch2, Rscratch3, IN_HEAP); + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_aputfield, Rbc, Rscratch, true, byte_no); + } + } else { // Valhalla + if (is_static) { + Label is_nullable; + __ test_field_is_not_null_free_inline_type(Rflags, is_nullable); + __ null_check_throw(R17_tos, -1, Rscratch); + __ align(32, 12); + __ bind(is_nullable); + do_oop_store(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, Rscratch2, Rscratch3, IN_HEAP); + } else { + Label null_free_reference, is_flat, rewrite_inline; + __ test_field_is_flat(Rflags, is_flat); + __ test_field_is_null_free_inline_type(Rflags, null_free_reference); + pop_and_check_object(Rclass_or_obj); + // Store into the field + do_oop_store(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, Rscratch2, Rscratch3, IN_HEAP); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_aputfield, Rbc, Rscratch, true, byte_no); + } + if (!support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ beq(CR_is_vol, Lvolatile); // Volatile? 
+ } + __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode())); + + // Implementation of the inline type semantic + __ bind(null_free_reference); + __ null_check_throw(R17_tos, -1, Rscratch); + pop_and_check_object(Rclass_or_obj); + // Store into the field + do_oop_store(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, Rscratch2, Rscratch3, IN_HEAP); + __ b(rewrite_inline); + + __ bind(is_flat); + pop_and_check_object(Rclass_or_obj); + __ write_flat_field(Rcache, Rscratch, Rscratch2, Rclass_or_obj, Roffset, R17_tos); + __ bind(rewrite_inline); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_vputfield, Rbc, Rscratch, true, byte_no); + } + } + } // Valhalla if (!support_IRIW_for_not_multiple_copy_atomic_cpu) { __ beq(CR_is_vol, Lvolatile); // Volatile? __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode())); @@ -3108,7 +3288,7 @@ void TemplateTable::jvmti_post_fast_field_mod() { void TemplateTable::fast_storefield(TosState state) { transition(state, vtos); - const Register Rcache = R5_ARG3, // Do not use ARG1/2 (causes trouble in jvmti_post_field_mod). + const Register Rcache = R6_ARG4, // Do not use ARG1-3 (causes trouble in jvmti_post_field_mod or write_flat_field). Rclass_or_obj = R31, // Needs to survive C call. Roffset = R22_tmp2, // Needs to survive C call. Rflags = R3_ARG1, @@ -3138,6 +3318,19 @@ void TemplateTable::fast_storefield(TosState state) { // Do the store and fencing. switch(bytecode()) { + case Bytecodes::_fast_vputfield: + { + Label is_flat, done; + __ test_field_is_flat(Rflags, is_flat); + __ null_check_throw(Rclass_or_obj, -1, Rscratch); + do_oop_store(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, Rscratch2, Rscratch3, IN_HEAP); + __ b(done); + __ bind(is_flat); + __ write_flat_field(Rcache, Rscratch, Rscratch2, Rclass_or_obj, Roffset, R17_tos); + __ bind(done); + break; + } + case Bytecodes::_fast_aputfield: // Store into the field. 
do_oop_store(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, Rscratch2, Rscratch3, IN_HEAP); @@ -3213,6 +3406,20 @@ void TemplateTable::fast_accessfield(TosState state) { __ bne(CR0, LisVolatile); switch(bytecode()) { + case Bytecodes::_fast_vgetfield: + { + // field is flat + __ read_flat_field(Rcache, R17_tos); + __ verify_oop(R17_tos); + __ dispatch_epilog(state, Bytecodes::length_for(bytecode())); + + __ bind(LisVolatile); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { __ fence(); } + __ read_flat_field(Rcache, R17_tos); + __ verify_oop(R17_tos); + // memory barrier in read_flat_field + break; + } case Bytecodes::_fast_agetfield: { do_oop_load(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, /* nv temp */ Rflags, IN_HEAP); @@ -3842,12 +4049,14 @@ void TemplateTable::_new() { // -------------------------------------------------------------------------- // Init2: Initialize the header: mark, klass // Init mark. - if (UseCompactObjectHeaders) { + if (UseCompactObjectHeaders || Arguments::is_valhalla_enabled()) { __ ld(Rscratch, in_bytes(Klass::prototype_header_offset()), RinstanceKlass); - __ std(Rscratch, oopDesc::mark_offset_in_bytes(), RallocatedObject); } else { __ load_const_optimized(Rscratch, markWord::prototype().value(), R0); - __ std(Rscratch, oopDesc::mark_offset_in_bytes(), RallocatedObject); + } + __ std(Rscratch, oopDesc::mark_offset_in_bytes(), RallocatedObject); + + if (!UseCompactObjectHeaders) { __ store_klass_gap(RallocatedObject); __ store_klass(RallocatedObject, RinstanceKlass, Rscratch); } @@ -4110,6 +4319,10 @@ void TemplateTable::monitorenter() { // Null pointer exception. __ null_check_throw(Robj_to_lock, -1, Rscratch1); + Label is_inline_type; + __ ld(Rscratch1, oopDesc::mark_offset_in_bytes(), Robj_to_lock); + __ test_markword_is_inline_type(Rscratch1, is_inline_type); + // Check if any slot is present => short cut to allocation if not. 
__ cmpld(CR0, Rcurrent_monitor, Rbot); __ beq(CR0, Lallocate_new); @@ -4166,6 +4379,11 @@ void TemplateTable::monitorenter() { // The bcp has already been incremented. Just need to dispatch to next instruction. __ dispatch_next(vtos); + + __ bind(is_inline_type); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_identity_exception), Robj_to_lock); + __ should_not_reach_here(); } void TemplateTable::monitorexit() { @@ -4189,6 +4407,12 @@ void TemplateTable::monitorexit() { // Null pointer check. __ null_check_throw(Robj_to_lock, -1, Rscratch); + const int is_inline_type_mask = markWord::inline_type_pattern; + __ ld(Rscratch, oopDesc::mark_offset_in_bytes(), Robj_to_lock); + __ andi(Rscratch, Rscratch, is_inline_type_mask); + __ cmpwi(CR0, Rscratch, is_inline_type_mask); + __ beq(CR0, Lillegal_monitor_state); + // Check corner case: unbalanced monitorEnter / Exit. __ cmpld(CR0, Rcurrent_monitor, Rbot); __ beq(CR0, Lillegal_monitor_state); diff --git a/src/hotspot/cpu/ppc/vm_version_ppc.cpp b/src/hotspot/cpu/ppc/vm_version_ppc.cpp index be05ec1dfb3b5..8781230d8126f 100644 --- a/src/hotspot/cpu/ppc/vm_version_ppc.cpp +++ b/src/hotspot/cpu/ppc/vm_version_ppc.cpp @@ -342,6 +342,23 @@ void VM_Version::initialize() { FLAG_SET_DEFAULT(UseUnalignedAccesses, true); } + if (InlineTypePassFieldsAsArgs) { + warning("InlineTypePassFieldsAsArgs is not supported on this CPU"); + FLAG_SET_DEFAULT(InlineTypePassFieldsAsArgs, false); + } + if (InlineTypeReturnedAsFields) { + warning("InlineTypeReturnedAsFields is not supported on this CPU"); + FLAG_SET_DEFAULT(InlineTypeReturnedAsFields, false); + } + + // TODO: Valhalla optimizations + if (FLAG_IS_DEFAULT(UseArrayFlattening )) FLAG_SET_DEFAULT(UseArrayFlattening , false); + if (FLAG_IS_DEFAULT(UseFieldFlattening )) FLAG_SET_DEFAULT(UseFieldFlattening , false); + if (FLAG_IS_DEFAULT(UseNullFreeNonAtomicValueFlattening)) FLAG_SET_DEFAULT(UseNullFreeNonAtomicValueFlattening, false); + if 
(FLAG_IS_DEFAULT(UseNullableAtomicValueFlattening )) FLAG_SET_DEFAULT(UseNullableAtomicValueFlattening , false); + if (FLAG_IS_DEFAULT(UseNullFreeAtomicValueFlattening )) FLAG_SET_DEFAULT(UseNullFreeAtomicValueFlattening , false); + if (FLAG_IS_DEFAULT(UseNullableNonAtomicValueFlattening)) FLAG_SET_DEFAULT(UseNullableNonAtomicValueFlattening, false); + check_virtualizations(); } diff --git a/src/hotspot/cpu/ppc/vtableStubs_ppc_64.cpp b/src/hotspot/cpu/ppc/vtableStubs_ppc_64.cpp index e25a8baa9da61..73a1cbe090f84 100644 --- a/src/hotspot/cpu/ppc/vtableStubs_ppc_64.cpp +++ b/src/hotspot/cpu/ppc/vtableStubs_ppc_64.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2025 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -44,10 +44,10 @@ extern "C" void bad_compiled_vtable_index(JavaThread* thread, oopDesc* receiver, #endif // Used by compiler only; may use only caller saved, non-argument registers. -VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { +VtableStub* VtableStubs::create_vtable_stub(int vtable_index, bool caller_is_c1) { // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. const int stub_code_length = code_size_limit(true); - VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); + VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index, caller_is_c1); // Can be null if there is no free space in the code cache. 
if (s == nullptr) { return nullptr; @@ -130,10 +130,10 @@ VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { return s; } -VtableStub* VtableStubs::create_itable_stub(int itable_index) { +VtableStub* VtableStubs::create_itable_stub(int itable_index, bool caller_is_c1) { // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. const int stub_code_length = code_size_limit(false); - VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); + VtableStub* s = new(stub_code_length) VtableStub(false, itable_index, caller_is_c1); // Can be null if there is no free space in the code cache. if (s == nullptr) { return nullptr; diff --git a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp index a8a21342248c3..53af501e9b681 100644 --- a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
@@ -121,6 +121,72 @@ void DivByZeroStub::emit_code(LIR_Assembler* ce) { #endif } +// Implementation of LoadFlattenedArrayStub + +LoadFlattenedArrayStub::LoadFlattenedArrayStub(LIR_Opr array, LIR_Opr index, LIR_Opr result, CodeEmitInfo* info) { + _array = array; + _index = index; + _result = result; + _scratch_reg = FrameMap::r10_oop_opr; + _info = new CodeEmitInfo(info); +} + +void LoadFlattenedArrayStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + ce->store_parameter(_array->as_register(), 1); + ce->store_parameter(_index->as_register(), 0); + __ far_call(RuntimeAddress(Runtime1::entry_for(StubId::c1_load_flat_array_id))); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + if (_result->as_register() != x10) { + __ mv(_result->as_register(), x10); + } + __ j(_continuation); +} + +// Implementation of StoreFlattenedArrayStub + +StoreFlattenedArrayStub::StoreFlattenedArrayStub(LIR_Opr array, LIR_Opr index, LIR_Opr value, CodeEmitInfo* info) { + _array = array; + _index = index; + _value = value; + _scratch_reg = FrameMap::r10_oop_opr; + _info = new CodeEmitInfo(info); +} + +void StoreFlattenedArrayStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + ce->store_parameter(_array->as_register(), 2); + ce->store_parameter(_index->as_register(), 1); + ce->store_parameter(_value->as_register(), 0); + __ far_call(RuntimeAddress(Runtime1::entry_for(StubId::c1_store_flat_array_id))); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + __ j(_continuation); +} + +// Implementation of SubstitutabilityCheckStub + +SubstitutabilityCheckStub::SubstitutabilityCheckStub(LIR_Opr left, LIR_Opr right, CodeEmitInfo* info) { + _left = left; + _right = right; + _scratch_reg = FrameMap::r10_oop_opr; + _info = new CodeEmitInfo(info); +} + +void SubstitutabilityCheckStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, 
"frame size should be fixed"); + __ bind(_entry); + ce->store_parameter(_left->as_register(), 1); + ce->store_parameter(_right->as_register(), 0); + __ far_call(RuntimeAddress(Runtime1::entry_for(StubId::c1_substitutability_check_id))); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + __ j(_continuation); +} + // Implementation of NewInstanceStub NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, StubId stub_id) { _result = result; @@ -166,11 +232,13 @@ void NewTypeArrayStub::emit_code(LIR_Assembler* ce) { } // Implementation of NewObjectArrayStub -NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) { +NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, + CodeEmitInfo* info, bool is_null_free) { _klass_reg = klass_reg; _result = result; _length = length; _info = new CodeEmitInfo(info); + _is_null_free = is_null_free; } void NewObjectArrayStub::emit_code(LIR_Assembler* ce) { @@ -178,7 +246,13 @@ void NewObjectArrayStub::emit_code(LIR_Assembler* ce) { __ bind(_entry); assert(_length->as_register() == x9, "length must in x9"); assert(_klass_reg->as_register() == x13, "klass_reg must in x13"); - __ far_call(RuntimeAddress(Runtime1::entry_for(StubId::c1_new_object_array_id))); + + if (_is_null_free) { + __ far_call(RuntimeAddress(Runtime1::entry_for(StubId::c1_new_null_free_array_id))); + } else { + __ far_call(RuntimeAddress(Runtime1::entry_for(StubId::c1_new_object_array_id))); + } + ce->add_call_info_here(_info); ce->verify_oop_map(_info); assert(_result->as_register() == x10, "result must in x10"); @@ -188,6 +262,13 @@ void NewObjectArrayStub::emit_code(LIR_Assembler* ce) { void MonitorEnterStub::emit_code(LIR_Assembler* ce) { assert(__ rsp_offset() == 0, "frame size should be fixed"); __ bind(_entry); + if (_throw_ie_stub != nullptr) { + // When we come here, _obj_reg has already been checked to 
be non-null. + __ ld(t0, Address(_obj_reg->as_register(), oopDesc::mark_offset_in_bytes())); + __ mv(t1, markWord::inline_type_pattern); + __ andr(t0, t0, t1); + __ beq(t0, t1, *_throw_ie_stub->entry(), /* is_far */ true); + } ce->store_parameter(_obj_reg->as_register(), 1); ce->store_parameter(_lock_reg->as_register(), 0); StubId enter_id; diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp index 8aced227a060b..979aae0b663bf 100644 --- a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp @@ -240,6 +240,18 @@ void LIR_Assembler::arraycopy_type_check(Register src, Register src_pos, Registe } } +void LIR_Assembler::arraycopy_inlinetype_check(Register obj, Register tmp, CodeStub* slow_path, bool is_dest, bool null_check) { + if (null_check) { + __ beqz(obj, *slow_path->entry(), /* is_far */ true); + } + if (is_dest) { + __ test_null_free_array_oop(obj, tmp, *slow_path->entry()); + __ test_flat_array_oop(obj, tmp, *slow_path->entry()); + } else { + __ test_flat_array_oop(obj, tmp, *slow_path->entry()); + } +} + void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { ciArrayKlass *default_type = op->expected_type(); Register src = op->src()->as_register(); @@ -254,12 +266,25 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { BasicType basic_type = default_type != nullptr ? 
default_type->element_type()->basic_type() : T_ILLEGAL; if (is_reference_type(basic_type)) { basic_type = T_OBJECT; } + if (flags & LIR_OpArrayCopy::always_slow_path) { + __ j(*stub->entry()); + __ bind(*stub->continuation()); + return; + } + // if we don't know anything, just go through the generic arraycopy if (default_type == nullptr) { generic_arraycopy(src, src_pos, length, dst, dst_pos, stub); return; } + if (flags & LIR_OpArrayCopy::src_inlinetype_check) { + arraycopy_inlinetype_check(src, tmp, stub, false, (flags & LIR_OpArrayCopy::src_null_check)); + } + if (flags & LIR_OpArrayCopy::dst_inlinetype_check) { + arraycopy_inlinetype_check(dst, tmp, stub, true, (flags & LIR_OpArrayCopy::dst_null_check)); + } + assert(default_type != nullptr && default_type->is_array_klass() && default_type->is_loaded(), "must be true at this point"); diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp index b5452f3e4cd0b..5e0b559cc12bf 100644 --- a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp @@ -47,5 +47,6 @@ Register dst, Register dst_pos); void arraycopy_load_args(Register src, Register src_pos, Register length, Register dst, Register dst_pos); + void arraycopy_inlinetype_check(Register obj, Register tmp, CodeStub* slow_path, bool is_dest, bool null_check); #endif // CPU_RISCV_C1_LIRASSEMBLER_ARRAYCOPY_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp index 29e5d86d0cca0..a5359f81ea764 100644 --- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp @@ -33,11 +33,14 @@ #include "c1/c1_Runtime1.hpp" #include "c1/c1_ValueStack.hpp" #include "ci/ciArrayKlass.hpp" +#include "ci/ciInlineKlass.hpp" #include "ci/ciInstance.hpp" +#include "ci/ciObjArrayKlass.hpp" #include "code/compiledIC.hpp" #include 
"gc/shared/collectedHeap.hpp" #include "nativeInst_riscv.hpp" #include "oops/objArrayKlass.hpp" +#include "oops/oop.inline.hpp" #include "runtime/frame.inline.hpp" #include "runtime/sharedRuntime.hpp" #include "utilities/powerOfTwo.hpp" @@ -341,7 +344,7 @@ int LIR_Assembler::emit_unwind_handler() { // remove the activation and dispatch to the unwind handler __ block_comment("remove_frame and dispatch to the unwind handler"); - __ remove_frame(initial_frame_size_in_bytes()); + __ remove_frame(initial_frame_size_in_bytes(), needs_stack_repair()); __ far_jump(RuntimeAddress(Runtime1::entry_for(StubId::c1_unwind_exception_id))); // Emit the slow path assembly @@ -382,8 +385,10 @@ int LIR_Assembler::emit_deopt_handler() { void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) { assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == x10, "word returns are in x10"); + assert(!InlineTypeReturnedAsFields, "unimplemented"); + // Pop the stack before the safepoint code - __ remove_frame(initial_frame_size_in_bytes()); + __ remove_frame(initial_frame_size_in_bytes(), needs_stack_repair()); if (StackReservedPages > 0 && compilation()->has_reserved_stack_access()) { __ reserved_stack_check(); @@ -395,6 +400,11 @@ void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) { __ ret(); } +int LIR_Assembler::store_inline_type_fields_to_buf(ciInlineKlass* vk) { + Unimplemented(); + return 0; +} + int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) { guarantee(info != nullptr, "Shouldn't be null"); __ get_polling_page(t0, relocInfo::poll_type); @@ -436,10 +446,10 @@ void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_cod case T_OBJECT: case T_ARRAY: - if (patch_code == lir_patch_none) { - jobject2reg(c->as_jobject(), dest->as_register()); - } else { + if (patch_code != lir_patch_none) { jobject2reg_with_patching(dest->as_register(), info); + } else { + 
jobject2reg(c->as_jobject(), dest->as_register()); } break; @@ -843,6 +853,21 @@ void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch } } +void LIR_Assembler::move(LIR_Opr src, LIR_Opr dst) { + assert(dst->is_cpu_register(), "must be"); + assert(dst->type() == src->type(), "must be"); + + if (src->is_cpu_register()) { + reg2reg(src, dst); + } else if (src->is_stack()) { + stack2reg(src, dst, dst->type()); + } else if (src->is_constant()) { + const2reg(src, dst, lir_patch_none, nullptr); + } else { + ShouldNotReachHere(); + } +} + void LIR_Assembler::emit_op3(LIR_Op3* op) { switch (op->code()) { case lir_idiv: // fall through @@ -997,7 +1022,7 @@ void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) { void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) { Register len = op->len()->as_register(); - if (UseSlowPath || + if (UseSlowPath || op->always_slow_path() || (!UseFastNewObjectArray && is_reference_type(op->type())) || (!UseFastNewTypeArray && !is_reference_type(op->type()))) { __ j(*op->stub()->entry()); @@ -1060,8 +1085,18 @@ void LIR_Assembler::typecheck_helper_slowcheck(ciKlass *k, Register obj, Registe // See if we get an immediate positive hit __ beq(k_RInfo, t0, *success_target); // check for self - __ beq(klass_RInfo, k_RInfo, *success_target); - + if (k->is_loaded() && k->is_obj_array_klass()) { + // For a direct pointer comparison, we need the refined array klass pointer + ciKlass* k_refined = ciObjArrayKlass::make(k->as_obj_array_klass()->element_klass()); + if (!k_refined->is_loaded()) { + bailout("encountered unloaded_ciobjarrayklass due to out of memory error"); + return; + } + __ mov_metadata(t0, k_refined->constant_encoding()); + __ beq(klass_RInfo, t0, *success_target); + } else { + __ beq(klass_RInfo, k_RInfo, *success_target); + } __ subi(sp, sp, 2 * wordSize); // 2: store k_RInfo and klass_RInfo __ sd(k_RInfo, Address(sp, 0)); // sub klass __ sd(klass_RInfo, Address(sp, wordSize)); // super klass @@ -1146,16 
+1181,19 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L assert_different_registers(obj, k_RInfo, klass_RInfo); - if (should_profile) { - profile_object(md, data, obj, k_RInfo, klass_RInfo, obj_is_null); - } else { - __ beqz(obj, *obj_is_null); + if (op->need_null_check()) { + if (should_profile) { + profile_object(md, data, obj, k_RInfo, klass_RInfo, obj_is_null); + } else { + __ beqz(obj, *obj_is_null); + } } typecheck_loaded(op, k, k_RInfo); __ verify_oop(obj); if (op->fast_check()) { + assert(!k->is_loaded() || !k->is_obj_array_klass(), "Use refined array for a direct pointer comparison"); // get object class // not a safepoint as obj null check happens earlier __ load_klass(t0, obj, t1); @@ -1199,6 +1237,122 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { } } +void LIR_Assembler::emit_opFlattenedArrayCheck(LIR_OpFlattenedArrayCheck* op) { + // We are loading/storing from/to an array that *may* be a flat array (the + // declared type is Object[], abstract[], interface[] or VT.ref[]). + // If this array is a flat array, take the slow path. + __ test_flat_array_oop(op->array()->as_register(), op->tmp()->as_register(), *op->stub()->entry()); +} + +void LIR_Assembler::emit_opNullFreeArrayCheck(LIR_OpNullFreeArrayCheck* op) { + // We are storing into an array that *may* be null-free (the declared type is + // Object[], abstract[], interface[] or VT.ref[]). 
+ Label test_mark_word; + Register tmp = op->tmp()->as_register(); + __ ld(tmp, Address(op->array()->as_register(), oopDesc::mark_offset_in_bytes())); + __ test_bit(t0, tmp, exact_log2(markWord::unlocked_value)); + __ bnez(t0, test_mark_word); + __ load_prototype_header(tmp, op->array()->as_register()); + __ bind(test_mark_word); + __ test_bit(tmp, tmp, exact_log2(markWord::null_free_array_bit_in_place)); +} + +void LIR_Assembler::emit_opSubstitutabilityCheck(LIR_OpSubstitutabilityCheck* op) { + Label L_oops_equal; + Label L_oops_not_equal; + Label L_end; + + Register left = op->left()->as_register(); + Register right = op->right()->as_register(); + + __ beq(left, right, L_oops_equal); + + // (1) Null check -- if one of the operands is null, the other must not be null (because + // the two references are not equal), so they are not substitutable, + __ beqz(left, L_oops_not_equal); + __ beqz(right, L_oops_not_equal); + + ciKlass* left_klass = op->left_klass(); + ciKlass* right_klass = op->right_klass(); + + // (2) Inline type check -- if either of the operands is not a inline type, + // they are not substitutable. We do this only if we are not sure that the + // operands are inline type + if ((left_klass == nullptr || right_klass == nullptr) ||// The klass is still unloaded, or came from a Phi node. + !left_klass->is_inlinetype() || !right_klass->is_inlinetype()) { + Register tmp1 = op->tmp1()->as_register(); + Register tmp2 = op->tmp2()->as_register(); + __ mv(tmp1, markWord::inline_type_pattern); + __ ld(tmp2, Address(left, oopDesc::mark_offset_in_bytes())); + __ andr(tmp1, tmp1, tmp2); + __ ld(tmp2, Address(right, oopDesc::mark_offset_in_bytes())); + __ andr(tmp1, tmp1, tmp2); + __ mv(tmp2, (u1)markWord::inline_type_pattern); + __ bne(tmp1, tmp2, L_oops_not_equal); + } + + // (3) Same klass check: if the operands are of different klasses, they are not substitutable. 
+ if (left_klass != nullptr && left_klass->is_inlinetype() && left_klass == right_klass) { + // No need to load klass -- the operands are statically known to be the same inline klass. + __ j(*op->stub()->entry()); + } else { + Register left_klass_op = op->tmp1()->as_register(); + Register right_klass_op = op->tmp2()->as_register(); + if (UseCompactObjectHeaders) { + __ load_narrow_klass_compact(left_klass_op, left); + __ load_narrow_klass_compact(right_klass_op, right); + } else { + __ lwu(left_klass_op, Address(left, oopDesc::klass_offset_in_bytes())); + __ lwu(right_klass_op, Address(right, oopDesc::klass_offset_in_bytes())); + } + __ beq(left_klass_op, right_klass_op, *op->stub()->entry(), /* is_far */ true); // same klass -> do slow check + // fall through to L_oops_not_equal + } + + __ bind(L_oops_not_equal); + move(op->not_equal_result(), op->result_opr()); + __ j(L_end); + + // We've returned from the stub. X10 contains 0x0 IFF the two + // operands are not substitutable. (Don't compare against 0x1 in case the + // C compiler is naughty) + __ bind(*op->stub()->continuation()); + __ beqz(x10, L_oops_not_equal); // (call_stub() == 0x0) -> not_equal + + __ bind(L_oops_equal); + move(op->equal_result(), op->result_opr()); // (call_stub() != 0x0) -> equal + // fall-through + __ bind(L_end); +} + +void LIR_Assembler::emit_profile_inline_type(LIR_OpProfileInlineType* op) { + Register obj = op->obj()->as_register(); + Register tmp = op->tmp()->as_pointer_register(); + bool not_null = op->not_null(); + int flag = op->flag(); + + assert_different_registers(tmp, t0, t1); + + Label not_inline_type; + if (!not_null) { + __ beqz(obj, not_inline_type); + } + + __ test_oop_is_not_inline_type(obj, tmp, not_inline_type); + + Address mdo_addr = as_Address(op->mdp()->as_address_ptr(), t1); + __ lbu(tmp, mdo_addr); + __ mv(t0, flag); + __ orr(tmp, tmp, t0); + __ sb(tmp, mdo_addr); + + __ bind(not_inline_type); +} + +void LIR_Assembler::check_orig_pc() { + Unimplemented(); +} + 
void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { Register addr; if (op->addr()->is_register()) { @@ -1320,7 +1474,7 @@ void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { bailout("reloc call address stub overflow"); return; } - add_call_info(code_offset(), op->info()); + add_call_info(code_offset(), op->info(), op->maybe_return_as_fields()); __ post_call_nop(); } @@ -1331,7 +1485,7 @@ void LIR_Assembler::ic_call(LIR_OpJavaCall* op) { bailout("reloc call address stub overflow"); return; } - add_call_info(code_offset(), op->info()); + add_call_info(code_offset(), op->info(), op->maybe_return_as_fields()); __ post_call_nop(); } diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp index 90b6b3ee4f476..4a915c727ae37 100644 --- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2026, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
@@ -112,6 +112,7 @@ friend class ArrayCopyStub; void logic_op_reg(Register dst, Register left, Register right, LIR_Code code); void logic_op_imm(Register dst, Register left, int right, LIR_Code code); + void move(LIR_Opr src, LIR_Opr dst); public: void emit_cmove(LIR_Op4* op); diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp index 5e0deb84a14eb..4ff00cb279296 100644 --- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp @@ -33,6 +33,7 @@ #include "c1/c1_Runtime1.hpp" #include "c1/c1_ValueStack.hpp" #include "ci/ciArray.hpp" +#include "ci/ciInstanceKlass.hpp" #include "ci/ciObjArrayKlass.hpp" #include "ci/ciTypeArrayKlass.hpp" #include "runtime/sharedRuntime.hpp" @@ -103,6 +104,7 @@ LIR_Opr LIRGenerator::rlock_byte(BasicType type) { return reg; } + //--------- loading items into registers -------------------------------- @@ -279,11 +281,17 @@ void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { if (x->needs_null_check()) { info_for_exception = state_for(x); } + + CodeStub* throw_ie_stub = + x->maybe_inlinetype() ? + new SimpleExceptionStub(StubId::c1_throw_identity_exception_id, obj.result(), state_for(x)) : + nullptr; + // this CodeEmitInfo must not have the xhandlers because here the // object is already locked (xhandlers expect object to be unlocked) CodeEmitInfo* info = state_for(x, x->state(), true); monitor_enter(obj.result(), lock, syncTempOpr(), scratch, - x->monitor_no(), info_for_exception, info); + x->monitor_no(), info_for_exception, info, throw_ie_stub); } void LIRGenerator::do_MonitorExit(MonitorExit* x) { @@ -910,15 +918,17 @@ void LIRGenerator::do_NewInstance(NewInstance* x) { tty->print_cr(" ###class not loaded at new bci %d", x->printable_bci()); } #endif - CodeEmitInfo* info = state_for(x, x->state()); + CodeEmitInfo* info = state_for(x, x->needs_state_before() ? 
x->state_before() : x->state()); LIR_Opr reg = result_register_for(x->type()); new_instance(reg, x->klass(), x->is_unresolved(), + !x->is_unresolved() && x->klass()->is_inlinetype(), FrameMap::r12_oop_opr, FrameMap::r15_oop_opr, FrameMap::r14_oop_opr, LIR_OprFact::illegalOpr, FrameMap::r13_metadata_opr, info); + LIR_Opr result = rlock_result(x); __ move(reg, result); } @@ -974,13 +984,19 @@ void LIRGenerator::do_NewObjectArray(NewObjectArray* x) { length.load_item_force(FrameMap::r9_opr); LIR_Opr len = length.result(); - CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info); - ciKlass* obj = (ciKlass*) ciObjArrayKlass::make(x->klass()); + ciKlass* obj = ciObjArrayKlass::make(x->klass()); + + // TODO 8265122 Implement a fast path for this + bool is_flat = obj->is_loaded() && obj->is_flat_array_klass(); + bool is_null_free = obj->is_loaded() && obj->as_array_klass()->is_elem_null_free(); + + CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info, is_null_free); + if (obj == ciEnv::unloaded_ciobjarrayklass()) { BAILOUT("encountered unloaded_ciobjarrayklass due to out of memory error"); } klass2reg_with_patching(klass_reg, obj, patching_info); - __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path); + __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path, true, is_null_free || is_flat); LIR_Opr result = rlock_result(x); __ move(reg, result); @@ -1077,7 +1093,7 @@ void LIRGenerator::do_CheckCast(CheckCast* x) { __ checkcast(reg, obj.result(), x->klass(), new_register(objectType), new_register(objectType), tmp3, x->direct_compare(), info_for_exception, patching_info, stub, - x->profiled_method(), x->profiled_bci()); + x->profiled_method(), x->profiled_bci(), x->is_null_free()); } void LIRGenerator::do_InstanceOf(InstanceOf* x) { @@ -1142,8 +1158,13 @@ void LIRGenerator::do_If(If* x) { __ safepoint(LIR_OprFact::illegalOpr, state_for(x, x->state_before())); } + if 
(x->substitutability_check()) { + substitutability_check(x, *xin, *yin); + } else { + __ cmp(lir_cond(cond), left, right); + } + // Generate branch profiling. Profiling code doesn't kill flags. - __ cmp(lir_cond(cond), left, right); profile_branch(x, cond); move_to_phi(x->state()); if (x->x()->type()->is_float_kind()) { diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp index abcc070b25334..6db1b8be990a0 100644 --- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp @@ -28,11 +28,13 @@ #include "c1/c1_MacroAssembler.hpp" #include "c1/c1_Runtime1.hpp" #include "classfile/systemDictionary.hpp" +#include "gc/shared/barrierSet.hpp" #include "gc/shared/barrierSetAssembler.hpp" #include "gc/shared/collectedHeap.hpp" #include "interpreter/interpreter.hpp" #include "oops/arrayOop.hpp" #include "oops/markWord.hpp" +#include "runtime/arguments.hpp" #include "runtime/basicLock.hpp" #include "runtime/os.hpp" #include "runtime/sharedRuntime.hpp" @@ -85,14 +87,22 @@ void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, i void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register tmp1, Register tmp2) { assert_different_registers(obj, klass, len, tmp1, tmp2); - if (UseCompactObjectHeaders) { + if (UseCompactObjectHeaders || Arguments::is_valhalla_enabled()) { + // COH: Markword contains class pointer which is only known at runtime. + // Valhalla: Could have value class which has a different prototype header to a normal object. + // In both cases, we need to fetch dynamically. ld(tmp1, Address(klass, Klass::prototype_header_offset())); sd(tmp1, Address(obj, oopDesc::mark_offset_in_bytes())); } else { - // This assumes that all prototype bits fitr in an int32_t + // Otherwise: Can use the statically computed prototype header which is the same for every object. 
mv(tmp1, checked_cast<int32_t>(markWord::prototype().value())); sd(tmp1, Address(obj, oopDesc::mark_offset_in_bytes())); - encode_klass_not_null(tmp1, klass, tmp2); + } + + if (!UseCompactObjectHeaders) { + // COH: Markword already contains class pointer. Nothing else to do. + // Otherwise: Fetch klass pointer following the markword + encode_klass_not_null(tmp1, klass, tmp2); // Take care not to kill klass sw(tmp1, Address(obj, oopDesc::klass_offset_in_bytes())); } @@ -243,23 +253,29 @@ void C1_MacroAssembler::allocate_array(Register obj, Register len, Register tmp1 verify_oop(obj); } -void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { - assert(bang_size_in_bytes >= framesize, "stack bang size incorrect"); +void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_bytes, + int sp_offset_for_orig_pc, + bool needs_stack_repair, bool has_scalarized_args, + Label* verified_inline_entry_label) { + assert(bang_size_in_bytes >= frame_size_in_bytes, "stack bang size incorrect"); + + assert(!needs_stack_repair && !has_scalarized_args, ""); + // Make sure there is enough stack space for this method's activation. // Note that we do this before creating a frame. generate_stack_overflow_check(bang_size_in_bytes); - MacroAssembler::build_frame(framesize); + MacroAssembler::build_frame(frame_size_in_bytes); // Insert nmethod entry barrier into frame. BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); bs->nmethod_entry_barrier(this, nullptr /* slow_path */, nullptr /* continuation */, nullptr /* guard */); -} -void C1_MacroAssembler::remove_frame(int framesize) { - MacroAssembler::remove_frame(framesize); + if (verified_inline_entry_label != nullptr) { + // Jump here from the scalarized entry points that already created the frame. 
+ bind(*verified_inline_entry_label); + } } - void C1_MacroAssembler::verified_entry(bool breakAtEntry) { // If we have to make this method not-entrant we'll overwrite its // first instruction with a jump. For this action to be legal we @@ -270,6 +286,12 @@ void C1_MacroAssembler::verified_entry(bool breakAtEntry) { nop(); // 4 bytes } +int C1_MacroAssembler::scalarized_entry(const CompiledEntrySignature* ces, int frame_size_in_bytes, int bang_size_in_bytes, + int sp_offset_for_orig_pc, Label& verified_inline_entry_label, bool is_inline_ro_entry) { + Unimplemented(); + return 0; +} + void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { // fp + -2: link // + -1: return address diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp index a06584e9411ec..e15422b83a704 100644 --- a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
@@ -735,6 +735,7 @@ OopMapSet* Runtime1::generate_code_for(StubId id, StubAssembler* sasm) { case StubId::c1_new_type_array_id: case StubId::c1_new_object_array_id: + case StubId::c1_new_null_free_array_id: { Register length = x9; // Incoming Register klass = x13; // Incoming @@ -742,8 +743,11 @@ OopMapSet* Runtime1::generate_code_for(StubId id, StubAssembler* sasm) { if (id == StubId::c1_new_type_array_id) { __ set_info("new_type_array", dont_gc_arguments); - } else { + } else if (id == StubId::c1_new_object_array_id) { __ set_info("new_object_array", dont_gc_arguments); + } else { + assert(id == StubId::c1_new_null_free_array_id, "must be"); + __ set_info("new_null_free_array", dont_gc_arguments); } #ifdef ASSERT @@ -753,10 +757,30 @@ OopMapSet* Runtime1::generate_code_for(StubId id, StubAssembler* sasm) { Register tmp = obj; __ lwu(tmp, Address(klass, Klass::layout_helper_offset())); __ sraiw(tmp, tmp, Klass::_lh_array_tag_shift); - int tag = ((id == StubId::c1_new_type_array_id) ? Klass::_lh_array_tag_type_value : Klass::_lh_array_tag_obj_value); - __ mv(t0, tag); - __ beq(t0, tmp, ok); - __ stop("assert(is an array klass)"); + + switch (id) { + case StubId::c1_new_type_array_id: + __ mv(t0, (int)Klass::_lh_array_tag_type_value); + __ beq(t0, tmp, ok); + __ stop("assert(is a type array klass)"); + break; + case StubId::c1_new_object_array_id: + __ mv(t0, (int)Klass::_lh_array_tag_ref_value); // new "[Ljava/lang/Object;" + __ beq(t0, tmp, ok); + __ mv(t0, (int)Klass::_lh_array_tag_flat_value); // new "[LVT;" + __ beq(t0, tmp, ok); + __ stop("assert(is an object or inline type array klass)"); + break; + case StubId::c1_new_null_free_array_id: + __ mv(t0, (int)Klass::_lh_array_tag_flat_value); // the array can be a flat array. + __ beq(t0, tmp, ok); + __ mv(t0, (int)Klass::_lh_array_tag_ref_value); // the array cannot be a flat array (due to the InlineArrayElementMaxFlatSize, etc.) 
+ __ beq(t0, tmp, ok); + __ stop("assert(is an object or inline type array klass)"); + break; + default: ShouldNotReachHere(); + } + __ should_not_reach_here(); __ bind(ok); } @@ -768,8 +792,10 @@ OopMapSet* Runtime1::generate_code_for(StubId id, StubAssembler* sasm) { int call_offset = 0; if (id == StubId::c1_new_type_array_id) { call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_type_array), klass, length); - } else { + } else if (id == StubId::c1_new_object_array_id) { call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_object_array), klass, length); + } else { + call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_null_free_array), klass, length); } oop_maps = new OopMapSet(); @@ -808,6 +834,93 @@ OopMapSet* Runtime1::generate_code_for(StubId id, StubAssembler* sasm) { } break; + case StubId::c1_buffer_inline_args_id: + case StubId::c1_buffer_inline_args_no_receiver_id: + { + const char* name = (id == StubId::c1_buffer_inline_args_id) ? + "buffer_inline_args" : "buffer_inline_args_no_receiver"; + StubFrame f(sasm, name, dont_gc_arguments); + OopMap* map = save_live_registers(sasm); + Register method = x9; // Incoming + address entry = (id == StubId::c1_buffer_inline_args_id) ? + CAST_FROM_FN_PTR(address, buffer_inline_args) : + CAST_FROM_FN_PTR(address, buffer_inline_args_no_receiver); + // This is called from a C1 method's scalarized entry point + // where x10-x17 may be holding live argument values so we can't + // return the result in x10 as the other stubs do. RA is used as + // a temporary below to avoid the result being clobbered by + // restore_live_registers. It's saved and restored by + // StubAssembler::prologue and epilogue anyway. 
+ int call_offset = __ call_RT(ra, noreg, entry, method); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers(sasm); + __ mv(x18, ra); + __ verify_oop(x18); // x18: an array of buffered value objects + } + break; + + case StubId::c1_load_flat_array_id: + { + StubFrame f(sasm, "load_flat_array", dont_gc_arguments); + OopMap* map = save_live_registers(sasm); + + // Called with store_parameter and not C abi + + f.load_argument(1, x10); // x10,: array + f.load_argument(0, x11); // x11,: index + int call_offset = __ call_RT(x10, noreg, CAST_FROM_FN_PTR(address, load_flat_array), x10, x11); + + // Ensure the stores that initialize the buffer are visible + // before any subsequent store that publishes this reference. + __ membar(MacroAssembler::StoreStore); + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_r10(sasm); + + // x10: loaded element at array[index] + __ verify_oop(x10); + } + break; + + case StubId::c1_store_flat_array_id: + { + StubFrame f(sasm, "store_flat_array", dont_gc_arguments); + OopMap* map = save_live_registers(sasm); + + // Called with store_parameter and not C abi + + f.load_argument(2, x10); // x10: array + f.load_argument(1, x11); // x11: index + f.load_argument(0, x12); // x12: value + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, store_flat_array), x10, x11, x12); + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_r10(sasm); + } + break; + + case StubId::c1_substitutability_check_id: + { + StubFrame f(sasm, "substitutability_check", dont_gc_arguments); + OopMap* map = save_live_registers(sasm); + + // Called with store_parameter and not C abi + + f.load_argument(1, x11); // x11,: left + f.load_argument(0, x12); // x12,: right + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, substitutability_check), x11, x12); + + oop_maps = new OopMapSet(); + 
oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_r10(sasm); + + // x10,: are the two operands substitutable + } + break; + case StubId::c1_register_finalizer_id: { __ set_info("register_finalizer", dont_gc_arguments); @@ -851,12 +964,24 @@ OopMapSet* Runtime1::generate_code_for(StubId id, StubAssembler* sasm) { case StubId::c1_throw_incompatible_class_change_error_id: { - StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments, does_not_return); + StubFrame f(sasm, "throw_incompatible_class_change_error", dont_gc_arguments, does_not_return); oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); } break; + case StubId::c1_throw_illegal_monitor_state_exception_id: + { StubFrame f(sasm, "throw_illegal_monitor_state_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_illegal_monitor_state_exception), false); + } + break; + + case StubId::c1_throw_identity_exception_id: + { StubFrame f(sasm, "throw_identity_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_identity_exception), true); + } + break; + case StubId::c1_slow_subtype_check_id: { // Typical calling sequence: diff --git a/src/hotspot/cpu/riscv/c2_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c2_CodeStubs_riscv.cpp index 3436f9f8fc9db..3ff490fbb81d3 100644 --- a/src/hotspot/cpu/riscv/c2_CodeStubs_riscv.cpp +++ b/src/hotspot/cpu/riscv/c2_CodeStubs_riscv.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2026, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* @@ -68,7 +68,6 @@ void C2EntryBarrierStub::emit(C2_MacroAssembler& masm) { // make guard value 4-byte aligned so that it can be accessed atomically __ align(4); __ bind(guard()); - __ relocate(entry_guard_Relocation::spec()); __ emit_int32(0); // nmethod guard value } diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp index 8d383f07c9ad4..6bff5c95d3471 100644 --- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp @@ -45,6 +45,29 @@ #define BIND(label) bind(label); BLOCK_COMMENT(#label ":") +void C2_MacroAssembler::entry_barrier() { + BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + // Dummy labels for just measuring the code size + Label dummy_slow_path; + Label dummy_continuation; + Label dummy_guard; + Label* slow_path = &dummy_slow_path; + Label* continuation = &dummy_continuation; + Label* guard = &dummy_guard; + + if (!Compile::current()->output()->in_scratch_emit_size()) { + // Use real labels from actual stub when not emitting code for the purpose of measuring its size + C2EntryBarrierStub* stub = new (Compile::current()->comp_arena()) C2EntryBarrierStub(); + Compile::current()->output()->add_stub(stub); + slow_path = &stub->entry(); + continuation = &stub->continuation(); + guard = &stub->guard(); + } + + // In the C2 code, we move the non-hot part of nmethod entry barriers out-of-line to a stub. + bs->nmethod_entry_barrier(this, slow_path, continuation, guard); +} + void C2_MacroAssembler::fast_lock(Register obj, Register box, Register tmp1, Register tmp2, Register tmp3, Register tmp4) { // Flag register, zero for success; non-zero for failure. @@ -2352,7 +2375,7 @@ void C2_MacroAssembler::float16_to_float(FloatRegister dst, Register src, Regist mv(t0, 0x7c00); andr(tmp, src, t0); // jump to stub processing NaN and Inf cases. 
- beq(t0, tmp, stub->entry(), true); + beq(t0, tmp, stub->entry(), /* is_far */ true); // non-NaN or non-Inf cases, just use built-in instructions. fmv_h_x(dst, src); @@ -2384,7 +2407,7 @@ void C2_MacroAssembler::float_to_float16(Register dst, FloatRegister src, FloatR // replace fclass with feq as performance optimization. feq_s(t0, src, src); // jump to stub processing NaN cases. - beqz(t0, stub->entry(), true); + beqz(t0, stub->entry(), /* is_far */ true); // non-NaN cases, just use built-in instructions. fcvt_h_s(ftmp, src); @@ -2445,7 +2468,7 @@ void C2_MacroAssembler::float16_to_float_v(VectorRegister dst, VectorRegister sr vfwcvt_f_f_v(dst, src); // jump to stub processing NaN and Inf cases if there is any of them in the vector-wide. - bnez(t0, stub->entry(), true); + bnez(t0, stub->entry(), /* is_far */ true); bind(stub->continuation()); } @@ -2538,7 +2561,7 @@ void C2_MacroAssembler::float_to_float16_v(VectorRegister dst, VectorRegister sr vfncvt_f_f_w(dst, src); // jump to stub processing NaN cases. - bnez(t0, stub->entry(), true); + bnez(t0, stub->entry(), /* is_far */ true); bind(stub->continuation()); } diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp index 468d53b1a540c..93ee41a10d7f5 100644 --- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp @@ -49,6 +49,8 @@ const int STUB_THRESHOLD, Label *STUB, Label *DONE); public: + void entry_barrier(); + // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file. 
void fast_lock(Register object, Register box, Register tmp1, Register tmp2, Register tmp3, Register tmp4); diff --git a/src/hotspot/cpu/riscv/continuationFreezeThaw_riscv.inline.hpp b/src/hotspot/cpu/riscv/continuationFreezeThaw_riscv.inline.hpp index 2c2cb03b59e13..42c4c291fcf4a 100644 --- a/src/hotspot/cpu/riscv/continuationFreezeThaw_riscv.inline.hpp +++ b/src/hotspot/cpu/riscv/continuationFreezeThaw_riscv.inline.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2019, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -74,7 +74,8 @@ inline frame FreezeBase::sender(const frame& f) { : frame(sender_sp, sender_sp, *link_addr, sender_pc); } -template frame FreezeBase::new_heap_frame(frame& f, frame& caller) { +template +frame FreezeBase::new_heap_frame(frame& f, frame& caller, int size_adjust) { assert(FKind::is_instance(f), ""); assert(!caller.is_interpreted_frame() || caller.unextended_sp() == (intptr_t*)caller.at(frame::interpreter_frame_last_sp_offset), ""); @@ -104,14 +105,14 @@ template frame FreezeBase::new_heap_frame(frame& f, frame& calle fp = *(intptr_t**)(f.sp() - 2); int fsize = FKind::size(f); - sp = caller.unextended_sp() - fsize; - if (caller.is_interpreted_frame()) { + sp = caller.unextended_sp() - fsize - size_adjust; + if (caller.is_interpreted_frame() && size_adjust == 0) { // If the caller is interpreted, our stackargs are not supposed to overlap with it // so we make more room by moving sp down by argsize int argsize = FKind::stack_argsize(f); sp -= argsize; + caller.set_sp(sp + fsize); } - caller.set_sp(sp + fsize); assert(_cont.tail()->is_in_chunk(sp), ""); @@ -182,11 +183,12 @@ inline void FreezeBase::set_top_frame_metadata_pd(const frame& hf) { : (intptr_t)hf.fp(); } -inline void FreezeBase::patch_pd(frame& hf, const frame& 
caller) { +inline void FreezeBase::patch_pd(frame& hf, const frame& caller, bool is_bottom_frame) { if (caller.is_interpreted_frame()) { assert(!caller.is_empty(), ""); patch_callee_link_relative(caller, caller.fp()); - } else { + } else if (is_bottom_frame && caller.pc() != nullptr) { + assert(caller.is_compiled_frame(), ""); // If we're the bottom-most frame frozen in this freeze, the caller might have stayed frozen in the chunk, // and its oop-containing fp fixed. We've now just overwritten it, so we must patch it back to its value // as read from the chunk. @@ -256,7 +258,8 @@ inline frame ThawBase::new_entry_frame() { return frame(sp, sp, _cont.entryFP(), _cont.entryPC()); } -template frame ThawBase::new_stack_frame(const frame& hf, frame& caller, bool bottom) { +template +frame ThawBase::new_stack_frame(const frame& hf, frame& caller, bool bottom, int size_adjust) { assert(FKind::is_instance(hf), ""); // The values in the returned frame object will be written into the callee's stack in patch. 
@@ -284,24 +287,23 @@ template frame ThawBase::new_stack_frame(const frame& hf, frame& return f; } else { int fsize = FKind::size(hf); - intptr_t* frame_sp = caller.unextended_sp() - fsize; + intptr_t* frame_sp = caller.unextended_sp() - fsize - size_adjust; if (bottom || caller.is_interpreted_frame()) { - int argsize = FKind::stack_argsize(hf); - - fsize += argsize; - frame_sp -= argsize; - caller.set_sp(caller.sp() - argsize); - assert(caller.sp() == frame_sp + (fsize-argsize), ""); - + if (size_adjust == 0) { + int argsize = FKind::stack_argsize(hf); + frame_sp -= argsize; + } frame_sp = align(hf, frame_sp, caller, bottom); + caller.set_sp(frame_sp + fsize + size_adjust); } + assert(is_aligned(frame_sp, frame::frame_alignment), ""); assert(hf.cb() != nullptr, ""); assert(hf.oop_map() != nullptr, ""); intptr_t* fp; if (PreserveFramePointer) { // we need to recreate a "real" frame pointer, pointing into the stack - fp = frame_sp + FKind::size(hf) - frame::sender_sp_offset; + fp = frame_sp + fsize - frame::sender_sp_offset; } else { fp = FKind::stub || FKind::native // fp always points to the address above the pushed return pc. We need correct address. 
@@ -319,16 +321,16 @@ inline intptr_t* ThawBase::align(const frame& hf, intptr_t* frame_sp, frame& cal if (((intptr_t)frame_sp & 0xf) != 0) { assert(caller.is_interpreted_frame() || (bottom && hf.compiled_frame_stack_argsize() % 2 != 0), ""); frame_sp--; - caller.set_sp(caller.sp() - 1); } assert(is_aligned(frame_sp, frame::frame_alignment), ""); #endif - return frame_sp; } inline void ThawBase::patch_pd(frame& f, const frame& caller) { - patch_callee_link(caller, caller.fp()); + if (caller.is_interpreted_frame() || PreserveFramePointer) { + patch_callee_link(caller, caller.fp()); + } } inline void ThawBase::patch_pd(frame& f, intptr_t* caller_sp) { diff --git a/src/hotspot/cpu/riscv/foreignGlobals_riscv.cpp b/src/hotspot/cpu/riscv/foreignGlobals_riscv.cpp index 339e205c48afe..a56d16f66808a 100644 --- a/src/hotspot/cpu/riscv/foreignGlobals_riscv.cpp +++ b/src/hotspot/cpu/riscv/foreignGlobals_riscv.cpp @@ -51,15 +51,15 @@ const ABIDescriptor ForeignGlobals::parse_abi_descriptor(jobject jabi) { oop abi_oop = JNIHandles::resolve_non_null(jabi); ABIDescriptor abi; - objArrayOop inputStorage = jdk_internal_foreign_abi_ABIDescriptor::inputStorage(abi_oop); + refArrayOop inputStorage = jdk_internal_foreign_abi_ABIDescriptor::inputStorage(abi_oop); parse_register_array(inputStorage, StorageType::INTEGER, abi._integer_argument_registers, as_Register); parse_register_array(inputStorage, StorageType::FLOAT, abi._float_argument_registers, as_FloatRegister); - objArrayOop outputStorage = jdk_internal_foreign_abi_ABIDescriptor::outputStorage(abi_oop); + refArrayOop outputStorage = jdk_internal_foreign_abi_ABIDescriptor::outputStorage(abi_oop); parse_register_array(outputStorage, StorageType::INTEGER, abi._integer_return_registers, as_Register); parse_register_array(outputStorage, StorageType::FLOAT, abi._float_return_registers, as_FloatRegister); - objArrayOop volatileStorage = jdk_internal_foreign_abi_ABIDescriptor::volatileStorage(abi_oop); + refArrayOop volatileStorage = 
jdk_internal_foreign_abi_ABIDescriptor::volatileStorage(abi_oop); parse_register_array(volatileStorage, StorageType::INTEGER, abi._integer_additional_volatile_registers, as_Register); parse_register_array(volatileStorage, StorageType::FLOAT, abi._float_additional_volatile_registers, as_FloatRegister); diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp index 19dbdd6aeaed6..bf659c6053a01 100644 --- a/src/hotspot/cpu/riscv/frame_riscv.cpp +++ b/src/hotspot/cpu/riscv/frame_riscv.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -620,6 +620,21 @@ frame::frame(void* ptr_sp, void* ptr_fp, void* pc) : _on_heap(false) { #endif +// Check for a method with scalarized inline type arguments that needs +// a stack repair and return the repaired sender stack pointer. + +intptr_t* frame::repair_sender_sp(nmethod* nm, intptr_t* sp, intptr_t** saved_fp_addr) { + Unimplemented(); + return nullptr; +} + +bool frame::was_augmented_on_entry(int& real_size) const { + assert(is_compiled_frame(), ""); + assert(!_cb->as_nmethod_or_null()->needs_stack_repair(), "unimplemented"); + real_size = _cb->frame_size(); + return false; +} + void JavaFrameAnchor::make_walkable() { // last frame set? if (last_Java_sp() == nullptr) { return; } diff --git a/src/hotspot/cpu/riscv/frame_riscv.hpp b/src/hotspot/cpu/riscv/frame_riscv.hpp index ce5a8dde23049..1d875707fc7af 100644 --- a/src/hotspot/cpu/riscv/frame_riscv.hpp +++ b/src/hotspot/cpu/riscv/frame_riscv.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2026, Oracle and/or its affiliates. 
All rights reserved. * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -184,6 +184,10 @@ } public: + // Support for scalarized inline type calling convention + static intptr_t* repair_sender_sp(nmethod* nm, intptr_t* sp, intptr_t** saved_fp_addr); + bool was_augmented_on_entry(int& real_size) const; + // Constructors frame(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc); diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp index cf7cb98d2d4c3..b2eb92c1bba6a 100644 --- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -138,6 +138,7 @@ void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* mas static void generate_queue_test_and_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime, const Register thread, const Register value, const Register tmp1, const Register tmp2) { + assert_different_registers(value, tmp1, tmp2); // Can we store a value in the given thread's buffer? // (The index field is typed as size_t.) __ ld(tmp1, Address(thread, in_bytes(index_offset))); // tmp1 := *(index address) @@ -176,7 +177,7 @@ static void generate_pre_barrier_slow_path(MacroAssembler* masm, __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); } // Is the previous value null? 
- __ beqz(pre_val, done, true); + __ beqz(pre_val, done, /* is_far */ true); generate_queue_test_and_insertion(masm, G1ThreadLocalData::satb_mark_queue_index_offset(), G1ThreadLocalData::satb_mark_queue_buffer_offset(), @@ -214,6 +215,18 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, __ push_call_clobbered_registers(); + // Calling the runtime using the regular call_VM_leaf mechanism generates + // code (generated by InterpreterMacroAssembler::call_VM_leaf_base) + // that checks that the *(fp+frame::interpreter_frame_last_sp) == nullptr. + // + // If we are generating the pre-barrier without a frame (e.g. in the + // intrinsified Reference.get() routine) then fp might be pointing to + // the caller frame and so this check will most likely fail at runtime. + // + // Expanding the call directly bypasses the generation of the check. + // So when we do not have a full interpreter frame on the stack + // expand_call should be passed true. + if (expand_call) { assert(pre_val != c_rarg1, "smashed arg"); __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); @@ -301,7 +314,7 @@ void G1BarrierSetAssembler::g1_write_barrier_pre_c2(MacroAssembler* masm, generate_pre_barrier_fast_path(masm, thread, tmp1); // If marking is active (*(mark queue active address) != 0), jump to stub (slow path) - __ bnez(tmp1, *stub->entry(), true); + __ bnez(tmp1, *stub->entry(), /* is_far */ true); __ bind(*stub->continuation()); } @@ -364,6 +377,16 @@ void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorator void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { + + bool in_heap = (decorators & IN_HEAP) != 0; + bool as_normal = (decorators & AS_NORMAL) != 0; + bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; + + bool needs_pre_barrier = 
as_normal && !dest_uninitialized; + bool needs_post_barrier = (val != noreg && in_heap); + + assert_different_registers(val, tmp1, tmp2, tmp3); + // flatten object address if needed if (dst.offset() == 0) { if (dst.base() != tmp3) { @@ -373,31 +396,37 @@ void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet deco __ la(tmp3, dst); } - g1_write_barrier_pre(masm, - tmp3 /* obj */, - tmp2 /* pre_val */, - xthread /* thread */, - tmp1 /* tmp1 */, - t1 /* tmp2 */, - val != noreg /* tosca_live */, - false /* expand_call */); + if (needs_pre_barrier) { + g1_write_barrier_pre(masm, + tmp3 /* obj */, + tmp2 /* pre_val */, + xthread /* thread */, + tmp1 /* tmp1 */, + t1 /* tmp2 */, + val != noreg /* tosca_live */, + false /* expand_call */); + } if (val == noreg) { BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), noreg, noreg, noreg, noreg); } else { // G1 barrier needs uncompressed oop for region cross check. Register new_val = val; - if (UseCompressedOops) { - new_val = t1; - __ mv(new_val, val); + if (needs_post_barrier) { + if (UseCompressedOops) { + new_val = t1; + __ mv(new_val, val); + } } BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg, noreg); - g1_write_barrier_post(masm, - tmp3 /* store_adr */, - new_val /* new_val */, - xthread /* thread */, - tmp1 /* tmp1 */, - tmp2 /* tmp2 */); + if (needs_post_barrier) { + g1_write_barrier_post(masm, + tmp3 /* store_adr */, + new_val /* new_val */, + xthread /* thread */, + tmp1 /* tmp1 */, + tmp2 /* tmp2 */); + } } } diff --git a/src/hotspot/cpu/riscv/gc/g1/g1_riscv.ad b/src/hotspot/cpu/riscv/gc/g1/g1_riscv.ad index 8461a36e68c55..8b34679c90fb4 100644 --- a/src/hotspot/cpu/riscv/gc/g1/g1_riscv.ad +++ b/src/hotspot/cpu/riscv/gc/g1/g1_riscv.ad @@ -1,5 +1,5 @@ // -// Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2024, 2026, Oracle and/or its affiliates. All rights reserved. 
// Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved. // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. // @@ -74,6 +74,126 @@ static void write_barrier_post(MacroAssembler* masm, %} + +// TODO 8350865 (same applies to g1StoreLSpecialTwoOops) +// - Do not set/overwrite barrier data here, also handle G1C2BarrierPostNotNull + +instruct g1StoreLSpecialOneOopOff0(indirect mem, iRegLNoSp src, immI0 off, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr) +%{ + predicate(UseG1GC); + match(Set mem (StoreLSpecial mem (Binary src off))); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(STORE_COST); + format %{ "sd $src, $mem\t# g1StoreLSpecialOneOopOff0" %} + ins_encode %{ + guarantee($mem$$disp == 0, "impossible encoding"); + ((MachNode*)this)->set_barrier_data(G1C2BarrierPre | G1C2BarrierPost); + + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $src$$Register) /* preserve */); + + __ sd($src$$Register, Address($mem$$Register)); + + // Extract the narrow oop field value + __ zext($tmp1$$Register, $src$$Register, 32); + __ decode_heap_oop($tmp1$$Register, $tmp1$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(istore_reg_mem); +%} + +instruct g1StoreLSpecialOneOopOff4(indirect mem, iRegLNoSp src, immI_4 off, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegPNoSp tmp4, rFlagsReg cr) +%{ + predicate(UseG1GC); + match(Set mem (StoreLSpecial mem (Binary src off))); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + ins_cost(STORE_COST); + format %{ "sd $src, $mem\t# g1StoreLSpecialOneOopOff4" %} + ins_encode %{ + guarantee($mem$$disp == 0, "impossible encoding"); + ((MachNode*)this)->set_barrier_data(G1C2BarrierPre | G1C2BarrierPost); 
+ + // Adjust address to point to narrow oop + __ add($tmp4$$Register, $mem$$Register, 4); + write_barrier_pre(masm, this, + $tmp4$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $src$$Register, $tmp4$$Register) /* preserve */); + + __ sd($src$$Register, Address($mem$$Register)); + + // Shift long value to extract the narrow oop field value and zero-extend it + __ srli($tmp1$$Register, $src$$Register, 32); + __ decode_heap_oop($tmp1$$Register, $tmp1$$Register); + write_barrier_post(masm, this, + $tmp4$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(istore_reg_mem); +%} + +instruct g1StoreLSpecialTwoOops(indirect mem, iRegLNoSp src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegPNoSp tmp4, rFlagsReg cr) +%{ + predicate(UseG1GC); + match(Set mem (StoreLSpecial mem src)); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + ins_cost(STORE_COST); + format %{ "sd $src, $mem\t# g1StoreLSpecialTwoOops" %} + ins_encode %{ + guarantee($mem$$disp == 0, "impossible encoding"); + ((MachNode*)this)->set_barrier_data(G1C2BarrierPre | G1C2BarrierPost); + + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $src$$Register) /* preserve */); + // Adjust address to point to the second narrow oop in the long value + __ add($tmp4$$Register, $mem$$Register, 4); + write_barrier_pre(masm, this, + $tmp4$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $src$$Register, $tmp4$$Register) /* preserve */); + + __ sd($src$$Register, Address($mem$$Register)); + + // Zero-extend first narrow oop to long + __ zext($tmp1$$Register, $src$$Register, 32); + __ 
decode_heap_oop($tmp1$$Register, $tmp1$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + + // Shift long value to extract the second narrow oop field value + __ srli($tmp1$$Register, $src$$Register, 32); + __ decode_heap_oop($tmp1$$Register, $tmp1$$Register); + write_barrier_post(masm, this, + $tmp4$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(istore_reg_mem); +%} + + instruct g1StoreP(indirect mem, iRegP src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr) %{ predicate(UseG1GC && n->as_Store()->barrier_data() != 0); diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp index fd78b429ee4ff..4b8754360226e 100644 --- a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp @@ -27,6 +27,7 @@ #include "gc/shared/barrierSet.hpp" #include "gc/shared/barrierSetAssembler.hpp" #include "gc/shared/barrierSetNMethod.hpp" +#include "gc/shared/barrierSetRuntime.hpp" #include "gc/shared/collectedHeap.hpp" #include "interpreter/interp_masm.hpp" #include "memory/universe.hpp" @@ -84,22 +85,35 @@ void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { bool in_heap = (decorators & IN_HEAP) != 0; bool in_native = (decorators & IN_NATIVE) != 0; + bool is_not_null = (decorators & IS_NOT_NULL) != 0; + switch (type) { case T_OBJECT: // fall through case T_ARRAY: { - val = val == noreg ? 
zr : val; if (in_heap) { - if (UseCompressedOops) { - assert(!dst.uses(val), "not enough registers"); - if (val != zr) { - __ encode_heap_oop(val); + if (val == noreg) { + assert(!is_not_null, "inconsistent access"); + if (UseCompressedOops) { + __ sw(zr, dst); + } else { + __ sd(zr, dst); } - __ sw(val, dst); } else { - __ sd(val, dst); + if (UseCompressedOops) { + assert(!dst.uses(val), "not enough registers"); + if (is_not_null) { + __ encode_heap_oop_not_null(val); + } else { + __ encode_heap_oop(val); + } + __ sw(val, dst); + } else { + __ sd(val, dst); + } } } else { assert(in_native, "why else?"); + assert(val != noreg, "not supported"); __ sd(val, dst); } break; @@ -121,6 +135,19 @@ void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators } +void BarrierSetAssembler::flat_field_copy(MacroAssembler* masm, DecoratorSet decorators, + Register src, Register dst, Register inline_layout_info) { + // flat_field_copy implementation is fairly complex, and there are not any + // "short-cuts" to be made from asm. What there is, appears to have the same + // cost in C++, so just "call_VM_leaf" for now rather than maintain hundreds + // of hand-rolled instructions... + if (decorators & IS_DEST_UNINITIALIZED) { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetRuntime::value_copy_is_dest_uninitialized), src, dst, inline_layout_info); + } else { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetRuntime::value_copy), src, dst, inline_layout_info); + } +} + void BarrierSetAssembler::copy_load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, @@ -292,9 +319,9 @@ void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Label* slo Label& barrier_target = slow_path == nullptr ? 
skip_barrier : *slow_path; if (slow_path == nullptr) { - __ beq(t0, t1, barrier_target, true /* is_far */); + __ beq(t0, t1, barrier_target, /* is_far */ true); } else { - __ bne(t0, t1, barrier_target, true /* is_far */); + __ bne(t0, t1, barrier_target, /* is_far */ true); } if (slow_path == nullptr) { diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp index 4a14fa65e7862..e9f330b2f4e2e 100644 --- a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp @@ -75,6 +75,9 @@ class BarrierSetAssembler: public CHeapObj { virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, Address dst, Register val, Register tmp1, Register tmp2, Register tmp3); + virtual void flat_field_copy(MacroAssembler* masm, DecoratorSet decorators, + Register src, Register dst, Register inline_layout_info); + virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, Register obj, Register tmp, Label& slowpath); diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp index ff15a0c693722..3fd8483ce138a 100644 --- a/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp +++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp @@ -23,8 +23,10 @@ * */ +#include "asm/macroAssembler.hpp" #include "code/codeCache.hpp" #include "code/nativeInst.hpp" +#include "gc/shared/barrierSet.hpp" #include "gc/shared/barrierSetAssembler.hpp" #include "gc/shared/barrierSetNMethod.hpp" #include "logging/log.hpp" @@ -53,69 +55,131 @@ static int entry_barrier_offset(nmethod* nm) { return 0; } +static int* decode_guard_from_instruction(nmethod* nm, address& instruction) { + int* result = reinterpret_cast(MacroAssembler::target_addr_for_insn(instruction)); + assert(nm->insts_contains(reinterpret_cast
(result)) || + nm->stub_contains(reinterpret_cast
(result)), + "guard must be in nmethod code"); + return result; +} + +// The NativeNMethodBarrier class encapsulates up to three entrypoints and handles their +// arming/verification. +// An entrypoint is defined as a tuple of (instr. address, guard address): +// * The instr. address corresponds to the load of the guard value of that entrypoint. +// * The guard address is the address where the guard value of that entrypoint resides. +// +// Each nmethod has at least one entrypoint. The default must always be well-defined +// (neither instruction nor guard is nullptr). +// +// When using the scalarized calling convention, up to two additional (verified) entrypoints, +// alt1 and alt2 can be present. The meaning of these depends on who compiled the nmethod. +// +// The mapping of C1-compiled methods (scalarization used) looks as follows: +// * alt1: verified entry point +// * alt2 (optional): verified inline ro entry point +// +// The mapping of C2-compiled methods (scalarization used) looks as follows: +// * alt1: verified inline entry point +// * alt2 (optional): verified inline ro entry point +// +// In other scenarios, neither alt1 nor alt2 is defined. class NativeNMethodBarrier { - address _instruction_address; - int* _guard_addr; - nmethod* _nm; + private: + // The addresses of the instructions that act as the guards. + address _default_entry_instruction; + address _verified_alt1_instruction; + address _verified_alt2_instruction; + // Pointers representing the actual guard values themselves. + int* _default_entry_guard; + int* _verified_alt1_guard; + int* _verified_alt2_guard; + + public: + NativeNMethodBarrier(nmethod* nm) : + _default_entry_instruction(nullptr), + _verified_alt1_instruction(nullptr), + _verified_alt2_instruction(nullptr), + _default_entry_guard(nullptr), + _verified_alt1_guard(nullptr), + _verified_alt2_guard(nullptr) + { + // The default entry point has a known address. The guard address can be + // decoded from the literal in the instruction. 
Verification will confirm + // that this instruction corresponds to a load. + _default_entry_instruction = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset(nm); + + _default_entry_guard = decode_guard_from_instruction(nm, _default_entry_instruction); - address instruction_address() const { return _instruction_address; } + // If the nmethod has scalarized arguments, then there are more entry + // points, each with their own nmethod entry barrier. + assert(nm->is_osr_method() || !nm->method()->has_scalarized_args(), "unimplemented"); - int *guard_addr() { - return _guard_addr; + // Perform the checking as verification. + err_msg msg("%s", ""); + assert(check_barriers(msg), "%s", msg.buffer()); } - int local_guard_offset(nmethod* nm) { - // It's the last instruction - return (-entry_barrier_offset(nm)) - 4; + // Gets the value of the default entry guard. + // This does not consider the alternative entrypoints, as these should + // all be consistent. It is up to the caller to enforce this. + int get_default_guard_value() { + return AtomicAccess::load_acquire(_default_entry_guard); } -public: - NativeNMethodBarrier(nmethod* nm): _nm(nm) { - _instruction_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset(nm); - if (nm->is_compiled_by_c2()) { - // With c2 compiled code, the guard is out-of-line in a stub - // We find it using the RelocIterator. - RelocIterator iter(nm); - while (iter.next()) { - if (iter.type() == relocInfo::entry_guard_type) { - entry_guard_Relocation* const reloc = iter.entry_guard_reloc(); - _guard_addr = reinterpret_cast(reloc->addr()); - return; - } - } - - ShouldNotReachHere(); + // Sets the value for all barriers. 
+ void set_values(int value, int bit_mask) { + set_value_impl(_default_entry_guard, value, bit_mask); + if (_verified_alt1_guard != nullptr) { + set_value_impl(_verified_alt1_guard, value, bit_mask); + } + if (_verified_alt2_guard != nullptr) { + set_value_impl(_verified_alt2_guard, value, bit_mask); } - _guard_addr = reinterpret_cast(instruction_address() + local_guard_offset(nm)); - - // Perform the checking as verification. - err_msg msg("%s", ""); - assert(check_barrier(msg), "%s", msg.buffer()); } - int get_value() { - return AtomicAccess::load_acquire(guard_addr()); + // Verifies that all potential barriers are correct. + bool check_barriers(err_msg& msg) { + // The default entry barrier should always be checked. + if (!check_barrier_impl(_default_entry_instruction, msg)) { + return false; + } + // Check the alternative entry barriers only if they are specified. + // Note that the guard values are already validated at construction time, + // if they fall out of the nmethod range, this will be caught earlier. + if (_verified_alt1_instruction != nullptr && + !check_barrier_impl(_verified_alt1_instruction, msg)) { + return false; + } + if (_verified_alt2_instruction != nullptr && + !check_barrier_impl(_verified_alt2_instruction, msg)) { + return false; + } + return true; } - void set_value(int value, int bit_mask) { +private: + // Sets the value for a single barrier. 
+ void set_value_impl(int* guard, int value, int bit_mask) { if (bit_mask == ~0) { - AtomicAccess::release_store(guard_addr(), value); + AtomicAccess::release_store(guard, value); return; } assert((value & ~bit_mask) == 0, "trying to set bits outside the mask"); value &= bit_mask; - int old_value = AtomicAccess::load(guard_addr()); + int old_value = AtomicAccess::load(guard); while (true) { // Only bits in the mask are changed int new_value = value | (old_value & ~bit_mask); if (new_value == old_value) break; - int v = AtomicAccess::cmpxchg(guard_addr(), old_value, new_value, memory_order_release); + int v = AtomicAccess::cmpxchg(guard, old_value, new_value, memory_order_release); if (v == old_value) break; old_value = v; } } - bool check_barrier(err_msg& msg) const; + // Checks the validity of a single barrier. + bool check_barrier_impl(address& instruction, err_msg& msg) const; }; // Store the instruction bitmask, bits and name for checking the barrier. @@ -137,8 +201,8 @@ static const struct CheckInsn barrierInsn[] = { // The encodings must match the instructions emitted by // BarrierSetAssembler::nmethod_entry_barrier. The matching ignores the specific // register numbers and immediate values in the encoding. 
-bool NativeNMethodBarrier::check_barrier(err_msg& msg) const { - address addr = instruction_address(); +bool NativeNMethodBarrier::check_barrier_impl(address& instruction, err_msg& msg) const { + address addr = instruction; for (unsigned int i = 0; i < sizeof(barrierInsn) / sizeof(struct CheckInsn); i++) { uint32_t inst = Assembler::ld_instr(addr); if ((inst & barrierInsn[i].mask) != barrierInsn[i].bits) { @@ -208,7 +272,7 @@ void BarrierSetNMethod::set_guard_value(nmethod* nm, int value, int bit_mask) { } NativeNMethodBarrier barrier(nm); - barrier.set_value(value, bit_mask); + barrier.set_values(value, bit_mask); } int BarrierSetNMethod::guard_value(nmethod* nm) { @@ -217,5 +281,5 @@ int BarrierSetNMethod::guard_value(nmethod* nm) { } NativeNMethodBarrier barrier(nm); - return barrier.get_value(); + return barrier.get_default_guard_value(); } diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp index eec5f9a5165c5..d6a89740af0c3 100644 --- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp @@ -522,7 +522,7 @@ void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assemble __ srli(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint()); __ add(tmp2, tmp2, tmp1); __ lbu(tmp2, Address(tmp2)); - __ beqz(tmp2, *stub->continuation(), true /* is_far */); + __ beqz(tmp2, *stub->continuation(), /* is_far */ true); } ce->store_parameter(res, 0); diff --git a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp index bf37ccf64e2e8..d9a0c5063f14c 100644 --- a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp @@ -185,7 +185,7 @@ void ZBarrierSetAssembler::store_barrier_fast(MacroAssembler* masm, 
__ relocate(barrier_Relocation::spec(), [&] { __ li16u(rnew_zpointer, barrier_Relocation::unpatched); }, ZBarrierRelocationFormatStoreGoodBits); - __ bne(rtmp, rnew_zpointer, medium_path, true /* is_far */); + __ bne(rtmp, rnew_zpointer, medium_path, /* is_far */ true); } else { __ ld(rtmp, ref_addr); // Stores on relocatable objects never need to deal with raw null pointers in fields. @@ -196,7 +196,7 @@ void ZBarrierSetAssembler::store_barrier_fast(MacroAssembler* masm, __ li16u(rnew_zpointer, barrier_Relocation::unpatched); }, ZBarrierRelocationFormatStoreBadMask); __ andr(rtmp, rtmp, rnew_zpointer); - __ bnez(rtmp, medium_path, true /* is_far */); + __ bnez(rtmp, medium_path, /* is_far */ true); } __ bind(medium_path_continuation); __ relocate(barrier_Relocation::spec(), [&] { @@ -210,7 +210,7 @@ void ZBarrierSetAssembler::store_barrier_fast(MacroAssembler* masm, __ ld(rtmp, rtmp); __ ld(rnew_zpointer, Address(xthread, ZThreadLocalData::store_bad_mask_offset())); __ andr(rtmp, rtmp, rnew_zpointer); - __ bnez(rtmp, medium_path, true /* is_far */); + __ bnez(rtmp, medium_path, /* is_far */ true); __ bind(medium_path_continuation); if (rnew_zaddress == noreg) { __ mv(rnew_zpointer, zr); diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp index dc3915aa39881..ce006d430f4c0 100644 --- a/src/hotspot/cpu/riscv/globals_riscv.hpp +++ b/src/hotspot/cpu/riscv/globals_riscv.hpp @@ -76,6 +76,9 @@ define_pd_global(intx, InitArrayShortSize, BytesPerLong); define_pd_global(intx, InlineSmallCode, 1000); +define_pd_global(bool, InlineTypePassFieldsAsArgs, false); +define_pd_global(bool, InlineTypeReturnedAsFields, false); + #define ARCH_FLAGS(develop, \ product, \ range, \ diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp index bb56acb3f38c5..623ecc1fd3b94 100644 --- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp @@ -32,9 +32,11 @@ 
#include "interpreter/interpreterRuntime.hpp" #include "logging/log.hpp" #include "oops/arrayOop.hpp" +#include "oops/constMethodFlags.hpp" #include "oops/markWord.hpp" #include "oops/method.hpp" #include "oops/methodData.hpp" +#include "oops/inlineKlass.hpp" #include "oops/resolvedFieldEntry.hpp" #include "oops/resolvedIndyEntry.hpp" #include "oops/resolvedMethodEntry.hpp" @@ -239,12 +241,15 @@ void InterpreterMacroAssembler::load_resolved_klass_at_offset( // Kills: // x12 void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass, - Label& ok_is_subtype) { + Label& ok_is_subtype, + bool profile) { assert(Rsub_klass != x10, "x10 holds superklass"); assert(Rsub_klass != x12, "x12 holds 2ndary super array length"); // Profile the not-null value's klass. - profile_typecheck(x12, Rsub_klass); // blows x12 + if (profile) { + profile_typecheck(x12, Rsub_klass); // blows x12 + } // Do the check. check_klass_subtype(Rsub_klass, x10, x12, ok_is_subtype); // blows x12 @@ -987,7 +992,7 @@ void InterpreterMacroAssembler::profile_taken_branch(Register mdp) { } } -void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { +void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp, bool acmp) { if (ProfileInterpreter) { Label profile_continue; @@ -999,7 +1004,7 @@ void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { // The method data pointer needs to be updated to correspond to // the next bytecode - update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); + update_mdp_by_constant(mdp, acmp ? 
in_bytes(ACmpData::acmp_data_size()) : in_bytes(BranchData::branch_data_size())); bind(profile_continue); } } @@ -1186,6 +1191,120 @@ void InterpreterMacroAssembler::profile_switch_case(Register index, } } +template void InterpreterMacroAssembler::profile_array_type(Register mdp, + Register array, + Register tmp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + mv(tmp, array); + profile_obj_type(tmp, Address(mdp, in_bytes(ArrayData::array_offset())), t1); + + Label not_flat; + test_non_flat_array_oop(array, tmp, not_flat); + + set_mdp_flag_at(mdp, ArrayData::flat_array_byte_constant()); + + bind(not_flat); + + Label not_null_free; + test_non_null_free_array_oop(array, tmp, not_null_free); + + set_mdp_flag_at(mdp, ArrayData::null_free_array_byte_constant()); + + bind(not_null_free); + + bind(profile_continue); + } +} + +template void InterpreterMacroAssembler::profile_array_type(Register mdp, + Register array, + Register tmp); +template void InterpreterMacroAssembler::profile_array_type(Register mdp, + Register array, + Register tmp); + +void InterpreterMacroAssembler::profile_multiple_element_types(Register mdp, Register element, Register tmp, const Register tmp2) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + Label done, update; + bnez(element, update); + set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); + j(done); + + bind(update); + load_klass(tmp, element); + + // Record the object type. + profile_receiver_type(tmp, mdp, 0); + + bind(done); + + // The method data pointer needs to be updated. 
+ update_mdp_by_constant(mdp, in_bytes(ArrayStoreData::array_store_data_size())); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_element_type(Register mdp, + Register element, + Register tmp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + mv(tmp, element); + profile_obj_type(tmp, Address(mdp, in_bytes(ArrayLoadData::element_offset())), t1); + + // The method data pointer needs to be updated. + update_mdp_by_constant(mdp, in_bytes(ArrayLoadData::array_load_data_size())); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_acmp(Register mdp, + Register left, + Register right, + Register tmp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + mv(tmp, left); + profile_obj_type(tmp, Address(mdp, in_bytes(ACmpData::left_offset())), t1); + + Label left_not_inline_type; + test_oop_is_not_inline_type(left, tmp, left_not_inline_type); + set_mdp_flag_at(mdp, ACmpData::left_inline_type_byte_constant()); + bind(left_not_inline_type); + + mv(tmp, right); + profile_obj_type(tmp, Address(mdp, in_bytes(ACmpData::right_offset())), t1); + + Label right_not_inline_type; + test_oop_is_not_inline_type(right, tmp, right_not_inline_type); + set_mdp_flag_at(mdp, ACmpData::right_inline_type_byte_constant()); + bind(right_not_inline_type); + + bind(profile_continue); + } +} + + void InterpreterMacroAssembler::notify_method_entry() { // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to // track stack depth. If it is possible to enter interp_only_mode we add @@ -1600,7 +1719,7 @@ void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register ca // argument. tmp is the number of cells left in the // CallTypeData/VirtualCallTypeData to reach its end. 
Non null // if there's a return to profile. - assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); + assert(SingleTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); shadd(mdp, tmp, mdp, tmp, exact_log2(DataLayout::cell_size)); } sd(mdp, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); @@ -1645,7 +1764,7 @@ void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, bind(do_profile); } - Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); + Address mdo_ret_addr(mdp, -in_bytes(SingleTypeEntry::size())); mv(tmp, ret); profile_obj_type(tmp, mdo_ret_addr, t1); @@ -1747,6 +1866,40 @@ void InterpreterMacroAssembler::get_method_counters(Register method, bind(has_counters); } +void InterpreterMacroAssembler::read_flat_field(Register entry, Register obj) { + call_VM(obj, CAST_FROM_FN_PTR(address, InterpreterRuntime::read_flat_field), obj, entry); + membar(MacroAssembler::StoreStore); +} + +void InterpreterMacroAssembler::write_flat_field(Register entry, Register field_offset, + Register tmp1, Register tmp2, + Register obj) { + assert_different_registers(entry, field_offset, tmp1, tmp2, obj); + Label slow_path, done; + + load_unsigned_byte(tmp1, Address(entry, in_bytes(ResolvedFieldEntry::flags_offset()))); + test_field_is_not_null_free_inline_type(tmp1, tmp2, slow_path); + + null_check(x10); // FIXME JDK-8341120 + + add(obj, obj, field_offset); + + load_klass(tmp1, x10); + payload_address(x10, x10, tmp1); + + Register layout_info = field_offset; + load_unsigned_short(tmp1, Address(entry, in_bytes(ResolvedFieldEntry::field_index_offset()))); + ld(tmp2, Address(entry, in_bytes(ResolvedFieldEntry::field_holder_offset()))); + inline_layout_info(tmp2, tmp1, layout_info); + + flat_field_copy(IN_HEAP, x10, obj, layout_info); + j(done); + + bind(slow_path); + call_VM(noreg, CAST_FROM_FN_PTR(address, 
InterpreterRuntime::write_flat_field), obj, x10, entry); + bind(done); +} + void InterpreterMacroAssembler::load_method_entry(Register cache, Register index, int bcp_offset) { // Get index out of bytecode pointer get_cache_index_at_bcp(index, cache, bcp_offset, sizeof(u2)); diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp index df86f0dc532bd..2b6115a9302f9 100644 --- a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp +++ b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp @@ -161,6 +161,17 @@ class InterpreterMacroAssembler: public MacroAssembler { void get_cache_index_at_bcp(Register index, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); void get_method_counters(Register method, Register mcs, Label& skip); + // Allocate instance in "obj" and read in the content of the inline field + // NOTES: + // - input holder object via "obj", which must be x10, + // will return new instance via the same reg + // - assumes holder_klass and valueKlass field klass have both been resolved + void read_flat_field(Register entry, Register obj); + + void write_flat_field(Register entry, Register field_offset, + Register tmp1, Register tmp2, + Register obj); + // Load cpool->resolved_references(index). void load_resolved_reference_at_index(Register result, Register index, Register tmp = x15); @@ -198,7 +209,7 @@ class InterpreterMacroAssembler: public MacroAssembler { // Generate a subtype check: branch to ok_is_subtype if sub_klass is // a subtype of super_klass. 
- void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); + void gen_subtype_check( Register sub_klass, Label &ok_is_subtype, bool profile = true); // Dispatching void dispatch_prolog(TosState state, int step = 0); @@ -271,7 +282,7 @@ class InterpreterMacroAssembler: public MacroAssembler { void narrow(Register result); void profile_taken_branch(Register mdp); - void profile_not_taken_branch(Register mdp); + void profile_not_taken_branch(Register mdp, bool acmp = false); void profile_call(Register mdp); void profile_final_call(Register mdp); void profile_virtual_call(Register receiver, Register mdp); @@ -282,6 +293,10 @@ class InterpreterMacroAssembler: public MacroAssembler { void profile_switch_default(Register mdp); void profile_switch_case(Register index_in_scratch, Register mdp, Register temp); + template void profile_array_type(Register mdp, Register array, Register tmp); + void profile_multiple_element_types(Register mdp, Register element, Register tmp, Register tmp2); + void profile_element_type(Register mdp, Register element, Register tmp); + void profile_acmp(Register mdp, Register left, Register right, Register tmp); void profile_obj_type(Register obj, const Address& mdo_addr, Register tmp); void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); diff --git a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp index b08e520393ab8..9755cb9ef16c7 100644 --- a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp +++ b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2004, 2026, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
@@ -31,6 +31,7 @@ #include "prims/jniFastGetField.hpp" #include "prims/jvm_misc.hpp" #include "prims/jvmtiExport.hpp" +#include "runtime/jfieldIDWorkaround.hpp" #include "runtime/safepoint.hpp" #define __ masm-> @@ -103,7 +104,7 @@ address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { assert_cond(bs != nullptr); bs->try_resolve_jobject_in_native(masm, c_rarg0, robj, t0, slow); - __ srli(roffset, c_rarg2, 2); // offset + __ srli(roffset, c_rarg2, jfieldIDWorkaround::offset_shift); // offset assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); speculative_load_pclist[count] = __ pc(); // Used by the segfault handler diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp index d93329544a7e8..5f625f3607998 100644 --- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp @@ -26,6 +26,7 @@ #include "asm/assembler.hpp" #include "asm/assembler.inline.hpp" +#include "ci/ciInlineKlass.hpp" #include "code/compiledIC.hpp" #include "compiler/disassembler.hpp" #include "gc/shared/barrierSet.hpp" @@ -43,6 +44,7 @@ #include "oops/compressedOops.inline.hpp" #include "oops/klass.inline.hpp" #include "oops/oop.hpp" +#include "oops/resolvedFieldEntry.hpp" #include "runtime/interfaceSupport.inline.hpp" #include "runtime/javaThread.hpp" #include "runtime/jniHandles.inline.hpp" @@ -945,6 +947,10 @@ void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, call_VM_leaf_base(entry_point, 3); } +void MacroAssembler::super_call_VM_leaf(address entry_point) { + MacroAssembler::call_VM_leaf_base(entry_point, 1); +} + void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { pass_arg0(this, arg_0); MacroAssembler::call_VM_leaf_base(entry_point, 1); @@ -3595,6 +3601,46 @@ void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { movptr(dst, Address((address)obj, rspec)); } +void MacroAssembler::inline_layout_info(Register 
holder_klass, Register index, Register layout_info) { + assert_different_registers(holder_klass, index, layout_info); + InlineLayoutInfo array[2]; + int size = (char*)&array[1] - (char*)&array[0]; // computing size of array elements + if (is_power_of_2(size)) { + slli(index, index, log2i_exact(size)); // Scale index by power of 2 + } else { + mv(layout_info, size); + mul(index, index, layout_info); // Scale the index to be the entry index * array_element_size + } + ld(layout_info, Address(holder_klass, InstanceKlass::inline_layout_info_array_offset())); + add(layout_info, layout_info, Array::base_offset_in_bytes()); + add(layout_info, layout_info, index); + la(layout_info, Address(layout_info)); +} + +void MacroAssembler::flat_field_copy(DecoratorSet decorators, Register src, Register dst, + Register inline_layout_info) { + BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->flat_field_copy(this, decorators, src, dst, inline_layout_info); +} + +void MacroAssembler::payload_offset(Register inline_klass, Register offset) { + ld(offset, Address(inline_klass, InlineKlass::adr_members_offset())); + lwu(offset, Address(offset, InlineKlass::payload_offset_offset())); +} + +void MacroAssembler::payload_address(Register oop, Register data, Register inline_klass) { + assert_different_registers(data, t0); + // ((address) (void*) o) + vk->payload_offset(); + Register offset = (data == oop) ? t0 : data; + payload_offset(inline_klass, offset); + if (data == oop) { + add(data, data, offset); + } else { + add(data, oop, offset); + la(data, Address(data)); + } +} + // Writes to stack successive pages until offset reached to check for // stack overflow + shadow pages. This clobbers tmp. 
void MacroAssembler::bang_stack_size(Register size, Register tmp) { @@ -3680,6 +3726,82 @@ void MacroAssembler::null_check(Register reg, int offset) { } } +void MacroAssembler::test_field_is_null_free_inline_type(Register flags, Register temp_reg, Label& is_null_free_inline_type) { + test_bit(temp_reg, flags, ResolvedFieldEntry::is_null_free_inline_type_shift); + bnez(temp_reg, is_null_free_inline_type); +} + +void MacroAssembler::test_field_is_not_null_free_inline_type(Register flags, Register temp_reg, Label& not_null_free_inline_type) { + test_bit(temp_reg, flags, ResolvedFieldEntry::is_null_free_inline_type_shift); + beqz(temp_reg, not_null_free_inline_type); +} + +void MacroAssembler::test_field_is_flat(Register flags, Register temp_reg, Label& is_flat) { + test_bit(temp_reg, flags, ResolvedFieldEntry::is_flat_shift); + bnez(temp_reg, is_flat); +} + +void MacroAssembler::test_markword_is_inline_type(Register markword, Label& is_inline_type) { + assert_different_registers(markword, t1); + mv(t1, markWord::inline_type_pattern_mask); + andr(markword, markword, t1); + mv(t1, markWord::inline_type_pattern); + beq(markword, t1, is_inline_type); +} + +void MacroAssembler::test_oop_is_not_inline_type(Register object, Register tmp, Label& not_inline_type, bool can_be_null) { + assert_different_registers(tmp, t0); + if (can_be_null) { + beqz(object, not_inline_type); + } + const int is_inline_type_mask = markWord::inline_type_pattern; + ld(tmp, Address(object, oopDesc::mark_offset_in_bytes())); + mv(t0, is_inline_type_mask); + andr(tmp, tmp, t0); + bne(tmp, t0, not_inline_type); +} + +void MacroAssembler::test_oop_prototype_bit(Register oop, Register temp_reg, int32_t tst_bit, bool jmp_set, Label& jmp_label) { + assert_different_registers(temp_reg, t0); + Label test_mark_word; + // load mark word + ld(temp_reg, Address(oop, oopDesc::mark_offset_in_bytes())); + // check displaced + test_bit(t0, temp_reg, exact_log2(markWord::unlocked_value)); + bnez(t0, test_mark_word); 
+ // slow path use klass prototype + load_prototype_header(temp_reg, oop); + + bind(test_mark_word); + andi(temp_reg, temp_reg, tst_bit); + if (jmp_set) { + bnez(temp_reg, jmp_label, /* is_far */ true); + } else { + beqz(temp_reg, jmp_label, /* is_far */ true); + } +} + +void MacroAssembler::test_flat_array_oop(Register oop, Register temp_reg, Label& is_flat_array) { + test_oop_prototype_bit(oop, temp_reg, markWord::flat_array_bit_in_place, true, is_flat_array); +} + +void MacroAssembler::test_null_free_array_oop(Register oop, Register temp_reg, Label& is_null_free_array) { + test_oop_prototype_bit(oop, temp_reg, markWord::null_free_array_bit_in_place, true, is_null_free_array); +} + +void MacroAssembler::test_non_flat_array_oop(Register oop, Register temp_reg, Label&is_non_flat_array) { + test_oop_prototype_bit(oop, temp_reg, markWord::flat_array_bit_in_place, false, is_non_flat_array); +} + +void MacroAssembler::test_non_null_free_array_oop(Register oop, Register temp_reg, Label&is_non_null_free_array) { + test_oop_prototype_bit(oop, temp_reg, markWord::null_free_array_bit_in_place, false, is_non_null_free_array); +} + +void MacroAssembler::test_flat_array_layout(Register lh, Label& is_flat_array) { + test_bit(t0, lh, exact_log2(Klass::_lh_array_tag_flat_value_bit_inplace)); + bnez(t0, is_flat_array); +} + void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { @@ -3779,6 +3901,11 @@ void MacroAssembler::load_klass(Register dst, Register src, Register tmp) { } } +void MacroAssembler::load_prototype_header(Register dst, Register src, Register tmp) { + load_klass(dst, src, tmp); + ld(dst, Address(dst, Klass::prototype_header_offset())); +} + void MacroAssembler::store_klass(Register dst, Register src, Register tmp) { // FIXME: Should this be a store release? concurrent gcs assumes // klass length is valid if klass field is not null. 
@@ -5243,6 +5370,8 @@ void MacroAssembler::get_thread(Register thread) { void MacroAssembler::load_byte_map_base(Register reg) { CardTableBarrierSet* ctbs = CardTableBarrierSet::barrier_set(); + // Strictly speaking the card table base isn't an address at all, and it might + // even be negative. It is thus materialised as a constant. mv(reg, (uint64_t)ctbs->card_table_base_const()); } @@ -5263,6 +5392,74 @@ void MacroAssembler::remove_frame(int framesize) { add(sp, sp, framesize); } +void MacroAssembler::remove_frame(int initial_framesize, bool needs_stack_repair) { + assert(!needs_stack_repair, "unimplemented"); + remove_frame(initial_framesize); +} + +#ifdef COMPILER2 +// C2 compiled method's prolog code +// Moved here from riscv.ad to support the Valhalla code below +void MacroAssembler::verified_entry(Compile* C, int sp_inc) { + if (C->clinit_barrier_on_entry()) { + assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started"); + + Label L_skip_barrier; + + mov_metadata(t1, C->method()->holder()->constant_encoding()); + clinit_barrier(t1, t0, &L_skip_barrier); + far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); + bind(L_skip_barrier); + } + + int bangsize = C->output()->bang_size_in_bytes(); + if (C->output()->need_stack_bang(bangsize)) { + generate_stack_overflow_check(bangsize); + } + + // n.b. 
frame size includes space for return pc and fp + const long framesize = C->output()->frame_size_in_bytes(); + build_frame(framesize); + + assert(!C->needs_stack_repair(), "unimplemented"); +} +#endif // COMPILER2 + +// Move a value between registers/stack slots and update the reg_state +bool MacroAssembler::move_helper(VMReg from, VMReg to, BasicType bt, RegState reg_state[]) { + Unimplemented(); + return false; +} + +// Read all fields from an inline type oop and store the values in registers/stack slots +bool MacroAssembler::unpack_inline_helper(const GrowableArray* sig, int& sig_index, + VMReg from, int& from_index, VMRegPair* to, int to_count, int& to_index, + RegState reg_state[]) { + + Unimplemented(); + return false; +} + +// Pack fields back into an inline type oop +bool MacroAssembler::pack_inline_helper(const GrowableArray* sig, int& sig_index, int vtarg_index, + VMRegPair* from, int from_count, int& from_index, VMReg to, + RegState reg_state[], Register val_array) { + Unimplemented(); + return false; +} + +// Calculate the extra stack space required for packing or unpacking inline +// args and adjust the stack pointer +int MacroAssembler::extend_stack_for_inline_args(int args_on_stack) { + Unimplemented(); + return 0; // placeholder int result for the not-yet-implemented stub +} + +VMReg MacroAssembler::spill_reg_for(VMReg reg) { + Unimplemented(); + return reg; +} + void MacroAssembler::reserved_stack_check() { // testing if reserved zone needs to be enabled Label no_reserved_zone_enabling; @@ -5528,6 +5725,14 @@ void MacroAssembler::load_method_holder(Register holder, Register method) { ld(holder, Address(holder, ConstantPool::pool_holder_offset())); // InstanceKlass* } +void MacroAssembler::load_metadata(Register dst, Register src) { + if (UseCompactObjectHeaders) { + load_narrow_klass_compact(dst, src); + } else { + lwu(dst, Address(src, oopDesc::klass_offset_in_bytes())); + } +} + // string indexof // compute index by trailing zeros void MacroAssembler::compute_index(Register haystack, Register 
trailing_zeros, @@ -6854,6 +7059,8 @@ void MacroAssembler::fast_lock(Register basic_lock, Register obj, Register tmp1, // Try to lock. Transition lock-bits 0b01 => 0b00 assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid a la"); ori(mark, mark, markWord::unlocked_value); + // Mask inline_type bit such that we go to the slow path if object is an inline type + andi(mark, mark, ~((int) markWord::inline_type_bit_in_place)); xori(t, mark, markWord::unlocked_value); cmpxchg(/*addr*/ obj, /*expected*/ mark, /*new*/ t, Assembler::int64, /*acquire*/ Assembler::aq, /*release*/ Assembler::relaxed, /*result*/ t); diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp index a5ad7eeaa5f6a..4a693ab0a0ea1 100644 --- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp @@ -32,6 +32,9 @@ #include "metaprogramming/enableIf.hpp" #include "oops/compressedOops.hpp" #include "utilities/powerOfTwo.hpp" +#include "runtime/signature.hpp" + +class ciInlineKlass; // MacroAssembler extends Assembler by frequently used macros. 
// @@ -138,6 +141,7 @@ class MacroAssembler: public Assembler { // These always tightly bind to MacroAssembler::call_VM_base // bypassing the virtual implementation + void super_call_VM_leaf(address entry_point); void super_call_VM_leaf(address entry_point, Register arg_0); void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1); void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2); @@ -196,6 +200,7 @@ class MacroAssembler: public Assembler { void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register val, Register tmp1, Register tmp2, Register tmp3); void load_klass(Register dst, Register src, Register tmp = t0); + void load_prototype_header(Register dst, Register src, Register tmp = t0); void load_narrow_klass_compact(Register dst, Register src); void store_klass(Register dst, Register src, Register tmp = t0); void cmp_klass_beq(Register obj, Register klass, @@ -248,6 +253,29 @@ class MacroAssembler: public Assembler { static bool needs_explicit_null_check(intptr_t offset); static bool uses_implicit_null_check(void* address); + void test_field_is_null_free_inline_type(Register flags, Register temp_reg, Label& is_null_free_inline_type); + void test_field_is_not_null_free_inline_type(Register flags, Register temp_reg, Label& not_null_free_inline_type); + void test_field_is_flat(Register flags, Register temp_reg, Label& is_flat); + + void test_markword_is_inline_type(Register markword, Label& is_inline_type); + void test_oop_is_not_inline_type(Register object, Register tmp, Label& not_inline_type, bool can_be_null = true); + void test_oop_prototype_bit(Register oop, Register temp_reg, int32_t tst_bit, bool jmp_set, Label& jmp_label); + void test_flat_array_oop(Register oop, Register temp_reg, Label& is_flat_array); + void test_null_free_array_oop(Register oop, Register temp_reg, Label& is_null_free_array); + void test_non_flat_array_oop(Register oop, Register temp_reg, 
Label&is_non_flat_array); + void test_non_null_free_array_oop(Register oop, Register temp_reg, Label&is_non_null_free_array); + + // Check array klass layout helper for flat or null-free arrays... + void test_flat_array_layout(Register lh, Label& is_flat_array); + + void inline_layout_info(Register holder_klass, Register index, Register layout_info); + + void flat_field_copy(DecoratorSet decorators, Register src, Register dst, Register inline_layout_info); + + // inline type data payload offsets... + void payload_offset(Register inline_klass, Register offset); + void payload_address(Register oop, Register data, Register inline_klass); + // interface method calling void lookup_interface_method(Register recv_klass, Register intf_klass, @@ -290,6 +318,7 @@ class MacroAssembler: public Assembler { } // allocation + void tlab_allocate( Register obj, // result: pointer to object after successful allocation Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise @@ -1278,6 +1307,8 @@ class MacroAssembler: public Assembler { void build_frame(int framesize); void remove_frame(int framesize); + void verified_entry(Compile* C, int sp_inc); + void reserved_stack_check(); void get_polling_page(Register dest, relocInfo::relocType rtype); @@ -1350,6 +1381,7 @@ class MacroAssembler: public Assembler { void load_method_holder_cld(Register result, Register method); void load_method_holder(Register holder, Register method); + void load_metadata(Register dst, Register src); void compute_index(Register str1, Register trailing_zeros, Register match_mask, Register result, Register char_tmp, Register tmp, @@ -1806,6 +1838,10 @@ class MacroAssembler: public Assembler { } static uint32_t get_membar_kind(address addr); static void set_membar_kind(address addr, uint32_t order_kind); + + public: + // Inline type specific methods + #include "asm/macroAssembler_common.hpp" }; #ifdef ASSERT diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp 
b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp index e80dedf58ed47..3f7f4a34f4a06 100644 --- a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp +++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -166,7 +166,11 @@ void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register meth __ BIND(run_compiled_code); } - const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : + // The following jump might pass an inline type argument that was erased to Object as oop to a + // callee that expects inline type arguments to be passed as fields. We need to call the compiled + // value entry (_code->inline_entry_point() or _adapter->c2i_inline_entry()) which will take care + // of translating between the calling conventions. + const ByteSize entry_offset = for_compiler_entry ? 
Method::from_compiled_inline_offset() : Method::from_interpreted_offset(); __ ld(t1, Address(method, entry_offset)); __ jr(t1); diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad index 0c077dc84a30f..0ae1e2ad2ac7e 100644 --- a/src/hotspot/cpu/riscv/riscv.ad +++ b/src/hotspot/cpu/riscv/riscv.ad @@ -1228,6 +1228,9 @@ int MachCallRuntimeNode::ret_addr_offset() { // jalr(t1, offset) -> jalr if (CodeCache::contains(_entry_point)) { return 2 * NativeInstruction::instruction_size; + } else if (_entry_point == nullptr) { + // See CallLeafNoFPIndirect + return 1 * NativeInstruction::instruction_size; } else { return 8 * NativeInstruction::instruction_size; } @@ -1275,6 +1278,11 @@ int CallLeafDirectVectorNode::compute_padding(int current_offset) const return align_up(current_offset, alignment_required()) - current_offset; } +int CallLeafNoFPIndirectNode::compute_padding(int current_offset) const +{ + return align_up(current_offset, alignment_required()) - current_offset; +} + int CallLeafNoFPDirectNode::compute_padding(int current_offset) const { return align_up(current_offset, alignment_required()) - current_offset; @@ -1381,53 +1389,21 @@ void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { assert_cond(ra_ != nullptr); Compile* C = ra_->C; - // n.b. 
frame size includes space for return pc and fp - const int framesize = C->output()->frame_size_in_bytes(); - - assert_cond(C != nullptr); - - if (C->clinit_barrier_on_entry()) { - assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started"); - - Label L_skip_barrier; - - __ mov_metadata(t1, C->method()->holder()->constant_encoding()); - __ clinit_barrier(t1, t0, &L_skip_barrier); - __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); - __ bind(L_skip_barrier); - } - - int bangsize = C->output()->bang_size_in_bytes(); - if (C->output()->need_stack_bang(bangsize)) { - __ generate_stack_overflow_check(bangsize); - } - - __ build_frame(framesize); + __ verified_entry(C, 0); if (VerifyStackAtCalls) { + // n.b. frame size includes space for return pc and fp + const long framesize = C->output()->frame_size_in_bytes(); __ mv(t2, MAJIK_DWORD); __ sd(t2, Address(sp, framesize - 3 * wordSize)); } if (C->stub_function() == nullptr) { - BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); - // Dummy labels for just measuring the code size - Label dummy_slow_path; - Label dummy_continuation; - Label dummy_guard; - Label* slow_path = &dummy_slow_path; - Label* continuation = &dummy_continuation; - Label* guard = &dummy_guard; - if (!Compile::current()->output()->in_scratch_emit_size()) { - // Use real labels from actual stub when not emitting code for purpose of measuring its size - C2EntryBarrierStub* stub = new (Compile::current()->comp_arena()) C2EntryBarrierStub(); - Compile::current()->output()->add_stub(stub); - slow_path = &stub->entry(); - continuation = &stub->continuation(); - guard = &stub->guard(); - } - // In the C2 code, we move the non-hot part of nmethod entry barriers out-of-line to a stub. 
- bs->nmethod_entry_barrier(masm, slow_path, continuation, guard); + __ entry_barrier(); + } + + if (!Compile::current()->output()->in_scratch_emit_size()) { + __ bind(*_verified_entry); } C->output()->set_frame_complete(__ offset()); @@ -1440,13 +1416,6 @@ void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { } } -uint MachPrologNode::size(PhaseRegAlloc* ra_) const -{ - assert_cond(ra_ != nullptr); - return MachNode::size(ra_); // too many variables; just compute it - // the hard way -} - int MachPrologNode::reloc() const { return 0; @@ -1506,12 +1475,6 @@ void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { } } -uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { - assert_cond(ra_ != nullptr); - // Variable size. Determine dynamically. - return MachNode::size(ra_); -} - int MachEpilogNode::reloc() const { // Return number of relocatable values contained in this instruction. return 1; // 1 for polling page. @@ -1790,6 +1753,20 @@ uint BoxLockNode::size(PhaseRegAlloc *ra_) const { //============================================================================= +#ifndef PRODUCT +void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const +{ + Unimplemented(); +} +#endif + +void MachVEPNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const +{ + Unimplemented(); +} + +//============================================================================= + #ifndef PRODUCT void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const { @@ -1812,12 +1789,6 @@ void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const assert(((__ offset()) % CodeEntryAlignment) == 0, "Misaligned verified entry point"); } -uint MachUEPNode::size(PhaseRegAlloc* ra_) const -{ - assert_cond(ra_ != nullptr); - return MachNode::size(ra_); -} - // REQUIRED EMIT CODE //============================================================================= @@ -2682,6 +2653,16 @@ operand immI_le_4() interface(CONST_INTER); %} 
+operand immI_4() +%{ + predicate(n->get_int() == 4); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + operand immI_16() %{ predicate(n->get_int() == 16); @@ -8345,6 +8326,34 @@ instruct spin_wait() %{ // ============================================================================ // Cast Instructions (Java-level type cast) +instruct castI2N(iRegNNoSp dst, iRegI src) %{ + match(Set dst (CastI2N src)); + + ins_cost(ALU_COST); + format %{ "zext $dst, $src, 32\t# int -> narrow ptr" %} + + ins_encode %{ + __ zext(as_Register($dst$$reg), as_Register($src$$reg), 32); + %} + + ins_pipe(ialu_reg); +%} + +instruct castN2X(iRegLNoSp dst, iRegN src) %{ + match(Set dst (CastP2X src)); + + ins_cost(ALU_COST); + format %{ "mv $dst, $src\t# ptr -> long" %} + + ins_encode %{ + if ($dst$$reg != $src$$reg) { + __ mv(as_Register($dst$$reg), as_Register($src$$reg)); + } + %} + + ins_pipe(ialu_reg); +%} + instruct castX2P(iRegPNoSp dst, iRegL src) %{ match(Set dst (CastX2P src)); @@ -10882,8 +10891,31 @@ instruct CallLeafDirectVector(method meth) // Call Runtime Instruction +// entry point is null, target holds the address to call +instruct CallLeafNoFPIndirect(iRegP target) +%{ + predicate(n->as_Call()->entry_point() == nullptr); + + match(CallLeafNoFP target); + + ins_cost(BRANCH_COST); + + format %{ "CALL, runtime leaf nofp indirect $target" %} + + ins_encode %{ + Assembler::IncompressibleScope scope(masm); // Fixed length: see ret_addr_offset + __ jalr($target$$Register); + __ post_call_nop(); + %} + + ins_pipe(pipe_class_call); + ins_alignment(4); +%} + instruct CallLeafNoFPDirect(method meth) %{ + predicate(n->as_Call()->entry_point() != nullptr); + match(CallLeafNoFP); effect(USE meth); @@ -11213,17 +11245,20 @@ instruct stringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, ins_pipe(pipe_class_memory); %} +// ============================================================================ // clearing of an array -instruct 
clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, iRegP_R30 tmp1, - iRegP_R31 tmp2, rFlagsReg cr, Universe dummy) + +instruct clearArray_reg_reg_immL0(iRegL_R29 cnt, iRegP_R28 base, immL0 zero, + iRegP_R30 tmp1, iRegP_R31 tmp2, rFlagsReg cr, + Universe dummy) %{ // temp registers must match the one used in StubGenerator::generate_zero_blocks() predicate(UseBlockZeroing || !UseRVV); - match(Set dummy (ClearArray cnt base)); + match(Set dummy (ClearArray (Binary cnt base) zero)); effect(USE_KILL cnt, USE_KILL base, TEMP tmp1, TEMP tmp2, KILL cr); ins_cost(4 * DEFAULT_COST); - format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg" %} + format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg_immL0" %} ins_encode %{ address tpc = __ zero_words($base$$Register, $cnt$$Register); @@ -11236,11 +11271,32 @@ instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, iRegP_R30 tmp1, ins_pipe(pipe_class_memory); %} -instruct clearArray_imm_reg(immL cnt, iRegP_R28 base, Universe dummy, rFlagsReg cr) +instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, iRegL val, + iRegP_R30 tmp1, iRegP_R31 tmp2, rFlagsReg cr, + Universe dummy) +%{ + // temp registers must match the one used in StubGenerator::generate_zero_blocks() + predicate(((ClearArrayNode*)n)->word_copy_only()); + match(Set dummy (ClearArray (Binary cnt base) val)); + effect(USE_KILL cnt, USE_KILL base, TEMP tmp1, TEMP tmp2, KILL cr); + + ins_cost(4 * DEFAULT_COST); + format %{ "ClearArray $cnt, $base, $val\t#@clearArray_reg_reg" %} + + ins_encode %{ + __ fill_words($base$$Register, $cnt$$Register, $val$$Register); + %} + + ins_pipe(pipe_class_memory); +%} + +instruct clearArray_imm_reg(immL cnt, iRegP_R28 base, immL0 zero, Universe dummy, rFlagsReg cr) %{ - predicate(!UseRVV && (uint64_t)n->in(2)->get_long() - < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)); - match(Set dummy (ClearArray cnt base)); + predicate(!UseRVV + && (uint64_t)n->in(2)->in(1)->get_long() + < (uint64_t)(BlockZeroingLowLimit >> 
LogBytesPerWord) + && !((ClearArrayNode*)n)->word_copy_only()); + match(Set dummy (ClearArray (Binary cnt base) zero)); effect(USE_KILL base, KILL cr); ins_cost(4 * DEFAULT_COST); diff --git a/src/hotspot/cpu/riscv/riscv_v.ad b/src/hotspot/cpu/riscv/riscv_v.ad index a0af43364cb0e..4e85f9dea813d 100644 --- a/src/hotspot/cpu/riscv/riscv_v.ad +++ b/src/hotspot/cpu/riscv/riscv_v.ad @@ -1,5 +1,5 @@ // -// Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2020, 2026, Oracle and/or its affiliates. All rights reserved. // Copyright (c) 2020, 2023, Arm Limited. All rights reserved. // Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. // Copyright (c) 2023, 2025, Rivos Inc. All rights reserved. @@ -4344,14 +4344,15 @@ instruct vstringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, %} // clearing of an array -instruct vclearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy, - vReg_V4 v4, vReg_V5 v5, vReg_V6 v6, vReg_V7 v7) +instruct vclearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, immL0 zero, + vReg_V4 v4, vReg_V5 v5, vReg_V6 v6, vReg_V7 v7, + Universe dummy) %{ predicate(!UseBlockZeroing && UseRVV); - match(Set dummy (ClearArray cnt base)); + match(Set dummy (ClearArray (Binary cnt base) zero)); effect(USE_KILL cnt, USE_KILL base, TEMP v4, TEMP v5, TEMP v6, TEMP v7); - format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg" %} + format %{ "ClearArray $cnt, $base\t#@vclearArray_reg_reg" %} ins_encode %{ __ clear_array_v($base$$Register, $cnt$$Register); diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp index 9ad6663d0fa35..ce9fbaef9593a 100644 --- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp @@ -26,6 +26,7 @@ #include "asm/macroAssembler.hpp" #include "asm/macroAssembler.inline.hpp" +#include "classfile/symbolTable.hpp" #include "code/compiledIC.hpp" #include 
"code/debugInfoRec.hpp" #include "code/vtableStubs.hpp" @@ -309,6 +310,90 @@ int SharedRuntime::java_calling_convention(const BasicType *sig_bt, return stk_args; } +const uint SharedRuntime::java_return_convention_max_int = Argument::n_int_register_parameters_j; +const uint SharedRuntime::java_return_convention_max_float = Argument::n_float_register_parameters_j; + +int SharedRuntime::java_return_convention(const BasicType *sig_bt, + VMRegPair *regs, + int total_args_passed) { + // Create the mapping between argument positions and registers. + + static const Register INT_ArgReg[java_return_convention_max_int] = { + x10 /* j_rarg7 */, j_rarg6, j_rarg5, j_rarg4, j_rarg3, j_rarg2, j_rarg1, j_rarg0 + }; + + static const FloatRegister FP_ArgReg[java_return_convention_max_float] = { + j_farg0, j_farg1, j_farg2, j_farg3, j_farg4, j_farg5, j_farg6, j_farg7 + }; + + uint int_args = 0; + uint fp_args = 0; + + for (int i = 0; i < total_args_passed; i++) { + switch (sig_bt[i]) { + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + if (int_args < SharedRuntime::java_return_convention_max_int) { + regs[i].set1(INT_ArgReg[int_args]->as_VMReg()); + int_args ++; + } else { + return -1; + } + break; + case T_VOID: + // halves of T_LONG or T_DOUBLE + assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); + regs[i].set_bad(); + break; + case T_LONG: + assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); + // fall through + case T_OBJECT: + case T_ARRAY: + case T_ADDRESS: + // Should T_METADATA be added to java_calling_convention as well ? 
+ case T_METADATA: + if (int_args < SharedRuntime::java_return_convention_max_int) { + regs[i].set2(INT_ArgReg[int_args]->as_VMReg()); + int_args ++; + } else { + return -1; + } + break; + case T_FLOAT: + if (fp_args < SharedRuntime::java_return_convention_max_float) { + regs[i].set1(FP_ArgReg[fp_args]->as_VMReg()); + fp_args ++; + } else { + return -1; + } + break; + case T_DOUBLE: + assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); + if (fp_args < SharedRuntime::java_return_convention_max_float) { + regs[i].set2(FP_ArgReg[fp_args]->as_VMReg()); + fp_args ++; + } else { + return -1; + } + break; + default: + ShouldNotReachHere(); + break; + } + } + + return int_args + fp_args; +} + +BufferedInlineTypeBlob* SharedRuntime::generate_buffered_inline_type_adapter(const InlineKlass* vk) { + Unimplemented(); + return nullptr; +} + // Patch the callers callsite with entry to compiled code if it exists. static void patch_callers_callsite(MacroAssembler *masm) { Label L; @@ -337,12 +422,103 @@ static void patch_callers_callsite(MacroAssembler *masm) { __ bind(L); } +// For each inline type argument, sig includes the list of fields of +// the inline type. This utility function computes the number of +// arguments for the call if inline types are passed by reference (the +// calling convention the interpreter expects). 
+static int compute_total_args_passed_int(const GrowableArray* sig_extended) { + int total_args_passed = 0; + assert(!InlineTypePassFieldsAsArgs, ""); + total_args_passed = sig_extended->length(); + return total_args_passed; +} + +static void gen_c2i_adapter_helper(MacroAssembler* masm, + BasicType bt, + BasicType prev_bt, + size_t size_in_bytes, + const VMRegPair& reg_pair, + const Address& to, + int extraspace) { + if (bt == T_VOID) { + assert(prev_bt == T_LONG || prev_bt == T_DOUBLE, "missing half"); + return; + } + + // Say 4 args: + // i st_off + // 0 32 T_LONG + // 1 24 T_VOID + // 2 16 T_OBJECT + // 3 8 T_BOOL + // - 0 return address + // + // However, to make things extra confusing: because we can fit a Java long/double in + // a single slot on a 64 bit vm and it would be silly to break them up, the interpreter + // leaves one slot empty and only stores to a single slot. In this case the + // slot that is occupied is the T_VOID slot. See, I said it was confusing. + + bool wide = (size_in_bytes == wordSize); + + VMReg r_1 = reg_pair.first(); + VMReg r_2 = reg_pair.second(); + assert(r_2->is_valid() == wide, "invalid size"); + if (!r_1->is_valid()) { + assert(!r_2->is_valid(), ""); + return; + } + + if (!r_1->is_FloatRegister()) { + Register val = t1; + if (r_1->is_stack()) { + int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; + __ load_sized_value(val, Address(sp, ld_off), size_in_bytes, /* is_signed */ false); + } else { + val = r_1->as_Register(); + } + __ store_sized_value(to, val, size_in_bytes); + } else { + if (wide) { + __ fsd(r_1->as_FloatRegister(), to); + } else { + // only a float uses just part of the slot + __ fsw(r_1->as_FloatRegister(), to); + } + } +} + static void gen_c2i_adapter(MacroAssembler *masm, - int total_args_passed, - int comp_args_on_stack, - const BasicType *sig_bt, + const GrowableArray* sig_extended, const VMRegPair *regs, - Label& skip_fixup) { + bool requires_clinit_barrier, + address& 
c2i_no_clinit_check_entry, + Label& skip_fixup, + address start, + OopMapSet* oop_maps, + int& frame_complete, + int& frame_size_in_words, + bool alloc_inline_receiver) { + if (requires_clinit_barrier) { + assert(VM_Version::supports_fast_class_init_checks(), "sanity"); + Label L_skip_barrier; + + { // Bypass the barrier for non-static methods + __ lhu(t0, Address(xmethod, Method::access_flags_offset())); + __ test_bit(t0, t0, exact_log2(JVM_ACC_STATIC)); + __ beqz(t0, L_skip_barrier); // non-static + } + + __ load_method_holder(t1, xmethod); + __ clinit_barrier(t1, t0, &L_skip_barrier); + __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); + + __ bind(L_skip_barrier); + c2i_no_clinit_check_entry = __ pc(); + } + + BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->c2i_entry_barrier(masm); + // Before we get into the guts of the C2I adapter, see if we should be here // at all. We've come from compiled code and are attempting to jump to the // interpreter, which means the caller made a static call to get here @@ -352,112 +528,56 @@ static void gen_c2i_adapter(MacroAssembler *masm, __ bind(skip_fixup); - int words_pushed = 0; - // Since all args are passed on the stack, total_args_passed * // Interpreter::stackElementSize is the space we need. 
+ int total_args_passed = compute_total_args_passed_int(sig_extended); int extraspace = total_args_passed * Interpreter::stackElementSize; __ mv(x19_sender_sp, sp); // stack is aligned, keep it that way - extraspace = align_up(extraspace, 2 * wordSize); + extraspace = align_up(extraspace, StackAlignmentInBytes); if (extraspace) { __ sub(sp, sp, extraspace); } // Now write the args into the outgoing interpreter space - for (int i = 0; i < total_args_passed; i++) { - if (sig_bt[i] == T_VOID) { - assert(i > 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "missing half"); - continue; - } - // offset to start parameters - int st_off = (total_args_passed - i - 1) * Interpreter::stackElementSize; + // next_arg_comp is the next argument from the compiler point of + // view (inline type fields are passed in registers/on the stack). In + // sig_extended, an inline type argument starts with: T_METADATA, + // followed by the types of the fields of the inline type and T_VOID + // to mark the end of the inline type. ignored counts the number of + // T_METADATA/T_VOID. next_vt_arg is the next inline type argument: + // used to get the buffer for that argument from the pool of buffers + // we allocated above and want to pass to the + // interpreter. next_arg_int is the next argument from the + // interpreter point of view (inline types are passed by reference). 
+ for (int next_arg_comp = 0, ignored = 0, next_vt_arg = 0, next_arg_int = 0; + next_arg_comp < sig_extended->length(); next_arg_comp++) { + assert(ignored <= next_arg_comp, "shouldn't skip over more slots than there are arguments"); + assert(next_arg_int <= total_args_passed, "more arguments for the interpreter than expected?"); + BasicType bt = sig_extended->at(next_arg_comp)._bt; + assert(!InlineTypePassFieldsAsArgs, ""); + + int st_off = (total_args_passed - next_arg_int - 1) * Interpreter::stackElementSize; int next_off = st_off - Interpreter::stackElementSize; + const int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : st_off; + const VMRegPair reg_pair = regs[next_arg_comp-ignored]; + size_t size_in_bytes = reg_pair.second()->is_valid() ? 8 : 4; + gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended->at(next_arg_comp - 1)._bt : T_ILLEGAL, + size_in_bytes, reg_pair, Address(sp, offset), extraspace); + next_arg_int++; - // Say 4 args: - // i st_off - // 0 32 T_LONG - // 1 24 T_VOID - // 2 16 T_OBJECT - // 3 8 T_BOOL - // - 0 return address - // - // However to make thing extra confusing. Because we can fit a Java long/double in - // a single slot on a 64 bt vm and it would be silly to break them up, the interpreter - // leaves one slot empty and only stores to a single slot. In this case the - // slot that is occupied is the T_VOID slot. See I said it was confusing. 
- - VMReg r_1 = regs[i].first(); - VMReg r_2 = regs[i].second(); - if (!r_1->is_valid()) { - assert(!r_2->is_valid(), ""); - continue; - } - if (r_1->is_stack()) { - // memory to memory use t0 - int ld_off = (r_1->reg2stack() * VMRegImpl::stack_slot_size - + extraspace - + words_pushed * wordSize); - if (!r_2->is_valid()) { - __ lwu(t0, Address(sp, ld_off)); - __ sd(t0, Address(sp, st_off), /*temp register*/esp); - } else { - __ ld(t0, Address(sp, ld_off), /*temp register*/esp); - - // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG - // T_DOUBLE and T_LONG use two slots in the interpreter - if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { - // ld_off == LSW, ld_off+wordSize == MSW - // st_off == MSW, next_off == LSW - __ sd(t0, Address(sp, next_off), /*temp register*/esp); -#ifdef ASSERT - // Overwrite the unused slot with known junk - __ mv(t0, 0xdeadffffdeadaaaaul); - __ sd(t0, Address(sp, st_off), /*temp register*/esp); -#endif /* ASSERT */ - } else { - __ sd(t0, Address(sp, st_off), /*temp register*/esp); - } - } - } else if (r_1->is_Register()) { - Register r = r_1->as_Register(); - if (!r_2->is_valid()) { - // must be only an int (or less ) so move only 32bits to slot - __ sd(r, Address(sp, st_off)); - } else { - // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG - // T_DOUBLE and T_LONG use two slots in the interpreter - if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { - // long/double in gpr -#ifdef ASSERT - // Overwrite the unused slot with known junk - __ mv(t0, 0xdeadffffdeadaaabul); - __ sd(t0, Address(sp, st_off), /*temp register*/esp); -#endif /* ASSERT */ - __ sd(r, Address(sp, next_off)); - } else { - __ sd(r, Address(sp, st_off)); - } - } - } else { - assert(r_1->is_FloatRegister(), ""); - if (!r_2->is_valid()) { - // only a float use just part of the slot - __ fsw(r_1->as_FloatRegister(), Address(sp, st_off)); - } else { #ifdef ASSERT - // Overwrite the unused slot with known junk - __ mv(t0, 
0xdeadffffdeadaaacul); - __ sd(t0, Address(sp, st_off), /*temp register*/esp); -#endif /* ASSERT */ - __ fsd(r_1->as_FloatRegister(), Address(sp, next_off)); - } + if (bt == T_LONG || bt == T_DOUBLE) { + // Overwrite the unused slot with known junk + __ mv(t0, CONST64(0xdeadffffdeadaaaa)); + __ sd(t0, Address(sp, st_off)); } +#endif /* ASSERT */ } __ mv(esp, sp); // Interp expects args on caller's expression stack @@ -467,9 +587,8 @@ static void gen_c2i_adapter(MacroAssembler *masm, } void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, - int total_args_passed, int comp_args_on_stack, - const BasicType *sig_bt, + const GrowableArray* sig, const VMRegPair *regs) { // Note: x19_sender_sp contains the senderSP on entry. We must // preserve it since we may do a i2c -> c2i transition if we lose a @@ -477,20 +596,24 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, // ready. // Cut-out for having no stack args. - int comp_words_on_stack = align_up(comp_args_on_stack * VMRegImpl::stack_slot_size, wordSize) >> LogBytesPerWord; + int comp_words_on_stack = 0; if (comp_args_on_stack != 0) { + comp_words_on_stack = align_up(comp_args_on_stack * VMRegImpl::stack_slot_size, wordSize) >> LogBytesPerWord; __ sub(t0, sp, comp_words_on_stack * wordSize); __ andi(sp, t0, -16); } // Will jump to the compiled code just as if compiled code was doing it. // Pre-load the register-jump target early, to schedule it better. - __ ld(t1, Address(xmethod, in_bytes(Method::from_compiled_offset()))); + __ ld(t1, Address(xmethod, in_bytes(Method::from_compiled_inline_offset()))); + + int total_args_passed = sig->length(); // Now generate the shuffle code. 
for (int i = 0; i < total_args_passed; i++) { - if (sig_bt[i] == T_VOID) { - assert(i > 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "missing half"); + BasicType bt = sig->at(i)._bt; + if (bt == T_VOID) { + assert(i > 0 && (sig->at(i - 1)._bt == T_LONG || sig->at(i - 1)._bt == T_DOUBLE), "missing half"); continue; } @@ -527,8 +650,7 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, // are accessed as negative so LSW is at LOW address // ld_off is MSW so get LSW - const int offset = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? - next_off : ld_off; + const int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : ld_off; __ ld(t0, Address(esp, offset)); // st_off is LSW (i.e. reg.first()) __ sd(t0, Address(sp, st_off), /*temp register*/t2); @@ -543,8 +665,7 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, // So we must adjust where to pick up the data to match the // interpreter. - const int offset = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? - next_off : ld_off; + const int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : ld_off; // this can be a misaligned move __ ld(r, Address(esp, offset)); @@ -578,22 +699,36 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, __ jr(t1); } +static void gen_inline_cache_check(MacroAssembler *masm, Label& skip_fixup) { + Register data = t0; + + __ ic_check(); + __ ld(xmethod, Address(data, CompiledICData::speculated_method_offset())); + + // Method might have been compiled since the call site was patched to + // interpreted; if that is the case treat it as a miss so we can get + // the call site corrected. 
+ __ ld(t0, Address(xmethod, in_bytes(Method::code_offset()))); + __ beqz(t0, skip_fixup); + __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); +} + // --------------------------------------------------------------- -void SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, - int total_args_passed, +void SharedRuntime::generate_i2c2i_adapters(MacroAssembler* masm, int comp_args_on_stack, - const BasicType *sig_bt, - const VMRegPair *regs, - address entry_address[AdapterBlob::ENTRY_COUNT]) { - entry_address[AdapterBlob::I2C] = __ pc(); - gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); - - entry_address[AdapterBlob::C2I_Unverified] = __ pc(); - Label skip_fixup; + const GrowableArray<SigEntry>* sig, + const VMRegPair* regs, + const GrowableArray<SigEntry>* sig_cc, + const VMRegPair* regs_cc, + const GrowableArray<SigEntry>* sig_cc_ro, + const VMRegPair* regs_cc_ro, + address entry_address[AdapterBlob::ENTRY_COUNT], + AdapterBlob*& new_adapter, + bool allocate_code_blob) { - const Register receiver = j_rarg0; - const Register data = t0; + entry_address[AdapterBlob::I2C] = __ pc(); + gen_i2c_adapter(masm, comp_args_on_stack, sig, regs); // ------------------------------------------------------------------------- // Generate a C2I adapter. On entry we know xmethod holds the Method* during calls @@ -603,43 +738,52 @@ void SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, // the args. We finally end in a jump to the generic interpreter entry point. // On exit from the interpreter, the interpreter will restore our SP (lest the // compiled code, which relies solely on SP and not FP, get sick). 
+ entry_address[AdapterBlob::C2I_Unverified] = __ pc(); + entry_address[AdapterBlob::C2I_Unverified_Inline] = __ pc(); - { - __ block_comment("c2i_unverified_entry {"); - - __ ic_check(); - __ ld(xmethod, Address(data, CompiledICData::speculated_method_offset())); - - __ ld(t0, Address(xmethod, in_bytes(Method::code_offset()))); - __ beqz(t0, skip_fixup); - __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); - __ block_comment("} c2i_unverified_entry"); - } + Label skip_fixup; + gen_inline_cache_check(masm, skip_fixup); - entry_address[AdapterBlob::C2I] = __ pc(); + OopMapSet* oop_maps = new OopMapSet(); + int frame_complete = CodeOffsets::frame_never_safe; + int frame_size_in_words = 0; - // Class initialization barrier for static methods + // Scalarized c2i adapter with non-scalarized receiver (i.e., don't pack receiver) entry_address[AdapterBlob::C2I_No_Clinit_Check] = nullptr; - assert(VM_Version::supports_fast_class_init_checks(), "sanity"); - Label L_skip_barrier; - - // Bypass the barrier for non-static methods - __ load_unsigned_short(t0, Address(xmethod, Method::access_flags_offset())); - __ test_bit(t1, t0, exact_log2(JVM_ACC_STATIC)); - __ beqz(t1, L_skip_barrier); // non-static - - __ load_method_holder(t1, xmethod); - __ clinit_barrier(t1, t0, &L_skip_barrier); - __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); - - __ bind(L_skip_barrier); - entry_address[AdapterBlob::C2I_No_Clinit_Check] = __ pc(); - - BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); - bs->c2i_entry_barrier(masm); - - gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); - return; + entry_address[AdapterBlob::C2I_Inline_RO] = __ pc(); + if (regs_cc != regs_cc_ro) { + // No class init barrier needed because method is guaranteed to be non-static + gen_c2i_adapter(masm, sig_cc_ro, regs_cc_ro, /* requires_clinit_barrier = */ false, entry_address[AdapterBlob::C2I_No_Clinit_Check], + 
skip_fixup, entry_address[AdapterBlob::I2C], oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ false); + skip_fixup.reset(); + } + + // Scalarized c2i adapter + entry_address[AdapterBlob::C2I] = __ pc(); + entry_address[AdapterBlob::C2I_Inline] = __ pc(); + gen_c2i_adapter(masm, sig_cc, regs_cc, /* requires_clinit_barrier = */ true, entry_address[AdapterBlob::C2I_No_Clinit_Check], + skip_fixup, entry_address[AdapterBlob::I2C], oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ true); + + // Non-scalarized c2i adapter + if (regs != regs_cc) { + entry_address[AdapterBlob::C2I_Unverified_Inline] = __ pc(); + Label inline_entry_skip_fixup; + gen_inline_cache_check(masm, inline_entry_skip_fixup); + + entry_address[AdapterBlob::C2I_Inline] = __ pc(); + gen_c2i_adapter(masm, sig, regs, /* requires_clinit_barrier = */ true, entry_address[AdapterBlob::C2I_No_Clinit_Check], + inline_entry_skip_fixup, entry_address[AdapterBlob::I2C], oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ false); + } + + // The c2i adapters might safepoint and trigger a GC. The caller must make sure that + // the GC knows about the location of oop argument locations passed to the c2i adapter. 
+ if (allocate_code_blob) { + bool caller_must_gc_arguments = (regs != regs_cc); + int entry_offset[AdapterHandlerEntry::ENTRIES_COUNT]; + assert(AdapterHandlerEntry::ENTRIES_COUNT == 7, "sanity"); + AdapterHandlerLibrary::address_to_offset(entry_address, entry_offset); + new_adapter = AdapterBlob::create(masm->code(), entry_offset, frame_complete, frame_size_in_words, oop_maps, caller_must_gc_arguments); + } } int SharedRuntime::vector_calling_convention(VMRegPair *regs, diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp index 13f3ed4de89a8..0da5248ac840f 100644 --- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp @@ -1924,6 +1924,12 @@ class StubGenerator: public StubCodeGenerator { __ load_klass(t1, dst); __ bne(t1, scratch_src_klass, L_failed); + // Check for flat inline type array -> return -1 + __ test_flat_array_oop(src, t1, L_failed); + + // Check for null-free (non-flat) inline type array -> handle as object array + __ test_null_free_array_oop(src, t1, L_objArray); + // if src->is_Array() isn't null then return -1 // i.e. (lh >= 0) __ bgez(lh, L_failed); @@ -6000,7 +6006,7 @@ class StubGenerator: public StubCodeGenerator { int64_t block_bytes = 16 * 4; __ addi(buf, buf, block_bytes); - __ bge(limit, buf, L_sha1_loop, true); + __ bge(limit, buf, L_sha1_loop, /* is_far */ true); } // store back the state. diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp index f073909bf5d73..b5b1b89ca72a8 100644 --- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2026, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. 
* Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -1423,7 +1423,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { // // Generic interpreted method entry to (asm) interpreter // -address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { +address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized, bool object_init) { // determine code generation flags const bool inc_counter = UseCompiler || CountCompiledCalls; @@ -1536,6 +1536,12 @@ address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { #endif } + // Issue a StoreStore barrier on entry to Object_init if the + // class has strict field fields. Be lazy, always do it. + if (object_init) { + __ membar(MacroAssembler::StoreStore); + } + // start execution __ verify_frame_setup(); diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp index bae5bb7b57b38..0d169f35f6b5b 100644 --- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp +++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp @@ -43,6 +43,7 @@ #include "oops/resolvedMethodEntry.hpp" #include "prims/jvmtiExport.hpp" #include "prims/methodHandles.hpp" +#include "runtime/arguments.hpp" #include "runtime/frame.inline.hpp" #include "runtime/sharedRuntime.hpp" #include "runtime/stubRoutines.hpp" @@ -138,6 +139,7 @@ void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, Label L_patch_done; switch (bc) { + case Bytecodes::_fast_vputfield: // fall through case Bytecodes::_fast_aputfield: // fall through case Bytecodes::_fast_bputfield: // fall through case Bytecodes::_fast_zputfield: // fall through @@ -771,9 +773,25 @@ void TemplateTable::aaload() { // x10: array // x11: index index_check(x10, x11); // leaves index in x11 - __ addi(x11, x11, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); - __ 
shadd(x10, x11, x10, t0, LogBytesPerHeapOop); - __ load_heap_oop(x10, Address(x10), x28, x29, IS_ARRAY); + __ profile_array_type(x12, x10, x14); + if (UseArrayFlattening) { + Label is_flat_array, done; + + __ test_flat_array_oop(x10, x28, is_flat_array); + __ addi(x11, x11, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); + __ shadd(x10, x11, x10, t0, LogBytesPerHeapOop); + __ load_heap_oop(x10, Address(x10), x28, x29, IS_ARRAY); + + __ j(done); + __ bind(is_flat_array); + __ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::flat_array_load), x10, x11); + __ bind(done); + } else { + __ addi(x11, x11, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); + __ shadd(x10, x11, x10, t0, LogBytesPerHeapOop); + __ load_heap_oop(x10, Address(x10), x28, x29, IS_ARRAY); + } + __ profile_element_type(x12, x10, x14); } void TemplateTable::baload() { @@ -1047,7 +1065,7 @@ void TemplateTable::dastore() { } void TemplateTable::aastore() { - Label is_null, ok_is_subtype, done; + Label is_null, is_flat_array, ok_is_subtype, done; transition(vtos, vtos); // stack: ..., array, index, value __ ld(x10, at_tos()); // value @@ -1055,6 +1073,10 @@ void TemplateTable::aastore() { __ ld(x13, at_tos_p2()); // array index_check(x13, x12); // kills x11 + + __ profile_array_type(x14, x13, x15); + __ profile_multiple_element_types(x14, x10, x15, x16); + __ addi(x14, x12, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); __ shadd(x14, x14, x13, x14, LogBytesPerHeapOop); @@ -1063,17 +1085,26 @@ void TemplateTable::aastore() { // do array store check - check for null value first __ beqz(x10, is_null); + // Move array class to x15 + __ load_klass(x15, x13); + + if (UseArrayFlattening) { + __ lwu(x16, Address(x15, Klass::layout_helper_offset())); + __ test_flat_array_layout(x16, is_flat_array); + } + // Move subklass into x11 __ load_klass(x11, x10); - // Move superklass into x10 - __ load_klass(x10, x13); - __ ld(x10, Address(x10, - 
ObjArrayKlass::element_klass_offset())); + + // Move array element superklass into x10 + __ ld(x10, Address(x15, ObjArrayKlass::element_klass_offset())); // Compress array + index * oopSize + 12 into a single register. Frees x12. // Generate subtype check. Blows x12, x15 // Superklass in x10. Subklass in x11. - __ gen_subtype_check(x11, ok_is_subtype); + + // is "x11 <: x10" ? (value subclass <: array element superclass) + __ gen_subtype_check(x11, ok_is_subtype, false); // Come here on failure // object is at TOS @@ -1090,10 +1121,36 @@ void TemplateTable::aastore() { // Have a null in x10, x13=array, x12=index. Store null at ary[idx] __ bind(is_null); - __ profile_null_seen(x12); + if (Arguments::is_valhalla_enabled()) { + Label is_null_into_value_array_npe, store_null; + + if (UseArrayFlattening) { + __ test_flat_array_oop(x13, x28, is_flat_array); + } + + // No way to store null in a null-free array + __ test_null_free_array_oop(x13, x28, is_null_into_value_array_npe); + __ j(store_null); + + __ bind(is_null_into_value_array_npe); + __ j(ExternalAddress(Interpreter::_throw_NullPointerException_entry)); + + __ bind(store_null); + } // Store a null __ store_heap_oop(element_address, noreg, x28, x29, x13, IS_ARRAY); + __ j(done); + + if (UseArrayFlattening) { + Label is_type_ok; + __ bind(is_flat_array); // Store non-null value to flat + + __ ld(x10, at_tos()); // value + __ lw(x13, at_tos_p1()); // index + __ ld(x12, at_tos_p2()); // array + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::flat_array_store), x10, x12, x13); + } // Pop stack arguments __ bind(done); @@ -1850,17 +1907,78 @@ void TemplateTable::if_nullcmp(Condition cc) { void TemplateTable::if_acmp(Condition cc) { transition(atos, vtos); // assume branch is more often taken than not (loops use backward branches) - Label not_taken; + Label taken, not_taken; __ pop_ptr(x11); + __ profile_acmp(x12, x11, x10, x14); + + Register is_inline_type_mask = t1; + __ mv(is_inline_type_mask, 
markWord::inline_type_pattern); + + if (Arguments::is_valhalla_enabled()) { + // The substitutability test is only necessary if x11 and x10 are not the same... + if (cc == equal) { + __ beq(x11, x10, taken); + } else { + __ beq(x11, x10, not_taken); + } + + // ... neither are null... + if (cc == equal) { + __ beqz(x11, not_taken); + __ beqz(x10, not_taken); + } else { + __ beqz(x11, taken); + __ beqz(x10, taken); + } + + // ...and both are values... + __ ld(x12, Address(x11, oopDesc::mark_offset_in_bytes())); + __ andr(x12, x12, is_inline_type_mask); + __ ld(x14, Address(x10, oopDesc::mark_offset_in_bytes())); + __ andr(x14, x14, is_inline_type_mask); + __ andr(x12, x12, x14); + if (cc == equal) { + __ bne(x12, is_inline_type_mask, not_taken); + } else { + __ bne(x12, is_inline_type_mask, taken); + } + + // ...with the same value klass + __ load_metadata(x12, x11); + __ load_metadata(x14, x10); + if (cc == equal) { + __ bne(x12, x14, not_taken); + } else { + __ bne(x12, x14, taken); + } + + // Know both are the same type, let's test for substitutability ... + if (cc == equal) { + invoke_is_substitutable(x10, x11, taken, not_taken); + } else { + invoke_is_substitutable(x10, x11, not_taken, taken); + } + __ stop("Not reachable"); + } + if (cc == equal) { __ bne(x11, x10, not_taken); - } else if (cc == not_equal) { + } else { __ beq(x11, x10, not_taken); } + __ bind(taken); branch(false, false); __ bind(not_taken); - __ profile_not_taken_branch(x10); + __ profile_not_taken_branch(x10, true); +} + +void TemplateTable::invoke_is_substitutable(Register aobj, Register bobj, + Label& is_subst, Label& not_subst) { + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::is_substitutable), aobj, bobj); + // Restored ... x10 answer, jmp to outcome ... 
+ __ beqz(x10, not_subst); + __ j(is_subst); } void TemplateTable::ret() { @@ -2107,7 +2225,8 @@ void TemplateTable::_return(TosState state) { // Issue a StoreStore barrier after all stores but before return // from any constructor for any class with a final field. We don't // know if this is a finalizer, so we always do so. - if (_desc->bytecode() == Bytecodes::_return) { + if (_desc->bytecode() == Bytecodes::_return + || _desc->bytecode() == Bytecodes::_return_register_finalizer) { __ membar(MacroAssembler::StoreStore); } @@ -2505,7 +2624,7 @@ void TemplateTable::pop_and_check_object(Register r) { } void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) { - const Register cache = x14; + const Register cache = x12; const Register obj = x14; const Register index = x13; const Register tos_state = x13; @@ -2515,6 +2634,7 @@ void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteContr resolve_cache_and_index_for_field(byte_no, cache, index); jvmti_post_field_access(cache, index, is_static, false); + load_resolved_field_entry(obj, cache, tos_state, off, flags, is_static); if (!is_static) { @@ -2563,12 +2683,39 @@ void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteContr __ subi(t0, tos_state, (u1)atos); __ bnez(t0, notObj); // atos - __ load_heap_oop(x10, field, x28, x29, IN_HEAP); - __ push(atos); - if (rc == may_rewrite) { - patch_bytecode(Bytecodes::_fast_agetfield, bc, x11); + if (!Arguments::is_valhalla_enabled()) { + __ load_heap_oop(x10, field, x28, x29, IN_HEAP); + __ push(atos); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_agetfield, bc, x11); + } + __ j(Done); + } else { // Valhalla + if (is_static) { + __ load_heap_oop(x10, field, x28, x29); + __ push(atos); + __ j(Done); + } else { + Label is_flat; + __ test_field_is_flat(flags, x28, is_flat); + __ load_heap_oop(x10, field, x28, x29); + __ push(atos); + if (rc == may_rewrite) { + 
patch_bytecode(Bytecodes::_fast_agetfield, bc, x11); + } + __ j(Done); + __ bind(is_flat); + // field is flat (null-free or nullable with a null-marker) + __ mv(x10, obj); + __ read_flat_field(cache, x10); + __ verify_oop(x10); + __ push(atos); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_vgetfield, bc, x11); + } + __ j(Done); + } } - __ j(Done); __ bind(notObj); __ subi(t0, tos_state, (u1)itos); @@ -2728,7 +2875,7 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteContr const Register tos_state = x13; const Register obj = x12; const Register off = x9; - const Register flags = x10; + const Register flags = x16; const Register bc = x14; resolve_cache_and_index_for_field(byte_no, cache, index); @@ -2736,11 +2883,9 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteContr load_resolved_field_entry(obj, cache, tos_state, off, flags, is_static); Label Done; - __ mv(x15, flags); - { Label notVolatile; - __ test_bit(t0, x15, ResolvedFieldEntry::is_volatile_shift); + __ test_bit(t0, flags, ResolvedFieldEntry::is_volatile_shift); __ beqz(t0, notVolatile); __ membar(MacroAssembler::StoreStore | MacroAssembler::LoadStore); __ bind(notVolatile); @@ -2799,19 +2944,69 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteContr // atos { - __ pop(atos); - // field address - if (!is_static) { - pop_and_check_object(obj); - } - __ add(off, obj, off); // if static, obj from cache, else obj from stack. - const Address field(off, 0); - // Store into the field - __ store_heap_oop(field, x10, x28, x29, x13, IN_HEAP); - if (rc == may_rewrite) { - patch_bytecode(Bytecodes::_fast_aputfield, bc, x11, true, byte_no); - } - __ j(Done); + if (!Arguments::is_valhalla_enabled()) { + __ pop(atos); + // field address + if (!is_static) { + pop_and_check_object(obj); + } + __ add(off, obj, off); // if static, obj from cache, else obj from stack. 
+ const Address field(off, 0); + // Store into the field + __ store_heap_oop(field, x10, x28, x29, x13, IN_HEAP); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_aputfield, bc, x11, true, byte_no); + } + __ j(Done); + } else { // Valhalla + __ pop(atos); + if (is_static) { + Label is_nullable; + __ test_field_is_not_null_free_inline_type(flags, x28, is_nullable); + __ null_check(x10); // FIXME JDK-8341120 + __ bind(is_nullable); + // field address + __ add(off, obj, off); + const Address field(off, 0); + __ store_heap_oop(field, x10, x28, x29, x13, IN_HEAP); + __ j(Done); + } else { + Label null_free_reference, is_flat, rewrite_inline; + __ test_field_is_flat(flags, x28, is_flat); + __ test_field_is_null_free_inline_type(flags, x28, null_free_reference); + pop_and_check_object(obj); + { + __ add(off, obj, off); + const Address field(off, 0); + // Store into the field + __ store_heap_oop(field, x10, x28, x29, x13, IN_HEAP); + } + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_aputfield, bc, x9, true, byte_no); + } + __ j(Done); + // Implementation of the inline type semantic + __ bind(null_free_reference); + __ null_check(x10); // FIXME JDK-8341120 + pop_and_check_object(obj); + { + // field address + __ add(off, obj, off); + const Address field(off, 0); + // Store into the field + __ store_heap_oop(field, x10, x28, x29, x13, IN_HEAP); + } + __ j(rewrite_inline); + __ bind(is_flat); + pop_and_check_object(x17); + __ write_flat_field(cache, off, index, flags, x17); + __ bind(rewrite_inline); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_vputfield, bc, x9, true, byte_no); + } + __ j(Done); + } + } // Valhalla } __ bind(notObj); @@ -2946,7 +3141,7 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteContr { Label notVolatile; - __ test_bit(t0, x15, ResolvedFieldEntry::is_volatile_shift); + __ test_bit(t0, flags, ResolvedFieldEntry::is_volatile_shift); __ beqz(t0, notVolatile); __ 
membar(MacroAssembler::StoreLoad | MacroAssembler::StoreStore); __ bind(notVolatile); @@ -2980,6 +3175,7 @@ void TemplateTable::jvmti_post_fast_field_mod() { // to do it for every data type, we use the saved values as the // jvalue object. switch (bytecode()) { // load values into the jvalue object + case Bytecodes::_fast_vputfield: // fall through case Bytecodes::_fast_aputfield: __ push_ptr(x10); break; case Bytecodes::_fast_bputfield: // fall through case Bytecodes::_fast_zputfield: // fall through @@ -3006,6 +3202,7 @@ void TemplateTable::jvmti_post_fast_field_mod() { x9, c_rarg2, c_rarg3); switch (bytecode()) { // restore tos values + case Bytecodes::_fast_vputfield: // fall through case Bytecodes::_fast_aputfield: __ pop_ptr(x10); break; case Bytecodes::_fast_bputfield: // fall through case Bytecodes::_fast_zputfield: // fall through @@ -3047,11 +3244,27 @@ void TemplateTable::fast_storefield(TosState state) { pop_and_check_object(x12); // field address - __ add(x11, x12, x11); - const Address field(x11, 0); + __ add(t1, x12, x11); + const Address field(t1, 0); // access field, must not clobber x13 - flags switch (bytecode()) { + case Bytecodes::_fast_vputfield: + { + Label is_flat, done; + __ test_field_is_flat(x13, x28, is_flat); + __ null_check(x10); + __ store_heap_oop(field, x10, x28, x29, x15, IN_HEAP); + __ j(done); + __ bind(is_flat); + __ load_field_entry(x14, x13); + // Re-shuffle registers because of VM calls calling convention + __ mv(x9, x11); + __ mv(x17, x12); + __ write_flat_field(x14, x9, x16, x18, x17); + __ bind(done); + } + break; case Bytecodes::_fast_aputfield: __ store_heap_oop(field, x10, x28, x29, x15, IN_HEAP); break; @@ -3133,6 +3346,13 @@ void TemplateTable::fast_accessfield(TosState state) { // access field switch (bytecode()) { + case Bytecodes::_fast_vgetfield: + { + // field is flat + __ read_flat_field(x12, x10); + __ verify_oop(x10); + } + break; case Bytecodes::_fast_agetfield: __ load_heap_oop(x10, field, x28, x29, 
IN_HEAP); __ verify_oop(x10); @@ -3540,7 +3760,7 @@ void TemplateTable::_new() { // get instance_size in InstanceKlass (scaled to a count of bytes) __ lwu(x13, Address(x14, Klass::layout_helper_offset())); - // test to see if is malformed in some way + // test to see if it is malformed in some way __ test_bit(t0, x13, exact_log2(Klass::_lh_instance_slow_path_bit)); __ bnez(t0, slow_case); @@ -3550,6 +3770,7 @@ void TemplateTable::_new() { // If fails, go to the slow path. // Initialize the allocation. // Exit. + // // Go to slow path. if (UseTLAB) { @@ -3560,49 +3781,55 @@ void TemplateTable::_new() { __ j(initialize_header); } - // The object is initialized before the header. If the object size is + // The object is initialized before the header. If the object size is // zero, go directly to the header initialization. - if (UseCompactObjectHeaders) { - assert(is_aligned(oopDesc::base_offset_in_bytes(), BytesPerLong), "oop base offset must be 8-byte-aligned"); - __ subi(x13, x13, oopDesc::base_offset_in_bytes()); - } else { - __ subi(x13, x13, sizeof(oopDesc)); - } + int header_size = oopDesc::header_size() * HeapWordSize; + assert(is_aligned(header_size, BytesPerLong), "oop header size must be 8-byte-aligned"); + __ subi(x13, x13, header_size); __ beqz(x13, initialize_header); + #ifdef ASSERT + // make sure instance_size was multiple of 8 + Label L; + __ andi(t0, x13, 7); + __ beqz(t0, L); + __ stop("object size is not multiple of 8 - adjust this code"); + __ bind(L); + // must be > 0, no extra check needed here + #endif + // Initialize object fields { - if (UseCompactObjectHeaders) { - assert(is_aligned(oopDesc::base_offset_in_bytes(), BytesPerLong), "oop base offset must be 8-byte-aligned"); - __ addi(x12, x10, oopDesc::base_offset_in_bytes()); - } else { - __ addi(x12, x10, sizeof(oopDesc)); - } + __ addi(x12, x10, header_size); Label loop; __ bind(loop); - __ sd(zr, Address(x12)); + __ sd(zr, Address(x12, 0)); __ addi(x12, x12, BytesPerLong); __ subi(x13, x13, 
BytesPerLong); __ bnez(x13, loop); } - // initialize object hader only. + // initialize object header only. __ bind(initialize_header); - if (UseCompactObjectHeaders) { - __ ld(t0, Address(x14, Klass::prototype_header_offset())); - __ sd(t0, Address(x10, oopDesc::mark_offset_in_bytes())); + if (UseCompactObjectHeaders || Arguments::is_valhalla_enabled()) { + __ ld(t1, Address(x14, Klass::prototype_header_offset())); + __ sd(t1, Address(x10, oopDesc::mark_offset_in_bytes())); } else { - __ mv(t0, (intptr_t)markWord::prototype().value()); - __ sd(t0, Address(x10, oopDesc::mark_offset_in_bytes())); - __ store_klass_gap(x10, zr); // zero klass gap for compressed oops - __ store_klass(x10, x14); // store klass last + __ mv(t1, (intptr_t)markWord::prototype().value()); + __ sd(t1, Address(x10, oopDesc::mark_offset_in_bytes())); + } + if (!UseCompactObjectHeaders) { + __ store_klass_gap(x10, zr); // zero klass gap for compressed oops + __ store_klass(x10, x14); // store klass last } if (DTraceAllocProbes) { // Trigger dtrace event for fastpath __ push(atos); // save the return value - __ call_VM_leaf(CAST_FROM_FN_PTR(address, static_cast<int (*)(oopDesc*)>(SharedRuntime::dtrace_object_alloc)), x10); + __ call_VM_leaf( + CAST_FROM_FN_PTR(address, static_cast<int (*)(oopDesc*)>(SharedRuntime::dtrace_object_alloc)), x10); __ pop(atos); // restore the return value + } __ j(done); } @@ -3689,13 +3916,12 @@ void TemplateTable::checkcast() { __ bind(ok_is_subtype); __ mv(x10, x13); // Restore object in x13 + __ j(done); + __ bind(is_null); + // Collect counts on whether this test sees nulls a lot or not. 
if (ProfileInterpreter) { - __ j(done); - __ bind(is_null); __ profile_null_seen(x12); - } else { - __ bind(is_null); // same as 'done' } __ bind(done); } @@ -3816,6 +4042,10 @@ void TemplateTable::monitorenter() { // check for null object __ null_check(x10); + Label is_inline_type; + __ ld(t0, Address(x10, oopDesc::mark_offset_in_bytes())); + __ test_markword_is_inline_type(t0, is_inline_type); + const Address monitor_block_top( fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); const Address monitor_block_bot( @@ -3915,6 +4145,11 @@ void TemplateTable::monitorenter() { // The bcp has already been incremented. Just need to dispatch to // next instruction. __ dispatch_next(vtos); + + __ bind(is_inline_type); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_identity_exception), x10); + __ should_not_reach_here(); } void TemplateTable::monitorexit() { @@ -3923,6 +4158,17 @@ void TemplateTable::monitorexit() { // check for null object __ null_check(x10); + const int is_inline_type_mask = markWord::inline_type_pattern; + Label has_identity; + __ ld(t0, Address(x10, oopDesc::mark_offset_in_bytes())); + __ mv(t1, is_inline_type_mask); + __ andr(t0, t0, t1); + __ bne(t0, t1, has_identity); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + __ should_not_reach_here(); + __ bind(has_identity); + const Address monitor_block_top( fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); const Address monitor_block_bot( diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.hpp b/src/hotspot/cpu/riscv/templateTable_riscv.hpp index 8ae66d88521b1..12c627d6312ac 100644 --- a/src/hotspot/cpu/riscv/templateTable_riscv.hpp +++ b/src/hotspot/cpu/riscv/templateTable_riscv.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2026, Oracle and/or its affiliates. All rights reserved. 
* Copyright (c) 2014, Red Hat Inc. All rights reserved. * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -34,4 +34,6 @@ static void invokevirtual_helper(Register index, Register recv, // Helpers static void index_check(Register array, Register index); +static void invoke_is_substitutable(Register aobj, Register bobj, Label& is_subst, Label& not_subst); + #endif // CPU_RISCV_TEMPLATETABLE_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp index 3a6415d52bd39..22fd66a8da321 100644 --- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp +++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. * Copyright (c) 2023, Rivos Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -218,6 +218,15 @@ void VM_Version::common_initialize() { warning("CRC32C intrinsics are not available on this CPU."); FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); } + + if (InlineTypePassFieldsAsArgs) { + warning("InlineTypePassFieldsAsArgs is not supported on this CPU"); + FLAG_SET_DEFAULT(InlineTypePassFieldsAsArgs, false); + } + if (InlineTypeReturnedAsFields) { + warning("InlineTypeReturnedAsFields is not supported on this CPU"); + FLAG_SET_DEFAULT(InlineTypeReturnedAsFields, false); + } } #ifdef COMPILER2 diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_riscv.hpp index 11a88dfedd7be..aef77a0c42d82 100644 --- a/src/hotspot/cpu/riscv/vm_version_riscv.hpp +++ b/src/hotspot/cpu/riscv/vm_version_riscv.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. * Copyright (c) 2023, Rivos Inc. All rights reserved. @@ -32,7 +32,7 @@ #include "runtime/arguments.hpp" #include "runtime/globals_extension.hpp" #include "utilities/globalDefinitions.hpp" -#include "utilities/growableArray.hpp" +#include "utilities/ostream.hpp" #include "utilities/sizes.hpp" class RiscvHwprobe; diff --git a/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp b/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp index d889141c74437..4fc70e7656f29 100644 --- a/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp +++ b/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2026, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -47,10 +47,10 @@ extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index); #endif -VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { +VtableStub* VtableStubs::create_vtable_stub(int vtable_index, bool caller_is_c1) { // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. const int stub_code_length = code_size_limit(true); - VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); + VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index, caller_is_c1); // Can be null if there is no free space in the code cache. if (s == nullptr) { return nullptr; @@ -63,6 +63,10 @@ VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { int slop_bytes = 0; int slop_delta = 0; + ByteSize entry_offset = caller_is_c1 + ? 
Method::from_compiled_inline_offset() + : Method::from_compiled_inline_ro_offset(); + ResourceMark rm; CodeBuffer cb(s->entry_point(), stub_code_length); MacroAssembler* masm = new MacroAssembler(&cb); @@ -119,7 +123,7 @@ VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { if (DebugVtables) { Label L; __ beqz(xmethod, L); - __ ld(t0, Address(xmethod, Method::from_compiled_offset())); + __ ld(t0, Address(xmethod, entry_offset)); __ bnez(t0, L); __ stop("Vtable entry is null"); __ bind(L); @@ -130,7 +134,7 @@ VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { // xmethod: Method* // x12: receiver address ame_addr = __ pc(); - __ ld(t1, Address(xmethod, Method::from_compiled_offset())); + __ ld(t1, Address(xmethod, entry_offset)); __ jr(t1); masm->flush(); @@ -139,10 +143,10 @@ VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { return s; } -VtableStub* VtableStubs::create_itable_stub(int itable_index) { +VtableStub* VtableStubs::create_itable_stub(int itable_index, bool caller_is_c1) { // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. const int stub_code_length = code_size_limit(false); - VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); + VtableStub* s = new(stub_code_length) VtableStub(false, itable_index, caller_is_c1); // Can be null if there is no free space in the code cache. if (s == nullptr) { return nullptr; @@ -154,6 +158,10 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) { int slop_bytes = 0; int slop_delta = 0; + ByteSize entry_offset = caller_is_c1 + ? 
Method::from_compiled_inline_offset() + : Method::from_compiled_inline_ro_offset(); + ResourceMark rm; CodeBuffer cb(s->entry_point(), stub_code_length); MacroAssembler* masm = new MacroAssembler(&cb); @@ -216,7 +224,7 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) { if (DebugVtables) { Label L2; __ beqz(xmethod, L2); - __ ld(t0, Address(xmethod, Method::from_compiled_offset())); + __ ld(t0, Address(xmethod, entry_offset)); __ bnez(t0, L2); __ stop("compiler entrypoint is null"); __ bind(L2); @@ -226,7 +234,7 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) { // xmethod: Method* // j_rarg0: receiver address ame_addr = __ pc(); - __ ld(t1, Address(xmethod, Method::from_compiled_offset())); + __ ld(t1, Address(xmethod, entry_offset)); __ jr(t1); __ bind(L_no_such_interface); diff --git a/src/hotspot/cpu/s390/c1_CodeStubs_s390.cpp b/src/hotspot/cpu/s390/c1_CodeStubs_s390.cpp index f1272ee1a2219..e282128c33a2a 100644 --- a/src/hotspot/cpu/s390/c1_CodeStubs_s390.cpp +++ b/src/hotspot/cpu/s390/c1_CodeStubs_s390.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016, 2026, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2016, 2024 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* @@ -194,11 +194,13 @@ void NewTypeArrayStub::emit_code(LIR_Assembler* ce) { __ z_brul(_continuation); } -NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) { +NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, + CodeEmitInfo* info, bool is_null_free) { _klass_reg = klass_reg; _length = length; _result = result; _info = new CodeEmitInfo(info); + _is_null_free = is_null_free; // unimplemented } void NewObjectArrayStub::emit_code(LIR_Assembler* ce) { @@ -444,4 +446,30 @@ void ArrayCopyStub::emit_code(LIR_Assembler* ce) { __ branch_optimized(Assembler::bcondAlways, _continuation); } +// Implementation of SubstitutabilityCheckStub +SubstitutabilityCheckStub::SubstitutabilityCheckStub(LIR_Opr left, LIR_Opr right, CodeEmitInfo* info) { + Unimplemented(); +} + +void SubstitutabilityCheckStub::emit_code(LIR_Assembler* ce) { + Unimplemented(); +} + +LoadFlattenedArrayStub::LoadFlattenedArrayStub(LIR_Opr array, LIR_Opr index, LIR_Opr result, CodeEmitInfo* info) { + Unimplemented(); +} + +void LoadFlattenedArrayStub::emit_code(LIR_Assembler* ce) { + Unimplemented(); +} + +// Implementation of StoreFlattenedArrayStub + +StoreFlattenedArrayStub::StoreFlattenedArrayStub(LIR_Opr array, LIR_Opr index, LIR_Opr value, CodeEmitInfo* info) { + Unimplemented(); +} + +void StoreFlattenedArrayStub::emit_code(LIR_Assembler* ce) { + Unimplemented(); +} #undef __ diff --git a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp index 08f922a0b9ae1..8ec1443532aaa 100644 --- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp @@ -3019,6 +3019,10 @@ void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { } } +void LIR_Assembler::emit_profile_inline_type(LIR_OpProfileInlineType* op) { + Unimplemented(); +} + void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) { assert(op->crc()->is_single_cpu(), 
"crc must be register"); assert(op->val()->is_single_cpu(), "byte value must be register"); @@ -3034,4 +3038,26 @@ void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) { __ z_lgfr(res, crc); } +// Valhalla support + +void LIR_Assembler::check_orig_pc() { + Unimplemented(); +} + +int LIR_Assembler::store_inline_type_fields_to_buf(ciInlineKlass* vk) { + Unimplemented(); + return 0; +} + +void LIR_Assembler::emit_opFlattenedArrayCheck(LIR_OpFlattenedArrayCheck* op) { + Unimplemented(); +} + +void LIR_Assembler::emit_opNullFreeArrayCheck(LIR_OpNullFreeArrayCheck* op) { + Unimplemented(); +} + +void LIR_Assembler::emit_opSubstitutabilityCheck(LIR_OpSubstitutabilityCheck* op) { + Unimplemented(); +} #undef __ diff --git a/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp b/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp index 1ffd172df8f01..580b8f8db57c0 100644 --- a/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp +++ b/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016, 2026, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2016, 2024 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* @@ -777,7 +777,7 @@ void LIRGenerator::do_NewInstance(NewInstance* x) { LIR_Opr tmp3 = reg; LIR_Opr tmp4 = LIR_OprFact::illegalOpr; LIR_Opr klass_reg = FrameMap::Z_R11_metadata_opr; - new_instance(reg, x->klass(), x->is_unresolved(), tmp1, tmp2, tmp3, tmp4, klass_reg, info); + new_instance(reg, x->klass(), x->is_unresolved(), /* allow_inline */ false, tmp1, tmp2, tmp3, tmp4, klass_reg, info); LIR_Opr result = rlock_result(x); __ move(reg, result); } @@ -937,7 +937,7 @@ void LIRGenerator::do_CheckCast(CheckCast* x) { __ checkcast(reg, obj.result(), x->klass(), tmp1, tmp2, tmp3, x->direct_compare(), info_for_exception, patching_info, stub, - x->profiled_method(), x->profiled_bci()); + x->profiled_method(), x->profiled_bci(), /*is_null_free*/ false); } diff --git a/src/hotspot/cpu/s390/c1_MacroAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_MacroAssembler_s390.cpp index 813143938f980..e300431b9097d 100644 --- a/src/hotspot/cpu/s390/c1_MacroAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/c1_MacroAssembler_s390.cpp @@ -43,7 +43,10 @@ void C1_MacroAssembler::explicit_null_check(Register base) { ShouldNotCallThis(); // unused } -void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_bytes) { +void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_bytes, + int sp_offset_for_orig_pc, + bool needs_stack_repair, bool has_scalarized_args, + Label* verified_inline_entry_label) { assert(bang_size_in_bytes >= frame_size_in_bytes, "stack bang size incorrect"); generate_stack_overflow_check(bang_size_in_bytes); save_return_pc(); @@ -249,6 +252,11 @@ void C1_MacroAssembler::allocate_array( verify_oop(obj, FILE_AND_LINE); } +int C1_MacroAssembler::scalarized_entry(const CompiledEntrySignature* ces, int frame_size_in_bytes, int bang_size_in_bytes, int sp_offset_for_orig_pc, Label& verified_inline_entry_label, bool is_inline_ro_entry) { + Unimplemented(); +} + + #ifndef PRODUCT diff --git a/src/hotspot/cpu/s390/c1_Runtime1_s390.cpp 
b/src/hotspot/cpu/s390/c1_Runtime1_s390.cpp index e78b04fe91106..01fe91928f40f 100644 --- a/src/hotspot/cpu/s390/c1_Runtime1_s390.cpp +++ b/src/hotspot/cpu/s390/c1_Runtime1_s390.cpp @@ -396,8 +396,7 @@ OopMapSet* Runtime1::generate_code_for(StubId id, StubAssembler* sasm) { __ mem2reg_opt(t0, Address(klass, Klass::layout_helper_offset()), false); __ z_sra(t0, Klass::_lh_array_tag_shift); int tag = ((id == StubId::c1_new_type_array_id) - ? Klass::_lh_array_tag_type_value - : Klass::_lh_array_tag_obj_value); + ? Klass::_lh_array_tag_type_value : Klass::_lh_array_tag_ref_value); __ compare32_and_branch(t0, tag, Assembler::bcondEqual, ok); __ stop("assert(is an array klass)"); __ should_not_reach_here(); diff --git a/src/hotspot/cpu/s390/continuationFreezeThaw_s390.inline.hpp b/src/hotspot/cpu/s390/continuationFreezeThaw_s390.inline.hpp index 1102a745ac06f..9139f3b05fadb 100644 --- a/src/hotspot/cpu/s390/continuationFreezeThaw_s390.inline.hpp +++ b/src/hotspot/cpu/s390/continuationFreezeThaw_s390.inline.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2019, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -39,7 +39,7 @@ inline frame FreezeBase::sender(const frame& f) { return frame(); } -template frame FreezeBase::new_heap_frame(frame& f, frame& caller) { +template frame FreezeBase::new_heap_frame(frame& f, frame& caller, int size_adjust) { Unimplemented(); return frame(); } @@ -56,7 +56,7 @@ inline void FreezeBase::relativize_interpreted_frame_metadata(const frame& f, co Unimplemented(); } -inline void FreezeBase::patch_pd(frame& hf, const frame& caller) { +inline void FreezeBase::patch_pd(frame& hf, const frame& caller, bool is_bottom_frame) { Unimplemented(); } @@ -82,7 +82,7 @@ inline frame ThawBase::new_entry_frame() { return frame(); } -template frame ThawBase::new_stack_frame(const frame& hf, frame& caller, bool bottom) { +template frame ThawBase::new_stack_frame(const frame& hf, frame& caller, bool bottom, int size_adjust) { Unimplemented(); return frame(); } diff --git a/src/hotspot/cpu/s390/foreignGlobals_s390.cpp b/src/hotspot/cpu/s390/foreignGlobals_s390.cpp index 1ad0570bad8ab..81331d1e17946 100644 --- a/src/hotspot/cpu/s390/foreignGlobals_s390.cpp +++ b/src/hotspot/cpu/s390/foreignGlobals_s390.cpp @@ -52,15 +52,15 @@ const ABIDescriptor ForeignGlobals::parse_abi_descriptor(jobject jabi) { oop abi_oop = JNIHandles::resolve_non_null(jabi); ABIDescriptor abi; - objArrayOop inputStorage = jdk_internal_foreign_abi_ABIDescriptor::inputStorage(abi_oop); + refArrayOop inputStorage = jdk_internal_foreign_abi_ABIDescriptor::inputStorage(abi_oop); parse_register_array(inputStorage, StorageType::INTEGER, abi._integer_argument_registers, as_Register); parse_register_array(inputStorage, StorageType::FLOAT, abi._float_argument_registers, as_FloatRegister); - objArrayOop outputStorage = jdk_internal_foreign_abi_ABIDescriptor::outputStorage(abi_oop); + refArrayOop outputStorage = jdk_internal_foreign_abi_ABIDescriptor::outputStorage(abi_oop); parse_register_array(outputStorage, 
StorageType::INTEGER, abi._integer_return_registers, as_Register); parse_register_array(outputStorage, StorageType::FLOAT, abi._float_return_registers, as_FloatRegister); - objArrayOop volatileStorage = jdk_internal_foreign_abi_ABIDescriptor::volatileStorage(abi_oop); + refArrayOop volatileStorage = jdk_internal_foreign_abi_ABIDescriptor::volatileStorage(abi_oop); parse_register_array(volatileStorage, StorageType::INTEGER, abi._integer_volatile_registers, as_Register); parse_register_array(volatileStorage, StorageType::FLOAT, abi._float_additional_volatile_registers, as_FloatRegister); diff --git a/src/hotspot/cpu/s390/frame_s390.cpp b/src/hotspot/cpu/s390/frame_s390.cpp index b602d0adce579..8977262e6802e 100644 --- a/src/hotspot/cpu/s390/frame_s390.cpp +++ b/src/hotspot/cpu/s390/frame_s390.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016, 2026, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2016, 2023 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* @@ -685,3 +685,17 @@ intptr_t* frame::interpreter_frame_tos_at(jint offset) const { return &interpreter_frame_tos_address()[offset]; } +intptr_t* frame::repair_sender_sp(intptr_t* sender_sp, intptr_t** saved_fp_addr) const { + Unimplemented(); + return nullptr; +} + +intptr_t* frame::repair_sender_sp(nmethod* nm, intptr_t* sp, intptr_t** saved_fp_addr) { + Unimplemented(); + return nullptr; +} + +bool frame::was_augmented_on_entry(int& real_size) const { + Unimplemented(); + return false; +} diff --git a/src/hotspot/cpu/s390/frame_s390.hpp b/src/hotspot/cpu/s390/frame_s390.hpp index bcdeec43e1a25..8f1893648b15e 100644 --- a/src/hotspot/cpu/s390/frame_s390.hpp +++ b/src/hotspot/cpu/s390/frame_s390.hpp @@ -555,9 +555,15 @@ metadata_words_at_top = 0, frame_alignment = 16, // size, in words, of maximum shift in frame position due to alignment - align_wiggle = 1 + align_wiggle = 1, + // This is wrong and unimplemented + sender_sp_offset = 0 }; static jint interpreter_frame_expression_stack_direction() { return -1; } + intptr_t* repair_sender_sp(intptr_t* sender_sp, intptr_t** saved_fp_addr) const; + static intptr_t* repair_sender_sp(nmethod* nm, intptr_t* sp, intptr_t** saved_fp_addr); + bool was_augmented_on_entry(int& real_size) const; + #endif // CPU_S390_FRAME_S390_HPP diff --git a/src/hotspot/cpu/s390/globals_s390.hpp b/src/hotspot/cpu/s390/globals_s390.hpp index 80ed6d1acc89d..c408d20de8c6f 100644 --- a/src/hotspot/cpu/s390/globals_s390.hpp +++ b/src/hotspot/cpu/s390/globals_s390.hpp @@ -78,6 +78,9 @@ define_pd_global(bool, CompactStrings, true); // 8146801 (Short Array Allocation): No performance work done here yet. 
define_pd_global(intx, InitArrayShortSize, 1*BytesPerLong); +define_pd_global(bool, InlineTypePassFieldsAsArgs, false); +define_pd_global(bool, InlineTypeReturnedAsFields, false); + #define ARCH_FLAGS(develop, \ product, \ range, \ diff --git a/src/hotspot/cpu/s390/interp_masm_s390.cpp b/src/hotspot/cpu/s390/interp_masm_s390.cpp index d50cb833e6839..d6f8c1a33ac1d 100644 --- a/src/hotspot/cpu/s390/interp_masm_s390.cpp +++ b/src/hotspot/cpu/s390/interp_masm_s390.cpp @@ -1519,7 +1519,7 @@ void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register ca // argument. Tmp is the number of cells left in the // CallTypeData/VirtualCallTypeData to reach its end. Non null // if there's a return to profile. - assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); + assert(SingleTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); z_sllg(tmp, tmp, exact_log2(DataLayout::cell_size)); z_agr(mdp, tmp); } @@ -1568,7 +1568,7 @@ void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, bind(do_profile); } - Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); + Address mdo_ret_addr(mdp, -in_bytes(SingleTypeEntry::size())); profile_obj_type(ret, mdo_ret_addr, tmp); bind(profile_continue); diff --git a/src/hotspot/cpu/s390/macroAssembler_s390.cpp b/src/hotspot/cpu/s390/macroAssembler_s390.cpp index ea75d483e5f0f..f760be64b8f5e 100644 --- a/src/hotspot/cpu/s390/macroAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/macroAssembler_s390.cpp @@ -6920,3 +6920,32 @@ void MacroAssembler::profile_receiver_type(Register recv, Register mdp, int mdp_ z_agr(offset, mdp); add2mem_64(Address(offset), DataLayout::counter_increment, r0_tmp); } + +// Unimplemented methods for inline types. 
+int MacroAssembler::store_inline_type_fields_to_buf(ciInlineKlass* vk, bool from_interpreter) { + Unimplemented(); +} + +bool MacroAssembler::move_helper(VMReg from, VMReg to, BasicType bt, RegState reg_state[]) { + Unimplemented(); +} + +bool MacroAssembler::unpack_inline_helper(const GrowableArray* sig, int& sig_index, + VMReg from, int& from_index, VMRegPair* to, int to_count, int& to_index, + RegState reg_state[]) { + Unimplemented(); +} + +bool MacroAssembler::pack_inline_helper(const GrowableArray* sig, int& sig_index, int vtarg_index, + VMRegPair* from, int from_count, int& from_index, VMReg to, + RegState reg_state[], Register val_array) { + Unimplemented(); +} + +int MacroAssembler::extend_stack_for_inline_args(int args_on_stack) { + Unimplemented(); +} + +VMReg MacroAssembler::spill_reg_for(VMReg reg) { + Unimplemented(); +} diff --git a/src/hotspot/cpu/s390/macroAssembler_s390.hpp b/src/hotspot/cpu/s390/macroAssembler_s390.hpp index 8e2834ba9b703..df1596bf413b6 100644 --- a/src/hotspot/cpu/s390/macroAssembler_s390.hpp +++ b/src/hotspot/cpu/s390/macroAssembler_s390.hpp @@ -30,6 +30,10 @@ #include "asm/assembler.hpp" #include "oops/accessDecorators.hpp" +class ciInlineKlass; +class SigEntry; +class VMRegPair; + #define MODERN_IFUN(name) ((void (MacroAssembler::*)(Register, int64_t, Register, Register))&MacroAssembler::name) #define CLASSIC_IFUN(name) ((void (MacroAssembler::*)(Register, int64_t, Register, Register))&MacroAssembler::name) #define MODERN_FFUN(name) ((void (MacroAssembler::*)(FloatRegister, int64_t, Register, Register))&MacroAssembler::name) @@ -1113,6 +1117,9 @@ class MacroAssembler: public Assembler { void load_on_condition_imm_64(Register dst, int64_t i2, branch_condition cc); void profile_receiver_type(Register recv, Register mdp, int mdp_offset, Register tmp1); + + // Inline type specific methods + #include "asm/macroAssembler_common.hpp" }; #ifdef ASSERT diff --git a/src/hotspot/cpu/s390/s390.ad b/src/hotspot/cpu/s390/s390.ad index 
2208a197ac985..bc046a0ee06f7 100644 --- a/src/hotspot/cpu/s390/s390.ad +++ b/src/hotspot/cpu/s390/s390.ad @@ -1159,12 +1159,6 @@ void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { C->output()->set_frame_complete(__ offset()); } - -uint MachPrologNode::size(PhaseRegAlloc *ra_) const { - // Variable size. Determine dynamically. - return MachNode::size(ra_); -} - int MachPrologNode::reloc() const { // Return number of relocatable values contained in this instruction. return 1; // One reloc entry for load_const(toc). @@ -1207,11 +1201,6 @@ void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { } } -uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { - // Variable size. determine dynamically. - return MachNode::size(ra_); -} - int MachEpilogNode::reloc() const { // Return number of relocatable values contained in this instruction. return 1; // One for load_from_polling_page. @@ -1616,6 +1605,18 @@ class CallStubImpl { source %{ +#ifndef PRODUCT +void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const +{ + Unimplemented(); +} +#endif + +void MachVEPNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const +{ + Unimplemented(); +} + #if !defined(PRODUCT) void MachUEPNode::format(PhaseRegAlloc *ra_, outputStream *os) const { os->print_cr("---- MachUEPNode ----"); @@ -1635,11 +1636,6 @@ void MachUEPNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { __ ic_check(CodeEntryAlignment); } -uint MachUEPNode::size(PhaseRegAlloc *ra_) const { - // Determine size dynamically. 
- return MachNode::size(ra_); -} - //============================================================================= %} // interrupt source section diff --git a/src/hotspot/cpu/s390/sharedRuntime_s390.cpp b/src/hotspot/cpu/s390/sharedRuntime_s390.cpp index e5a27e66968b8..394f21e9ce947 100644 --- a/src/hotspot/cpu/s390/sharedRuntime_s390.cpp +++ b/src/hotspot/cpu/s390/sharedRuntime_s390.cpp @@ -2085,9 +2085,8 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, } static address gen_c2i_adapter(MacroAssembler *masm, - int total_args_passed, int comp_args_on_stack, - const BasicType *sig_bt, + const GrowableArray* sig, const VMRegPair *regs, Label &skip_fixup) { // Before we get into the guts of the C2I adapter, see if we should be here @@ -2116,7 +2115,7 @@ static address gen_c2i_adapter(MacroAssembler *masm, // Since all args are passed on the stack, total_args_passed*wordSize is the // space we need. We need ABI scratch area but we use the caller's since // it has already been allocated. - + int total_args_passed = sig->length(); const int abi_scratch = frame::z_top_ijava_frame_abi_size; int extraspace = align_up(total_args_passed, 2)*wordSize + abi_scratch; Register sender_SP = Z_R10; @@ -2137,6 +2136,8 @@ static address gen_c2i_adapter(MacroAssembler *masm, // Now write the args into the outgoing interpreter space. for (int i = 0; i < total_args_passed; i++) { + BasicType bt = sig->at(i)._bt; + VMReg r_1 = regs[i].first(); VMReg r_2 = regs[i].second(); if (!r_1->is_valid()) { @@ -2153,7 +2154,7 @@ static address gen_c2i_adapter(MacroAssembler *masm, } else { // longs are given 2 64-bit slots in the interpreter, // but the data is passed in only 1 slot. 
- if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { + if (bt == T_LONG || bt == T_DOUBLE) { #ifdef ASSERT __ clear_mem(Address(Z_SP, st_off), sizeof(void *)); #endif @@ -2168,7 +2169,7 @@ static address gen_c2i_adapter(MacroAssembler *masm, } else { // longs are given 2 64-bit slots in the interpreter, but the // data is passed in only 1 slot. - if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { + if (bt == T_LONG || bt == T_DOUBLE) { #ifdef ASSERT __ clear_mem(Address(Z_SP, st_off), sizeof(void *)); #endif @@ -2233,12 +2234,12 @@ static address gen_c2i_adapter(MacroAssembler *masm, // Z_SP r15 - SP prepared by call stub such that caller's outgoing args are near top // void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, - int total_args_passed, int comp_args_on_stack, - const BasicType *sig_bt, + const GrowableArray* sig, const VMRegPair *regs) { const Register value = Z_R12; const Register ld_ptr= Z_esp; + int total_args_passed = sig->length(); int ld_offset = total_args_passed * wordSize; @@ -2258,8 +2259,9 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, // Now generate the shuffle code. Pick up all register args and move the // rest through register value=Z_R12. for (int i = 0; i < total_args_passed; i++) { - if (sig_bt[i] == T_VOID) { - assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); + BasicType bt = sig->at(i)._bt; + if (bt == T_VOID) { + assert(i > 0 && (sig->at(i - 1)._bt == T_LONG || sig->at(i - 1)._bt == T_DOUBLE), "missing half"); continue; } @@ -2291,7 +2293,7 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, } else { // In 64bit, longs are given 2 64-bit slots in the interpreter, but the // data is passed in only 1 slot. 
- if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { + if (bt == T_LONG || bt == T_DOUBLE) { ld_offset -= wordSize; } __ z_mvc(Address(Z_SP, st_off), Address(ld_ptr, ld_offset), sizeof(void*)); @@ -2299,7 +2301,7 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, } else { if (!r_2->is_valid()) { // Not sure we need to do this but it shouldn't hurt. - if (is_reference_type(sig_bt[i]) || sig_bt[i] == T_ADDRESS) { + if (is_reference_type(bt) || bt == T_ADDRESS) { __ z_lg(r_1->as_Register(), ld_offset, ld_ptr); } else { __ z_l(r_1->as_Register(), ld_offset, ld_ptr); @@ -2307,7 +2309,7 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, } else { // In 64bit, longs are given 2 64-bit slots in the interpreter, but the // data is passed in only 1 slot. - if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { + if (bt == T_LONG || bt == T_DOUBLE) { ld_offset -= wordSize; } __ z_lg(r_1->as_Register(), ld_offset, ld_ptr); @@ -2336,15 +2338,20 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, __ z_br(Z_R1_scratch); } -void SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, - int total_args_passed, +void SharedRuntime::generate_i2c2i_adapters(MacroAssembler* masm, int comp_args_on_stack, - const BasicType *sig_bt, - const VMRegPair *regs, - address entry_address[AdapterBlob::ENTRY_COUNT]) { + const GrowableArray* sig, + const VMRegPair* regs, + const GrowableArray* sig_cc, + const VMRegPair* regs_cc, + const GrowableArray* sig_cc_ro, + const VMRegPair* regs_cc_ro, + address entry_address[AdapterBlob::ENTRY_COUNT], + AdapterBlob*& new_adapter, + bool allocate_code_blob) { __ align(CodeEntryAlignment); entry_address[AdapterBlob::I2C] = __ pc(); - gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); + gen_i2c_adapter(masm, comp_args_on_stack, sig, regs); Label skip_fixup; { @@ -2389,7 +2396,7 @@ void SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, __ bind(L_skip_barrier); 
entry_address[AdapterBlob::C2I_No_Clinit_Check] = __ pc(); - gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); + gen_c2i_adapter(masm, comp_args_on_stack, sig, regs, skip_fixup); return; } @@ -3400,3 +3407,16 @@ RuntimeStub* SharedRuntime::generate_jfr_return_lease() { } #endif // INCLUDE_JFR + +const uint SharedRuntime::java_return_convention_max_int = Argument::n_int_register_parameters_j; +const uint SharedRuntime::java_return_convention_max_float = Argument::n_float_register_parameters_j; + +int SharedRuntime::java_return_convention(const BasicType *sig_bt, VMRegPair *regs, int total_args_passed) { + Unimplemented(); + return 0; +} + +BufferedInlineTypeBlob* SharedRuntime::generate_buffered_inline_type_adapter(const InlineKlass* vk) { + Unimplemented(); + return nullptr; +} diff --git a/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp b/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp index dba04fc0e852b..9752c859022eb 100644 --- a/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp +++ b/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016, 2026, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2016, 2024 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -1684,7 +1684,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { // // Generic interpreted method entry to template interpreter. 
// -address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { +address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized, bool object_init) { address entry_point = __ pc(); bool inc_counter = UseCompiler || CountCompiledCalls; @@ -1805,6 +1805,12 @@ address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { #endif // ASSERT } + // If object_init == true, we should insert a StoreStore barrier here to + // prevent strict fields initial default values from being observable. + // However, s390 is a TSO platform, so if `this` escapes, strict fields + // initialized values are guaranteed to be the ones observed, so the + // barrier can be elided. + // start execution #ifdef ASSERT diff --git a/src/hotspot/cpu/s390/vtableStubs_s390.cpp b/src/hotspot/cpu/s390/vtableStubs_s390.cpp index f60d91183da6b..de4049ccacfe9 100644 --- a/src/hotspot/cpu/s390/vtableStubs_s390.cpp +++ b/src/hotspot/cpu/s390/vtableStubs_s390.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016, 2026, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2016, 2023 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -44,10 +44,10 @@ extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int #endif // Used by compiler only; may use only caller saved, non-argument registers. -VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { +VtableStub* VtableStubs::create_vtable_stub(int vtable_index, bool caller_is_c1) { // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. const int stub_code_length = code_size_limit(true); - VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); + VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index, caller_is_c1); // Can be null if there is no free space in the code cache. 
if (s == nullptr) { return nullptr; @@ -147,10 +147,10 @@ VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { return s; } -VtableStub* VtableStubs::create_itable_stub(int itable_index) { +VtableStub* VtableStubs::create_itable_stub(int itable_index, bool caller_is_c1) { // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. const int stub_code_length = code_size_limit(false); - VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); + VtableStub* s = new(stub_code_length) VtableStub(false, itable_index, caller_is_c1); // Can be null if there is no free space in the code cache. if (s == nullptr) { return nullptr; diff --git a/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp b/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp index 9a4044a4f0ccf..d0b23777ae911 100644 --- a/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp +++ b/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp @@ -29,6 +29,7 @@ #include "c1/c1_Runtime1.hpp" #include "classfile/javaClasses.hpp" #include "nativeInst_x86.hpp" +#include "oops/objArrayKlass.hpp" #include "runtime/sharedRuntime.hpp" #include "utilities/align.hpp" #include "utilities/macros.hpp" @@ -115,6 +116,79 @@ void DivByZeroStub::emit_code(LIR_Assembler* ce) { } +// Implementation of LoadFlattenedArrayStub + +LoadFlattenedArrayStub::LoadFlattenedArrayStub(LIR_Opr array, LIR_Opr index, LIR_Opr result, CodeEmitInfo* info) { + _array = array; + _index = index; + _result = result; + // Tell the register allocator that the runtime call will scratch rax. 
+ _scratch_reg = FrameMap::rax_oop_opr; + _info = new CodeEmitInfo(info); +} + +void LoadFlattenedArrayStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + ce->store_parameter(_array->as_register(), 1); + ce->store_parameter(_index->as_register(), 0); + __ call(RuntimeAddress(Runtime1::entry_for(StubId::c1_load_flat_array_id))); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + if (_result->as_register() != rax) { + __ movptr(_result->as_register(), rax); + } + __ jmp(_continuation); +} + + +// Implementation of StoreFlattenedArrayStub + +StoreFlattenedArrayStub::StoreFlattenedArrayStub(LIR_Opr array, LIR_Opr index, LIR_Opr value, CodeEmitInfo* info) { + _array = array; + _index = index; + _value = value; + // Tell the register allocator that the runtime call will scratch rax. + _scratch_reg = FrameMap::rax_oop_opr; + _info = new CodeEmitInfo(info); +} + + +void StoreFlattenedArrayStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + ce->store_parameter(_array->as_register(), 2); + ce->store_parameter(_index->as_register(), 1); + ce->store_parameter(_value->as_register(), 0); + __ call(RuntimeAddress(Runtime1::entry_for(StubId::c1_store_flat_array_id))); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + __ jmp(_continuation); +} + + +// Implementation of SubstitutabilityCheckStub + +SubstitutabilityCheckStub::SubstitutabilityCheckStub(LIR_Opr left, LIR_Opr right, CodeEmitInfo* info) { + _left = left; + _right = right; + // Tell the register allocator that the runtime call will scratch rax. 
+ _scratch_reg = FrameMap::rax_oop_opr; + _info = new CodeEmitInfo(info); +} + +void SubstitutabilityCheckStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + ce->store_parameter(_left->as_register(), 1); + ce->store_parameter(_right->as_register(), 0); + __ call(RuntimeAddress(Runtime1::entry_for(StubId::c1_substitutability_check_id))); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + __ jmp(_continuation); +} + + // Implementation of NewInstanceStub NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, StubId stub_id) { @@ -167,11 +241,13 @@ void NewTypeArrayStub::emit_code(LIR_Assembler* ce) { // Implementation of NewObjectArrayStub -NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) { +NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, + CodeEmitInfo* info, bool is_null_free) { _klass_reg = klass_reg; _result = result; _length = length; _info = new CodeEmitInfo(info); + _is_null_free = is_null_free; } @@ -180,7 +256,11 @@ void NewObjectArrayStub::emit_code(LIR_Assembler* ce) { __ bind(_entry); assert(_length->as_register() == rbx, "length must in rbx,"); assert(_klass_reg->as_register() == rdx, "klass_reg must in rdx"); - __ call(RuntimeAddress(Runtime1::entry_for(StubId::c1_new_object_array_id))); + if (_is_null_free) { + __ call(RuntimeAddress(Runtime1::entry_for(StubId::c1_new_null_free_array_id))); + } else { + __ call(RuntimeAddress(Runtime1::entry_for(StubId::c1_new_object_array_id))); + } ce->add_call_info_here(_info); ce->verify_oop_map(_info); assert(_result->as_register() == rax, "result must in rax,"); @@ -190,6 +270,15 @@ void NewObjectArrayStub::emit_code(LIR_Assembler* ce) { void MonitorEnterStub::emit_code(LIR_Assembler* ce) { assert(__ rsp_offset() == 0, "frame size should be fixed"); __ bind(_entry); + if 
(_throw_ie_stub != nullptr) { + // When we come here, _obj_reg has already been checked to be non-null. + const int is_value_mask = markWord::inline_type_pattern; + Register mark = _scratch_reg->as_register(); + __ movptr(mark, Address(_obj_reg->as_register(), oopDesc::mark_offset_in_bytes())); + __ andptr(mark, is_value_mask); + __ cmpl(mark, is_value_mask); + __ jcc(Assembler::equal, *_throw_ie_stub->entry()); + } ce->store_parameter(_obj_reg->as_register(), 1); ce->store_parameter(_lock_reg->as_register(), 0); StubId enter_id; diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index 1f660d6d34931..9e89bbed6fb00 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -31,12 +31,15 @@ #include "c1/c1_Runtime1.hpp" #include "c1/c1_ValueStack.hpp" #include "ci/ciArrayKlass.hpp" +#include "ci/ciInlineKlass.hpp" #include "ci/ciInstance.hpp" +#include "ci/ciObjArrayKlass.hpp" #include "code/aotCodeCache.hpp" #include "compiler/oopMap.hpp" #include "gc/shared/collectedHeap.hpp" #include "gc/shared/gc_globals.hpp" #include "nativeInst_x86.hpp" +#include "oops/oop.inline.hpp" #include "oops/objArrayKlass.hpp" #include "runtime/frame.inline.hpp" #include "runtime/safepointMechanism.hpp" @@ -424,7 +427,7 @@ int LIR_Assembler::emit_unwind_handler() { } // remove the activation and dispatch to the unwind handler - __ remove_frame(initial_frame_size_in_bytes()); + __ remove_frame(initial_frame_size_in_bytes(), needs_stack_repair()); __ jump(RuntimeAddress(Runtime1::entry_for(StubId::c1_unwind_exception_id))); // Emit the slow path assembly @@ -469,9 +472,53 @@ void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) { if (!result->is_illegal() && result->is_float_kind() && !result->is_xmm_register()) { assert(result->fpu() == 0, "result must already be on TOS"); } + if (InlineTypeReturnedAsFields) { + #ifndef _LP64 + Unimplemented(); + #endif + // 
Check if we are returning a non-null inline type and load its fields into registers + ciType* return_type = compilation()->method()->return_type(); + if (return_type->is_inlinetype()) { + ciInlineKlass* vk = return_type->as_inline_klass(); + if (vk->can_be_returned_as_fields()) { + address unpack_handler = vk->unpack_handler(); + assert(unpack_handler != nullptr, "must be"); + __ call(RuntimeAddress(unpack_handler)); + } + } else if (return_type->is_instance_klass() && (!return_type->is_loaded() || StressCallingConvention)) { + Label skip; + Label not_null; + __ testptr(rax, rax); + __ jcc(Assembler::notZero, not_null); + // Returned value is null, zero all return registers because they may belong to oop fields + __ xorq(j_rarg1, j_rarg1); + __ xorq(j_rarg2, j_rarg2); + __ xorq(j_rarg3, j_rarg3); + __ xorq(j_rarg4, j_rarg4); + __ xorq(j_rarg5, j_rarg5); + __ jmp(skip); + __ bind(not_null); + + // Check if we are returning a non-null inline type and load its fields into registers + __ test_oop_is_not_inline_type(rax, rscratch1, skip, /* can_be_null= */ false); + + // Load fields from a buffered value with an inline class specific handler + __ load_klass(rdi, rax, rscratch1); + __ movptr(rdi, Address(rdi, InlineKlass::adr_members_offset())); + __ movptr(rdi, Address(rdi, InlineKlass::unpack_handler_offset())); + // Unpack handler can be null if inline type is not scalarizable in returns + __ testptr(rdi, rdi); + __ jcc(Assembler::zero, skip); + __ call(rdi); + + __ bind(skip); + } + // At this point, rax points to the value object (for interpreter or C1 caller). + // The fields of the object are copied into registers (for C2 caller). 
+ } // Pop the stack before the safepoint code - __ remove_frame(initial_frame_size_in_bytes()); + __ remove_frame(initial_frame_size_in_bytes(), needs_stack_repair()); if (StackReservedPages > 0 && compilation()->has_reserved_stack_access()) { __ reserved_stack_check(); @@ -487,6 +534,10 @@ void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) { } +int LIR_Assembler::store_inline_type_fields_to_buf(ciInlineKlass* vk) { + return (__ store_inline_type_fields_to_buf(vk, false)); +} + int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) { guarantee(info != nullptr, "Shouldn't be null"); int offset = __ offset(); @@ -1249,7 +1300,7 @@ void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) { Register len = op->len()->as_register(); __ movslq(len, len); - if (UseSlowPath || + if (UseSlowPath || op->always_slow_path() || (!UseFastNewObjectArray && is_reference_type(op->type())) || (!UseFastNewTypeArray && !is_reference_type(op->type()))) { __ jmp(*op->stub()->entry()); @@ -1324,24 +1375,26 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L assert_different_registers(obj, k_RInfo, klass_RInfo); - __ testptr(obj, obj); - if (op->should_profile()) { - Label not_null; - Register mdo = klass_RInfo; - __ mov_metadata(mdo, md->constant_encoding()); - __ jccb(Assembler::notEqual, not_null); - // Object is null; update MDO and exit - Address data_addr(mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset())); - int header_bits = BitData::null_seen_byte_constant(); - __ orb(data_addr, header_bits); - __ jmp(*obj_is_null); - __ bind(not_null); + if (op->need_null_check()) { + __ testptr(obj, obj); + if (op->should_profile()) { + Label not_null; + Register mdo = klass_RInfo; + __ mov_metadata(mdo, md->constant_encoding()); + __ jccb(Assembler::notEqual, not_null); + // Object is null; update MDO and exit + Address data_addr(mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset())); + int 
header_bits = BitData::null_seen_byte_constant(); + __ orb(data_addr, header_bits); + __ jmp(*obj_is_null); + __ bind(not_null); Register recv = k_RInfo; __ load_klass(recv, obj, tmp_load_klass); type_profile_helper(mdo, md, data, recv); - } else { - __ jcc(Assembler::equal, *obj_is_null); + } else { + __ jcc(Assembler::equal, *obj_is_null); + } } if (!k->is_loaded()) { @@ -1352,6 +1405,7 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L __ verify_oop(obj); if (op->fast_check()) { + assert(!k->is_loaded() || !k->is_obj_array_klass(), "Use refined array for a direct pointer comparison"); // get object class // not a safepoint as obj null check happens earlier __ load_klass(Rtmp1, obj, tmp_load_klass); @@ -1372,7 +1426,18 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L // See if we get an immediate positive hit __ jcc(Assembler::equal, *success_target); // check for self - __ cmpptr(klass_RInfo, k_RInfo); + if (k->is_loaded() && k->is_obj_array_klass()) { + // For a direct pointer comparison, we need the refined array klass pointer + ciKlass* k_refined = ciObjArrayKlass::make(k->as_obj_array_klass()->element_klass()); + if (!k_refined->is_loaded()) { + bailout("encountered unloaded_ciobjarrayklass due to out of memory error"); + return; + } + __ mov_metadata(tmp_load_klass, k_refined->constant_encoding()); + __ cmpptr(klass_RInfo, tmp_load_klass); + } else { + __ cmpptr(klass_RInfo, k_RInfo); + } __ jcc(Assembler::equal, *success_target); __ push_ppx(klass_RInfo); @@ -1502,6 +1567,92 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { } +void LIR_Assembler::emit_opFlattenedArrayCheck(LIR_OpFlattenedArrayCheck* op) { + // We are loading/storing from/to an array that *may* be a flat array (the + // declared type is Object[], abstract[], interface[] or VT.ref[]). + // If this array is a flat array, take the slow path. 
+ __ test_flat_array_oop(op->array()->as_register(), op->tmp()->as_register(), *op->stub()->entry()); +} + +void LIR_Assembler::emit_opNullFreeArrayCheck(LIR_OpNullFreeArrayCheck* op) { + // We are storing into an array that *may* be null-free (the declared type is + // Object[], abstract[], interface[] or VT.ref[]). + Label test_mark_word; + Register tmp = op->tmp()->as_register(); + __ movptr(tmp, Address(op->array()->as_register(), oopDesc::mark_offset_in_bytes())); + __ testl(tmp, markWord::unlocked_value); + __ jccb(Assembler::notZero, test_mark_word); + __ load_prototype_header(tmp, op->array()->as_register(), rscratch1); + __ bind(test_mark_word); + __ testl(tmp, markWord::null_free_array_bit_in_place); +} + +void LIR_Assembler::emit_opSubstitutabilityCheck(LIR_OpSubstitutabilityCheck* op) { + Label L_oops_equal; + Label L_oops_not_equal; + Label L_end; + + Register left = op->left()->as_register(); + Register right = op->right()->as_register(); + + __ cmpptr(left, right); + __ jcc(Assembler::equal, L_oops_equal); + + // (1) Null check -- if one of the operands is null, the other must not be null (because + // the two references are not equal), so they are not substitutable, + __ testptr(left, left); + __ jcc(Assembler::zero, L_oops_not_equal); + __ testptr(right, right); + __ jcc(Assembler::zero, L_oops_not_equal); + + ciKlass* left_klass = op->left_klass(); + ciKlass* right_klass = op->right_klass(); + + // (2) Inline type check -- if either of the operands is not an inline type, + // they are not substitutable. We do this only if we are not sure that the + // operands are inline type + if ((left_klass == nullptr || right_klass == nullptr) ||// The klass is still unloaded, or came from a Phi node. 
+ !left_klass->is_inlinetype() || !right_klass->is_inlinetype()) { + Register tmp = op->tmp1()->as_register(); + __ movptr(tmp, (intptr_t)markWord::inline_type_pattern); + __ andptr(tmp, Address(left, oopDesc::mark_offset_in_bytes())); + __ andptr(tmp, Address(right, oopDesc::mark_offset_in_bytes())); + __ cmpptr(tmp, (intptr_t)markWord::inline_type_pattern); + __ jcc(Assembler::notEqual, L_oops_not_equal); + } + + // (3) Same klass check: if the operands are of different klasses, they are not substitutable. + if (left_klass != nullptr && left_klass->is_inlinetype() && left_klass == right_klass) { + // No need to load klass -- the operands are statically known to be the same inline klass. + __ jmp(*op->stub()->entry()); + } else { + Register tmp1 = op->tmp1()->as_register(); + Register tmp2 = op->tmp2()->as_register(); + if (left == right) { // same operand, so clearly the same klasses, let's save the check + __ jmp (*op->stub()->entry()); // -> do slow check + } else { + __ cmp_klasses_from_objects(left, right, tmp1, tmp2); + __ jcc(Assembler::equal, *op->stub()->entry()); // same klass -> do slow check + } + // fall through to L_oops_not_equal + } + + __ bind(L_oops_not_equal); + move(op->not_equal_result(), op->result_opr()); + __ jmp(L_end); + + // We've returned from the stub. RAX contains 0x0 IFF the two + // operands are not substitutable. 
(Don't compare against 0x1 in case the + // C compiler is naughty) + __ bind(*op->stub()->continuation()); + __ cmpl(rax, 0); + __ jcc(Assembler::equal, L_oops_not_equal); // (call_stub() == 0x0) -> not_equal + + __ bind(L_oops_equal); + move(op->equal_result(), op->result_opr()); // (call_stub() != 0x0) -> equal + // fall-through + __ bind(L_end); +} void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { if (op->code() == lir_cas_int || op->code() == lir_cas_obj) { @@ -1547,6 +1698,21 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { } } +void LIR_Assembler::move(LIR_Opr src, LIR_Opr dst) { + assert(dst->is_cpu_register(), "must be"); + assert(dst->type() == src->type(), "must be"); + + if (src->is_cpu_register()) { + reg2reg(src, dst); + } else if (src->is_stack()) { + stack2reg(src, dst, dst->type()); + } else if (src->is_constant()) { + const2reg(src, dst, lir_patch_none, nullptr); + } else { + ShouldNotReachHere(); + } +} + void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on x86"); @@ -2157,14 +2323,14 @@ void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { assert((__ offset() + NativeCall::displacement_offset) % BytesPerWord == 0, "must be aligned"); __ call(AddressLiteral(op->addr(), rtype)); - add_call_info(code_offset(), op->info()); + add_call_info(code_offset(), op->info(), op->maybe_return_as_fields()); __ post_call_nop(); } void LIR_Assembler::ic_call(LIR_OpJavaCall* op) { __ ic_call(op->addr()); - add_call_info(code_offset(), op->info()); + add_call_info(code_offset(), op->info(), op->maybe_return_as_fields()); assert((__ offset() - NativeCall::instruction_size + NativeCall::displacement_offset) % BytesPerWord == 0, "must be aligned"); __ post_call_nop(); @@ -2331,6 +2497,20 @@ void 
LIR_Assembler::store_parameter(Metadata* m, int offset_from_rsp_in_words) { } +void LIR_Assembler::arraycopy_inlinetype_check(Register obj, Register tmp, CodeStub* slow_path, bool is_dest, bool null_check) { + if (null_check) { + __ testptr(obj, obj); + __ jcc(Assembler::zero, *slow_path->entry()); + } + if (is_dest) { + __ test_null_free_array_oop(obj, tmp, *slow_path->entry()); + __ test_flat_array_oop(obj, tmp, *slow_path->entry()); + } else { + __ test_flat_array_oop(obj, tmp, *slow_path->entry()); + } +} + + // This code replaces a call to arraycopy; no exception may // be thrown in this code, they must be thrown in the System.arraycopy // activation frame; we could save some checks if this would not be the case @@ -2350,6 +2530,12 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { BasicType basic_type = default_type != nullptr ? default_type->element_type()->basic_type() : T_ILLEGAL; if (is_reference_type(basic_type)) basic_type = T_OBJECT; + if (flags & LIR_OpArrayCopy::always_slow_path) { + __ jmp(*stub->entry()); + __ bind(*stub->continuation()); + return; + } + // if we don't know anything, just go through the generic arraycopy if (default_type == nullptr) { // save outgoing arguments on stack in case call to System.arraycopy is needed @@ -2426,6 +2612,14 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { return; } + // Handle inline type arrays + if (flags & LIR_OpArrayCopy::src_inlinetype_check) { + arraycopy_inlinetype_check(src, tmp, stub, false, (flags & LIR_OpArrayCopy::src_null_check)); + } + if (flags & LIR_OpArrayCopy::dst_inlinetype_check) { + arraycopy_inlinetype_check(dst, tmp, stub, true, (flags & LIR_OpArrayCopy::dst_null_check)); + } + assert(default_type != nullptr && default_type->is_array_klass() && default_type->is_loaded(), "must be true at this point"); int elem_size = type2aelembytes(basic_type); @@ -2963,6 +3157,27 @@ void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { __ bind(next); } +void 
LIR_Assembler::emit_profile_inline_type(LIR_OpProfileInlineType* op) { + Register obj = op->obj()->as_register(); + Register tmp = op->tmp()->as_pointer_register(); + Address mdo_addr = as_Address(op->mdp()->as_address_ptr()); + bool not_null = op->not_null(); + int flag = op->flag(); + + Label not_inline_type; + if (!not_null) { + __ testptr(obj, obj); + __ jccb(Assembler::zero, not_inline_type); + } + + __ test_oop_is_not_inline_type(obj, tmp, not_inline_type); + + __ orb(mdo_addr, flag); + + __ bind(not_inline_type); +} + + void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst) { __ lea(dst->as_register(), frame_map()->address_for_monitor_lock(monitor_no)); } @@ -3148,6 +3363,9 @@ void LIR_Assembler::get_thread(LIR_Opr result_reg) { __ mov(result_reg->as_register(), r15_thread); } +void LIR_Assembler::check_orig_pc() { + __ cmpptr(frame_map()->address_for_orig_pc_addr(), NULL_WORD); +} void LIR_Assembler::peephole(LIR_List*) { // do nothing for now diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.hpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.hpp index 6f179255e4a7a..ed9105fabc000 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.hpp @@ -51,6 +51,9 @@ _deopt_handler_size = 7 }; + void arraycopy_inlinetype_check(Register obj, Register tmp, CodeStub* slow_path, bool is_dest, bool null_check); + void move(LIR_Opr src, LIR_Opr dst); + public: void store_parameter(Register r, int offset_from_esp_in_words); diff --git a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp index cc068cda7a9b1..7ad65fbf4cbb2 100644 --- a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp @@ -30,6 +30,7 @@ #include "c1/c1_Runtime1.hpp" #include "c1/c1_ValueStack.hpp" #include "ci/ciArray.hpp" +#include "ci/ciInlineKlass.hpp" #include "ci/ciObjArrayKlass.hpp" #include "ci/ciTypeArrayKlass.hpp" #include "gc/shared/c1/barrierSetC1.hpp" @@ -281,17 
+282,24 @@ void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { // "lock" stores the address of the monitor stack slot, so this is not an oop LIR_Opr lock = new_register(T_INT); + // Need a scratch register for inline types on x86 + LIR_Opr scratch = new_register(T_ADDRESS); CodeEmitInfo* info_for_exception = nullptr; if (x->needs_null_check()) { info_for_exception = state_for(x); } + + CodeStub* throw_ie_stub = x->maybe_inlinetype() ? + new SimpleExceptionStub(StubId::c1_throw_identity_exception_id, + obj.result(), state_for(x)) + : nullptr; + // this CodeEmitInfo must not have the xhandlers because here the // object is already locked (xhandlers expect object to be unlocked) CodeEmitInfo* info = state_for(x, x->state(), true); - LIR_Opr tmp = new_register(T_ADDRESS); - monitor_enter(obj.result(), lock, syncTempOpr(), tmp, - x->monitor_no(), info_for_exception, info); + monitor_enter(obj.result(), lock, syncTempOpr(), scratch, + x->monitor_no(), info_for_exception, info, throw_ie_stub); } @@ -1129,19 +1137,19 @@ void LIRGenerator::do_Convert(Convert* x) { void LIRGenerator::do_NewInstance(NewInstance* x) { print_if_not_loaded(x); - CodeEmitInfo* info = state_for(x, x->state()); + CodeEmitInfo* info = state_for(x, x->needs_state_before() ? 
x->state_before() : x->state()); LIR_Opr reg = result_register_for(x->type()); new_instance(reg, x->klass(), x->is_unresolved(), - FrameMap::rcx_oop_opr, - FrameMap::rdi_oop_opr, - FrameMap::rsi_oop_opr, - LIR_OprFact::illegalOpr, - FrameMap::rdx_metadata_opr, info); + !x->is_unresolved() && x->klass()->is_inlinetype(), + FrameMap::rcx_oop_opr, + FrameMap::rdi_oop_opr, + FrameMap::rsi_oop_opr, + LIR_OprFact::illegalOpr, + FrameMap::rdx_metadata_opr, info); LIR_Opr result = rlock_result(x); __ move(reg, result); } - void LIRGenerator::do_NewTypeArray(NewTypeArray* x) { CodeEmitInfo* info = nullptr; if (x->state_before() != nullptr && x->state_before()->force_reexecute()) { @@ -1194,13 +1202,18 @@ void LIRGenerator::do_NewObjectArray(NewObjectArray* x) { length.load_item_force(FrameMap::rbx_opr); LIR_Opr len = length.result(); - CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info); - ciKlass* obj = (ciKlass*) ciObjArrayKlass::make(x->klass()); + ciKlass* obj = ciObjArrayKlass::make(x->klass()); + + // TODO 8265122 Implement a fast path for this + bool is_flat = obj->is_loaded() && obj->is_flat_array_klass(); + bool is_null_free = obj->is_loaded() && obj->as_array_klass()->is_elem_null_free(); + + CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info, is_null_free); if (obj == ciEnv::unloaded_ciobjarrayklass()) { BAILOUT("encountered unloaded_ciobjarrayklass due to out of memory error"); } klass2reg_with_patching(klass_reg, obj, patching_info); - __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path); + __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path, true, is_null_free || is_flat); LIR_Opr result = rlock_result(x); __ move(reg, result); @@ -1295,7 +1308,7 @@ void LIRGenerator::do_CheckCast(CheckCast* x) { __ checkcast(reg, obj.result(), x->klass(), new_register(objectType), new_register(objectType), tmp3, x->direct_compare(), info_for_exception, patching_info, stub, - 
x->profiled_method(), x->profiled_bci()); + x->profiled_method(), x->profiled_bci(), x->is_null_free()); } @@ -1349,7 +1362,7 @@ void LIRGenerator::do_If(If* x) { if (tag == longTag && yin->is_constant() && yin->get_jlong_constant() == 0 && (cond == If::eql || cond == If::neq)) { // inline long zero yin->dont_load_item(); - } else if (tag == longTag || tag == floatTag || tag == doubleTag) { + } else if (tag == longTag || tag == floatTag || tag == doubleTag || x->substitutability_check()) { // longs cannot handle constants at right side yin->load_item(); } else { @@ -1369,7 +1382,11 @@ void LIRGenerator::do_If(If* x) { __ safepoint(safepoint_poll_register(), state_for(x, x->state_before())); } - __ cmp(lir_cond(cond), left, right); + if (x->substitutability_check()) { + substitutability_check(x, *xin, *yin); + } else { + __ cmp(lir_cond(cond), left, right); + } // Generate branch profiling. Profiling code doesn't kill flags. profile_branch(x, cond); move_to_phi(x->state()); diff --git a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp index 7adaea48ff14c..0f77923fd85ca 100644 --- a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp @@ -33,7 +33,9 @@ #include "interpreter/interpreter.hpp" #include "oops/arrayOop.hpp" #include "oops/markWord.hpp" +#include "runtime/arguments.hpp" #include "runtime/basicLock.hpp" +#include "runtime/frame.inline.hpp" #include "runtime/globals.hpp" #include "runtime/os.hpp" #include "runtime/sharedRuntime.hpp" @@ -82,13 +84,21 @@ void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, i void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register t1, Register t2) { assert_different_registers(obj, klass, len, t1, t2); - if (UseCompactObjectHeaders) { + if (UseCompactObjectHeaders || Arguments::is_valhalla_enabled()) { + // COH: Markword contains class pointer which is only known at runtime. 
+ // Valhalla: Could have value class which has a different prototype header to a normal object. + // In both cases, we need to fetch dynamically. movptr(t1, Address(klass, Klass::prototype_header_offset())); movptr(Address(obj, oopDesc::mark_offset_in_bytes()), t1); - } else { // Take care not to kill klass + } else { + // Otherwise: Can use the statically computed prototype header which is the same for every object. movptr(Address(obj, oopDesc::mark_offset_in_bytes()), checked_cast(markWord::prototype().value())); + } + if (!UseCompactObjectHeaders) { + // COH: Markword already contains class pointer. Nothing else to do. + // Otherwise: Store encoded klass pointer following the markword movptr(t1, klass); - encode_klass_not_null(t1, rscratch1); + encode_klass_not_null(t1, rscratch1); // Take care not to kill klass movl(Address(obj, oopDesc::klass_offset_in_bytes()), t1); } @@ -219,30 +229,53 @@ void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1, verify_oop(obj); } -void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_bytes) { - assert(bang_size_in_bytes >= frame_size_in_bytes, "stack bang size incorrect"); +void C1_MacroAssembler::build_frame_helper(int frame_size_in_bytes, int sp_offset_for_orig_pc, int sp_inc, bool reset_orig_pc, bool needs_stack_repair) { + push(rbp); +#ifdef ASSERT + if (sp_inc > 0) { + movl(Address(rsp, 0), badRegWordVal); + movl(Address(rsp, VMRegImpl::stack_slot_size), badRegWordVal); + } +#endif + if (PreserveFramePointer) { + mov(rbp, rsp); + } + decrement(rsp, frame_size_in_bytes); + + if (needs_stack_repair) { + // Save stack increment (also account for fixed framesize and rbp) + assert((sp_inc & (StackAlignmentInBytes-1)) == 0, "stack increment not aligned"); + int real_frame_size = sp_inc + frame_size_in_bytes; + movptr(Address(rsp, frame_size_in_bytes - wordSize), real_frame_size); + } + if (reset_orig_pc) { + // Zero orig_pc to detect deoptimization during buffering in the entry 
points + movptr(Address(rsp, sp_offset_for_orig_pc), 0); + } +} + +void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_bytes, + int sp_offset_for_orig_pc, + bool needs_stack_repair, bool has_scalarized_args, + Label* verified_inline_entry_label) { // Make sure there is enough stack space for this method's activation. // Note that we do this before doing an enter(). This matches the // ordering of C2's stack overflow check / rsp decrement and allows // the SharedRuntime stack overflow handling to be consistent // between the two compilers. + assert(bang_size_in_bytes >= frame_size_in_bytes, "stack bang size incorrect"); generate_stack_overflow_check(bang_size_in_bytes); - push(rbp); - if (PreserveFramePointer) { - mov(rbp, rsp); - } - decrement(rsp, frame_size_in_bytes); // does not emit code for frame_size == 0 + build_frame_helper(frame_size_in_bytes, sp_offset_for_orig_pc, 0, has_scalarized_args, needs_stack_repair); BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); // C1 code is not hot enough to micro optimize the nmethod entry barrier with an out-of-line stub bs->nmethod_entry_barrier(this, nullptr /* slow_path */, nullptr /* continuation */); -} - -void C1_MacroAssembler::remove_frame(int frame_size_in_bytes) { - increment(rsp, frame_size_in_bytes); // Does not emit code for frame_size == 0 - pop(rbp); + if (verified_inline_entry_label != nullptr) { + // Jump here from the scalarized entry points that already created the frame. + bind(*verified_inline_entry_label); + } } @@ -251,6 +284,63 @@ void C1_MacroAssembler::verified_entry(bool breakAtEntry) { // build frame } +int C1_MacroAssembler::scalarized_entry(const CompiledEntrySignature* ces, int frame_size_in_bytes, int bang_size_in_bytes, int sp_offset_for_orig_pc, Label& verified_inline_entry_label, bool is_inline_ro_entry) { + assert(InlineTypePassFieldsAsArgs, "sanity"); + // Make sure there is enough stack space for this method's activation. 
+ assert(bang_size_in_bytes >= frame_size_in_bytes, "stack bang size incorrect"); + generate_stack_overflow_check(bang_size_in_bytes); + + GrowableArray* sig = ces->sig(); + GrowableArray* sig_cc = is_inline_ro_entry ? ces->sig_cc_ro() : ces->sig_cc(); + VMRegPair* regs = ces->regs(); + VMRegPair* regs_cc = is_inline_ro_entry ? ces->regs_cc_ro() : ces->regs_cc(); + int args_on_stack = ces->args_on_stack(); + int args_on_stack_cc = is_inline_ro_entry ? ces->args_on_stack_cc_ro() : ces->args_on_stack_cc(); + + assert(sig->length() <= sig_cc->length(), "Zero-sized inline class not allowed!"); + BasicType* sig_bt = NEW_RESOURCE_ARRAY(BasicType, sig_cc->length()); + int args_passed = sig->length(); + int args_passed_cc = SigEntry::fill_sig_bt(sig_cc, sig_bt); + + // Create a temp frame so we can call into the runtime. It must be properly set up to accommodate GC. + build_frame_helper(frame_size_in_bytes, sp_offset_for_orig_pc, 0, true, ces->c1_needs_stack_repair()); + + // The runtime call might safepoint, make sure nmethod entry barrier is executed + BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + // C1 code is not hot enough to micro optimize the nmethod entry barrier with an out-of-line stub + bs->nmethod_entry_barrier(this, nullptr /* slow_path */, nullptr /* continuation */); + + movptr(rbx, (intptr_t)(ces->method())); + if (is_inline_ro_entry) { + call(RuntimeAddress(Runtime1::entry_for(StubId::c1_buffer_inline_args_no_receiver_id))); + } else { + call(RuntimeAddress(Runtime1::entry_for(StubId::c1_buffer_inline_args_id))); + } + int rt_call_offset = offset(); + + // Remove the temp frame + addptr(rsp, frame_size_in_bytes); + pop(rbp); + + // Check if we need to extend the stack for packing + int sp_inc = 0; + if (args_on_stack > args_on_stack_cc) { + sp_inc = extend_stack_for_inline_args(args_on_stack); + } + + shuffle_inline_args(true, is_inline_ro_entry, sig_cc, + args_passed_cc, args_on_stack_cc, regs_cc, // from + args_passed, 
args_on_stack, regs, // to + sp_inc, rax); + + // Create the real frame. Below jump will then skip over the stack banging and frame + // setup code in the verified_inline_entry (which has a different real_frame_size). + build_frame_helper(frame_size_in_bytes, sp_offset_for_orig_pc, sp_inc, false, ces->c1_needs_stack_repair()); + + jmp(verified_inline_entry_label); + return rt_call_offset; +} + void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { // rbp, + 0: link // + 1: return address diff --git a/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp b/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp index 96439c719907e..20b6ee0a15ff7 100644 --- a/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp +++ b/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -882,6 +882,7 @@ OopMapSet* Runtime1::generate_code_for(StubId id, StubAssembler* sasm) { case StubId::c1_new_type_array_id: case StubId::c1_new_object_array_id: + case StubId::c1_new_null_free_array_id: { Register length = rbx; // Incoming Register klass = rdx; // Incoming @@ -889,8 +890,10 @@ OopMapSet* Runtime1::generate_code_for(StubId id, StubAssembler* sasm) { if (id == StubId::c1_new_type_array_id) { __ set_info("new_type_array", dont_gc_arguments); - } else { + } else if (id == StubId::c1_new_object_array_id) { __ set_info("new_object_array", dont_gc_arguments); + } else { + __ set_info("new_null_free_array", dont_gc_arguments); } #ifdef ASSERT @@ -900,12 +903,28 @@ OopMapSet* Runtime1::generate_code_for(StubId id, StubAssembler* sasm) { Register t0 = obj; __ movl(t0, Address(klass, Klass::layout_helper_offset())); __ sarl(t0, Klass::_lh_array_tag_shift); - int tag = ((id == StubId::c1_new_type_array_id) - ? 
Klass::_lh_array_tag_type_value - : Klass::_lh_array_tag_obj_value); - __ cmpl(t0, tag); - __ jcc(Assembler::equal, ok); - __ stop("assert(is an array klass)"); + switch (id) { + case StubId::c1_new_type_array_id: + __ cmpl(t0, Klass::_lh_array_tag_type_value); + __ jcc(Assembler::equal, ok); + __ stop("assert(is a type array klass)"); + break; + case StubId::c1_new_object_array_id: + __ cmpl(t0, (Klass::_lh_array_tag_ref_value)); // new "[Ljava/lang/Object;" + __ jcc(Assembler::equal, ok); + __ cmpl(t0, Klass::_lh_array_tag_flat_value); // new "[LVT;" + __ jcc(Assembler::equal, ok); + __ stop("assert(is an object or inline type array klass)"); + break; + case StubId::c1_new_null_free_array_id: + __ cmpl(t0, Klass::_lh_array_tag_flat_value); // the array can be a flat array. + __ jcc(Assembler::equal, ok); + __ cmpl(t0, (Klass::_lh_array_tag_ref_value)); // the array cannot be a flat array (due to InlineArrayElementMaxFlatSize, etc) + __ jcc(Assembler::equal, ok); + __ stop("assert(is an object or inline type array klass)"); + break; + default: ShouldNotReachHere(); + } __ should_not_reach_here(); __ bind(ok); } @@ -916,8 +935,11 @@ OopMapSet* Runtime1::generate_code_for(StubId id, StubAssembler* sasm) { int call_offset; if (id == StubId::c1_new_type_array_id) { call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_type_array), klass, length); - } else { + } else if (id == StubId::c1_new_object_array_id) { call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_object_array), klass, length); + } else { + assert(id == StubId::c1_new_null_free_array_id, "must be"); + call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_null_free_array), klass, length); } oop_maps = new OopMapSet(); @@ -949,6 +971,83 @@ OopMapSet* Runtime1::generate_code_for(StubId id, StubAssembler* sasm) { } break; + case StubId::c1_load_flat_array_id: + { + StubFrame f(sasm, "load_flat_array", dont_gc_arguments); + OopMap* map = save_live_registers(sasm, 3); + 
+ // Called with store_parameter and not C abi + + f.load_argument(1, rax); // rax,: array + f.load_argument(0, rbx); // rbx,: index + int call_offset = __ call_RT(rax, noreg, CAST_FROM_FN_PTR(address, load_flat_array), rax, rbx); + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_rax(sasm); + + // rax,: loaded element at array[index] + __ verify_oop(rax); + } + break; + + case StubId::c1_store_flat_array_id: + { + StubFrame f(sasm, "store_flat_array", dont_gc_arguments); + OopMap* map = save_live_registers(sasm, 4); + + // Called with store_parameter and not C abi + + f.load_argument(2, rax); // rax,: array + f.load_argument(1, rbx); // rbx,: index + f.load_argument(0, rcx); // rcx,: value + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, store_flat_array), rax, rbx, rcx); + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_rax(sasm); + } + break; + + case StubId::c1_substitutability_check_id: + { + StubFrame f(sasm, "substitutability_check", dont_gc_arguments); + OopMap* map = save_live_registers(sasm, 3); + + // Called with store_parameter and not C abi + + f.load_argument(1, rax); // rax,: left + f.load_argument(0, rbx); // rbx,: right + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, substitutability_check), rax, rbx); + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_rax(sasm); + + // rax,: are the two operands substitutable + } + break; + + + case StubId::c1_buffer_inline_args_id: + case StubId::c1_buffer_inline_args_no_receiver_id: + { + const char* name = (id == StubId::c1_buffer_inline_args_id) ? + "buffer_inline_args" : "buffer_inline_args_no_receiver"; + StubFrame f(sasm, name, dont_gc_arguments); + OopMap* map = save_live_registers(sasm, 2); + Register method = rbx; + address entry = (id == StubId::c1_buffer_inline_args_id) ? 
+ CAST_FROM_FN_PTR(address, buffer_inline_args) : + CAST_FROM_FN_PTR(address, buffer_inline_args_no_receiver); + int call_offset = __ call_RT(rax, noreg, entry, method); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_rax(sasm); + __ verify_oop(rax); // rax: an array of buffered value objects + } + break; + case StubId::c1_register_finalizer_id: { __ set_info("register_finalizer", dont_gc_arguments); @@ -1042,11 +1141,23 @@ OopMapSet* Runtime1::generate_code_for(StubId id, StubAssembler* sasm) { break; case StubId::c1_throw_incompatible_class_change_error_id: - { StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments); + { StubFrame f(sasm, "throw_incompatible_class_change_error", dont_gc_arguments); oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); } break; + case StubId::c1_throw_illegal_monitor_state_exception_id: + { StubFrame f(sasm, "throw_illegal_monitor_state_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_illegal_monitor_state_exception), false); + } + break; + + case StubId::c1_throw_identity_exception_id: + { StubFrame f(sasm, "throw_identity_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_identity_exception), true); + } + break; + case StubId::c1_slow_subtype_check_id: { // Typical calling sequence: diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp index 69308bb2a7e8d..c1b2a3e8c79e2 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp @@ -51,7 +51,30 @@ #endif // C2 compiled method's prolog code. -void C2_MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b, bool is_stub) { +// Beware! 
This sp_inc is NOT the same as the one mentioned in MacroAssembler::remove_frame but only the size +// of the extension space + the additional copy of the return address. That means, it doesn't contain the +// frame size (where the local and sp_inc are) and the saved RBP. +void C2_MacroAssembler::verified_entry(Compile* C, int sp_inc) { + if (C->clinit_barrier_on_entry()) { + assert(VM_Version::supports_fast_class_init_checks(), "sanity"); + assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started"); + + Label L_skip_barrier; + Register klass = rscratch1; + + mov_metadata(klass, C->method()->holder()->constant_encoding()); + clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/); + + jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path + + bind(L_skip_barrier); + } + + int framesize = C->output()->frame_size_in_bytes(); + int bangsize = C->output()->bang_size_in_bytes(); + bool fp_mode_24b = false; + int stack_bang_size = C->output()->need_stack_bang(bangsize) ? bangsize : 0; + assert(stack_bang_size >= framesize || stack_bang_size <= 0, "stack bang size incorrect"); assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); @@ -70,6 +93,12 @@ void C2_MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool // We always push rbp, so that on return to interpreter rbp, will be // restored correctly and we can correct the stack. push(rbp); +#ifdef ASSERT + if (sp_inc > 0) { + movl(Address(rsp, 0), badRegWordVal); + movl(Address(rsp, VMRegImpl::stack_slot_size), badRegWordVal); + } +#endif // Save caller's stack pointer into RBP if the frame pointer is preserved. if (PreserveFramePointer) { mov(rbp, rsp); @@ -87,6 +116,12 @@ void C2_MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool // Save RBP register now. 
framesize -= wordSize; movptr(Address(rsp, framesize), rbp); +#ifdef ASSERT + if (sp_inc > 0) { + movl(Address(rsp, framesize), badRegWordVal); + movl(Address(rsp, framesize + VMRegImpl::stack_slot_size), badRegWordVal); + } +#endif // Save caller's stack pointer into RBP if the frame pointer is preserved. if (PreserveFramePointer) { movptr(rbp, rsp); @@ -96,6 +131,12 @@ void C2_MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool } } + if (C->needs_stack_repair()) { + // Save stack increment just below the saved rbp (also account for fixed framesize and rbp) + assert((sp_inc & (StackAlignmentInBytes-1)) == 0, "stack increment not aligned"); + movptr(Address(rsp, framesize - wordSize), sp_inc + framesize); + } + if (VerifyStackAtCalls) { // Majik cookie to verify stack depth framesize -= wordSize; movptr(Address(rsp, framesize), (int32_t)0xbadb100d); @@ -114,23 +155,23 @@ void C2_MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool bind(L); } #endif +} - if (!is_stub) { - BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); - // We put the non-hot code of the nmethod entry barrier out-of-line in a stub. - Label dummy_slow_path; - Label dummy_continuation; - Label* slow_path = &dummy_slow_path; - Label* continuation = &dummy_continuation; - if (!Compile::current()->output()->in_scratch_emit_size()) { - // Use real labels from actual stub when not emitting code for the purpose of measuring its size - C2EntryBarrierStub* stub = new (Compile::current()->comp_arena()) C2EntryBarrierStub(); - Compile::current()->output()->add_stub(stub); - slow_path = &stub->entry(); - continuation = &stub->continuation(); - } - bs->nmethod_entry_barrier(this, slow_path, continuation); +void C2_MacroAssembler::entry_barrier() { + BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + // We put the non-hot code of the nmethod entry barrier out-of-line in a stub. 
+ Label dummy_slow_path; + Label dummy_continuation; + Label* slow_path = &dummy_slow_path; + Label* continuation = &dummy_continuation; + if (!Compile::current()->output()->in_scratch_emit_size()) { + // Use real labels from actual stub when not emitting code for the purpose of measuring its size + C2EntryBarrierStub* stub = new (Compile::current()->comp_arena()) C2EntryBarrierStub(); + Compile::current()->output()->add_stub(stub); + slow_path = &stub->entry(); + continuation = &stub->continuation(); } + bs->nmethod_entry_barrier(this, slow_path, continuation); } inline Assembler::AvxVectorLen C2_MacroAssembler::vector_length_encoding(int vlen_in_bytes) { diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp index 9b229ad72219a..b6b3d4d3d632b 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp @@ -29,8 +29,9 @@ public: // C2 compiled method's prolog code. - void verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b, bool is_stub); + void verified_entry(Compile* C, int sp_inc = 0); + void entry_barrier(); Assembler::AvxVectorLen vector_length_encoding(int vlen_in_bytes); // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file. 
diff --git a/src/hotspot/cpu/x86/continuationEntry_x86.inline.hpp b/src/hotspot/cpu/x86/continuationEntry_x86.inline.hpp index 7d13a5200eae3..263798cc7118f 100644 --- a/src/hotspot/cpu/x86/continuationEntry_x86.inline.hpp +++ b/src/hotspot/cpu/x86/continuationEntry_x86.inline.hpp @@ -27,6 +27,7 @@ #include "runtime/continuationEntry.hpp" +#include "code/codeCache.inline.hpp" #include "oops/method.inline.hpp" #include "runtime/frame.inline.hpp" #include "runtime/registerMap.hpp" diff --git a/src/hotspot/cpu/x86/continuationFreezeThaw_x86.inline.hpp b/src/hotspot/cpu/x86/continuationFreezeThaw_x86.inline.hpp index 7691a84a9fe83..943b8e7f968b1 100644 --- a/src/hotspot/cpu/x86/continuationFreezeThaw_x86.inline.hpp +++ b/src/hotspot/cpu/x86/continuationFreezeThaw_x86.inline.hpp @@ -57,22 +57,20 @@ inline frame FreezeBase::sender(const frame& f) { if (FKind::interpreted) { return frame(f.sender_sp(), f.interpreter_frame_sender_sp(), f.link(), f.sender_pc()); } - intptr_t** link_addr = link_address(f); - intptr_t* sender_sp = (intptr_t*)(link_addr + frame::sender_sp_offset); // f.unextended_sp() + (fsize/wordSize); // - address sender_pc = (address) *(sender_sp-1); - assert(sender_sp != f.sp(), "must have changed"); + frame::CompiledFramePointers cfp = f.compiled_frame_details(); int slot = 0; - CodeBlob* sender_cb = CodeCache::find_blob_and_oopmap(sender_pc, slot); + CodeBlob* sender_cb = CodeCache::find_blob_and_oopmap(*cfp.sender_pc_addr, slot); + return sender_cb != nullptr - ? frame(sender_sp, sender_sp, *link_addr, sender_pc, sender_cb, - slot == -1 ? nullptr : sender_cb->oop_map_for_slot(slot, sender_pc), false) - : frame(sender_sp, sender_sp, *link_addr, sender_pc); + ? frame(cfp.sender_sp, cfp.sender_sp, *cfp.saved_fp_addr, *cfp.sender_pc_addr, sender_cb, + slot == -1 ? 
nullptr : sender_cb->oop_map_for_slot(slot, *cfp.sender_pc_addr), false) + : frame(cfp.sender_sp, cfp.sender_sp, *cfp.saved_fp_addr, *cfp.sender_pc_addr); } template -frame FreezeBase::new_heap_frame(frame& f, frame& caller) { +frame FreezeBase::new_heap_frame(frame& f, frame& caller, int size_adjust) { assert(FKind::is_instance(f), ""); assert(!caller.is_interpreted_frame() || caller.unextended_sp() == (intptr_t*)caller.at(frame::interpreter_frame_last_sp_offset), ""); @@ -106,14 +104,14 @@ frame FreezeBase::new_heap_frame(frame& f, frame& caller) { fp = FKind::compiled ? *(intptr_t**)(f.sp() - frame::sender_sp_offset) : (intptr_t*)badAddressVal; int fsize = FKind::size(f); - sp = caller.unextended_sp() - fsize; - if (caller.is_interpreted_frame()) { + sp = caller.unextended_sp() - fsize - size_adjust; + if (caller.is_interpreted_frame() && size_adjust == 0) { // If the caller is interpreted, our stackargs are not supposed to overlap with it // so we make more room by moving sp down by argsize int argsize = FKind::stack_argsize(f); sp -= argsize; + caller.set_sp(sp + fsize); } - caller.set_sp(sp + fsize); assert(_cont.tail()->is_in_chunk(sp), ""); @@ -174,11 +172,12 @@ inline void FreezeBase::set_top_frame_metadata_pd(const frame& hf) { assert(frame_pc == ContinuationHelper::Frame::real_pc(hf), ""); } -inline void FreezeBase::patch_pd(frame& hf, const frame& caller) { +inline void FreezeBase::patch_pd(frame& hf, const frame& caller, bool is_bottom_frame) { if (caller.is_interpreted_frame()) { assert(!caller.is_empty(), ""); patch_callee_link_relative(caller, caller.fp()); - } else { + } else if (is_bottom_frame && caller.pc() != nullptr) { + assert(caller.is_compiled_frame(), ""); // If we're the bottom-most frame frozen in this freeze, the caller might have stayed frozen in the chunk, // and its oop-containing fp fixed. We've now just overwritten it, so we must patch it back to its value // as read from the chunk. 
@@ -249,7 +248,7 @@ inline frame ThawBase::new_entry_frame() { return frame(sp, sp, _cont.entryFP(), _cont.entryPC()); // TODO PERF: This finds code blob and computes deopt state } -template frame ThawBase::new_stack_frame(const frame& hf, frame& caller, bool bottom) { +template frame ThawBase::new_stack_frame(const frame& hf, frame& caller, bool bottom, int size_adjust) { assert(FKind::is_instance(hf), ""); // The values in the returned frame object will be written into the callee's stack in patch. @@ -276,24 +275,23 @@ template frame ThawBase::new_stack_frame(const frame& hf, frame& return f; } else { int fsize = FKind::size(hf); - intptr_t* frame_sp = caller.unextended_sp() - fsize; + intptr_t* frame_sp = caller.unextended_sp() - fsize - size_adjust; if (bottom || caller.is_interpreted_frame()) { - int argsize = FKind::stack_argsize(hf); - - fsize += argsize; - frame_sp -= argsize; - caller.set_sp(caller.sp() - argsize); - assert(caller.sp() == frame_sp + (fsize-argsize), ""); - + if (size_adjust == 0) { + int argsize = FKind::stack_argsize(hf); + frame_sp -= argsize; + } frame_sp = align(hf, frame_sp, caller, bottom); + caller.set_sp(frame_sp + fsize + size_adjust); } + assert(is_aligned(frame_sp, frame::frame_alignment), ""); assert(hf.cb() != nullptr, ""); assert(hf.oop_map() != nullptr, ""); intptr_t* fp; if (PreserveFramePointer) { // we need to recreate a "real" frame pointer, pointing into the stack - fp = frame_sp + FKind::size(hf) - frame::sender_sp_offset; + fp = frame_sp + fsize - frame::sender_sp_offset; } else { fp = FKind::stub || FKind::native ? frame_sp + fsize - frame::sender_sp_offset // fp always points to the address below the pushed return pc. We need correct address. 
@@ -307,14 +305,15 @@ inline intptr_t* ThawBase::align(const frame& hf, intptr_t* frame_sp, frame& cal if (((intptr_t)frame_sp & 0xf) != 0) { assert(caller.is_interpreted_frame() || (bottom && hf.compiled_frame_stack_argsize() % 2 != 0), ""); frame_sp--; - caller.set_sp(caller.sp() - 1); } assert(is_aligned(frame_sp, frame::frame_alignment), ""); return frame_sp; } inline void ThawBase::patch_pd(frame& f, const frame& caller) { - patch_callee_link(caller, caller.fp()); + if (caller.is_interpreted_frame() || PreserveFramePointer) { + patch_callee_link(caller, caller.fp()); + } } inline void ThawBase::patch_pd(frame& f, intptr_t* caller_sp) { diff --git a/src/hotspot/cpu/x86/continuationHelper_x86.inline.hpp b/src/hotspot/cpu/x86/continuationHelper_x86.inline.hpp index ffb1b73330360..d701f03742a12 100644 --- a/src/hotspot/cpu/x86/continuationHelper_x86.inline.hpp +++ b/src/hotspot/cpu/x86/continuationHelper_x86.inline.hpp @@ -103,7 +103,8 @@ inline intptr_t** ContinuationHelper::Frame::callee_link_address(const frame& f) } inline address* ContinuationHelper::Frame::return_pc_address(const frame& f) { - return (address*)(f.real_fp() - 1); + frame::CompiledFramePointers cfp = f.compiled_frame_details(); + return cfp.sender_pc_addr; } inline address* ContinuationHelper::InterpretedFrame::return_pc_address(const frame& f) { diff --git a/src/hotspot/cpu/x86/foreignGlobals_x86_64.cpp b/src/hotspot/cpu/x86/foreignGlobals_x86_64.cpp index cc5627f6ffd82..075c16e681173 100644 --- a/src/hotspot/cpu/x86/foreignGlobals_x86_64.cpp +++ b/src/hotspot/cpu/x86/foreignGlobals_x86_64.cpp @@ -47,17 +47,17 @@ const ABIDescriptor ForeignGlobals::parse_abi_descriptor(jobject jabi) { oop abi_oop = JNIHandles::resolve_non_null(jabi); ABIDescriptor abi; - objArrayOop inputStorage = jdk_internal_foreign_abi_ABIDescriptor::inputStorage(abi_oop); + refArrayOop inputStorage = jdk_internal_foreign_abi_ABIDescriptor::inputStorage(abi_oop); parse_register_array(inputStorage, StorageType::INTEGER, 
abi._integer_argument_registers, as_Register); parse_register_array(inputStorage, StorageType::VECTOR, abi._vector_argument_registers, as_XMMRegister); - objArrayOop outputStorage = jdk_internal_foreign_abi_ABIDescriptor::outputStorage(abi_oop); + refArrayOop outputStorage = jdk_internal_foreign_abi_ABIDescriptor::outputStorage(abi_oop); parse_register_array(outputStorage, StorageType::INTEGER, abi._integer_return_registers, as_Register); parse_register_array(outputStorage, StorageType::VECTOR, abi._vector_return_registers, as_XMMRegister); - objArrayOop subarray = oop_cast(outputStorage->obj_at((int) StorageType::X87)); + refArrayOop subarray = oop_cast(outputStorage->obj_at((int) StorageType::X87)); abi._X87_return_registers_noof = subarray->length(); - objArrayOop volatileStorage = jdk_internal_foreign_abi_ABIDescriptor::volatileStorage(abi_oop); + refArrayOop volatileStorage = jdk_internal_foreign_abi_ABIDescriptor::volatileStorage(abi_oop); parse_register_array(volatileStorage, StorageType::INTEGER, abi._integer_additional_volatile_registers, as_Register); parse_register_array(volatileStorage, StorageType::VECTOR, abi._vector_additional_volatile_registers, as_XMMRegister); diff --git a/src/hotspot/cpu/x86/frame_x86.cpp b/src/hotspot/cpu/x86/frame_x86.cpp index 2b06f9ee80c42..8f6c7ba5c1e40 100644 --- a/src/hotspot/cpu/x86/frame_x86.cpp +++ b/src/hotspot/cpu/x86/frame_x86.cpp @@ -145,13 +145,16 @@ bool frame::safe_for_sender(JavaThread *thread) { if (!thread->is_in_full_stack_checked((address)sender_sp)) { return false; } - sender_unextended_sp = sender_sp; // On Intel the return_address is always the word on the stack sender_pc = (address) *(sender_sp-1); // Note: frame::sender_sp_offset is only valid for compiled frame - saved_fp = (intptr_t*) *(sender_sp - frame::sender_sp_offset); - } + intptr_t** saved_fp_addr = (intptr_t**) (sender_sp - frame::sender_sp_offset); + saved_fp = *saved_fp_addr; + // Repair the sender sp if this is a method with scalarized 
inline type args + sender_sp = repair_sender_sp(sender_sp, saved_fp_addr); + sender_unextended_sp = sender_sp; + } if (Continuation::is_return_barrier_entry(sender_pc)) { // sender_pc might be invalid so check that the frame // actually belongs to a Continuation. @@ -610,13 +613,24 @@ void frame::describe_pd(FrameValues& values, int frame_no) { ret_pc_loc = fp() + return_addr_offset; fp_loc = fp(); } else { - ret_pc_loc = real_fp() - return_addr_offset; - fp_loc = real_fp() - sender_sp_offset; + if (cb()->is_nmethod() && cb()->as_nmethod_or_null()->needs_stack_repair()) { + values.describe(frame_no, real_fp() - sender_sp_offset - 1, err_msg("fsize for #%d", frame_no), 1); + } + frame::CompiledFramePointers cfp = compiled_frame_details(); + ret_pc_loc = (intptr_t*)cfp.sender_pc_addr; + fp_loc = (intptr_t*)cfp.saved_fp_addr; } address ret_pc = *(address*)ret_pc_loc; values.describe(frame_no, ret_pc_loc, Continuation::is_return_barrier_entry(ret_pc) ? "return address (return barrier)" : "return address"); values.describe(-1, fp_loc, "saved fp", 0); // "unowned" as value belongs to sender + + intptr_t* ret_pc_loc2 = real_fp() - return_addr_offset; + if (ret_pc_loc2 != ret_pc_loc) { + intptr_t* fp_loc2 = real_fp() - sender_sp_offset; + values.describe(frame_no, ret_pc_loc2, "return address copy #2"); + values.describe(-1, fp_loc2, "saved fp copy #2", 0); + } } } @@ -635,6 +649,64 @@ frame::frame(void* sp, void* fp, void* pc) { #endif +// Check for a method with scalarized inline type arguments that needs +// a stack repair and return the repaired sender stack pointer. +intptr_t* frame::repair_sender_sp(intptr_t* sender_sp, intptr_t** saved_fp_addr) const { + nmethod* nm = _cb->as_nmethod_or_null(); + if (nm != nullptr && nm->needs_stack_repair()) { + // The stack increment resides just below the saved rbp on the stack + // and does not account for the return address and rbp (see MacroAssembler::remove_frame). 
+ intptr_t* real_frame_size_addr = (intptr_t*) (saved_fp_addr - 1); + int real_frame_size = (*real_frame_size_addr / wordSize) + metadata_words_at_bottom; + assert(real_frame_size >= _cb->frame_size() && real_frame_size <= 1000000, "invalid frame size"); + sender_sp = unextended_sp() + real_frame_size; + } + return sender_sp; +} + + +// See comment in MacroAssembler::remove_frame +frame::CompiledFramePointers frame::compiled_frame_details() const { + // frame owned by optimizing compiler + assert(_cb->frame_size() > 0, "must have non-zero frame size"); + intptr_t* sender_sp = unextended_sp() + _cb->frame_size(); + assert(sender_sp == real_fp(), ""); + + // Repair the sender sp if the frame has been extended + sender_sp = repair_sender_sp(sender_sp, (intptr_t**)(sender_sp - frame::sender_sp_offset)); + + CompiledFramePointers cfp; + cfp.sender_sp = sender_sp; + cfp.saved_fp_addr = (intptr_t**)(sender_sp - frame::sender_sp_offset); + // On Intel the return_address is always the word on the stack + cfp.sender_pc_addr = (address*)(sender_sp - frame::return_addr_offset); + + return cfp; +} + +intptr_t* frame::repair_sender_sp(nmethod* nm, intptr_t* sp, intptr_t** saved_fp_addr) { + assert(nm != nullptr && nm->needs_stack_repair(), ""); + // The stack increment resides just below the saved rbp on the stack + // and does not account for the return address and rbp (see MacroAssembler::remove_frame). 
+ intptr_t* real_frame_size_addr = (intptr_t*) (saved_fp_addr - 1); + int real_frame_size = (*real_frame_size_addr / wordSize) + metadata_words_at_bottom; + assert(real_frame_size >= nm->frame_size() && real_frame_size <= 1000000, "invalid frame size"); + return sp + real_frame_size; +} + +bool frame::was_augmented_on_entry(int& real_size) const { + assert(is_compiled_frame(), ""); + if (_cb->as_nmethod_or_null()->needs_stack_repair()) { + // The stack increment resides just below the saved rbp on the stack + // and does not account for the return address and rbp (see MacroAssembler::remove_frame). + intptr_t* real_frame_size_addr = unextended_sp() + _cb->frame_size() - sender_sp_offset - 1; + real_size = (*real_frame_size_addr / wordSize) + metadata_words_at_bottom; + return real_size != _cb->frame_size(); + } + real_size = _cb->frame_size(); + return false; +} + void JavaFrameAnchor::make_walkable() { // last frame set? if (last_Java_sp() == nullptr) return; diff --git a/src/hotspot/cpu/x86/frame_x86.hpp b/src/hotspot/cpu/x86/frame_x86.hpp index 546c40fffe40f..657b9d4c14022 100644 --- a/src/hotspot/cpu/x86/frame_x86.hpp +++ b/src/hotspot/cpu/x86/frame_x86.hpp @@ -138,6 +138,17 @@ } public: + // Support for scalarized inline type calling convention + intptr_t* repair_sender_sp(intptr_t* sender_sp, intptr_t** saved_fp_addr) const; + struct CompiledFramePointers { + intptr_t* sender_sp; // The top of the stack of the sender + intptr_t** saved_fp_addr; // Where RBP is saved on the stack + address* sender_pc_addr; // Where return address (copy #1 in remove_frame's comment) is saved on the stack + }; + CompiledFramePointers compiled_frame_details() const; + static intptr_t* repair_sender_sp(nmethod* nm, intptr_t* sp, intptr_t** saved_fp_addr); + bool was_augmented_on_entry(int& real_size) const; + // Constructors frame(intptr_t* sp, intptr_t* fp, address pc); diff --git a/src/hotspot/cpu/x86/frame_x86.inline.hpp b/src/hotspot/cpu/x86/frame_x86.inline.hpp index 
3f3b951edc887..d7cb1db9b0317 100644 --- a/src/hotspot/cpu/x86/frame_x86.inline.hpp +++ b/src/hotspot/cpu/x86/frame_x86.inline.hpp @@ -32,6 +32,9 @@ #include "interpreter/interpreter.hpp" #include "runtime/sharedRuntime.hpp" #include "runtime/registerMap.hpp" +#ifdef COMPILER1 +#include "c1/c1_Runtime1.hpp" +#endif // Inline functions for Intel frames: @@ -426,26 +429,32 @@ inline frame frame::sender_raw(RegisterMap* map) const { inline frame frame::sender_for_compiled_frame(RegisterMap* map) const { assert(map != nullptr, "map must be set"); - - // frame owned by optimizing compiler - assert(_cb->frame_size() > 0, "must have non-zero frame size"); - intptr_t* sender_sp = unextended_sp() + _cb->frame_size(); - assert(sender_sp == real_fp(), ""); - - // On Intel the return_address is always the word on the stack - address sender_pc = (address) *(sender_sp-1); - - // This is the saved value of EBP which may or may not really be an FP. - // It is only an FP if the sender is an interpreter frame (or C1?). - // saved_fp_addr should be correct even for a bottom thawed frame (with a return barrier) - intptr_t** saved_fp_addr = (intptr_t**) (sender_sp - frame::sender_sp_offset); + CompiledFramePointers cfp = compiled_frame_details(); if (map->update_map()) { // Tell GC to use argument oopmaps for some runtime stubs that need it. // For C1, the runtime stub might not have oop maps, so set this flag // outside of update_register_map. 
- if (!_cb->is_nmethod()) { // compiled frames do not use callee-saved registers - map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); + bool c1_buffering = false; +#ifdef COMPILER1 + nmethod* nm = _cb->as_nmethod_or_null(); + if (nm != nullptr && nm->is_compiled_by_c1() && nm->method()->has_scalarized_args() && + pc() < nm->verified_inline_entry_point()) { + // The VEP and VIEP(RO) of C1-compiled methods call buffer_inline_args_xxx + // before doing any argument shuffling, so we need to scan the oops + // as the caller passes them. + c1_buffering = true; +#ifdef ASSERT + NativeCall* call = nativeCall_before(pc()); + address dest = call->destination(); + assert(dest == Runtime1::entry_for(StubId::c1_buffer_inline_args_no_receiver_id) || + dest == Runtime1::entry_for(StubId::c1_buffer_inline_args_id), "unexpected safepoint in entry point"); +#endif + } +#endif + if (!_cb->is_nmethod() || c1_buffering) { // compiled frames do not use callee-saved registers + bool caller_args = _cb->caller_must_gc_arguments(map->thread()) || c1_buffering; + map->set_include_argument_oops(caller_args); if (oop_map() != nullptr) { _oop_map->update_register_map(this, map); } @@ -458,21 +467,21 @@ inline frame frame::sender_for_compiled_frame(RegisterMap* map) const { // Since the prolog does the save and restore of EBP there is no oopmap // for it so we must fill in its location as if there was an oopmap entry // since if our caller was compiled code there could be live jvm state in it. 
- update_map_with_saved_link(map, saved_fp_addr); + update_map_with_saved_link(map, cfp.saved_fp_addr); } - assert(sender_sp != sp(), "must have changed"); + assert(cfp.sender_sp != sp(), "must have changed"); - if (Continuation::is_return_barrier_entry(sender_pc)) { + if (Continuation::is_return_barrier_entry(*cfp.sender_pc_addr)) { if (map->walk_cont()) { // about to walk into an h-stack return Continuation::top_frame(*this, map); } else { - return Continuation::continuation_bottom_sender(map->thread(), *this, sender_sp); + return Continuation::continuation_bottom_sender(map->thread(), *this, cfp.sender_sp); } } - intptr_t* unextended_sp = sender_sp; - return frame(sender_sp, unextended_sp, *saved_fp_addr, sender_pc); + intptr_t* unextended_sp = cfp.sender_sp; + return frame(cfp.sender_sp, unextended_sp, *cfp.saved_fp_addr, *cfp.sender_pc_addr); } template diff --git a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp index b20d7b5cd075b..15db93064c55a 100644 --- a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -413,8 +413,9 @@ void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet deco Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { bool in_heap = (decorators & IN_HEAP) != 0; bool as_normal = (decorators & AS_NORMAL) != 0; + bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; - bool needs_pre_barrier = as_normal; + bool needs_pre_barrier = as_normal && !dest_uninitialized; bool needs_post_barrier = val != noreg && in_heap; // flatten object address if needed diff --git a/src/hotspot/cpu/x86/gc/g1/g1_x86_64.ad b/src/hotspot/cpu/x86/gc/g1/g1_x86_64.ad index 94607cd679619..c28b3c15af43e 100644 --- a/src/hotspot/cpu/x86/gc/g1/g1_x86_64.ad +++ b/src/hotspot/cpu/x86/gc/g1/g1_x86_64.ad @@ -1,5 +1,5 @@ // -// Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2024, 2026, Oracle and/or its affiliates. All rights reserved. // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
// // This code is free software; you can redistribute it and/or modify it @@ -99,6 +99,118 @@ instruct g1StoreP(memory mem, any_RegP src, rRegP tmp1, rRegP tmp2, rRegP tmp3, ins_pipe(ialu_mem_reg); %} +// TODO 8350865 (same applies to g1StoreLSpecialTwoOops) +// - Do not set/overwrite barrier data here, also handle G1C2BarrierPostNotNull +instruct g1StoreLSpecialOneOopOff0(memory mem, rRegL src, immI_0 off, rRegP tmp1, rRegP tmp2, rRegP tmp3, rFlagsReg cr) +%{ + predicate(UseG1GC); + match(Set mem (StoreLSpecial mem (Binary src off))); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + format %{ "movq $mem, $src\t# g1StoreLSpecialOneOopOff0" %} + ins_encode %{ + ((MachNode*)this)->set_barrier_data(G1C2BarrierPre | G1C2BarrierPost); + + __ lea($tmp1$$Register, $mem$$Address); + write_barrier_pre(masm, this, + $tmp1$$Register /* obj */, + $tmp2$$Register /* pre_val */, + $tmp3$$Register /* tmp */, + RegSet::of($tmp1$$Register, $src$$Register) /* preserve */); + + __ movq(Address($tmp1$$Register, 0), $src$$Register); + + // Extract the narrow oop field value + __ movl($tmp2$$Register, $src$$Register); + __ decode_heap_oop($tmp2$$Register); + write_barrier_post(masm, this, + $tmp1$$Register /* store_addr */, + $tmp2$$Register /* new_val */, + $tmp3$$Register /* tmp1 */); + %} + ins_pipe(ialu_mem_reg); +%} + +instruct g1StoreLSpecialOneOopOff4(memory mem, rRegL src, immI_4 off, rRegP tmp1, rRegP tmp2, rRegP tmp3, rFlagsReg cr) +%{ + predicate(UseG1GC); + match(Set mem (StoreLSpecial mem (Binary src off))); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + format %{ "movq $mem, $src\t# g1StoreLSpecialOneOopOff4" %} + ins_encode %{ + ((MachNode*)this)->set_barrier_data(G1C2BarrierPre | G1C2BarrierPost); + + // Adjust address to point to narrow oop + Address dst = $mem$$Address; + __ lea($tmp1$$Register, dst.plus_disp(4)); + write_barrier_pre(masm, this, + $tmp1$$Register /* obj */, + $tmp2$$Register /* pre_val */, + $tmp3$$Register /* tmp */, + 
RegSet::of($tmp1$$Register, $src$$Register) /* preserve */); + + // The address of the oop is the address of the store plus the offset of the oop + __ movq(Address($tmp1$$Register, -4), $src$$Register); + + // Shift long value to extract the narrow oop field value + __ movq($tmp2$$Register, $src$$Register); + __ shrq($tmp2$$Register, 32); + __ decode_heap_oop($tmp2$$Register); + write_barrier_post(masm, this, + $tmp1$$Register /* store_addr */, + $tmp2$$Register /* new_val */, + $tmp3$$Register /* tmp1 */); + %} + ins_pipe(ialu_mem_reg); +%} + +instruct g1StoreLSpecialTwoOops(memory mem, rRegL src, rRegP tmp1, rRegP tmp2, rRegP tmp3, rFlagsReg cr) +%{ + predicate(UseG1GC); + match(Set mem (StoreLSpecial mem src)); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + format %{ "movq $mem, $src\t# g1StoreLSpecialTwoOops" %} + ins_encode %{ + ((MachNode*)this)->set_barrier_data(G1C2BarrierPre | G1C2BarrierPost); + + __ lea($tmp1$$Register, $mem$$Address); + write_barrier_pre(masm, this, + $tmp1$$Register /* obj */, + $tmp2$$Register /* pre_val */, + $tmp3$$Register /* tmp */, + RegSet::of($tmp1$$Register, $src$$Register) /* preserve */); + // Adjust address to point to the second narrow oop in the long value + __ addq($tmp1$$Register, 4); + write_barrier_pre(masm, this, + $tmp1$$Register /* obj */, + $tmp2$$Register /* pre_val */, + $tmp3$$Register /* tmp */, + RegSet::of($tmp1$$Register, $src$$Register) /* preserve */); + + // The address of the second oop is the address of the store plus the offset of the second oop + __ movq(Address($tmp1$$Register, -4), $src$$Register); + + // Do the post-barrier of the second oop first since we have its address in tmp1 + __ movq($tmp2$$Register, $src$$Register); + __ shrq($tmp2$$Register, 32); + __ decode_heap_oop($tmp2$$Register); + write_barrier_post(masm, this, + $tmp1$$Register /* store_addr */, + $tmp2$$Register /* new_val */, + $tmp3$$Register /* tmp1 */); + + // Retrieve the address of the first narrow oop + __ 
addq($tmp1$$Register, -4); + // Extract the first narrow oop + __ movl($tmp2$$Register, $src$$Register); + __ decode_heap_oop($tmp2$$Register); + write_barrier_post(masm, this, + $tmp1$$Register /* store_addr */, + $tmp2$$Register /* new_val */, + $tmp3$$Register /* tmp1 */); + %} + ins_pipe(ialu_mem_reg); +%} + instruct g1StoreN(memory mem, rRegN src, rRegP tmp1, rRegP tmp2, rRegP tmp3, rFlagsReg cr) %{ predicate(UseG1GC && n->as_Store()->barrier_data() != 0); diff --git a/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.cpp index 731eef09c3778..b604e84210028 100644 --- a/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.cpp @@ -22,10 +22,12 @@ * */ +#include "asm/macroAssembler.inline.hpp" #include "classfile/classLoaderData.hpp" #include "gc/shared/barrierSet.hpp" #include "gc/shared/barrierSetAssembler.hpp" #include "gc/shared/barrierSetNMethod.hpp" +#include "gc/shared/barrierSetRuntime.hpp" #include "gc/shared/collectedHeap.hpp" #include "interpreter/interp_masm.hpp" #include "memory/universe.hpp" @@ -161,6 +163,19 @@ void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators } } +void BarrierSetAssembler::flat_field_copy(MacroAssembler* masm, DecoratorSet decorators, + Register src, Register dst, Register inline_layout_info) { + // flat_field_copy implementation is fairly complex, and there are not any + // "short-cuts" to be made from asm. What there is, appears to have the same + // cost in C++, so just "call_VM_leaf" for now rather than maintain hundreds + // of hand-rolled instructions... 
+ if (decorators & IS_DEST_UNINITIALIZED) { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetRuntime::value_copy_is_dest_uninitialized), src, dst, inline_layout_info); + } else { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetRuntime::value_copy), src, dst, inline_layout_info); + } +} + void BarrierSetAssembler::copy_load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, diff --git a/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.hpp index caf341dcdf316..7f7f65992b3d6 100644 --- a/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.hpp @@ -48,6 +48,9 @@ class BarrierSetAssembler: public CHeapObj { virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, Address dst, Register val, Register tmp1, Register tmp2, Register tmp3); + virtual void flat_field_copy(MacroAssembler* masm, DecoratorSet decorators, + Register src, Register dst, Register inline_layout_info); + // The copy_[load/store]_at functions are used by arraycopy stubs. Be careful to only use // r10 (aka rscratch1) in a context where restore_arg_regs_using_thread has been used instead // of the looser setup_arg_regs. Currently this is done when using type T_OBJECT. 
diff --git a/src/hotspot/cpu/x86/gc/shared/barrierSetNMethod_x86.cpp b/src/hotspot/cpu/x86/gc/shared/barrierSetNMethod_x86.cpp index 7ebc90b9f718e..6dd4e02dbab2b 100644 --- a/src/hotspot/cpu/x86/gc/shared/barrierSetNMethod_x86.cpp +++ b/src/hotspot/cpu/x86/gc/shared/barrierSetNMethod_x86.cpp @@ -170,13 +170,37 @@ static NativeNMethodCmpBarrier* native_nmethod_barrier(nmethod* nm) { return barrier; } +static void set_immediate(nmethod* nm, jint val, int bit_mask) { + NativeNMethodCmpBarrier* cmp1 = native_nmethod_barrier(nm); + cmp1->set_immediate(val, bit_mask); + + if (!nm->is_osr_method() && nm->method()->has_scalarized_args()) { + // nmethods with scalarized arguments have multiple entry points that each have an own nmethod entry barrier + assert(nm->verified_entry_point() != nm->verified_inline_entry_point(), "scalarized entry point not found"); + address method_body = nm->is_compiled_by_c1() ? nm->verified_inline_entry_point() : nm->verified_entry_point(); + address entry_point2 = nm->is_compiled_by_c1() ? nm->verified_entry_point() : nm->verified_inline_entry_point(); + + int barrier_offset = reinterpret_cast
(cmp1) - method_body; + NativeNMethodCmpBarrier* cmp2 = reinterpret_cast(entry_point2 + barrier_offset); + assert(cmp1 != cmp2, "sanity"); + DEBUG_ONLY(cmp2->verify()); + cmp2->set_immediate(val, bit_mask); + + if (method_body != nm->verified_inline_ro_entry_point() && entry_point2 != nm->verified_inline_ro_entry_point()) { + NativeNMethodCmpBarrier* cmp3 = reinterpret_cast(nm->verified_inline_ro_entry_point() + barrier_offset); + assert(cmp1 != cmp3 && cmp2 != cmp3, "sanity"); + DEBUG_ONLY(cmp3->verify()); + cmp3->set_immediate(val, bit_mask); + } + } +} + void BarrierSetNMethod::set_guard_value(nmethod* nm, int value, int bit_mask) { if (!supports_entry_barrier(nm)) { return; } - NativeNMethodCmpBarrier* cmp = native_nmethod_barrier(nm); - cmp->set_immediate(value, bit_mask); + set_immediate(nm, value, bit_mask); } int BarrierSetNMethod::guard_value(nmethod* nm) { diff --git a/src/hotspot/cpu/x86/globals_x86.hpp b/src/hotspot/cpu/x86/globals_x86.hpp index 936a994ed8b16..c41be7a677242 100644 --- a/src/hotspot/cpu/x86/globals_x86.hpp +++ b/src/hotspot/cpu/x86/globals_x86.hpp @@ -86,6 +86,9 @@ define_pd_global(bool, PreserveFramePointer, false); define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong); +define_pd_global(bool, InlineTypePassFieldsAsArgs, true); +define_pd_global(bool, InlineTypeReturnedAsFields, true); + #define ARCH_FLAGS(develop, \ product, \ range, \ diff --git a/src/hotspot/cpu/x86/interp_masm_x86.cpp b/src/hotspot/cpu/x86/interp_masm_x86.cpp index a9745398f7138..d5e7b22b16a26 100644 --- a/src/hotspot/cpu/x86/interp_masm_x86.cpp +++ b/src/hotspot/cpu/x86/interp_masm_x86.cpp @@ -28,9 +28,11 @@ #include "interpreter/interpreterRuntime.hpp" #include "logging/log.hpp" #include "oops/arrayOop.hpp" +#include "oops/constMethodFlags.hpp" #include "oops/markWord.hpp" #include "oops/methodData.hpp" #include "oops/method.hpp" +#include "oops/inlineKlass.hpp" #include "oops/resolvedFieldEntry.hpp" #include "oops/resolvedIndyEntry.hpp" #include 
"oops/resolvedMethodEntry.hpp" @@ -165,7 +167,7 @@ void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register ca // argument. tmp is the number of cells left in the // CallTypeData/VirtualCallTypeData to reach its end. Non null // if there's a return to profile. - assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); + assert(SingleTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); shll(tmp, log2i_exact((int)DataLayout::cell_size)); addptr(mdp, tmp); } @@ -210,7 +212,7 @@ void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, bind(do_profile); } - Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); + Address mdo_ret_addr(mdp, -in_bytes(SingleTypeEntry::size())); mov(tmp, ret); profile_obj_type(tmp, mdo_ret_addr); @@ -290,7 +292,7 @@ void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, // super call MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); // interpreter specific - // LP64: Used to ASSERT that r13/r14 were equal to frame's bcp/locals + // Used to ASSERT that r13/r14 were equal to frame's bcp/locals // but since they may not have been saved (and we don't want to // save them here (see note above) the assert is invalid. 
} @@ -428,7 +430,7 @@ void InterpreterMacroAssembler::call_VM_preemptable(Register oop_result, Register arg_1, Register arg_2, bool check_exceptions) { - LP64_ONLY(assert_different_registers(arg_1, c_rarg2)); + assert_different_registers(arg_1, c_rarg2); pass_arg2(this, arg_2); pass_arg1(this, arg_1); call_VM_preemptable_helper(oop_result, entry_point, 2, check_exceptions); @@ -582,15 +584,17 @@ void InterpreterMacroAssembler::load_resolved_klass_at_index(Register klass, // Kills: // rcx void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass, - Label& ok_is_subtype) { + Label& ok_is_subtype, + bool profile) { assert(Rsub_klass != rax, "rax holds superklass"); assert(Rsub_klass != r14, "r14 holds locals"); assert(Rsub_klass != r13, "r13 holds bcp"); assert(Rsub_klass != rcx, "rcx holds 2ndary super array length"); // Profile the not-null value's klass. - profile_typecheck(rcx, Rsub_klass); // blows rcx - + if (profile) { + profile_typecheck(rcx, Rsub_klass); // blows rcx + } // Do the check. 
check_klass_subtype(Rsub_klass, rax, rcx, ok_is_subtype); // blows rcx } @@ -883,7 +887,7 @@ void InterpreterMacroAssembler::remove_activation(TosState state, movbool(rbx, do_not_unlock_if_synchronized); movbool(do_not_unlock_if_synchronized, false); // reset the flag - // get method access flags + // get method access flags movptr(rcx, Address(rbp, frame::interpreter_frame_method_offset * wordSize)); load_unsigned_short(rcx, Address(rcx, Method::access_flags_offset())); testl(rcx, JVM_ACC_SYNCHRONIZED); @@ -1022,11 +1026,9 @@ void InterpreterMacroAssembler::remove_activation(TosState state, notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA } - // remove activation - // get sender sp - movptr(rbx, - Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize)); if (StackReservedPages > 0) { + movptr(rbx, + Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize)); // testing if reserved zone needs to be re-enabled Register rthread = r15_thread; Label no_reserved_zone_enabling; @@ -1050,6 +1052,41 @@ void InterpreterMacroAssembler::remove_activation(TosState state, bind(no_reserved_zone_enabling); } + // remove activation + // get sender sp + movptr(rbx, + Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize)); + + if (state == atos && InlineTypeReturnedAsFields) { + Label skip; + Label not_null; + testptr(rax, rax); + jcc(Assembler::notZero, not_null); + // Returned value is null, zero all return registers because they may belong to oop fields + xorq(j_rarg1, j_rarg1); + xorq(j_rarg2, j_rarg2); + xorq(j_rarg3, j_rarg3); + xorq(j_rarg4, j_rarg4); + xorq(j_rarg5, j_rarg5); + jmp(skip); + bind(not_null); + + // Check if we are returning a non-null inline type and load its fields into registers + test_oop_is_not_inline_type(rax, rscratch1, skip, /* can_be_null= */ false); + + // Load fields from a buffered value with an inline class specific handler + load_klass(rdi, rax, rscratch1); + movptr(rdi, Address(rdi, 
InlineKlass::adr_members_offset())); + movptr(rdi, Address(rdi, InlineKlass::unpack_handler_offset())); + // Unpack handler can be null if inline type is not scalarizable in returns + testptr(rdi, rdi); + jcc(Assembler::zero, skip); + call(rdi); + // call above kills the value in rbx. Reload it. + movptr(rbx, Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize)); + bind(skip); + } + leave(); // remove frame anchor JFR_ONLY(leave_jfr_critical_section();) @@ -1086,6 +1123,43 @@ void InterpreterMacroAssembler::get_method_counters(Register method, bind(has_counters); } +void InterpreterMacroAssembler::read_flat_field(Register entry, Register obj) { + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::read_flat_field), + obj, entry); + get_vm_result_oop(obj); +} + +void InterpreterMacroAssembler::write_flat_field(Register entry, Register tmp1, Register tmp2, + Register obj, Register off, Register value) { + assert_different_registers(entry, tmp1, tmp2, obj, off, value); + + Label slow_path, done; + + load_unsigned_byte(tmp2, Address(entry, in_bytes(ResolvedFieldEntry::flags_offset()))); + test_field_is_not_null_free_inline_type(tmp2, tmp1, slow_path); + + null_check(value); // FIXME JDK-8341120 + + lea(obj, Address(obj, off, Address::times_1)); + + load_klass(tmp2, value, tmp1); + payload_addr(value, value, tmp2); + + Register idx = tmp1; + load_unsigned_short(idx, Address(entry, in_bytes(ResolvedFieldEntry::field_index_offset()))); + movptr(tmp2, Address(entry, in_bytes(ResolvedFieldEntry::field_holder_offset()))); + + Register layout_info = off; + inline_layout_info(tmp2, idx, layout_info); + + flat_field_copy(IN_HEAP, value, obj, layout_info); + jmp(done); + + bind(slow_path); + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::write_flat_field), obj, value, entry); + + bind(done); +} // Lock object // @@ -1338,7 +1412,7 @@ void InterpreterMacroAssembler::profile_taken_branch(Register mdp) { } -void 
InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { +void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp, bool acmp) { if (ProfileInterpreter) { Label profile_continue; @@ -1350,7 +1424,7 @@ void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { // The method data pointer needs to be updated to correspond to // the next bytecode - update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); + update_mdp_by_constant(mdp, acmp ? in_bytes(ACmpData::acmp_data_size()): in_bytes(BranchData::branch_data_size())); bind(profile_continue); } } @@ -1540,6 +1614,120 @@ void InterpreterMacroAssembler::profile_switch_case(Register index, } } +template void InterpreterMacroAssembler::profile_array_type(Register mdp, + Register array, + Register tmp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + mov(tmp, array); + profile_obj_type(tmp, Address(mdp, in_bytes(ArrayData::array_offset()))); + + Label not_flat; + test_non_flat_array_oop(array, tmp, not_flat); + + set_mdp_flag_at(mdp, ArrayData::flat_array_byte_constant()); + + bind(not_flat); + + Label not_null_free; + test_non_null_free_array_oop(array, tmp, not_null_free); + + set_mdp_flag_at(mdp, ArrayData::null_free_array_byte_constant()); + + bind(not_null_free); + + bind(profile_continue); + } +} + +template void InterpreterMacroAssembler::profile_array_type(Register mdp, + Register array, + Register tmp); +template void InterpreterMacroAssembler::profile_array_type(Register mdp, + Register array, + Register tmp); + + +void InterpreterMacroAssembler::profile_multiple_element_types(Register mdp, Register element, Register tmp, const Register tmp2) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. 
+ test_method_data_pointer(mdp, profile_continue); + + Label done, update; + testptr(element, element); + jccb(Assembler::notZero, update); + set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); + jmp(done); + + bind(update); + load_klass(tmp, element, rscratch1); + + // Record the object type. + profile_receiver_type(tmp, mdp, 0); + + bind(done); + + // The method data pointer needs to be updated. + update_mdp_by_constant(mdp, in_bytes(ArrayStoreData::array_store_data_size())); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_element_type(Register mdp, + Register element, + Register tmp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + mov(tmp, element); + profile_obj_type(tmp, Address(mdp, in_bytes(ArrayLoadData::element_offset()))); + + // The method data pointer needs to be updated. + update_mdp_by_constant(mdp, in_bytes(ArrayLoadData::array_load_data_size())); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_acmp(Register mdp, + Register left, + Register right, + Register tmp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. 
+ test_method_data_pointer(mdp, profile_continue); + + mov(tmp, left); + profile_obj_type(tmp, Address(mdp, in_bytes(ACmpData::left_offset()))); + + Label left_not_inline_type; + test_oop_is_not_inline_type(left, tmp, left_not_inline_type); + set_mdp_flag_at(mdp, ACmpData::left_inline_type_byte_constant()); + bind(left_not_inline_type); + + mov(tmp, right); + profile_obj_type(tmp, Address(mdp, in_bytes(ACmpData::right_offset()))); + + Label right_not_inline_type; + test_oop_is_not_inline_type(right, tmp, right_not_inline_type); + set_mdp_flag_at(mdp, ACmpData::right_inline_type_byte_constant()); + bind(right_not_inline_type); + + bind(profile_continue); + } +} void InterpreterMacroAssembler::_interp_verify_oop(Register reg, TosState state, const char* file, int line) { diff --git a/src/hotspot/cpu/x86/interp_masm_x86.hpp b/src/hotspot/cpu/x86/interp_masm_x86.hpp index dfbd7ab64e040..ed4b8003ed41c 100644 --- a/src/hotspot/cpu/x86/interp_masm_x86.hpp +++ b/src/hotspot/cpu/x86/interp_masm_x86.hpp @@ -175,7 +175,7 @@ class InterpreterMacroAssembler: public MacroAssembler { // Generate a subtype check: branch to ok_is_subtype if sub_klass is // a subtype of super_klass. 
- void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); + void gen_subtype_check(Register sub_klass, Label &ok_is_subtype, bool profile = true); // Dispatching void dispatch_prolog(TosState state, int step = 0); @@ -215,6 +215,15 @@ class InterpreterMacroAssembler: public MacroAssembler { bool notify_jvmdi = true); void get_method_counters(Register method, Register mcs, Label& skip); + // Allocate instance in "obj" and read in the content of the inline field + // NOTES: + // - input holder object via "obj", which must be rax, + // will return new instance via the same reg + void read_flat_field(Register entry, Register obj); + void write_flat_field(Register entry, + Register tmp1, Register tmp2, + Register obj, Register off, Register value); + // Object locking void lock_object (Register lock_reg); void unlock_object(Register lock_reg); @@ -240,7 +249,7 @@ class InterpreterMacroAssembler: public MacroAssembler { void update_mdp_for_ret(Register return_bci); void profile_taken_branch(Register mdp); - void profile_not_taken_branch(Register mdp); + void profile_not_taken_branch(Register mdp, bool acmp = false); void profile_call(Register mdp); void profile_final_call(Register mdp); void profile_virtual_call(Register receiver, Register mdp); @@ -251,6 +260,11 @@ class InterpreterMacroAssembler: public MacroAssembler { void profile_switch_default(Register mdp); void profile_switch_case(Register index_in_scratch, Register mdp, Register scratch2); + template void profile_array_type(Register mdp, Register array, Register tmp); + + void profile_multiple_element_types(Register mdp, Register element, Register tmp, const Register tmp2); + void profile_element_type(Register mdp, Register element, Register tmp); + void profile_acmp(Register mdp, Register left, Register right, Register tmp); // Debugging // only if +VerifyOops && state == atos diff --git a/src/hotspot/cpu/x86/jniFastGetField_x86_64.cpp b/src/hotspot/cpu/x86/jniFastGetField_x86_64.cpp index 
09ba4537854fa..2c4d34c7cd540 100644 --- a/src/hotspot/cpu/x86/jniFastGetField_x86_64.cpp +++ b/src/hotspot/cpu/x86/jniFastGetField_x86_64.cpp @@ -29,6 +29,7 @@ #include "memory/resourceArea.hpp" #include "prims/jniFastGetField.hpp" #include "prims/jvm_misc.hpp" +#include "runtime/jfieldIDWorkaround.hpp" #include "prims/jvmtiExport.hpp" #include "runtime/safepoint.hpp" @@ -80,7 +81,7 @@ address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { } __ mov (roffset, c_rarg2); - __ shrptr(roffset, 2); // offset + __ shrptr(roffset, jfieldIDWorkaround::offset_shift); // offset // Both robj and rtmp are clobbered by try_resolve_jobject_in_native. BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); @@ -182,7 +183,7 @@ address JNI_FastGetField::generate_fast_get_float_field0(BasicType type) { DEBUG_ONLY(__ movl(rtmp, 0xDEADC0DE);) __ mov (roffset, c_rarg2); - __ shrptr(roffset, 2); // offset + __ shrptr(roffset, jfieldIDWorkaround::offset_shift); // offset assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); speculative_load_pclist[count] = __ pc(); diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp index 80dd7ccfbca70..610b8cf0051ab 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp @@ -28,6 +28,7 @@ #include "code/compiledIC.hpp" #include "compiler/compiler_globals.hpp" #include "compiler/disassembler.hpp" +#include "ci/ciInlineKlass.hpp" #include "crc32c.h" #include "gc/shared/barrierSet.hpp" #include "gc/shared/barrierSetAssembler.hpp" @@ -43,7 +44,9 @@ #include "oops/compressedKlass.inline.hpp" #include "oops/compressedOops.inline.hpp" #include "oops/klass.inline.hpp" +#include "oops/resolvedFieldEntry.hpp" #include "prims/methodHandles.hpp" +#include "runtime/arguments.hpp" #include "runtime/continuation.hpp" #include "runtime/interfaceSupport.inline.hpp" #include "runtime/javaThread.hpp" @@ -53,10 +56,15 @@ #include 
"runtime/safepoint.hpp" #include "runtime/safepointMechanism.hpp" #include "runtime/sharedRuntime.hpp" +#include "runtime/signature_cc.hpp" #include "runtime/stubRoutines.hpp" #include "utilities/checkedCast.hpp" #include "utilities/globalDefinitions.hpp" #include "utilities/macros.hpp" +#include "vmreg_x86.inline.hpp" +#ifdef COMPILER2 +#include "opto/output.hpp" +#endif #ifdef PRODUCT #define BLOCK_COMMENT(str) /* nothing */ @@ -1302,6 +1310,10 @@ void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register call_VM_leaf(entry_point, 3); } +void MacroAssembler::super_call_VM_leaf(address entry_point) { + MacroAssembler::call_VM_leaf_base(entry_point, 1); +} + void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { pass_arg0(this, arg_0); MacroAssembler::call_VM_leaf_base(entry_point, 1); @@ -2393,6 +2405,82 @@ void MacroAssembler::null_check(Register reg, int offset) { } } +void MacroAssembler::test_markword_is_inline_type(Register markword, Label& is_inline_type) { + andptr(markword, markWord::inline_type_pattern_mask); + cmpptr(markword, markWord::inline_type_pattern); + jcc(Assembler::equal, is_inline_type); +} + +void MacroAssembler::test_oop_is_not_inline_type(Register object, Register tmp, Label& not_inline_type, bool can_be_null) { + if (can_be_null) { + testptr(object, object); + jcc(Assembler::zero, not_inline_type); + } + const int is_inline_type_mask = markWord::inline_type_pattern; + movptr(tmp, Address(object, oopDesc::mark_offset_in_bytes())); + andptr(tmp, is_inline_type_mask); + cmpptr(tmp, is_inline_type_mask); + jcc(Assembler::notEqual, not_inline_type); +} + +void MacroAssembler::test_field_is_null_free_inline_type(Register flags, Register temp_reg, Label& is_null_free_inline_type) { + movl(temp_reg, flags); + testl(temp_reg, 1 << ResolvedFieldEntry::is_null_free_inline_type_shift); + jcc(Assembler::notEqual, is_null_free_inline_type); +} + +void 
MacroAssembler::test_field_is_not_null_free_inline_type(Register flags, Register temp_reg, Label& not_null_free_inline_type) { + movl(temp_reg, flags); + testl(temp_reg, 1 << ResolvedFieldEntry::is_null_free_inline_type_shift); + jcc(Assembler::equal, not_null_free_inline_type); +} + +void MacroAssembler::test_field_is_flat(Register flags, Register temp_reg, Label& is_flat) { + movl(temp_reg, flags); + testl(temp_reg, 1 << ResolvedFieldEntry::is_flat_shift); + jcc(Assembler::notEqual, is_flat); +} + +void MacroAssembler::test_oop_prototype_bit(Register oop, Register temp_reg, int32_t test_bit, bool jmp_set, Label& jmp_label) { + Label test_mark_word; + // load mark word + movptr(temp_reg, Address(oop, oopDesc::mark_offset_in_bytes())); + // check displaced + testl(temp_reg, markWord::unlocked_value); + jccb(Assembler::notZero, test_mark_word); + // slow path use klass prototype + push(rscratch1); + load_prototype_header(temp_reg, oop, rscratch1); + pop(rscratch1); + + bind(test_mark_word); + testl(temp_reg, test_bit); + jcc((jmp_set) ? 
Assembler::notZero : Assembler::zero, jmp_label); +} + +void MacroAssembler::test_flat_array_oop(Register oop, Register temp_reg, + Label& is_flat_array) { + test_oop_prototype_bit(oop, temp_reg, markWord::flat_array_bit_in_place, true, is_flat_array); +} + +void MacroAssembler::test_non_flat_array_oop(Register oop, Register temp_reg, + Label& is_non_flat_array) { + test_oop_prototype_bit(oop, temp_reg, markWord::flat_array_bit_in_place, false, is_non_flat_array); +} + +void MacroAssembler::test_null_free_array_oop(Register oop, Register temp_reg, Label&is_null_free_array) { + test_oop_prototype_bit(oop, temp_reg, markWord::null_free_array_bit_in_place, true, is_null_free_array); +} + +void MacroAssembler::test_non_null_free_array_oop(Register oop, Register temp_reg, Label&is_non_null_free_array) { + test_oop_prototype_bit(oop, temp_reg, markWord::null_free_array_bit_in_place, false, is_non_null_free_array); +} + +void MacroAssembler::test_flat_array_layout(Register lh, Label& is_flat_array) { + testl(lh, Klass::_lh_array_tag_flat_value_bit_inplace); + jcc(Assembler::notZero, is_flat_array); +} + void MacroAssembler::os_breakpoint() { // instead of directly emitting a breakpoint, call os:breakpoint for better debugability // (e.g., MSVC can't call ps() otherwise) @@ -3733,6 +3821,28 @@ void MacroAssembler::zero_memory(Register address, Register length_in_bytes, int bind(done); } +void MacroAssembler::inline_layout_info(Register holder_klass, Register index, Register layout_info) { + movptr(layout_info, Address(holder_klass, InstanceKlass::inline_layout_info_array_offset())); +#ifdef ASSERT + { + Label done; + cmpptr(layout_info, 0); + jcc(Assembler::notEqual, done); + stop("inline_layout_info_array is null"); + bind(done); + } +#endif + + InlineLayoutInfo array[2]; + int size = (char*)&array[1] - (char*)&array[0]; // computing size of array elements + if (is_power_of_2(size)) { + shll(index, log2i_exact(size)); // Scale index by power of 2 + } else { + imull(index, 
index, size); // Scale the index to be the entry index * array_element_size + } + lea(layout_info, Address(layout_info, index, Address::times_1, Array::base_offset_in_bytes())); +} + // Look up the method for a megamorphic invokeinterface call. // The target method is determined by . // The receiver klass is in recv_klass. @@ -4783,7 +4893,11 @@ void MacroAssembler::cmov32(Condition cc, Register dst, Register src) { } void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) { - if (!VerifyOops) return; + if (!VerifyOops || VerifyAdapterSharing) { + // Below address of the code string confuses VerifyAdapterSharing + // because it may differ between otherwise equivalent adapters. + return; + } BLOCK_COMMENT("verify_oop {"); push(rscratch1); @@ -5036,7 +5150,11 @@ void MacroAssembler::profile_receiver_type(Register recv, Register mdp, int mdp_ } void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) { - if (!VerifyOops) return; + if (!VerifyOops || VerifyAdapterSharing) { + // Below address of the code string confuses VerifyAdapterSharing + // because it may differ between otherwise equivalent adapters. 
+ return; + } push(rscratch1); push(rax); // save rax, @@ -5428,6 +5546,14 @@ void MacroAssembler::load_method_holder(Register holder, Register method) { movptr(holder, Address(holder, ConstantPool::pool_holder_offset())); // InstanceKlass* } +void MacroAssembler::load_metadata(Register dst, Register src) { + if (UseCompactObjectHeaders) { + load_narrow_klass_compact(dst, src); + } else { + movl(dst, Address(src, oopDesc::klass_offset_in_bytes())); + } +} + void MacroAssembler::load_narrow_klass_compact(Register dst, Register src) { assert(UseCompactObjectHeaders, "expect compact object headers"); movq(dst, Address(src, oopDesc::mark_offset_in_bytes())); @@ -5447,6 +5573,11 @@ void MacroAssembler::load_klass(Register dst, Register src, Register tmp) { } } +void MacroAssembler::load_prototype_header(Register dst, Register src, Register tmp) { + load_klass(dst, src, tmp); + movptr(dst, Address(dst, Klass::prototype_header_offset())); +} + void MacroAssembler::store_klass(Register dst, Register src, Register tmp) { assert(!UseCompactObjectHeaders, "not with compact headers"); assert_different_registers(src, tmp); @@ -5503,6 +5634,28 @@ void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, Ad } } +void MacroAssembler::flat_field_copy(DecoratorSet decorators, Register src, Register dst, + Register inline_layout_info) { + BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->flat_field_copy(this, decorators, src, dst, inline_layout_info); +} + +void MacroAssembler::payload_offset(Register inline_klass, Register offset) { + movptr(offset, Address(inline_klass, InlineKlass::adr_members_offset())); + movl(offset, Address(offset, InlineKlass::payload_offset_offset())); +} + +void MacroAssembler::payload_addr(Register oop, Register data, Register inline_klass) { + // ((address) (void*) o) + vk->payload_offset(); + Register offset = (data == oop) ? 
rscratch1 : data; + payload_offset(inline_klass, offset); + if (data == oop) { + addptr(data, offset); + } else { + lea(data, Address(oop, offset)); + } +} + void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, DecoratorSet decorators) { access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1); } @@ -5865,20 +6018,505 @@ void MacroAssembler::reinit_heapbase() { } } +int MacroAssembler::store_inline_type_fields_to_buf(ciInlineKlass* vk, bool from_interpreter) { + assert(InlineTypeReturnedAsFields, "Inline types should never be returned as fields"); + // An inline type might be returned. If fields are in registers we + // need to allocate an inline type instance and initialize it with + // the value of the fields. + Label skip; + // We only need a new buffered inline type if a new one is not returned + testptr(rax, 1); + jcc(Assembler::zero, skip); + int call_offset = -1; + + // The following code is similar to allocation code in TemplateTable::_new but has some slight differences, + // e.g. object size is always not zero, sometimes it's constant; storing klass ptr after + // allocating is not necessary if vk != nullptr, etc. + Label slow_case; + // 1. Try to allocate a new buffered inline instance either from TLAB or eden space + mov(rscratch1, rax); // save rax for slow_case since *_allocate may corrupt it when allocation failed + if (vk != nullptr) { + // Called from C1, where the return type is statically known. + movptr(rbx, (intptr_t)vk->get_InlineKlass()); + jint lh = vk->layout_helper(); + assert(lh != Klass::_lh_neutral_value, "inline class in return type must have been resolved"); + if (UseTLAB && !Klass::layout_helper_needs_slow_path(lh)) { + tlab_allocate(rax, noreg, lh, r13, r14, slow_case); + } else { + jmp(slow_case); + } + } else { + // Call from interpreter. 
RAX contains ((the InlineKlass* of the return type) | 0x01) + mov(rbx, rax); + andptr(rbx, -2); + if (UseTLAB) { + movl(r14, Address(rbx, Klass::layout_helper_offset())); + testl(r14, Klass::_lh_instance_slow_path_bit); + jcc(Assembler::notZero, slow_case); + tlab_allocate(rax, r14, 0, r13, r14, slow_case); + } else { + jmp(slow_case); + } + } + if (UseTLAB) { + // 2. Initialize buffered inline instance header + Register buffer_obj = rax; + Register klass = rbx; + if (UseCompactObjectHeaders) { + Register mark_word = r13; + movptr(mark_word, Address(klass, Klass::prototype_header_offset())); + movptr(Address(buffer_obj, oopDesc::mark_offset_in_bytes()), mark_word); + } else { + movptr(Address(buffer_obj, oopDesc::mark_offset_in_bytes()), (intptr_t)markWord::inline_type_prototype().value()); + xorl(r13, r13); + store_klass_gap(buffer_obj, r13); + if (vk == nullptr) { + // store_klass corrupts rbx(klass), so save it in r13 for later use (interpreter case only). + mov(r13, klass); + } + store_klass(buffer_obj, klass, rscratch1); + klass = r13; + } + // 3. Initialize its fields with an inline class specific handler + if (vk != nullptr) { + call(RuntimeAddress(vk->pack_handler())); // no need for call info as this will not safepoint. + } else { + movptr(rbx, Address(klass, InlineKlass::adr_members_offset())); + movptr(rbx, Address(rbx, InlineKlass::pack_handler_offset())); + call(rbx); + } + jmp(skip); + } + bind(slow_case); + // We failed to allocate a new inline type, fall back to a runtime + // call. Some oop field may be live in some registers but we can't + // tell. That runtime call will take care of preserving them + // across a GC if there's one. 
+ mov(rax, rscratch1); + + if (from_interpreter) { + super_call_VM_leaf(StubRoutines::store_inline_type_fields_to_buf()); + } else { + call(RuntimeAddress(StubRoutines::store_inline_type_fields_to_buf())); + call_offset = offset(); + } + + bind(skip); + return call_offset; +} + +// Move a value between registers/stack slots and update the reg_state +bool MacroAssembler::move_helper(VMReg from, VMReg to, BasicType bt, RegState reg_state[]) { + assert(from->is_valid() && to->is_valid(), "source and destination must be valid"); + if (reg_state[to->value()] == reg_written) { + return true; // Already written + } + if (from != to && bt != T_VOID) { + if (reg_state[to->value()] == reg_readonly) { + return false; // Not yet writable + } + if (from->is_reg()) { + if (to->is_reg()) { + if (from->is_XMMRegister()) { + if (bt == T_DOUBLE) { + movdbl(to->as_XMMRegister(), from->as_XMMRegister()); + } else { + assert(bt == T_FLOAT, "must be float"); + movflt(to->as_XMMRegister(), from->as_XMMRegister()); + } + } else { + movq(to->as_Register(), from->as_Register()); + } + } else { + int st_off = to->reg2stack() * VMRegImpl::stack_slot_size + wordSize; + Address to_addr = Address(rsp, st_off); + if (from->is_XMMRegister()) { + if (bt == T_DOUBLE) { + movdbl(to_addr, from->as_XMMRegister()); + } else { + assert(bt == T_FLOAT, "must be float"); + movflt(to_addr, from->as_XMMRegister()); + } + } else { + movq(to_addr, from->as_Register()); + } + } + } else { + Address from_addr = Address(rsp, from->reg2stack() * VMRegImpl::stack_slot_size + wordSize); + if (to->is_reg()) { + if (to->is_XMMRegister()) { + if (bt == T_DOUBLE) { + movdbl(to->as_XMMRegister(), from_addr); + } else { + assert(bt == T_FLOAT, "must be float"); + movflt(to->as_XMMRegister(), from_addr); + } + } else { + movq(to->as_Register(), from_addr); + } + } else { + int st_off = to->reg2stack() * VMRegImpl::stack_slot_size + wordSize; + movq(r13, from_addr); + movq(Address(rsp, st_off), r13); + } + } + } + // Update 
register states + reg_state[from->value()] = reg_writable; + reg_state[to->value()] = reg_written; + return true; +} + +// Calculate the extra stack space required for packing or unpacking inline +// args and adjust the stack pointer (see MacroAssembler::remove_frame). +int MacroAssembler::extend_stack_for_inline_args(int args_on_stack) { + int sp_inc = args_on_stack * VMRegImpl::stack_slot_size; + sp_inc = align_up(sp_inc, StackAlignmentInBytes); + assert(sp_inc > 0, "sanity"); + // Two additional slots to account for return address + sp_inc += 2 * VMRegImpl::stack_slot_size; + + push(rbp); + subptr(rsp, sp_inc); +#ifdef ASSERT + movl(Address(rsp, 0), badRegWordVal); + movl(Address(rsp, VMRegImpl::stack_slot_size), badRegWordVal); +#endif + return sp_inc + wordSize; // account for rbp space +} + +// Read all fields from an inline type buffer and store the field values in registers/stack slots. +bool MacroAssembler::unpack_inline_helper(const GrowableArray* sig, int& sig_index, + VMReg from, int& from_index, VMRegPair* to, int to_count, int& to_index, + RegState reg_state[]) { + assert(sig->at(sig_index)._bt == T_VOID, "should be at end delimiter"); + assert(from->is_valid(), "source must be valid"); + bool progress = false; +#ifdef ASSERT + const int start_offset = offset(); +#endif + + Label L_null, L_notNull; + // Don't use r14 as tmp because it's used for spilling (see MacroAssembler::spill_reg_for) + Register tmp1 = r10; + Register tmp2 = r13; + Register fromReg = noreg; + ScalarizedInlineArgsStream stream(sig, sig_index, to, to_count, to_index, true); + bool done = true; + bool mark_done = true; + VMReg toReg; + BasicType bt; + // Check if argument requires a null check + bool null_check = false; + VMReg nullCheckReg; + while (stream.next(nullCheckReg, bt)) { + if (sig->at(stream.sig_index())._offset == -1) { + null_check = true; + break; + } + } + stream.reset(sig_index, to_index); + while (stream.next(toReg, bt)) { + assert(toReg->is_valid(), "destination 
must be valid"); + int idx = (int)toReg->value(); + if (reg_state[idx] == reg_readonly) { + if (idx != from->value()) { + mark_done = false; + } + done = false; + continue; + } else if (reg_state[idx] == reg_written) { + continue; + } + assert(reg_state[idx] == reg_writable, "must be writable"); + reg_state[idx] = reg_written; + progress = true; + + if (fromReg == noreg) { + if (from->is_reg()) { + fromReg = from->as_Register(); + } else { + int st_off = from->reg2stack() * VMRegImpl::stack_slot_size + wordSize; + movq(tmp1, Address(rsp, st_off)); + fromReg = tmp1; + } + if (null_check) { + // Nullable inline type argument, emit null check + testptr(fromReg, fromReg); + jcc(Assembler::zero, L_null); + } + } + int off = sig->at(stream.sig_index())._offset; + if (off == -1) { + assert(null_check, "Missing null check at"); + if (toReg->is_stack()) { + int st_off = toReg->reg2stack() * VMRegImpl::stack_slot_size + wordSize; + movq(Address(rsp, st_off), 1); + } else { + movq(toReg->as_Register(), 1); + } + continue; + } + if (sig->at(stream.sig_index())._vt_oop) { + if (toReg->is_stack()) { + int st_off = toReg->reg2stack() * VMRegImpl::stack_slot_size + wordSize; + movq(Address(rsp, st_off), fromReg); + } else { + movq(toReg->as_Register(), fromReg); + } + continue; + } + assert(off > 0, "offset in object should be positive"); + Address fromAddr = Address(fromReg, off); + if (!toReg->is_XMMRegister()) { + Register dst = toReg->is_stack() ? 
tmp2 : toReg->as_Register(); + if (is_reference_type(bt)) { + load_heap_oop(dst, fromAddr); + } else { + bool is_signed = (bt != T_CHAR) && (bt != T_BOOLEAN); + load_sized_value(dst, fromAddr, type2aelembytes(bt), is_signed); + } + if (toReg->is_stack()) { + int st_off = toReg->reg2stack() * VMRegImpl::stack_slot_size + wordSize; + movq(Address(rsp, st_off), dst); + } + } else if (bt == T_DOUBLE) { + movdbl(toReg->as_XMMRegister(), fromAddr); + } else { + assert(bt == T_FLOAT, "must be float"); + movflt(toReg->as_XMMRegister(), fromAddr); + } + } + if (progress && null_check) { + if (done) { + jmp(L_notNull); + bind(L_null); + // Set null marker to zero to signal that the argument is null. + // Also set all fields to zero since the runtime requires a canonical + // representation of a flat null. + stream.reset(sig_index, to_index); + while (stream.next(toReg, bt)) { + if (toReg->is_stack()) { + int st_off = toReg->reg2stack() * VMRegImpl::stack_slot_size + wordSize; + movq(Address(rsp, st_off), 0); + } else if (toReg->is_XMMRegister()) { + xorps(toReg->as_XMMRegister(), toReg->as_XMMRegister()); + } else { + xorl(toReg->as_Register(), toReg->as_Register()); + } + } + bind(L_notNull); + } else { + bind(L_null); + } + } + + sig_index = stream.sig_index(); + to_index = stream.regs_index(); + + if (mark_done && reg_state[from->value()] != reg_written) { + // This is okay because no one else will write to that slot + reg_state[from->value()] = reg_writable; + } + from_index--; + assert(progress || (start_offset == offset()), "should not emit code"); + return done; +} + +bool MacroAssembler::pack_inline_helper(const GrowableArray* sig, int& sig_index, int vtarg_index, + VMRegPair* from, int from_count, int& from_index, VMReg to, + RegState reg_state[], Register val_array) { + assert(sig->at(sig_index)._bt == T_METADATA, "should be at delimiter"); + assert(to->is_valid(), "destination must be valid"); + + if (reg_state[to->value()] == reg_written) { + 
skip_unpacked_fields(sig, sig_index, from, from_count, from_index); + return true; // Already written + } + + // Be careful with r14 because it's used for spilling (see MacroAssembler::spill_reg_for). + Register val_obj_tmp = r11; + Register from_reg_tmp = r14; + Register tmp1 = r10; + Register tmp2 = r13; + Register tmp3 = rbx; + Register val_obj = to->is_stack() ? val_obj_tmp : to->as_Register(); + + assert_different_registers(val_obj_tmp, from_reg_tmp, tmp1, tmp2, tmp3, val_array); + + if (reg_state[to->value()] == reg_readonly) { + if (!is_reg_in_unpacked_fields(sig, sig_index, to, from, from_count, from_index)) { + skip_unpacked_fields(sig, sig_index, from, from_count, from_index); + return false; // Not yet writable + } + val_obj = val_obj_tmp; + } + + ScalarizedInlineArgsStream stream(sig, sig_index, from, from_count, from_index); + VMReg fromReg; + BasicType bt; + Label L_null; + while (stream.next(fromReg, bt)) { + assert(fromReg->is_valid(), "source must be valid"); + reg_state[fromReg->value()] = reg_writable; + + int off = sig->at(stream.sig_index())._offset; + if (off == -1) { + // Nullable inline type argument, emit null check + Label L_notNull; + if (fromReg->is_stack()) { + int ld_off = fromReg->reg2stack() * VMRegImpl::stack_slot_size + wordSize; + testb(Address(rsp, ld_off), 1); + } else { + testb(fromReg->as_Register(), 1); + } + jcc(Assembler::notZero, L_notNull); + movptr(val_obj, 0); + jmp(L_null); + bind(L_notNull); + continue; + } + if (sig->at(stream.sig_index())._vt_oop) { + // buffer argument: use if non null + if (fromReg->is_stack()) { + int ld_off = fromReg->reg2stack() * VMRegImpl::stack_slot_size + wordSize; + movptr(val_obj, Address(rsp, ld_off)); + } else { + movptr(val_obj, fromReg->as_Register()); + } + testptr(val_obj, val_obj); + jcc(Assembler::notEqual, L_null); + // otherwise get the buffer from the just allocated pool of buffers + int index = arrayOopDesc::base_offset_in_bytes(T_OBJECT) + vtarg_index * 
type2aelembytes(T_OBJECT); + load_heap_oop(val_obj, Address(val_array, index)); + continue; + } + + assert(off > 0, "offset in object should be positive"); + size_t size_in_bytes = is_java_primitive(bt) ? type2aelembytes(bt) : wordSize; + + // Pack the scalarized field into the value object. + Address dst(val_obj, off); + if (!fromReg->is_XMMRegister()) { + Register src; + if (fromReg->is_stack()) { + src = from_reg_tmp; + int ld_off = fromReg->reg2stack() * VMRegImpl::stack_slot_size + wordSize; + load_sized_value(src, Address(rsp, ld_off), size_in_bytes, /* is_signed */ false); + } else { + src = fromReg->as_Register(); + } + assert_different_registers(dst.base(), src, tmp1, tmp2, tmp3, val_array); + if (is_reference_type(bt)) { + // store_heap_oop transitively calls oop_store_at which corrupts to.base(). We need to keep val_obj valid. + mov(tmp3, val_obj); + Address dst_with_tmp3(tmp3, off); + store_heap_oop(dst_with_tmp3, src, tmp1, tmp2, tmp3, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED); + } else { + store_sized_value(dst, src, size_in_bytes); + } + } else if (bt == T_DOUBLE) { + movdbl(dst, fromReg->as_XMMRegister()); + } else { + assert(bt == T_FLOAT, "must be float"); + movflt(dst, fromReg->as_XMMRegister()); + } + } + bind(L_null); + sig_index = stream.sig_index(); + from_index = stream.regs_index(); + + assert(reg_state[to->value()] == reg_writable, "must have already been read"); + bool success = move_helper(val_obj->as_VMReg(), to, T_OBJECT, reg_state); + assert(success, "to register must be writable"); + return true; +} + +VMReg MacroAssembler::spill_reg_for(VMReg reg) { + return reg->is_XMMRegister() ? 
xmm8->as_VMReg() : r14->as_VMReg(); +} + +void MacroAssembler::remove_frame(int initial_framesize, bool needs_stack_repair) { + assert((initial_framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); + if (needs_stack_repair) { + // The method has a scalarized entry point (where fields of value object arguments + // are passed through registers and stack), and a non-scalarized entry point (where + // value object arguments are given as oops). The non-scalarized entry point will + // first load each field of value object arguments and store them in registers and on + // the stack in a way compatible with the scalarized entry point. To do so, some extra + // stack space might be reserved (if argument registers are not enough). On leaving the + // method, this space must be freed. + // + // In case we used the non-scalarized entry point the stack looks like this: + // + // | Arguments from caller | + // |---------------------------| <-- caller's SP + // | Return address #1 | + // | Saved RBP #1 | + // |---------------------------| + // | Extension space for | + // | inline arg (un)packing | + // |---------------------------| <-- start of this method's frame + // | Return address #2 | + // | Saved RBP #2 | + // |---------------------------| <-- RBP (with -XX:+PreserveFramePointer) + // | sp_inc | + // | method locals | + // |---------------------------| <-- SP + // + // Space for the return pc and saved rbp is reserved twice. But only the #1 copies + // contain the real values of return pc and saved rbp. The #2 copies are not reliable + // and should not be used. They are mostly needed to add space between the extension + // space and the locals, as there would be between the real arguments and the locals + // if we don't need to do unpacking (from the scalarized entry point). 
+ // + // When leaving, one must load RBP #1 into RBP, and use the copy #1 of the return address, + // while keeping in mind that from the scalarized entry point, there will be only one + // copy. Indeed, in the case we used the scalarized calling convention, the stack looks like this: + // + // | Arguments from caller | + // |---------------------------| <-- caller's SP + // | Return address | + // | Saved RBP | + // |---------------------------| <-- FP (with -XX:+PreserveFramePointer) + // | sp_inc | + // | method locals | + // |---------------------------| <-- SP + // + // The sp_inc stack slot holds the total size of the frame, including the extension + // space and copies #2 of the return address and the saved RBP (but never the copies + // #1 of the return address and saved RBP). That is how to find the copies #1 of the + // return address and saved rbp. This size is expressed in bytes. Be careful when using + // it from C++ in pointer arithmetic you might need to divide it by wordSize. + + // The stack increment resides just below the saved rbp + addq(rsp, Address(rsp, initial_framesize - wordSize)); + pop(rbp); + } else { + if (initial_framesize > 0) { + addq(rsp, initial_framesize); + } + pop(rbp); + } +} + #ifdef COMPILER2 // clear memory of size 'cnt' qwords, starting at 'base' using XMM/YMM/ZMM registers -void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp, KRegister mask) { +void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register val, XMMRegister xtmp, KRegister mask) { // cnt - number of qwords (8-byte words). // base - start address, qword aligned. 
Label L_zero_64_bytes, L_loop, L_sloop, L_tail, L_end; bool use64byteVector = (MaxVectorSize == 64) && (CopyAVX3Threshold == 0); if (use64byteVector) { - vpxor(xtmp, xtmp, xtmp, AVX_512bit); + evpbroadcastq(xtmp, val, AVX_512bit); } else if (MaxVectorSize >= 32) { - vpxor(xtmp, xtmp, xtmp, AVX_256bit); + movdq(xtmp, val); + punpcklqdq(xtmp, xtmp); + vinserti128_high(xtmp, xtmp); } else { - pxor(xtmp, xtmp); + movdq(xtmp, val); + punpcklqdq(xtmp, xtmp); } jmp(L_zero_64_bytes); @@ -5901,7 +6539,7 @@ void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register rtmp, X if (use64byteVector) { addptr(cnt, 8); jccb(Assembler::equal, L_end); - fill64_masked(3, base, 0, xtmp, mask, cnt, rtmp, true); + fill64_masked(3, base, 0, xtmp, mask, cnt, val, true); jmp(L_end); } else { addptr(cnt, 4); @@ -5920,7 +6558,7 @@ void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register rtmp, X addptr(cnt, 4); jccb(Assembler::lessEqual, L_end); if (UseAVX > 2 && MaxVectorSize >= 32 && VM_Version::supports_avx512vl()) { - fill32_masked(3, base, 0, xtmp, mask, cnt, rtmp); + fill32_masked(3, base, 0, xtmp, mask, cnt, val); } else { decrement(cnt); @@ -6022,21 +6660,18 @@ void MacroAssembler::clear_mem(Register base, int cnt, Register rtmp, XMMRegiste } } -void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp, XMMRegister xtmp, - bool is_large, KRegister mask) { +void MacroAssembler::clear_mem(Register base, Register cnt, Register val, XMMRegister xtmp, + bool is_large, bool word_copy_only, KRegister mask) { // cnt - number of qwords (8-byte words). // base - start address, qword aligned. 
// is_large - if optimizers know cnt is larger than InitArrayShortSize assert(base==rdi, "base register must be edi for rep stos"); - assert(tmp==rax, "tmp register must be eax for rep stos"); + assert(val==rax, "val register must be eax for rep stos"); assert(cnt==rcx, "cnt register must be ecx for rep stos"); assert(InitArrayShortSize % BytesPerLong == 0, "InitArrayShortSize should be the multiple of BytesPerLong"); Label DONE; - if (!is_large || !UseXMMForObjInit) { - xorptr(tmp, tmp); - } if (!is_large) { Label LOOP, LONG; @@ -6048,7 +6683,7 @@ void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp, XMMReg // Use individual pointer-sized stores for small counts: BIND(LOOP); - movptr(Address(base, cnt, Address::times_ptr), tmp); + movptr(Address(base, cnt, Address::times_ptr), val); decrement(cnt); jccb(Assembler::greaterEqual, LOOP); jmpb(DONE); @@ -6057,11 +6692,11 @@ void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp, XMMReg } // Use longer rep-prefixed ops for non-small counts: - if (UseFastStosb) { + if (UseFastStosb && !word_copy_only) { shlptr(cnt, 3); // convert to number of bytes rep_stosb(); } else if (UseXMMForObjInit) { - xmm_clear_mem(base, cnt, tmp, xtmp, mask); + xmm_clear_mem(base, cnt, val, xtmp, mask); } else { rep_stos(); } @@ -9978,6 +10613,9 @@ void MacroAssembler::fast_lock(Register basic_lock, Register obj, Register reg_r movptr(tmp, reg_rax); andptr(tmp, ~(int32_t)markWord::unlocked_value); orptr(reg_rax, markWord::unlocked_value); + // Mask inline_type bit such that we go to the slow path if object is an inline type + andptr(reg_rax, ~((int) markWord::inline_type_bit_in_place)); + lock(); cmpxchgptr(tmp, Address(obj, oopDesc::mark_offset_in_bytes())); jcc(Assembler::notEqual, slow); diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp index de5ec02fe43fb..b9b31d2139190 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp +++ 
b/src/hotspot/cpu/x86/macroAssembler_x86.hpp @@ -30,9 +30,12 @@ #include "code/vmreg.inline.hpp" #include "compiler/oopMap.hpp" #include "utilities/macros.hpp" +#include "runtime/signature.hpp" #include "runtime/vm_version.hpp" #include "utilities/checkedCast.hpp" +class ciInlineKlass; + // MacroAssembler extends Assembler by frequently used macros. // // Instructions for which a 'better' code sequence exists depending @@ -94,6 +97,26 @@ class MacroAssembler: public Assembler { static bool needs_explicit_null_check(intptr_t offset); static bool uses_implicit_null_check(void* address); + // markWord tests, kills markWord reg + void test_markword_is_inline_type(Register markword, Label& is_inline_type); + + // inlineKlass queries, kills temp_reg + void test_oop_is_not_inline_type(Register object, Register tmp, Label& not_inline_type, bool can_be_null = true); + + void test_field_is_null_free_inline_type(Register flags, Register temp_reg, Label& is_null_free); + void test_field_is_not_null_free_inline_type(Register flags, Register temp_reg, Label& not_null_free); + void test_field_is_flat(Register flags, Register temp_reg, Label& is_flat); + + // Check oops for special arrays, i.e. flat arrays and/or null-free arrays + void test_oop_prototype_bit(Register oop, Register temp_reg, int32_t test_bit, bool jmp_set, Label& jmp_label); + void test_flat_array_oop(Register oop, Register temp_reg, Label& is_flat_array); + void test_non_flat_array_oop(Register oop, Register temp_reg, Label& is_non_flat_array); + void test_null_free_array_oop(Register oop, Register temp_reg, Label& is_null_free_array); + void test_non_null_free_array_oop(Register oop, Register temp_reg, Label& is_non_null_free_array); + + // Check array klass layout helper for flat or null-free arrays... + void test_flat_array_layout(Register lh, Label& is_flat_array); + // Required platform-specific helpers for Label::patch_instructions. 
// They _shadow_ the declarations in AbstractAssembler, which are undefined. void pd_patch_instruction(address branch, address target, const char* file, int line) { @@ -349,6 +372,9 @@ class MacroAssembler: public Assembler { void load_method_holder(Register holder, Register method); // oop manipulations + + // Load oopDesc._metadata without decode (useful for direct Klass* compare from oops) + void load_metadata(Register dst, Register src); void load_narrow_klass_compact(Register dst, Register src); void load_klass(Register dst, Register src, Register tmp); void store_klass(Register dst, Register src, Register tmp); @@ -365,6 +391,12 @@ class MacroAssembler: public Assembler { void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register val, Register tmp1, Register tmp2, Register tmp3); + void flat_field_copy(DecoratorSet decorators, Register src, Register dst, Register inline_layout_info); + + // inline type data payload offsets... + void payload_offset(Register inline_klass, Register offset); + void payload_addr(Register oop, Register data, Register inline_klass); + void load_heap_oop(Register dst, Address src, Register tmp1 = noreg, DecoratorSet decorators = 0); void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, DecoratorSet decorators = 0); void store_heap_oop(Address dst, Register val, Register tmp1 = noreg, @@ -374,6 +406,8 @@ class MacroAssembler: public Assembler { // stored using routines that take a jobject. 
void store_heap_oop_null(Address dst); + void load_prototype_header(Register dst, Register src, Register tmp); + void store_klass_gap(Register dst, Register src); // This dummy is to prevent a call to store_heap_oop from @@ -521,6 +555,8 @@ class MacroAssembler: public Assembler { ); void zero_memory(Register address, Register length_in_bytes, int offset_in_bytes, Register temp); + void inline_layout_info(Register klass, Register index, Register layout_info); + void population_count(Register dst, Register src, Register scratch1, Register scratch2); // interface method calling @@ -769,6 +805,7 @@ class MacroAssembler: public Assembler { void andptr(Register dst, int32_t src); void andptr(Register src1, Register src2) { andq(src1, src2); } + void andptr(Register dst, Address src) { andq(dst, src); } using Assembler::andq; void andq(Register dst, AddressLiteral src, Register rscratch = noreg); @@ -1929,9 +1966,12 @@ class MacroAssembler: public Assembler { public: + // Inline type specific methods + #include "asm/macroAssembler_common.hpp" + // clear memory of size 'cnt' qwords, starting at 'base'; // if 'is_large' is set, do not try to produce short loop - void clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp, bool is_large, KRegister mask=knoreg); + void clear_mem(Register base, Register cnt, Register val, XMMRegister xtmp, bool is_large, bool word_copy_only, KRegister mask=knoreg); // clear memory initialization sequence for constant size; void clear_mem(Register base, int cnt, Register rtmp, XMMRegister xtmp, KRegister mask=knoreg); diff --git a/src/hotspot/cpu/x86/methodHandles_x86.cpp b/src/hotspot/cpu/x86/methodHandles_x86.cpp index 5b15444bc3289..9c8dd0dd0ba2f 100644 --- a/src/hotspot/cpu/x86/methodHandles_x86.cpp +++ b/src/hotspot/cpu/x86/methodHandles_x86.cpp @@ -193,7 +193,11 @@ void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register meth __ BIND(run_compiled_code); } - const ByteSize entry_offset = 
for_compiler_entry ? Method::from_compiled_offset() : + // The following jump might pass an inline type argument that was erased to Object as oop to a + // callee that expects inline type arguments to be passed as fields. We need to call the compiled + // value entry (_code->inline_entry_point() or _adapter->c2i_inline_entry()) which will take care + // of translating between the calling conventions. + const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_inline_offset() : Method::from_interpreted_offset(); __ jmp(Address(method, entry_offset)); diff --git a/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp b/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp index 8bb9982a82029..3c41da4a4b603 100644 --- a/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp +++ b/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp @@ -27,6 +27,7 @@ #endif #include "asm/macroAssembler.hpp" #include "asm/macroAssembler.inline.hpp" +#include "classfile/symbolTable.hpp" #include "code/aotCodeCache.hpp" #include "code/compiledIC.hpp" #include "code/debugInfoRec.hpp" @@ -631,6 +632,87 @@ int SharedRuntime::java_calling_convention(const BasicType *sig_bt, return stk_args; } +// Same as java_calling_convention() but for multiple return +// values. There's no way to store them on the stack so if we don't +// have enough registers, multiple values can't be returned. +const uint SharedRuntime::java_return_convention_max_int = Argument::n_int_register_parameters_j+1; +const uint SharedRuntime::java_return_convention_max_float = Argument::n_float_register_parameters_j; +int SharedRuntime::java_return_convention(const BasicType *sig_bt, + VMRegPair *regs, + int total_args_passed) { + // Create the mapping between argument positions and + // registers. 
+ static const Register INT_ArgReg[java_return_convention_max_int] = { + rax, j_rarg5, j_rarg4, j_rarg3, j_rarg2, j_rarg1, j_rarg0 + }; + static const XMMRegister FP_ArgReg[java_return_convention_max_float] = { + j_farg0, j_farg1, j_farg2, j_farg3, + j_farg4, j_farg5, j_farg6, j_farg7 + }; + + + uint int_args = 0; + uint fp_args = 0; + + for (int i = 0; i < total_args_passed; i++) { + switch (sig_bt[i]) { + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + if (int_args < Argument::n_int_register_parameters_j+1) { + regs[i].set1(INT_ArgReg[int_args]->as_VMReg()); + int_args++; + } else { + return -1; + } + break; + case T_VOID: + // halves of T_LONG or T_DOUBLE + assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); + regs[i].set_bad(); + break; + case T_LONG: + assert(sig_bt[i + 1] == T_VOID, "expecting half"); + // fall through + case T_OBJECT: + case T_ARRAY: + case T_ADDRESS: + case T_METADATA: + if (int_args < Argument::n_int_register_parameters_j+1) { + regs[i].set2(INT_ArgReg[int_args]->as_VMReg()); + int_args++; + } else { + return -1; + } + break; + case T_FLOAT: + if (fp_args < Argument::n_float_register_parameters_j) { + regs[i].set1(FP_ArgReg[fp_args]->as_VMReg()); + fp_args++; + } else { + return -1; + } + break; + case T_DOUBLE: + assert(sig_bt[i + 1] == T_VOID, "expecting half"); + if (fp_args < Argument::n_float_register_parameters_j) { + regs[i].set2(FP_ArgReg[fp_args]->as_VMReg()); + fp_args++; + } else { + return -1; + } + break; + default: + ShouldNotReachHere(); + break; + } + } + + return int_args + fp_args; +} + // Patch the callers callsite with entry to compiled code if it exists. static void patch_callers_callsite(MacroAssembler *masm) { Label L; @@ -673,12 +755,151 @@ static void patch_callers_callsite(MacroAssembler *masm) { __ bind(L); } +// For each inline type argument, sig includes the list of fields of +// the inline type. 
This utility function computes the number of +// arguments for the call if inline types are passed by reference (the +// calling convention the interpreter expects). +static int compute_total_args_passed_int(const GrowableArray* sig_extended) { + int total_args_passed = 0; + if (InlineTypePassFieldsAsArgs) { + for (int i = 0; i < sig_extended->length(); i++) { + BasicType bt = sig_extended->at(i)._bt; + if (bt == T_METADATA) { + // In sig_extended, an inline type argument starts with: + // T_METADATA, followed by the types of the fields of the + // inline type and T_VOID to mark the end of the value + // type. Inline types are flattened so, for instance, in the + // case of an inline type with an int field and an inline type + // field that itself has 2 fields, an int and a long: + // T_METADATA T_INT T_METADATA T_INT T_LONG T_VOID (second + // slot for the T_LONG) T_VOID (inner inline type) T_VOID + // (outer inline type) + total_args_passed++; + int vt = 1; + do { + i++; + BasicType bt = sig_extended->at(i)._bt; + BasicType prev_bt = sig_extended->at(i-1)._bt; + if (bt == T_METADATA) { + vt++; + } else if (bt == T_VOID && + prev_bt != T_LONG && + prev_bt != T_DOUBLE) { + vt--; + } + } while (vt != 0); + } else { + total_args_passed++; + } + } + } else { + total_args_passed = sig_extended->length(); + } + return total_args_passed; +} + + +static void gen_c2i_adapter_helper(MacroAssembler* masm, + BasicType bt, + BasicType prev_bt, + size_t size_in_bytes, + const VMRegPair& reg_pair, + const Address& to, + int extraspace, + bool is_oop) { + if (bt == T_VOID) { + assert(prev_bt == T_LONG || prev_bt == T_DOUBLE, "missing half"); + return; + } + + // Say 4 args: + // i st_off + // 0 32 T_LONG + // 1 24 T_VOID + // 2 16 T_OBJECT + // 3 8 T_BOOL + // - 0 return address + // + // However to make thing extra confusing. 
Because we can fit a long/double in + // a single slot on a 64 bt vm and it would be silly to break them up, the interpreter + // leaves one slot empty and only stores to a single slot. In this case the + // slot that is occupied is the T_VOID slot. See I said it was confusing. + + bool wide = (size_in_bytes == wordSize); + VMReg r_1 = reg_pair.first(); + VMReg r_2 = reg_pair.second(); + assert(r_2->is_valid() == wide, "invalid size"); + if (!r_1->is_valid()) { + assert(!r_2->is_valid(), "must be invalid"); + return; + } + + if (!r_1->is_XMMRegister()) { + Register val = rax; + if (r_1->is_stack()) { + int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; + __ load_sized_value(val, Address(rsp, ld_off), size_in_bytes, /* is_signed */ false); + } else { + val = r_1->as_Register(); + } + assert_different_registers(to.base(), val, rscratch1); + if (is_oop) { + __ push(r13); + __ push(rbx); + // store_heap_oop transitively calls oop_store_at which corrupts to.base(). We need to keep it valid. 
+ __ push(to.base()); + __ store_heap_oop(to, val, rscratch1, r13, rbx, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED); + __ pop(to.base()); + __ pop(rbx); + __ pop(r13); + } else { + __ store_sized_value(to, val, size_in_bytes); + } + } else { + if (wide) { + __ movdbl(to, r_1->as_XMMRegister()); + } else { + __ movflt(to, r_1->as_XMMRegister()); + } + } +} + static void gen_c2i_adapter(MacroAssembler *masm, - int total_args_passed, - int comp_args_on_stack, - const BasicType *sig_bt, + const GrowableArray* sig_extended, const VMRegPair *regs, - Label& skip_fixup) { + bool requires_clinit_barrier, + address& c2i_no_clinit_check_entry, + Label& skip_fixup, + address start, + OopMapSet* oop_maps, + int& frame_complete, + int& frame_size_in_words, + bool alloc_inline_receiver) { + if (requires_clinit_barrier) { + assert(VM_Version::supports_fast_class_init_checks(), "sanity"); + Label L_skip_barrier; + Register method = rbx; + + { // Bypass the barrier for non-static methods + Register flags = rscratch1; + __ load_unsigned_short(flags, Address(method, Method::access_flags_offset())); + __ testl(flags, JVM_ACC_STATIC); + __ jcc(Assembler::zero, L_skip_barrier); // non-static + } + + Register klass = rscratch1; + __ load_method_holder(klass, method); + __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/); + + __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path + + __ bind(L_skip_barrier); + c2i_no_clinit_check_entry = __ pc(); + } + + BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->c2i_entry_barrier(masm); + // Before we get into the guts of the C2I adapter, see if we should be here // at all. We've come from compiled code and are attempting to jump to the // interpreter, which means the caller made a static call to get here @@ -688,9 +909,51 @@ static void gen_c2i_adapter(MacroAssembler *masm, __ bind(skip_fixup); + if (InlineTypePassFieldsAsArgs) { + // Is there an inline type argument? 
+ bool has_inline_argument = false; + for (int i = 0; i < sig_extended->length() && !has_inline_argument; i++) { + has_inline_argument = (sig_extended->at(i)._bt == T_METADATA); + } + if (has_inline_argument) { + // There is at least a value type argument: we're coming from + // compiled code so we may not have buffers to back the value + // objects. Allocate the buffers here with a runtime call for + // the value arguments that needs a buffer. + OopMap* map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ false); + + frame_complete = __ offset(); + + __ set_last_Java_frame(noreg, noreg, nullptr, rscratch1); + + __ mov(c_rarg0, r15_thread); + __ mov(c_rarg1, rbx); + __ mov64(c_rarg2, (int64_t)alloc_inline_receiver); + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::allocate_inline_types))); + + oop_maps->add_gc_map((int)(__ pc() - start), map); + __ reset_last_Java_frame(false); + + RegisterSaver::restore_live_registers(masm); + + Label no_exception; + __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), NULL_WORD); + __ jcc(Assembler::equal, no_exception); + + __ movptr(Address(r15_thread, JavaThread::vm_result_oop_offset()), NULL_WORD); + __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset())); + __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + + __ bind(no_exception); + + // We get an array of objects from the runtime call + __ get_vm_result_oop(rscratch2); // Use rscratch2 (r11) as temporary because rscratch1 (r10) is trashed by movptr() + } + } + // Since all args are passed on the stack, total_args_passed * // Interpreter::stackElementSize is the space we need. 
- + int total_args_passed = compute_total_args_passed_int(sig_extended); assert(total_args_passed >= 0, "total_args_passed is %d", total_args_passed); int extraspace = (total_args_passed * Interpreter::stackElementSize); @@ -725,96 +988,106 @@ static void gen_c2i_adapter(MacroAssembler *masm, #endif // Now write the args into the outgoing interpreter space - for (int i = 0; i < total_args_passed; i++) { - if (sig_bt[i] == T_VOID) { - assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); - continue; - } - - // offset to start parameters - int st_off = (total_args_passed - i) * Interpreter::stackElementSize; - int next_off = st_off - Interpreter::stackElementSize; - // Say 4 args: - // i st_off - // 0 32 T_LONG - // 1 24 T_VOID - // 2 16 T_OBJECT - // 3 8 T_BOOL - // - 0 return address - // - // However to make thing extra confusing. Because we can fit a long/double in - // a single slot on a 64 bt vm and it would be silly to break them up, the interpreter - // leaves one slot empty and only stores to a single slot. In this case the - // slot that is occupied is the T_VOID slot. See I said it was confusing. - - VMReg r_1 = regs[i].first(); - VMReg r_2 = regs[i].second(); - if (!r_1->is_valid()) { - assert(!r_2->is_valid(), ""); - continue; - } - if (r_1->is_stack()) { - // memory to memory use rax - int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; - if (!r_2->is_valid()) { - // sign extend?? 
- __ movl(rax, Address(rsp, ld_off)); - __ movptr(Address(rsp, st_off), rax); - - } else { - - __ movq(rax, Address(rsp, ld_off)); - - // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG - // T_DOUBLE and T_LONG use two slots in the interpreter - if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { - // ld_off == LSW, ld_off+wordSize == MSW - // st_off == MSW, next_off == LSW - __ movq(Address(rsp, next_off), rax); + // next_arg_comp is the next argument from the compiler point of + // view (inline type fields are passed in registers/on the stack). In + // sig_extended, an inline type argument starts with: T_METADATA, + // followed by the types of the fields of the inline type and T_VOID + // to mark the end of the inline type. ignored counts the number of + // T_METADATA/T_VOID. next_vt_arg is the next inline type argument: + // used to get the buffer for that argument from the pool of buffers + // we allocated above and want to pass to the + // interpreter. next_arg_int is the next argument from the + // interpreter point of view (inline types are passed by reference). + for (int next_arg_comp = 0, ignored = 0, next_vt_arg = 0, next_arg_int = 0; + next_arg_comp < sig_extended->length(); next_arg_comp++) { + assert(ignored <= next_arg_comp, "shouldn't skip over more slots than there are arguments"); + assert(next_arg_int <= total_args_passed, "more arguments for the interpreter than expected?"); + BasicType bt = sig_extended->at(next_arg_comp)._bt; + int st_off = (total_args_passed - next_arg_int) * Interpreter::stackElementSize; + if (!InlineTypePassFieldsAsArgs || bt != T_METADATA) { + int next_off = st_off - Interpreter::stackElementSize; + const int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : st_off; + const VMRegPair reg_pair = regs[next_arg_comp-ignored]; + size_t size_in_bytes = reg_pair.second()->is_valid() ? 8 : 4; + gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? 
sig_extended->at(next_arg_comp-1)._bt : T_ILLEGAL, + size_in_bytes, reg_pair, Address(rsp, offset), extraspace, false); + next_arg_int++; #ifdef ASSERT - // Overwrite the unused slot with known junk - __ mov64(rax, CONST64(0xdeadffffdeadaaaa)); - __ movptr(Address(rsp, st_off), rax); -#endif /* ASSERT */ - } else { - __ movq(Address(rsp, st_off), rax); - } + if (bt == T_LONG || bt == T_DOUBLE) { + // Overwrite the unused slot with known junk + __ mov64(rax, CONST64(0xdeadffffdeadaaaa)); + __ movptr(Address(rsp, st_off), rax); } - } else if (r_1->is_Register()) { - Register r = r_1->as_Register(); - if (!r_2->is_valid()) { - // must be only an int (or less ) so move only 32bits to slot - // why not sign extend?? - __ movl(Address(rsp, st_off), r); - } else { - // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG - // T_DOUBLE and T_LONG use two slots in the interpreter - if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { - // long/double in gpr -#ifdef ASSERT - // Overwrite the unused slot with known junk - __ mov64(rax, CONST64(0xdeadffffdeadaaab)); - __ movptr(Address(rsp, st_off), rax); #endif /* ASSERT */ - __ movq(Address(rsp, next_off), r); + } else { + ignored++; + next_arg_int++; + int vt = 1; + // write fields we get from compiled code in registers/stack + // slots to the buffer: we know we are done with that inline type + // argument when we hit the T_VOID that acts as an end of inline + // type delimiter for this inline type. Inline types are flattened + // so we might encounter embedded inline types. Each entry in + // sig_extended contains a field offset in the buffer. 
+ Label L_null; + Label not_null_buffer; + do { + next_arg_comp++; + BasicType bt = sig_extended->at(next_arg_comp)._bt; + BasicType prev_bt = sig_extended->at(next_arg_comp-1)._bt; + if (bt == T_METADATA) { + vt++; + ignored++; + } else if (bt == T_VOID && + prev_bt != T_LONG && + prev_bt != T_DOUBLE) { + vt--; + ignored++; + } else if (sig_extended->at(next_arg_comp)._vt_oop) { + // buffer argument: use if non null + VMReg buffer = regs[next_arg_comp-ignored].first(); + if (buffer->is_stack()) { + int ld_off = buffer->reg2stack() * VMRegImpl::stack_slot_size + extraspace; + __ movptr(r14, Address(rsp, ld_off)); + } else { + __ movptr(r14, buffer->as_Register()); + } + __ testptr(r14, r14); + __ jcc(Assembler::notEqual, not_null_buffer); + // otherwise get the buffer from the just allocated pool of buffers + int index = arrayOopDesc::base_offset_in_bytes(T_OBJECT) + next_vt_arg * type2aelembytes(T_OBJECT); + __ load_heap_oop(r14, Address(rscratch2, index)); + next_vt_arg++; } else { - __ movptr(Address(rsp, st_off), r); + int off = sig_extended->at(next_arg_comp)._offset; + if (off == -1) { + // Nullable inline type argument, emit null check + VMReg reg = regs[next_arg_comp-ignored].first(); + Label L_notNull; + if (reg->is_stack()) { + int ld_off = reg->reg2stack() * VMRegImpl::stack_slot_size + extraspace; + __ testb(Address(rsp, ld_off), 1); + } else { + __ testb(reg->as_Register(), 1); + } + __ jcc(Assembler::notZero, L_notNull); + __ movptr(Address(rsp, st_off), 0); + __ jmp(L_null); + __ bind(L_notNull); + continue; + } + assert(off > 0, "offset in object should be positive"); + size_t size_in_bytes = is_java_primitive(bt) ? type2aelembytes(bt) : wordSize; + bool is_oop = is_reference_type(bt); + gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? 
sig_extended->at(next_arg_comp-1)._bt : T_ILLEGAL, + size_in_bytes, regs[next_arg_comp-ignored], Address(r14, off), extraspace, is_oop); } - } - } else { - assert(r_1->is_XMMRegister(), ""); - if (!r_2->is_valid()) { - // only a float use just part of the slot - __ movflt(Address(rsp, st_off), r_1->as_XMMRegister()); - } else { -#ifdef ASSERT - // Overwrite the unused slot with known junk - __ mov64(rax, CONST64(0xdeadffffdeadaaac)); - __ movptr(Address(rsp, st_off), rax); -#endif /* ASSERT */ - __ movdbl(Address(rsp, next_off), r_1->as_XMMRegister()); - } + } while (vt != 0); + // pass the buffer to the interpreter + __ bind(not_null_buffer); + __ movptr(Address(rsp, st_off), r14); + __ bind(L_null); } } @@ -824,9 +1097,8 @@ static void gen_c2i_adapter(MacroAssembler *masm, } void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, - int total_args_passed, int comp_args_on_stack, - const BasicType *sig_bt, + const GrowableArray* sig, const VMRegPair *regs) { // Note: r13 contains the senderSP on entry. We must preserve it since @@ -882,15 +1154,19 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, // Will jump to the compiled code just as if compiled code was doing it. // Pre-load the register-jump target early, to schedule it better. - __ movptr(r11, Address(rbx, in_bytes(Method::from_compiled_offset()))); + __ movptr(r11, Address(rbx, in_bytes(Method::from_compiled_inline_offset()))); + + int total_args_passed = sig->length(); // Now generate the shuffle code. Pick up all register args and move the // rest through the floating point stack top. for (int i = 0; i < total_args_passed; i++) { - if (sig_bt[i] == T_VOID) { + BasicType bt = sig->at(i)._bt; + if (bt == T_VOID) { // Longs and doubles are passed in native word order, but misaligned // in the 32-bit build. - assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); + BasicType prev_bt = (i > 0) ? 
sig->at(i-1)._bt : T_ILLEGAL; + assert(i > 0 && (prev_bt == T_LONG || prev_bt == T_DOUBLE), "missing half"); continue; } @@ -932,7 +1208,7 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, // are accessed as negative so LSW is at LOW address // ld_off is MSW so get LSW - const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)? + const int offset = (bt==T_LONG||bt==T_DOUBLE)? next_off : ld_off; __ movq(r13, Address(saved_sp, offset)); // st_off is LSW (i.e. reg.first()) @@ -947,7 +1223,7 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, // the interpreter allocates two slots but only uses one for thr T_LONG or T_DOUBLE case // So we must adjust where to pick up the data to match the interpreter. - const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)? + const int offset = (bt==T_LONG||bt==T_DOUBLE)? next_off : ld_off; // this can be a misaligned move @@ -980,22 +1256,39 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx); // put Method* where a c2i would expect should we end up there - // only needed because eof c2 resolve stubs return Method* as a result in + // only needed because of c2 resolve stubs return Method* as a result in // rax __ mov(rax, rbx); __ jmp(r11); } +static void gen_inline_cache_check(MacroAssembler *masm, Label& skip_fixup) { + Register data = rax; + __ ic_check(1 /* end_alignment */); + __ movptr(rbx, Address(data, CompiledICData::speculated_method_offset())); + + // Method might have been compiled since the call site was patched to + // interpreted if that is the case treat it as a miss so we can get + // the call site corrected. 
+ __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), NULL_WORD); + __ jcc(Assembler::equal, skip_fixup); + __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); +} + // --------------------------------------------------------------- -void SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, - int total_args_passed, +void SharedRuntime::generate_i2c2i_adapters(MacroAssembler* masm, int comp_args_on_stack, - const BasicType *sig_bt, - const VMRegPair *regs, - address entry_address[AdapterBlob::ENTRY_COUNT]) { + const GrowableArray* sig, + const VMRegPair* regs, + const GrowableArray* sig_cc, + const VMRegPair* regs_cc, + const GrowableArray* sig_cc_ro, + const VMRegPair* regs_cc_ro, + address entry_address[AdapterBlob::ENTRY_COUNT], + AdapterBlob*& new_adapter, + bool allocate_code_blob) { entry_address[AdapterBlob::I2C] = __ pc(); - - gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); + gen_i2c_adapter(masm, comp_args_on_stack, sig, regs); // ------------------------------------------------------------------------- // Generate a C2I adapter. On entry we know rbx holds the Method* during calls @@ -1007,51 +1300,51 @@ void SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, // compiled code, which relies solely on SP and not RBP, get sick). entry_address[AdapterBlob::C2I_Unverified] = __ pc(); + entry_address[AdapterBlob::C2I_Unverified_Inline] = __ pc(); Label skip_fixup; - Register data = rax; - Register receiver = j_rarg0; - Register temp = rbx; + gen_inline_cache_check(masm, skip_fixup); - { - __ ic_check(1 /* end_alignment */); - __ movptr(rbx, Address(data, CompiledICData::speculated_method_offset())); - // Method might have been compiled since the call site was patched to - // interpreted if that is the case treat it as a miss so we can get - // the call site corrected. 
- __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), NULL_WORD); - __ jcc(Assembler::equal, skip_fixup); - __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); - } - - entry_address[AdapterBlob::C2I] = __ pc(); + OopMapSet* oop_maps = new OopMapSet(); + int frame_complete = CodeOffsets::frame_never_safe; + int frame_size_in_words = 0; - // Class initialization barrier for static methods + // Scalarized c2i adapter with non-scalarized receiver (i.e., don't pack receiver) entry_address[AdapterBlob::C2I_No_Clinit_Check] = nullptr; - assert(VM_Version::supports_fast_class_init_checks(), "sanity"); - Label L_skip_barrier; - Register method = rbx; - - // Bypass the barrier for non-static methods - Register flags = rscratch1; - __ load_unsigned_short(flags, Address(method, Method::access_flags_offset())); - __ testl(flags, JVM_ACC_STATIC); - __ jcc(Assembler::zero, L_skip_barrier); // non-static - - Register klass = rscratch1; - __ load_method_holder(klass, method); - __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/); - - __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path - - __ bind(L_skip_barrier); - entry_address[AdapterBlob::C2I_No_Clinit_Check] = __ pc(); - - BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); - bs->c2i_entry_barrier(masm); - - gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); - return; + entry_address[AdapterBlob::C2I_Inline_RO] = __ pc(); + if (regs_cc != regs_cc_ro) { + // No class init barrier needed because method is guaranteed to be non-static + gen_c2i_adapter(masm, sig_cc_ro, regs_cc_ro, /* requires_clinit_barrier = */ false, entry_address[AdapterBlob::C2I_No_Clinit_Check], + skip_fixup, entry_address[AdapterBlob::I2C], oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ false); + skip_fixup.reset(); + } + + // Scalarized c2i adapter + entry_address[AdapterBlob::C2I] = __ pc(); + 
entry_address[AdapterBlob::C2I_Inline] = __ pc(); + gen_c2i_adapter(masm, sig_cc, regs_cc, /* requires_clinit_barrier = */ true, entry_address[AdapterBlob::C2I_No_Clinit_Check], + skip_fixup, entry_address[AdapterBlob::I2C], oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ true); + + // Non-scalarized c2i adapter + if (regs != regs_cc) { + entry_address[AdapterBlob::C2I_Unverified_Inline] = __ pc(); + Label inline_entry_skip_fixup; + gen_inline_cache_check(masm, inline_entry_skip_fixup); + + entry_address[AdapterBlob::C2I_Inline] = __ pc(); + gen_c2i_adapter(masm, sig, regs, /* requires_clinit_barrier = */ true, entry_address[AdapterBlob::C2I_No_Clinit_Check], + inline_entry_skip_fixup, entry_address[AdapterBlob::I2C], oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ false); + } + + // The c2i adapters might safepoint and trigger a GC. The caller must make sure that + // the GC knows about the location of oop argument locations passed to the c2i adapter. 
+ if (allocate_code_blob) { + bool caller_must_gc_arguments = (regs != regs_cc); + int entry_offset[AdapterHandlerEntry::ENTRIES_COUNT]; + assert(AdapterHandlerEntry::ENTRIES_COUNT == 7, "sanity"); + AdapterHandlerLibrary::address_to_offset(entry_address, entry_offset); + new_adapter = AdapterBlob::create(masm->code(), entry_offset, frame_complete, frame_size_in_words, oop_maps, caller_must_gc_arguments); + } } int SharedRuntime::c_calling_convention(const BasicType *sig_bt, @@ -3437,6 +3730,153 @@ void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints, reverse_words(m, (julong *)m_ints, longwords); } +BufferedInlineTypeBlob* SharedRuntime::generate_buffered_inline_type_adapter(const InlineKlass* vk) { + BufferBlob* buf = BufferBlob::create("inline types pack/unpack", 16 * K); + if (buf == nullptr) { + return nullptr; + } + CodeBuffer buffer(buf); + short buffer_locs[20]; + buffer.insts()->initialize_shared_locs((relocInfo*)buffer_locs, + sizeof(buffer_locs)/sizeof(relocInfo)); + + MacroAssembler* masm = new MacroAssembler(&buffer); + + const Array* sig_vk = vk->extended_sig(); + const Array* regs = vk->return_regs(); + + int pack_fields_jobject_off = __ offset(); + // Resolve pre-allocated buffer from JNI handle. + // We cannot do this in generate_call_stub() because it requires GC code to be initialized. 
+ __ movptr(rax, Address(r13, 0)); + __ resolve_jobject(rax /* value */, + r12 /* tmp */); + __ movptr(Address(r13, 0), rax); + + int pack_fields_off = __ offset(); + + int j = 1; + for (int i = 0; i < sig_vk->length(); i++) { + BasicType bt = sig_vk->at(i)._bt; + if (bt == T_METADATA) { + continue; + } + if (bt == T_VOID) { + if (sig_vk->at(i-1)._bt == T_LONG || + sig_vk->at(i-1)._bt == T_DOUBLE) { + j++; + } + continue; + } + int off = sig_vk->at(i)._offset; + assert(off > 0, "offset in object should be positive"); + VMRegPair pair = regs->at(j); + VMReg r_1 = pair.first(); + Address to(rax, off); + if (bt == T_FLOAT) { + __ movflt(to, r_1->as_XMMRegister()); + } else if (bt == T_DOUBLE) { + __ movdbl(to, r_1->as_XMMRegister()); + } else { + Register val = r_1->as_Register(); + assert_different_registers(to.base(), val, r14, r13, rbx, rscratch1); + if (is_reference_type(bt)) { + // store_heap_oop transitively calls oop_store_at which corrupts to.base(). We need to keep rax valid. + __ mov(rbx, rax); + Address to_with_rbx(rbx, off); + __ store_heap_oop(to_with_rbx, val, r14, r13, rbx, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED); + } else { + __ store_sized_value(to, r_1->as_Register(), type2aelembytes(bt)); + } + } + j++; + } + assert(j == regs->length(), "missed a field?"); + if (vk->supports_nullable_layouts()) { + // Set the null marker + __ movb(Address(rax, vk->null_marker_offset()), 1); + } + __ ret(0); + + int unpack_fields_off = __ offset(); + + Label skip; + Label not_null; + __ testptr(rax, rax); + __ jcc(Assembler::notZero, not_null); + + // Return value is null. Zero all registers because the runtime requires a canonical + // representation of a flat null. 
+ j = 1; + for (int i = 0; i < sig_vk->length(); i++) { + BasicType bt = sig_vk->at(i)._bt; + if (bt == T_METADATA) { + continue; + } + if (bt == T_VOID) { + if (sig_vk->at(i-1)._bt == T_LONG || + sig_vk->at(i-1)._bt == T_DOUBLE) { + j++; + } + continue; + } + + VMRegPair pair = regs->at(j); + VMReg r_1 = pair.first(); + if (r_1->is_XMMRegister()) { + __ xorps(r_1->as_XMMRegister(), r_1->as_XMMRegister()); + } else { + __ xorl(r_1->as_Register(), r_1->as_Register()); + } + j++; + } + __ jmp(skip); + __ bind(not_null); + + j = 1; + for (int i = 0; i < sig_vk->length(); i++) { + BasicType bt = sig_vk->at(i)._bt; + if (bt == T_METADATA) { + continue; + } + if (bt == T_VOID) { + if (sig_vk->at(i-1)._bt == T_LONG || + sig_vk->at(i-1)._bt == T_DOUBLE) { + j++; + } + continue; + } + int off = sig_vk->at(i)._offset; + assert(off > 0, "offset in object should be positive"); + VMRegPair pair = regs->at(j); + VMReg r_1 = pair.first(); + VMReg r_2 = pair.second(); + Address from(rax, off); + if (bt == T_FLOAT) { + __ movflt(r_1->as_XMMRegister(), from); + } else if (bt == T_DOUBLE) { + __ movdbl(r_1->as_XMMRegister(), from); + } else if (bt == T_OBJECT || bt == T_ARRAY) { + assert_different_registers(rax, r_1->as_Register()); + __ load_heap_oop(r_1->as_Register(), from); + } else { + assert(is_java_primitive(bt), "unexpected basic type"); + assert_different_registers(rax, r_1->as_Register()); + size_t size_in_bytes = type2aelembytes(bt); + __ load_sized_value(r_1->as_Register(), from, size_in_bytes, bt != T_CHAR && bt != T_BOOLEAN); + } + j++; + } + assert(j == regs->length(), "missed a field?"); + + __ bind(skip); + __ ret(0); + + __ flush(); + + return BufferedInlineTypeBlob::create(&buffer, pack_fields_off, pack_fields_jobject_off, unpack_fields_off); +} + #if INCLUDE_JFR // For c2: c_rarg0 is junk, call to runtime to write a checkpoint. 
diff --git a/src/hotspot/cpu/x86/stackChunkFrameStream_x86.inline.hpp b/src/hotspot/cpu/x86/stackChunkFrameStream_x86.inline.hpp index 24afb960e9c2b..4b0e1417eb6bf 100644 --- a/src/hotspot/cpu/x86/stackChunkFrameStream_x86.inline.hpp +++ b/src/hotspot/cpu/x86/stackChunkFrameStream_x86.inline.hpp @@ -34,8 +34,30 @@ template inline bool StackChunkFrameStream::is_in_frame(void* p0) const { assert(!is_done(), ""); intptr_t* p = (intptr_t*)p0; - int argsize = is_compiled() ? (_cb->as_nmethod()->num_stack_arg_slots() * VMRegImpl::stack_slot_size) >> LogBytesPerWord : 0; - int frame_size = _cb->frame_size() + argsize; + int frame_size = _cb->frame_size(); + if (is_compiled()) { + nmethod* nm = _cb->as_nmethod_or_null(); + if (nm->needs_stack_repair() && nm->is_compiled_by_c2()) { + frame f = to_frame(); + bool augmented = f.was_augmented_on_entry(frame_size); + if (!augmented) { + // Fix: C2 caller, so frame was not extended and thus the + // size read from the frame does not include the arguments. + // Ideally we have to count the arg size for the scalarized + // convention. For now we include the size of the caller frame + // which would at least be equal to that. 
+ RegisterMap map(nullptr, + RegisterMap::UpdateMap::skip, + RegisterMap::ProcessFrames::skip, + RegisterMap::WalkContinuation::skip); + frame caller = to_frame().sender(&map); + assert(caller.is_compiled_frame() && caller.cb()->as_nmethod()->is_compiled_by_c2(), "needs stack repair but was not extended with c1/interpreter caller"); + frame_size += (caller.real_fp() - caller.sp()); + } + } else { + frame_size += _cb->as_nmethod()->num_stack_arg_slots() * VMRegImpl::stack_slot_size >> LogBytesPerWord; + } + } return p == sp() - frame::sender_sp_offset || ((p - unextended_sp()) >= 0 && (p - unextended_sp()) < frame_size); } #endif diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp index afd9c126a2131..a0a6f7e791953 100644 --- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp @@ -22,6 +22,7 @@ * */ +#include "asm/assembler.hpp" #include "asm/macroAssembler.hpp" #include "classfile/javaClasses.hpp" #include "classfile/vmIntrinsics.hpp" @@ -31,6 +32,7 @@ #include "gc/shared/barrierSetNMethod.hpp" #include "gc/shared/gc_globals.hpp" #include "memory/universe.hpp" +#include "oops/inlineKlass.hpp" #include "prims/jvmtiExport.hpp" #include "prims/upcallLinker.hpp" #include "runtime/arguments.hpp" @@ -38,6 +40,8 @@ #include "runtime/javaThread.hpp" #include "runtime/sharedRuntime.hpp" #include "runtime/stubRoutines.hpp" +#include "utilities/macros.hpp" +#include "vmreg_x86.inline.hpp" #include "stubGenerator_x86_64.hpp" #ifdef COMPILER2 #include "opto/runtime.hpp" @@ -309,22 +313,22 @@ address StubGenerator::generate_call_stub(address& return_address) { // store result depending on type (everything that is not // T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) - __ movptr(c_rarg0, result); - Label is_long, is_float, is_double, exit; - __ movl(c_rarg1, result_type); - __ cmpl(c_rarg1, T_OBJECT); + __ movptr(r13, result); + Label is_long, is_float, is_double, 
check_prim, exit; + __ movl(rbx, result_type); + __ cmpl(rbx, T_OBJECT); + __ jcc(Assembler::equal, check_prim); + __ cmpl(rbx, T_LONG); __ jcc(Assembler::equal, is_long); - __ cmpl(c_rarg1, T_LONG); - __ jcc(Assembler::equal, is_long); - __ cmpl(c_rarg1, T_FLOAT); + __ cmpl(rbx, T_FLOAT); __ jcc(Assembler::equal, is_float); - __ cmpl(c_rarg1, T_DOUBLE); + __ cmpl(rbx, T_DOUBLE); __ jcc(Assembler::equal, is_double); #ifdef ASSERT // make sure the type is INT { Label L; - __ cmpl(c_rarg1, T_INT); + __ cmpl(rbx, T_INT); __ jcc(Assembler::equal, L); __ stop("StubRoutines::call_stub: unexpected result type"); __ bind(L); @@ -332,7 +336,7 @@ address StubGenerator::generate_call_stub(address& return_address) { #endif // handle T_INT case - __ movl(Address(c_rarg0, 0), rax); + __ movl(Address(r13, 0), rax); __ BIND(exit); @@ -390,16 +394,29 @@ address StubGenerator::generate_call_stub(address& return_address) { __ ret(0); // handle return types different from T_INT + __ BIND(check_prim); + if (InlineTypeReturnedAsFields) { + // Check for scalarized return value + __ testptr(rax, 1); + __ jcc(Assembler::zero, is_long); + // Load pack handler address + __ andptr(rax, -2); + __ movptr(rax, Address(rax, InlineKlass::adr_members_offset())); + __ movptr(rbx, Address(rax, InlineKlass::pack_handler_jobject_offset())); + // Call pack handler to initialize the buffer + __ call(rbx); + __ jmp(exit); + } __ BIND(is_long); - __ movq(Address(c_rarg0, 0), rax); + __ movq(Address(r13, 0), rax); __ jmp(exit); __ BIND(is_float); - __ movflt(Address(c_rarg0, 0), xmm0); + __ movflt(Address(r13, 0), xmm0); __ jmp(exit); __ BIND(is_double); - __ movdbl(Address(c_rarg0, 0), xmm0); + __ movdbl(Address(r13, 0), xmm0); __ jmp(exit); // record the stub entry and end plus the auxiliary entry @@ -4370,6 +4387,67 @@ address StubGenerator::generate_floatToFloat16() { return start; } +static void save_return_registers(MacroAssembler* masm) { + masm->push_ppx(rax); + if (InlineTypeReturnedAsFields) { + 
masm->push(rdi); + masm->push(rsi); + masm->push(rdx); + masm->push(rcx); + masm->push(r8); + masm->push(r9); + } + masm->push_d(xmm0); + if (InlineTypeReturnedAsFields) { + masm->push_d(xmm1); + masm->push_d(xmm2); + masm->push_d(xmm3); + masm->push_d(xmm4); + masm->push_d(xmm5); + masm->push_d(xmm6); + masm->push_d(xmm7); + } +#ifdef ASSERT + masm->movq(rax, 0xBADC0FFE); + masm->movq(rdi, rax); + masm->movq(rsi, rax); + masm->movq(rdx, rax); + masm->movq(rcx, rax); + masm->movq(r8, rax); + masm->movq(r9, rax); + masm->movq(xmm0, rax); + masm->movq(xmm1, rax); + masm->movq(xmm2, rax); + masm->movq(xmm3, rax); + masm->movq(xmm4, rax); + masm->movq(xmm5, rax); + masm->movq(xmm6, rax); + masm->movq(xmm7, rax); +#endif +} + +static void restore_return_registers(MacroAssembler* masm) { + if (InlineTypeReturnedAsFields) { + masm->pop_d(xmm7); + masm->pop_d(xmm6); + masm->pop_d(xmm5); + masm->pop_d(xmm4); + masm->pop_d(xmm3); + masm->pop_d(xmm2); + masm->pop_d(xmm1); + } + masm->pop_d(xmm0); + if (InlineTypeReturnedAsFields) { + masm->pop(r9); + masm->pop(r8); + masm->pop(rcx); + masm->pop(rdx); + masm->pop(rsi); + masm->pop(rdi); + } + masm->pop_ppx(rax); +} + address StubGenerator::generate_cont_thaw(StubId stub_id) { if (!Continuations::enabled()) return nullptr; @@ -4427,8 +4505,7 @@ address StubGenerator::generate_cont_thaw(StubId stub_id) { if (return_barrier) { // Preserve possible return value from a method returning to the return barrier. - __ push_ppx(rax); - __ push_d(xmm0); + save_return_registers(_masm); } __ movptr(c_rarg0, r15_thread); @@ -4439,8 +4516,7 @@ address StubGenerator::generate_cont_thaw(StubId stub_id) { if (return_barrier) { // Restore return value from a method returning to the return barrier. // No safepoint in the call to thaw, so even an oop return value should be OK. 
- __ pop_d(xmm0); - __ pop_ppx(rax); + restore_return_registers(_masm); } #ifdef ASSERT @@ -4466,8 +4542,7 @@ address StubGenerator::generate_cont_thaw(StubId stub_id) { if (return_barrier) { // Preserve possible return value from a method returning to the return barrier. (Again.) - __ push_ppx(rax); - __ push_d(xmm0); + save_return_registers(_masm); } // If we want, we can templatize thaw by kind, and have three different entries. @@ -4479,8 +4554,7 @@ address StubGenerator::generate_cont_thaw(StubId stub_id) { if (return_barrier) { // Restore return value from a method returning to the return barrier. (Again.) // No safepoint in the call to thaw, so even an oop return value should be OK. - __ pop_d(xmm0); - __ pop_ppx(rax); + restore_return_registers(_masm); } else { // Return 0 (success) from doYield. __ xorptr(rax, rax); @@ -4747,6 +4821,16 @@ void StubGenerator::generate_initial_stubs() { StubRoutines::_forward_exception_entry = generate_forward_exception(); + // Generate these first because they are called from other stubs + if (InlineTypeReturnedAsFields) { + StubRoutines::_load_inline_type_fields_in_regs = + generate_return_value_stub(CAST_FROM_FN_PTR(address, SharedRuntime::load_inline_type_fields_in_regs), + "load_inline_type_fields_in_regs", false); + StubRoutines::_store_inline_type_fields_to_buf = + generate_return_value_stub(CAST_FROM_FN_PTR(address, SharedRuntime::store_inline_type_fields_to_buf), + "store_inline_type_fields_to_buf", true); + } + StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address); @@ -4790,6 +4874,161 @@ void StubGenerator::generate_initial_stubs() { StubRoutines::_fmod = generate_libmFmod(); // from stubGenerator_x86_64_fmod.cpp } +// Call here from the interpreter or compiled code to either load +// multiple returned values from the inline type instance being +// returned to registers or to store returned values to a newly +// allocated inline type instance. 
+// Register is a class, but it would be assigned numerical value. +// "0" is assigned for xmm0. Thus we need to ignore -Wnonnull. +PRAGMA_DIAG_PUSH +PRAGMA_NONNULL_IGNORED +address StubGenerator::generate_return_value_stub(address destination, const char* name, bool has_res) { + // We need to save all registers the calling convention may use so + // the runtime calls read or update those registers. This needs to + // be in sync with SharedRuntime::java_return_convention(). + enum layout { + pad_off = frame::arg_reg_save_area_bytes/BytesPerInt, pad_off_2, + rax_off, rax_off_2, + j_rarg5_off, j_rarg5_2, + j_rarg4_off, j_rarg4_2, + j_rarg3_off, j_rarg3_2, + j_rarg2_off, j_rarg2_2, + j_rarg1_off, j_rarg1_2, + j_rarg0_off, j_rarg0_2, + j_farg0_off, j_farg0_2, + j_farg1_off, j_farg1_2, + j_farg2_off, j_farg2_2, + j_farg3_off, j_farg3_2, + j_farg4_off, j_farg4_2, + j_farg5_off, j_farg5_2, + j_farg6_off, j_farg6_2, + j_farg7_off, j_farg7_2, + rbp_off, rbp_off_2, + return_off, return_off_2, + + framesize + }; + + CodeBuffer buffer(name, 1000, 512); + MacroAssembler* _masm = new MacroAssembler(&buffer); + + int frame_size_in_bytes = align_up(framesize*BytesPerInt, 16); + assert(frame_size_in_bytes == framesize*BytesPerInt, "misaligned"); + int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; + int frame_size_in_words = frame_size_in_bytes / wordSize; + + OopMapSet *oop_maps = new OopMapSet(); + OopMap* map = new OopMap(frame_size_in_slots, 0); + + map->set_callee_saved(VMRegImpl::stack2reg(rax_off), rax->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_rarg5_off), j_rarg5->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_rarg4_off), j_rarg4->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_rarg3_off), j_rarg3->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_rarg2_off), j_rarg2->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_rarg1_off), j_rarg1->as_VMReg()); + 
map->set_callee_saved(VMRegImpl::stack2reg(j_rarg0_off), j_rarg0->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_farg0_off), j_farg0->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_farg1_off), j_farg1->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_farg2_off), j_farg2->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_farg3_off), j_farg3->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_farg4_off), j_farg4->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_farg5_off), j_farg5->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_farg6_off), j_farg6->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_farg7_off), j_farg7->as_VMReg()); + + int start = __ offset(); + + __ subptr(rsp, frame_size_in_bytes - 8 /* return address*/); + + __ movptr(Address(rsp, rbp_off * BytesPerInt), rbp); + __ movdbl(Address(rsp, j_farg7_off * BytesPerInt), j_farg7); + __ movdbl(Address(rsp, j_farg6_off * BytesPerInt), j_farg6); + __ movdbl(Address(rsp, j_farg5_off * BytesPerInt), j_farg5); + __ movdbl(Address(rsp, j_farg4_off * BytesPerInt), j_farg4); + __ movdbl(Address(rsp, j_farg3_off * BytesPerInt), j_farg3); + __ movdbl(Address(rsp, j_farg2_off * BytesPerInt), j_farg2); + __ movdbl(Address(rsp, j_farg1_off * BytesPerInt), j_farg1); + __ movdbl(Address(rsp, j_farg0_off * BytesPerInt), j_farg0); + + __ movptr(Address(rsp, j_rarg0_off * BytesPerInt), j_rarg0); + __ movptr(Address(rsp, j_rarg1_off * BytesPerInt), j_rarg1); + __ movptr(Address(rsp, j_rarg2_off * BytesPerInt), j_rarg2); + __ movptr(Address(rsp, j_rarg3_off * BytesPerInt), j_rarg3); + __ movptr(Address(rsp, j_rarg4_off * BytesPerInt), j_rarg4); + __ movptr(Address(rsp, j_rarg5_off * BytesPerInt), j_rarg5); + __ movptr(Address(rsp, rax_off * BytesPerInt), rax); + + int frame_complete = __ offset(); + + __ set_last_Java_frame(noreg, noreg, nullptr, rscratch1); + + __ mov(c_rarg0, r15_thread); + __ mov(c_rarg1, rax); + + __ 
call(RuntimeAddress(destination)); + + // Set an oopmap for the call site. + + oop_maps->add_gc_map( __ offset() - start, map); + + // clear last_Java_sp + __ reset_last_Java_frame(false); + + __ movptr(rbp, Address(rsp, rbp_off * BytesPerInt)); + __ movdbl(j_farg7, Address(rsp, j_farg7_off * BytesPerInt)); + __ movdbl(j_farg6, Address(rsp, j_farg6_off * BytesPerInt)); + __ movdbl(j_farg5, Address(rsp, j_farg5_off * BytesPerInt)); + __ movdbl(j_farg4, Address(rsp, j_farg4_off * BytesPerInt)); + __ movdbl(j_farg3, Address(rsp, j_farg3_off * BytesPerInt)); + __ movdbl(j_farg2, Address(rsp, j_farg2_off * BytesPerInt)); + __ movdbl(j_farg1, Address(rsp, j_farg1_off * BytesPerInt)); + __ movdbl(j_farg0, Address(rsp, j_farg0_off * BytesPerInt)); + + __ movptr(j_rarg0, Address(rsp, j_rarg0_off * BytesPerInt)); + __ movptr(j_rarg1, Address(rsp, j_rarg1_off * BytesPerInt)); + __ movptr(j_rarg2, Address(rsp, j_rarg2_off * BytesPerInt)); + __ movptr(j_rarg3, Address(rsp, j_rarg3_off * BytesPerInt)); + __ movptr(j_rarg4, Address(rsp, j_rarg4_off * BytesPerInt)); + __ movptr(j_rarg5, Address(rsp, j_rarg5_off * BytesPerInt)); + __ movptr(rax, Address(rsp, rax_off * BytesPerInt)); + + __ addptr(rsp, frame_size_in_bytes-8); + + // check for pending exceptions + Label pending; + __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD); + __ jcc(Assembler::notEqual, pending); + + if (has_res) { + // We just called SharedRuntime::store_inline_type_fields_to_buf. Check if we still + // need to initialize the buffer and if so, call the inline class specific pack handler. 
+ Label skip_pack; + __ get_vm_result_oop(rax); + __ get_vm_result_metadata(rscratch1); + __ testptr(rscratch1, rscratch1); + __ jcc(Assembler::zero, skip_pack); + __ movptr(rscratch1, Address(rscratch1, InlineKlass::adr_members_offset())); + __ movptr(rscratch1, Address(rscratch1, InlineKlass::pack_handler_offset())); + __ call(rscratch1); + __ membar(Assembler::StoreStore); + __ bind(skip_pack); + } + + __ ret(0); + + __ bind(pending); + + __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset())); + __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + + // ------------- + // make sure all code is generated + _masm->flush(); + + RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, false); + return stub->entry_point(); +} + void StubGenerator::generate_continuation_stubs() { // Continuation stubs: StubRoutines::_cont_thaw = generate_cont_thaw(); diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp index 6e3da334f110e..6554432a4f1ad 100644 --- a/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp @@ -638,6 +638,9 @@ class StubGenerator: public StubCodeGenerator { address generate_upcall_stub_exception_handler(); address generate_upcall_stub_load_target(); + // interpreter or compiled code marshalling registers to/from inline type instance + address generate_return_value_stub(address destination, const char* name, bool has_res); + // Specialized stub implementations for UseSecondarySupersTable. 
void generate_lookup_secondary_supers_table_stub(); diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_arraycopy.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_arraycopy.cpp index e7dc416a96143..0beac6010d696 100644 --- a/src/hotspot/cpu/x86/stubGenerator_x86_64_arraycopy.cpp +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_arraycopy.cpp @@ -3598,9 +3598,19 @@ address StubGenerator::generate_generic_copy(address byte_copy_entry, address sh __ cmpq(r10_src_klass, rax); __ jcc(Assembler::notEqual, L_failed); + // Check for flat inline type array -> return -1 + __ test_flat_array_oop(src, rax, L_failed); + + // Check for null-free (non-flat) inline type array -> handle as object array + __ test_null_free_array_oop(src, rax, L_objArray); + const Register rax_lh = rax; // layout helper __ movl(rax_lh, Address(r10_src_klass, lh_offset)); + // Check for flat inline type array -> return -1 + __ testl(rax_lh, Klass::_lh_array_tag_flat_value_bit_inplace); + __ jcc(Assembler::notZero, L_failed); + // if (!src->is_Array()) return -1; __ cmpl(rax_lh, Klass::_lh_neutral_value); __ jcc(Assembler::greaterEqual, L_failed); @@ -3610,8 +3620,10 @@ address StubGenerator::generate_generic_copy(address byte_copy_entry, address sh { BLOCK_COMMENT("assert primitive array {"); Label L; - __ cmpl(rax_lh, (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift)); - __ jcc(Assembler::greaterEqual, L); + __ movl(rklass_tmp, rax_lh); + __ sarl(rklass_tmp, Klass::_lh_array_tag_shift); + __ cmpl(rklass_tmp, Klass::_lh_array_tag_type_value); + __ jcc(Assembler::equal, L); __ stop("must be a primitive array"); __ bind(L); BLOCK_COMMENT("} assert primitive array done"); @@ -3719,9 +3731,21 @@ __ BIND(L_checkcast_copy); // live at this point: r10_src_klass, r11_length, rax (dst_klass) { // Before looking at dst.length, make sure dst is also an objArray. + // This check also fails for flat arrays which are not supported. 
__ cmpl(Address(rax, lh_offset), objArray_lh); __ jcc(Assembler::notEqual, L_failed); +#ifdef ASSERT + { + BLOCK_COMMENT("assert not null-free array {"); + Label L; + __ test_non_null_free_array_oop(dst, rklass_tmp, L); + __ stop("unexpected null-free array"); + __ bind(L); + BLOCK_COMMENT("} assert not null-free array"); + } +#endif + // It is safe to examine both src.length and dst.length. arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length, rax, L_failed); diff --git a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86.cpp b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86.cpp index 2edd97062724b..7da01709d726a 100644 --- a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86.cpp +++ b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86.cpp @@ -38,6 +38,7 @@ #include "oops/methodData.hpp" #include "oops/method.hpp" #include "oops/oop.inline.hpp" +#include "oops/inlineKlass.hpp" #include "oops/resolvedIndyEntry.hpp" #include "oops/resolvedMethodEntry.hpp" #include "prims/jvmtiExport.hpp" @@ -63,7 +64,7 @@ // if too small. // Run with +PrintInterpreter to get the VM to print out the size. 
// Max size with JVMTI -int TemplateInterpreter::InterpreterCodeSize = 256 * 1024; +int TemplateInterpreter::InterpreterCodeSize = 268 * 1024; // Global Register Names static const Register rbcp = r13; @@ -176,11 +177,15 @@ address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, address entry = __ pc(); // Restore stack bottom in case i2c adjusted stack - __ movptr(rcx, Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize)); - __ lea(rsp, Address(rbp, rcx, Address::times_ptr)); + __ movptr(rscratch1, Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize)); + __ lea(rsp, Address(rbp, rscratch1, Address::times_ptr)); // and null it as marker that esp is now tos until next java call __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD); + if (state == atos && InlineTypeReturnedAsFields) { + __ store_inline_type_fields_to_buf(nullptr); + } + __ restore_bcp(); __ restore_locals(); @@ -1203,7 +1208,7 @@ address TemplateInterpreterGenerator::generate_abstract_entry(void) { // // Generic interpreted method entry to (asm) interpreter // -address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { +address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized, bool object_init) { // determine code generation flags bool inc_counter = UseCompiler || CountCompiledCalls; @@ -1324,6 +1329,12 @@ address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { #endif } + // If object_init == true, we should insert a StoreStore barrier here to + // prevent strict fields initial default values from being observable. + // However, x86 is a TSO platform, so if `this` escapes, strict fields + // initialized values are guaranteed to be the ones observed, so the + // barrier can be elided. 
+ // start execution #ifdef ASSERT { diff --git a/src/hotspot/cpu/x86/templateTable_x86.cpp b/src/hotspot/cpu/x86/templateTable_x86.cpp index db7749ec48275..4c1ccde0e5268 100644 --- a/src/hotspot/cpu/x86/templateTable_x86.cpp +++ b/src/hotspot/cpu/x86/templateTable_x86.cpp @@ -36,11 +36,13 @@ #include "oops/methodData.hpp" #include "oops/objArrayKlass.hpp" #include "oops/oop.inline.hpp" +#include "oops/inlineKlass.hpp" #include "oops/resolvedFieldEntry.hpp" #include "oops/resolvedIndyEntry.hpp" #include "oops/resolvedMethodEntry.hpp" #include "prims/jvmtiExport.hpp" #include "prims/methodHandles.hpp" +#include "runtime/arguments.hpp" #include "runtime/frame.inline.hpp" #include "runtime/safepointMechanism.hpp" #include "runtime/sharedRuntime.hpp" @@ -167,6 +169,7 @@ void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, Label L_patch_done; switch (bc) { + case Bytecodes::_fast_vputfield: case Bytecodes::_fast_aputfield: case Bytecodes::_fast_bputfield: case Bytecodes::_fast_zputfield: @@ -775,15 +778,34 @@ void TemplateTable::daload() { void TemplateTable::aaload() { transition(itos, atos); - // rax: index - // rdx: array - index_check(rdx, rax); // kills rbx - do_oop_load(_masm, - Address(rdx, rax, - UseCompressedOops ? Address::times_4 : Address::times_ptr, - arrayOopDesc::base_offset_in_bytes(T_OBJECT)), - rax, - IS_ARRAY); + Register array = rdx; + Register index = rax; + + index_check(array, index); // kills rbx + __ profile_array_type(rbx, array, rcx); + if (UseArrayFlattening) { + Label is_flat_array, done; + __ test_flat_array_oop(array, rbx, is_flat_array); + do_oop_load(_masm, + Address(array, index, + UseCompressedOops ? 
Address::times_4 : Address::times_ptr, + arrayOopDesc::base_offset_in_bytes(T_OBJECT)), + rax, + IS_ARRAY); + __ jmp(done); + __ bind(is_flat_array); + __ movptr(rcx, array); + call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::flat_array_load), rcx, index); + __ bind(done); + } else { + do_oop_load(_masm, + Address(array, index, + UseCompressedOops ? Address::times_4 : Address::times_ptr, + arrayOopDesc::base_offset_in_bytes(T_OBJECT)), + rax, + IS_ARRAY); + } + __ profile_element_type(rbx, rax, rcx); } void TemplateTable::baload() { @@ -1057,7 +1079,7 @@ void TemplateTable::dastore() { } void TemplateTable::aastore() { - Label is_null, ok_is_subtype, done; + Label is_null, is_flat_array, ok_is_subtype, done; transition(vtos, vtos); // stack: ..., array, index, value __ movptr(rax, at_tos()); // value @@ -1069,19 +1091,30 @@ void TemplateTable::aastore() { arrayOopDesc::base_offset_in_bytes(T_OBJECT)); index_check_without_pop(rdx, rcx); // kills rbx + + __ profile_array_type(rdi, rdx, rbx); + __ profile_multiple_element_types(rdi, rax, rbx, rcx); + __ testptr(rax, rax); __ jcc(Assembler::zero, is_null); + // Move array class to rdi + __ load_klass(rdi, rdx, rscratch1); + if (UseArrayFlattening) { + __ movl(rbx, Address(rdi, Klass::layout_helper_offset())); + __ test_flat_array_layout(rbx, is_flat_array); + } + // Move subklass into rbx __ load_klass(rbx, rax, rscratch1); - // Move superklass into rax - __ load_klass(rax, rdx, rscratch1); - __ movptr(rax, Address(rax, + // Move array element superklass into rax + __ movptr(rax, Address(rdi, ObjArrayKlass::element_klass_offset())); // Generate subtype check. Blows rcx, rdi // Superklass in rax. Subklass in rbx. - __ gen_subtype_check(rbx, ok_is_subtype); + // is "rbx <: rax" ? 
(value subclass <: array element superclass) + __ gen_subtype_check(rbx, ok_is_subtype, false); // Come here on failure // object is at TOS @@ -1099,11 +1132,39 @@ void TemplateTable::aastore() { // Have a null in rax, rdx=array, ecx=index. Store null at ary[idx] __ bind(is_null); - __ profile_null_seen(rbx); + if (Arguments::is_valhalla_enabled()) { + Label write_null_to_null_free_array, store_null; + + // Move array class to rdi + __ load_klass(rdi, rdx, rscratch1); + if (UseArrayFlattening) { + __ movl(rbx, Address(rdi, Klass::layout_helper_offset())); + __ test_flat_array_layout(rbx, is_flat_array); + } + // No way to store null in null-free array + __ test_null_free_array_oop(rdx, rbx, write_null_to_null_free_array); + __ jmp(store_null); + + __ bind(write_null_to_null_free_array); + __ jump(RuntimeAddress(Interpreter::_throw_NullPointerException_entry)); + + __ bind(store_null); + } // Store a null do_oop_store(_masm, element_address, noreg, IS_ARRAY); + __ jmp(done); + if (UseArrayFlattening) { + Label is_type_ok; + __ bind(is_flat_array); // Store value (null or non-null) to flat array + + __ movptr(rax, at_tos()); + __ movl(rcx, at_tos_p1()); // index + __ movptr(rdx, at_tos_p2()); // array + + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::flat_array_store), rax, rdx, rcx); + } // Pop stack arguments __ bind(done); __ addptr(rsp, 3 * Interpreter::stackElementSize); @@ -1891,13 +1952,59 @@ void TemplateTable::if_nullcmp(Condition cc) { void TemplateTable::if_acmp(Condition cc) { transition(atos, vtos); // assume branch is more often taken than not (loops use backward branches) - Label not_taken; + Label taken, not_taken; __ pop_ptr(rdx); + + __ profile_acmp(rbx, rdx, rax, rcx); + + const int is_inline_type_mask = markWord::inline_type_pattern; + if (Arguments::is_valhalla_enabled()) { + __ cmpoop(rdx, rax); + __ jcc(Assembler::equal, (cc == equal) ? 
taken : not_taken); + + // might be substitutable, test if either rax or rdx is null + __ testptr(rax, rax); + __ jcc(Assembler::zero, (cc == equal) ? not_taken : taken); + __ testptr(rdx, rdx); + __ jcc(Assembler::zero, (cc == equal) ? not_taken : taken); + + // and both are values ? + __ movptr(rbx, Address(rdx, oopDesc::mark_offset_in_bytes())); + __ andptr(rbx, Address(rax, oopDesc::mark_offset_in_bytes())); + __ andptr(rbx, is_inline_type_mask); + __ cmpptr(rbx, is_inline_type_mask); + __ jcc(Assembler::notEqual, (cc == equal) ? not_taken : taken); + + // same value klass ? + __ load_metadata(rbx, rdx); + __ load_metadata(rcx, rax); + __ cmpptr(rbx, rcx); + __ jcc(Assembler::notEqual, (cc == equal) ? not_taken : taken); + + // Know both are the same type, let's test for substitutability... + if (cc == equal) { + invoke_is_substitutable(rax, rdx, taken, not_taken); + } else { + invoke_is_substitutable(rax, rdx, not_taken, taken); + } + __ stop("Not reachable"); + } + __ cmpoop(rdx, rax); __ jcc(j_not(cc), not_taken); + __ bind(taken); branch(false, false); __ bind(not_taken); - __ profile_not_taken_branch(rax); + __ profile_not_taken_branch(rax, true); +} + +void TemplateTable::invoke_is_substitutable(Register aobj, Register bobj, + Label& is_subst, Label& not_subst) { + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::is_substitutable), aobj, bobj); + // Back from the VM call with the answer in rax; jump to the matching outcome. 
+ __ testl(rax, rax); + __ jcc(Assembler::zero, not_subst); + __ jmp(is_subst); } void TemplateTable::ret() { @@ -2151,7 +2258,8 @@ void TemplateTable::_return(TosState state) { if (state == itos) { __ narrow(rax); } - __ remove_activation(state, rbcp); + + __ remove_activation(state, rbcp, true, true, true); __ jmp(rbcp); } @@ -2524,20 +2632,18 @@ void TemplateTable::pop_and_check_object(Register r) { void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) { transition(vtos, vtos); - const Register obj = c_rarg3; + const Register obj = r9; const Register cache = rcx; const Register index = rdx; const Register off = rbx; const Register tos_state = rax; const Register flags = rdx; - const Register bc = c_rarg3; // uses same reg as obj, so don't mix them + const Register bc = c_rarg3; resolve_cache_and_index_for_field(byte_no, cache, index); jvmti_post_field_access(cache, index, is_static, false); load_resolved_field_entry(obj, cache, tos_state, off, flags, is_static); - if (!is_static) pop_and_check_object(obj); - const Address field(obj, off, Address::times_1, 0*wordSize); Label Done, notByte, notBool, notInt, notShort, notChar, notLong, notFloat, notObj; @@ -2548,6 +2654,7 @@ void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteContr __ jcc(Assembler::notZero, notByte); // btos + if (!is_static) pop_and_check_object(obj); __ access_load_at(T_BYTE, IN_HEAP, rax, field, noreg); __ push(btos); // Rewrite bytecode to be faster @@ -2561,6 +2668,7 @@ void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteContr __ jcc(Assembler::notEqual, notBool); // ztos (same code as btos) + if (!is_static) pop_and_check_object(obj); __ access_load_at(T_BOOLEAN, IN_HEAP, rax, field, noreg); __ push(ztos); // Rewrite bytecode to be faster @@ -2574,14 +2682,46 @@ void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteContr __ cmpl(tos_state, atos); __ jcc(Assembler::notEqual, notObj); // atos - 
do_oop_load(_masm, field, rax); - __ push(atos); - if (!is_static && rc == may_rewrite) { - patch_bytecode(Bytecodes::_fast_agetfield, bc, rbx); + if (!Arguments::is_valhalla_enabled()) { + if (!is_static) pop_and_check_object(obj); + do_oop_load(_masm, field, rax); + __ push(atos); + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_agetfield, bc, rbx); + } + __ jmp(Done); + } else { + if (is_static) { + __ load_heap_oop(rax, field); + __ push(atos); + __ jmp(Done); + } else { + Label is_flat; + __ test_field_is_flat(flags, rscratch1, is_flat); + pop_and_check_object(obj); + __ load_heap_oop(rax, field); + __ push(atos); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_agetfield, bc, rbx); + } + __ jmp(Done); + __ bind(is_flat); + // field is flat (null-free or nullable with a null-marker) + pop_and_check_object(rax); + __ read_flat_field(rcx, rax); + __ verify_oop(rax); + __ push(atos); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_vgetfield, bc, rbx); + } + __ jmp(Done); + } } - __ jmp(Done); __ bind(notObj); + + if (!is_static) pop_and_check_object(obj); + __ cmpl(tos_state, itos); __ jcc(Assembler::notEqual, notInt); // itos @@ -2681,7 +2821,6 @@ void TemplateTable::getstatic(int byte_no) { getfield_or_static(byte_no, true); } - // The registers cache and index expected to be set before call. // The function may destroy various registers, just not the cache and index registers. 
void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) { @@ -2743,7 +2882,7 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteContr const Register index = rdx; const Register tos_state = rdx; const Register off = rbx; - const Register flags = rax; + const Register flags = r9; resolve_cache_and_index_for_field(byte_no, cache, index); jvmti_post_field_mod(cache, index, is_static); @@ -2756,23 +2895,24 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteContr Label notVolatile, Done; // Check for volatile store - __ andl(flags, (1 << ResolvedFieldEntry::is_volatile_shift)); - __ testl(flags, flags); + __ movl(rscratch1, flags); + __ andl(rscratch1, (1 << ResolvedFieldEntry::is_volatile_shift)); + __ testl(rscratch1, rscratch1); __ jcc(Assembler::zero, notVolatile); - putfield_or_static_helper(byte_no, is_static, rc, obj, off, tos_state); + putfield_or_static_helper(byte_no, is_static, rc, obj, off, tos_state, flags); volatile_barrier(Assembler::Membar_mask_bits(Assembler::StoreLoad | Assembler::StoreStore)); __ jmp(Done); __ bind(notVolatile); - putfield_or_static_helper(byte_no, is_static, rc, obj, off, tos_state); + putfield_or_static_helper(byte_no, is_static, rc, obj, off, tos_state, flags); __ bind(Done); } void TemplateTable::putfield_or_static_helper(int byte_no, bool is_static, RewriteControl rc, - Register obj, Register off, Register tos_state) { + Register obj, Register off, Register tos_state, Register flags) { // field addresses const Address field(obj, off, Address::times_1, 0*wordSize); @@ -2819,14 +2959,51 @@ void TemplateTable::putfield_or_static_helper(int byte_no, bool is_static, Rewri // atos { - __ pop(atos); - if (!is_static) pop_and_check_object(obj); - // Store into the field - do_oop_store(_masm, field, rax); - if (!is_static && rc == may_rewrite) { - patch_bytecode(Bytecodes::_fast_aputfield, bc, rbx, true, byte_no); + if (!Arguments::is_valhalla_enabled()) 
{ + __ pop(atos); + if (!is_static) pop_and_check_object(obj); + // Store into the field + do_oop_store(_masm, field, rax); + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_aputfield, bc, rbx, true, byte_no); + } + __ jmp(Done); + } else { + __ pop(atos); + if (is_static) { + Label is_nullable; + __ test_field_is_not_null_free_inline_type(flags, rscratch1, is_nullable); + __ null_check(rax); // FIXME JDK-8341120 + __ bind(is_nullable); + do_oop_store(_masm, field, rax); + __ jmp(Done); + } else { + Label is_flat, null_free_reference, rewrite_inline; + __ test_field_is_flat(flags, rscratch1, is_flat); + __ test_field_is_null_free_inline_type(flags, rscratch1, null_free_reference); + pop_and_check_object(obj); + // Store into the field + do_oop_store(_masm, field, rax); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_aputfield, bc, rbx, true, byte_no); + } + __ jmp(Done); + __ bind(null_free_reference); + __ null_check(rax); // FIXME JDK-8341120 + pop_and_check_object(obj); + // Store into the field + do_oop_store(_masm, field, rax); + __ jmp(rewrite_inline); + __ bind(is_flat); + pop_and_check_object(rscratch2); + __ write_flat_field(rcx, r8, rscratch1, rscratch2, rbx, rax); + __ bind(rewrite_inline); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_vputfield, bc, rbx, true, byte_no); + } + __ jmp(Done); + } } - __ jmp(Done); } __ bind(notObj); @@ -2963,6 +3140,7 @@ void TemplateTable::jvmti_post_fast_field_mod() { // to do it for every data type, we use the saved values as the // jvalue object. 
switch (bytecode()) { // load values into the jvalue object + case Bytecodes::_fast_vputfield: // fall through case Bytecodes::_fast_aputfield: __ push_ptr(rax); break; case Bytecodes::_fast_bputfield: // fall through case Bytecodes::_fast_zputfield: // fall through @@ -2986,6 +3164,7 @@ void TemplateTable::jvmti_post_fast_field_mod() { __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_modification), rbx, c_rarg2, c_rarg3); switch (bytecode()) { // restore tos values + case Bytecodes::_fast_vputfield: // fall through case Bytecodes::_fast_aputfield: __ pop_ptr(rax); break; case Bytecodes::_fast_bputfield: // fall through case Bytecodes::_fast_zputfield: // fall through @@ -3004,18 +3183,15 @@ void TemplateTable::jvmti_post_fast_field_mod() { void TemplateTable::fast_storefield(TosState state) { transition(state, vtos); - Register cache = rcx; - Label notVolatile, Done; jvmti_post_fast_field_mod(); __ push(rax); __ load_field_entry(rcx, rax); - load_resolved_field_entry(noreg, cache, rax, rbx, rdx); - // RBX: field offset, RAX: TOS, RDX: flags - __ andl(rdx, (1 << ResolvedFieldEntry::is_volatile_shift)); + load_resolved_field_entry(noreg, rcx, rax, rbx, rdx); __ pop(rax); + // RBX: field offset, RAX: TOS, RDX: flags // Get object from stack pop_and_check_object(rcx); @@ -3024,26 +3200,47 @@ void TemplateTable::fast_storefield(TosState state) { const Address field(rcx, rbx, Address::times_1); // Check for volatile store - __ testl(rdx, rdx); + __ movl(rscratch2, rdx); // saving flags for is_flat test + __ andl(rscratch2, (1 << ResolvedFieldEntry::is_volatile_shift)); + __ testl(rscratch2, rscratch2); __ jcc(Assembler::zero, notVolatile); - fast_storefield_helper(field, rax); + fast_storefield_helper(field, rax, rdx); volatile_barrier(Assembler::Membar_mask_bits(Assembler::StoreLoad | Assembler::StoreStore)); __ jmp(Done); __ bind(notVolatile); - fast_storefield_helper(field, rax); + fast_storefield_helper(field, rax, rdx); __ bind(Done); 
} -void TemplateTable::fast_storefield_helper(Address field, Register rax) { +void TemplateTable::fast_storefield_helper(Address field, Register rax, Register flags) { + + // DANGER: 'field' argument depends on rcx and rbx // access field switch (bytecode()) { + case Bytecodes::_fast_vputfield: + { + // Field is either flat (nullable or not) or non-flat and null-free + Label is_flat, done; + __ test_field_is_flat(flags, rscratch1, is_flat); + __ null_check(rax); // FIXME JDK-8341120 + do_oop_store(_masm, field, rax); + __ jmp(done); + __ bind(is_flat); + __ load_field_entry(r8, r9); + __ movptr(rscratch2, rcx); // re-shuffle registers because of VM call calling convention + __ write_flat_field(r8, rscratch1, r9, rscratch2, rbx, rax); + __ bind(done); + } + break; case Bytecodes::_fast_aputfield: - do_oop_store(_masm, field, rax); + { + do_oop_store(_masm, field, rax); + } break; case Bytecodes::_fast_lputfield: __ access_store_at(T_LONG, IN_HEAP, field, noreg /* ltos */, noreg, noreg, noreg); @@ -3099,15 +3296,19 @@ void TemplateTable::fast_accessfield(TosState state) { // access constant pool cache __ load_field_entry(rcx, rbx); - __ load_sized_value(rbx, Address(rcx, in_bytes(ResolvedFieldEntry::field_offset_offset())), sizeof(int), true /*is_signed*/); + __ load_sized_value(rdx, Address(rcx, in_bytes(ResolvedFieldEntry::field_offset_offset())), sizeof(int), true /*is_signed*/); // rax: object __ verify_oop(rax); __ null_check(rax); - Address field(rax, rbx, Address::times_1); + Address field(rax, rdx, Address::times_1); // access field switch (bytecode()) { + case Bytecodes::_fast_vgetfield: + __ read_flat_field(rcx, rax); + __ verify_oop(rax); + break; case Bytecodes::_fast_agetfield: do_oop_load(_masm, field, rax); __ verify_oop(rax); @@ -3614,7 +3815,7 @@ void TemplateTable::_new() { // initialize object header only. 
__ bind(initialize_header); - if (UseCompactObjectHeaders) { + if (UseCompactObjectHeaders || Arguments::is_valhalla_enabled()) { __ pop(rcx); // get saved klass back in the register. __ movptr(rbx, Address(rcx, Klass::prototype_header_offset())); __ movptr(Address(rax, oopDesc::mark_offset_in_bytes()), rbx); @@ -3622,6 +3823,8 @@ void TemplateTable::_new() { __ movptr(Address(rax, oopDesc::mark_offset_in_bytes()), (intptr_t)markWord::prototype().value()); // header __ pop(rcx); // get saved klass back in the register. + } + if (!UseCompactObjectHeaders) { __ xorl(rsi, rsi); // use zero reg to clear memory (shorter code) __ store_klass_gap(rax, rsi); // zero klass gap for compressed oops __ store_klass(rax, rcx, rscratch1); // klass @@ -3683,10 +3886,10 @@ void TemplateTable::checkcast() { __ get_cpool_and_tags(rcx, rdx); // rcx=cpool, rdx=tags array __ get_unsigned_2_byte_index_at_bcp(rbx, 1); // rbx=index // See if bytecode has already been quicked - __ cmpb(Address(rdx, rbx, - Address::times_1, - Array::base_offset_in_bytes()), - JVM_CONSTANT_Class); + __ movzbl(rdx, Address(rdx, rbx, + Address::times_1, + Array::base_offset_in_bytes())); + __ cmpl(rdx, JVM_CONSTANT_Class); __ jcc(Assembler::equal, quicked); __ push(atos); // save receiver for result, and for GC call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); @@ -3716,15 +3919,15 @@ void TemplateTable::checkcast() { // Come here on success __ bind(ok_is_subtype); __ mov(rax, rdx); // Restore object in rdx + __ jmp(done); + + __ bind(is_null); // Collect counts on whether this check-cast sees nulls a lot or not. 
if (ProfileInterpreter) { - __ jmp(done); - __ bind(is_null); __ profile_null_seen(rcx); - } else { - __ bind(is_null); // same as 'done' } + __ bind(done); } @@ -3738,10 +3941,10 @@ void TemplateTable::instanceof() { __ get_cpool_and_tags(rcx, rdx); // rcx=cpool, rdx=tags array __ get_unsigned_2_byte_index_at_bcp(rbx, 1); // rbx=index // See if bytecode has already been quicked - __ cmpb(Address(rdx, rbx, - Address::times_1, - Array::base_offset_in_bytes()), - JVM_CONSTANT_Class); + __ movzbl(rdx, Address(rdx, rbx, + Address::times_1, + Array::base_offset_in_bytes())); + __ cmpl(rdx, JVM_CONSTANT_Class); __ jcc(Assembler::equal, quicked); __ push(atos); // save receiver for result, and for GC @@ -3845,6 +4048,10 @@ void TemplateTable::monitorenter() { // check for null object __ null_check(rax); + Label is_inline_type; + __ movptr(rbx, Address(rax, oopDesc::mark_offset_in_bytes())); + __ test_markword_is_inline_type(rbx, is_inline_type); + const Address monitor_block_top( rbp, frame::interpreter_frame_monitor_block_top_offset * wordSize); const Address monitor_block_bot( @@ -3937,6 +4144,11 @@ void TemplateTable::monitorenter() { // The bcp has already been incremented. Just need to dispatch to // next instruction. 
__ dispatch_next(vtos); + + __ bind(is_inline_type); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_identity_exception), rax); + __ should_not_reach_here(); } void TemplateTable::monitorexit() { @@ -3945,6 +4157,17 @@ void TemplateTable::monitorexit() { // check for null object __ null_check(rax); + const int is_inline_type_mask = markWord::inline_type_pattern; + Label has_identity; + __ movptr(rbx, Address(rax, oopDesc::mark_offset_in_bytes())); + __ andptr(rbx, is_inline_type_mask); + __ cmpl(rbx, is_inline_type_mask); + __ jcc(Assembler::notEqual, has_identity); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + __ should_not_reach_here(); + __ bind(has_identity); + const Address monitor_block_top( rbp, frame::interpreter_frame_monitor_block_top_offset * wordSize); const Address monitor_block_bot( diff --git a/src/hotspot/cpu/x86/templateTable_x86.hpp b/src/hotspot/cpu/x86/templateTable_x86.hpp index 15493d2f98a00..3e4bab769457c 100644 --- a/src/hotspot/cpu/x86/templateTable_x86.hpp +++ b/src/hotspot/cpu/x86/templateTable_x86.hpp @@ -37,7 +37,9 @@ static void index_check_without_pop(Register array, Register index); static void putfield_or_static_helper(int byte_no, bool is_static, RewriteControl rc, - Register obj, Register off, Register flags); - static void fast_storefield_helper(Address field, Register rax); + Register obj, Register off, Register tos_state, Register flags); + static void fast_storefield_helper(Address field, Register obj, Register flags); + + static void invoke_is_substitutable(Register aobj, Register bobj, Label& is_subst, Label& not_subst); #endif // CPU_X86_TEMPLATETABLE_X86_HPP diff --git a/src/hotspot/cpu/x86/vm_version_x86.cpp b/src/hotspot/cpu/x86/vm_version_x86.cpp index 2ca1c17254277..c41df7f1f3bb1 100644 --- a/src/hotspot/cpu/x86/vm_version_x86.cpp +++ b/src/hotspot/cpu/x86/vm_version_x86.cpp @@ -1724,7 +1724,7 @@ void 
VM_Version::get_processor_features() { #endif // Use XMM/YMM MOVDQU instruction for Object Initialization - if (!UseFastStosb && UseUnalignedLoadStores) { + if (UseUnalignedLoadStores) { if (FLAG_IS_DEFAULT(UseXMMForObjInit)) { UseXMMForObjInit = true; } diff --git a/src/hotspot/cpu/x86/vtableStubs_x86_64.cpp b/src/hotspot/cpu/x86/vtableStubs_x86_64.cpp index b27755a243f24..81929e78d585b 100644 --- a/src/hotspot/cpu/x86/vtableStubs_x86_64.cpp +++ b/src/hotspot/cpu/x86/vtableStubs_x86_64.cpp @@ -44,11 +44,11 @@ extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index); #endif -VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { +VtableStub* VtableStubs::create_vtable_stub(int vtable_index, bool caller_is_c1) { // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. const int stub_code_length = code_size_limit(true); - VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); - // Can be null if there is no free space in the code cache. + VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index, caller_is_c1); + // Can be nullptr if there is no free space in the code cache. if (s == nullptr) { return nullptr; } @@ -61,6 +61,7 @@ VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { int slop_delta = 0; // No variance was detected in vtable stub sizes. Setting index_dependent_slop == 0 will unveil any deviation from this observation. const int index_dependent_slop = 0; + ByteSize entry_offset = caller_is_c1 ? 
Method::from_compiled_inline_offset() : Method::from_compiled_inline_ro_offset(); ResourceMark rm; CodeBuffer cb(s->entry_point(), stub_code_length); @@ -117,7 +118,7 @@ VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { Label L; __ cmpptr(method, NULL_WORD); __ jcc(Assembler::equal, L); - __ cmpptr(Address(method, Method::from_compiled_offset()), NULL_WORD); + __ cmpptr(Address(method, entry_offset), NULL_WORD); __ jcc(Assembler::notZero, L); __ stop("Vtable entry is null"); __ bind(L); @@ -128,7 +129,7 @@ VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { // method (rbx): Method* // rcx: receiver address ame_addr = __ pc(); - __ jmp( Address(rbx, Method::from_compiled_offset())); + __ jmp( Address(rbx, entry_offset)); masm->flush(); slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets @@ -138,11 +139,12 @@ VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { } -VtableStub* VtableStubs::create_itable_stub(int itable_index) { +VtableStub* VtableStubs::create_itable_stub(int itable_index, bool caller_is_c1) { // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. const int stub_code_length = code_size_limit(false); - VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); - // Can be null if there is no free space in the code cache. + ByteSize entry_offset = caller_is_c1 ? Method::from_compiled_inline_offset() : Method::from_compiled_inline_ro_offset(); + VtableStub* s = new(stub_code_length) VtableStub(false, itable_index, caller_is_c1); + // Can be nullptr if there is no free space in the code cache. if (s == nullptr) { return nullptr; } @@ -209,7 +211,7 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) { // We expect we need index_dependent_slop extra bytes. Reason: // The emitted code in lookup_interface_method changes when itable_index exceeds 15. 
// For linux, a very narrow estimate would be 112, but Solaris requires some more space (130). - const ptrdiff_t estimate = 136; + const ptrdiff_t estimate = 144; const ptrdiff_t codesize = lookupSize + index_dependent_slop; slop_delta = (int)(estimate - codesize); slop_bytes += slop_delta; @@ -228,7 +230,7 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) { Label L2; __ cmpptr(method, NULL_WORD); __ jcc(Assembler::equal, L2); - __ cmpptr(Address(method, Method::from_compiled_offset()), NULL_WORD); + __ cmpptr(Address(method, entry_offset), NULL_WORD); __ jcc(Assembler::notZero, L2); __ stop("compiler entrypoint is null"); __ bind(L2); @@ -236,7 +238,7 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) { #endif // ASSERT address ame_addr = __ pc(); - __ jmp(Address(method, Method::from_compiled_offset())); + __ jmp(Address(method, entry_offset)); __ bind(L_no_such_interface); // Handle IncompatibleClassChangeError in itable stubs. diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad index 370437edee2c0..aa8c292896fd1 100644 --- a/src/hotspot/cpu/x86/x86.ad +++ b/src/hotspot/cpu/x86/x86.ad @@ -1649,6 +1649,10 @@ int MachCallDynamicJavaNode::ret_addr_offset() } int MachCallRuntimeNode::ret_addr_offset() { + if (_entry_point == nullptr) { + // CallLeafNoFPInDirect + return 3; // callq (register) + } int offset = 13; // movq r10,#addr; callq (r10) if (this->ideal_Opcode() != Op_CallLeafVector) { offset += clear_avx_size(); @@ -1887,25 +1891,15 @@ void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const { void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { Compile* C = ra_->C; - int framesize = C->output()->frame_size_in_bytes(); - int bangsize = C->output()->bang_size_in_bytes(); - - if (C->clinit_barrier_on_entry()) { - assert(VM_Version::supports_fast_class_init_checks(), "sanity"); - assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started"); - 
- Label L_skip_barrier; - Register klass = rscratch1; + __ verified_entry(C); - __ mov_metadata(klass, C->method()->holder()->constant_encoding()); - __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/); - - __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path - - __ bind(L_skip_barrier); + if (ra_->C->stub_function() == nullptr) { + __ entry_barrier(); } - __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr); + if (!Compile::current()->output()->in_scratch_emit_size()) { + __ bind(*_verified_entry); + } C->output()->set_frame_complete(__ offset()); @@ -1917,11 +1911,6 @@ void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { } } -uint MachPrologNode::size(PhaseRegAlloc* ra_) const -{ - return MachNode::size(ra_); // too many variables; just compute it - // the hard way -} int MachPrologNode::reloc() const { @@ -1969,19 +1958,9 @@ void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const __ vzeroupper(); } - int framesize = C->output()->frame_size_in_bytes(); - assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); - // Remove word for return adr already pushed - // and RBP - framesize -= 2*wordSize; - - // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here - - if (framesize) { - __ addq(rsp, framesize); - } - - __ popq(rbp); + // Subtract two words to account for return address and rbp + int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize; + __ remove_frame(initial_framesize, C->needs_stack_repair()); if (StackReservedPages > 0 && C->has_reserved_stack_access()) { __ reserved_stack_check(); @@ -2000,12 +1979,6 @@ void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const } } -uint MachEpilogNode::size(PhaseRegAlloc* ra_) const -{ - return MachNode::size(ra_); // too many variables; just compute it - // the hard way -} - int 
MachEpilogNode::reloc() const { return 2; // a large enough number @@ -2607,6 +2580,51 @@ uint BoxLockNode::size(PhaseRegAlloc *ra_) const } } +//============================================================================= +#ifndef PRODUCT +void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const +{ + st->print_cr("MachVEPNode"); +} +#endif + +void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const +{ + CodeBuffer* cbuf = masm->code(); + if (!_verified) { + __ ic_check(1); + } else { + if (ra_->C->stub_function() == nullptr) { + // Emit the entry barrier in a temporary frame before unpacking because + // it can deopt, which would require packing the scalarized args again. + __ verified_entry(ra_->C, 0); + __ entry_barrier(); + int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize; + __ remove_frame(initial_framesize, false); + } + // Unpack inline type args passed as oop and then jump to + // the verified entry point (skipping the unverified entry). + int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only); + // Emit code for verified entry and save increment for stack repair on return + __ verified_entry(ra_->C, sp_inc); + if (Compile::current()->output()->in_scratch_emit_size()) { + Label dummy_verified_entry; + __ jmp(dummy_verified_entry); + } else { + __ jmp(*_verified_entry); + } + } + if (ra_->C->stub_function() == nullptr) { + // Pad so that the next call to MachVEPNode::emit() starts out with the + // correct alignment. This is needed by entry_barrier() to align the + // compare. But unfortunately we need to align all 4 MachVEPNodes because + // entry point offsets are computed using scratch_emit_size(), so starting + // alignment must match the alignment of the scratch buffer, otherwise the sizes + // will be off. 
+ __ align(4); + } +} + //============================================================================= #ifndef PRODUCT void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const @@ -2622,12 +2640,6 @@ void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const __ ic_check(InteriorEntryAlignment); } -uint MachUEPNode::size(PhaseRegAlloc* ra_) const -{ - return MachNode::size(ra_); // too many variables; just compute it - // the hard way -} - //============================================================================= @@ -4601,6 +4613,39 @@ encode %{ __ int3(); __ bind(L); } + if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) { + // The last return value is not set by the callee but used to pass the null marker to compiled code. + // Search for the corresponding projection, get the register and emit code that initializes it. + uint con = (tf()->range_cc()->cnt() - 1); + for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) { + ProjNode* proj = fast_out(i)->as_Proj(); + if (proj->_con == con) { + // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized) + OptoReg::Name optoReg = ra_->get_reg_first(proj); + VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP)); + Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1; + __ testq(rax, rax); + __ setb(Assembler::notZero, toReg); + __ movzbl(toReg, toReg); + if (reg->is_stack()) { + int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size; + __ movq(Address(rsp, st_off), toReg); + } + break; + } + } + if (return_value_is_used()) { + // An inline type is returned as fields in multiple registers. + // Rax either contains an oop if the inline type is buffered or a pointer + // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax + // if the lowest bit is set to allow C2 to use the oop after null checking. 
+ // rax &= (rax & 1) - 1 + __ movptr(rscratch1, rax); + __ andptr(rscratch1, 0x1); + __ subptr(rscratch1, 0x1); + __ andptr(rax, rscratch1); + } + } %} %} @@ -5779,6 +5824,22 @@ operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale) %} %} +// Indirect Narrow Oop Operand +operand indCompressedOop(rRegN reg) %{ + predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8)); + constraint(ALLOC_IN_RC(ptr_reg)); + match(DecodeN reg); + + op_cost(10); + format %{"[R12 + $reg << 3] (compressed oop addressing)" %} + interface(MEMORY_INTER) %{ + base(0xc); // R12 + index($reg); + scale(0x3); + disp(0x0); + %} +%} + // Indirect Narrow Oop Plus Offset Operand // Note: x86 architecture doesn't support "scale * index + offset" without a base // we can't free r12 even with CompressedOops::base() == nullptr. @@ -6288,7 +6349,7 @@ operand legVecZ() %{ opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex, indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset, - indCompressedOopOffset, + indCompressedOop, indCompressedOopOffset, indirectNarrow, indOffset8Narrow, indOffset32Narrow, indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow, indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow); @@ -8918,6 +8979,32 @@ instruct castX2P(rRegP dst, rRegL src) ins_pipe(ialu_reg_reg); // XXX %} +instruct castI2N(rRegN dst, rRegI src) +%{ + match(Set dst (CastI2N src)); + + format %{ "movq $dst, $src\t# int -> narrow ptr" %} + ins_encode %{ + if ($dst$$reg != $src$$reg) { + __ movl($dst$$Register, $src$$Register); + } + %} + ins_pipe(ialu_reg_reg); // XXX +%} + +instruct castN2X(rRegL dst, rRegN src) +%{ + match(Set dst (CastP2X src)); + + format %{ "movq $dst, $src\t# ptr -> long" %} + ins_encode %{ + if ($dst$$reg != $src$$reg) { + __ movptr($dst$$Register, $src$$Register); + } + %} + ins_pipe(ialu_reg_reg); // XXX +%} + instruct castP2X(rRegL dst, rRegP 
src) %{ match(Set dst (CastP2X src)); @@ -14656,14 +14743,132 @@ instruct MoveL2D_reg_reg(regD dst, rRegL src) %{ ins_pipe( pipe_slow ); %} + // Fast clearing of an array // Small non-constant lenght ClearArray for non-AVX512 targets. -instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero, +instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val, Universe dummy, rFlagsReg cr) %{ - predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2)); - match(Set dummy (ClearArray cnt base)); - effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr); + predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2)); + match(Set dummy (ClearArray (Binary cnt base) val)); + effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr); + + format %{ $$template + $$emit$$"cmp InitArrayShortSize,rcx\n\t" + $$emit$$"jg LARGE\n\t" + $$emit$$"dec rcx\n\t" + $$emit$$"js DONE\t# Zero length\n\t" + $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t" + $$emit$$"dec rcx\n\t" + $$emit$$"jge LOOP\n\t" + $$emit$$"jmp DONE\n\t" + $$emit$$"# LARGE:\n\t" + if (UseFastStosb) { + $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t" + $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t" + } else if (UseXMMForObjInit) { + $$emit$$"movdq $tmp, $val\n\t" + $$emit$$"punpcklqdq $tmp, $tmp\n\t" + $$emit$$"vinserti128_high $tmp, $tmp\n\t" + $$emit$$"jmpq L_zero_64_bytes\n\t" + $$emit$$"# L_loop:\t# 64-byte LOOP\n\t" + $$emit$$"vmovdqu $tmp,(rax)\n\t" + $$emit$$"vmovdqu $tmp,0x20(rax)\n\t" + $$emit$$"add 0x40,rax\n\t" + $$emit$$"# L_zero_64_bytes:\n\t" + $$emit$$"sub 0x8,rcx\n\t" + $$emit$$"jge L_loop\n\t" + $$emit$$"add 0x4,rcx\n\t" + $$emit$$"jl L_tail\n\t" + $$emit$$"vmovdqu $tmp,(rax)\n\t" + $$emit$$"add 0x20,rax\n\t" + $$emit$$"sub 0x4,rcx\n\t" + $$emit$$"# L_tail:\t# Clearing tail bytes\n\t" + $$emit$$"add 0x4,rcx\n\t" + $$emit$$"jle L_end\n\t" + $$emit$$"dec rcx\n\t" + $$emit$$"# L_sloop:\t# 8-byte short 
loop\n\t" + $$emit$$"vmovq xmm0,(rax)\n\t" + $$emit$$"add 0x8,rax\n\t" + $$emit$$"dec rcx\n\t" + $$emit$$"jge L_sloop\n\t" + $$emit$$"# L_end:\n\t" + } else { + $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t" + } + $$emit$$"# DONE" + %} + ins_encode %{ + __ clear_mem($base$$Register, $cnt$$Register, $val$$Register, + $tmp$$XMMRegister, false, false); + %} + ins_pipe(pipe_slow); +%} + +instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val, + Universe dummy, rFlagsReg cr) +%{ + predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2)); + match(Set dummy (ClearArray (Binary cnt base) val)); + effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr); + + format %{ $$template + $$emit$$"cmp InitArrayShortSize,rcx\n\t" + $$emit$$"jg LARGE\n\t" + $$emit$$"dec rcx\n\t" + $$emit$$"js DONE\t# Zero length\n\t" + $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t" + $$emit$$"dec rcx\n\t" + $$emit$$"jge LOOP\n\t" + $$emit$$"jmp DONE\n\t" + $$emit$$"# LARGE:\n\t" + if (UseXMMForObjInit) { + $$emit$$"movdq $tmp, $val\n\t" + $$emit$$"punpcklqdq $tmp, $tmp\n\t" + $$emit$$"vinserti128_high $tmp, $tmp\n\t" + $$emit$$"jmpq L_zero_64_bytes\n\t" + $$emit$$"# L_loop:\t# 64-byte LOOP\n\t" + $$emit$$"vmovdqu $tmp,(rax)\n\t" + $$emit$$"vmovdqu $tmp,0x20(rax)\n\t" + $$emit$$"add 0x40,rax\n\t" + $$emit$$"# L_zero_64_bytes:\n\t" + $$emit$$"sub 0x8,rcx\n\t" + $$emit$$"jge L_loop\n\t" + $$emit$$"add 0x4,rcx\n\t" + $$emit$$"jl L_tail\n\t" + $$emit$$"vmovdqu $tmp,(rax)\n\t" + $$emit$$"add 0x20,rax\n\t" + $$emit$$"sub 0x4,rcx\n\t" + $$emit$$"# L_tail:\t# Clearing tail bytes\n\t" + $$emit$$"add 0x4,rcx\n\t" + $$emit$$"jle L_end\n\t" + $$emit$$"dec rcx\n\t" + $$emit$$"# L_sloop:\t# 8-byte short loop\n\t" + $$emit$$"vmovq xmm0,(rax)\n\t" + $$emit$$"add 0x8,rax\n\t" + $$emit$$"dec rcx\n\t" + $$emit$$"jge L_sloop\n\t" + $$emit$$"# L_end:\n\t" + } else { + $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t" + } + 
$$emit$$"# DONE" + %} + ins_encode %{ + __ clear_mem($base$$Register, $cnt$$Register, $val$$Register, + $tmp$$XMMRegister, false, true); + %} + ins_pipe(pipe_slow); +%} + +// Small non-constant length ClearArray for AVX512 targets. +instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val, + Universe dummy, rFlagsReg cr) +%{ + predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2)); + match(Set dummy (ClearArray (Binary cnt base) val)); + ins_cost(125); + effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr); format %{ $$template $$emit$$"xorq rax, rax\t# ClearArray:\n\t" @@ -14711,20 +14916,19 @@ instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero, $$emit$$"# DONE" %} ins_encode %{ - __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, - $tmp$$XMMRegister, false, knoreg); + __ clear_mem($base$$Register, $cnt$$Register, $val$$Register, + $tmp$$XMMRegister, false, false, $ktmp$$KRegister); %} ins_pipe(pipe_slow); %} -// Small non-constant length ClearArray for AVX512 targets. 
-instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero, - Universe dummy, rFlagsReg cr) +instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val, + Universe dummy, rFlagsReg cr) %{ - predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2)); - match(Set dummy (ClearArray cnt base)); + predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2)); + match(Set dummy (ClearArray (Binary cnt base) val)); ins_cost(125); - effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr); + effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr); format %{ $$template $$emit$$"xorq rax, rax\t# ClearArray:\n\t" @@ -14772,19 +14976,115 @@ instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_ $$emit$$"# DONE" %} ins_encode %{ - __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, - $tmp$$XMMRegister, false, $ktmp$$KRegister); + __ clear_mem($base$$Register, $cnt$$Register, $val$$Register, + $tmp$$XMMRegister, false, true, $ktmp$$KRegister); %} ins_pipe(pipe_slow); %} // Large non-constant length ClearArray for non-AVX512 targets. 
-instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero, +instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val, Universe dummy, rFlagsReg cr) %{ - predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large()); - match(Set dummy (ClearArray cnt base)); - effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr); + predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2)); + match(Set dummy (ClearArray (Binary cnt base) val)); + effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr); + + format %{ $$template + if (UseFastStosb) { + $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t" + $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--" + } else if (UseXMMForObjInit) { + $$emit$$"movdq $tmp, $val\n\t" + $$emit$$"punpcklqdq $tmp, $tmp\n\t" + $$emit$$"vinserti128_high $tmp, $tmp\n\t" + $$emit$$"jmpq L_zero_64_bytes\n\t" + $$emit$$"# L_loop:\t# 64-byte LOOP\n\t" + $$emit$$"vmovdqu $tmp,(rax)\n\t" + $$emit$$"vmovdqu $tmp,0x20(rax)\n\t" + $$emit$$"add 0x40,rax\n\t" + $$emit$$"# L_zero_64_bytes:\n\t" + $$emit$$"sub 0x8,rcx\n\t" + $$emit$$"jge L_loop\n\t" + $$emit$$"add 0x4,rcx\n\t" + $$emit$$"jl L_tail\n\t" + $$emit$$"vmovdqu $tmp,(rax)\n\t" + $$emit$$"add 0x20,rax\n\t" + $$emit$$"sub 0x4,rcx\n\t" + $$emit$$"# L_tail:\t# Clearing tail bytes\n\t" + $$emit$$"add 0x4,rcx\n\t" + $$emit$$"jle L_end\n\t" + $$emit$$"dec rcx\n\t" + $$emit$$"# L_sloop:\t# 8-byte short loop\n\t" + $$emit$$"vmovq xmm0,(rax)\n\t" + $$emit$$"add 0x8,rax\n\t" + $$emit$$"dec rcx\n\t" + $$emit$$"jge L_sloop\n\t" + $$emit$$"# L_end:\n\t" + } else { + $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--" + } + %} + ins_encode %{ + __ clear_mem($base$$Register, $cnt$$Register, $val$$Register, + $tmp$$XMMRegister, true, false); + %} + ins_pipe(pipe_slow); +%} + +instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val, + Universe dummy, rFlagsReg cr) +%{ + 
predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2)); + match(Set dummy (ClearArray (Binary cnt base) val)); + effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr); + + format %{ $$template + if (UseXMMForObjInit) { + $$emit$$"movdq $tmp, $val\n\t" + $$emit$$"punpcklqdq $tmp, $tmp\n\t" + $$emit$$"vinserti128_high $tmp, $tmp\n\t" + $$emit$$"jmpq L_zero_64_bytes\n\t" + $$emit$$"# L_loop:\t# 64-byte LOOP\n\t" + $$emit$$"vmovdqu $tmp,(rax)\n\t" + $$emit$$"vmovdqu $tmp,0x20(rax)\n\t" + $$emit$$"add 0x40,rax\n\t" + $$emit$$"# L_zero_64_bytes:\n\t" + $$emit$$"sub 0x8,rcx\n\t" + $$emit$$"jge L_loop\n\t" + $$emit$$"add 0x4,rcx\n\t" + $$emit$$"jl L_tail\n\t" + $$emit$$"vmovdqu $tmp,(rax)\n\t" + $$emit$$"add 0x20,rax\n\t" + $$emit$$"sub 0x4,rcx\n\t" + $$emit$$"# L_tail:\t# Clearing tail bytes\n\t" + $$emit$$"add 0x4,rcx\n\t" + $$emit$$"jle L_end\n\t" + $$emit$$"dec rcx\n\t" + $$emit$$"# L_sloop:\t# 8-byte short loop\n\t" + $$emit$$"vmovq xmm0,(rax)\n\t" + $$emit$$"add 0x8,rax\n\t" + $$emit$$"dec rcx\n\t" + $$emit$$"jge L_sloop\n\t" + $$emit$$"# L_end:\n\t" + } else { + $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--" + } + %} + ins_encode %{ + __ clear_mem($base$$Register, $cnt$$Register, $val$$Register, + $tmp$$XMMRegister, true, true); + %} + ins_pipe(pipe_slow); +%} + +// Large non-constant length ClearArray for AVX512 targets. 
+instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val, + Universe dummy, rFlagsReg cr) +%{ + predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2)); + match(Set dummy (ClearArray (Binary cnt base) val)); + effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr); format %{ $$template if (UseFastStosb) { @@ -14823,19 +15123,18 @@ instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero, } %} ins_encode %{ - __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, - $tmp$$XMMRegister, true, knoreg); + __ clear_mem($base$$Register, $cnt$$Register, $val$$Register, + $tmp$$XMMRegister, true, false, $ktmp$$KRegister); %} ins_pipe(pipe_slow); %} -// Large non-constant length ClearArray for AVX512 targets. -instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero, - Universe dummy, rFlagsReg cr) +instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val, + Universe dummy, rFlagsReg cr) %{ - predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large()); - match(Set dummy (ClearArray cnt base)); - effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr); + predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2)); + match(Set dummy (ClearArray (Binary cnt base) val)); + effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr); format %{ $$template if (UseFastStosb) { @@ -14874,22 +15173,23 @@ instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp } %} ins_encode %{ - __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, - $tmp$$XMMRegister, true, $ktmp$$KRegister); + __ clear_mem($base$$Register, $cnt$$Register, $val$$Register, + $tmp$$XMMRegister, true, true, $ktmp$$KRegister); %} ins_pipe(pipe_slow); %} // Small constant length 
ClearArray for AVX512 targets. -instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr) +instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr) %{ - predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl()); - match(Set dummy (ClearArray cnt base)); + predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && + ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl())); + match(Set dummy (ClearArray (Binary cnt base) val)); ins_cost(100); - effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr); + effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr); format %{ "clear_mem_imm $base , $cnt \n\t" %} ins_encode %{ - __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister); + __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister); %} ins_pipe(pipe_slow); %} @@ -16767,9 +17067,26 @@ instruct CallLeafDirectVector(method meth) ins_pipe(pipe_slow); %} +// Call runtime without safepoint +// entry point is null, target holds the address to call +instruct CallLeafNoFPInDirect(rRegP target) +%{ + predicate(n->as_Call()->entry_point() == nullptr); + match(CallLeafNoFP target); + + ins_cost(300); + format %{ "call_leaf_nofp,runtime indirect " %} + ins_encode %{ + __ call($target$$Register); + %} + + ins_pipe(pipe_slow); +%} + // Call runtime without safepoint instruct CallLeafNoFPDirect(method meth) %{ + predicate(n->as_Call()->entry_point() != nullptr); match(CallLeafNoFP); effect(USE meth); diff --git a/src/hotspot/cpu/zero/continuationFreezeThaw_zero.inline.hpp b/src/hotspot/cpu/zero/continuationFreezeThaw_zero.inline.hpp index 740f65e346054..f3c0ae80e0f26 100644 --- a/src/hotspot/cpu/zero/continuationFreezeThaw_zero.inline.hpp +++ b/src/hotspot/cpu/zero/continuationFreezeThaw_zero.inline.hpp @@ -39,7 +39,7 @@ inline 
frame FreezeBase::sender(const frame& f) { return frame(); } -template frame FreezeBase::new_heap_frame(frame& f, frame& caller) { +template frame FreezeBase::new_heap_frame(frame& f, frame& caller, int size_adjust) { Unimplemented(); return frame(); } @@ -56,7 +56,7 @@ inline void FreezeBase::relativize_interpreted_frame_metadata(const frame& f, co Unimplemented(); } -inline void FreezeBase::patch_pd(frame& hf, const frame& caller) { +inline void FreezeBase::patch_pd(frame& hf, const frame& caller, bool is_bottom_frame) { Unimplemented(); } @@ -82,7 +82,7 @@ inline frame ThawBase::new_entry_frame() { return frame(); } -template frame ThawBase::new_stack_frame(const frame& hf, frame& caller, bool bottom) { +template frame ThawBase::new_stack_frame(const frame& hf, frame& caller, bool bottom, int size_adjust) { Unimplemented(); return frame(); } diff --git a/src/hotspot/cpu/zero/frame_zero.cpp b/src/hotspot/cpu/zero/frame_zero.cpp index 69bbea2972a63..3dcf75fc40991 100644 --- a/src/hotspot/cpu/zero/frame_zero.cpp +++ b/src/hotspot/cpu/zero/frame_zero.cpp @@ -430,3 +430,20 @@ frame::frame(void* sp, void* fp, void* pc) { } #endif + +intptr_t* frame::repair_sender_sp(intptr_t* sender_sp, intptr_t** saved_fp_addr) const { + // Only called for nmethods, which Zero does not have. + ShouldNotReachHere(); + return nullptr; +} + +intptr_t* frame::repair_sender_sp(nmethod* nm, intptr_t* sp, intptr_t** saved_fp_addr) { + // Only called for nmethods, which Zero does not have. 
+ ShouldNotReachHere(); + return nullptr; +} + +bool frame::was_augmented_on_entry(int& real_size) const { + ShouldNotReachHere(); + return false; +} diff --git a/src/hotspot/cpu/zero/frame_zero.hpp b/src/hotspot/cpu/zero/frame_zero.hpp index 190966155944e..91d3b031ceea6 100644 --- a/src/hotspot/cpu/zero/frame_zero.hpp +++ b/src/hotspot/cpu/zero/frame_zero.hpp @@ -90,4 +90,8 @@ template static void update_map_with_saved_link(RegisterMapT* map, intptr_t** link_addr); + intptr_t* repair_sender_sp(intptr_t* sender_sp, intptr_t** saved_fp_addr) const; + static intptr_t* repair_sender_sp(nmethod* nm, intptr_t* sp, intptr_t** saved_fp_addr); + bool was_augmented_on_entry(int& real_size) const; + #endif // CPU_ZERO_FRAME_ZERO_HPP diff --git a/src/hotspot/cpu/zero/globals_zero.hpp b/src/hotspot/cpu/zero/globals_zero.hpp index 6dc7d81275c31..7a3917489f2e5 100644 --- a/src/hotspot/cpu/zero/globals_zero.hpp +++ b/src/hotspot/cpu/zero/globals_zero.hpp @@ -73,6 +73,9 @@ define_pd_global(uintx, TypeProfileLevel, 0); define_pd_global(bool, PreserveFramePointer, false); +define_pd_global(bool, InlineTypePassFieldsAsArgs, false); +define_pd_global(bool, InlineTypeReturnedAsFields, false); + define_pd_global(bool, CompactStrings, true); #define ARCH_FLAGS(develop, \ diff --git a/src/hotspot/cpu/zero/sharedRuntime_zero.cpp b/src/hotspot/cpu/zero/sharedRuntime_zero.cpp index 6e18097c99269..23a096d77e7dc 100644 --- a/src/hotspot/cpu/zero/sharedRuntime_zero.cpp +++ b/src/hotspot/cpu/zero/sharedRuntime_zero.cpp @@ -44,12 +44,29 @@ int SharedRuntime::java_calling_convention(const BasicType *sig_bt, return 0; } +int SharedRuntime::java_return_convention(const BasicType *sig_bt, + VMRegPair *regs, + int total_args_passed) { + Unimplemented(); + return 0; +} + +BufferedInlineTypeBlob* SharedRuntime::generate_buffered_inline_type_adapter(const InlineKlass* vk) { + Unimplemented(); + return nullptr; +} + void SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, - int total_args_passed, 
int comp_args_on_stack, - const BasicType *sig_bt, - const VMRegPair *regs, - address entry_address[AdapterBlob::ENTRY_COUNT]) { + const GrowableArray * sig, + const VMRegPair* regs, + const GrowableArray * sig_cc, + const VMRegPair* regs_cc, + const GrowableArray * sig_cc_ro, + const VMRegPair* regs_cc_ro, + address entry_address[AdapterBlob::ENTRY_COUNT], + AdapterBlob*& new_adapter, + bool allocate_code_blob) { ShouldNotCallThis(); return; } diff --git a/src/hotspot/cpu/zero/vtableStubs_zero.cpp b/src/hotspot/cpu/zero/vtableStubs_zero.cpp index 12819b484b2b6..ff559e9d26900 100644 --- a/src/hotspot/cpu/zero/vtableStubs_zero.cpp +++ b/src/hotspot/cpu/zero/vtableStubs_zero.cpp @@ -26,12 +26,12 @@ #include "code/vtableStubs.hpp" #include "utilities/debug.hpp" -VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { +VtableStub* VtableStubs::create_vtable_stub(int vtable_index, bool caller_is_c1) { ShouldNotCallThis(); return nullptr; } -VtableStub* VtableStubs::create_itable_stub(int vtable_index) { +VtableStub* VtableStubs::create_itable_stub(int vtable_index, bool caller_is_c1) { ShouldNotCallThis(); return nullptr; } diff --git a/src/hotspot/cpu/zero/zeroInterpreter_zero.cpp b/src/hotspot/cpu/zero/zeroInterpreter_zero.cpp index 1b20761f6e49f..810d9db8d58e7 100644 --- a/src/hotspot/cpu/zero/zeroInterpreter_zero.cpp +++ b/src/hotspot/cpu/zero/zeroInterpreter_zero.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2026, Oracle and/or its affiliates. All rights reserved. * Copyright 2007, 2008, 2009, 2010, 2011 Red Hat, Inc. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -582,6 +582,12 @@ int ZeroInterpreter::getter_entry(Method* method, intptr_t UNUSED, TRAPS) { return normal_entry(method, 0, THREAD); } + // Flattened entries require handling beyond a direct field load. + // Bail to slow path. 
+ if (entry->is_flat()) { + return normal_entry(method, 0, THREAD); + } + ZeroStack* stack = thread->zero_stack(); intptr_t* topOfStack = stack->sp(); @@ -673,6 +679,12 @@ int ZeroInterpreter::setter_entry(Method* method, intptr_t UNUSED, TRAPS) { return normal_entry(method, 0, THREAD); } + // Flattened entries require handling beyond a direct field store. + // Bail to slow path. + if (entry->is_flat()) { + return normal_entry(method, 0, THREAD); + } + ZeroStack* stack = thread->zero_stack(); intptr_t* topOfStack = stack->sp(); diff --git a/src/hotspot/os/posix/dtrace/hotspot_jni.d b/src/hotspot/os/posix/dtrace/hotspot_jni.d index 1937769dcb267..631ad8522944d 100644 --- a/src/hotspot/os/posix/dtrace/hotspot_jni.d +++ b/src/hotspot/os/posix/dtrace/hotspot_jni.d @@ -360,6 +360,8 @@ provider hotspot_jni { probe GetSuperclass__return(void*); probe GetVersion__entry(void*); probe GetVersion__return(uint32_t); + probe HasIdentity__entry(void*, void*); + probe HasIdentity__return(uintptr_t); probe IsAssignableFrom__entry(void*, void*, void*); probe IsAssignableFrom__return(uintptr_t); probe IsInstanceOf__entry(void*, void*, void*); diff --git a/src/hotspot/share/adlc/forms.cpp b/src/hotspot/share/adlc/forms.cpp index e2265f70ed946..ddcec91c4aba4 100644 --- a/src/hotspot/share/adlc/forms.cpp +++ b/src/hotspot/share/adlc/forms.cpp @@ -282,6 +282,7 @@ Form::DataType Form::is_store_to_memory(const char *opType) const { if( strcmp(opType,"StoreF")==0) return Form::idealF; if( strcmp(opType,"StoreI")==0) return Form::idealI; if( strcmp(opType,"StoreL")==0) return Form::idealL; + if( strcmp(opType,"StoreLSpecial")==0) return Form::idealL; if( strcmp(opType,"StoreP")==0) return Form::idealP; if( strcmp(opType,"StoreN")==0) return Form::idealN; if( strcmp(opType,"StoreNKlass")==0) return Form::idealNKlass; diff --git a/src/hotspot/share/adlc/formssel.cpp b/src/hotspot/share/adlc/formssel.cpp index c80c1ac379c88..5d8a19ab5dd98 100644 --- a/src/hotspot/share/adlc/formssel.cpp +++ 
b/src/hotspot/share/adlc/formssel.cpp @@ -846,7 +846,8 @@ uint InstructForm::oper_input_base(FormDict &globals) { strcmp(_matrule->_opType,"TailJump" )==0 || strcmp(_matrule->_opType,"ForwardException")==0 || strcmp(_matrule->_opType,"SafePoint" )==0 || - strcmp(_matrule->_opType,"Halt" )==0 ) + strcmp(_matrule->_opType,"Halt" )==0 || + strcmp(_matrule->_opType,"CallLeafNoFP")==0) return AdlcVMDeps::Parms; // Skip the machine-state edges if( _matrule->_rChild && @@ -3587,7 +3588,7 @@ void MatchNode::forms_do(FormClosure *f) { int MatchNode::needs_ideal_memory_edge(FormDict &globals) const { static const char *needs_ideal_memory_list[] = { - "StoreI","StoreL","StoreP","StoreN","StoreNKlass","StoreD","StoreF" , + "StoreI","StoreL","StoreLSpecial","StoreP","StoreN","StoreNKlass","StoreD","StoreF" , "StoreB","StoreC","Store" ,"StoreFP", "LoadI", "LoadL", "LoadP" ,"LoadN", "LoadD" ,"LoadF" , "LoadB" , "LoadUB", "LoadUS" ,"LoadS" ,"Load" , diff --git a/src/hotspot/share/adlc/main.cpp b/src/hotspot/share/adlc/main.cpp index 8e6ea5bbec976..5b244eca34f94 100644 --- a/src/hotspot/share/adlc/main.cpp +++ b/src/hotspot/share/adlc/main.cpp @@ -218,6 +218,7 @@ int main(int argc, char *argv[]) AD.addInclude(AD._CPP_file, "code/compiledIC.hpp"); AD.addInclude(AD._CPP_file, "code/nativeInst.hpp"); AD.addInclude(AD._CPP_file, "code/vmreg.inline.hpp"); + AD.addInclude(AD._CPP_file, "gc/shared/barrierSetAssembler.hpp"); AD.addInclude(AD._CPP_file, "gc/shared/collectedHeap.inline.hpp"); AD.addInclude(AD._CPP_file, "oops/compressedOops.hpp"); AD.addInclude(AD._CPP_file, "oops/markWord.hpp"); diff --git a/src/hotspot/share/adlc/output_h.cpp b/src/hotspot/share/adlc/output_h.cpp index 8aa1762f90251..616e457cdd3ed 100644 --- a/src/hotspot/share/adlc/output_h.cpp +++ b/src/hotspot/share/adlc/output_h.cpp @@ -1631,7 +1631,7 @@ void ArchDesc::declareClasses(FILE *fp) { if (instr->is_ideal_call() != Form::invalid_type && instr->is_ideal_call() != Form::JAVA_LEAF) { // MachConstantBase goes 
behind arguments, but before jvms. - fprintf(fp,"assert(tf() && tf()->domain(), \"\"); return tf()->domain()->cnt();"); + fprintf(fp,"assert(tf() && tf()->domain_cc(), \"\"); return tf()->domain_cc()->cnt();"); } else { fprintf(fp,"return req()-1;"); } diff --git a/src/hotspot/share/asm/codeBuffer.hpp b/src/hotspot/share/asm/codeBuffer.hpp index ea0237f140184..d56ab27f02531 100644 --- a/src/hotspot/share/asm/codeBuffer.hpp +++ b/src/hotspot/share/asm/codeBuffer.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -53,6 +53,9 @@ class CodeOffsets: public StackObj { public: enum Entries { Entry, Verified_Entry, + Inline_Entry, + Verified_Inline_Entry, + Verified_Inline_Entry_RO, Frame_Complete, // Offset in the code where the frame setup is (for forte stackwalks) is complete OSR_Entry, Exceptions, // Offset where exception handler lives @@ -63,15 +66,20 @@ class CodeOffsets: public StackObj { // special value to note codeBlobs where profile (forte) stack walking is // always dangerous and suspect. 
- enum { frame_never_safe = -1 }; + static const int frame_never_safe = -1; + static const int no_such_entry_point = -1; private: int _values[max_Entries]; + void check(int e) const { assert(0 <= e && e < max_Entries, "must be"); } public: CodeOffsets() { _values[Entry ] = 0; _values[Verified_Entry] = 0; + _values[Inline_Entry ] = 0; + _values[Verified_Inline_Entry ] = no_such_entry_point; + _values[Verified_Inline_Entry_RO] = no_such_entry_point; _values[Frame_Complete] = frame_never_safe; _values[OSR_Entry ] = 0; _values[Exceptions ] = -1; @@ -79,8 +87,8 @@ class CodeOffsets: public StackObj { _values[UnwindHandler ] = -1; } - int value(Entries e) { return _values[e]; } - void set_value(Entries e, int val) { _values[e] = val; } + int value(Entries e) const { check(e); return _values[e]; } + void set_value(Entries e, int val) { check(e); _values[e] = val; } }; // This class represents a stream of code and associated relocations. diff --git a/src/hotspot/share/asm/macroAssembler.hpp b/src/hotspot/share/asm/macroAssembler.hpp index 35265de35efaf..742af7c0f8ccb 100644 --- a/src/hotspot/share/asm/macroAssembler.hpp +++ b/src/hotspot/share/asm/macroAssembler.hpp @@ -26,6 +26,7 @@ #define SHARE_ASM_MACROASSEMBLER_HPP #include "asm/assembler.hpp" +#include "utilities/growableArray.hpp" #include "utilities/macros.hpp" #include CPU_HEADER(macroAssembler) diff --git a/src/hotspot/share/asm/macroAssembler_common.cpp b/src/hotspot/share/asm/macroAssembler_common.cpp new file mode 100644 index 0000000000000..96f0b5f8a5003 --- /dev/null +++ b/src/hotspot/share/asm/macroAssembler_common.cpp @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "asm/assembler.hpp" +#include "asm/assembler.inline.hpp" +#include "asm/macroAssembler.hpp" +#include "jvm.h" +#include "oops/inlineKlass.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/signature_cc.hpp" +#ifdef COMPILER2 +#include "opto/compile.hpp" +#include "opto/node.hpp" +#endif + +void MacroAssembler::skip_unpacked_fields(const GrowableArray* sig, int& sig_index, VMRegPair* regs_from, int regs_from_count, int& from_index) { + ScalarizedInlineArgsStream stream(sig, sig_index, regs_from, regs_from_count, from_index); + VMReg reg; + BasicType bt; + while (stream.next(reg, bt)) {} + sig_index = stream.sig_index(); + from_index = stream.regs_index(); +} + +bool MacroAssembler::is_reg_in_unpacked_fields(const GrowableArray* sig, int sig_index, VMReg to, VMRegPair* regs_from, int regs_from_count, int from_index) { + ScalarizedInlineArgsStream stream(sig, sig_index, regs_from, regs_from_count, from_index); + VMReg reg; + BasicType bt; + while (stream.next(reg, bt)) { + if (reg == to) { + return true; + } + } + return false; +} + +MacroAssembler::RegState* MacroAssembler::init_reg_state(VMRegPair* regs, int num_regs, int sp_inc, int max_stack) { + int max_reg = VMRegImpl::stack2reg(max_stack)->value(); + 
MacroAssembler::RegState* reg_state = NEW_RESOURCE_ARRAY(MacroAssembler::RegState, max_reg); + + // Make all writable + for (int i = 0; i < max_reg; ++i) { + reg_state[i] = MacroAssembler::reg_writable; + } + // Set all source registers/stack slots to readonly to prevent accidental overwriting + for (int i = 0; i < num_regs; ++i) { + VMReg reg = regs[i].first(); + if (!reg->is_valid()) continue; + if (reg->is_stack()) { + // Update source stack location by adding stack increment + reg = VMRegImpl::stack2reg(reg->reg2stack() + sp_inc/VMRegImpl::stack_slot_size); + regs[i] = reg; + } + assert(reg->value() >= 0 && reg->value() < max_reg, "reg value out of bounds"); + reg_state[reg->value()] = MacroAssembler::reg_readonly; + } + return reg_state; +} + +#ifdef COMPILER2 +int MacroAssembler::unpack_inline_args(Compile* C, bool receiver_only) { + assert(C->has_scalarized_args(), "inline type argument scalarization is disabled"); + ciMethod* method = C->method(); + const GrowableArray* sig = method->get_sig_cc(); + assert(sig != nullptr, "must have scalarized signature"); + + // Get unscalarized calling convention + BasicType* sig_bt = NEW_RESOURCE_ARRAY(BasicType, 256); + int args_passed = 0; + if (!method->is_static()) { + sig_bt[args_passed++] = T_OBJECT; + } + if (!receiver_only) { + for (ciSignatureStream ss(method->signature()); !ss.at_return_type(); ss.next()) { + BasicType bt = ss.type()->basic_type(); + sig_bt[args_passed++] = bt; + if (type2size[bt] == 2) { + sig_bt[args_passed++] = T_VOID; + } + } + } else { + // Only unpack the receiver, all other arguments are already scalarized + ciInstanceKlass* holder = method->holder(); + int rec_len = (holder->is_inlinetype() && method->is_scalarized_arg(0)) ? 
holder->as_inline_klass()->inline_arg_length() : 1; + // Copy scalarized signature but skip receiver and inline type delimiters + for (int i = 0; i < sig->length(); i++) { + if (SigEntry::skip_value_delimiters(sig, i) && rec_len <= 0) { + sig_bt[args_passed++] = sig->at(i)._bt; + } + rec_len--; + } + } + VMRegPair* regs = NEW_RESOURCE_ARRAY(VMRegPair, args_passed); + int args_on_stack = SharedRuntime::java_calling_convention(sig_bt, regs, args_passed); + + // Get scalarized calling convention + int args_passed_cc = SigEntry::fill_sig_bt(sig, sig_bt); + VMRegPair* regs_cc = NEW_RESOURCE_ARRAY(VMRegPair, sig->length()); + int args_on_stack_cc = SharedRuntime::java_calling_convention(sig_bt, regs_cc, args_passed_cc); + + // Check if we need to extend the stack for unpacking + int sp_inc = 0; + if (args_on_stack_cc > args_on_stack) { + sp_inc = extend_stack_for_inline_args(args_on_stack_cc); + } + shuffle_inline_args(false, receiver_only, sig, + args_passed, args_on_stack, regs, // from + args_passed_cc, args_on_stack_cc, regs_cc, // to + sp_inc, noreg); + return sp_inc; +} +#endif // COMPILER2 + +void MacroAssembler::shuffle_inline_args(bool is_packing, bool receiver_only, + const GrowableArray* sig, + int args_passed, int args_on_stack, VMRegPair* regs, + int args_passed_to, int args_on_stack_to, VMRegPair* regs_to, + int sp_inc, Register val_array) { + int max_stack = MAX2(args_on_stack + sp_inc/VMRegImpl::stack_slot_size, args_on_stack_to); + RegState* reg_state = init_reg_state(regs, args_passed, sp_inc, max_stack); + + // Emit code for packing/unpacking inline type arguments + // We try multiple times and eventually start spilling to resolve (circular) dependencies + bool done = (args_passed_to == 0); + for (int i = 0; i < 2*args_passed_to && !done; ++i) { + done = true; + bool spill = (i > args_passed_to); // Start spilling? + // Iterate over all arguments (when unpacking, do in reverse) + int step = is_packing ? 1 : -1; + int from_index = is_packing ? 
0 : args_passed - 1; + int to_index = is_packing ? 0 : args_passed_to - 1; + int sig_index = is_packing ? 0 : sig->length() - 1; + int sig_index_end = is_packing ? sig->length() : -1; + int vtarg_index = 0; + for (; sig_index != sig_index_end; sig_index += step) { + assert(0 <= sig_index && sig_index < sig->length(), "index out of bounds"); + if (spill) { + // This call returns true IFF we should keep trying to spill in this round. + spill = shuffle_inline_args_spill(is_packing, sig, sig_index, regs, from_index, args_passed, + reg_state); + } + BasicType bt = sig->at(sig_index)._bt; + if (SigEntry::skip_value_delimiters(sig, sig_index)) { + VMReg from_reg = regs[from_index].first(); + if (from_reg->is_valid()) { + done &= move_helper(from_reg, regs_to[to_index].first(), bt, reg_state); + } else { + // halves of T_LONG or T_DOUBLE + assert(bt == T_VOID, "unexpected basic type"); + } + to_index += step; + from_index += step; + } else if (is_packing) { + assert(val_array != noreg, "must be"); + VMReg reg_to = regs_to[to_index].first(); + done &= pack_inline_helper(sig, sig_index, vtarg_index, + regs, args_passed, from_index, reg_to, + reg_state, val_array); + vtarg_index++; + to_index++; + } else if (!receiver_only || (from_index == 0 && bt == T_VOID)) { + VMReg from_reg = regs[from_index].first(); + done &= unpack_inline_helper(sig, sig_index, + from_reg, from_index, regs_to, args_passed_to, to_index, + reg_state); + if (from_index == -1 && sig_index != 0) { + // This can happen when we are confusing an empty inline type argument which is + // not counted in the scalarized signature for the receiver. Just ignore it. 
+ assert(receiver_only, "sanity"); + from_index = 0; + } + } + } + } + guarantee(done, "Could not resolve circular dependency when shuffling inline type arguments"); +} + +bool MacroAssembler::shuffle_inline_args_spill(bool is_packing, const GrowableArray* sig, int sig_index, + VMRegPair* regs_from, int from_index, int regs_from_count, RegState* reg_state) { + VMReg reg; + if (!is_packing || SigEntry::skip_value_delimiters(sig, sig_index)) { + reg = regs_from[from_index].first(); + if (!reg->is_valid() || reg_state[reg->value()] != reg_readonly) { + // Spilling this won't break cycles + return true; + } + } else { + ScalarizedInlineArgsStream stream(sig, sig_index, regs_from, regs_from_count, from_index); + VMReg from_reg; + BasicType bt; + bool found = false; + while (stream.next(from_reg, bt)) { + reg = from_reg; + assert(from_reg->is_valid(), "must be"); + if (reg_state[from_reg->value()] == reg_readonly) { + found = true; + break; + } + } + if (!found) { + // Spilling fields in this inline type arg won't break cycles + return true; + } + } + + // Spill argument to be able to write the source and resolve circular dependencies + VMReg spill_reg = spill_reg_for(reg); + if (reg_state[spill_reg->value()] == reg_readonly) { + // We have already spilled (in previous round). The spilled register should be consumed by this round. + } else { + bool res = move_helper(reg, spill_reg, T_DOUBLE, reg_state); + assert(res, "Spilling should not fail"); + // Set spill_reg as new source and update state + reg = spill_reg; + regs_from[from_index].set1(reg); + reg_state[reg->value()] = reg_readonly; + } + + return false; // Do not spill again in this round +} diff --git a/src/hotspot/share/asm/macroAssembler_common.hpp b/src/hotspot/share/asm/macroAssembler_common.hpp new file mode 100644 index 0000000000000..b9552601d7e1f --- /dev/null +++ b/src/hotspot/share/asm/macroAssembler_common.hpp @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2019, 2026, Oracle and/or its affiliates. 
All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_ASM_MACROASSEMBLER_COMMON_HPP +#define SHARE_ASM_MACROASSEMBLER_COMMON_HPP + +// These are part of the MacroAssembler class that are common for all CPUs + +// class MacroAssembler ... 
{ + + enum RegState { + reg_readonly, + reg_writable, + reg_written + }; + + void skip_unpacked_fields(const GrowableArray* sig, int& sig_index, VMRegPair* regs_from, + int regs_from_count, int& from_index); + bool is_reg_in_unpacked_fields(const GrowableArray* sig, int sig_index, VMReg to, VMRegPair* regs_from, + int regs_from_count, int from_index); + RegState* init_reg_state(VMRegPair* regs, int num_regs, int sp_inc, int max_stack); + int unpack_inline_args(Compile* C, bool receiver_only); + void shuffle_inline_args(bool is_packing, bool receiver_only, + const GrowableArray* sig, + int args_passed, int args_on_stack, VMRegPair* regs, + int args_passed_to, int args_on_stack_to, VMRegPair* regs_to, + int sp_inc, Register val_array); + bool shuffle_inline_args_spill(bool is_packing, const GrowableArray* sig, int sig_index, + VMRegPair* regs_from, int from_index, int regs_from_count, RegState* reg_state); + + int store_inline_type_fields_to_buf(ciInlineKlass* vk, bool from_interpreter = true); + bool move_helper(VMReg from, VMReg to, BasicType bt, RegState reg_state[]); + bool unpack_inline_helper(const GrowableArray* sig, int& sig_index, + VMReg from, int& from_index, VMRegPair* to, int to_count, int& to_index, + RegState reg_state[]); + bool pack_inline_helper(const GrowableArray* sig, int& sig_index, int vtarg_index, + VMRegPair* from, int from_count, int& from_index, VMReg to, + RegState reg_state[], Register val_array); + int extend_stack_for_inline_args(int args_on_stack); + void remove_frame(int initial_framesize, bool needs_stack_repair); + VMReg spill_reg_for(VMReg reg); + +// }; + +#endif // SHARE_ASM_MACROASSEMBLER_COMMON_HPP diff --git a/src/hotspot/share/c1/c1_Canonicalizer.cpp b/src/hotspot/share/c1/c1_Canonicalizer.cpp index 573e1ac24d73c..96e376812ab16 100644 --- a/src/hotspot/share/c1/c1_Canonicalizer.cpp +++ b/src/hotspot/share/c1/c1_Canonicalizer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2025, Oracle and/or its affiliates. 
All rights reserved. + * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -280,7 +280,7 @@ void Canonicalizer::do_LoadIndexed (LoadIndexed* x) { assert(array == nullptr || FoldStableValues, "not enabled"); // Constant fold loads from stable arrays. - if (!x->mismatched() && array != nullptr && index != nullptr) { + if (!x->should_profile() && !x->mismatched() && array != nullptr && index != nullptr) { jint idx = index->value(); if (idx < 0 || idx >= array->value()->length()) { // Leave the load as is. The range check will handle it. @@ -468,11 +468,8 @@ void Canonicalizer::do_CompareOp (CompareOp* x) { void Canonicalizer::do_IfOp(IfOp* x) { - // Currently, Canonicalizer is only used by GraphBuilder, - // and IfOp is not created by GraphBuilder but only later - // when eliminating conditional expressions with CE_Eliminator, - // so this method will not be called. - ShouldNotReachHere(); + // Currently, Canonicalizer is only used by GraphBuilder, and IfOp is only created by + // GraphBuilder when loading/storing flat fields, do nothing for now. } @@ -648,13 +645,14 @@ void Canonicalizer::do_CheckCast (CheckCast* x) { klass->as_instance_klass()->is_interface(); // Interface casts can't be statically optimized away since verifier doesn't // enforce interface types in bytecode. 
- if (!is_interface && klass->is_subtype_of(x->klass())) { + if (!is_interface && klass->is_subtype_of(x->klass()) && (!x->is_null_free() || obj->is_null_free())) { + assert(!x->klass()->is_inlinetype() || x->klass() == klass, "Inline klasses can't have subtypes"); set_canonical(obj); return; } } - // checkcast of null returns null - if (obj->as_Constant() && obj->type()->as_ObjectType()->constant_value()->is_null_object()) { + // checkcast of null returns null for non null-free klasses + if (!x->is_null_free() && obj->is_null_obj()) { set_canonical(obj); } } @@ -668,7 +666,7 @@ void Canonicalizer::do_InstanceOf (InstanceOf* x) { return; } // instanceof null returns false - if (obj->as_Constant() && obj->type()->as_ObjectType()->constant_value()->is_null_object()) { + if (obj->as_Constant() && obj->is_null_obj()) { set_constant(0); } } @@ -726,7 +724,9 @@ void Canonicalizer::do_If(If* x) { return; } - if (lt->is_constant() && rt->is_constant()) { + // Simplify further when we have two constants. However, if we have a substitutability check + // we must not constant fold as this would lose the substitutability semantics. 
+ if (lt->is_constant() && rt->is_constant() && !x->substitutability_check()) { if (x->x()->as_Constant() != nullptr) { // pattern: If (lc cond rc) => simplify to: Goto BlockBegin* sux = x->x()->as_Constant()->compare(x->cond(), x->y(), @@ -845,8 +845,9 @@ void Canonicalizer::do_UnsafePut (UnsafePut* x) {} void Canonicalizer::do_UnsafeGetAndSet(UnsafeGetAndSet* x) {} void Canonicalizer::do_ProfileCall (ProfileCall* x) {} void Canonicalizer::do_ProfileReturnType(ProfileReturnType* x) {} -void Canonicalizer::do_ProfileInvoke (ProfileInvoke* x) {} -void Canonicalizer::do_RuntimeCall (RuntimeCall* x) {} +void Canonicalizer::do_ProfileInvoke (ProfileInvoke* x) {} +void Canonicalizer::do_ProfileACmpTypes (ProfileACmpTypes* x) {} +void Canonicalizer::do_RuntimeCall (RuntimeCall* x) {} void Canonicalizer::do_RangeCheckPredicate(RangeCheckPredicate* x) {} #ifdef ASSERT void Canonicalizer::do_Assert (Assert* x) {} diff --git a/src/hotspot/share/c1/c1_Canonicalizer.hpp b/src/hotspot/share/c1/c1_Canonicalizer.hpp index dc0b8c8dba77b..09fc4055101a4 100644 --- a/src/hotspot/share/c1/c1_Canonicalizer.hpp +++ b/src/hotspot/share/c1/c1_Canonicalizer.hpp @@ -93,6 +93,7 @@ class Canonicalizer: InstructionVisitor { virtual void do_UnsafeGetAndSet(UnsafeGetAndSet* x); virtual void do_ProfileCall (ProfileCall* x); virtual void do_ProfileReturnType (ProfileReturnType* x); + virtual void do_ProfileACmpTypes(ProfileACmpTypes* x); virtual void do_ProfileInvoke (ProfileInvoke* x); virtual void do_RuntimeCall (RuntimeCall* x); virtual void do_MemBar (MemBar* x); diff --git a/src/hotspot/share/c1/c1_CodeStubs.hpp b/src/hotspot/share/c1/c1_CodeStubs.hpp index c8b29f91bb2b5..8283f6b104dc8 100644 --- a/src/hotspot/share/c1/c1_CodeStubs.hpp +++ b/src/hotspot/share/c1/c1_CodeStubs.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved. 
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -228,6 +228,83 @@ class ImplicitNullCheckStub: public CodeStub { }; +class LoadFlattenedArrayStub: public CodeStub { + private: + LIR_Opr _array; + LIR_Opr _index; + LIR_Opr _result; + LIR_Opr _scratch_reg; + CodeEmitInfo* _info; + + public: + LoadFlattenedArrayStub(LIR_Opr array, LIR_Opr index, LIR_Opr result, CodeEmitInfo* info); + virtual void emit_code(LIR_Assembler* e); + virtual CodeEmitInfo* info() const { return _info; } + virtual void visit(LIR_OpVisitState* visitor) { + visitor->do_slow_case(_info); + visitor->do_input(_array); + visitor->do_input(_index); + visitor->do_output(_result); + if (_scratch_reg != LIR_OprFact::illegalOpr) { + visitor->do_temp(_scratch_reg); + } + } + +#ifndef PRODUCT + virtual void print_name(outputStream* out) const { out->print("LoadFlattenedArrayStub"); } +#endif // PRODUCT +}; + + +class StoreFlattenedArrayStub: public CodeStub { + private: + LIR_Opr _array; + LIR_Opr _index; + LIR_Opr _value; + LIR_Opr _scratch_reg; + CodeEmitInfo* _info; + + public: + StoreFlattenedArrayStub(LIR_Opr array, LIR_Opr index, LIR_Opr value, CodeEmitInfo* info); + virtual void emit_code(LIR_Assembler* e); + virtual CodeEmitInfo* info() const { return _info; } + virtual void visit(LIR_OpVisitState* visitor) { + visitor->do_slow_case(_info); + visitor->do_input(_array); + visitor->do_input(_index); + visitor->do_input(_value); + if (_scratch_reg != LIR_OprFact::illegalOpr) { + visitor->do_temp(_scratch_reg); + } + } +#ifndef PRODUCT + virtual void print_name(outputStream* out) const { out->print("StoreFlattenedArrayStub"); } +#endif // PRODUCT +}; + +class SubstitutabilityCheckStub: public CodeStub { + private: + LIR_Opr _left; + LIR_Opr _right; + LIR_Opr _scratch_reg; + CodeEmitInfo* _info; + public: + SubstitutabilityCheckStub(LIR_Opr left, LIR_Opr right, CodeEmitInfo* info); + virtual void 
emit_code(LIR_Assembler* e); + virtual CodeEmitInfo* info() const { return _info; } + virtual void visit(LIR_OpVisitState* visitor) { + visitor->do_slow_case(_info); + visitor->do_input(_left); + visitor->do_input(_right); + if (_scratch_reg != LIR_OprFact::illegalOpr) { + visitor->do_temp(_scratch_reg); + } + } +#ifndef PRODUCT + virtual void print_name(outputStream* out) const { out->print("SubstitutabilityCheckStub"); } +#endif // PRODUCT +}; + class NewInstanceStub: public CodeStub { private: ciInstanceKlass* _klass; @@ -280,9 +357,9 @@ class NewObjectArrayStub: public CodeStub { LIR_Opr _length; LIR_Opr _result; CodeEmitInfo* _info; - + bool _is_null_free; public: - NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info); + NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info, bool is_null_free = false); virtual void emit_code(LIR_Assembler* e); virtual CodeEmitInfo* info() const { return _info; } virtual void visit(LIR_OpVisitState* visitor) { @@ -317,11 +394,19 @@ class MonitorAccessStub: public CodeStub { class MonitorEnterStub: public MonitorAccessStub { private: CodeEmitInfo* _info; + CodeStub* _throw_ie_stub; + LIR_Opr _scratch_reg; public: - MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info) + MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info, + CodeStub* throw_ie_stub = nullptr, LIR_Opr scratch_reg = LIR_OprFact::illegalOpr) : MonitorAccessStub(obj_reg, lock_reg) { _info = new CodeEmitInfo(info); + _scratch_reg = scratch_reg; + _throw_ie_stub = throw_ie_stub; + if (_throw_ie_stub != nullptr) { + assert(_scratch_reg != LIR_OprFact::illegalOpr, "must be"); + } FrameMap* f = Compilation::current()->frame_map(); f->update_reserved_argument_area_size(2 * BytesPerWord); } @@ -331,6 +416,9 @@ class MonitorEnterStub: public MonitorAccessStub { virtual void visit(LIR_OpVisitState* visitor) { visitor->do_input(_obj_reg); visitor->do_input(_lock_reg); + 
if (_scratch_reg != LIR_OprFact::illegalOpr) { + visitor->do_temp(_scratch_reg); + } visitor->do_slow_case(_info); } #ifndef PRODUCT diff --git a/src/hotspot/share/c1/c1_Compilation.hpp b/src/hotspot/share/c1/c1_Compilation.hpp index 5de201592f94a..53f1368d5e92d 100644 --- a/src/hotspot/share/c1/c1_Compilation.hpp +++ b/src/hotspot/share/c1/c1_Compilation.hpp @@ -32,6 +32,7 @@ #include "compiler/compilerDefinitions.inline.hpp" #include "compiler/compilerDirectives.hpp" #include "runtime/deoptimization.hpp" +#include "runtime/sharedRuntime.hpp" class CompilationFailureInfo; class CompilationResourceObj; @@ -252,6 +253,10 @@ class Compilation: public StackObj { return env()->comp_level() == CompLevel_full_profile && C1UpdateMethodData && MethodData::profile_return(); } + bool profile_array_accesses() { + return env()->comp_level() == CompLevel_full_profile && + C1UpdateMethodData; + } // will compilation make optimistic assumptions that might lead to // deoptimization and that the runtime will account for? diff --git a/src/hotspot/share/c1/c1_FrameMap.cpp b/src/hotspot/share/c1/c1_FrameMap.cpp index f42a9f7035b67..a97c320bf96cf 100644 --- a/src/hotspot/share/c1/c1_FrameMap.cpp +++ b/src/hotspot/share/c1/c1_FrameMap.cpp @@ -186,14 +186,15 @@ FrameMap::FrameMap(ciMethod* method, int monitors, int reserved_argument_area_si } -bool FrameMap::finalize_frame(int nof_slots) { +bool FrameMap::finalize_frame(int nof_slots, bool needs_stack_repair) { assert(nof_slots >= 0, "must be positive"); assert(_num_spills == -1, "can only be set once"); _num_spills = nof_slots; assert(_framesize == -1, "should only be calculated once"); _framesize = align_up(in_bytes(sp_offset_for_monitor_base(0)) + _num_monitors * (int)sizeof(BasicObjectLock) + - (int)sizeof(intptr_t) + // offset of deopt orig pc + (int)sizeof(intptr_t) + // offset of deopt orig pc + (needs_stack_repair ? 
(int)sizeof(intptr_t) : 0) + // stack increment value frame_pad_in_bytes, StackAlignmentInBytes) / 4; int java_index = 0; diff --git a/src/hotspot/share/c1/c1_FrameMap.hpp b/src/hotspot/share/c1/c1_FrameMap.hpp index 67ae92e98755e..6c107f85d610c 100644 --- a/src/hotspot/share/c1/c1_FrameMap.hpp +++ b/src/hotspot/share/c1/c1_FrameMap.hpp @@ -181,7 +181,7 @@ class FrameMap : public CompilationResourceObj { } FrameMap(ciMethod* method, int monitors, int reserved_argument_area_size); - bool finalize_frame(int nof_slots); + bool finalize_frame(int nof_slots, bool needs_stack_repair); int reserved_argument_area_size () const { return _reserved_argument_area_size; } int framesize () const { assert(_framesize != -1, "hasn't been calculated"); return _framesize; } @@ -207,6 +207,9 @@ class FrameMap : public CompilationResourceObj { Address address_for_monitor_object(int monitor_index) const { return make_new_address(sp_offset_for_monitor_object(monitor_index)); } + Address address_for_orig_pc_addr() const { + return make_new_address(sp_offset_for_monitor_base(_num_monitors)); + } // Creates Location describing desired slot and returns it via pointer // to Location object. 
Returns true if the stack frame offset was legal diff --git a/src/hotspot/share/c1/c1_GraphBuilder.cpp b/src/hotspot/share/c1/c1_GraphBuilder.cpp index db55b8c5fa81d..ab35a35963e43 100644 --- a/src/hotspot/share/c1/c1_GraphBuilder.cpp +++ b/src/hotspot/share/c1/c1_GraphBuilder.cpp @@ -26,9 +26,13 @@ #include "c1/c1_CFGPrinter.hpp" #include "c1/c1_Compilation.hpp" #include "c1/c1_GraphBuilder.hpp" +#include "c1/c1_Instruction.hpp" #include "c1/c1_InstructionPrinter.hpp" +#include "c1/c1_ValueType.hpp" #include "ci/ciCallSite.hpp" #include "ci/ciField.hpp" +#include "ci/ciFlatArrayKlass.hpp" +#include "ci/ciInlineKlass.hpp" #include "ci/ciKlass.hpp" #include "ci/ciMemberName.hpp" #include "ci/ciSymbols.hpp" @@ -40,6 +44,7 @@ #include "interpreter/bytecode.hpp" #include "jfr/jfrEvents.hpp" #include "memory/resourceArea.hpp" +#include "runtime/arguments.hpp" #include "runtime/sharedRuntime.hpp" #include "utilities/checkedCast.hpp" #include "utilities/macros.hpp" @@ -1047,7 +1052,15 @@ void GraphBuilder::store_local(ValueStack* state, Value x, int index) { void GraphBuilder::load_indexed(BasicType type) { // In case of in block code motion in range check elimination - ValueStack* state_before = copy_state_indexed_access(); + ValueStack* state_before = nullptr; + int array_idx = state()->stack_size() - 2; + if (type == T_OBJECT && state()->stack_at(array_idx)->maybe_flat_array()) { + // Save the entire state and re-execute on deopt when accessing flat arrays + state_before = copy_state_before(); + state_before->set_should_reexecute(true); + } else { + state_before = copy_state_indexed_access(); + } compilation()->set_has_access_indexed(true); Value index = ipop(); Value array = apop(); @@ -1059,13 +1072,78 @@ void GraphBuilder::load_indexed(BasicType type) { (array->as_NewMultiArray() && array->as_NewMultiArray()->dims()->at(0)->type()->is_constant())) { length = append(new ArrayLength(array, state_before)); } - push(as_ValueType(type), append(new LoadIndexed(array, 
index, length, type, state_before))); + + bool need_membar = false; + LoadIndexed* load_indexed = nullptr; + Instruction* result = nullptr; + if (array->is_loaded_flat_array()) { + ciType* array_type = array->declared_type(); + ciFlatArrayKlass* array_klass = array_type->as_flat_array_klass(); + ciInlineKlass* elem_klass = array_klass->element_klass()->as_inline_klass(); + + bool can_delay_access = false; + ciBytecodeStream s(method()); + s.force_bci(bci()); + s.next(); + if (s.cur_bc() == Bytecodes::_getfield) { + bool is_null_free = array_klass->is_elem_null_free(); + bool will_link; + ciField* next_field = s.get_field(will_link); + bool next_needs_patching = !next_field->holder()->is_initialized() || + !next_field->will_link(method(), Bytecodes::_getfield) || + PatchALot; + bool needs_atomic_access = array_klass->is_elem_atomic(); + can_delay_access = is_null_free && C1UseDelayedFlattenedFieldReads && + !next_needs_patching && !needs_atomic_access; + } + if (can_delay_access) { + // potentially optimizable array access, storing information for delayed decision + LoadIndexed* li = new LoadIndexed(array, index, length, type, state_before); + DelayedLoadIndexed* dli = new DelayedLoadIndexed(li, state_before); + li->set_delayed(dli); + set_pending_load_indexed(dli); + return; // Nothing else to do for now + } else { + NewInstance* buffer = new NewInstance(elem_klass, state_before, false, true); + buffer->set_null_free(true); + _memory->new_instance(buffer); + result = append_split(buffer); + load_indexed = new LoadIndexed(array, index, length, type, state_before); + load_indexed->set_buffer(buffer); + // The LoadIndexed node will initialize this instance by copying from + // the flat field. Ensure these stores are visible before any + // subsequent store that publishes this reference. 
+ need_membar = true; + } + } else { + load_indexed = new LoadIndexed(array, index, length, type, state_before); + if (profile_array_accesses() && is_reference_type(type)) { + compilation()->set_would_profile(true); + load_indexed->set_should_profile(true); + load_indexed->set_profiled_method(method()); + load_indexed->set_profiled_bci(bci()); + } + } + result = append(load_indexed); + if (need_membar) { + append(new MemBar(lir_membar_storestore)); + } + assert(!load_indexed->should_profile() || load_indexed == result, "should not be optimized out"); + push(as_ValueType(type), result); } void GraphBuilder::store_indexed(BasicType type) { // In case of in block code motion in range check elimination - ValueStack* state_before = copy_state_indexed_access(); + ValueStack* state_before = nullptr; + int array_idx = state()->stack_size() - 3; + if (type == T_OBJECT && state()->stack_at(array_idx)->maybe_flat_array()) { + // Save the entire state and re-execute on deopt when accessing flat arrays + state_before = copy_state_before(); + state_before->set_should_reexecute(true); + } else { + state_before = copy_state_indexed_access(); + } compilation()->set_has_access_indexed(true); Value value = pop(as_ValueType(type)); Value index = ipop(); @@ -1090,23 +1168,19 @@ void GraphBuilder::store_indexed(BasicType type) { } else if (type == T_BYTE) { check_boolean = true; } - StoreIndexed* result = new StoreIndexed(array, index, length, type, value, state_before, check_boolean); - append(result); - _memory->store_value(value); - if (type == T_OBJECT && is_profiling()) { - // Note that we'd collect profile data in this method if we wanted it. 
+ StoreIndexed* store_indexed = new StoreIndexed(array, index, length, type, value, state_before, check_boolean); + if (profile_array_accesses() && is_reference_type(type) && !array->is_loaded_flat_array()) { compilation()->set_would_profile(true); - - if (profile_checkcasts()) { - result->set_profiled_method(method()); - result->set_profiled_bci(bci()); - result->set_should_profile(true); - } + store_indexed->set_should_profile(true); + store_indexed->set_profiled_method(method()); + store_indexed->set_profiled_bci(bci()); } + Instruction* result = append(store_indexed); + assert(!store_indexed->should_profile() || store_indexed == result, "should not be optimized out"); + _memory->store_value(value); } - void GraphBuilder::stack_op(Bytecodes::Code code) { switch (code) { case Bytecodes::_pop: @@ -1291,9 +1365,36 @@ void GraphBuilder::if_node(Value x, If::Condition cond, Value y, ValueStack* sta BlockBegin* tsux = block_at(stream()->get_dest()); BlockBegin* fsux = block_at(stream()->next_bci()); bool is_bb = tsux->bci() < stream()->cur_bci() || fsux->bci() < stream()->cur_bci(); + + bool subst_check = false; + if (Arguments::is_valhalla_enabled() && (stream()->cur_bc() == Bytecodes::_if_acmpeq || stream()->cur_bc() == Bytecodes::_if_acmpne)) { + ValueType* left_vt = x->type(); + ValueType* right_vt = y->type(); + if (left_vt->is_object()) { + assert(right_vt->is_object(), "must be"); + ciKlass* left_klass = x->as_loaded_klass_or_null(); + ciKlass* right_klass = y->as_loaded_klass_or_null(); + + if (left_klass == nullptr || right_klass == nullptr) { + // The klass is still unloaded, or came from a Phi node. Go slow case; + subst_check = true; + } else if (left_klass->can_be_inline_klass() || right_klass->can_be_inline_klass()) { + // Either operand may be a value object, but we're not sure. 
Go slow case; + subst_check = true; + } else { + // No need to do substitutability check + } + } + } + if ((stream()->cur_bc() == Bytecodes::_if_acmpeq || stream()->cur_bc() == Bytecodes::_if_acmpne) && + is_profiling() && profile_branches()) { + compilation()->set_would_profile(true); + append(new ProfileACmpTypes(method(), bci(), x, y)); + } + // In case of loop invariant code motion or predicate insertion // before the body of a loop the state is needed - Instruction *i = append(new If(x, cond, false, y, tsux, fsux, (is_bb || compilation()->is_optimistic()) ? state_before : nullptr, is_bb)); + Instruction *i = append(new If(x, cond, false, y, tsux, fsux, (is_bb || compilation()->is_optimistic() || subst_check) ? state_before : nullptr, is_bb, subst_check)); assert(i->as_Goto() == nullptr || (i->as_Goto()->sux_at(0) == tsux && i->as_Goto()->is_safepoint() == (tsux->bci() < stream()->cur_bci())) || @@ -1548,8 +1649,8 @@ void GraphBuilder::method_return(Value x, bool ignore_return) { // The conditions for a memory barrier are described in Parse::do_exits(). 
bool need_mem_bar = false; - if (method()->name() == ciSymbols::object_initializer_name() && - (scope()->wrote_final() || scope()->wrote_stable() || + if (method()->is_object_constructor() && + (scope()->wrote_non_strict_final() || scope()->wrote_stable() || (AlwaysSafeConstructors && scope()->wrote_fields()) || (support_IRIW_for_not_multiple_copy_atomic_cpu && scope()->wrote_volatile()))) { need_mem_bar = true; @@ -1699,16 +1800,40 @@ Value GraphBuilder::make_constant(ciConstant field_value, ciField* field) { } } +void GraphBuilder::copy_inline_content(ciInlineKlass* vk, Value src, int src_off, Value dest, int dest_off, ValueStack* state_before, ciField* enclosing_field) { + for (int i = 0; i < vk->nof_declared_nonstatic_fields(); i++) { + ciField* field = vk->declared_nonstatic_field_at(i); + int offset = field->offset_in_bytes() - vk->payload_offset(); + if (field->is_flat()) { + copy_inline_content(field->type()->as_inline_klass(), src, src_off + offset, dest, dest_off + offset, state_before, enclosing_field); + if (!field->is_null_free()) { + // Nullable, copy the null marker using Unsafe because null markers are not real fields + int null_marker_offset = field->null_marker_offset() - vk->payload_offset(); + Value offset = append(new Constant(new LongConstant(src_off + null_marker_offset))); + Value nm = append(new UnsafeGet(T_BOOLEAN, src, offset, false)); + offset = append(new Constant(new LongConstant(dest_off + null_marker_offset))); + append(new UnsafePut(T_BOOLEAN, dest, offset, nm, false)); + } + } else { + Value value = append(new LoadField(src, src_off + offset, field, false, state_before, false)); + StoreField* store = new StoreField(dest, dest_off + offset, field, value, false, state_before, false); + store->set_enclosing_field(enclosing_field); + append(store); + } + } +} + void GraphBuilder::access_field(Bytecodes::Code code) { bool will_link; ciField* field = stream()->get_field(will_link); ciInstanceKlass* holder = field->holder(); - BasicType
field_type = field->type()->basic_type(); - ValueType* type = as_ValueType(field_type); + BasicType field_basic_type = field->type()->basic_type(); + ValueType* type = as_ValueType(field_basic_type); + // call will_link again to determine if the field is valid. const bool needs_patching = !holder->is_loaded() || !field->will_link(method(), code) || - PatchALot; + (!field->is_flat() && PatchALot); ValueStack* state_before = nullptr; if (!holder->is_initialized() || needs_patching) { @@ -1732,15 +1857,15 @@ void GraphBuilder::access_field(Bytecodes::Code code) { if (field->is_volatile()) { scope()->set_wrote_volatile(); } - if (field->is_final()) { - scope()->set_wrote_final(); + if (field->is_final() && !field->is_strict()) { + scope()->set_wrote_non_strict_final(); } if (field->is_stable()) { scope()->set_wrote_stable(); } } - const int offset = !needs_patching ? field->offset_in_bytes() : -1; + int offset = !needs_patching ? field->offset_in_bytes() : -1; switch (code) { case Bytecodes::_getstatic: { // check for compile-time constants, i.e., initialized static final fields @@ -1757,8 +1882,9 @@ void GraphBuilder::access_field(Bytecodes::Code code) { if (state_before == nullptr) { state_before = copy_state_for_exception(); } - push(type, append(new LoadField(append(obj), offset, field, true, - state_before, needs_patching))); + LoadField* load_field = new LoadField(append(obj), offset, field, true, + state_before, needs_patching); + push(type, append(load_field)); } break; } @@ -1767,30 +1893,47 @@ void GraphBuilder::access_field(Bytecodes::Code code) { if (state_before == nullptr) { state_before = copy_state_for_exception(); } - if (field->type()->basic_type() == T_BOOLEAN) { + if (field_basic_type == T_BOOLEAN) { Value mask = append(new Constant(new IntConstant(1))); val = append(new LogicOp(Bytecodes::_iand, val, mask)); } + if (field->is_null_free()) { + null_check(val); + + ciType* field_type = field->type(); + if (field_type->is_loaded() && 
field_type->is_inlinetype() && field_type->as_inline_klass()->is_empty() && + (!method()->is_class_initializer() || field->is_flat())) { + // Storing to a field of an empty, null-free inline type that is already initialized. Ignore. + break; + } + } append(new StoreField(append(obj), offset, field, val, true, state_before, needs_patching)); break; } case Bytecodes::_getfield: { // Check for compile-time constants, i.e., trusted final non-static fields. Value constant = nullptr; - obj = apop(); - ObjectType* obj_type = obj->type()->as_ObjectType(); - if (field->is_constant() && obj_type->is_constant() && !PatchALot) { - ciObject* const_oop = obj_type->constant_value(); - if (!const_oop->is_null_object() && const_oop->is_loaded()) { - ciConstant field_value = field->constant_value_of(const_oop); - if (field_value.is_valid()) { - constant = make_constant(field_value, field); - // For CallSite objects add a dependency for invalidation of the optimization. - if (field->is_call_site_target()) { - ciCallSite* call_site = const_oop->as_call_site(); - if (!call_site->is_fully_initialized_constant_call_site()) { - ciMethodHandle* target = field_value.as_object()->as_method_handle(); - dependency_recorder()->assert_call_site_target_value(call_site, target); + if (state_before == nullptr && field->is_flat()) { + // Save the entire state and re-execute on deopt when accessing flat fields + assert(Interpreter::bytecode_should_reexecute(code), "should reexecute"); + state_before = copy_state_before(); + } + if (!has_pending_field_access() && !has_pending_load_indexed()) { + obj = apop(); + ObjectType* obj_type = obj->type()->as_ObjectType(); + if (field->is_constant() && !field->is_flat() && obj_type->is_constant() && !PatchALot) { + ciObject* const_oop = obj_type->constant_value(); + if (!const_oop->is_null_object() && const_oop->is_loaded()) { + ciConstant field_value = field->constant_value_of(const_oop); + if (field_value.is_valid()) { + constant = make_constant(field_value, 
field); + // For CallSite objects add a dependency for invalidation of the optimization. + if (field->is_call_site_target()) { + ciCallSite* call_site = const_oop->as_call_site(); + if (!call_site->is_fully_initialized_constant_call_site()) { + ciMethodHandle* target = field_value.as_object()->as_method_handle(); + dependency_recorder()->assert_call_site_target_value(call_site, target); + } } } } @@ -1802,30 +1945,144 @@ void GraphBuilder::access_field(Bytecodes::Code code) { if (state_before == nullptr) { state_before = copy_state_for_exception(); } - LoadField* load = new LoadField(obj, offset, field, false, state_before, needs_patching); - Value replacement = !needs_patching ? _memory->load(load) : load; - if (replacement != load) { - assert(replacement->is_linked() || !replacement->can_be_linked(), "should already by linked"); - // Writing an (integer) value to a boolean, byte, char or short field includes an implicit narrowing - // conversion. Emit an explicit conversion here to get the correct field value after the write. 
- BasicType bt = field->type()->basic_type(); - switch (bt) { - case T_BOOLEAN: - case T_BYTE: - replacement = append(new Convert(Bytecodes::_i2b, replacement, as_ValueType(bt))); - break; - case T_CHAR: - replacement = append(new Convert(Bytecodes::_i2c, replacement, as_ValueType(bt))); - break; - case T_SHORT: - replacement = append(new Convert(Bytecodes::_i2s, replacement, as_ValueType(bt))); - break; - default: + if (!field->is_flat()) { + if (has_pending_field_access()) { + assert(!needs_patching, "Can't patch delayed field access"); + obj = pending_field_access()->obj(); + offset += pending_field_access()->offset() - field->holder()->as_inline_klass()->payload_offset(); + field = pending_field_access()->holder()->get_field_by_offset(offset, false); + assert(field != nullptr, "field not found"); + set_pending_field_access(nullptr); + } else if (has_pending_load_indexed()) { + assert(!needs_patching, "Can't patch delayed field access"); + pending_load_indexed()->update(field, offset - field->holder()->as_inline_klass()->payload_offset()); + LoadIndexed* li = pending_load_indexed()->load_instr(); + li->set_type(type); + push(type, append(li)); + set_pending_load_indexed(nullptr); break; } - push(type, replacement); + LoadField* load = new LoadField(obj, offset, field, false, state_before, needs_patching); + Value replacement = !needs_patching ? _memory->load(load) : load; + if (replacement != load) { + assert(replacement->is_linked() || !replacement->can_be_linked(), "should already be linked"); + // Writing an (integer) value to a boolean, byte, char or short field includes an implicit narrowing + // conversion. Emit an explicit conversion here to get the correct field value after the write. 
+ switch (field_basic_type) { + case T_BOOLEAN: + case T_BYTE: + replacement = append(new Convert(Bytecodes::_i2b, replacement, type)); + break; + case T_CHAR: + replacement = append(new Convert(Bytecodes::_i2c, replacement, type)); + break; + case T_SHORT: + replacement = append(new Convert(Bytecodes::_i2s, replacement, type)); + break; + default: + break; + } + push(type, replacement); + } else { + push(type, append(load)); + } } else { - push(type, append(load)); + // Flat field + assert(!needs_patching, "Can't patch flat inline type field access"); + ciInlineKlass* inline_klass = field->type()->as_inline_klass(); + if (field->is_atomic()) { + assert(!has_pending_field_access(), "Pending field accesses are not supported"); + LoadField* load = new LoadField(obj, offset, field, false, state_before, needs_patching); + push(type, append(load)); + } else { + // Look at the next bytecode to check if we can delay the field access + bool can_delay_access = false; + if (field->is_null_free()) { + ciBytecodeStream s(method()); + s.force_bci(bci()); + s.next(); + if (s.cur_bc() == Bytecodes::_getfield && !needs_patching) { + ciField* next_field = s.get_field(will_link); + bool next_needs_patching = !next_field->holder()->is_loaded() || + !next_field->will_link(method(), Bytecodes::_getfield) || + PatchALot; + // We can't update the offset for atomic accesses + bool next_needs_atomic_access = next_field->is_flat() && next_field->is_atomic(); + can_delay_access = C1UseDelayedFlattenedFieldReads && !next_needs_patching && !next_needs_atomic_access && next_field->is_null_free(); + } + } + + if (can_delay_access) { + if (has_pending_load_indexed()) { + pending_load_indexed()->update(field, offset - field->holder()->as_inline_klass()->payload_offset()); + } else if (has_pending_field_access()) { + pending_field_access()->inc_offset(offset - field->holder()->as_inline_klass()->payload_offset()); + } else { + null_check(obj); + DelayedFieldAccess* dfa = new DelayedFieldAccess(obj, 
field->holder(), field->offset_in_bytes(), state_before); + set_pending_field_access(dfa); + } + } else { + if (!field->is_strict()) { + scope()->set_wrote_non_strict_final(); + } + scope()->set_wrote_fields(); + if (has_pending_load_indexed()) { + assert(field->is_null_free(), "nullable fields do not support delayed accesses yet"); + assert(!needs_patching, "Can't patch delayed field access"); + pending_load_indexed()->update(field, offset - field->holder()->as_inline_klass()->payload_offset()); + NewInstance* buffer = new NewInstance(inline_klass, pending_load_indexed()->state_before(), false, true); + buffer->set_null_free(true); + _memory->new_instance(buffer); + pending_load_indexed()->load_instr()->set_buffer(buffer); + apush(append_split(buffer)); + append(pending_load_indexed()->load_instr()); + set_pending_load_indexed(nullptr); + } else if (has_pending_field_access()) { + assert(field->is_null_free(), "nullable fields do not support delayed accesses yet"); + state_before = pending_field_access()->state_before(); + NewInstance* buffer = new NewInstance(inline_klass, state_before, false, true); + _memory->new_instance(buffer); + apush(append_split(buffer)); + copy_inline_content(inline_klass, pending_field_access()->obj(), + pending_field_access()->offset() + field->offset_in_bytes() - field->holder()->as_inline_klass()->payload_offset(), + buffer, inline_klass->payload_offset(), state_before); + set_pending_field_access(nullptr); + } else { + if (!field->is_null_free() && !inline_klass->is_initialized()) { + // Cannot allocate an instance of inline_klass because it may not have been + // initialized, bail out for now + bailout("load from an uninitialized nullable non-atomic flat field"); + return; + } + + NewInstance* buffer = new NewInstance(inline_klass, state_before, false, true); + _memory->new_instance(buffer); + append_split(buffer); + + if (inline_klass->is_initialized() && inline_klass->is_empty()) { + // Needs an explicit null check because below
code does not perform any actual load if there are no fields + null_check(obj); + } + copy_inline_content(inline_klass, obj, field->offset_in_bytes(), buffer, inline_klass->payload_offset(), state_before); + + Instruction* result = buffer; + if (!field->is_null_free()) { + Value int_zero = append(new Constant(intZero)); + Value object_null = append(new Constant(objectNull)); + Value nm_offset = append(new Constant(new LongConstant(offset + inline_klass->null_marker_offset_in_payload()))); + Value nm = append(new UnsafeGet(T_BOOLEAN, obj, nm_offset, false)); + result = append(new IfOp(nm, Instruction::neq, int_zero, buffer, object_null, state_before, false)); + } + apush(result); + } + + // If we allocated a new instance ensure the stores to copy the + // field contents are visible before any subsequent store that + // publishes this reference. + append(new MemBar(lir_membar_storestore)); + } + } } } break; @@ -1836,14 +2093,58 @@ void GraphBuilder::access_field(Bytecodes::Code code) { if (state_before == nullptr) { state_before = copy_state_for_exception(); } - if (field->type()->basic_type() == T_BOOLEAN) { + if (field_basic_type == T_BOOLEAN) { Value mask = append(new Constant(new IntConstant(1))); val = append(new LogicOp(Bytecodes::_iand, val, mask)); } - StoreField* store = new StoreField(obj, offset, field, val, false, state_before, needs_patching); - if (!needs_patching) store = _memory->store(store); - if (store != nullptr) { - append(store); + + ciType* field_type = field->type(); + if (field->is_null_free() && field_type->is_loaded() && field_type->is_inlinetype() && + field_type->as_inline_klass()->is_empty() && (!method()->is_object_constructor() || field->is_flat())) { + // Storing to a field of an empty, null-free inline type that is already initialized. Ignore. 
+ null_check(obj); + null_check(val); + } else if (!field->is_flat()) { + if (field->is_null_free()) { + null_check(val); + } + StoreField* store = new StoreField(obj, offset, field, val, false, state_before, needs_patching); + if (!needs_patching) store = _memory->store(store); + if (store != nullptr) { + append(store); + } + } else { + // Flat field + assert(!needs_patching, "Can't patch flat inline type field access"); + ciInlineKlass* inline_klass = field_type->as_inline_klass(); + if (field->is_atomic()) { + if (field->is_null_free()) { + null_check(val); + } + append(new StoreField(obj, offset, field, val, false, state_before, needs_patching)); + } else if (field->is_null_free()) { + assert(!inline_klass->is_empty(), "should have been handled"); + copy_inline_content(inline_klass, val, inline_klass->payload_offset(), obj, offset, state_before, field); + } else { + if (!inline_klass->is_initialized()) { + // null_reset_value is not available, bailout for now + bailout("store to an uninitialized nullable non-atomic flat field"); + return; + } + + // Store the subfields when field is a nullable non-atomic field + Value object_null = append(new Constant(objectNull)); + Value null_reset_value = append(new Constant(new ObjectConstant(inline_klass->get_null_reset_value().as_object()))); + Value src = append(new IfOp(val, Instruction::neq, object_null, val, null_reset_value, state_before, false)); + copy_inline_content(inline_klass, src, inline_klass->payload_offset(), obj, offset, state_before); + + // Store the null marker + Value int_one = append(new Constant(new IntConstant(1))); + Value int_zero = append(new Constant(intZero)); + Value nm = append(new IfOp(val, Instruction::neq, object_null, int_one, int_zero, state_before, false)); + Value nm_offset = append(new Constant(new LongConstant(offset + inline_klass->null_marker_offset_in_payload()))); + append(new UnsafePut(T_BOOLEAN, obj, nm_offset, nm, false)); + } } break; } @@ -1853,7 +2154,6 @@ void 
GraphBuilder::access_field(Bytecodes::Code code) { } } - Dependencies* GraphBuilder::dependency_recorder() const { return compilation()->dependency_recorder(); } @@ -1969,7 +2269,7 @@ void GraphBuilder::invoke(Bytecodes::Code code) { if (bc_raw == Bytecodes::_invokeinterface) { receiver_constraint = holder; - } else if (bc_raw == Bytecodes::_invokespecial && !target->is_object_initializer() && calling_klass->is_interface()) { + } else if (bc_raw == Bytecodes::_invokespecial && !target->is_object_constructor() && calling_klass->is_interface()) { receiver_constraint = calling_klass; } @@ -2159,7 +2459,7 @@ void GraphBuilder::invoke(Bytecodes::Code code) { CHECK_BAILOUT(); // inlining not successful => standard invoke - ValueType* result_type = as_ValueType(declared_signature->return_type()); + ciType* return_type = declared_signature->return_type(); ValueStack* state_before = copy_state_exhandling(); // The bytecode (code) might change in this method so we are checking this very late. @@ -2208,14 +2508,15 @@ void GraphBuilder::invoke(Bytecodes::Code code) { } } - Invoke* result = new Invoke(code, result_type, recv, args, target, state_before); + Invoke* result = new Invoke(code, return_type, recv, args, target, state_before); // push result append_split(result); - if (result_type != voidType) { - push(result_type, result); + if (!return_type->is_void()) { + push(as_ValueType(return_type), result); } - if (profile_return() && result_type->is_object_kind()) { + + if (profile_return() && return_type->is_object()) { profile_return_type(result, target); } } @@ -2225,12 +2526,11 @@ void GraphBuilder::new_instance(int klass_index) { ValueStack* state_before = copy_state_exhandling(); ciKlass* klass = stream()->get_klass(); assert(klass->is_instance_klass(), "must be an instance klass"); - NewInstance* new_instance = new NewInstance(klass->as_instance_klass(), state_before, stream()->is_unresolved_klass()); + NewInstance* new_instance = new 
NewInstance(klass->as_instance_klass(), state_before, stream()->is_unresolved_klass(), false); _memory->new_instance(new_instance); apush(append_split(new_instance)); } - void GraphBuilder::new_type_array() { ValueStack* state_before = copy_state_exhandling(); apush(append_split(new NewTypeArray(ipop(), (BasicType)stream()->get_index(), state_before, true))); @@ -2303,9 +2603,28 @@ void GraphBuilder::instance_of(int klass_index) { void GraphBuilder::monitorenter(Value x, int bci) { + bool maybe_inlinetype = false; + if (bci == InvocationEntryBci) { + // Called by GraphBuilder::inline_sync_entry. +#ifdef ASSERT + ciType* obj_type = x->declared_type(); + assert(obj_type == nullptr || !obj_type->is_inlinetype(), "inline types cannot have synchronized methods"); +#endif + } else { + // We are compiling a monitorenter bytecode + if (Arguments::is_valhalla_enabled()) { + ciType* obj_type = x->declared_type(); + if (obj_type == nullptr || obj_type->can_be_inline_klass()) { + // If we're (possibly) locking on an inline type, check for markWord::always_locked_pattern + // and throw IMSE. (obj_type is null for Phi nodes, so let's just be conservative). 
+ maybe_inlinetype = true; + } + } + } + // save state before locking in case of deoptimization after a NullPointerException ValueStack* state_before = copy_state_for_exception_with_bci(bci); - append_with_bci(new MonitorEnter(x, state()->lock(x), state_before), bci); + append_with_bci(new MonitorEnter(x, state()->lock(x), state_before, maybe_inlinetype), bci); kill_all(); } @@ -2430,6 +2749,7 @@ void GraphBuilder::null_check(Value value) { } } } + if (value->is_null_free()) return; } append(new NullCheck(value, copy_state_for_exception())); } @@ -2455,7 +2775,9 @@ XHandlers* GraphBuilder::handle_exception(Instruction* instruction) { do { int cur_bci = cur_state->bci(); assert(cur_scope_data->scope() == cur_state->scope(), "scopes do not match"); - assert(cur_bci == SynchronizationEntryBCI || cur_bci == cur_scope_data->stream()->cur_bci(), "invalid bci"); + assert(cur_bci == SynchronizationEntryBCI || cur_bci == cur_scope_data->stream()->cur_bci() + || has_pending_field_access() || has_pending_load_indexed(), "invalid bci"); + // join with all potential exception handlers XHandlers* list = cur_scope_data->xhandlers(); @@ -3269,6 +3591,8 @@ GraphBuilder::GraphBuilder(Compilation* compilation, IRScope* scope) , _inline_bailout_msg(nullptr) , _instruction_count(0) , _osr_entry(nullptr) + , _pending_field_access(nullptr) + , _pending_load_indexed(nullptr) { int osr_bci = compilation->osr_bci(); @@ -4021,6 +4345,34 @@ bool GraphBuilder::try_inline_full(ciMethod* callee, bool holder_known, bool ign caller_state->truncate_stack(args_base); assert(callee_state->stack_size() == 0, "callee stack must be empty"); + // Check if we need a membar at the beginning of the java.lang.Object + // constructor to satisfy the memory model for strict fields. 
+ if (Arguments::is_valhalla_enabled() && method()->intrinsic_id() == vmIntrinsics::_Object_init) { + Value receiver = state()->local_at(0); + ciType* klass = receiver->exact_type(); + if (klass == nullptr) { + // No exact type, check if the declared type has no implementors and add a dependency + klass = receiver->declared_type(); + klass = compilation()->cha_exact_type(klass); + } + if (klass != nullptr && klass->is_instance_klass()) { + // Exact receiver type, check if there is a strict field + ciInstanceKlass* holder = klass->as_instance_klass(); + for (int i = 0; i < holder->nof_nonstatic_fields(); i++) { + ciField* field = holder->nonstatic_field_at(i); + if (field->is_strict()) { + // Found a strict field, a membar is needed + append(new MemBar(lir_membar_storestore)); + break; + } + } + } else if (klass == nullptr) { + // We can't statically determine the type of the receiver and therefore need + // to put a membar here because it could have a strict field. + append(new MemBar(lir_membar_storestore)); + } + } + Value lock = nullptr; BlockBegin* sync_handler = nullptr; diff --git a/src/hotspot/share/c1/c1_GraphBuilder.hpp b/src/hotspot/share/c1/c1_GraphBuilder.hpp index 81983829ccba8..eecaf3a5f5945 100644 --- a/src/hotspot/share/c1/c1_GraphBuilder.hpp +++ b/src/hotspot/share/c1/c1_GraphBuilder.hpp @@ -35,6 +35,24 @@ class MemoryBuffer; +class DelayedFieldAccess : public CompilationResourceObj { +private: + Value _obj; + ciInstanceKlass* _holder; + int _offset; + ValueStack* _state_before; + +public: + DelayedFieldAccess(Value obj, ciInstanceKlass* holder, int offset, ValueStack* state_before) + : _obj(obj), _holder(holder) , _offset(offset), _state_before(state_before) { } + + Value obj() const { return _obj; } + ciInstanceKlass* holder() const { return _holder; } + int offset() const { return _offset; } + void inc_offset(int offset) { _offset += offset; } + ValueStack* state_before() const { return _state_before; } +}; + class GraphBuilder { friend class 
JfrResolution; private: @@ -192,6 +210,10 @@ class GraphBuilder { Instruction* _last; // the last instruction added bool _skip_block; // skip processing of the rest of this block + // support for optimization of accesses to flat fields and flat arrays + DelayedFieldAccess* _pending_field_access; + DelayedLoadIndexed* _pending_load_indexed; + // accessors ScopeData* scope_data() const { return _scope_data; } Compilation* compilation() const { return _compilation; } @@ -209,6 +231,12 @@ class GraphBuilder { Bytecodes::Code code() const { return stream()->cur_bc(); } int bci() const { return stream()->cur_bci(); } int next_bci() const { return stream()->next_bci(); } + bool has_pending_field_access() { return _pending_field_access != nullptr; } + DelayedFieldAccess* pending_field_access() { return _pending_field_access; } + void set_pending_field_access(DelayedFieldAccess* delayed) { _pending_field_access = delayed; } + bool has_pending_load_indexed() { return _pending_load_indexed != nullptr; } + DelayedLoadIndexed* pending_load_indexed() { return _pending_load_indexed; } + void set_pending_load_indexed(DelayedLoadIndexed* delayed) { _pending_load_indexed = delayed; } // unified bailout support void bailout(const char* msg) const { compilation()->bailout(msg); } @@ -267,6 +295,9 @@ class GraphBuilder { void new_multi_array(int dimensions); void throw_op(int bci); + // inline types + void copy_inline_content(ciInlineKlass* vk, Value src, int src_off, Value dest, int dest_off, ValueStack* state_before, ciField* enclosing_field = nullptr); + // stack/code manipulation helpers Instruction* append_with_bci(Instruction* instr, int bci); Instruction* append(Instruction* instr); @@ -395,6 +426,7 @@ class GraphBuilder { bool profile_parameters() { return _compilation->profile_parameters(); } bool profile_arguments() { return _compilation->profile_arguments(); } bool profile_return() { return _compilation->profile_return(); } + bool profile_array_accesses(){ return 
_compilation->profile_array_accesses();} Values* args_list_for_profiling(ciMethod* target, int& start, bool may_have_receiver); Values* collect_args_for_profiling(Values* args, ciMethod* target, bool may_have_receiver); diff --git a/src/hotspot/share/c1/c1_IR.cpp b/src/hotspot/share/c1/c1_IR.cpp index 238a9bdda0d8f..e3ff62f66e3ac 100644 --- a/src/hotspot/share/c1/c1_IR.cpp +++ b/src/hotspot/share/c1/c1_IR.cpp @@ -142,7 +142,7 @@ IRScope::IRScope(Compilation* compilation, IRScope* caller, int caller_bci, ciMe _xhandlers = new XHandlers(method); _number_of_locks = 0; _monitor_pairing_ok = method->has_balanced_monitors(); - _wrote_final = false; + _wrote_non_strict_final = false; _wrote_fields = false; _wrote_volatile = false; _wrote_stable = false; @@ -171,6 +171,9 @@ int IRScope::max_stack() const { bool IRScopeDebugInfo::should_reexecute() { + if (_should_reexecute) { + return true; + } ciMethod* cur_method = scope()->method(); int cur_bci = bci(); if (cur_method != nullptr && cur_bci != SynchronizationEntryBCI) { @@ -212,11 +215,11 @@ CodeEmitInfo::CodeEmitInfo(CodeEmitInfo* info, ValueStack* stack) } -void CodeEmitInfo::record_debug_info(DebugInformationRecorder* recorder, int pc_offset) { +void CodeEmitInfo::record_debug_info(DebugInformationRecorder* recorder, int pc_offset, bool maybe_return_as_fields) { // record the safepoint before recording the debug info for enclosing scopes recorder->add_safepoint(pc_offset, _oop_map->deep_copy()); bool reexecute = _force_reexecute || _scope_debug_info->should_reexecute(); - _scope_debug_info->record_debug_info(recorder, pc_offset, reexecute); + _scope_debug_info->record_debug_info(recorder, pc_offset, reexecute, maybe_return_as_fields); recorder->end_safepoint(pc_offset); } diff --git a/src/hotspot/share/c1/c1_IR.hpp b/src/hotspot/share/c1/c1_IR.hpp index d6a4cddb9d784..74396129f917d 100644 --- a/src/hotspot/share/c1/c1_IR.hpp +++ b/src/hotspot/share/c1/c1_IR.hpp @@ -145,7 +145,7 @@ class IRScope: public 
CompilationResourceObj { XHandlers* _xhandlers; // the exception handlers int _number_of_locks; // the number of monitor lock slots needed bool _monitor_pairing_ok; // the monitor pairing info - bool _wrote_final; // has written final field + bool _wrote_non_strict_final; // has written non-strict final field bool _wrote_fields; // has written fields bool _wrote_volatile; // has written volatile field bool _wrote_stable; // has written @Stable field @@ -181,8 +181,8 @@ class IRScope: public CompilationResourceObj { void set_min_number_of_locks(int n) { if (n > _number_of_locks) _number_of_locks = n; } bool monitor_pairing_ok() const { return _monitor_pairing_ok; } BlockBegin* start() const { return _start; } - void set_wrote_final() { _wrote_final = true; } - bool wrote_final () const { return _wrote_final; } + void set_wrote_non_strict_final() { _wrote_non_strict_final = true; } + bool wrote_non_strict_final() const { return _wrote_non_strict_final; } void set_wrote_fields() { _wrote_fields = true; } bool wrote_fields () const { return _wrote_fields; } void set_wrote_volatile() { _wrote_volatile = true; } @@ -208,6 +208,7 @@ class IRScopeDebugInfo: public CompilationResourceObj { GrowableArray* _expressions; GrowableArray* _monitors; IRScopeDebugInfo* _caller; + bool _should_reexecute; public: IRScopeDebugInfo(IRScope* scope, @@ -215,13 +216,15 @@ class IRScopeDebugInfo: public CompilationResourceObj { GrowableArray* locals, GrowableArray* expressions, GrowableArray* monitors, - IRScopeDebugInfo* caller): + IRScopeDebugInfo* caller, + bool should_reexecute): _scope(scope) , _bci(bci) , _locals(locals) , _expressions(expressions) , _monitors(monitors) - , _caller(caller) {} + , _caller(caller) + , _should_reexecute(should_reexecute) {} IRScope* scope() { return _scope; } @@ -234,7 +237,7 @@ class IRScopeDebugInfo: public CompilationResourceObj { //Whether we should reexecute this bytecode for deopt bool should_reexecute(); - void 
record_debug_info(DebugInformationRecorder* recorder, int pc_offset, bool reexecute) { + void record_debug_info(DebugInformationRecorder* recorder, int pc_offset, bool reexecute, bool maybe_return_as_fields = false) { if (caller() != nullptr) { // Order is significant: Must record caller first. caller()->record_debug_info(recorder, pc_offset, false/*reexecute*/); @@ -243,12 +246,17 @@ class IRScopeDebugInfo: public CompilationResourceObj { DebugToken* expvals = recorder->create_scope_values(expressions()); DebugToken* monvals = recorder->create_monitor_values(monitors()); // reexecute allowed only for the topmost frame - bool return_oop = false; // This flag will be ignored since it used only for C2 with escape analysis. + bool return_oop = false; + bool return_scalarized = false; + if (maybe_return_as_fields) { + return_oop = true; + return_scalarized = true; + } bool rethrow_exception = false; bool has_ea_local_in_scope = false; bool arg_escape = false; recorder->describe_scope(pc_offset, methodHandle(), scope()->method(), bci(), - reexecute, rethrow_exception, return_oop, + reexecute, rethrow_exception, return_oop, return_scalarized, has_ea_local_in_scope, arg_escape, locvals, expvals, monvals); } }; @@ -285,7 +293,7 @@ class CodeEmitInfo: public CompilationResourceObj { bool deoptimize_on_exception() const { return _deoptimize_on_exception; } void add_register_oop(LIR_Opr opr); - void record_debug_info(DebugInformationRecorder* recorder, int pc_offset); + void record_debug_info(DebugInformationRecorder* recorder, int pc_offset, bool maybe_return_as_fields = false); bool force_reexecute() const { return _force_reexecute; } void set_force_reexecute() { _force_reexecute = true; } diff --git a/src/hotspot/share/c1/c1_Instruction.cpp b/src/hotspot/share/c1/c1_Instruction.cpp index 3a7edef0088ec..c40c7de366783 100644 --- a/src/hotspot/share/c1/c1_Instruction.cpp +++ b/src/hotspot/share/c1/c1_Instruction.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2025, Oracle 
and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -26,6 +26,8 @@ #include "c1/c1_InstructionPrinter.hpp" #include "c1/c1_IR.hpp" #include "c1/c1_ValueStack.hpp" +#include "ci/ciFlatArrayKlass.hpp" +#include "ci/ciInlineKlass.hpp" #include "ci/ciObjArrayKlass.hpp" #include "ci/ciTypeArrayKlass.hpp" #include "utilities/bitMap.inline.hpp" @@ -105,13 +107,77 @@ void Instruction::state_values_do(ValueVisitor* f) { } ciType* Instruction::exact_type() const { - ciType* t = declared_type(); + ciType* t = declared_type(); if (t != nullptr && t->is_klass()) { return t->as_klass()->exact_klass(); } return nullptr; } +ciKlass* Instruction::as_loaded_klass_or_null() const { + ciType* type = declared_type(); + if (type != nullptr && type->is_klass()) { + ciKlass* klass = type->as_klass(); + if (klass->is_loaded()) { + return klass; + } + } + return nullptr; +} + +bool Instruction::is_loaded_flat_array() const { + if (UseArrayFlattening) { + ciType* type = declared_type(); + return type != nullptr && type->is_flat_array_klass(); + } + return false; +} + +bool Instruction::maybe_flat_array() const { + if (UseArrayFlattening) { + ciType* type = declared_type(); + if (type != nullptr) { + if (type->is_ref_array_klass()) { + return false; + } else if (type->is_flat_array_klass()) { + return true; + } else if (type->is_obj_array_klass()) { + // This is the unrefined array type + ciKlass* element_klass = type->as_obj_array_klass()->element_klass(); + if (element_klass->can_be_inline_klass() && (!element_klass->is_inlinetype() || element_klass->as_inline_klass()->maybe_flat_in_array())) { + return true; + } + } else if (type->is_klass() && type->as_klass()->is_java_lang_Object()) { + // This can happen as a parameter to System.arraycopy() + return true; + } + } else { + // 
Type info gets lost during Phi merging (Phi, IfOp, etc), but we might be storing into a + // flat array, so we should do a runtime check. + return true; + } + } + return false; +} + +bool Instruction::maybe_null_free_array() const { + ciType* type = declared_type(); + if (type != nullptr) { + if (type->is_loaded() && type->is_array_klass() && type->as_array_klass()->is_refined()) { + return type->as_array_klass()->is_elem_null_free(); + } else if (type->is_obj_array_klass()) { + // Due to array covariance, the runtime type might be a null-free array. + if (type->as_obj_array_klass()->can_be_inline_array_klass()) { + return true; + } + } + } else { + // Type info gets lost during Phi merging (Phi, IfOp, etc), but we might be storing into a + // null-free array, so we should do a runtime check. + return true; + } + return false; +} #ifndef PRODUCT void Instruction::check_state(ValueStack* state) { @@ -172,7 +238,7 @@ ciType* Constant::exact_type() const { ciType* LoadIndexed::exact_type() const { ciType* array_type = array()->exact_type(); - if (array_type != nullptr) { + if (delayed() == nullptr && array_type != nullptr) { assert(array_type->is_array_klass(), "what else?"); ciArrayKlass* ak = (ciArrayKlass*)array_type; @@ -186,8 +252,10 @@ ciType* LoadIndexed::exact_type() const { return Instruction::exact_type(); } - ciType* LoadIndexed::declared_type() const { + if (delayed() != nullptr) { + return delayed()->field()->type(); + } ciType* array_type = array()->declared_type(); if (array_type == nullptr || !array_type->is_loaded()) { return nullptr; @@ -197,6 +265,20 @@ ciType* LoadIndexed::declared_type() const { return ak->element_type(); } +bool StoreIndexed::is_exact_flat_array_store() const { + if (array()->is_loaded_flat_array() && value()->as_Constant() == nullptr && value()->declared_type() != nullptr) { + ciKlass* element_klass = array()->declared_type()->as_flat_array_klass()->element_klass(); + ciKlass* actual_klass = value()->declared_type()->as_klass(); 
+ + // The following check can fail with inlining: + // void test45_inline(Object[] oa, Object o, int index) { oa[index] = o; } + // void test45(MyValue1[] va, int index, MyValue2 v) { test45_inline(va, v, index); } + if (element_klass == actual_klass) { + return true; + } + } + return false; +} ciType* LoadField::declared_type() const { return field()->type(); @@ -208,9 +290,14 @@ ciType* NewTypeArray::exact_type() const { } ciType* NewObjectArray::exact_type() const { + // Returns the refined type return ciObjArrayKlass::make(klass()); } +ciType* NewMultiArray::exact_type() const { + return _klass; +} + ciType* NewArray::declared_type() const { return exact_type(); } @@ -318,16 +405,43 @@ void BlockBegin::state_values_do(ValueVisitor* f) { } +StoreField::StoreField(Value obj, int offset, ciField* field, Value value, bool is_static, + ValueStack* state_before, bool needs_patching) + : AccessField(obj, offset, field, is_static, state_before, needs_patching) + , _value(value) + , _enclosing_field(nullptr) +{ +#ifdef ASSERT + AssertValues assert_value; + values_do(&assert_value); +#endif + pin(); +} + +StoreIndexed::StoreIndexed(Value array, Value index, Value length, BasicType elt_type, Value value, + ValueStack* state_before, bool check_boolean, bool mismatched) + : AccessIndexed(array, index, length, elt_type, state_before, mismatched) + , _value(value), _check_boolean(check_boolean) +{ +#ifdef ASSERT + AssertValues assert_value; + values_do(&assert_value); +#endif + pin(); +} + + // Implementation of Invoke -Invoke::Invoke(Bytecodes::Code code, ValueType* result_type, Value recv, Values* args, +Invoke::Invoke(Bytecodes::Code code, ciType* return_type, Value recv, Values* args, ciMethod* target, ValueStack* state_before) - : StateSplit(result_type, state_before) + : StateSplit(as_ValueType(return_type), state_before) , _code(code) , _recv(recv) , _args(args) , _target(target) + , _return_type(return_type) { set_flag(TargetIsLoadedFlag, target->is_loaded()); 
set_flag(TargetIsFinalFlag, target_is_loaded() && target->is_final_method()); @@ -344,7 +458,8 @@ Invoke::Invoke(Bytecodes::Code code, ValueType* result_type, Value recv, Values* _signature->append(as_BasicType(receiver()->type())); } for (int i = 0; i < number_of_arguments(); i++) { - ValueType* t = argument_at(i)->type(); + Value v = argument_at(i); + ValueType* t = v->type(); BasicType bt = as_BasicType(t); _signature->append(bt); } @@ -358,10 +473,8 @@ void Invoke::state_values_do(ValueVisitor* f) { } ciType* Invoke::declared_type() const { - ciSignature* declared_signature = state()->scope()->method()->get_declared_signature_at_bci(state()->bci()); - ciType *t = declared_signature->return_type(); - assert(t->basic_type() != T_VOID, "need return value of void method?"); - return t; + assert(_return_type->basic_type() != T_VOID, "need return value of void method?"); + return _return_type; } // Implementation of Constant @@ -989,3 +1102,4 @@ void RangeCheckPredicate::check_state() { void ProfileInvoke::state_values_do(ValueVisitor* f) { if (state() != nullptr) state()->values_do(f); } + diff --git a/src/hotspot/share/c1/c1_Instruction.hpp b/src/hotspot/share/c1/c1_Instruction.hpp index 7f6fb0f5191de..4bb70fdf9a459 100644 --- a/src/hotspot/share/c1/c1_Instruction.hpp +++ b/src/hotspot/share/c1/c1_Instruction.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -74,6 +74,7 @@ class NewArray; class NewTypeArray; class NewObjectArray; class NewMultiArray; +class Deoptimize; class TypeCheck; class CheckCast; class InstanceOf; @@ -97,6 +98,7 @@ class UnsafePut; class UnsafeGetAndSet; class ProfileCall; class ProfileReturnType; +class ProfileACmpTypes; class ProfileInvoke; class RuntimeCall; class MemBar; @@ -191,6 +193,7 @@ class InstructionVisitor: public StackObj { virtual void do_UnsafeGetAndSet(UnsafeGetAndSet* x) = 0; virtual void do_ProfileCall (ProfileCall* x) = 0; virtual void do_ProfileReturnType (ProfileReturnType* x) = 0; + virtual void do_ProfileACmpTypes(ProfileACmpTypes* x) = 0; virtual void do_ProfileInvoke (ProfileInvoke* x) = 0; virtual void do_RuntimeCall (RuntimeCall* x) = 0; virtual void do_MemBar (MemBar* x) = 0; @@ -207,9 +210,10 @@ class InstructionVisitor: public StackObj { // of ValueMap - make changes carefully! #define HASH1(x1 ) ((intx)(x1)) -#define HASH2(x1, x2 ) ((HASH1(x1 ) << 7) ^ HASH1(x2)) -#define HASH3(x1, x2, x3 ) ((HASH2(x1, x2 ) << 7) ^ HASH1(x3)) -#define HASH4(x1, x2, x3, x4) ((HASH3(x1, x2, x3) << 7) ^ HASH1(x4)) +#define HASH2(x1, x2 ) ((HASH1(x1 ) << 7) ^ HASH1(x2)) +#define HASH3(x1, x2, x3 ) ((HASH2(x1, x2 ) << 7) ^ HASH1(x3)) +#define HASH4(x1, x2, x3, x4) ((HASH3(x1, x2, x3 ) << 7) ^ HASH1(x4)) +#define HASH5(x1, x2, x3, x4, x5) ((HASH4(x1, x2, x3, x4) << 7) ^ HASH1(x5)) // The following macros are used to implement instruction-specific hashing. @@ -268,6 +272,21 @@ class InstructionVisitor: public StackObj { return true; \ } \ +#define HASHING4(class_name, enabled, f1, f2, f3, f4) \ + virtual intx hash() const { \ + return (enabled) ? 
HASH5(name(), f1, f2, f3, f4) : 0; \ + } \ + virtual bool is_equal(Value v) const { \ + if (!(enabled)) return false; \ + class_name* _v = v->as_##class_name(); \ + if (_v == nullptr) return false; \ + if (f1 != _v->f1) return false; \ + if (f2 != _v->f2) return false; \ + if (f3 != _v->f3) return false; \ + if (f4 != _v->f4) return false; \ + return true; \ + } \ + // The mother of all instructions... @@ -290,6 +309,7 @@ class Instruction: public CompilationResourceObj { XHandlers* _exception_handlers; // Flat list of exception handlers covering this instruction friend class UseCountComputer; + friend class GraphBuilder; void update_exception_state(ValueStack* state); @@ -342,6 +362,7 @@ class Instruction: public CompilationResourceObj { enum InstructionFlag { NeedsNullCheckFlag = 0, + NeverNullFlag, CanTrapFlag, DirectCompareFlag, IsSafepointFlag, @@ -432,6 +453,8 @@ class Instruction: public CompilationResourceObj { void set_needs_null_check(bool f) { set_flag(NeedsNullCheckFlag, f); } bool needs_null_check() const { return check_flag(NeedsNullCheckFlag); } + void set_null_free(bool f) { set_flag(NeverNullFlag, f); } + bool is_null_free() const { return check_flag(NeverNullFlag); } bool is_linked() const { return check_flag(IsLinkedInBlockFlag); } bool can_be_linked() { return as_Local() == nullptr && as_Phi() == nullptr; } @@ -442,6 +465,7 @@ class Instruction: public CompilationResourceObj { ValueStack* exception_state() const { return _exception_state; } virtual bool needs_exception_state() const { return true; } XHandlers* exception_handlers() const { return _exception_handlers; } + ciKlass* as_loaded_klass_or_null() const; // manipulation void pin(PinReason reason) { _pin_state |= reason; } @@ -486,6 +510,10 @@ class Instruction: public CompilationResourceObj { return _next; } + bool is_loaded_flat_array() const; + bool maybe_flat_array() const; + bool maybe_null_free_array() const; + Instruction *insert_after_same_bci(Instruction *i) { #ifndef PRODUCT 
i->set_printable_bci(printable_bci()); @@ -813,7 +841,9 @@ LEAF(LoadField, AccessField) LoadField(Value obj, int offset, ciField* field, bool is_static, ValueStack* state_before, bool needs_patching) : AccessField(obj, offset, field, is_static, state_before, needs_patching) - {} + { + set_null_free(field->is_null_free()); + } ciType* declared_type() const; @@ -825,20 +855,17 @@ LEAF(LoadField, AccessField) LEAF(StoreField, AccessField) private: Value _value; + ciField* _enclosing_field; // enclosing field (the flat one) for nested fields public: // creation StoreField(Value obj, int offset, ciField* field, Value value, bool is_static, - ValueStack* state_before, bool needs_patching) - : AccessField(obj, offset, field, is_static, state_before, needs_patching) - , _value(value) - { - ASSERT_VALUES - pin(); - } + ValueStack* state_before, bool needs_patching); // accessors Value value() const { return _value; } + ciField* enclosing_field() const { return _enclosing_field; } + void set_enclosing_field(ciField* field) { _enclosing_field = field; } // generic virtual void input_values_do(ValueVisitor* f) { AccessField::input_values_do(f); f->visit(&_value); } @@ -896,6 +923,8 @@ BASE(AccessIndexed, AccessArray) Value _length; BasicType _elt_type; bool _mismatched; + ciMethod* _profiled_method; + int _profiled_bci; public: // creation @@ -905,6 +934,8 @@ BASE(AccessIndexed, AccessArray) , _length(length) , _elt_type(elt_type) , _mismatched(mismatched) + , _profiled_method(nullptr) + , _profiled_bci(0) { set_flag(Instruction::NeedsRangeCheckFlag, true); ASSERT_VALUES @@ -920,20 +951,31 @@ BASE(AccessIndexed, AccessArray) // perform elimination of range checks involving constants bool compute_needs_range_check(); + // Helpers for MethodData* profiling + void set_should_profile(bool value) { set_flag(ProfileMDOFlag, value); } + void set_profiled_method(ciMethod* method) { _profiled_method = method; } + void set_profiled_bci(int bci) { _profiled_bci = bci; } + bool 
should_profile() const { return check_flag(ProfileMDOFlag); } + ciMethod* profiled_method() const { return _profiled_method; } + int profiled_bci() const { return _profiled_bci; } + // generic virtual void input_values_do(ValueVisitor* f) { AccessArray::input_values_do(f); f->visit(&_index); if (_length != nullptr) f->visit(&_length); } }; +class DelayedLoadIndexed; LEAF(LoadIndexed, AccessIndexed) private: - NullCheck* _explicit_null_check; // For explicit null check elimination + NullCheck* _explicit_null_check; // For explicit null check elimination + Value _buffer; // Buffer for load from flat arrays + DelayedLoadIndexed* _delayed; public: // creation LoadIndexed(Value array, Value index, Value length, BasicType elt_type, ValueStack* state_before, bool mismatched = false) : AccessIndexed(array, index, length, elt_type, state_before, mismatched) - , _explicit_null_check(nullptr) {} + , _explicit_null_check(nullptr), _buffer(nullptr), _delayed(nullptr) {} // accessors NullCheck* explicit_null_check() const { return _explicit_null_check; } @@ -945,40 +987,70 @@ LEAF(LoadIndexed, AccessIndexed) ciType* exact_type() const; ciType* declared_type() const; + Value buffer() const { return _buffer; } + + void set_buffer(Value buffer) { + assert(buffer == nullptr || buffer->as_NewInstance() != nullptr, "LoadIndexed flat array buffer must be a NewInstance"); + _buffer = buffer; + } + + DelayedLoadIndexed* delayed() const { return _delayed; } + void set_delayed(DelayedLoadIndexed* delayed) { _delayed = delayed; } + + virtual void input_values_do(ValueVisitor* f) { + AccessIndexed::input_values_do(f); + if (_buffer != nullptr) { + f->visit(&_buffer); + assert(_buffer->as_NewInstance() != nullptr, "LoadIndexed flat array buffer must stay a NewInstance"); + } + } + // generic; - HASHING3(LoadIndexed, true, elt_type(), array()->subst(), index()->subst()) + HASHING4(LoadIndexed, delayed() == nullptr && !should_profile(), elt_type(), array()->subst(), index()->subst(), buffer()) 
}; +class DelayedLoadIndexed : public CompilationResourceObj { +private: + LoadIndexed* _load_instr; + ValueStack* _state_before; + ciField* _field; + size_t _offset; + public: + DelayedLoadIndexed(LoadIndexed* load, ValueStack* state_before) + : _load_instr(load) + , _state_before(state_before) + , _field(nullptr) + , _offset(0) { } + + void update(ciField* field, int offset) { + assert(offset >= 0, "must be"); + _field = field; + _offset += offset; + } + + LoadIndexed* load_instr() const { return _load_instr; } + ValueStack* state_before() const { return _state_before; } + ciField* field() const { return _field; } + size_t offset() const { return _offset; } +}; LEAF(StoreIndexed, AccessIndexed) private: Value _value; - ciMethod* _profiled_method; - int _profiled_bci; bool _check_boolean; public: // creation StoreIndexed(Value array, Value index, Value length, BasicType elt_type, Value value, ValueStack* state_before, - bool check_boolean, bool mismatched = false) - : AccessIndexed(array, index, length, elt_type, state_before, mismatched) - , _value(value), _profiled_method(nullptr), _profiled_bci(0), _check_boolean(check_boolean) - { - ASSERT_VALUES - pin(); - } + bool check_boolean, bool mismatched = false); // accessors Value value() const { return _value; } bool check_boolean() const { return _check_boolean; } - // Helpers for MethodData* profiling - void set_should_profile(bool value) { set_flag(ProfileMDOFlag, value); } - void set_profiled_method(ciMethod* method) { _profiled_method = method; } - void set_profiled_bci(int bci) { _profiled_bci = bci; } - bool should_profile() const { return check_flag(ProfileMDOFlag); } - ciMethod* profiled_method() const { return _profiled_method; } - int profiled_bci() const { return _profiled_bci; } + + // Flattened array support + bool is_exact_flat_array_store() const; // generic virtual void input_values_do(ValueVisitor* f) { AccessIndexed::input_values_do(f); f->visit(&_value); } }; @@ -1089,16 +1161,19 @@ LEAF(IfOp, 
Op2) private: Value _tval; Value _fval; + bool _substitutability_check; public: // creation - IfOp(Value x, Condition cond, Value y, Value tval, Value fval) + IfOp(Value x, Condition cond, Value y, Value tval, Value fval, ValueStack* state_before, bool substitutability_check) : Op2(tval->type()->meet(fval->type()), (Bytecodes::Code)cond, x, y) , _tval(tval) , _fval(fval) + , _substitutability_check(substitutability_check) { ASSERT_VALUES assert(tval->type()->tag() == fval->type()->tag(), "types must match"); + set_state_before(state_before); } // accessors @@ -1107,7 +1182,7 @@ LEAF(IfOp, Op2) Condition cond() const { return (Condition)Op2::op(); } Value tval() const { return _tval; } Value fval() const { return _fval; } - + bool substitutability_check() const { return _substitutability_check; } // generic virtual void input_values_do(ValueVisitor* f) { Op2::input_values_do(f); f->visit(&_tval); f->visit(&_fval); } }; @@ -1222,10 +1297,11 @@ LEAF(Invoke, StateSplit) Values* _args; BasicTypeList* _signature; ciMethod* _target; + ciType* _return_type; public: // creation - Invoke(Bytecodes::Code code, ValueType* result_type, Value recv, Values* args, + Invoke(Bytecodes::Code code, ciType* return_type, Value recv, Values* args, ciMethod* target, ValueStack* state_before); // accessors @@ -1264,17 +1340,19 @@ LEAF(NewInstance, StateSplit) private: ciInstanceKlass* _klass; bool _is_unresolved; + bool _needs_state_before; public: // creation - NewInstance(ciInstanceKlass* klass, ValueStack* state_before, bool is_unresolved) + NewInstance(ciInstanceKlass* klass, ValueStack* state_before, bool is_unresolved, bool needs_state_before) : StateSplit(instanceType, state_before) - , _klass(klass), _is_unresolved(is_unresolved) + , _klass(klass), _is_unresolved(is_unresolved), _needs_state_before(needs_state_before) {} // accessors ciInstanceKlass* klass() const { return _klass; } bool is_unresolved() const { return _is_unresolved; } + bool needs_state_before() const { return 
_needs_state_before; } virtual bool needs_exception_state() const { return false; } @@ -1284,7 +1362,6 @@ LEAF(NewInstance, StateSplit) ciType* declared_type() const; }; - BASE(NewArray, StateSplit) private: Value _length; @@ -1338,7 +1415,8 @@ LEAF(NewObjectArray, NewArray) public: // creation - NewObjectArray(ciKlass* klass, Value length, ValueStack* state_before) : NewArray(length, state_before), _klass(klass) {} + NewObjectArray(ciKlass* klass, Value length, ValueStack* state_before) + : NewArray(length, state_before), _klass(klass) { } // accessors ciKlass* klass() const { return _klass; } @@ -1373,6 +1451,8 @@ LEAF(NewMultiArray, NewArray) StateSplit::input_values_do(f); for (int i = 0; i < _dims->length(); i++) f->visit(_dims->adr_at(i)); } + + ciType* exact_type() const; }; @@ -1478,14 +1558,19 @@ BASE(AccessMonitor, StateSplit) LEAF(MonitorEnter, AccessMonitor) + bool _maybe_inlinetype; public: // creation - MonitorEnter(Value obj, int monitor_no, ValueStack* state_before) + MonitorEnter(Value obj, int monitor_no, ValueStack* state_before, bool maybe_inlinetype) : AccessMonitor(obj, monitor_no, state_before) + , _maybe_inlinetype(maybe_inlinetype) { ASSERT_VALUES } + // accessors + bool maybe_inlinetype() const { return _maybe_inlinetype; } + // generic virtual bool can_trap() const { return true; } }; @@ -1941,10 +2026,11 @@ LEAF(If, BlockEnd) int _profiled_bci; // Canonicalizer may alter bci of If node bool _swapped; // Is the order reversed with respect to the original If in the // bytecode stream? 
+ bool _substitutability_check; public: // creation // unordered_is_true is valid for float/double compares only - If(Value x, Condition cond, bool unordered_is_true, Value y, BlockBegin* tsux, BlockBegin* fsux, ValueStack* state_before, bool is_safepoint) + If(Value x, Condition cond, bool unordered_is_true, Value y, BlockBegin* tsux, BlockBegin* fsux, ValueStack* state_before, bool is_safepoint, bool substitutability_check=false) : BlockEnd(illegalType, state_before, is_safepoint) , _x(x) , _cond(cond) @@ -1952,6 +2038,7 @@ LEAF(If, BlockEnd) , _profiled_method(nullptr) , _profiled_bci(0) , _swapped(false) + , _substitutability_check(substitutability_check) { ASSERT_VALUES set_flag(UnorderedIsTrueFlag, unordered_is_true); @@ -1986,6 +2073,7 @@ LEAF(If, BlockEnd) void set_profiled_method(ciMethod* method) { _profiled_method = method; } void set_profiled_bci(int bci) { _profiled_bci = bci; } void set_swapped(bool value) { _swapped = value; } + bool substitutability_check() const { return _substitutability_check; } // generic virtual void input_values_do(ValueVisitor* f) { BlockEnd::input_values_do(f); f->visit(&_x); f->visit(&_y); } }; @@ -2296,7 +2384,7 @@ LEAF(ProfileReturnType, Instruction) , _ret(ret) { set_needs_null_check(true); - // The ProfileType has side-effects and must occur precisely where located + // The ProfileReturnType has side-effects and must occur precisely where located pin(); } @@ -2312,6 +2400,48 @@ LEAF(ProfileReturnType, Instruction) } }; +LEAF(ProfileACmpTypes, Instruction) + private: + ciMethod* _method; + int _bci; + Value _left; + Value _right; + bool _left_maybe_null; + bool _right_maybe_null; + + public: + ProfileACmpTypes(ciMethod* method, int bci, Value left, Value right) + : Instruction(voidType) + , _method(method) + , _bci(bci) + , _left(left) + , _right(right) + { + // The ProfileACmp has side-effects and must occur precisely where located + pin(); + _left_maybe_null = true; + _right_maybe_null = true; + } + + ciMethod* 
method() const { return _method; } + int bci() const { return _bci; } + Value left() const { return _left; } + Value right() const { return _right; } + bool left_maybe_null() const { return _left_maybe_null; } + bool right_maybe_null() const { return _right_maybe_null; } + void set_left_maybe_null(bool v) { _left_maybe_null = v; } + void set_right_maybe_null(bool v) { _right_maybe_null = v; } + + virtual void input_values_do(ValueVisitor* f) { + if (_left != nullptr) { + f->visit(&_left); + } + if (_right != nullptr) { + f->visit(&_right); + } + } +}; + // Call some C runtime function that doesn't safepoint, // optionally passing the current thread as the first argument. LEAF(RuntimeCall, Instruction) diff --git a/src/hotspot/share/c1/c1_InstructionPrinter.cpp b/src/hotspot/share/c1/c1_InstructionPrinter.cpp index 59e24c3e6c52d..35a7b3dc2b310 100644 --- a/src/hotspot/share/c1/c1_InstructionPrinter.cpp +++ b/src/hotspot/share/c1/c1_InstructionPrinter.cpp @@ -25,6 +25,7 @@ #include "c1/c1_InstructionPrinter.hpp" #include "c1/c1_ValueStack.hpp" #include "ci/ciArray.hpp" +#include "ci/ciInlineKlass.hpp" #include "ci/ciInstance.hpp" #include "ci/ciObject.hpp" #include "classfile/vmSymbols.hpp" @@ -380,7 +381,12 @@ void InstructionPrinter::do_ArrayLength(ArrayLength* x) { void InstructionPrinter::do_LoadIndexed(LoadIndexed* x) { print_indexed(x); - output()->print(" (%c)", type2char(x->elt_type())); + if (x->delayed() != nullptr) { + output()->print(" +%zu", x->delayed()->offset()); + output()->print(" (%c)", type2char(x->delayed()->field()->type()->basic_type())); + } else { + output()->print(" (%c)", type2char(x->elt_type())); + } if (x->check_flag(Instruction::NeedsRangeCheckFlag)) { output()->print(" [rc]"); } @@ -845,6 +851,7 @@ void InstructionPrinter::do_ProfileReturnType(ProfileReturnType* x) { output()->print(" %s.%s", x->method()->holder()->name()->as_utf8(), x->method()->name()->as_utf8()); output()->put(')'); } + void 
InstructionPrinter::do_ProfileInvoke(ProfileInvoke* x) { output()->print("profile_invoke "); output()->print(" %s.%s", x->inlinee()->holder()->name()->as_utf8(), x->inlinee()->name()->as_utf8()); @@ -852,6 +859,13 @@ void InstructionPrinter::do_ProfileInvoke(ProfileInvoke* x) { } +void InstructionPrinter::do_ProfileACmpTypes(ProfileACmpTypes* x) { + output()->print("profile acmp types "); + print_value(x->left()); + output()->print(", "); + print_value(x->right()); +} + void InstructionPrinter::do_RuntimeCall(RuntimeCall* x) { output()->print("call_rt %s(", x->entry_name()); for (int i = 0; i < x->number_of_arguments(); i++) { diff --git a/src/hotspot/share/c1/c1_InstructionPrinter.hpp b/src/hotspot/share/c1/c1_InstructionPrinter.hpp index 6edf270ec6ea5..116c636cf44de 100644 --- a/src/hotspot/share/c1/c1_InstructionPrinter.hpp +++ b/src/hotspot/share/c1/c1_InstructionPrinter.hpp @@ -125,6 +125,7 @@ class InstructionPrinter: public InstructionVisitor { virtual void do_UnsafeGetAndSet(UnsafeGetAndSet* x); virtual void do_ProfileCall (ProfileCall* x); virtual void do_ProfileReturnType (ProfileReturnType* x); + virtual void do_ProfileACmpTypes(ProfileACmpTypes* x); virtual void do_ProfileInvoke (ProfileInvoke* x); virtual void do_RuntimeCall (RuntimeCall* x); virtual void do_MemBar (MemBar* x); diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp index a67fa98f8f8dd..65b28be7911d0 100644 --- a/src/hotspot/share/c1/c1_LIR.cpp +++ b/src/hotspot/share/c1/c1_LIR.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -27,6 +27,7 @@ #include "c1/c1_LIR.hpp" #include "c1/c1_LIRAssembler.hpp" #include "c1/c1_ValueStack.hpp" +#include "ci/ciInlineKlass.hpp" #include "ci/ciInstance.hpp" #include "runtime/safepointMechanism.inline.hpp" #include "runtime/sharedRuntime.hpp" @@ -286,7 +287,7 @@ void LIR_OpBranch::negate_cond() { LIR_OpTypeCheck::LIR_OpTypeCheck(LIR_Code code, LIR_Opr result, LIR_Opr object, ciKlass* klass, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, bool fast_check, CodeEmitInfo* info_for_exception, CodeEmitInfo* info_for_patch, - CodeStub* stub) + CodeStub* stub, bool need_null_check) : LIR_Op(code, result, nullptr) , _object(object) @@ -302,6 +303,7 @@ LIR_OpTypeCheck::LIR_OpTypeCheck(LIR_Code code, LIR_Opr result, LIR_Opr object, , _profiled_bci(-1) , _should_profile(false) , _fast_check(fast_check) + , _need_null_check(need_null_check) { if (code == lir_checkcast) { assert(info_for_exception != nullptr, "checkcast throws exceptions"); @@ -329,6 +331,7 @@ LIR_OpTypeCheck::LIR_OpTypeCheck(LIR_Code code, LIR_Opr object, LIR_Opr array, L , _profiled_bci(-1) , _should_profile(false) , _fast_check(false) + , _need_null_check(true) { if (code == lir_store_check) { _stub = new ArrayStoreExceptionStub(object, info_for_exception); @@ -338,6 +341,33 @@ LIR_OpTypeCheck::LIR_OpTypeCheck(LIR_Code code, LIR_Opr object, LIR_Opr array, L } } +LIR_OpFlattenedArrayCheck::LIR_OpFlattenedArrayCheck(LIR_Opr array, LIR_Opr tmp, CodeStub* stub) + : LIR_Op(lir_flat_array_check, LIR_OprFact::illegalOpr, nullptr) + , _array(array) + , _tmp(tmp) + , _stub(stub) {} + + +LIR_OpNullFreeArrayCheck::LIR_OpNullFreeArrayCheck(LIR_Opr array, LIR_Opr tmp) + : LIR_Op(lir_null_free_array_check, LIR_OprFact::illegalOpr, nullptr) + , _array(array) + , _tmp(tmp) {} + + +LIR_OpSubstitutabilityCheck::LIR_OpSubstitutabilityCheck(LIR_Opr result, LIR_Opr left, LIR_Opr right, LIR_Opr equal_result, LIR_Opr not_equal_result, + ciKlass* 
left_klass, ciKlass* right_klass, LIR_Opr tmp1, LIR_Opr tmp2, + CodeEmitInfo* info, CodeStub* stub) + : LIR_Op(lir_substitutability_check, result, info) + , _left(left) + , _right(right) + , _equal_result(equal_result) + , _not_equal_result(not_equal_result) + , _left_klass(left_klass) + , _right_klass(right_klass) + , _tmp1(tmp1) + , _tmp2(tmp2) + , _stub(stub) {} + LIR_OpArrayCopy::LIR_OpArrayCopy(LIR_Opr src, LIR_Opr src_pos, LIR_Opr dst, LIR_Opr dst_pos, LIR_Opr length, LIR_Opr tmp, ciArrayKlass* expected_type, int flags, CodeEmitInfo* info) @@ -412,6 +442,7 @@ void LIR_OpVisitState::visit(LIR_Op* op) { case lir_membar_storestore: // result and info always invalid case lir_membar_loadstore: // result and info always invalid case lir_membar_storeload: // result and info always invalid + case lir_check_orig_pc: // result and info always invalid case lir_on_spin_wait: { assert(op->as_Op0() != nullptr, "must be"); @@ -789,6 +820,7 @@ void LIR_OpVisitState::visit(LIR_Op* op) { assert(opLock->_result->is_illegal(), "unused"); do_stub(opLock->_stub); + do_stub(opLock->_throw_ie_stub); break; } @@ -816,6 +848,56 @@ void LIR_OpVisitState::visit(LIR_Op* op) { break; } +// LIR_OpFlattenedArrayCheck + case lir_flat_array_check: { + assert(op->as_OpFlattenedArrayCheck() != nullptr, "must be"); + LIR_OpFlattenedArrayCheck* opFlattenedArrayCheck = (LIR_OpFlattenedArrayCheck*)op; + + if (opFlattenedArrayCheck->_array->is_valid()) do_input(opFlattenedArrayCheck->_array); + if (opFlattenedArrayCheck->_tmp->is_valid()) do_temp(opFlattenedArrayCheck->_tmp); + + do_stub(opFlattenedArrayCheck->_stub); + + break; + } + +// LIR_OpNullFreeArrayCheck + case lir_null_free_array_check: { + assert(op->as_OpNullFreeArrayCheck() != nullptr, "must be"); + LIR_OpNullFreeArrayCheck* opNullFreeArrayCheck = (LIR_OpNullFreeArrayCheck*)op; + + if (opNullFreeArrayCheck->_array->is_valid()) do_input(opNullFreeArrayCheck->_array); +#ifdef RISCV + // tmp is used to hold the result of null free array 
check on riscv + // See LIR_Assembler::emit_opNullFreeArrayCheck + if (opNullFreeArrayCheck->_tmp->is_valid()) do_output(opNullFreeArrayCheck->_tmp); +#else + if (opNullFreeArrayCheck->_tmp->is_valid()) do_temp(opNullFreeArrayCheck->_tmp); +#endif + break; + } + +// LIR_OpSubstitutabilityCheck + case lir_substitutability_check: { + assert(op->as_OpSubstitutabilityCheck() != nullptr, "must be"); + LIR_OpSubstitutabilityCheck* opSubstitutabilityCheck = (LIR_OpSubstitutabilityCheck*)op; + do_input(opSubstitutabilityCheck->_left); + do_temp (opSubstitutabilityCheck->_left); + do_input(opSubstitutabilityCheck->_right); + do_temp (opSubstitutabilityCheck->_right); + do_input(opSubstitutabilityCheck->_equal_result); + do_temp (opSubstitutabilityCheck->_equal_result); + do_input(opSubstitutabilityCheck->_not_equal_result); + do_temp (opSubstitutabilityCheck->_not_equal_result); + if (opSubstitutabilityCheck->_tmp1->is_valid()) do_temp(opSubstitutabilityCheck->_tmp1); + if (opSubstitutabilityCheck->_tmp2->is_valid()) do_temp(opSubstitutabilityCheck->_tmp2); + if (opSubstitutabilityCheck->_result->is_valid()) do_output(opSubstitutabilityCheck->_result); + + do_info(opSubstitutabilityCheck->_info); + do_stub(opSubstitutabilityCheck->_stub); + break; + } + // LIR_OpCompareAndSwap case lir_cas_long: case lir_cas_obj: @@ -893,7 +975,18 @@ void LIR_OpVisitState::visit(LIR_Op* op) { do_temp(opProfileType->_tmp); break; } - default: + + // LIR_OpProfileInlineType: + case lir_profile_inline_type: { + assert(op->as_OpProfileInlineType() != nullptr, "must be"); + LIR_OpProfileInlineType* opProfileInlineType = (LIR_OpProfileInlineType*)op; + + do_input(opProfileInlineType->_mdp); do_temp(opProfileInlineType->_mdp); + do_input(opProfileInlineType->_obj); + do_temp(opProfileInlineType->_tmp); + break; + } +default: op->visit(this); } } @@ -966,6 +1059,34 @@ void LIR_OpJavaCall::emit_code(LIR_Assembler* masm) { masm->emit_call(this); } +bool 
LIR_OpJavaCall::maybe_return_as_fields(ciInlineKlass** vk_ret) const { + ciType* return_type = method()->return_type(); + if (InlineTypeReturnedAsFields) { + if (return_type->is_inlinetype()) { + ciInlineKlass* vk = return_type->as_inline_klass(); + if (vk->can_be_returned_as_fields()) { + if (vk_ret != nullptr) { + *vk_ret = vk; + } + return true; + } + } else if (return_type->is_instance_klass() && + (method()->is_method_handle_intrinsic() || !return_type->is_loaded() || + StressCallingConvention)) { + // An inline type might be returned from the call but we don't know its type. + // This can happen with method handle intrinsics or when the return type is + // not loaded (method holder is not loaded or preload attribute is missing). + // If an inline type is returned, we either get an oop to a buffer and nothing + // needs to be done or one of the values being returned is the klass of the + // inline type (RAX on x64, with LSB set to 1) and we need to allocate an inline + // type instance of that type and initialize it with the fields values being + // returned in other registers. 
+ return true; + } + } + return false; +} + void LIR_OpRTCall::emit_code(LIR_Assembler* masm) { masm->emit_rtcall(this); } @@ -1026,6 +1147,24 @@ void LIR_OpTypeCheck::emit_code(LIR_Assembler* masm) { } } +void LIR_OpFlattenedArrayCheck::emit_code(LIR_Assembler* masm) { + masm->emit_opFlattenedArrayCheck(this); + if (stub() != nullptr) { + masm->append_code_stub(stub()); + } +} + +void LIR_OpNullFreeArrayCheck::emit_code(LIR_Assembler* masm) { + masm->emit_opNullFreeArrayCheck(this); +} + +void LIR_OpSubstitutabilityCheck::emit_code(LIR_Assembler* masm) { + masm->emit_opSubstitutabilityCheck(this); + if (stub() != nullptr) { + masm->append_code_stub(stub()); + } +} + void LIR_OpCompareAndSwap::emit_code(LIR_Assembler* masm) { masm->emit_compare_and_swap(this); } @@ -1043,6 +1182,9 @@ void LIR_OpLock::emit_code(LIR_Assembler* masm) { if (stub()) { masm->append_code_stub(stub()); } + if (throw_ie_stub()) { + masm->append_code_stub(throw_ie_stub()); + } } void LIR_OpLoadKlass::emit_code(LIR_Assembler* masm) { @@ -1063,6 +1205,10 @@ void LIR_OpProfileType::emit_code(LIR_Assembler* masm) { masm->emit_profile_type(this); } +void LIR_OpProfileInlineType::emit_code(LIR_Assembler* masm) { + masm->emit_profile_inline_type(this); +} + // LIR_List LIR_List::LIR_List(Compilation* compilation, BlockBegin* block) : _operations(8) @@ -1338,7 +1484,7 @@ void LIR_List::allocate_object(LIR_Opr dst, LIR_Opr t1, LIR_Opr t2, LIR_Opr t3, stub)); } -void LIR_List::allocate_array(LIR_Opr dst, LIR_Opr len, LIR_Opr t1,LIR_Opr t2, LIR_Opr t3,LIR_Opr t4, BasicType type, LIR_Opr klass, CodeStub* stub, bool zero_array) { +void LIR_List::allocate_array(LIR_Opr dst, LIR_Opr len, LIR_Opr t1,LIR_Opr t2, LIR_Opr t3,LIR_Opr t4, BasicType type, LIR_Opr klass, CodeStub* stub, bool zero_array, bool always_slow_path) { append(new LIR_OpAllocArray( klass, len, @@ -1349,7 +1495,8 @@ void LIR_List::allocate_array(LIR_Opr dst, LIR_Opr len, LIR_Opr t1,LIR_Opr t2, L t4, type, stub, - zero_array)); + zero_array, 
+ always_slow_path)); } void LIR_List::shift_left(LIR_Opr value, LIR_Opr count, LIR_Opr dst, LIR_Opr tmp) { @@ -1387,7 +1534,7 @@ void LIR_List::fcmp2int(LIR_Opr left, LIR_Opr right, LIR_Opr dst, bool is_unorde dst)); } -void LIR_List::lock_object(LIR_Opr hdr, LIR_Opr obj, LIR_Opr lock, LIR_Opr scratch, CodeStub* stub, CodeEmitInfo* info) { +void LIR_List::lock_object(LIR_Opr hdr, LIR_Opr obj, LIR_Opr lock, LIR_Opr scratch, CodeStub* stub, CodeEmitInfo* info, CodeStub* throw_ie_stub) { append(new LIR_OpLock( lir_lock, hdr, @@ -1395,7 +1542,8 @@ void LIR_List::lock_object(LIR_Opr hdr, LIR_Opr obj, LIR_Opr lock, LIR_Opr scrat lock, scratch, stub, - info)); + info, + throw_ie_stub)); } void LIR_List::unlock_object(LIR_Opr hdr, LIR_Opr obj, LIR_Opr lock, LIR_Opr scratch, CodeStub* stub) { @@ -1420,9 +1568,13 @@ void check_LIR() { void LIR_List::checkcast (LIR_Opr result, LIR_Opr object, ciKlass* klass, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, bool fast_check, CodeEmitInfo* info_for_exception, CodeEmitInfo* info_for_patch, CodeStub* stub, - ciMethod* profiled_method, int profiled_bci) { + ciMethod* profiled_method, int profiled_bci, bool is_null_free) { + // If klass is non-nullable, LIRGenerator::do_CheckCast has already performed null-check + // on the object. 
+ bool need_null_check = !is_null_free; LIR_OpTypeCheck* c = new LIR_OpTypeCheck(lir_checkcast, result, object, klass, - tmp1, tmp2, tmp3, fast_check, info_for_exception, info_for_patch, stub); + tmp1, tmp2, tmp3, fast_check, info_for_exception, info_for_patch, stub, + need_null_check); if (profiled_method != nullptr && TypeProfileCasts) { c->set_profiled_method(profiled_method); c->set_profiled_bci(profiled_bci); @@ -1465,6 +1617,24 @@ void LIR_List::null_check(LIR_Opr opr, CodeEmitInfo* info, bool deoptimize_on_nu } } +void LIR_List::check_flat_array(LIR_Opr array, LIR_Opr tmp, CodeStub* stub) { + LIR_OpFlattenedArrayCheck* c = new LIR_OpFlattenedArrayCheck(array, tmp, stub); + append(c); +} + +void LIR_List::check_null_free_array(LIR_Opr array, LIR_Opr tmp) { + LIR_OpNullFreeArrayCheck* c = new LIR_OpNullFreeArrayCheck(array, tmp); + append(c); +} + +void LIR_List::substitutability_check(LIR_Opr result, LIR_Opr left, LIR_Opr right, LIR_Opr equal_result, LIR_Opr not_equal_result, + ciKlass* left_klass, ciKlass* right_klass, LIR_Opr tmp1, LIR_Opr tmp2, + CodeEmitInfo* info, CodeStub* stub) { + LIR_OpSubstitutabilityCheck* c = new LIR_OpSubstitutabilityCheck(result, left, right, equal_result, not_equal_result, + left_klass, right_klass, tmp1, tmp2, info, stub); + append(c); +} + void LIR_List::cas_long(LIR_Opr addr, LIR_Opr cmp_value, LIR_Opr new_value, LIR_Opr t1, LIR_Opr t2, LIR_Opr result) { append(new LIR_OpCompareAndSwap(lir_cas_long, addr, cmp_value, new_value, t1, t2, result)); @@ -1680,6 +1850,7 @@ const char * LIR_Op::name() const { case lir_osr_entry: s = "osr_entry"; break; case lir_breakpoint: s = "breakpoint"; break; case lir_get_thread: s = "get_thread"; break; + case lir_check_orig_pc: s = "check_orig_pc"; break; // LIR_Op1 case lir_push: s = "push"; break; case lir_pop: s = "pop"; break; @@ -1743,6 +1914,12 @@ const char * LIR_Op::name() const { case lir_instanceof: s = "instanceof"; break; case lir_checkcast: s = "checkcast"; break; case 
lir_store_check: s = "store_check"; break; + // LIR_OpFlattenedArrayCheck + case lir_flat_array_check: s = "flat_array_check"; break; + // LIR_OpNullFreeArrayCheck + case lir_null_free_array_check: s = "null_free_array_check"; break; + // LIR_OpSubstitutabilityCheck + case lir_substitutability_check: s = "substitutability_check"; break; // LIR_OpCompareAndSwap case lir_cas_long: s = "cas_long"; break; case lir_cas_obj: s = "cas_obj"; break; @@ -1751,6 +1928,8 @@ const char * LIR_Op::name() const { case lir_profile_call: s = "profile_call"; break; // LIR_OpProfileType case lir_profile_type: s = "profile_type"; break; + // LIR_OpProfileInlineType + case lir_profile_inline_type: s = "profile_inline_type"; break; // LIR_OpAssert #ifdef ASSERT case lir_assert: s = "assert"; break; @@ -1976,6 +2155,41 @@ void LIR_OpTypeCheck::print_instr(outputStream* out) const { if (info_for_exception() != nullptr) out->print(" [bci:%d]", info_for_exception()->stack()->bci()); } +void LIR_OpFlattenedArrayCheck::print_instr(outputStream* out) const { + array()->print(out); out->print(" "); + tmp()->print(out); out->print(" "); + if (stub() != nullptr) { + out->print("[label:" INTPTR_FORMAT "]", p2i(stub()->entry())); + } +} + +void LIR_OpNullFreeArrayCheck::print_instr(outputStream* out) const { + array()->print(out); out->print(" "); + tmp()->print(out); out->print(" "); +} + +void LIR_OpSubstitutabilityCheck::print_instr(outputStream* out) const { + result_opr()->print(out); out->print(" "); + left()->print(out); out->print(" "); + right()->print(out); out->print(" "); + equal_result()->print(out); out->print(" "); + not_equal_result()->print(out); out->print(" "); + if (left_klass() == nullptr) { + out->print("unknown "); + } else { + left_klass()->print(out); out->print(" "); + } + if (right_klass() == nullptr) { + out->print("unknown "); + } else { + right_klass()->print(out); out->print(" "); + } + tmp1()->print(out); out->print(" "); + tmp2()->print(out); out->print(" "); + if 
(stub() != nullptr) { + out->print("[label:" INTPTR_FORMAT "]", p2i(stub()->entry())); + } +} // LIR_Op3 void LIR_Op3::print_instr(outputStream* out) const { @@ -2046,6 +2260,14 @@ void LIR_OpProfileType::print_instr(outputStream* out) const { tmp()->print(out); out->print(" "); } +// LIR_OpProfileInlineType +void LIR_OpProfileInlineType::print_instr(outputStream* out) const { + out->print(" flag = %x ", flag()); + mdp()->print(out); out->print(" "); + obj()->print(out); out->print(" "); + tmp()->print(out); out->print(" "); +} + #endif // PRODUCT // Implementation of LIR_InsertionBuffer diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp index 80b0fd65bc146..91a80bfdd12ec 100644 --- a/src/hotspot/share/c1/c1_LIR.hpp +++ b/src/hotspot/share/c1/c1_LIR.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -889,10 +889,14 @@ class LIR_OpArrayCopy; class LIR_OpUpdateCRC32; class LIR_OpLock; class LIR_OpTypeCheck; +class LIR_OpFlattenedArrayCheck; +class LIR_OpNullFreeArrayCheck; +class LIR_OpSubstitutabilityCheck; class LIR_OpCompareAndSwap; class LIR_OpLoadKlass; class LIR_OpProfileCall; class LIR_OpProfileType; +class LIR_OpProfileInlineType; #ifdef ASSERT class LIR_OpAssert; #endif @@ -916,6 +920,7 @@ enum LIR_Code { , lir_membar_storeload , lir_get_thread , lir_on_spin_wait + , lir_check_orig_pc , end_op0 , begin_op1 , lir_push @@ -989,6 +994,15 @@ enum LIR_Code { , lir_checkcast , lir_store_check , end_opTypeCheck + , begin_opFlattenedArrayCheck + , lir_flat_array_check + , end_opFlattenedArrayCheck + , begin_opNullFreeArrayCheck + , lir_null_free_array_check + , end_opNullFreeArrayCheck + , begin_opSubstitutabilityCheck + , lir_substitutability_check + , end_opSubstitutabilityCheck , begin_opCompareAndSwap , lir_cas_long , lir_cas_obj @@ -997,6 +1011,7 @@ enum LIR_Code { , begin_opMDOProfile , lir_profile_call , lir_profile_type + , lir_profile_inline_type , end_opMDOProfile , begin_opAssert , lir_assert @@ -1135,10 +1150,14 @@ class LIR_Op: public CompilationResourceObj { virtual LIR_OpArrayCopy* as_OpArrayCopy() { return nullptr; } virtual LIR_OpUpdateCRC32* as_OpUpdateCRC32() { return nullptr; } virtual LIR_OpTypeCheck* as_OpTypeCheck() { return nullptr; } + virtual LIR_OpFlattenedArrayCheck* as_OpFlattenedArrayCheck() { return nullptr; } + virtual LIR_OpNullFreeArrayCheck* as_OpNullFreeArrayCheck() { return nullptr; } + virtual LIR_OpSubstitutabilityCheck* as_OpSubstitutabilityCheck() { return nullptr; } virtual LIR_OpCompareAndSwap* as_OpCompareAndSwap() { return nullptr; } virtual LIR_OpLoadKlass* as_OpLoadKlass() { return nullptr; } virtual LIR_OpProfileCall* as_OpProfileCall() { return nullptr; } virtual LIR_OpProfileType* as_OpProfileType() { return nullptr; } + virtual 
LIR_OpProfileInlineType* as_OpProfileInlineType() { return nullptr; } #ifdef ASSERT virtual LIR_OpAssert* as_OpAssert() { return nullptr; } #endif @@ -1208,6 +1227,8 @@ class LIR_OpJavaCall: public LIR_OpCall { virtual void emit_code(LIR_Assembler* masm); virtual LIR_OpJavaCall* as_OpJavaCall() { return this; } virtual void print_instr(outputStream* out) const PRODUCT_RETURN; + + bool maybe_return_as_fields(ciInlineKlass** vk = nullptr) const; }; // -------------------------------------------------- @@ -1259,7 +1280,10 @@ class LIR_OpArrayCopy: public LIR_Op { unaligned = 1 << 9, src_objarray = 1 << 10, dst_objarray = 1 << 11, - all_flags = (1 << 12) - 1 + always_slow_path = 1 << 12, + src_inlinetype_check = 1 << 13, + dst_inlinetype_check = 1 << 14, + all_flags = (1 << 15) - 1 }; LIR_OpArrayCopy(LIR_Opr src, LIR_Opr src_pos, LIR_Opr dst, LIR_Opr dst_pos, LIR_Opr length, LIR_Opr tmp, @@ -1514,11 +1538,12 @@ class LIR_OpTypeCheck: public LIR_Op { int _profiled_bci; bool _should_profile; bool _fast_check; + bool _need_null_check; public: LIR_OpTypeCheck(LIR_Code code, LIR_Opr result, LIR_Opr object, ciKlass* klass, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, bool fast_check, - CodeEmitInfo* info_for_exception, CodeEmitInfo* info_for_patch, CodeStub* stub); + CodeEmitInfo* info_for_exception, CodeEmitInfo* info_for_patch, CodeStub* stub, bool need_null_check = true); LIR_OpTypeCheck(LIR_Code code, LIR_Opr object, LIR_Opr array, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, CodeEmitInfo* info_for_exception); @@ -1540,13 +1565,82 @@ class LIR_OpTypeCheck: public LIR_Op { ciMethod* profiled_method() const { return _profiled_method; } int profiled_bci() const { return _profiled_bci; } bool should_profile() const { return _should_profile; } - + bool need_null_check() const { return _need_null_check; } virtual bool is_patching() { return _info_for_patch != nullptr; } virtual void emit_code(LIR_Assembler* masm); virtual LIR_OpTypeCheck* as_OpTypeCheck() { return this; } void 
print_instr(outputStream* out) const PRODUCT_RETURN; }; +// LIR_OpFlattenedArrayCheck +class LIR_OpFlattenedArrayCheck: public LIR_Op { + friend class LIR_OpVisitState; + + private: + LIR_Opr _array; + LIR_Opr _tmp; + CodeStub* _stub; +public: + LIR_OpFlattenedArrayCheck(LIR_Opr array, LIR_Opr tmp, CodeStub* stub); + LIR_Opr array() const { return _array; } + LIR_Opr tmp() const { return _tmp; } + CodeStub* stub() const { return _stub; } + + virtual void emit_code(LIR_Assembler* masm); + virtual LIR_OpFlattenedArrayCheck* as_OpFlattenedArrayCheck() { return this; } + virtual void print_instr(outputStream* out) const PRODUCT_RETURN; +}; + +// LIR_OpNullFreeArrayCheck +class LIR_OpNullFreeArrayCheck: public LIR_Op { + friend class LIR_OpVisitState; + + private: + LIR_Opr _array; + LIR_Opr _tmp; +public: + LIR_OpNullFreeArrayCheck(LIR_Opr array, LIR_Opr tmp); + LIR_Opr array() const { return _array; } + LIR_Opr tmp() const { return _tmp; } + + virtual void emit_code(LIR_Assembler* masm); + virtual LIR_OpNullFreeArrayCheck* as_OpNullFreeArrayCheck() { return this; } + virtual void print_instr(outputStream* out) const PRODUCT_RETURN; +}; + +class LIR_OpSubstitutabilityCheck: public LIR_Op { + friend class LIR_OpVisitState; + + private: + LIR_Opr _left; + LIR_Opr _right; + LIR_Opr _equal_result; + LIR_Opr _not_equal_result; + ciKlass* _left_klass; + ciKlass* _right_klass; + LIR_Opr _tmp1; + LIR_Opr _tmp2; + CodeStub* _stub; +public: + LIR_OpSubstitutabilityCheck(LIR_Opr result, LIR_Opr left, LIR_Opr right, LIR_Opr equal_result, LIR_Opr not_equal_result, + ciKlass* left_klass, ciKlass* right_klass, LIR_Opr tmp1, LIR_Opr tmp2, + CodeEmitInfo* info, CodeStub* stub); + + LIR_Opr left() const { return _left; } + LIR_Opr right() const { return _right; } + LIR_Opr equal_result() const { return _equal_result; } + LIR_Opr not_equal_result() const { return _not_equal_result; } + ciKlass* left_klass() const { return _left_klass; } + ciKlass* right_klass() const { return 
_right_klass; } + LIR_Opr tmp1() const { return _tmp1; } + LIR_Opr tmp2() const { return _tmp2; } + CodeStub* stub() const { return _stub; } + + virtual void emit_code(LIR_Assembler* masm); + virtual LIR_OpSubstitutabilityCheck* as_OpSubstitutabilityCheck() { return this; } + virtual void print_instr(outputStream* out) const PRODUCT_RETURN; +}; + // LIR_Op2 class LIR_Op2: public LIR_Op { friend class LIR_OpVisitState; @@ -1705,9 +1799,10 @@ class LIR_OpAllocArray : public LIR_Op { CodeStub* _stub; BasicType _type; bool _zero_array; + bool _always_slow_path; public: - LIR_OpAllocArray(LIR_Opr klass, LIR_Opr len, LIR_Opr result, LIR_Opr t1, LIR_Opr t2, LIR_Opr t3, LIR_Opr t4, BasicType type, CodeStub* stub, bool zero_array) + LIR_OpAllocArray(LIR_Opr klass, LIR_Opr len, LIR_Opr result, LIR_Opr t1, LIR_Opr t2, LIR_Opr t3, LIR_Opr t4, BasicType type, CodeStub* stub, bool zero_array, bool always_slow_path) : LIR_Op(lir_alloc_array, result, nullptr) , _klass(klass) , _len(len) @@ -1717,7 +1812,8 @@ class LIR_OpAllocArray : public LIR_Op { , _tmp4(t4) , _stub(stub) , _type(type) - , _zero_array(zero_array) {} + , _zero_array(zero_array) + , _always_slow_path(always_slow_path) {} LIR_Opr klass() const { return _klass; } LIR_Opr len() const { return _len; } @@ -1728,7 +1824,8 @@ class LIR_OpAllocArray : public LIR_Op { LIR_Opr tmp4() const { return _tmp4; } BasicType type() const { return _type; } CodeStub* stub() const { return _stub; } - bool zero_array() const { return _zero_array; } + bool zero_array() const { return _zero_array; } + bool always_slow_path() const { return _always_slow_path; } virtual void emit_code(LIR_Assembler* masm); virtual LIR_OpAllocArray * as_OpAllocArray () { return this; } @@ -1835,20 +1932,23 @@ class LIR_OpLock: public LIR_Op { LIR_Opr _lock; LIR_Opr _scratch; CodeStub* _stub; + CodeStub* _throw_ie_stub; public: - LIR_OpLock(LIR_Code code, LIR_Opr hdr, LIR_Opr obj, LIR_Opr lock, LIR_Opr scratch, CodeStub* stub, CodeEmitInfo* info) + 
LIR_OpLock(LIR_Code code, LIR_Opr hdr, LIR_Opr obj, LIR_Opr lock, LIR_Opr scratch, CodeStub* stub, CodeEmitInfo* info, CodeStub* throw_ie_stub=nullptr) : LIR_Op(code, LIR_OprFact::illegalOpr, info) , _hdr(hdr) , _obj(obj) , _lock(lock) , _scratch(scratch) - , _stub(stub) {} + , _stub(stub) + , _throw_ie_stub(throw_ie_stub) {} LIR_Opr hdr_opr() const { return _hdr; } LIR_Opr obj_opr() const { return _obj; } LIR_Opr lock_opr() const { return _lock; } LIR_Opr scratch_opr() const { return _scratch; } CodeStub* stub() const { return _stub; } + CodeStub* throw_ie_stub() const { return _throw_ie_stub; } virtual void emit_code(LIR_Assembler* masm); virtual LIR_OpLock* as_OpLock() { return this; } @@ -2014,6 +2114,38 @@ class LIR_OpProfileType : public LIR_Op { virtual void print_instr(outputStream* out) const PRODUCT_RETURN; }; +// LIR_OpProfileInlineType +class LIR_OpProfileInlineType : public LIR_Op { + friend class LIR_OpVisitState; + + private: + LIR_Opr _mdp; + LIR_Opr _obj; + int _flag; + LIR_Opr _tmp; + bool _not_null; // true if we know statically that _obj cannot be null + + public: + // Destroys recv + LIR_OpProfileInlineType(LIR_Opr mdp, LIR_Opr obj, int flag, LIR_Opr tmp, bool not_null) + : LIR_Op(lir_profile_inline_type, LIR_OprFact::illegalOpr, nullptr) // no result, no info + , _mdp(mdp) + , _obj(obj) + , _flag(flag) + , _tmp(tmp) + , _not_null(not_null) { } + + LIR_Opr mdp() const { return _mdp; } + LIR_Opr obj() const { return _obj; } + int flag() const { return _flag; } + LIR_Opr tmp() const { return _tmp; } + bool not_null() const { return _not_null; } + + virtual void emit_code(LIR_Assembler* masm); + virtual LIR_OpProfileInlineType* as_OpProfileInlineType() { return this; } + virtual void print_instr(outputStream* out) const PRODUCT_RETURN; +}; + class LIR_InsertionBuffer; //--------------------------------LIR_List--------------------------------------------------- @@ -2238,7 +2370,7 @@ class LIR_List: public CompilationResourceObj { void irem(LIR_Opr 
left, int right, LIR_Opr res, LIR_Opr tmp, CodeEmitInfo* info); void allocate_object(LIR_Opr dst, LIR_Opr t1, LIR_Opr t2, LIR_Opr t3, LIR_Opr t4, int header_size, int object_size, LIR_Opr klass, bool init_check, CodeStub* stub); - void allocate_array(LIR_Opr dst, LIR_Opr len, LIR_Opr t1,LIR_Opr t2, LIR_Opr t3,LIR_Opr t4, BasicType type, LIR_Opr klass, CodeStub* stub, bool zero_array = true); + void allocate_array(LIR_Opr dst, LIR_Opr len, LIR_Opr t1,LIR_Opr t2, LIR_Opr t3,LIR_Opr t4, BasicType type, LIR_Opr klass, CodeStub* stub, bool zero_array = true, bool always_slow_path = false); // jump is an unconditional branch void jump(BlockBegin* block) { @@ -2285,7 +2417,7 @@ class LIR_List: public CompilationResourceObj { void load_stack_address_monitor(int monitor_ix, LIR_Opr dst) { append(new LIR_Op1(lir_monaddr, LIR_OprFact::intConst(monitor_ix), dst)); } void unlock_object(LIR_Opr hdr, LIR_Opr obj, LIR_Opr lock, LIR_Opr scratch, CodeStub* stub); - void lock_object(LIR_Opr hdr, LIR_Opr obj, LIR_Opr lock, LIR_Opr scratch, CodeStub* stub, CodeEmitInfo* info); + void lock_object(LIR_Opr hdr, LIR_Opr obj, LIR_Opr lock, LIR_Opr scratch, CodeStub* stub, CodeEmitInfo* info, CodeStub* throw_ie_stub=nullptr); void breakpoint() { append(new LIR_Op0(lir_breakpoint)); } @@ -2295,11 +2427,16 @@ class LIR_List: public CompilationResourceObj { void instanceof(LIR_Opr result, LIR_Opr object, ciKlass* klass, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, bool fast_check, CodeEmitInfo* info_for_patch, ciMethod* profiled_method, int profiled_bci); void store_check(LIR_Opr object, LIR_Opr array, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, CodeEmitInfo* info_for_exception, ciMethod* profiled_method, int profiled_bci); + void check_flat_array(LIR_Opr array, LIR_Opr tmp, CodeStub* stub); + void check_null_free_array(LIR_Opr array, LIR_Opr tmp); + void substitutability_check(LIR_Opr result, LIR_Opr left, LIR_Opr right, LIR_Opr equal_result, LIR_Opr not_equal_result, + ciKlass* left_klass, 
ciKlass* right_klass, LIR_Opr tmp1, LIR_Opr tmp2, + CodeEmitInfo* info, CodeStub* stub); void checkcast (LIR_Opr result, LIR_Opr object, ciKlass* klass, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, bool fast_check, CodeEmitInfo* info_for_exception, CodeEmitInfo* info_for_patch, CodeStub* stub, - ciMethod* profiled_method, int profiled_bci); + ciMethod* profiled_method, int profiled_bci, bool is_null_free); // MethodData* profiling void profile_call(ciMethod* method, int bci, ciMethod* callee, LIR_Opr mdo, LIR_Opr recv, LIR_Opr t1, ciKlass* cha_klass) { append(new LIR_OpProfileCall(method, bci, callee, mdo, recv, t1, cha_klass)); @@ -2307,6 +2444,9 @@ class LIR_List: public CompilationResourceObj { void profile_type(LIR_Address* mdp, LIR_Opr obj, ciKlass* exact_klass, intptr_t current_klass, LIR_Opr tmp, bool not_null, bool no_conflict) { append(new LIR_OpProfileType(LIR_OprFact::address(mdp), obj, exact_klass, current_klass, tmp, not_null, no_conflict)); } + void profile_inline_type(LIR_Address* mdp, LIR_Opr obj, int flag, LIR_Opr tmp, bool not_null) { + append(new LIR_OpProfileInlineType(LIR_OprFact::address(mdp), obj, flag, tmp, not_null)); + } void xadd(LIR_Opr src, LIR_Opr add, LIR_Opr res, LIR_Opr tmp) { append(new LIR_Op2(lir_xadd, src, add, res, tmp)); } void xchg(LIR_Opr src, LIR_Opr set, LIR_Opr res, LIR_Opr tmp) { append(new LIR_Op2(lir_xchg, src, set, res, tmp)); } diff --git a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp index e22b510351464..1a7608f80e40a 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.cpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp @@ -29,9 +29,12 @@ #include "c1/c1_LIRAssembler.hpp" #include "c1/c1_MacroAssembler.hpp" #include "c1/c1_ValueStack.hpp" +#include "ci/ciInlineKlass.hpp" +#include "ci/ciUtilities.inline.hpp" #include "compiler/compilerDefinitions.inline.hpp" #include "compiler/oopMap.hpp" #include "runtime/os.hpp" +#include "runtime/sharedRuntime.hpp" #include 
"runtime/vm_version.hpp" void LIR_Assembler::patching_epilog(PatchingStub* patch, LIR_PatchCode patch_code, Register obj, CodeEmitInfo* info) { @@ -117,6 +120,7 @@ LIR_Assembler::~LIR_Assembler() { // The unwind handler label may be unnbound if this destructor is invoked because of a bail-out. // Reset it here to avoid an assertion. _unwind_handler_entry.reset(); + _verified_inline_entry.reset(); } @@ -337,10 +341,9 @@ void LIR_Assembler::add_debug_info_for_branch(CodeEmitInfo* info) { } } - -void LIR_Assembler::add_call_info(int pc_offset, CodeEmitInfo* cinfo) { +void LIR_Assembler::add_call_info(int pc_offset, CodeEmitInfo* cinfo, bool maybe_return_as_fields) { flush_debug_info(pc_offset); - cinfo->record_debug_info(compilation()->debug_info_recorder(), pc_offset); + cinfo->record_debug_info(compilation()->debug_info_recorder(), pc_offset, maybe_return_as_fields); if (cinfo->exception_handlers() != nullptr) { compilation()->add_exception_handlers_for_pco(pc_offset, cinfo->exception_handlers()); } @@ -478,6 +481,12 @@ void LIR_Assembler::emit_call(LIR_OpJavaCall* op) { fatal("unexpected op code: %s", op->name()); break; } + + ciInlineKlass* vk = nullptr; + if (op->maybe_return_as_fields(&vk)) { + int offset = store_inline_type_fields_to_buf(vk); + add_call_info(offset, op->info(), true); + } } @@ -567,6 +576,140 @@ void LIR_Assembler::emit_op1(LIR_Op1* op) { } } +void LIR_Assembler::add_scalarized_debug_info(int pc_offset) { + // The VEP and VIEP(RO) of a C1-compiled method call buffer_inline_args_xxx() + // before doing any argument shuffling. This call may cause GC. When GC happens, + // all the parameters are still as passed by the caller, so we just use + // map->set_include_argument_oops() inside frame::sender_for_compiled_frame(RegisterMap* map). + // Deoptimization is delayed until we enter the method body, so we only need a + // scope for stack walking here. There are no materialized locals, expression + // stack entries, or monitors yet. 
+ flush_debug_info(pc_offset); + OopMap* oop_map = new OopMap(0, 0); + DebugInformationRecorder* debug_info = compilation()->debug_info_recorder(); + debug_info->add_safepoint(pc_offset, oop_map); + bool reexecute = false; + debug_info->describe_scope(pc_offset, methodHandle(), method(), 0, reexecute); + debug_info->end_safepoint(pc_offset); +} + +// The entries points of C1-compiled methods can have the following types: +// (1) Methods with no inline type args +// (2) Methods with inline type receiver but no inline type args +// VIEP_RO is the same as VIEP +// (3) Methods with non-inline type receiver and some inline type args +// VIEP_RO is the same as VEP +// (4) Methods with inline type receiver and other inline type args +// Separate VEP, VIEP and VIEP_RO +// +// (1) (2) (3) (4) +// UEP/UIEP: VEP: UEP: UEP: +// check_icache pack receiver check_icache check_icache +// VEP/VIEP/VIEP_RO jump to VIEP VEP/VIEP_RO: VIEP_RO: +// body UEP/UIEP: pack inline args pack inline args (except receiver) +// check_icache jump to VIEP jump to VIEP +// VIEP/VIEP_RO UIEP: VEP: +// body check_icache pack all inline args +// VIEP: jump to VIEP +// body UIEP: +// check_icache +// VIEP: +// body +void LIR_Assembler::emit_std_entries() { + offsets()->set_value(CodeOffsets::OSR_Entry, _masm->offset()); + + _masm->align(CodeEntryAlignment); + + if (method()->has_scalarized_args()) { + VM_ENTRY_MARK; + assert(InlineTypePassFieldsAsArgs, "must be"); + CompiledEntrySignature ces(method()->get_Method()); + ces.compute_calling_conventions(false); + CodeOffsets::Entries ro_entry_type = ces.c1_inline_ro_entry_type(); + + // UEP: check icache and fall-through + if (ro_entry_type != CodeOffsets::Verified_Inline_Entry) { + offsets()->set_value(CodeOffsets::Entry, _masm->offset()); + if (needs_icache(method())) { + check_icache(); + } + } + + // VIEP_RO: pack all value parameters, except the receiver + if (ro_entry_type == CodeOffsets::Verified_Inline_Entry_RO) { + 
emit_std_entry(CodeOffsets::Verified_Inline_Entry_RO, &ces); + } + + // VEP: pack all value parameters + _masm->align(CodeEntryAlignment); + emit_std_entry(CodeOffsets::Verified_Entry, &ces); + + // UIEP: check icache and fall-through + _masm->align(CodeEntryAlignment); + offsets()->set_value(CodeOffsets::Inline_Entry, _masm->offset()); + if (ro_entry_type == CodeOffsets::Verified_Inline_Entry) { + // Special case if we have VIEP == VIEP(RO): + // this means UIEP (called by C1) == UEP (called by C2). + offsets()->set_value(CodeOffsets::Entry, _masm->offset()); + } + if (needs_icache(method())) { + check_icache(); + } + + // VIEP: all value parameters are passed as refs - no packing. + emit_std_entry(CodeOffsets::Verified_Inline_Entry, nullptr); + + if (ro_entry_type != CodeOffsets::Verified_Inline_Entry_RO) { + // The VIEP(RO) is the same as VEP or VIEP + assert(ro_entry_type == CodeOffsets::Verified_Entry || + ro_entry_type == CodeOffsets::Verified_Inline_Entry, "must be"); + offsets()->set_value(CodeOffsets::Verified_Inline_Entry_RO, + offsets()->value(ro_entry_type)); + } + } else { + // All 3 entries are the same (no inline type packing) + offsets()->set_value(CodeOffsets::Entry, _masm->offset()); + offsets()->set_value(CodeOffsets::Inline_Entry, _masm->offset()); + if (needs_icache(method())) { + check_icache(); + } + emit_std_entry(CodeOffsets::Verified_Inline_Entry, nullptr); + offsets()->set_value(CodeOffsets::Verified_Entry, offsets()->value(CodeOffsets::Verified_Inline_Entry)); + offsets()->set_value(CodeOffsets::Verified_Inline_Entry_RO, offsets()->value(CodeOffsets::Verified_Inline_Entry)); + } +} + +void LIR_Assembler::emit_std_entry(CodeOffsets::Entries entry, const CompiledEntrySignature* ces) { + offsets()->set_value(entry, _masm->offset()); + _masm->verified_entry(compilation()->directive()->BreakAtExecuteOption); + switch (entry) { + case CodeOffsets::Verified_Entry: { + if (needs_clinit_barrier_on_entry(method())) { + clinit_barrier(method()); + 
} + int rt_call_offset = _masm->verified_entry(ces, initial_frame_size_in_bytes(), bang_size_in_bytes(), in_bytes(frame_map()->sp_offset_for_orig_pc()), _verified_inline_entry); + add_scalarized_debug_info(rt_call_offset); + break; + } + case CodeOffsets::Verified_Inline_Entry_RO: { + assert(!needs_clinit_barrier_on_entry(method()), "can't be static"); + int rt_call_offset = _masm->verified_inline_ro_entry(ces, initial_frame_size_in_bytes(), bang_size_in_bytes(), in_bytes(frame_map()->sp_offset_for_orig_pc()), _verified_inline_entry); + add_scalarized_debug_info(rt_call_offset); + break; + } + case CodeOffsets::Verified_Inline_Entry: { + if (needs_clinit_barrier_on_entry(method())) { + clinit_barrier(method()); + } + build_frame(); + offsets()->set_value(CodeOffsets::Frame_Complete, _masm->offset()); + break; + } + default: + ShouldNotReachHere(); + break; + } +} void LIR_Assembler::emit_op0(LIR_Op0* op) { switch (op->code()) { @@ -579,23 +722,9 @@ void LIR_Assembler::emit_op0(LIR_Op0* op) { Unimplemented(); break; - case lir_std_entry: { - // init offsets - offsets()->set_value(CodeOffsets::OSR_Entry, _masm->offset()); - if (needs_icache(compilation()->method())) { - int offset = check_icache(); - offsets()->set_value(CodeOffsets::Entry, offset); - } - _masm->align(CodeEntryAlignment); - offsets()->set_value(CodeOffsets::Verified_Entry, _masm->offset()); - _masm->verified_entry(compilation()->directive()->BreakAtExecuteOption); - if (needs_clinit_barrier_on_entry(compilation()->method())) { - clinit_barrier(compilation()->method()); - } - build_frame(); - offsets()->set_value(CodeOffsets::Frame_Complete, _masm->offset()); + case lir_std_entry: + emit_std_entries(); break; - } case lir_osr_entry: offsets()->set_value(CodeOffsets::OSR_Entry, _masm->offset()); @@ -642,6 +771,10 @@ void LIR_Assembler::emit_op0(LIR_Op0* op) { on_spin_wait(); break; + case lir_check_orig_pc: + check_orig_pc(); + break; + default: ShouldNotReachHere(); break; @@ -727,7 +860,8 @@ void 
LIR_Assembler::emit_op4(LIR_Op4* op) { } void LIR_Assembler::build_frame() { - _masm->build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); + _masm->build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes(), in_bytes(frame_map()->sp_offset_for_orig_pc()), + needs_stack_repair(), method()->has_scalarized_args(), &_verified_inline_entry); } diff --git a/src/hotspot/share/c1/c1_LIRAssembler.hpp b/src/hotspot/share/c1/c1_LIRAssembler.hpp index 4cb313af90152..6b98a0b97dde7 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.hpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp @@ -31,6 +31,7 @@ #include "utilities/macros.hpp" class Compilation; +class CompiledEntrySignature; class ScopeValue; class LIR_Assembler: public CompilationResourceObj { @@ -47,6 +48,7 @@ class LIR_Assembler: public CompilationResourceObj { int _immediate_oops_patched; Label _unwind_handler_entry; + Label _verified_inline_entry; #ifdef ASSERT BlockList _branch_target_blocks; @@ -91,6 +93,10 @@ class LIR_Assembler: public CompilationResourceObj { void emit_stubs(CodeStubList* stub_list); + bool needs_stack_repair() const { + return method()->c1_needs_stack_repair(); + } + public: // addresses Address as_Address(LIR_Address* addr); @@ -98,7 +104,7 @@ class LIR_Assembler: public CompilationResourceObj { Address as_Address_hi(LIR_Address* addr); // debug information - void add_call_info(int pc_offset, CodeEmitInfo* cinfo); + void add_call_info(int pc_offset, CodeEmitInfo* cinfo, bool maybe_return_as_fields = false); void add_debug_info_for_branch(CodeEmitInfo* info); void add_debug_info_for_div0(int pc_offset, CodeEmitInfo* cinfo); void add_debug_info_for_div0_here(CodeEmitInfo* info); @@ -195,6 +201,9 @@ class LIR_Assembler: public CompilationResourceObj { void emit_alloc_obj(LIR_OpAllocObj* op); void emit_alloc_array(LIR_OpAllocArray* op); void emit_opTypeCheck(LIR_OpTypeCheck* op); + void emit_opFlattenedArrayCheck(LIR_OpFlattenedArrayCheck* op); + void 
emit_opNullFreeArrayCheck(LIR_OpNullFreeArrayCheck* op); + void emit_opSubstitutabilityCheck(LIR_OpSubstitutabilityCheck* op); void emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, Label* failure, Label* obj_is_null); void emit_compare_and_swap(LIR_OpCompareAndSwap* op); void emit_lock(LIR_OpLock* op); @@ -203,6 +212,10 @@ class LIR_Assembler: public CompilationResourceObj { void emit_rtcall(LIR_OpRTCall* op); void emit_profile_call(LIR_OpProfileCall* op); void emit_profile_type(LIR_OpProfileType* op); + void emit_profile_inline_type(LIR_OpProfileInlineType* op); + void emit_std_entries(); + void emit_std_entry(CodeOffsets::Entries entry, const CompiledEntrySignature* ces); + void add_scalarized_debug_info(int call_offset); void arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info); void arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr temp, LIR_Opr result, CodeEmitInfo* info); @@ -223,6 +236,7 @@ class LIR_Assembler: public CompilationResourceObj { void call( LIR_OpJavaCall* op, relocInfo::relocType rtype); void ic_call( LIR_OpJavaCall* op); void vtable_call( LIR_OpJavaCall* op); + int store_inline_type_fields_to_buf(ciInlineKlass* vk); void osr_entry(); @@ -249,6 +263,7 @@ class LIR_Assembler: public CompilationResourceObj { void membar_storeload(); void on_spin_wait(); void get_thread(LIR_Opr result); + void check_orig_pc(); void verify_oop_map(CodeEmitInfo* info); diff --git a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp index de3b08e08e866..0227615241c28 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.cpp +++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp @@ -30,8 +30,11 @@ #include "c1/c1_LIRGenerator.hpp" #include "c1/c1_ValueStack.hpp" #include "ci/ciArrayKlass.hpp" +#include "ci/ciFlatArrayKlass.hpp" +#include "ci/ciInlineKlass.hpp" #include "ci/ciInstance.hpp" #include "ci/ciObjArray.hpp" +#include "ci/ciObjArrayKlass.hpp" #include "ci/ciUtilities.hpp" 
#include "compiler/compilerDefinitions.inline.hpp" #include "compiler/compilerOracle.hpp" @@ -39,6 +42,7 @@ #include "gc/shared/c1/barrierSetC1.hpp" #include "oops/klass.inline.hpp" #include "oops/methodCounters.hpp" +#include "runtime/arguments.hpp" #include "runtime/sharedRuntime.hpp" #include "runtime/stubRoutines.hpp" #include "runtime/vm_version.hpp" @@ -215,6 +219,8 @@ void LIRItem::set_result(LIR_Opr opr) { } void LIRItem::load_item() { + assert(!_gen->in_conditional_code(), "LIRItem cannot be loaded in conditional code"); + if (result()->is_illegal()) { // update the items result _result = value()->operand(); @@ -622,12 +628,13 @@ void LIRGenerator::logic_op (Bytecodes::Code code, LIR_Opr result_op, LIR_Opr le } -void LIRGenerator::monitor_enter(LIR_Opr object, LIR_Opr lock, LIR_Opr hdr, LIR_Opr scratch, int monitor_no, CodeEmitInfo* info_for_exception, CodeEmitInfo* info) { +void LIRGenerator::monitor_enter(LIR_Opr object, LIR_Opr lock, LIR_Opr hdr, LIR_Opr scratch, int monitor_no, + CodeEmitInfo* info_for_exception, CodeEmitInfo* info, CodeStub* throw_ie_stub) { // for slow path, use debug info for state after successful locking - CodeStub* slow_path = new MonitorEnterStub(object, lock, info); + CodeStub* slow_path = new MonitorEnterStub(object, lock, info, throw_ie_stub, scratch); __ load_stack_address_monitor(monitor_no, lock); // for handling NullPointerException, use debug info representing just the lock stack before this monitorenter - __ lock_object(hdr, object, lock, scratch, slow_path, info_for_exception); + __ lock_object(hdr, object, lock, scratch, slow_path, info_for_exception, throw_ie_stub); } @@ -650,10 +657,15 @@ void LIRGenerator::print_if_not_loaded(const NewInstance* new_instance) { } #endif -void LIRGenerator::new_instance(LIR_Opr dst, ciInstanceKlass* klass, bool is_unresolved, LIR_Opr scratch1, LIR_Opr scratch2, LIR_Opr scratch3, LIR_Opr scratch4, LIR_Opr klass_reg, CodeEmitInfo* info) { - klass2reg_with_patching(klass_reg, klass, 
info, is_unresolved); - // If klass is not loaded we do not know if the klass has finalizers: - if (UseFastNewInstance && klass->is_loaded() +void LIRGenerator::new_instance(LIR_Opr dst, ciInstanceKlass* klass, bool is_unresolved, bool allow_inline, LIR_Opr scratch1, LIR_Opr scratch2, LIR_Opr scratch3, LIR_Opr scratch4, LIR_Opr klass_reg, CodeEmitInfo* info) { + if (allow_inline) { + assert(!is_unresolved && klass->is_loaded(), "inline type klass should be resolved"); + __ metadata2reg(klass->constant_encoding(), klass_reg); + } else { + klass2reg_with_patching(klass_reg, klass, info, is_unresolved); + } + // If klass is not loaded we do not know if the klass has finalizers or is an unexpected inline klass + if (UseFastNewInstance && klass->is_loaded() && (allow_inline || !klass->is_inlinetype()) && !Klass::layout_helper_needs_slow_path(klass->layout_helper())) { StubId stub_id = klass->is_initialized() ? StubId::c1_fast_new_instance_id : StubId::c1_fast_new_instance_init_check_id; @@ -668,7 +680,7 @@ void LIRGenerator::new_instance(LIR_Opr dst, ciInstanceKlass* klass, bool is_unr oopDesc::header_size(), instance_size, klass_reg, !klass->is_initialized(), slow_path); } else { CodeStub* slow_path = new NewInstanceStub(klass_reg, dst, klass, info, StubId::c1_new_instance_id); - __ branch(lir_cond_always, slow_path); + __ jump(slow_path); __ branch_destination(slow_path->continuation()); } } @@ -760,6 +772,11 @@ void LIRGenerator::arraycopy_helper(Intrinsic* x, int* flagsp, ciArrayKlass** ex if (expected_type == nullptr) expected_type = src_declared_type; if (expected_type == nullptr) expected_type = dst_declared_type; + if (expected_type != nullptr && expected_type->is_obj_array_klass()) { + // For a direct pointer comparison, we need the refined array klass pointer + expected_type = ciObjArrayKlass::make(expected_type->as_array_klass()->element_klass()); + } + src_objarray = (src_exact_type && src_exact_type->is_obj_array_klass()) || (src_declared_type && 
src_declared_type->is_obj_array_klass()); dst_objarray = (dst_exact_type && dst_exact_type->is_obj_array_klass()) || (dst_declared_type && dst_declared_type->is_obj_array_klass()); } @@ -768,6 +785,18 @@ void LIRGenerator::arraycopy_helper(Intrinsic* x, int* flagsp, ciArrayKlass** ex // of the required checks for a fast case can be elided. int flags = LIR_OpArrayCopy::all_flags; + // TODO 8251971 Compare ArrayKlass::properties() of source and destination + // array here instead, see also LIR_Assembler::arraycopy_inlinetype_check + if (!src->is_loaded_flat_array() && !dst->is_loaded_flat_array()) { + flags &= ~LIR_OpArrayCopy::always_slow_path; + } + if (!src->maybe_flat_array()) { + flags &= ~LIR_OpArrayCopy::src_inlinetype_check; + } + if (!dst->maybe_flat_array() && !dst->maybe_null_free_array()) { + flags &= ~LIR_OpArrayCopy::dst_inlinetype_check; + } + if (!src_objarray) flags &= ~LIR_OpArrayCopy::src_objarray; if (!dst_objarray) @@ -1460,7 +1489,7 @@ LIR_Opr LIRGenerator::load_constant(Constant* x) { LIR_Opr LIRGenerator::load_constant(LIR_Const* c) { BasicType t = c->type(); - for (int i = 0; i < _constants.length(); i++) { + for (int i = 0; i < _constants.length() && !in_conditional_code(); i++) { LIR_Const* other = _constants.at(i); if (t == other->type()) { switch (t) { @@ -1485,11 +1514,19 @@ LIR_Opr LIRGenerator::load_constant(LIR_Const* c) { LIR_Opr result = new_register(t); __ move((LIR_Opr)c, result); - _constants.append(c); - _reg_for_constants.append(result); + if (!in_conditional_code()) { + _constants.append(c); + _reg_for_constants.append(result); + } return result; } +void LIRGenerator::set_in_conditional_code(bool v) { + assert(v != _in_conditional_code, "must change state"); + _in_conditional_code = v; +} + + //------------------------field access-------------------------------------- void LIRGenerator::do_CompareAndSwap(Intrinsic* x, ValueType* type) { @@ -1507,6 +1544,18 @@ void LIRGenerator::do_CompareAndSwap(Intrinsic* x, ValueType* type) 
{ set_result(x, result); } +// Returns an int/long value with the null marker bit set. +static LIR_Opr null_marker_mask(BasicType bt, int nm_offset) { + assert(nm_offset >= 0, "field does not have null marker"); + jlong null_marker = 1ULL << (nm_offset << LogBitsPerByte); + return (bt == T_LONG) ? LIR_OprFact::longConst(null_marker) : LIR_OprFact::intConst(null_marker); +} + +static LIR_Opr null_marker_mask(BasicType bt, ciField* field) { + assert(field->null_marker_offset() != -1, "field does not have null marker"); + return null_marker_mask(bt, field->null_marker_offset() - field->offset_in_bytes()); +} + // Comment copied form templateTable_i486.cpp // ---------------------------------------------------------------------------- // Volatile variables demand their effects be made known to all CPU's in @@ -1536,8 +1585,9 @@ void LIRGenerator::do_CompareAndSwap(Intrinsic* x, ValueType* type) { void LIRGenerator::do_StoreField(StoreField* x) { + ciField* field = x->field(); bool needs_patching = x->needs_patching(); - bool is_volatile = x->field()->is_volatile(); + bool is_volatile = field->is_volatile(); BasicType field_type = x->field_type(); CodeEmitInfo* info = nullptr; @@ -1558,18 +1608,22 @@ void LIRGenerator::do_StoreField(StoreField* x) { object.load_item(); - if (is_volatile || needs_patching) { - // load item if field is volatile (fewer special cases for volatiles) - // load item if field not initialized - // load item if field not constant - // because of code patching we cannot inline constants - if (field_type == T_BYTE || field_type == T_BOOLEAN) { - value.load_byte_item(); - } else { - value.load_item(); - } + if (field->is_flat()) { + value.load_item(); } else { - value.load_for_store(field_type); + if (is_volatile || needs_patching) { + // load item if field is volatile (fewer special cases for volatiles) + // load item if field not initialized + // load item if field not constant + // because of code patching we cannot inline constants + if 
(field_type == T_BYTE || field_type == T_BOOLEAN) { + value.load_byte_item(); + } else { + value.load_item(); + } + } else { + value.load_for_store(field_type); + } } set_no_result(x); @@ -1598,18 +1652,264 @@ void LIRGenerator::do_StoreField(StoreField* x) { decorators |= C1_NEEDS_PATCHING; } + if (field->is_flat()) { + ciInlineKlass* vk = field->type()->as_inline_klass(); + +#ifdef ASSERT + assert(field->is_atomic(), "No atomic access required %s.%s", field->holder()->name()->as_utf8(), field->name()->as_utf8()); + // ZGC does not support compressed oops, so only one oop can be in the payload which is written by a "normal" oop store. + assert(!vk->contains_oops() || !UseZGC, "ZGC does not support embedded oops in flat fields"); +#endif + + // Zero the payload + BasicType bt = vk->atomic_size_to_basic_type(field->is_null_free()); + LIR_Opr payload = new_register((bt == T_LONG) ? bt : T_INT); + LIR_Opr zero = (bt == T_LONG) ? LIR_OprFact::longConst(0) : LIR_OprFact::intConst(0); + __ move(zero, payload); + + bool is_constant_null = value.is_constant() && value.value()->is_null_obj(); + if (!is_constant_null) { + LabelObj* L_isNull = new LabelObj(); + bool needs_null_check = !value.is_constant(); + if (needs_null_check) { + __ cmp(lir_cond_equal, value.result(), LIR_OprFact::oopConst(nullptr)); + __ branch(lir_cond_equal, L_isNull->label()); + } + // Load payload (if not empty) and set null marker (if not null-free) + if (!vk->is_empty()) { + access_load_at(decorators, bt, value, LIR_OprFact::intConst(vk->payload_offset()), payload); + } + if (!field->is_null_free()) { + __ logical_or(payload, null_marker_mask(bt, field), payload); + } + if (needs_null_check) { + __ branch_destination(L_isNull->label()); + } + } + access_store_at(decorators, bt, object, LIR_OprFact::intConst(x->offset()), payload, + // Make sure to emit an implicit null check and pass the information + // that this is a flat store that might require gc barriers for oop fields. + info != nullptr ? 
new CodeEmitInfo(info) : nullptr, info, vk); + return; + } + access_store_at(decorators, field_type, object, LIR_OprFact::intConst(x->offset()), value.result(), info != nullptr ? new CodeEmitInfo(info) : nullptr, info); } +// Wrap an already computed address register as a C1 Instruction so it +// can be passed as LIRItem into access_load_at() / access_store_at(). +class ComputedAddressValue: public Instruction { + public: + ComputedAddressValue(ValueType* type, LIR_Opr addr) : Instruction(type) { + set_operand(addr); + } + virtual void input_values_do(ValueVisitor*) {} + virtual void visit(InstructionVisitor* v) {} + virtual const char* name() const { return "ComputedAddressValue"; } +}; + +LIR_Opr LIRGenerator::get_and_load_element_address(LIRItem& array, LIRItem& index) { +#ifndef _LP64 + // We need to be careful with overflows in 32-bit arithmetic + Unimplemented(); +#endif + ciType* array_type = array.value()->declared_type(); + ciFlatArrayKlass* flat_array_klass = array_type->as_flat_array_klass(); + assert(flat_array_klass->is_loaded(), "must be"); + + int array_header_size = flat_array_klass->array_header_in_bytes(); + int shift = flat_array_klass->log2_element_size(); + + LIR_Opr index_op = new_register(T_LONG); + if (index.result()->is_constant()) { + jint const_index = index.result()->as_jint(); + __ move(LIR_OprFact::longConst(static_cast(const_index) << shift), index_op); + } else { + __ convert(Bytecodes::_i2l, index.result(), index_op); + // Need to shift manually, as LIR_Address can scale only up to 3. 
+ __ shift_left(index_op, shift, index_op); + } + + LIR_Opr elm_op = new_pointer_register(); + LIR_Address* elm_address = generate_address(array.result(), index_op, 0, array_header_size, T_ADDRESS); + __ leal(LIR_OprFact::address(elm_address), elm_op); + return elm_op; +} + +void LIRGenerator::access_sub_element(LIRItem& array, LIRItem& index, LIR_Opr& result, ciField* field, size_t sub_offset) { + assert(field != nullptr, "Need a subelement type specified"); + + // Find the starting address of the source (inside the array) + LIR_Opr elm_op = get_and_load_element_address(array, index); + + BasicType subelt_type = field->type()->basic_type(); + ComputedAddressValue* elm_resolved_addr = new ComputedAddressValue(as_ValueType(subelt_type), elm_op); + LIRItem elm_item(elm_resolved_addr, this); + + DecoratorSet decorators = IN_HEAP; + access_load_at(decorators, subelt_type, + elm_item, LIR_OprFact::longConst(sub_offset), result, + nullptr, nullptr); +} + +LIR_Opr LIRGenerator::access_flat_array(bool is_load, LIRItem& array, LIRItem& index, LIRItem& obj_item, + ciField* field, size_t sub_offset) { + assert(sub_offset == 0 || field != nullptr, "Sanity check"); + + // Find the starting address of the source (inside the array) + LIR_Opr elm_op = get_and_load_element_address(array, index); + + ciFlatArrayKlass* array_klass = array.value()->declared_type()->as_flat_array_klass(); + ciInlineKlass* elem_klass = nullptr; + if (field != nullptr) { + elem_klass = field->type()->as_inline_klass(); + } else { + elem_klass = array_klass->element_klass()->as_inline_klass(); + } + + bool null_free = array_klass->is_elem_null_free(); + bool atomic = array_klass->is_elem_atomic(); + assert(null_free || atomic, "nullable flat arrays must use an atomic layout"); + if (atomic) { + assert(field == nullptr && sub_offset == 0, "delayed sub-element access is only supported for non-atomic arrays"); + BasicType bt = elem_klass->atomic_size_to_basic_type(null_free); + LIR_Opr payload = 
new_register((bt == T_LONG) ? bt : T_INT); + ComputedAddressValue* elm_resolved_addr = new ComputedAddressValue(as_ValueType(bt), elm_op); + LIRItem elm_item(elm_resolved_addr, this); + DecoratorSet decorators = IN_HEAP; + if (is_load) { + access_load_at(decorators, bt, elm_item, LIR_OprFact::intConst(0), payload, nullptr, nullptr); + access_store_at(decorators, bt, obj_item, LIR_OprFact::intConst(elem_klass->payload_offset()), payload, + nullptr, nullptr, elem_klass); + // Null check is performed in the caller + } else { + // Zero the payload + LIR_Opr zero = (bt == T_LONG) ? LIR_OprFact::longConst(0) : LIR_OprFact::intConst(0); + __ move(zero, payload); + + if (null_free) { + if (!elem_klass->is_empty()) { + access_load_at(decorators, bt, obj_item, LIR_OprFact::intConst(elem_klass->payload_offset()), payload); + } + } else { + bool is_constant_null = obj_item.is_constant() && obj_item.value()->is_null_obj(); + if (!is_constant_null) { + LabelObj* L_isNull = new LabelObj(); + bool needs_null_check = !obj_item.is_constant(); + if (needs_null_check) { + __ cmp(lir_cond_equal, obj_item.result(), LIR_OprFact::oopConst(nullptr)); + __ branch(lir_cond_equal, L_isNull->label()); + } + // Load payload (if not empty) and set null marker. 
+ if (!elem_klass->is_empty()) { + access_load_at(decorators, bt, obj_item, LIR_OprFact::intConst(elem_klass->payload_offset()), payload); + } + __ logical_or(payload, null_marker_mask(bt, elem_klass->null_marker_offset_in_payload()), payload); + if (needs_null_check) { + __ branch_destination(L_isNull->label()); + } + } + } + access_store_at(decorators, bt, elm_item, LIR_OprFact::intConst(0), payload, nullptr, nullptr, elem_klass); + } + return payload; + } + + for (int i = 0; i < elem_klass->nof_nonstatic_fields(); i++) { + ciField* inner_field = elem_klass->nonstatic_field_at(i); + assert(!inner_field->is_flat(), "flat fields must have been expanded"); + int obj_offset = inner_field->offset_in_bytes(); + size_t elm_offset = obj_offset - elem_klass->payload_offset() + sub_offset; // object header is not stored in array. + BasicType field_type = inner_field->type()->basic_type(); + + // Types which are smaller than int are still passed in an int register. + BasicType reg_type = field_type; + switch (reg_type) { + case T_BYTE: + case T_BOOLEAN: + case T_SHORT: + case T_CHAR: + reg_type = T_INT; + break; + default: + break; + } + + LIR_Opr temp = new_register(reg_type); + ComputedAddressValue* elm_resolved_addr = new ComputedAddressValue(as_ValueType(field_type), elm_op); + LIRItem elm_item(elm_resolved_addr, this); + + DecoratorSet decorators = IN_HEAP; + if (is_load) { + access_load_at(decorators, field_type, + elm_item, LIR_OprFact::longConst(elm_offset), temp, + nullptr, nullptr); + access_store_at(decorators, field_type, + obj_item, LIR_OprFact::intConst(obj_offset), temp, + nullptr, nullptr); + } else { + access_load_at(decorators, field_type, + obj_item, LIR_OprFact::intConst(obj_offset), temp, + nullptr, nullptr); + access_store_at(decorators, field_type, + elm_item, LIR_OprFact::longConst(elm_offset), temp, + nullptr, nullptr); + } + } + return LIR_OprFact::illegalOpr; +} + +void LIRGenerator::check_flat_array(LIR_Opr array, CodeStub* slow_path) { + LIR_Opr 
tmp = new_register(T_METADATA); + __ check_flat_array(array, tmp, slow_path); +} + +void LIRGenerator::check_null_free_array(LIRItem& array, LIRItem& value, CodeEmitInfo* info) { + LabelObj* L_end = new LabelObj(); + LIR_Opr tmp = new_register(T_METADATA); + __ check_null_free_array(array.result(), tmp); +#ifdef RISCV + // tmp is used to hold the result of null free array check on riscv + // See LIR_Assembler::emit_opNullFreeArrayCheck + __ cmp(lir_cond_equal, tmp, LIR_OprFact::metadataConst(nullptr)); +#endif + __ branch(lir_cond_equal, L_end->label()); + __ null_check(value.result(), info); + __ branch_destination(L_end->label()); +} + +bool LIRGenerator::needs_flat_array_store_check(StoreIndexed* x) { + if (x->elt_type() == T_OBJECT && x->array()->maybe_flat_array()) { + ciType* type = x->value()->declared_type(); + if (type != nullptr && type->is_klass()) { + ciKlass* klass = type->as_klass(); + if (!klass->can_be_inline_klass() || (klass->is_inlinetype() && !klass->as_inline_klass()->maybe_flat_in_array())) { + // This is known to be a non-flat object. If the array is a flat array, + // it will be caught by the code generated by array_store_check(). + return false; + } + } + // We're not 100% sure, so let's do the flat_array_store_check. 
+ return true; + } + return false; +} + +bool LIRGenerator::needs_null_free_array_store_check(StoreIndexed* x) { + return x->elt_type() == T_OBJECT && x->array()->maybe_null_free_array(); +} + void LIRGenerator::do_StoreIndexed(StoreIndexed* x) { assert(x->is_pinned(),""); + assert(x->elt_type() != T_ARRAY, "never used"); + bool is_loaded_flat_array = x->array()->is_loaded_flat_array(); bool needs_range_check = x->compute_needs_range_check(); bool use_length = x->length() != nullptr; bool obj_store = is_reference_type(x->elt_type()); - bool needs_store_check = obj_store && (x->value()->as_Constant() == nullptr || - !get_jobject_constant(x->value())->is_null_object() || - x->should_profile()); + bool needs_store_check = obj_store && !(is_loaded_flat_array && x->is_exact_flat_array_store()) && + (x->value()->as_Constant() == nullptr || + !get_jobject_constant(x->value())->is_null_object()); LIRItem array(x->array(), this); LIRItem index(x->index(), this); @@ -1622,9 +1922,10 @@ void LIRGenerator::do_StoreIndexed(StoreIndexed* x) { if (use_length && needs_range_check) { length.set_instruction(x->length()); length.load_item(); - } - if (needs_store_check || x->check_boolean()) { + + if (needs_store_check || x->check_boolean() + || is_loaded_flat_array || needs_flat_array_store_check(x) || needs_null_free_array_store_check(x)) { value.load_item(); } else { value.load_for_store(x->elt_type()); @@ -1657,13 +1958,65 @@ void LIRGenerator::do_StoreIndexed(StoreIndexed* x) { array_store_check(value.result(), array.result(), store_check_info, x->profiled_method(), x->profiled_bci()); } - DecoratorSet decorators = IN_HEAP | IS_ARRAY; - if (x->check_boolean()) { - decorators |= C1_MASK_BOOLEAN; + if (x->should_profile()) { + if (is_loaded_flat_array) { + // No need to profile a store to a flat array of known type. This can happen if + // the type only became known after optimizations (for example, after the PhiSimplifier). 
+ x->set_should_profile(false); + } else { + int bci = x->profiled_bci(); + ciMethodData* md = x->profiled_method()->method_data(); + assert(md != nullptr, "Sanity"); + ciProfileData* data = md->bci_to_data(bci); + assert(data != nullptr && data->is_ArrayStoreData(), "incorrect profiling entry"); + ciArrayStoreData* store_data = (ciArrayStoreData*)data; + profile_array_type(x, md, store_data); + assert(store_data->is_ArrayStoreData(), "incorrect profiling entry"); + if (x->array()->maybe_null_free_array()) { + profile_null_free_array(array, md, data); + } + } } - access_store_at(decorators, x->elt_type(), array, index.result(), value.result(), - nullptr, null_check_info); + if (is_loaded_flat_array) { + ciFlatArrayKlass* array_klass = x->array()->declared_type()->as_flat_array_klass(); + ciInlineKlass* elem_klass = array_klass->element_klass()->as_inline_klass(); + bool null_free = array_klass->is_elem_null_free(); + if (null_free && !x->value()->is_null_free()) { + __ null_check(value.result(), new CodeEmitInfo(range_check_info)); + } + // If array element is an empty null-free inline type, no need to copy anything. + // Nullable empty arrays still need their null marker updated. 
+ if (!elem_klass->is_empty() || !null_free) { + access_flat_array(false, array, index, value); + } + } else { + StoreFlattenedArrayStub* slow_path = nullptr; + + if (needs_flat_array_store_check(x)) { + // Check if we indeed have a flat array + index.load_item(); + slow_path = new StoreFlattenedArrayStub(array.result(), index.result(), value.result(), state_for(x, x->state_before())); + check_flat_array(array.result(), slow_path); + set_in_conditional_code(true); + } + + if (needs_null_free_array_store_check(x)) { + CodeEmitInfo* info = new CodeEmitInfo(range_check_info); + check_null_free_array(array, value, info); + } + + DecoratorSet decorators = IN_HEAP | IS_ARRAY; + if (x->check_boolean()) { + decorators |= C1_MASK_BOOLEAN; + } + + access_store_at(decorators, x->elt_type(), array, index.result(), value.result(), nullptr, null_check_info); + if (slow_path != nullptr) { + __ branch_destination(slow_path->continuation()); + set_in_conditional_code(false); + } + } } void LIRGenerator::access_load_at(DecoratorSet decorators, BasicType type, @@ -1692,9 +2045,10 @@ void LIRGenerator::access_load(DecoratorSet decorators, BasicType type, void LIRGenerator::access_store_at(DecoratorSet decorators, BasicType type, LIRItem& base, LIR_Opr offset, LIR_Opr value, - CodeEmitInfo* patch_info, CodeEmitInfo* store_emit_info) { + CodeEmitInfo* patch_info, CodeEmitInfo* store_emit_info, + ciInlineKlass* vk) { decorators |= ACCESS_WRITE; - LIRAccess access(this, decorators, base, offset, type, patch_info, store_emit_info); + LIRAccess access(this, decorators, base, offset, type, patch_info, store_emit_info, vk); if (access.is_raw()) { _barrier_set->BarrierSetC1::store_at(access, value); } else { @@ -1745,8 +2099,9 @@ LIR_Opr LIRGenerator::access_atomic_add_at(DecoratorSet decorators, BasicType ty } void LIRGenerator::do_LoadField(LoadField* x) { + ciField* field = x->field(); bool needs_patching = x->needs_patching(); - bool is_volatile = x->field()->is_volatile(); + bool 
is_volatile = field->is_volatile(); BasicType field_type = x->field_type(); CodeEmitInfo* info = nullptr; @@ -1797,6 +2152,41 @@ void LIRGenerator::do_LoadField(LoadField* x) { decorators |= C1_NEEDS_PATCHING; } + if (field->is_flat()) { + ciInlineKlass* vk = field->type()->as_inline_klass(); +#ifdef ASSERT + assert(field->is_atomic(), "No atomic access required"); + assert(x->state_before() != nullptr, "Needs state before"); +#endif + + // Allocate buffer (we can't easily do this conditionally on the null check below + // because branches added in the LIR are opaque to the register allocator). + NewInstance* buffer = new NewInstance(vk, x->state_before(), false, true); + do_NewInstance(buffer); + LIRItem dest(buffer, this); + + // Copy the payload to the buffer + BasicType bt = vk->atomic_size_to_basic_type(field->is_null_free()); + LIR_Opr payload = new_register((bt == T_LONG) ? bt : T_INT); + access_load_at(decorators, bt, object, LIR_OprFact::intConst(field->offset_in_bytes()), payload, + // Make sure to emit an implicit null check + info ? new CodeEmitInfo(info) : nullptr, info); + access_store_at(decorators, bt, dest, LIR_OprFact::intConst(vk->payload_offset()), payload); + + if (field->is_null_free()) { + set_result(x, buffer->operand()); + } else { + // Check the null marker and set result to null if it's not set + __ logical_and(payload, null_marker_mask(bt, field), payload); + __ cmp(lir_cond_equal, payload, (bt == T_LONG) ? LIR_OprFact::longConst(0) : LIR_OprFact::intConst(0)); + __ cmove(lir_cond_equal, LIR_OprFact::oopConst(nullptr), buffer->operand(), rlock_result(x), T_OBJECT); + } + + // Ensure the copy is visible before any subsequent store that publishes the buffer. 
+ __ membar_storestore(); + return; + } + LIR_Opr result = rlock_result(x, field_type); access_load_at(decorators, field_type, object, LIR_OprFact::intConst(x->offset()), result, @@ -1945,12 +2335,81 @@ void LIRGenerator::do_LoadIndexed(LoadIndexed* x) { } } - DecoratorSet decorators = IN_HEAP | IS_ARRAY; + ciMethodData* md = nullptr; + ciProfileData* data = nullptr; + if (x->should_profile()) { + if (x->array()->is_loaded_flat_array()) { + // No need to profile a load from a flat array of known type. This can happen if + // the type only became known after optimizations (for example, after the PhiSimplifier). + x->set_should_profile(false); + } else { + int bci = x->profiled_bci(); + md = x->profiled_method()->method_data(); + assert(md != nullptr, "Sanity"); + data = md->bci_to_data(bci); + assert(data != nullptr && data->is_ArrayLoadData(), "incorrect profiling entry"); + ciArrayLoadData* load_data = (ciArrayLoadData*)data; + profile_array_type(x, md, load_data); + } + } + + Value element = nullptr; + if (x->buffer() != nullptr) { + assert(x->array()->is_loaded_flat_array(), "must be"); + // Find the destination address (of the NewInlineTypeInstance). + LIRItem buffer(x->buffer(), this); + LIR_Opr payload = access_flat_array(true, array, index, buffer, + x->delayed() == nullptr ? nullptr : x->delayed()->field(), + x->delayed() == nullptr ? 
0 : x->delayed()->offset()); + ciFlatArrayKlass* array_klass = x->array()->declared_type()->as_flat_array_klass(); + if (array_klass->is_elem_null_free()) { + set_result(x, x->buffer()->operand()); + } else { + // Check the null marker and set result to null if it's not set + ciInlineKlass* elem_klass = array_klass->element_klass()->as_inline_klass(); + BasicType bt = elem_klass->atomic_size_to_basic_type(false); + assert(payload->is_valid(), "nullable flat array load must return the atomic payload"); + __ logical_and(payload, null_marker_mask(bt, elem_klass->null_marker_offset_in_payload()), payload); + __ cmp(lir_cond_equal, payload, (bt == T_LONG) ? LIR_OprFact::longConst(0) : LIR_OprFact::intConst(0)); + __ cmove(lir_cond_equal, LIR_OprFact::oopConst(nullptr), buffer.result(), rlock_result(x), T_OBJECT); + } + } else if (x->delayed() != nullptr) { + assert(x->array()->is_loaded_flat_array(), "must be"); + LIR_Opr result = rlock_result(x, x->delayed()->field()->type()->basic_type()); + access_sub_element(array, index, result, x->delayed()->field(), x->delayed()->offset()); + } else { + LIR_Opr result = rlock_result(x, x->elt_type()); + LoadFlattenedArrayStub* slow_path = nullptr; + + if (x->should_profile() && x->array()->maybe_null_free_array()) { + profile_null_free_array(array, md, data); + } + + if (x->elt_type() == T_OBJECT && x->array()->maybe_flat_array()) { + assert(x->delayed() == nullptr, "Delayed LoadIndexed only apply to loaded_flat_arrays"); + index.load_item(); + // if we are loading from a flat array, load it using a runtime call + slow_path = new LoadFlattenedArrayStub(array.result(), index.result(), result, state_for(x, x->state_before())); + check_flat_array(array.result(), slow_path); + set_in_conditional_code(true); + } + + DecoratorSet decorators = IN_HEAP | IS_ARRAY; + access_load_at(decorators, x->elt_type(), + array, index.result(), result, + nullptr, null_check_info); - LIR_Opr result = rlock_result(x, x->elt_type()); - 
access_load_at(decorators, x->elt_type(), - array, index.result(), result, - nullptr, null_check_info); + if (slow_path != nullptr) { + __ branch_destination(slow_path->continuation()); + set_in_conditional_code(false); + } + + element = x; + } + + if (x->should_profile()) { + profile_element_type(element, md, (ciArrayLoadData*)data); + } } @@ -2433,7 +2892,7 @@ ciKlass* LIRGenerator::profile_type(ciMethodData* md, int md_base_offset, int md } ciKlass* exact_signature_k = nullptr; - if (do_update) { + if (do_update && signature_at_call_k != nullptr) { // Is the type from the signature exact (the only one possible)? exact_signature_k = signature_at_call_k->exact_klass(); if (exact_signature_k == nullptr) { @@ -2465,6 +2924,21 @@ ciKlass* LIRGenerator::profile_type(ciMethodData* md, int md_base_offset, int md do_update = exact_klass == nullptr || ciTypeEntries::valid_ciklass(profiled_k) != exact_klass; } + if (exact_klass != nullptr && exact_klass->is_obj_array_klass()) { + ciArrayKlass* exact_array_klass = exact_klass->as_array_klass(); + if (exact_array_klass->is_refined()) { + do_update = ciTypeEntries::valid_ciklass(profiled_k) != exact_klass; + } else if (exact_klass->can_be_inline_array_klass()) { + // Inline type arrays can have additional properties. Load the klass unless + // the C1 type already carries refined array properties. 
+ exact_klass = nullptr; + do_update = true; + } else { + // For a direct pointer comparison, we need the refined array klass pointer + exact_klass = ciObjArrayKlass::make(exact_array_klass->element_klass()); + do_update = ciTypeEntries::valid_ciklass(profiled_k) != exact_klass; + } + } if (!do_null && !do_update) { return result; } @@ -2518,6 +2992,52 @@ void LIRGenerator::profile_parameters(Base* x) { } } +void LIRGenerator::profile_flags(ciMethodData* md, ciProfileData* data, int flag, LIR_Condition condition) { + assert(md != nullptr && data != nullptr, "should have been initialized"); + LIR_Opr mdp = new_register(T_METADATA); + __ metadata2reg(md->constant_encoding(), mdp); + LIR_Address* addr = new LIR_Address(mdp, md->byte_offset_of_slot(data, DataLayout::flags_offset()), T_BYTE); + LIR_Opr flags = new_register(T_INT); + __ move(addr, flags); + LIR_Opr update; + if (condition != lir_cond_always) { + update = new_register(T_INT); + __ cmove(condition, LIR_OprFact::intConst(0), LIR_OprFact::intConst(flag), update, T_INT); + } else { + update = LIR_OprFact::intConst(flag); + } + __ logical_or(flags, update, flags); + __ store(flags, addr); +} + +void LIRGenerator::profile_null_free_array(LIRItem array, ciMethodData* md, ciProfileData* data) { + assert(compilation()->profile_array_accesses(), "array access profiling is disabled"); + LabelObj* L_end = new LabelObj(); + LIR_Opr tmp = new_register(T_METADATA); + __ check_null_free_array(array.result(), tmp); +#ifdef RISCV + // tmp is used to hold the result of null free array check on riscv + // See LIR_Assembler::emit_opNullFreeArrayCheck + __ cmp(lir_cond_equal, tmp, LIR_OprFact::metadataConst(nullptr)); +#endif + profile_flags(md, data, ArrayStoreData::null_free_array_byte_constant(), lir_cond_equal); +} + +template void LIRGenerator::profile_array_type(AccessIndexed* x, ciMethodData*& md, ArrayData*& load_store) { + assert(compilation()->profile_array_accesses(), "array access profiling is disabled"); + LIR_Opr 
mdp = LIR_OprFact::illegalOpr; + profile_type(md, md->byte_offset_of_slot(load_store, ArrayData::array_offset()), 0, + load_store->array()->type(), x->array(), mdp, true, nullptr, nullptr); +} + +void LIRGenerator::profile_element_type(Value element, ciMethodData* md, ciArrayLoadData* load_data) { + assert(compilation()->profile_array_accesses(), "array access profiling is disabled"); + assert(md != nullptr && load_data != nullptr, "should have been initialized"); + LIR_Opr mdp = LIR_OprFact::illegalOpr; + profile_type(md, md->byte_offset_of_slot(load_data, ArrayLoadData::element_offset()), 0, + load_data->element()->type(), element, mdp, false, nullptr, nullptr); +} + void LIRGenerator::do_Base(Base* x) { __ std_entry(LIR_OprFact::illegalOpr); // Emit moves from physical registers / stack slots to virtual registers @@ -2559,6 +3079,12 @@ void LIRGenerator::do_Base(Base* x) { java_index += type2size[t]; } + // Check if we need a membar at the beginning of the java.lang.Object + // constructor to satisfy the memory model for strict fields. + if (Arguments::is_valhalla_enabled() && method()->intrinsic_id() == vmIntrinsics::_Object_init) { + __ membar_storestore(); + } + if (compilation()->env()->dtrace_method_probes()) { BasicTypeList signature; signature.append(LP64_ONLY(T_LONG) NOT_LP64(T_INT)); // thread @@ -2600,6 +3126,14 @@ void LIRGenerator::do_Base(Base* x) { CodeEmitInfo* info = new CodeEmitInfo(scope()->start()->state()->copy(ValueStack::StateBefore, SynchronizationEntryBCI), nullptr, false); increment_invocation_counter(info); } + if (method()->has_scalarized_args()) { + // Check if deoptimization was triggered (i.e. orig_pc was set) while buffering scalarized inline type arguments + // in the entry point (see comments in frame::deoptimize). If so, deoptimize only now that we have the right state. 
+ CodeEmitInfo* info = new CodeEmitInfo(scope()->start()->state()->copy(ValueStack::StateBefore, 0), nullptr, false); + CodeStub* deopt_stub = new DeoptimizeStub(info, Deoptimization::Reason_none, Deoptimization::Action_none); + __ append(new LIR_Op0(lir_check_orig_pc)); + __ branch(lir_cond_notEqual, deopt_stub); + } // all blocks with a successor must end with an unconditional jump // to the successor even if they are consecutive @@ -2615,6 +3149,19 @@ void LIRGenerator::do_OsrEntry(OsrEntry* x) { __ move(LIR_Assembler::osrBufferPointer(), result); } +void LIRGenerator::invoke_load_one_argument(LIRItem* param, LIR_Opr loc) { + if (loc->is_register()) { + param->load_item_force(loc); + } else { + LIR_Address* addr = loc->as_address_ptr(); + param->load_for_store(addr->type()); + if (addr->type() == T_OBJECT) { + __ move_wide(param->result(), addr); + } else { + __ move(param->result(), addr); + } + } +} void LIRGenerator::invoke_load_arguments(Invoke* x, LIRItemList* args, const LIR_OprList* arg_list) { assert(args->length() == arg_list->length(), @@ -2622,16 +3169,7 @@ void LIRGenerator::invoke_load_arguments(Invoke* x, LIRItemList* args, const LIR for (int i = x->has_receiver() ? 
1 : 0; i < args->length(); i++) { LIRItem* param = args->at(i); LIR_Opr loc = arg_list->at(i); - if (loc->is_register()) { - param->load_item_force(loc); - } else { - LIR_Address* addr = loc->as_address_ptr(); - param->load_for_store(addr->type()); - if (addr->type() == T_OBJECT) { - __ move_wide(param->result(), addr); - } else - __ move(param->result(), addr); - } + invoke_load_one_argument(param, loc); } if (x->has_receiver()) { @@ -2778,9 +3316,10 @@ void LIRGenerator::do_IfOp(IfOp* x) { LIRItem left(x->x(), this); LIRItem right(x->y(), this); left.load_item(); - if (can_inline_as_constant(right.value())) { + if (can_inline_as_constant(right.value()) && !x->substitutability_check()) { right.dont_load_item(); } else { + // substitutability_check() needs to use right as a base register. right.load_item(); } @@ -2788,10 +3327,58 @@ void LIRGenerator::do_IfOp(IfOp* x) { LIRItem f_val(x->fval(), this); t_val.dont_load_item(); f_val.dont_load_item(); - LIR_Opr reg = rlock_result(x); - __ cmp(lir_cond(x->cond()), left.result(), right.result()); - __ cmove(lir_cond(x->cond()), t_val.result(), f_val.result(), reg, as_BasicType(x->x()->type())); + if (x->substitutability_check()) { + substitutability_check(x, left, right, t_val, f_val); + } else { + LIR_Opr reg = rlock_result(x); + __ cmp(lir_cond(x->cond()), left.result(), right.result()); + __ cmove(lir_cond(x->cond()), t_val.result(), f_val.result(), reg, as_BasicType(x->x()->type())); + } +} + +void LIRGenerator::substitutability_check(IfOp* x, LIRItem& left, LIRItem& right, LIRItem& t_val, LIRItem& f_val) { + assert(x->cond() == If::eql || x->cond() == If::neq, "must be"); + bool is_acmpeq = (x->cond() == If::eql); + LIR_Opr equal_result = is_acmpeq ? t_val.result() : f_val.result(); + LIR_Opr not_equal_result = is_acmpeq ? 
f_val.result() : t_val.result(); + LIR_Opr result = rlock_result(x); + CodeEmitInfo* info = state_for(x, x->state_before()); + + substitutability_check_common(x->x(), x->y(), left, right, equal_result, not_equal_result, result, info); +} + +void LIRGenerator::substitutability_check(If* x, LIRItem& left, LIRItem& right) { + LIR_Opr equal_result = LIR_OprFact::intConst(1); + LIR_Opr not_equal_result = LIR_OprFact::intConst(0); + LIR_Opr result = new_register(T_INT); + CodeEmitInfo* info = state_for(x, x->state_before()); + + substitutability_check_common(x->x(), x->y(), left, right, equal_result, not_equal_result, result, info); + + assert(x->cond() == If::eql || x->cond() == If::neq, "must be"); + __ cmp(lir_cond(x->cond()), result, equal_result); +} + +void LIRGenerator::substitutability_check_common(Value left_val, Value right_val, LIRItem& left, LIRItem& right, + LIR_Opr equal_result, LIR_Opr not_equal_result, LIR_Opr result, + CodeEmitInfo* info) { + LIR_Opr tmp1 = LIR_OprFact::illegalOpr; + LIR_Opr tmp2 = LIR_OprFact::illegalOpr; + + ciKlass* left_klass = left_val->as_loaded_klass_or_null(); + ciKlass* right_klass = right_val->as_loaded_klass_or_null(); + if (left_klass != nullptr && left_klass->is_inlinetype() && left_klass == right_klass) { + // No need to load klass -- the operands are statically known to be the same inline klass. + } else { + BasicType t_klass = UseCompressedOops ? 
T_INT : T_METADATA; + tmp1 = new_register(t_klass); + tmp2 = new_register(t_klass); + } + + CodeStub* slow_path = new SubstitutabilityCheckStub(left.result(), right.result(), info); + __ substitutability_check(result, left.result(), right.result(), equal_result, not_equal_result, + left_klass, right_klass, tmp1, tmp2, info, slow_path); } void LIRGenerator::do_RuntimeCall(address routine, Intrinsic* x) { @@ -3066,7 +3653,7 @@ void LIRGenerator::do_ProfileReturnType(ProfileReturnType* x) { ciProfileData* data = md->bci_to_data(bci); if (data != nullptr) { assert(data->is_CallTypeData() || data->is_VirtualCallTypeData(), "wrong profile data type"); - ciReturnTypeEntry* ret = data->is_CallTypeData() ? ((ciCallTypeData*)data)->ret() : ((ciVirtualCallTypeData*)data)->ret(); + ciSingleTypeEntry* ret = data->is_CallTypeData() ? ((ciCallTypeData*)data)->ret() : ((ciVirtualCallTypeData*)data)->ret(); LIR_Opr mdp = LIR_OprFact::illegalOpr; bool ignored_will_link; @@ -3087,6 +3674,52 @@ void LIRGenerator::do_ProfileReturnType(ProfileReturnType* x) { } } +bool LIRGenerator::profile_inline_klass(ciMethodData* md, ciProfileData* data, Value value, int flag) { + ciKlass* klass = value->as_loaded_klass_or_null(); + if (klass != nullptr) { + if (klass->is_inlinetype()) { + profile_flags(md, data, flag, lir_cond_always); + } else if (klass->can_be_inline_klass()) { + return false; + } + } else { + return false; + } + return true; +} + +void LIRGenerator::do_ProfileACmpTypes(ProfileACmpTypes* x) { + ciMethod* method = x->method(); + assert(method != nullptr, "method should be set if branch is profiled"); + ciMethodData* md = method->method_data_or_null(); + assert(md != nullptr, "Sanity"); + ciProfileData* data = md->bci_to_data(x->bci()); + assert(data != nullptr, "must have profiling data"); + assert(data->is_ACmpData(), "need BranchData for two-way branches"); + ciACmpData* acmp = (ciACmpData*)data; + LIR_Opr mdp = LIR_OprFact::illegalOpr; + profile_type(md, 
md->byte_offset_of_slot(acmp, ACmpData::left_offset()), 0, + acmp->left()->type(), x->left(), mdp, !x->left_maybe_null(), nullptr, nullptr); + int flags_offset = md->byte_offset_of_slot(data, DataLayout::flags_offset()); + if (!profile_inline_klass(md, acmp, x->left(), ACmpData::left_inline_type_byte_constant())) { + LIR_Opr mdp = new_register(T_METADATA); + __ metadata2reg(md->constant_encoding(), mdp); + LIRItem value(x->left(), this); + value.load_item(); + __ profile_inline_type(new LIR_Address(mdp, flags_offset, T_INT), value.result(), ACmpData::left_inline_type_byte_constant(), new_register(T_INT), !x->left_maybe_null()); + } + profile_type(md, md->byte_offset_of_slot(acmp, ACmpData::left_offset()), + in_bytes(ACmpData::right_offset()) - in_bytes(ACmpData::left_offset()), + acmp->right()->type(), x->right(), mdp, !x->right_maybe_null(), nullptr, nullptr); + if (!profile_inline_klass(md, acmp, x->right(), ACmpData::right_inline_type_byte_constant())) { + LIR_Opr mdp = new_register(T_METADATA); + __ metadata2reg(md->constant_encoding(), mdp); + LIRItem value(x->right(), this); + value.load_item(); + __ profile_inline_type(new LIR_Address(mdp, flags_offset, T_INT), value.result(), ACmpData::right_inline_type_byte_constant(), new_register(T_INT), !x->right_maybe_null()); + } +} + void LIRGenerator::do_ProfileInvoke(ProfileInvoke* x) { // We can safely ignore accessors here, since c2 will inline them anyway, // accessors are also always mature. diff --git a/src/hotspot/share/c1/c1_LIRGenerator.hpp b/src/hotspot/share/c1/c1_LIRGenerator.hpp index 8e30d05af6dbb..65c64d2262cdf 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.hpp +++ b/src/hotspot/share/c1/c1_LIRGenerator.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2005, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -169,6 +169,7 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { #endif BitMap2D _vreg_flags; // flags which can be set on a per-vreg basis LIR_List* _lir; + bool _in_conditional_code; LIRGenerator* gen() { return this; @@ -195,6 +196,7 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { friend class PhiResolver; + void set_in_conditional_code(bool v); public: // unified bailout support void bailout(const char* msg) const { compilation()->bailout(msg); } @@ -214,6 +216,7 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { LIR_Opr load_constant(Constant* x); LIR_Opr load_constant(LIR_Const* constant); + bool in_conditional_code() const { return _in_conditional_code; } // Given an immediate value, return an operand usable in logical ops. LIR_Opr load_immediate(jlong x, BasicType type); @@ -272,6 +275,18 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { void do_vectorizedMismatch(Intrinsic* x); void do_blackhole(Intrinsic* x); + LIR_Opr access_flat_array(bool is_load, LIRItem& array, LIRItem& index, LIRItem& obj_item, ciField* field = nullptr, size_t offset = 0); + void access_sub_element(LIRItem& array, LIRItem& index, LIR_Opr& result, ciField* field, size_t sub_offset); + LIR_Opr get_and_load_element_address(LIRItem& array, LIRItem& index); + static bool needs_flat_array_store_check(StoreIndexed* x); + void check_flat_array(LIR_Opr array, CodeStub* slow_path); + static bool needs_null_free_array_store_check(StoreIndexed* x); + void check_null_free_array(LIRItem& array, LIRItem& value, CodeEmitInfo* info); + void substitutability_check(IfOp* x, LIRItem& left, LIRItem& right, LIRItem& t_val, LIRItem& f_val); + void substitutability_check(If* x, LIRItem& left, LIRItem& right); + void substitutability_check_common(Value left_val, Value right_val, LIRItem& left, LIRItem& right, + LIR_Opr equal_result, LIR_Opr 
not_equal_result, LIR_Opr result, CodeEmitInfo* info); + public: LIR_Opr call_runtime(BasicTypeArray* signature, LIRItemList* args, address entry, ValueType* result_type, CodeEmitInfo* info); LIR_Opr call_runtime(BasicTypeArray* signature, LIR_OprList* args, address entry, ValueType* result_type, CodeEmitInfo* info); @@ -288,7 +303,7 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { public: void access_store_at(DecoratorSet decorators, BasicType type, LIRItem& base, LIR_Opr offset, LIR_Opr value, - CodeEmitInfo* patch_info = nullptr, CodeEmitInfo* store_emit_info = nullptr); + CodeEmitInfo* patch_info = nullptr, CodeEmitInfo* store_emit_info = nullptr, ciInlineKlass* vk = nullptr); void access_load_at(DecoratorSet decorators, BasicType type, LIRItem& base, LIR_Opr offset, LIR_Opr result, @@ -325,7 +340,7 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { LIRItemList* invoke_visit_arguments(Invoke* x); void invoke_load_arguments(Invoke* x, LIRItemList* args, const LIR_OprList* arg_list); - + void invoke_load_one_argument(LIRItem* param, LIR_Opr loc); void trace_block_entry(BlockBegin* block); // volatile field operations are never patchable because a klass @@ -362,10 +377,10 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { void logic_op (Bytecodes::Code code, LIR_Opr dst_reg, LIR_Opr left, LIR_Opr right); - void monitor_enter (LIR_Opr object, LIR_Opr lock, LIR_Opr hdr, LIR_Opr scratch, int monitor_no, CodeEmitInfo* info_for_exception, CodeEmitInfo* info); + void monitor_enter (LIR_Opr object, LIR_Opr lock, LIR_Opr hdr, LIR_Opr scratch, int monitor_no, CodeEmitInfo* info_for_exception, CodeEmitInfo* info, CodeStub* throw_ie_stub = nullptr); void monitor_exit (LIR_Opr object, LIR_Opr lock, LIR_Opr hdr, LIR_Opr scratch, int monitor_no); - void new_instance (LIR_Opr dst, ciInstanceKlass* klass, bool is_unresolved, LIR_Opr scratch1, LIR_Opr scratch2, LIR_Opr scratch3, LIR_Opr scratch4, LIR_Opr klass_reg, 
CodeEmitInfo* info); + void new_instance(LIR_Opr dst, ciInstanceKlass* klass, bool is_unresolved, bool allow_inline, LIR_Opr scratch1, LIR_Opr scratch2, LIR_Opr scratch3, LIR_Opr scratch4, LIR_Opr klass_reg, CodeEmitInfo* info); // machine dependent void cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info); @@ -477,6 +492,11 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { void profile_arguments(ProfileCall* x); void profile_parameters(Base* x); void profile_parameters_at_call(ProfileCall* x); + void profile_flags(ciMethodData* md, ciProfileData* load_store, int flag, LIR_Condition condition = lir_cond_always); + void profile_null_free_array(LIRItem array, ciMethodData* md, ciProfileData* load_store); + template void profile_array_type(AccessIndexed* x, ciMethodData*& md, ArrayData*& load_store); + void profile_element_type(Value element, ciMethodData* md, ciArrayLoadData* load_store); + bool profile_inline_klass(ciMethodData* md, ciProfileData* data, Value value, int flag); LIR_Opr mask_boolean(LIR_Opr array, LIR_Opr value, CodeEmitInfo*& null_check_info); public: @@ -504,6 +524,7 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { , _method(method) , _virtual_register_number(LIR_Opr::vreg_base) , _vreg_flags(num_vreg_flags) + , _in_conditional_code(false) , _barrier_set(BarrierSet::barrier_set()->barrier_set_c1()) { } @@ -585,6 +606,7 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { virtual void do_ProfileCall (ProfileCall* x); virtual void do_ProfileReturnType (ProfileReturnType* x); virtual void do_ProfileInvoke (ProfileInvoke* x); + virtual void do_ProfileACmpTypes(ProfileACmpTypes* x); virtual void do_RuntimeCall (RuntimeCall* x); virtual void do_MemBar (MemBar* x); virtual void do_RangeCheckPredicate(RangeCheckPredicate* x); diff --git a/src/hotspot/share/c1/c1_LinearScan.cpp b/src/hotspot/share/c1/c1_LinearScan.cpp index b00ab25b8f0b9..2ab99160197f2 100644 
--- a/src/hotspot/share/c1/c1_LinearScan.cpp +++ b/src/hotspot/share/c1/c1_LinearScan.cpp @@ -62,9 +62,9 @@ // Map BasicType to spill size in 32-bit words, matching VMReg's notion of words #ifdef _LP64 -static int type2spill_size[T_CONFLICT+1]={ -1, 0, 0, 0, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 0, 2, 1, 2, 1, -1}; +static int type2spill_size[T_CONFLICT+1]={ -1, 0, 0, 0, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 2, 0, 2, 1, 2, 1, -1}; #else -static int type2spill_size[T_CONFLICT+1]={ -1, 0, 0, 0, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 0, 1, -1, 1, 1, -1}; +static int type2spill_size[T_CONFLICT+1]={ -1, 0, 0, 0, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 0, 1, -1, 1, 1, -1}; #endif @@ -256,7 +256,7 @@ void LinearScan::assign_spill_slot(Interval* it) { } void LinearScan::propagate_spill_slots() { - if (!frame_map()->finalize_frame(max_spills())) { + if (!frame_map()->finalize_frame(max_spills(), compilation()->method()->c1_needs_stack_repair())) { bailout("frame too large"); } } @@ -2888,7 +2888,7 @@ IRScopeDebugInfo* LinearScan::compute_debug_info_for_scope(int op_id, IRScope* c } } - return new IRScopeDebugInfo(cur_scope, cur_state->bci(), locals, expressions, monitors, caller_debug_info); + return new IRScopeDebugInfo(cur_scope, cur_state->bci(), locals, expressions, monitors, caller_debug_info, cur_state->should_reexecute()); } diff --git a/src/hotspot/share/c1/c1_MacroAssembler.hpp b/src/hotspot/share/c1/c1_MacroAssembler.hpp index 0fe0d0ff285f4..f6d177f803fbc 100644 --- a/src/hotspot/share/c1/c1_MacroAssembler.hpp +++ b/src/hotspot/share/c1/c1_MacroAssembler.hpp @@ -29,8 +29,11 @@ #include "utilities/macros.hpp" class CodeEmitInfo; - +class CompiledEntrySignature; class C1_MacroAssembler: public MacroAssembler { + private: + int scalarized_entry(const CompiledEntrySignature* ces, int frame_size_in_bytes, int bang_size_in_bytes, int sp_offset_for_orig_pc, Label& verified_inline_entry_label, bool is_inline_ro_entry); + void build_frame_helper(int frame_size_in_bytes, int sp_offset_for_orig_pc, int 
sp_inc, bool reset_orig_pc, bool needs_stack_repair); public: // creation C1_MacroAssembler(CodeBuffer* code) : MacroAssembler(code) { pd_init(); } @@ -38,9 +41,14 @@ class C1_MacroAssembler: public MacroAssembler { //---------------------------------------------------- void explicit_null_check(Register base); - void build_frame(int frame_size_in_bytes, int bang_size_in_bytes); - void remove_frame(int frame_size_in_bytes); + void build_frame(int frame_size_in_bytes, int bang_size_in_bytes, int sp_offset_for_orig_pc = 0, bool needs_stack_repair = false, bool has_scalarized_args = false, Label* verified_inline_entry_label = nullptr); + int verified_entry(const CompiledEntrySignature* ces, int frame_size_in_bytes, int bang_size_in_bytes, int sp_offset_for_orig_pc, Label& verified_inline_entry_label) { + return scalarized_entry(ces, frame_size_in_bytes, bang_size_in_bytes, sp_offset_for_orig_pc, verified_inline_entry_label, false); + } + int verified_inline_ro_entry(const CompiledEntrySignature* ces, int frame_size_in_bytes, int bang_size_in_bytes, int sp_offset_for_orig_pc, Label& verified_inline_entry_label) { + return scalarized_entry(ces, frame_size_in_bytes, bang_size_in_bytes, sp_offset_for_orig_pc, verified_inline_entry_label, true); + } void verified_entry(bool breakAtEntry); void verify_stack_oop(int offset) PRODUCT_RETURN; void verify_not_null_oop(Register r) PRODUCT_RETURN; diff --git a/src/hotspot/share/c1/c1_Optimizer.cpp b/src/hotspot/share/c1/c1_Optimizer.cpp index 0c18694df785b..17ad26a311a24 100644 --- a/src/hotspot/share/c1/c1_Optimizer.cpp +++ b/src/hotspot/share/c1/c1_Optimizer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -86,7 +86,8 @@ class CE_Eliminator: public BlockClosure { virtual void block_do(BlockBegin* block); private: - Value make_ifop(Value x, Instruction::Condition cond, Value y, Value tval, Value fval); + Value make_ifop(Value x, Instruction::Condition cond, Value y, Value tval, Value fval, + ValueStack* state_before, bool substitutability_check); }; void CE_Eliminator::block_do(BlockBegin* block) { @@ -215,7 +216,8 @@ void CE_Eliminator::block_do(BlockBegin* block) { cur_end = cur_end->set_next(f_value); } - Value result = make_ifop(if_->x(), if_->cond(), if_->y(), t_value, f_value); + Value result = make_ifop(if_->x(), if_->cond(), if_->y(), t_value, f_value, + if_->state_before(), if_->substitutability_check()); assert(result != nullptr, "make_ifop must return a non-null instruction"); if (!result->is_linked() && result->can_be_linked()) { NOT_PRODUCT(result->set_printable_bci(if_->printable_bci())); @@ -270,9 +272,10 @@ void CE_Eliminator::block_do(BlockBegin* block) { } -Value CE_Eliminator::make_ifop(Value x, Instruction::Condition cond, Value y, Value tval, Value fval) { +Value CE_Eliminator::make_ifop(Value x, Instruction::Condition cond, Value y, Value tval, Value fval, + ValueStack* state_before, bool substitutability_check) { if (!OptimizeIfOps) { - return new IfOp(x, cond, y, tval, fval); + return new IfOp(x, cond, y, tval, fval, state_before, substitutability_check); } tval = tval->subst(); @@ -286,7 +289,8 @@ Value CE_Eliminator::make_ifop(Value x, Instruction::Condition cond, Value y, Va y = y->subst(); Constant* y_const = y->as_Constant(); - if (y_const != nullptr) { + // We must not optimize a substitutability check to a pointer comparison. 
+ if (!substitutability_check && y_const != nullptr) { IfOp* x_ifop = x->as_IfOp(); if (x_ifop != nullptr) { // x is an ifop, y is a constant Constant* x_tval_const = x_ifop->tval()->subst()->as_Constant(); @@ -307,7 +311,7 @@ Value CE_Eliminator::make_ifop(Value x, Instruction::Condition cond, Value y, Va if (new_tval == new_fval) { return new_tval; } else { - return new IfOp(x_ifop->x(), x_ifop_cond, x_ifop->y(), new_tval, new_fval); + return new IfOp(x_ifop->x(), x_ifop_cond, x_ifop->y(), new_tval, new_fval, x_ifop->state_before(), x_ifop->substitutability_check()); } } } @@ -323,7 +327,7 @@ Value CE_Eliminator::make_ifop(Value x, Instruction::Condition cond, Value y, Va } } } - return new IfOp(x, cond, y, tval, fval); + return new IfOp(x, cond, y, tval, fval, state_before, substitutability_check); } void Optimizer::eliminate_conditional_expressions() { @@ -463,7 +467,7 @@ class BlockMerger: public BlockClosure { con = if_->x()->as_Constant(); swapped = true; } - if (con && ifop) { + if (con && ifop && !ifop->substitutability_check()) { Constant* tval = ifop->tval()->as_Constant(); Constant* fval = ifop->fval()->as_Constant(); if (tval && fval) { @@ -488,7 +492,7 @@ class BlockMerger: public BlockClosure { BlockBegin* fblock = fval->compare(cond, con, tsux, fsux); if (tblock != fblock && !if_->is_safepoint()) { If* newif = new If(ifop->x(), ifop->cond(), false, ifop->y(), - tblock, fblock, if_->state_before(), if_->is_safepoint()); + tblock, fblock, if_->state_before(), if_->is_safepoint(), ifop->substitutability_check()); newif->set_state(if_->state()->copy()); assert(prev->next() == if_, "must be guaranteed by above search"); @@ -582,6 +586,7 @@ class NullCheckVisitor: public InstructionVisitor { void do_UnsafeGetAndSet(UnsafeGetAndSet* x); void do_ProfileCall (ProfileCall* x); void do_ProfileReturnType (ProfileReturnType* x); + void do_ProfileACmpTypes(ProfileACmpTypes* x); void do_ProfileInvoke (ProfileInvoke* x); void do_RuntimeCall (RuntimeCall* x); void 
do_MemBar (MemBar* x); @@ -710,6 +715,7 @@ class NullCheckEliminator: public ValueVisitor { void handle_Phi (Phi* x); void handle_ProfileCall (ProfileCall* x); void handle_ProfileReturnType (ProfileReturnType* x); + void handle_ProfileACmpTypes(ProfileACmpTypes* x); void handle_Constant (Constant* x); void handle_IfOp (IfOp* x); }; @@ -768,6 +774,7 @@ void NullCheckVisitor::do_ProfileCall (ProfileCall* x) { nce()->clear_las nce()->handle_ProfileCall(x); } void NullCheckVisitor::do_ProfileReturnType (ProfileReturnType* x) { nce()->handle_ProfileReturnType(x); } void NullCheckVisitor::do_ProfileInvoke (ProfileInvoke* x) {} +void NullCheckVisitor::do_ProfileACmpTypes(ProfileACmpTypes* x) { nce()->handle_ProfileACmpTypes(x); } void NullCheckVisitor::do_RuntimeCall (RuntimeCall* x) {} void NullCheckVisitor::do_MemBar (MemBar* x) {} void NullCheckVisitor::do_RangeCheckPredicate(RangeCheckPredicate* x) {} @@ -1197,6 +1204,11 @@ void NullCheckEliminator::handle_ProfileReturnType(ProfileReturnType* x) { x->set_needs_null_check(!set_contains(x->ret())); } +void NullCheckEliminator::handle_ProfileACmpTypes(ProfileACmpTypes* x) { + x->set_left_maybe_null(!set_contains(x->left())); + x->set_right_maybe_null(!set_contains(x->right())); +} + void NullCheckEliminator::handle_Constant(Constant *x) { ObjectType* ot = x->type()->as_ObjectType(); if (ot != nullptr && ot->is_loaded()) { diff --git a/src/hotspot/share/c1/c1_RangeCheckElimination.hpp b/src/hotspot/share/c1/c1_RangeCheckElimination.hpp index f36a2065109c4..128feae0b59f2 100644 --- a/src/hotspot/share/c1/c1_RangeCheckElimination.hpp +++ b/src/hotspot/share/c1/c1_RangeCheckElimination.hpp @@ -159,6 +159,7 @@ class RangeCheckEliminator { void do_UnsafeGetAndSet(UnsafeGetAndSet* x) { /* nothing to do */ }; void do_ProfileCall (ProfileCall* x) { /* nothing to do */ }; void do_ProfileReturnType (ProfileReturnType* x) { /* nothing to do */ }; + void do_ProfileACmpTypes(ProfileACmpTypes* x) { /* nothing to do */ }; void 
do_ProfileInvoke (ProfileInvoke* x) { /* nothing to do */ }; void do_RuntimeCall (RuntimeCall* x) { /* nothing to do */ }; void do_MemBar (MemBar* x) { /* nothing to do */ }; diff --git a/src/hotspot/share/c1/c1_Runtime1.cpp b/src/hotspot/share/c1/c1_Runtime1.cpp index 41504c74dd23a..bcf55b454b66e 100644 --- a/src/hotspot/share/c1/c1_Runtime1.cpp +++ b/src/hotspot/share/c1/c1_Runtime1.cpp @@ -50,9 +50,14 @@ #include "memory/resourceArea.hpp" #include "memory/universe.hpp" #include "oops/access.inline.hpp" +#include "oops/arrayOop.inline.hpp" +#include "oops/arrayProperties.hpp" +#include "oops/flatArrayKlass.hpp" +#include "oops/flatArrayOop.inline.hpp" #include "oops/objArrayKlass.hpp" #include "oops/objArrayOop.inline.hpp" #include "oops/oop.inline.hpp" +#include "oops/oopCast.inline.hpp" #include "prims/jvmtiExport.hpp" #include "runtime/atomicAccess.hpp" #include "runtime/fieldDescriptor.inline.hpp" @@ -114,8 +119,14 @@ uint Runtime1::_arraycopy_checkcast_cnt = 0; uint Runtime1::_arraycopy_checkcast_attempt_cnt = 0; uint Runtime1::_new_type_array_slowcase_cnt = 0; uint Runtime1::_new_object_array_slowcase_cnt = 0; +uint Runtime1::_new_null_free_array_slowcase_cnt = 0; uint Runtime1::_new_instance_slowcase_cnt = 0; uint Runtime1::_new_multi_array_slowcase_cnt = 0; +uint Runtime1::_load_flat_array_slowcase_cnt = 0; +uint Runtime1::_store_flat_array_slowcase_cnt = 0; +uint Runtime1::_substitutability_check_slowcase_cnt = 0; +uint Runtime1::_buffer_inline_args_slowcase_cnt = 0; +uint Runtime1::_buffer_inline_args_no_receiver_slowcase_cnt = 0; uint Runtime1::_monitorenter_slowcase_cnt = 0; uint Runtime1::_monitorexit_slowcase_cnt = 0; uint Runtime1::_patch_code_slowcase_cnt = 0; @@ -125,6 +136,8 @@ uint Runtime1::_throw_div0_exception_count = 0; uint Runtime1::_throw_null_pointer_exception_count = 0; uint Runtime1::_throw_class_cast_exception_count = 0; uint Runtime1::_throw_incompatible_class_change_error_count = 0; +uint 
Runtime1::_throw_illegal_monitor_state_exception_count = 0; +uint Runtime1::_throw_identity_exception_count = 0; uint Runtime1::_throw_count = 0; static uint _byte_arraycopy_stub_cnt = 0; @@ -370,11 +383,10 @@ const char* Runtime1::name_for_address(address entry) { return pd_name_for_address(entry); } - -JRT_ENTRY(void, Runtime1::new_instance(JavaThread* current, Klass* klass)) +static void allocate_instance(JavaThread* current, Klass* klass, TRAPS) { #ifndef PRODUCT if (PrintC1Statistics) { - _new_instance_slowcase_cnt++; + Runtime1::_new_instance_slowcase_cnt++; } #endif assert(klass->is_klass(), "not a class"); @@ -388,6 +400,9 @@ JRT_ENTRY(void, Runtime1::new_instance(JavaThread* current, Klass* klass)) current->set_vm_result_oop(obj); JRT_END +JRT_ENTRY(void, Runtime1::new_instance(JavaThread* current, Klass* klass)) + allocate_instance(current, klass, CHECK); +JRT_END JRT_ENTRY(void, Runtime1::new_type_array(JavaThread* current, Klass* klass, jint length)) #ifndef PRODUCT @@ -432,6 +447,31 @@ JRT_ENTRY(void, Runtime1::new_object_array(JavaThread* current, Klass* array_kla } JRT_END +// TODO 8265122 This is currently dead code until the array factory methods are intrinsified +JRT_ENTRY(void, Runtime1::new_null_free_array(JavaThread* current, Klass* array_klass, jint length)) + NOT_PRODUCT(_new_null_free_array_slowcase_cnt++;) + + // Note: no handle for klass needed since they are not used + // anymore after new_objArray() and no GC can happen before. + // (This may have to change if this code changes!) 
+ assert(array_klass->is_klass(), "not a class"); + Handle holder(THREAD, array_klass->klass_holder()); // keep the klass alive + Klass* elem_klass = ObjArrayKlass::cast(array_klass)->element_klass(); + assert(elem_klass->is_inline_klass(), "must be"); + // Logically creates elements, ensure klass init + elem_klass->initialize(CHECK); + + const ArrayProperties props = ArrayProperties::Default().with_null_restricted(); + arrayOop obj = oopFactory::new_objArray(elem_klass, length, props, CHECK); + + current->set_vm_result_oop(obj); + // This is pretty rare but this runtime patch is stressful to deoptimization + // if we deoptimize here so force a deopt to stress the path. + if (DeoptimizeALot) { + deopt_caller(current); + } +JRT_END + JRT_ENTRY(void, Runtime1::new_multi_array(JavaThread* current, Klass* klass, int rank, jint* dims)) #ifndef PRODUCT @@ -447,6 +487,96 @@ JRT_ENTRY(void, Runtime1::new_multi_array(JavaThread* current, Klass* klass, int JRT_END +static void profile_flat_array(JavaThread* current, bool load, bool null_free) { + ResourceMark rm(current); + vframeStream vfst(current, true); + assert(!vfst.at_end(), "Java frame must exist"); + // Check if array access profiling is enabled + if (vfst.nm()->comp_level() != CompLevel_full_profile || !C1UpdateMethodData) { + return; + } + int bci = vfst.bci(); + Method* method = vfst.method(); + MethodData* md = method->method_data(); + if (md != nullptr) { + // Lock to access ProfileData, and ensure lock is not broken by a safepoint + MutexLocker ml(md->extra_data_lock(), Mutex::_no_safepoint_check_flag); + + ProfileData* data = md->bci_to_data(bci); + assert(data != nullptr, "incorrect profiling entry"); + if (data->is_ArrayLoadData()) { + assert(load, "should be an array load"); + ArrayLoadData* load_data = (ArrayLoadData*) data; + load_data->set_flat_array(); + if (null_free) { + load_data->set_null_free_array(); + } + } else { + assert(data->is_ArrayStoreData(), ""); + assert(!load, "should be an array 
store"); + ArrayStoreData* store_data = (ArrayStoreData*) data; + store_data->set_flat_array(); + if (null_free) { + store_data->set_null_free_array(); + } + } + } +} + +JRT_ENTRY(void, Runtime1::load_flat_array(JavaThread* current, flatArrayOopDesc* array, int index)) + assert(array->klass()->is_flatArray_klass(), "should not be called"); + profile_flat_array(current, true, array->is_null_free_array()); + + NOT_PRODUCT(_load_flat_array_slowcase_cnt++;) + assert(array->length() > 0 && index < array->length(), "already checked"); + flatArrayHandle vah(current, array); + oop obj = array->obj_at(index, CHECK); + current->set_vm_result_oop(obj); +JRT_END + +JRT_ENTRY(void, Runtime1::store_flat_array(JavaThread* current, flatArrayOopDesc* array, int index, oopDesc* value)) + assert(array->is_flatArray(), "should not be called"); + profile_flat_array(current, false, array->is_null_free_array()); + + NOT_PRODUCT(_store_flat_array_slowcase_cnt++;) + if (value == nullptr && array->is_null_free_array()) { + SharedRuntime::throw_and_post_jvmti_exception(current, vmSymbols::java_lang_NullPointerException()); + } else { + array->obj_at_put(index, value, CHECK); + } +JRT_END + +JRT_ENTRY(int, Runtime1::substitutability_check(JavaThread* current, oopDesc* left, oopDesc* right)) + NOT_PRODUCT(_substitutability_check_slowcase_cnt++;) + JavaCallArguments args; + args.push_oop(Handle(THREAD, left)); + args.push_oop(Handle(THREAD, right)); + JavaValue result(T_BOOLEAN); + JavaCalls::call_static(&result, + vmClasses::ValueObjectMethods_klass(), + vmSymbols::isSubstitutable_name(), + vmSymbols::object_object_boolean_signature(), + &args, CHECK_0); + return result.get_jboolean() ? 1 : 0; +JRT_END + +void Runtime1::buffer_inline_args_impl(JavaThread* current, Method* m, bool allocate_receiver) { + JavaThread* THREAD = current; + methodHandle method(current, m); // We are inside the verified_entry or verified_inline_ro_entry of this method. 
+ oop obj = SharedRuntime::allocate_inline_types_impl(current, method, allocate_receiver, true, CHECK); + current->set_vm_result_oop(obj); +} + +JRT_ENTRY(void, Runtime1::buffer_inline_args(JavaThread* current, Method* method)) + NOT_PRODUCT(_buffer_inline_args_slowcase_cnt++;) + buffer_inline_args_impl(current, method, true); +JRT_END + +JRT_ENTRY(void, Runtime1::buffer_inline_args_no_receiver(JavaThread* current, Method* method)) + NOT_PRODUCT(_buffer_inline_args_no_receiver_slowcase_cnt++;) + buffer_inline_args_impl(current, method, false); +JRT_END + JRT_ENTRY(void, Runtime1::unimplemented_entry(JavaThread* current, StubId id)) tty->print_cr("Runtime1::entry_for(%d) returned unimplemented entry point", (int)id); JRT_END @@ -764,6 +894,19 @@ JRT_ENTRY(void, Runtime1::throw_incompatible_class_change_error(JavaThread* curr JRT_END +JRT_ENTRY(void, Runtime1::throw_illegal_monitor_state_exception(JavaThread* current)) + NOT_PRODUCT(_throw_illegal_monitor_state_exception_count++;) + ResourceMark rm(current); + SharedRuntime::throw_and_post_jvmti_exception(current, vmSymbols::java_lang_IllegalMonitorStateException()); +JRT_END + +JRT_ENTRY(void, Runtime1::throw_identity_exception(JavaThread* current, oopDesc* object)) + NOT_PRODUCT(_throw_identity_exception_count++;) + ResourceMark rm(current); + char* message = SharedRuntime::generate_identity_exception_message(current, object->klass()); + SharedRuntime::throw_and_post_jvmti_exception(current, vmSymbols::java_lang_IdentityException(), message); +JRT_END + JRT_BLOCK_ENTRY(void, Runtime1::monitorenter(JavaThread* current, oopDesc* obj, BasicObjectLock* lock)) #ifndef PRODUCT if (PrintC1Statistics) { @@ -966,6 +1109,9 @@ JRT_ENTRY(void, Runtime1::patch_code(JavaThread* current, StubId stub_id )) BasicType patch_field_type = T_ILLEGAL; bool deoptimize_for_volatile = false; bool deoptimize_for_atomic = false; + bool deoptimize_for_null_free = false; + bool deoptimize_for_flat = false; + bool deoptimize_for_strict_static = 
false; int patch_field_offset = -1; Klass* init_klass = nullptr; // klass needed by load_klass_patching code Klass* load_klass = nullptr; // klass needed by load_klass_patching code @@ -994,6 +1140,20 @@ JRT_ENTRY(void, Runtime1::patch_code(JavaThread* current, StubId stub_id )) // handling in the volatile case. deoptimize_for_volatile = result.access_flags().is_volatile(); + + // The field we are patching is null-free. Deoptimize and regenerate + // the compiled code if we patch a putfield/putstatic because it + // does not contain the required null check. + deoptimize_for_null_free = result.is_null_free_inline_type() && (field_access.is_putfield() || field_access.is_putstatic()); + + // The field we are patching is flat. Deoptimize and regenerate + // the compiled code which can't handle the layout of the flat + // field because it was unknown at compile time. + deoptimize_for_flat = result.is_flat(); + + // Strict statics may require tracking if their class is not fully initialized. + // For now we can bail out of the compiler and let the interpreter handle it. 
+ deoptimize_for_strict_static = result.is_strict_static_unset(); } else if (load_klass_or_mirror_patch_id) { Klass* k = nullptr; switch (code) { @@ -1028,6 +1188,12 @@ JRT_ENTRY(void, Runtime1::patch_code(JavaThread* current, StubId stub_id )) { Bytecode_anewarray anew(caller_method(), caller_method->bcp_from(bci)); Klass* ek = caller_method->constants()->klass_at(anew.index(), CHECK); k = ek->array_klass(CHECK); + if (!k->is_typeArray_klass() && !k->is_refArray_klass() && !k->is_flatArray_klass()) { + k = ObjArrayKlass::cast(k)->klass_with_properties(ArrayProperties::Default(), THREAD); + } + if (k->is_flatArray_klass()) { + deoptimize_for_flat = true; + } } break; case Bytecodes::_ldc: @@ -1066,12 +1232,26 @@ JRT_ENTRY(void, Runtime1::patch_code(JavaThread* current, StubId stub_id )) ShouldNotReachHere(); } - if (deoptimize_for_volatile) { + if (deoptimize_for_volatile || + deoptimize_for_null_free || + deoptimize_for_flat || + deoptimize_for_strict_static) { // At compile time we assumed the field wasn't volatile but after // loading it turns out it was volatile so we have to throw the // compiled code out and let it be regenerated. 
if (TracePatching) { - tty->print_cr("Deoptimizing for patching volatile field reference"); + if (deoptimize_for_volatile) { + tty->print_cr("Deoptimizing for patching volatile field reference"); + } + if (deoptimize_for_null_free) { + tty->print_cr("Deoptimizing for patching null-free field reference"); + } + if (deoptimize_for_flat) { + tty->print_cr("Deoptimizing for patching flat field or array reference"); + } + if (deoptimize_for_strict_static) { + tty->print_cr("Deoptimizing for patching strict static field reference"); + } } // It's possible the nmethod was invalidated in the last @@ -1522,8 +1702,15 @@ void Runtime1::print_statistics() { tty->print_cr(" _new_type_array_slowcase_cnt: %u", _new_type_array_slowcase_cnt); tty->print_cr(" _new_object_array_slowcase_cnt: %u", _new_object_array_slowcase_cnt); + tty->print_cr(" _new_null_free_array_slowcase_cnt: %u", _new_null_free_array_slowcase_cnt); tty->print_cr(" _new_instance_slowcase_cnt: %u", _new_instance_slowcase_cnt); tty->print_cr(" _new_multi_array_slowcase_cnt: %u", _new_multi_array_slowcase_cnt); + tty->print_cr(" _load_flat_array_slowcase_cnt: %u", _load_flat_array_slowcase_cnt); + tty->print_cr(" _store_flat_array_slowcase_cnt: %u", _store_flat_array_slowcase_cnt); + tty->print_cr(" _substitutability_check_slowcase_cnt: %u", _substitutability_check_slowcase_cnt); + tty->print_cr(" _buffer_inline_args_slowcase_cnt:%u", _buffer_inline_args_slowcase_cnt); + tty->print_cr(" _buffer_inline_args_no_receiver_slowcase_cnt:%u", _buffer_inline_args_no_receiver_slowcase_cnt); + tty->print_cr(" _monitorenter_slowcase_cnt: %u", _monitorenter_slowcase_cnt); tty->print_cr(" _monitorexit_slowcase_cnt: %u", _monitorexit_slowcase_cnt); tty->print_cr(" _patch_code_slowcase_cnt: %u", _patch_code_slowcase_cnt); @@ -1534,6 +1721,8 @@ void Runtime1::print_statistics() { tty->print_cr(" _throw_null_pointer_exception_count: %u:", _throw_null_pointer_exception_count); tty->print_cr(" _throw_class_cast_exception_count: 
%u:", _throw_class_cast_exception_count); tty->print_cr(" _throw_incompatible_class_change_error_count: %u:", _throw_incompatible_class_change_error_count); + tty->print_cr(" _throw_illegal_monitor_state_exception_count: %u:", _throw_illegal_monitor_state_exception_count); + tty->print_cr(" _throw_identity_exception_count: %u:", _throw_identity_exception_count); tty->print_cr(" _throw_count: %u:", _throw_count); SharedRuntime::print_ic_miss_histogram(); diff --git a/src/hotspot/share/c1/c1_Runtime1.hpp b/src/hotspot/share/c1/c1_Runtime1.hpp index 6fa17e53c19a2..c8a19d9bf3efc 100644 --- a/src/hotspot/share/c1/c1_Runtime1.hpp +++ b/src/hotspot/share/c1/c1_Runtime1.hpp @@ -56,8 +56,14 @@ class Runtime1: public AllStatic { static uint _arraycopy_checkcast_attempt_cnt; static uint _new_type_array_slowcase_cnt; static uint _new_object_array_slowcase_cnt; + static uint _new_null_free_array_slowcase_cnt; static uint _new_instance_slowcase_cnt; static uint _new_multi_array_slowcase_cnt; + static uint _load_flat_array_slowcase_cnt; + static uint _store_flat_array_slowcase_cnt; + static uint _substitutability_check_slowcase_cnt; + static uint _buffer_inline_args_slowcase_cnt; + static uint _buffer_inline_args_no_receiver_slowcase_cnt; static uint _monitorenter_slowcase_cnt; static uint _monitorexit_slowcase_cnt; static uint _patch_code_slowcase_cnt; @@ -67,11 +73,14 @@ class Runtime1: public AllStatic { static uint _throw_null_pointer_exception_count; static uint _throw_class_cast_exception_count; static uint _throw_incompatible_class_change_error_count; + static uint _throw_illegal_monitor_state_exception_count; + static uint _throw_identity_exception_count; static uint _throw_count; #endif private: static CodeBlob* _blobs[(int)StubInfo::C1_STUB_COUNT]; + static void buffer_inline_args_impl(JavaThread* current, Method* m, bool allocate_receiver); // stub generation public: @@ -91,7 +100,13 @@ class Runtime1: public AllStatic { static void new_instance (JavaThread* current, 
Klass* klass); static void new_type_array (JavaThread* current, Klass* klass, jint length); static void new_object_array(JavaThread* current, Klass* klass, jint length); + static void new_null_free_array(JavaThread* current, Klass* klass, jint length); static void new_multi_array (JavaThread* current, Klass* klass, int rank, jint* dims); + static void load_flat_array(JavaThread* current, flatArrayOopDesc* array, int index); + static void store_flat_array(JavaThread* current, flatArrayOopDesc* array, int index, oopDesc* value); + static int substitutability_check(JavaThread* current, oopDesc* left, oopDesc* right); + static void buffer_inline_args(JavaThread* current, Method* method); + static void buffer_inline_args_no_receiver(JavaThread* current, Method* method); static address counter_overflow(JavaThread* current, int bci, Method* method); @@ -105,6 +120,8 @@ class Runtime1: public AllStatic { static void throw_null_pointer_exception(JavaThread* current); static void throw_class_cast_exception(JavaThread* current, oopDesc* object); static void throw_incompatible_class_change_error(JavaThread* current); + static void throw_illegal_monitor_state_exception(JavaThread* current); + static void throw_identity_exception(JavaThread* current, oopDesc* object); static void throw_array_store_exception(JavaThread* current, oopDesc* object); static void monitorenter(JavaThread* current, oopDesc* obj, BasicObjectLock* lock); diff --git a/src/hotspot/share/c1/c1_ValueMap.cpp b/src/hotspot/share/c1/c1_ValueMap.cpp index 2d7634f6308a5..56bbdaf6c442a 100644 --- a/src/hotspot/share/c1/c1_ValueMap.cpp +++ b/src/hotspot/share/c1/c1_ValueMap.cpp @@ -613,6 +613,8 @@ void GlobalValueNumbering::substitute(Instruction* instr) { assert(!instr->has_subst(), "substitution already set"); Value subst = current_map()->find_insert(instr); if (subst != instr) { + assert(instr->as_LoadIndexed() == nullptr || !instr->as_LoadIndexed()->should_profile(), "should not be optimized out"); + 
assert(instr->as_StoreIndexed() == nullptr, "should not be optimized out"); assert(!subst->has_subst(), "can't have a substitution"); TRACE_VALUE_NUMBERING(tty->print_cr("substitution for %c%d set to %c%d", instr->type()->tchar(), instr->id(), subst->type()->tchar(), subst->id())); diff --git a/src/hotspot/share/c1/c1_ValueMap.hpp b/src/hotspot/share/c1/c1_ValueMap.hpp index 6583a07c920d4..d5c26e28ff38a 100644 --- a/src/hotspot/share/c1/c1_ValueMap.hpp +++ b/src/hotspot/share/c1/c1_ValueMap.hpp @@ -147,6 +147,9 @@ class ValueNumberingVisitor: public InstructionVisitor { kill_memory(); } else { kill_field(x->field(), x->needs_patching()); + if (x->enclosing_field() != nullptr) { + kill_field(x->enclosing_field(), true); + } } } void do_StoreIndexed (StoreIndexed* x) { kill_array(x->type()); } @@ -211,6 +214,7 @@ class ValueNumberingVisitor: public InstructionVisitor { void do_ExceptionObject(ExceptionObject* x) { /* nothing to do */ } void do_ProfileCall (ProfileCall* x) { /* nothing to do */ } void do_ProfileReturnType (ProfileReturnType* x) { /* nothing to do */ } + void do_ProfileACmpTypes(ProfileACmpTypes* x) { /* nothing to do */ } void do_ProfileInvoke (ProfileInvoke* x) { /* nothing to do */ }; void do_RuntimeCall (RuntimeCall* x) { /* nothing to do */ }; void do_MemBar (MemBar* x) { /* nothing to do */ }; diff --git a/src/hotspot/share/c1/c1_ValueStack.cpp b/src/hotspot/share/c1/c1_ValueStack.cpp index 9a09c1865413f..2430850c671fc 100644 --- a/src/hotspot/share/c1/c1_ValueStack.cpp +++ b/src/hotspot/share/c1/c1_ValueStack.cpp @@ -34,6 +34,7 @@ ValueStack::ValueStack(IRScope* scope, ValueStack* caller_state) , _caller_state(caller_state) , _bci(-99) , _kind(Parsing) +, _should_reexecute(false) , _locals(scope->method()->max_locals(), scope->method()->max_locals(), nullptr) , _stack(scope->method()->max_stack()) , _locks(nullptr) @@ -42,11 +43,12 @@ ValueStack::ValueStack(IRScope* scope, ValueStack* caller_state) verify(); } -ValueStack::ValueStack(ValueStack* 
copy_from, Kind kind, int bci) +ValueStack::ValueStack(ValueStack* copy_from, Kind kind, int bci, bool reexecute) : _scope(copy_from->scope()) , _caller_state(copy_from->caller_state()) , _bci(bci) , _kind(kind) + , _should_reexecute(reexecute) , _locals(copy_from->locals_size_for_copy(kind)) , _stack(copy_from->stack_size_for_copy(kind)) , _locks(copy_from->locks_size() == 0 ? nullptr : new Values(copy_from->locks_size())) @@ -210,7 +212,6 @@ int ValueStack::unlock() { return total_locks_size(); } - void ValueStack::setup_phi_for_stack(BlockBegin* b, int index) { assert(stack_at(index)->as_Phi() == nullptr || stack_at(index)->as_Phi()->block() != b, "phi function already created"); diff --git a/src/hotspot/share/c1/c1_ValueStack.hpp b/src/hotspot/share/c1/c1_ValueStack.hpp index bb0c475585c86..9b8bdc217d3f8 100644 --- a/src/hotspot/share/c1/c1_ValueStack.hpp +++ b/src/hotspot/share/c1/c1_ValueStack.hpp @@ -54,6 +54,7 @@ class ValueStack: public CompilationResourceObj { ValueStack* _caller_state; int _bci; Kind _kind; + bool _should_reexecute; Values _locals; // the locals Values _stack; // the expression stack @@ -74,7 +75,7 @@ class ValueStack: public CompilationResourceObj { static void apply(const Values& list, ValueVisitor* f); // for simplified copying - ValueStack(ValueStack* copy_from, Kind kind, int bci); + ValueStack(ValueStack* copy_from, Kind kind, int bci, bool reexecute); int locals_size_for_copy(Kind kind) const; int stack_size_for_copy(Kind kind) const; @@ -82,9 +83,9 @@ class ValueStack: public CompilationResourceObj { // creation ValueStack(IRScope* scope, ValueStack* caller_state); - ValueStack* copy() { return new ValueStack(this, _kind, _bci); } - ValueStack* copy(Kind new_kind, int new_bci) { return new ValueStack(this, new_kind, new_bci); } - ValueStack* copy_for_parsing() { return new ValueStack(this, Parsing, -99); } + ValueStack* copy() { return new ValueStack(this, _kind, _bci, _should_reexecute); } + ValueStack* copy(Kind new_kind, int 
new_bci) { return new ValueStack(this, new_kind, new_bci, _should_reexecute); } + ValueStack* copy_for_parsing() { return new ValueStack(this, Parsing, -99, false); } // Used when no exception handler is found static Kind empty_exception_kind(bool caller = false) { @@ -106,6 +107,8 @@ class ValueStack: public CompilationResourceObj { ValueStack* caller_state() const { return _caller_state; } int bci() const { return _bci; } Kind kind() const { return _kind; } + bool should_reexecute() const { return _should_reexecute; } + void set_should_reexecute(bool reexec) { _should_reexecute = reexec; } int locals_size() const { return _locals.length(); } int stack_size() const { return _stack.length(); } diff --git a/src/hotspot/share/c1/c1_globals.hpp b/src/hotspot/share/c1/c1_globals.hpp index ff39231287449..fad45f747b22c 100644 --- a/src/hotspot/share/c1/c1_globals.hpp +++ b/src/hotspot/share/c1/c1_globals.hpp @@ -294,7 +294,10 @@ "Update MethodData*s in Tier 3 C1 generated code") \ \ develop(bool, PrintCFGToFile, false, \ - "print control flow graph to a separate file during compilation") + "print control flow graph to a separate file during compilation") \ + \ + develop(bool, C1UseDelayedFlattenedFieldReads, true, \ + "Use delayed reads of flat fields to reduce heap buffering") // end of C1_FLAGS diff --git a/src/hotspot/share/cds/aotMapLogger.cpp b/src/hotspot/share/cds/aotMapLogger.cpp index 9f338826fd6c0..728213ce60910 100644 --- a/src/hotspot/share/cds/aotMapLogger.cpp +++ b/src/hotspot/share/cds/aotMapLogger.cpp @@ -536,7 +536,7 @@ void AOTMapLogger::log_as_hex(address base, address top, address requested_base, } #if INCLUDE_CDS_JAVA_HEAP -// FakeOop (and subclasses FakeMirror, FakeString, FakeObjArray, FakeTypeArray) are used to traverse +// FakeOop (and subclasses FakeMirror, FakeString, FakeRefArray, FakeFlatArray, FakeTypeArray) are used to traverse // and print the (image of) heap objects stored in the AOT cache. 
These objects are different than regular oops: // - They do not reside inside the range of the heap. // - For +UseCompressedOops: pointers may use a different narrowOop encoding: see FakeOop::read_oop_at(narrowOop*) @@ -552,10 +552,6 @@ class AOTMapLogger::FakeOop { OopDataIterator* _iter; OopData _data; - address* buffered_field_addr(int field_offset) { - return (address*)(buffered_addr() + field_offset); - } - public: RequestedMetadataAddr metadata_field(int field_offset) { return RequestedMetadataAddr(*(address*)(buffered_field_addr(field_offset))); @@ -565,6 +561,10 @@ class AOTMapLogger::FakeOop { return _data._buffered_addr; } + address* buffered_field_addr(int field_offset) { + return (address*)(buffered_addr() + field_offset); + } + // Return an "oop" pointer so we can use APIs that accept regular oops. This // must be used with care, as only a limited number of APIs can work with oops that // live outside of the range of the heap. @@ -574,7 +574,8 @@ class AOTMapLogger::FakeOop { FakeOop(OopDataIterator* iter, OopData data) : _iter(iter), _data(data) {} FakeMirror as_mirror(); - FakeObjArray as_obj_array(); + FakeRefArray as_ref_array(); + FakeFlatArray as_flat_array(); FakeString as_string(); FakeTypeArray as_type_array(); @@ -630,6 +631,20 @@ class AOTMapLogger::FakeOop { return FakeOop(_iter, _iter->obj_at(addr)); } + FakeOop read_inline_oop_at(address value_addr, Klass* k) { + OopData data = { + value_addr, // _buffered_addr, address of the flat value shifted by the payload offset + requested_addr() + (value_addr - buffered_addr()), // _requested_addr + target_location() + (value_addr - buffered_addr()), // _target_location + 0, // _narrow_location, narrow oop not used + cast_to_oop(value_addr), // _raw_oop + k, // _klass + 0, // _size + false // _is_root_segment + }; + return FakeOop(_iter, data); + } + FakeOop obj_field(int field_offset) { if (UseCompressedOops) { return read_oop_at(raw_oop()->field_addr(field_offset)); @@ -638,10 +653,10 @@ class 
AOTMapLogger::FakeOop { } } - void print_non_oop_field(outputStream* st, fieldDescriptor* fd) { + void print_non_oop_field(outputStream* st, fieldDescriptor* fd, int indent = 0, int base_offset = 0) { // fd->print_on_for() works for non-oop fields in fake oops precond(fd->field_type() != T_ARRAY && fd->field_type() != T_OBJECT); - fd->print_on_for(st, raw_oop()); + fd->print_on_for(st, raw_oop(), indent, base_offset); } }; // AOTMapLogger::FakeOop @@ -661,25 +676,49 @@ class AOTMapLogger::FakeMirror : public AOTMapLogger::FakeOop { } }; // AOTMapLogger::FakeMirror -class AOTMapLogger::FakeObjArray : public AOTMapLogger::FakeOop { - objArrayOop raw_objArrayOop() { - return (objArrayOop)raw_oop(); +class AOTMapLogger::FakeRefArray : public AOTMapLogger::FakeOop { + refArrayOop raw_refArrayOop() { + return (refArrayOop)raw_oop(); } public: - FakeObjArray(OopDataIterator* iter, OopData data) : FakeOop(iter, data) {} + FakeRefArray(OopDataIterator* iter, OopData data) : FakeOop(iter, data) {} int length() { - return raw_objArrayOop()->length(); + return raw_refArrayOop()->length(); } FakeOop obj_at(int i) { if (UseCompressedOops) { - return read_oop_at(raw_objArrayOop()->obj_at_addr(i)); + return read_oop_at(raw_refArrayOop()->obj_at_addr(i)); } else { - return read_oop_at(raw_objArrayOop()->obj_at_addr(i)); + return read_oop_at(raw_refArrayOop()->obj_at_addr(i)); } } -}; // AOTMapLogger::FakeObjArray +}; // AOTMapLogger::FakeRefArray + +class AOTMapLogger::FakeFlatArray : public AOTMapLogger::FakeOop { + flatArrayOop raw_flatArrayOop() { + return (flatArrayOop)raw_oop(); + } + +public: + FakeFlatArray(OopDataIterator* iter, OopData data) : FakeOop(iter, data) {} + + int length() { + return raw_flatArrayOop()->length(); + } + + // Create a wrapper for an archived flat array element + FakeOop element_at(int i) { + InlineKlass* elem_k = ((FlatArrayKlass*)real_klass())->element_klass(); + address value_addr = (address)raw_flatArrayOop()->value_at_addr(i, 
real_klass()->layout_helper()) - elem_k->payload_offset(); + return read_inline_oop_at(value_addr, elem_k); + } + + int element_offset_at(int i) { + return (address)raw_flatArrayOop()->value_at_addr(i, real_klass()->layout_helper()) - cast_from_oop<address>
(raw_flatArrayOop()); + } +}; // AOTMapLogger::FakeFlatArray class AOTMapLogger::FakeString : public AOTMapLogger::FakeOop { public: @@ -719,9 +758,14 @@ AOTMapLogger::FakeMirror AOTMapLogger::FakeOop::as_mirror() { return FakeMirror(_iter, _data); } -AOTMapLogger::FakeObjArray AOTMapLogger::FakeOop::as_obj_array() { - precond(real_klass()->is_objArray_klass()); - return FakeObjArray(_iter, _data); +AOTMapLogger::FakeRefArray AOTMapLogger::FakeOop::as_ref_array() { + precond(real_klass()->is_refArray_klass()); + return FakeRefArray(_iter, _data); +} + +AOTMapLogger::FakeFlatArray AOTMapLogger::FakeOop::as_flat_array() { + precond(real_klass()->is_flatArray_klass()); + return FakeFlatArray(_iter, _data); } AOTMapLogger::FakeTypeArray AOTMapLogger::FakeOop::as_type_array() { @@ -812,23 +856,67 @@ void AOTMapLogger::FakeString::print_on(outputStream* st, int max_length) { class AOTMapLogger::ArchivedFieldPrinter : public FieldClosure { FakeOop _fake_oop; outputStream* _st; + int _indent; + int _base_offset; public: - ArchivedFieldPrinter(FakeOop fake_oop, outputStream* st) : _fake_oop(fake_oop), _st(st) {} + ArchivedFieldPrinter(FakeOop fake_oop, outputStream* st, int indent = 1, int base_offset = 0) : + _fake_oop(fake_oop), _st(st), _indent(indent), _base_offset(base_offset) {} void do_field(fieldDescriptor* fd) { + for (int i = 0; i < _indent; i++) _st->print(" "); _st->print(" - "); + + if (_fake_oop.raw_oop() == nullptr) { + fd->print_on(_st, _base_offset); + _st->cr(); + return; + } + BasicType ft = fd->field_type(); switch (ft) { case T_ARRAY: case T_OBJECT: { - fd->print_on(_st); // print just the name and offset - FakeOop field_value = _fake_oop.obj_field(fd->offset()); - print_oop_info_cr(_st, field_value); + if (fd->is_flat()) { + int index = fd->index(); + InlineKlass* vk = fd->field_holder()->get_inline_type_field_klass(index); + int field_offset = fd->offset() - vk->payload_offset(); + address field_addr = 
(address)_fake_oop.buffered_field_addr(field_offset); + bool is_null = false; + + if (!fd->is_null_free_inline_type()) { + assert(fd->has_null_marker(), "should have null marker"); + is_null = vk->is_payload_marked_as_null(_fake_oop.buffered_addr() + fd->offset()); + _st->print("Flat inline type field '%s':", vk->name()->as_C_string()); + } else { + _st->print("Flat inline null-free type field '%s':", vk->name()->as_C_string()); + } + // Print fields of flat field (recursively) + if (!is_null) { + _st->cr(); + FakeOop obj = _fake_oop.read_inline_oop_at(field_addr, vk); + ArchivedFieldPrinter print_field(obj, _st, _indent + 1, _base_offset + field_offset); + vk->do_nonstatic_fields(&print_field); + } else { + _st->print_cr(" null"); + } + + if (fd->field_flags().has_null_marker()) { + for (int i = 0; i < _indent + 1; i++) _st->print(" "); + _st->print_cr(" - [null_marker] @%d %s", + vk->null_marker_offset() + _base_offset + field_offset, + is_null ? "Field marked as null" : "Field marked as non-null"); + } + return; // Do not print underlying representation + } else { + fd->print_on(_st); // print just the name and offset + FakeOop field_value = _fake_oop.obj_field(fd->offset()); + print_oop_info_cr(_st, field_value); + } } break; default: - _fake_oop.print_non_oop_field(_st, fd); // name, offset, value + _fake_oop.print_non_oop_field(_st, fd, _indent, _base_offset); // name, offset, value _st->cr(); } } @@ -966,8 +1054,39 @@ void AOTMapLogger::print_oop_details(FakeOop fake_oop, outputStream* st) { if (real_klass->is_typeArray_klass()) { fake_oop.as_type_array().print_elements_on(st); - } else if (real_klass->is_objArray_klass()) { - FakeObjArray fake_obj_array = fake_oop.as_obj_array(); + } else if (real_klass->is_flatArray_klass()) { + FakeFlatArray fake_flat_array = fake_oop.as_flat_array(); + InlineKlass* elem_k = ((FlatArrayKlass*)real_klass)->element_klass(); + for (int i = 0; i < fake_flat_array.length(); i++) { + bool is_null = false; + int off = 
fake_flat_array.element_offset_at(i); + FakeOop elm = fake_flat_array.element_at(i); + + if (!real_klass->is_null_free_array_klass()) { + is_null = elem_k->is_payload_marked_as_null(elm.buffered_addr() + elem_k->payload_offset()); + st->print(" - Flat inline type element '%s':", elem_k->name()->as_C_string()); + } else { + st->print(" - Flat inline null-free type element '%s':", elem_k->name()->as_C_string()); + } + st->print(" - Index %3d offset %3d: ", i, off); + + if (!is_null) { + st->cr(); + ArchivedFieldPrinter print_field(elm, st); + elem_k->do_nonstatic_fields(&print_field); + } else { + assert(!real_klass->is_null_free_array_klass(), "must be"); + st->print_cr(" null"); + } + + if (!real_klass->is_null_free_array_klass()) { + st->print_cr(" - [null_marker] @%d %s", + off + elem_k->null_marker_offset_in_payload(), + is_null ? "Field marked as null" : "Field marked as non-null"); + } + } + } else if (real_klass->is_refArray_klass()) { + FakeRefArray fake_obj_array = fake_oop.as_ref_array(); bool is_logging_root_segment = fake_oop.is_root_segment(); for (int i = 0; i < fake_obj_array.length(); i++) { diff --git a/src/hotspot/share/cds/aotMapLogger.hpp b/src/hotspot/share/cds/aotMapLogger.hpp index 0a89f1e5012ca..1a8a2549809ee 100644 --- a/src/hotspot/share/cds/aotMapLogger.hpp +++ b/src/hotspot/share/cds/aotMapLogger.hpp @@ -74,7 +74,8 @@ class AOTMapLogger : AllStatic { // FakeOop and subtypes class FakeOop; class FakeMirror; - class FakeObjArray; + class FakeRefArray; + class FakeFlatArray; class FakeString; class FakeTypeArray; diff --git a/src/hotspot/share/cds/aotMappedHeapLoader.cpp b/src/hotspot/share/cds/aotMappedHeapLoader.cpp index 7a201d8297f01..b8c1fd51ec80b 100644 --- a/src/hotspot/share/cds/aotMappedHeapLoader.cpp +++ b/src/hotspot/share/cds/aotMappedHeapLoader.cpp @@ -41,6 +41,7 @@ #include "memory/iterator.inline.hpp" #include "memory/resourceArea.hpp" #include "memory/universe.hpp" +#include "oops/oopCast.inline.hpp" #include 
"sanitizers/ub.hpp" #include "utilities/bitMap.inline.hpp" #include "utilities/copy.hpp" @@ -360,14 +361,14 @@ bool AOTMappedHeapLoader::load_heap_region(FileMapInfo* mapinfo) { return true; } -objArrayOop AOTMappedHeapLoader::root_segment(int segment_idx) { +refArrayOop AOTMappedHeapLoader::root_segment(int segment_idx) { if (!CDSConfig::is_using_archive()) { assert(CDSConfig::is_dumping_heap() && Thread::current() == (Thread*)VMThread::vm_thread(), "sanity"); } - objArrayOop segment = (objArrayOop)_root_segments->at(segment_idx).resolve(); + oop segment = _root_segments->at(segment_idx).resolve(); assert(segment != nullptr, "should have been initialized"); - return segment; + return oop_cast(segment); } void AOTMappedHeapLoader::get_segment_indexes(int idx, int& seg_idx, int& int_idx) { @@ -386,7 +387,7 @@ void AOTMappedHeapLoader::get_segment_indexes(int idx, int& seg_idx, int& int_id "sanity: %d index maps to %d segment and %d internal", idx, seg_idx, int_idx); } -void AOTMappedHeapLoader::add_root_segment(objArrayOop segment_oop) { +void AOTMappedHeapLoader::add_root_segment(refArrayOop segment_oop) { assert(segment_oop != nullptr, "must be"); assert(is_in_use(), "must be"); if (_root_segments == nullptr) { @@ -403,7 +404,7 @@ oop AOTMappedHeapLoader::get_root(int index) { assert(!_root_segments->is_empty(), "must have loaded shared heap"); int seg_idx, int_idx; get_segment_indexes(index, seg_idx, int_idx); - objArrayOop result = objArrayOop(root_segment(seg_idx)); + refArrayOop result = root_segment(seg_idx); return result->obj_at(int_idx); } @@ -461,8 +462,7 @@ void AOTMappedHeapLoader::finish_initialization(FileMapInfo* info) { intptr_t first_segment_addr = bottom + segments.base_offset(); for (size_t c = 0; c < segments.count(); c++) { oop segment_oop = cast_to_oop(first_segment_addr + (c * segments.max_size_in_bytes())); - assert(segment_oop->is_objArray(), "Must be"); - add_root_segment((objArrayOop)segment_oop); + 
add_root_segment(oop_cast(segment_oop)); } if (CDSConfig::is_dumping_final_static_archive()) { diff --git a/src/hotspot/share/cds/aotMappedHeapLoader.hpp b/src/hotspot/share/cds/aotMappedHeapLoader.hpp index 10f5ce3124f52..b0fa18cf57386 100644 --- a/src/hotspot/share/cds/aotMappedHeapLoader.hpp +++ b/src/hotspot/share/cds/aotMappedHeapLoader.hpp @@ -154,9 +154,9 @@ class AOTMappedHeapLoader : AllStatic { return (_loaded_heap_bottom <= o && o < _loaded_heap_top); } - static objArrayOop root_segment(int segment_idx); + static refArrayOop root_segment(int segment_idx); static void get_segment_indexes(int idx, int& seg_idx, int& int_idx); - static void add_root_segment(objArrayOop segment_oop); + static void add_root_segment(refArrayOop segment_oop); static void init_root_segment_sizes(int max_size_elems); template diff --git a/src/hotspot/share/cds/aotMappedHeapWriter.cpp b/src/hotspot/share/cds/aotMappedHeapWriter.cpp index 272f548d73126..cfa6c460303e5 100644 --- a/src/hotspot/share/cds/aotMappedHeapWriter.cpp +++ b/src/hotspot/share/cds/aotMappedHeapWriter.cpp @@ -43,6 +43,7 @@ #include "oops/oopHandle.inline.hpp" #include "oops/typeArrayKlass.hpp" #include "oops/typeArrayOop.hpp" +#include "runtime/arguments.hpp" #include "runtime/java.hpp" #include "runtime/mutexLocker.hpp" #include "utilities/bitMap.inline.hpp" @@ -274,12 +275,13 @@ void AOTMappedHeapWriter::ensure_buffer_space(size_t min_bytes) { objArrayOop AOTMappedHeapWriter::allocate_root_segment(size_t offset, int element_count) { HeapWord* mem = offset_to_buffered_address(offset); - memset(mem, 0, objArrayOopDesc::object_size(element_count)); + memset(mem, 0, refArrayOopDesc::object_size(element_count)); // The initialization code is copied from MemAllocator::finish and ObjArrayAllocator::initialize. 
if (UseCompactObjectHeaders) { oopDesc::release_set_mark(mem, Universe::objectArrayKlass()->prototype_header()); } else { + assert(!Arguments::is_valhalla_enabled() || Universe::objectArrayKlass()->prototype_header() == markWord::prototype(), "should be the same"); oopDesc::set_mark(mem, markWord::prototype()); oopDesc::release_set_klass(mem, Universe::objectArrayKlass()); } @@ -310,7 +312,7 @@ void AOTMappedHeapWriter::copy_roots_to_buffer(GrowableArrayCHeapprototype_header() == markWord::prototype(), "should be the same"); oopDesc::set_mark(mem, markWord::prototype()); cast_to_oop(mem)->set_narrow_klass(nk); } @@ -728,7 +731,8 @@ void AOTMappedHeapWriter::update_header_for_requested_obj(oop requested_obj, oop oop fake_oop = cast_to_oop(buffered_addr); if (UseCompactObjectHeaders) { - fake_oop->set_mark(markWord::prototype().set_narrow_klass(nk)); + markWord prototype_header = src_klass->prototype_header().set_narrow_klass(nk); + fake_oop->set_mark(prototype_header); } else { fake_oop->set_narrow_klass(nk); } @@ -738,10 +742,12 @@ void AOTMappedHeapWriter::update_header_for_requested_obj(oop requested_obj, oop } // We need to retain the identity_hash, because it may have been used by some hashtables // in the shared heap. 
- if (!src_obj->fast_no_hash_check()) { + if (!src_obj->fast_no_hash_check() && (!(Arguments::is_valhalla_enabled() && src_obj->mark().is_inline_type()))) { intptr_t src_hash = src_obj->identity_hash(); if (UseCompactObjectHeaders) { - fake_oop->set_mark(markWord::prototype().set_narrow_klass(nk).copy_set_hash(src_hash)); + fake_oop->set_mark(fake_oop->mark().copy_set_hash(src_hash)); + } else if (Arguments::is_valhalla_enabled()) { + fake_oop->set_mark(src_klass->prototype_header().copy_set_hash(src_hash)); } else { fake_oop->set_mark(markWord::prototype().copy_set_hash(src_hash)); } diff --git a/src/hotspot/share/cds/aotMetaspace.cpp b/src/hotspot/share/cds/aotMetaspace.cpp index fac320c3ed705..356a0e72f7581 100644 --- a/src/hotspot/share/cds/aotMetaspace.cpp +++ b/src/hotspot/share/cds/aotMetaspace.cpp @@ -77,6 +77,8 @@ #include "nmt/memTracker.hpp" #include "oops/compressedKlass.hpp" #include "oops/constantPool.inline.hpp" +#include "oops/flatArrayKlass.hpp" +#include "oops/inlineKlass.hpp" #include "oops/instanceMirrorKlass.hpp" #include "oops/klass.inline.hpp" #include "oops/objArrayOop.hpp" @@ -503,7 +505,7 @@ void AOTMetaspace::serialize(SerializeClosure* soc) { soc->do_tag(arrayOopDesc::base_offset_in_bytes(T_BYTE)); soc->do_tag(sizeof(ConstantPool)); soc->do_tag(sizeof(ConstantPoolCache)); - soc->do_tag(objArrayOopDesc::base_offset_in_bytes()); + soc->do_tag(refArrayOopDesc::base_offset_in_bytes()); soc->do_tag(typeArrayOopDesc::base_offset_in_bytes(T_BYTE)); soc->do_tag(sizeof(Symbol)); @@ -582,7 +584,14 @@ static void rewrite_bytecodes(const methodHandle& method) { case btos: // fallthrough case ztos: new_code = Bytecodes::_fast_bgetfield; break; - case atos: new_code = Bytecodes::_fast_agetfield; break; + case atos: { + if (rfe->is_flat()) { + new_code = Bytecodes::_fast_vgetfield; + } else { + new_code = Bytecodes::_fast_agetfield; + } + break; + } case itos: new_code = Bytecodes::_fast_igetfield; break; case ctos: new_code = 
Bytecodes::_fast_cgetfield; break; case stos: new_code = Bytecodes::_fast_sgetfield; break; @@ -607,7 +616,14 @@ static void rewrite_bytecodes(const methodHandle& method) { switch(rfe->tos_state()) { case btos: new_code = Bytecodes::_fast_bputfield; break; case ztos: new_code = Bytecodes::_fast_zputfield; break; - case atos: new_code = Bytecodes::_fast_aputfield; break; + case atos: { + if (rfe->is_flat() || rfe->is_null_free_inline_type()) { + new_code = Bytecodes::_fast_vputfield; + } else { + new_code = Bytecodes::_fast_aputfield; + } + break; + } case itos: new_code = Bytecodes::_fast_iputfield; break; case ctos: new_code = Bytecodes::_fast_cputfield; break; case stos: new_code = Bytecodes::_fast_sputfield; break; diff --git a/src/hotspot/share/cds/aotReferenceObjSupport.cpp b/src/hotspot/share/cds/aotReferenceObjSupport.cpp index 2d5fc8c7f2173..0435f5333e20e 100644 --- a/src/hotspot/share/cds/aotReferenceObjSupport.cpp +++ b/src/hotspot/share/cds/aotReferenceObjSupport.cpp @@ -32,6 +32,7 @@ #include "memory/resourceArea.hpp" #include "memory/universe.hpp" #include "oops/oop.inline.hpp" +#include "oops/oopCast.inline.hpp" #include "oops/oopHandle.inline.hpp" #include "runtime/fieldDescriptor.inline.hpp" #include "runtime/javaCalls.hpp" @@ -163,9 +164,9 @@ void AOTReferenceObjSupport::init_keep_alive_objs_table() { assert_at_safepoint(); // _keep_alive_objs_table uses raw oops oop a = _keep_alive_objs_array.resolve(); if (a != nullptr) { - precond(a->is_objArray()); + precond(a->is_refArray()); precond(AOTReferenceObjSupport::is_enabled()); - objArrayOop array = objArrayOop(a); + refArrayOop array = oop_cast(a); _keep_alive_objs_table = new (mtClass)KeepAliveObjectsTable(); for (int i = 0; i < array->length(); i++) { diff --git a/src/hotspot/share/cds/aotStreamedHeapLoader.cpp b/src/hotspot/share/cds/aotStreamedHeapLoader.cpp index 7f9f8cf062868..393737e51b949 100644 --- a/src/hotspot/share/cds/aotStreamedHeapLoader.cpp +++ 
b/src/hotspot/share/cds/aotStreamedHeapLoader.cpp @@ -41,6 +41,7 @@ #include "oops/access.inline.hpp" #include "oops/objArrayOop.inline.hpp" #include "oops/oop.inline.hpp" +#include "oops/oopCast.inline.hpp" #include "runtime/globals.hpp" #include "runtime/globals_extension.hpp" #include "runtime/handles.inline.hpp" @@ -995,7 +996,7 @@ void AOTStreamedHeapLoader::initialize() { // We can't retire a TLAB until the filler klass is set; set it to the archived object klass. CollectedHeap::set_filler_object_klass(vmClasses::Object_klass()); - objArrayOop roots = oopFactory::new_objectArray(_num_roots, CHECK); + refArrayOop roots = oopFactory::new_objectArray(_num_roots, CHECK); _roots = OopHandle(Universe::vm_global(), roots); _object_index_to_buffer_offset_table = (size_t*)(((address)_heap_region->mapped_base()) + forwarding_offset); @@ -1051,7 +1052,7 @@ oop AOTStreamedHeapLoader::materialize_root(int root_index) { { MutexLocker ml(AOTHeapLoading_lock, Mutex::_safepoint_check_flag); - oop root = objArrayOop(_roots.resolve())->obj_at(root_index); + oop root = oop_cast(_roots.resolve())->obj_at(root_index); if (root != nullptr) { // The root has already been materialized @@ -1070,7 +1071,7 @@ oop AOTStreamedHeapLoader::materialize_root(int root_index) { } oop AOTStreamedHeapLoader::get_root(int index) { - oop result = objArrayOop(_roots.resolve())->obj_at(index); + oop result = oop_cast(_roots.resolve())->obj_at(index); if (result == nullptr) { // Materialize root result = materialize_root(index); diff --git a/src/hotspot/share/cds/aotStreamedHeapWriter.cpp b/src/hotspot/share/cds/aotStreamedHeapWriter.cpp index 25bef10a6731f..8009e63e63d08 100644 --- a/src/hotspot/share/cds/aotStreamedHeapWriter.cpp +++ b/src/hotspot/share/cds/aotStreamedHeapWriter.cpp @@ -371,14 +371,14 @@ template void AOTStreamedHeapWriter::map_oop_field_in_buffer(oop ob void AOTStreamedHeapWriter::update_header_for_buffered_addr(address buffered_addr, oop src_obj, Klass* src_klass) { narrowKlass nk 
= ArchiveBuilder::current()->get_requested_narrow_klass(src_klass); - markWord mw = markWord::prototype(); + markWord mw = Arguments::enable_preview() ? src_klass->prototype_header() : markWord::prototype(); oopDesc* fake_oop = (oopDesc*)buffered_addr; // We need to retain the identity_hash, because it may have been used by some hashtables // in the shared heap. This also has the side effect of pre-initializing the // identity_hash for all shared objects, so they are less likely to be written // into during run time, increasing the potential of memory sharing. - if (src_obj != nullptr) { + if (src_obj != nullptr && !src_klass->is_inline_klass()) { intptr_t src_hash = src_obj->identity_hash(); mw = mw.copy_set_hash(src_hash); } diff --git a/src/hotspot/share/cds/archiveBuilder.cpp b/src/hotspot/share/cds/archiveBuilder.cpp index bd0d070b21202..09a2e80ebaa80 100644 --- a/src/hotspot/share/cds/archiveBuilder.cpp +++ b/src/hotspot/share/cds/archiveBuilder.cpp @@ -806,14 +806,20 @@ void ArchiveBuilder::make_klasses_shareable() { address narrow_klass_base = _requested_static_archive_bottom; // runtime encoding base == runtime mapping start const int narrow_klass_shift = precomputed_narrow_klass_shift(); narrowKlass nk = CompressedKlassPointers::encode_not_null_without_asserts(requested_k, narrow_klass_base, narrow_klass_shift); - k->set_prototype_header(markWord::prototype().set_narrow_klass(nk)); + k->set_prototype_header_klass(nk); } #endif //_LP64 - if (k->is_objArray_klass()) { + if (k->is_flatArray_klass()) { + num_obj_array_klasses ++; + type = "flat array"; + } else if (k->is_refArray_klass()) { + num_obj_array_klasses ++; + type = "ref array"; + } else if (k->is_objArray_klass()) { // InstanceKlass and TypeArrayKlass will in turn call remove_unshareable_info // on their array classes. 
num_obj_array_klasses ++; - type = "array"; + type = "obj array"; } else if (k->is_typeArray_klass()) { num_type_array_klasses ++; type = "array"; diff --git a/src/hotspot/share/cds/archiveUtils.cpp b/src/hotspot/share/cds/archiveUtils.cpp index bfaa1d6644cfc..7261fb1a5c6c6 100644 --- a/src/hotspot/share/cds/archiveUtils.cpp +++ b/src/hotspot/share/cds/archiveUtils.cpp @@ -551,7 +551,7 @@ bool ArchiveUtils::has_aot_initialized_mirror(InstanceKlass* src_ik) { size_t HeapRootSegments::size_in_bytes(size_t seg_idx) { assert(seg_idx < _count, "In range"); - return objArrayOopDesc::object_size(size_in_elems(seg_idx)) * HeapWordSize; + return refArrayOopDesc::object_size(size_in_elems(seg_idx)) * HeapWordSize; } int HeapRootSegments::size_in_elems(size_t seg_idx) { diff --git a/src/hotspot/share/cds/cdsConfig.cpp b/src/hotspot/share/cds/cdsConfig.cpp index 2dd1d9d0824cc..216d6bf15d385 100644 --- a/src/hotspot/share/cds/cdsConfig.cpp +++ b/src/hotspot/share/cds/cdsConfig.cpp @@ -36,6 +36,7 @@ #include "memory/universe.hpp" #include "prims/jvmtiAgentList.hpp" #include "runtime/arguments.hpp" +#include "runtime/globals.hpp" #include "runtime/globals_extension.hpp" #include "runtime/java.hpp" #include "runtime/vmThread.hpp" @@ -158,6 +159,9 @@ const char* CDSConfig::default_archive_path() { tmp.print_raw("_nocoh"); } #endif + if (Arguments::is_valhalla_enabled()) { + tmp.print_raw("_preview"); + } tmp.print_raw(".jsa"); _default_archive_path = os::strdup(tmp.base()); } diff --git a/src/hotspot/share/cds/cdsEnumKlass.cpp b/src/hotspot/share/cds/cdsEnumKlass.cpp index 177d1d6e3ad8b..7684e33e7b8c1 100644 --- a/src/hotspot/share/cds/cdsEnumKlass.cpp +++ b/src/hotspot/share/cds/cdsEnumKlass.cpp @@ -91,10 +91,13 @@ void CDSEnumKlass::archive_static_field(int level, KlassSubGraphInfo* subgraph_i ik->external_name(), fd.name()->as_C_string()); } oop oop_field = mirror->obj_field(fd.offset()); + // There should be no oops for ObjArrayKlass but InstanceKlass::array_klasses holds a 
list of ObjArrayKlass, + // therefore we need the super of the refined array klass. A null field is tested first so the guarantee below can report it. + Klass* oop_field_klass = (oop_field == nullptr) ? nullptr : (oop_field->is_refined_objArray() ? oop_field->klass()->super() : oop_field->klass()); if (oop_field == nullptr) { guarantee(false, "static field %s::%s must not be null", ik->external_name(), fd.name()->as_C_string()); - } else if (oop_field->klass() != ik && oop_field->klass() != ik->array_klass_or_null()) { + } else if (oop_field_klass != ik && oop_field_klass != ik->array_klass_or_null()) { guarantee(false, "static field %s::%s is of the wrong type", ik->external_name(), fd.name()->as_C_string()); } diff --git a/src/hotspot/share/cds/cdsHeapVerifier.cpp b/src/hotspot/share/cds/cdsHeapVerifier.cpp index 3ed0dce1f66de..5608395db138e 100644 --- a/src/hotspot/share/cds/cdsHeapVerifier.cpp +++ b/src/hotspot/share/cds/cdsHeapVerifier.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2022, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -39,6 +39,7 @@ #include "oops/fieldStreams.inline.hpp" #include "oops/klass.inline.hpp" #include "oops/oop.inline.hpp" +#include "oops/oopCast.inline.hpp" #include "oops/oopHandle.inline.hpp" #include "runtime/fieldDescriptor.inline.hpp" @@ -93,6 +94,18 @@ // initialization; this string is never changed during -Xshare:dump. // [D] Simple caches whose value doesn't matter. // [E] Other cases (see comments in-line below). +// +// LIMITATION: +// +// CDSHeapVerifier can only check for problems with object identity. In the example above, +// if the Bar type has identity, the program's correctness requires that the identity +// of Foo.bar and Bar.bar to be equal. This requirement can be checked by CDSHeapVerifier. 
+// +// However, if Bar does not have identity (e.g., it's a value class, or is a primitive type), +// the program's correctness no longer requires that the identity of Foo.bar and Bar.bar +// to be equal (since they don't have an identity anymore). While the program's +// correctness may still have certain assumptions about Foo.bar and Bar.bar (such as the +// internal fields of these two values), such assumptions cannot be checked by CDSHeapVerifier. CDSHeapVerifier::CDSHeapVerifier() : _archived_objs(0), _problems(0) { @@ -293,6 +306,14 @@ class CDSHeapVerifier::CheckStaticFields : public FieldClosure { } } + if (!field_type->is_identity_class()) { + // See comment of LIMITATION above + // Any concrete value class will have a field ".null_reset" which holds an + // all-zero instance of the value class so it will not change between + // dump time and runtime. + return; + } + if (fd->is_final() && java_lang_String::is_instance(static_obj_field) && fd->has_initial_value()) { // This field looks like like this in the Java source: // static final SOME_STRING = "a string literal"; @@ -484,8 +505,8 @@ int CDSHeapVerifier::trace_to_root(outputStream* st, oop orig_obj, oop orig_fiel TraceFields clo(orig_obj, orig_field, st); InstanceKlass::cast(k)->do_nonstatic_fields(&clo); } else { - assert(orig_obj->is_objArray(), "must be"); - objArrayOop array = (objArrayOop)orig_obj; + assert(orig_obj->is_refArray(), "must be"); + refArrayOop array = oop_cast(orig_obj); for (int i = 0; i < array->length(); i++) { if (array->obj_at(i) == orig_field) { st->print(" @[%d]", i); diff --git a/src/hotspot/share/cds/cdsProtectionDomain.cpp b/src/hotspot/share/cds/cdsProtectionDomain.cpp index ff15fdccabea6..b5e0618d9979e 100644 --- a/src/hotspot/share/cds/cdsProtectionDomain.cpp +++ b/src/hotspot/share/cds/cdsProtectionDomain.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2026, Oracle and/or its affiliates. 
All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -36,6 +36,8 @@ #include "memory/resourceArea.hpp" #include "memory/universe.hpp" #include "oops/instanceKlass.hpp" +#include "oops/oopCast.inline.hpp" +#include "oops/refArrayOop.hpp" #include "oops/symbol.hpp" #include "runtime/javaCalls.hpp" @@ -294,41 +296,38 @@ void CDSProtectionDomain::atomic_set_array_index(OopHandle array, int index, oop // The important thing here is that all threads pick up the same result. // It doesn't matter which racing thread wins, as long as only one // result is used by all threads, and all future queries. - ((objArrayOop)array.resolve())->replace_if_null(index, o); + oop_cast(array.resolve())->replace_if_null(index, o); } oop CDSProtectionDomain::shared_protection_domain(int index) { - return ((objArrayOop)_shared_protection_domains.resolve())->obj_at(index); + return oop_cast(_shared_protection_domains.resolve())->obj_at(index); } void CDSProtectionDomain::allocate_shared_protection_domain_array(int size, TRAPS) { if (_shared_protection_domains.resolve() == nullptr) { - oop spd = oopFactory::new_objArray( - vmClasses::ProtectionDomain_klass(), size, CHECK); + oop spd = oopFactory::new_refArray(vmClasses::ProtectionDomain_klass(), size, CHECK); _shared_protection_domains = OopHandle(Universe::vm_global(), spd); } } oop CDSProtectionDomain::shared_jar_url(int index) { - return ((objArrayOop)_shared_jar_urls.resolve())->obj_at(index); + return oop_cast(_shared_jar_urls.resolve())->obj_at(index); } void CDSProtectionDomain::allocate_shared_jar_url_array(int size, TRAPS) { if (_shared_jar_urls.resolve() == nullptr) { - oop sju = oopFactory::new_objArray( - vmClasses::URL_klass(), size, CHECK); + oop sju = oopFactory::new_refArray(vmClasses::URL_klass(), size, CHECK); _shared_jar_urls = OopHandle(Universe::vm_global(), sju); } } oop CDSProtectionDomain::shared_jar_manifest(int 
index) { - return ((objArrayOop)_shared_jar_manifests.resolve())->obj_at(index); + return oop_cast(_shared_jar_manifests.resolve())->obj_at(index); } void CDSProtectionDomain::allocate_shared_jar_manifest_array(int size, TRAPS) { if (_shared_jar_manifests.resolve() == nullptr) { - oop sjm = oopFactory::new_objArray( - vmClasses::Jar_Manifest_klass(), size, CHECK); + oop sjm = oopFactory::new_refArray(vmClasses::Jar_Manifest_klass(), size, CHECK); _shared_jar_manifests = OopHandle(Universe::vm_global(), sjm); } } diff --git a/src/hotspot/share/cds/cppVtables.cpp b/src/hotspot/share/cds/cppVtables.cpp index 57da12dee489f..64f5032779807 100644 --- a/src/hotspot/share/cds/cppVtables.cpp +++ b/src/hotspot/share/cds/cppVtables.cpp @@ -29,13 +29,17 @@ #include "cds/cppVtables.hpp" #include "logging/log.hpp" #include "memory/resourceArea.hpp" +#include "oops/flatArrayKlass.hpp" +#include "oops/inlineKlass.hpp" #include "oops/instanceClassLoaderKlass.hpp" +#include "oops/instanceKlass.inline.hpp" #include "oops/instanceMirrorKlass.hpp" #include "oops/instanceRefKlass.hpp" #include "oops/instanceStackChunkKlass.hpp" #include "oops/methodCounters.hpp" #include "oops/methodData.hpp" #include "oops/objArrayKlass.hpp" +#include "oops/refArrayKlass.hpp" #include "oops/trainingData.hpp" #include "oops/typeArrayKlass.hpp" #include "runtime/arguments.hpp" @@ -69,6 +73,7 @@ using GrowableArray_ModuleEntry_ptr = GrowableArray; #endif // Currently, the archive contains ONLY the following types of objects that have C++ vtables. +// NOTE: this table must be in-sync with sun.jvm.hotspot.memory.FileMapInfo::populateMetadataTypeArray(). 
#define CPP_VTABLE_TYPES_DO(f) \ f(ConstantPool) \ f(InstanceKlass) \ @@ -79,8 +84,11 @@ using GrowableArray_ModuleEntry_ptr = GrowableArray; f(Method) \ f(MethodData) \ f(MethodCounters) \ - f(ObjArrayKlass) \ f(TypeArrayKlass) \ + f(ObjArrayKlass) \ + f(RefArrayKlass) \ + f(FlatArrayKlass) \ + f(InlineKlass) \ f(KlassTrainingData) \ f(MethodTrainingData) \ f(CompileTrainingData) \ diff --git a/src/hotspot/share/cds/filemap.cpp b/src/hotspot/share/cds/filemap.cpp index c2b321f3c0ad2..ad819d5d409d8 100644 --- a/src/hotspot/share/cds/filemap.cpp +++ b/src/hotspot/share/cds/filemap.cpp @@ -87,6 +87,77 @@ #define O_BINARY 0 // otherwise do nothing. #endif +inline void CDSMustMatchFlags::do_print(outputStream* st, bool v) { + st->print("%s", v ? "true" : "false"); +} + +#ifdef _LP64 +inline void CDSMustMatchFlags::do_print(outputStream* st, uint v) { + st->print("%u", v); +} +#endif + +inline void CDSMustMatchFlags::do_print(outputStream* st, intx v) { + st->print("%zd", v); +} + +inline void CDSMustMatchFlags::do_print(outputStream* st, uintx v) { + st->print("%zu", v); +} + +inline void CDSMustMatchFlags::do_print(outputStream* st, double v) { + st->print("%f", v); +} + +void CDSMustMatchFlags::init() { + assert(CDSConfig::is_dumping_archive(), "sanity"); + _max_name_width = 0; + +#define INIT_CDS_MUST_MATCH_FLAG(n) \ + _v_##n = n; \ + _max_name_width = MAX2(_max_name_width,strlen(#n)); + CDS_MUST_MATCH_FLAGS_DO(INIT_CDS_MUST_MATCH_FLAG); +#undef INIT_CDS_MUST_MATCH_FLAG +} + +bool CDSMustMatchFlags::runtime_check() const { +#define CHECK_CDS_MUST_MATCH_FLAG(n) \ + if (_v_##n != n) { \ + ResourceMark rm; \ + stringStream ss; \ + ss.print("VM option %s is different between dumptime (", #n); \ + do_print(&ss, _v_ ## n); \ + ss.print(") and runtime ("); \ + do_print(&ss, n); \ + ss.print(")"); \ + log_info(cds)("%s", ss.as_string()); \ + return false; \ + } + CDS_MUST_MATCH_FLAGS_DO(CHECK_CDS_MUST_MATCH_FLAG); +#undef CHECK_CDS_MUST_MATCH_FLAG + + return true; +} + 
+void CDSMustMatchFlags::print_info() const { + LogTarget(Info, cds) lt; + if (lt.is_enabled()) { + LogStream ls(lt); + ls.print_cr("Recorded VM flags during dumptime:"); + print(&ls); + } +} + +void CDSMustMatchFlags::print(outputStream* st) const { +#define PRINT_CDS_MUST_MATCH_FLAG(n) \ + st->print("- %-s ", #n); \ + st->sp(int(_max_name_width - strlen(#n))); \ + do_print(st, _v_##n); \ + st->cr(); + CDS_MUST_MATCH_FLAGS_DO(PRINT_CDS_MUST_MATCH_FLAG); +#undef PRINT_CDS_MUST_MATCH_FLAG +} + // Fill in the fileMapInfo structure with data about this VM instance. // This method copies the vm version info into header_version. If the version is too @@ -242,6 +313,7 @@ void FileMapHeader::populate(FileMapInfo *info, size_t core_region_alignment, _use_optimized_module_handling = CDSConfig::is_using_optimized_module_handling(); _has_aot_linked_classes = CDSConfig::is_dumping_aot_linked_classes(); _has_full_module_graph = CDSConfig::is_dumping_full_module_graph(); + _has_valhalla_patched_classes = Arguments::is_valhalla_enabled(); // The following fields are for sanity checks for whether this archive // will function correctly with this JVM and the bootclasspath it's @@ -255,6 +327,7 @@ void FileMapHeader::populate(FileMapInfo *info, size_t core_region_alignment, _has_platform_or_app_classes = AOTClassLocationConfig::dumptime()->has_platform_or_app_classes(); _requested_base_address = (char*)SharedBaseAddress; _mapped_base_address = (char*)SharedBaseAddress; + _must_match.init(); } void FileMapHeader::copy_base_archive_name(const char* archive) { @@ -323,6 +396,8 @@ void FileMapHeader::print(outputStream* st) { st->print_cr("- _ro_ptrmap_start_pos: %zu", _ro_ptrmap_start_pos); st->print_cr("- use_optimized_module_handling: %d", _use_optimized_module_handling); st->print_cr("- has_full_module_graph %d", _has_full_module_graph); + st->print_cr("- has_valhalla_patched_classes %d", _has_valhalla_patched_classes); + _must_match.print(st); st->print_cr("- has_aot_linked_classes 
%d", _has_aot_linked_classes); } @@ -706,6 +781,10 @@ bool FileMapInfo::init_from_file(int fd) { } } + if (!header()->check_must_match_flags()) { + return false; + } + return true; } @@ -1924,6 +2003,24 @@ bool FileMapHeader::validate() { return false; } + if (is_static()) { + const char* err = nullptr; + if (Arguments::is_valhalla_enabled()) { + if (!_has_valhalla_patched_classes) { + err = "not created"; + } + } else { + if (_has_valhalla_patched_classes) { + err = "created"; + } + } + if (err != nullptr) { + log_warning(cds)("This archive was %s with --enable-preview. It is " + "incompatible with the current JVM setting", err); + return false; + } + } + if (compact_headers() != UseCompactObjectHeaders) { aot_log_warning(aot)("Unable to use %s.\nThe %s's UseCompactObjectHeaders setting (%s)" " does not equal the current UseCompactObjectHeaders setting (%s).", file_type, file_type, diff --git a/src/hotspot/share/cds/filemap.hpp b/src/hotspot/share/cds/filemap.hpp index f5f6ee235a4c6..02dd85970130e 100644 --- a/src/hotspot/share/cds/filemap.hpp +++ b/src/hotspot/share/cds/filemap.hpp @@ -36,6 +36,7 @@ #include "memory/allocation.hpp" #include "oops/array.hpp" #include "oops/compressedOops.hpp" +#include "runtime/globals.hpp" #include "utilities/align.hpp" #include "utilities/bitMap.hpp" @@ -102,6 +103,39 @@ class FileMapRegion: private CDSFileMapRegion { void print(outputStream* st, int region_index); }; +#define CDS_MUST_MATCH_FLAGS_DO(f) \ + f(UseArrayFlattening) \ + f(UseFieldFlattening) \ + f(InlineTypePassFieldsAsArgs) \ + f(InlineTypeReturnedAsFields) \ + f(UseNullFreeNonAtomicValueFlattening) \ + f(UseNullFreeAtomicValueFlattening) \ + f(UseNullableAtomicValueFlattening) \ + f(UseNullableNonAtomicValueFlattening) \ + f(FlatteningBudget) + + +class CDSMustMatchFlags { +private: + size_t _max_name_width; +#define DECLARE_CDS_MUST_MATCH_FLAG(n) \ + decltype(n) _v_##n; + CDS_MUST_MATCH_FLAGS_DO(DECLARE_CDS_MUST_MATCH_FLAG); +#undef DECLARE_CDS_MUST_MATCH_FLAG + 
+ inline static void do_print(outputStream* st, bool v); + LP64_ONLY(inline static void do_print(outputStream* st, uint v);) + inline static void do_print(outputStream* st, intx v); + inline static void do_print(outputStream* st, uintx v); + inline static void do_print(outputStream* st, double v); + void print_info() const; + +public: + void init(); + bool runtime_check() const; + void print(outputStream* st) const; +}; + class FileMapHeader: private CDSFileMapHeaderBase { friend class CDSConstants; friend class VMStructs; @@ -144,6 +178,8 @@ class FileMapHeader: private CDSFileMapHeaderBase { // some expensive operations. bool _has_aot_linked_classes; // Was the CDS archive created with -XX:+AOTClassLinking bool _has_full_module_graph; // Does this CDS archive contain the full archived module graph? + bool _has_valhalla_patched_classes; // Is this archived dumped with --enable-preview? + CDSMustMatchFlags _must_match; // These flags must be the same between dumptime and runtime size_t _rw_ptrmap_start_pos; // The first bit in the ptrmap corresponds to this position in the rw region size_t _ro_ptrmap_start_pos; // The first bit in the ptrmap corresponds to this position in the ro region @@ -250,6 +286,10 @@ class FileMapHeader: private CDSFileMapHeaderBase { return (0 <= region && region < NUM_CDS_REGIONS); } + bool check_must_match_flags() const { + return _must_match.runtime_check(); + } + void print(outputStream* st); }; diff --git a/src/hotspot/share/cds/heapShared.cpp b/src/hotspot/share/cds/heapShared.cpp index 428a7dd714861..8739b1f6aefed 100644 --- a/src/hotspot/share/cds/heapShared.cpp +++ b/src/hotspot/share/cds/heapShared.cpp @@ -63,6 +63,7 @@ #include "oops/fieldStreams.inline.hpp" #include "oops/objArrayOop.inline.hpp" #include "oops/oop.inline.hpp" +#include "oops/oopCast.inline.hpp" #include "oops/oopHandle.inline.hpp" #include "oops/typeArrayOop.inline.hpp" #include "prims/jvmtiExport.hpp" @@ -452,8 +453,25 @@ void 
HeapShared::make_archived_object_cache_gc_safe() { // Copy all CachedOopInfo into a new table using a different hashing algorithm archived_object_cache()->iterate_all([&] (OopHandle oh, CachedOopInfo info) { - new_cache->put_when_absent(oh, info); - }); + if (Arguments::is_valhalla_enabled() && oh.resolve()->klass()->is_inline_klass()) { + // After make_archived_object_cache_gc_safe() returns, + // _archived_object_cache->get() is called only from the (future) AOT code + // compiler to access heap oops referenced by AOT-compiled method. + // + // As planned in JDK 27 (JDK-8335368), AOT-compiled methods will only reference + // oops that are Strings, mirrors, or exceptions, all of which are not value + // objects. + // + // We exclude value objects from new_cache, as we don't know how to track them + // after the GC moves them. This should be fixed when AOT-compiled methods + // need to reference value objects. + // + // Also TODO: the AOT heap should de-duplicate value objects with identical + // values. See JDK-8383381 + } else { + new_cache->put_when_absent(oh, info); + } + }); destroy_archived_object_cache(); _archived_object_cache = new_cache; @@ -692,8 +710,9 @@ void HeapShared::add_scratch_resolved_references(ConstantPool* src, objArrayOop } } -objArrayOop HeapShared::scratch_resolved_references(ConstantPool* src) { - return (objArrayOop)_scratch_objects_table->get_oop(src); +refArrayOop HeapShared::scratch_resolved_references(ConstantPool* src) { + oop rr = _scratch_objects_table->get_oop(src); + return rr == nullptr ? 
nullptr : oop_cast<refArrayOop>(rr); } void HeapShared::remove_scratch_resolved_references(ConstantPool* src) { @@ -890,6 +909,7 @@ void HeapShared::copy_java_mirror(oop orig_mirror, oop scratch_m) { narrowKlass nk = CompressedKlassPointers::encode(orig_mirror->klass()); scratch_m->set_mark(markWord::prototype().set_narrow_klass(nk).copy_set_hash(src_hash)); } else { + // For valhalla, the prototype header is the same as markWord::prototype(); scratch_m->set_mark(markWord::prototype().copy_set_hash(src_hash)); } assert(scratch_m->mark().is_unlocked(), "sanity"); @@ -898,6 +918,25 @@ void HeapShared::copy_java_mirror(oop orig_mirror, oop scratch_m) { assert(src_hash == archived_hash, "Different hash codes: original " INTPTR_FORMAT ", archived " INTPTR_FORMAT, src_hash, archived_hash); } + Klass* k = java_lang_Class::as_Klass(orig_mirror); + if (k != nullptr && k->is_instance_klass()) { + InstanceKlass* ik = InstanceKlass::cast(k); + + if (ik->is_inline_klass() && ik->is_initialized()) { + // Only concrete value classes need the null_reset field + InlineKlass* ilk = InlineKlass::cast(k); + if (ilk->supports_nullable_layouts()) { + scratch_m->obj_field_put(ilk->null_reset_value_offset(), ilk->null_reset_value()); + } + } + + if (ik->has_acmp_maps_offset()) { + int maps_offset = ik->acmp_maps_offset(); + oop maps = orig_mirror->obj_field(maps_offset); + scratch_m->obj_field_put(maps_offset, maps); + } + } + if (CDSConfig::is_dumping_aot_linked_classes()) { java_lang_Class::set_module(scratch_m, java_lang_Class::module(orig_mirror)); java_lang_Class::set_protection_domain(scratch_m, java_lang_Class::protection_domain(orig_mirror)); @@ -1114,6 +1153,9 @@ void KlassSubGraphInfo::add_subgraph_object_klass(Klass* orig_k) { // to the list. 
return; } + if (orig_k->is_flatArray_klass()) { + _subgraph_object_klasses->append_if_missing(FlatArrayKlass::cast(orig_k)->element_klass()); + } } else { assert(orig_k->is_typeArray_klass(), "must be"); // Primitive type arrays are created early during Universe::genesis. @@ -1489,11 +1531,24 @@ HeapShared::resolve_or_init_classes_for_subgraph_of(Klass* k, bool do_init, TRAP log_info(aot, heap)("%s subgraph %s ", do_init ? "init" : "resolve", k->external_name()); } + Array<Klass*>* klasses = record->subgraph_object_klasses(); + + if (do_init && klasses != nullptr) { + // All the classes of the oops in this subgraph are in the klasses array. + // Link them first in case any of the oops are used in the methods + // invoked in the rest of this function. + for (int i = 0; i < klasses->length(); i++) { + Klass* klass = klasses->at(i); + if (klass->in_aot_cache() && klass->is_instance_klass()) { + InstanceKlass::cast(klass)->link_class(CHECK_NULL); + } + } + } + resolve_or_init(k, do_init, CHECK_NULL); // Load/link/initialize the klasses of the objects in the subgraph. // nullptr class loader is used. 
- Array<Klass*>* klasses = record->subgraph_object_klasses(); if (klasses != nullptr) { for (int i = 0; i < klasses->length(); i++) { Klass* klass = klasses->at(i); @@ -1525,7 +1580,11 @@ void HeapShared::resolve_or_init(Klass* k, bool do_init, TRAPS) { if (!do_init) { if (k->class_loader_data() == nullptr) { Klass* resolved_k = SystemDictionary::resolve_or_null(k->name(), CHECK); - assert(resolved_k == k, "classes used by archived heap must not be replaced by JVMTI ClassFileLoadHook"); + if (resolved_k != nullptr && resolved_k->is_array_klass()) { + assert(resolved_k == k || resolved_k == k->super(), "classes used by archived heap must not be replaced by JVMTI ClassFileLoadHook"); + } else { + assert(resolved_k == k, "classes used by archived heap must not be replaced by JVMTI ClassFileLoadHook"); + } } } else { assert(k->class_loader_data() != nullptr, "must have been resolved by HeapShared::resolve_classes"); @@ -1985,7 +2044,8 @@ void HeapShared::check_special_subgraph_classes() { for (int i = 0; i < num; i++) { Klass* subgraph_k = klasses->at(i); Symbol* name = subgraph_k->name(); - if (subgraph_k->is_instance_klass() && + + if (subgraph_k->is_identity_class() && name != vmSymbols::java_lang_Class() && name != vmSymbols::java_lang_String() && name != vmSymbols::java_lang_ArithmeticException() && diff --git a/src/hotspot/share/cds/heapShared.hpp b/src/hotspot/share/cds/heapShared.hpp index 10ea35ab56e7c..8f7378a4a5ac4 100644 --- a/src/hotspot/share/cds/heapShared.hpp +++ b/src/hotspot/share/cds/heapShared.hpp @@ -449,7 +449,7 @@ class HeapShared: AllStatic { static void finish_materialize_objects() NOT_CDS_JAVA_HEAP_RETURN; static void write_heap(AOTMappedHeapInfo* mapped_heap_info, AOTStreamedHeapInfo* streamed_heap_info) NOT_CDS_JAVA_HEAP_RETURN; - static objArrayOop scratch_resolved_references(ConstantPool* src); + static refArrayOop scratch_resolved_references(ConstantPool* src); static void add_scratch_resolved_references(ConstantPool* src, objArrayOop dest) NOT_CDS_JAVA_HEAP_RETURN; 
static void remove_scratch_resolved_references(ConstantPool* src) NOT_CDS_JAVA_HEAP_RETURN; static void init_dumping() NOT_CDS_JAVA_HEAP_RETURN; diff --git a/src/hotspot/share/cds/lambdaFormInvokers.cpp b/src/hotspot/share/cds/lambdaFormInvokers.cpp index 3ff5705b79d31..ea10ad4283fa0 100644 --- a/src/hotspot/share/cds/lambdaFormInvokers.cpp +++ b/src/hotspot/share/cds/lambdaFormInvokers.cpp @@ -46,6 +46,7 @@ #include "oops/objArrayKlass.hpp" #include "oops/objArrayOop.hpp" #include "oops/oop.inline.hpp" +#include "oops/oopCast.inline.hpp" #include "oops/oopHandle.inline.hpp" #include "oops/typeArrayOop.inline.hpp" #include "runtime/handles.inline.hpp" @@ -143,7 +144,7 @@ void LambdaFormInvokers::regenerate_holder_classes(TRAPS) { HandleMark hm(THREAD); int len = _lambdaform_lines->length(); - objArrayHandle list_lines = oopFactory::new_objArray_handle(vmClasses::String_klass(), len, CHECK); + refArrayHandle list_lines = oopFactory::new_refArray_handle(vmClasses::String_klass(), len, CHECK); for (int i = 0; i < len; i++) { Handle h_line = java_lang_String::create_from_str(_lambdaform_lines->at(i), CHECK); list_lines->obj_at_put(i, h_line()); @@ -172,7 +173,7 @@ void LambdaFormInvokers::regenerate_holder_classes(TRAPS) { return; } - objArrayHandle h_array(THREAD, (objArrayOop)result.get_oop()); + refArrayHandle h_array(THREAD, oop_cast<refArrayOop>(result.get_oop())); int sz = h_array->length(); assert(sz % 2 == 0 && sz >= 2, "Must be even size of length"); for (int i = 0; i < sz; i+= 2) { diff --git a/src/hotspot/share/ci/bcEscapeAnalyzer.cpp b/src/hotspot/share/ci/bcEscapeAnalyzer.cpp index 712f7af4139a3..946cbea2e0655 100644 --- a/src/hotspot/share/ci/bcEscapeAnalyzer.cpp +++ b/src/hotspot/share/ci/bcEscapeAnalyzer.cpp @@ -554,6 +554,9 @@ void BCEscapeAnalyzer::iterate_one_block(ciBlock *blk, StateInfo &state, Growabl set_global_escape(state.apop()); state.spop(); ArgumentMap arr = state.apop(); + // If the array is a flat array, a larger part of it is modified than + // the 
size of a reference. However, if OFFSET_ANY is given as + // parameter to set_modified(), size is not taken into account. set_modified(arr, OFFSET_ANY, type2size[T_OBJECT]*HeapWordSize); break; } diff --git a/src/hotspot/share/ci/ciArray.cpp b/src/hotspot/share/ci/ciArray.cpp index 6b1a30fec6aa8..6e8ba2101311d 100644 --- a/src/hotspot/share/ci/ciArray.cpp +++ b/src/hotspot/share/ci/ciArray.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -27,8 +27,11 @@ #include "ci/ciConstant.hpp" #include "ci/ciKlass.hpp" #include "ci/ciUtilities.inline.hpp" +#include "oops/flatArrayKlass.hpp" +#include "oops/layoutKind.hpp" #include "oops/objArrayOop.inline.hpp" #include "oops/oop.inline.hpp" +#include "oops/oopCast.inline.hpp" #include "oops/typeArrayOop.inline.hpp" #include "utilities/powerOfTwo.hpp" @@ -36,7 +39,13 @@ // // This class represents an arrayOop in the HotSpot virtual // machine. 
+ +arrayOop ciArray::get_arrayOop() const { + return oop_cast<arrayOop>(get_oop()); +} + static BasicType fixup_element_type(BasicType bt) { + if (bt == T_FLAT_ELEMENT) return T_OBJECT; if (is_reference_type(bt)) return T_OBJECT; if (bt == T_BOOLEAN) return T_BYTE; return bt; @@ -59,10 +68,22 @@ ciConstant ciArray::element_value_impl(BasicType elembt, case T_ARRAY: case T_OBJECT: { - assert(ary->is_objArray(), ""); - objArrayOop objary = (objArrayOop) ary; - oop elem = objary->obj_at(index); - return ciConstant(elembt, CURRENT_ENV->get_object(elem)); + if (ary->is_refArray()) { + refArrayOop refary = oop_cast<refArrayOop>(ary); + oop elem = refary->obj_at(index); + return ciConstant(elembt, CURRENT_ENV->get_object(elem)); + } else { + assert(ary->is_flatArray(), ""); + flatArrayOop flatary = oop_cast<flatArrayOop>(ary); + assert(CompilerThread::current()->thread_state() == _thread_in_vm, ""); + JavaThread* THREAD = CompilerThread::current(); + oop elem = flatary->obj_at(index, THREAD); + if (HAS_PENDING_EXCEPTION) { + CLEAR_PENDING_EXCEPTION; + return ciConstant(); + } + return ciConstant(elembt, CURRENT_ENV->get_object(elem)); + } } default: break; @@ -119,6 +140,17 @@ ciConstant ciArray::element_value_by_offset(intptr_t element_offset) { return element_value((jint) index); } +bool ciArray::is_null_free() const { + VM_ENTRY_MARK; + return get_arrayOop()->is_null_free_array(); +} + +bool ciArray::is_atomic() const { + VM_ENTRY_MARK; + arrayOop oop = get_arrayOop(); + return !oop->is_flatArray() || LayoutKindHelper::is_atomic_flat(FlatArrayKlass::cast(oop->klass())->layout_kind()); +} + // ------------------------------------------------------------------ // ciArray::print_impl // diff --git a/src/hotspot/share/ci/ciArray.hpp b/src/hotspot/share/ci/ciArray.hpp index d7b913436fe57..8cc0712de256d 100644 --- a/src/hotspot/share/ci/ciArray.hpp +++ b/src/hotspot/share/ci/ciArray.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -29,6 +29,7 @@ #include "ci/ciConstant.hpp" #include "ci/ciObject.hpp" #include "oops/arrayOop.hpp" +#include "oops/flatArrayOop.hpp" #include "oops/objArrayOop.hpp" #include "oops/typeArrayOop.hpp" @@ -43,8 +44,9 @@ class ciArray : public ciObject { protected: ciArray( objArrayHandle h_a) : ciObject(h_a), _length(h_a()->length()) {} ciArray(typeArrayHandle h_a) : ciObject(h_a), _length(h_a()->length()) {} + ciArray(flatArrayHandle h_a) : ciObject(h_a), _length(h_a()->length()) {} - arrayOop get_arrayOop() const { return (arrayOop)get_oop(); } + arrayOop get_arrayOop() const; const char* type_string() { return "ciArray"; } @@ -66,10 +68,14 @@ class ciArray : public ciObject { // Current value of an element at the specified offset. // Returns T_ILLEGAL if there is no element at the given offset. - ciConstant element_value_by_offset(intptr_t element_offset); + virtual ciConstant element_value_by_offset(intptr_t element_offset); // What kind of ciObject is this? 
bool is_array() { return true; } + + virtual bool is_flat() { return false; } + bool is_null_free() const; + bool is_atomic() const; }; #endif // SHARE_CI_CIARRAY_HPP diff --git a/src/hotspot/share/ci/ciArrayKlass.cpp b/src/hotspot/share/ci/ciArrayKlass.cpp index 947cc0cb6fcb2..a1e8adf7f4bab 100644 --- a/src/hotspot/share/ci/ciArrayKlass.cpp +++ b/src/hotspot/share/ci/ciArrayKlass.cpp @@ -27,6 +27,7 @@ #include "ci/ciTypeArrayKlass.hpp" #include "ci/ciUtilities.inline.hpp" #include "memory/universe.hpp" +#include "oops/arrayKlass.hpp" // ciArrayKlass // @@ -59,7 +60,7 @@ ciType* ciArrayKlass::element_type() { if (is_type_array_klass()) { return ciType::make(as_type_array_klass()->element_type()); } else { - return as_obj_array_klass()->element_klass()->as_klass(); + return element_klass()->as_klass(); } } @@ -96,11 +97,37 @@ bool ciArrayKlass::is_leaf_type() { // ciArrayKlass::make // // Make an array klass of the specified element type. -ciArrayKlass* ciArrayKlass::make(ciType* element_type) { +ciArrayKlass* ciArrayKlass::make(ciType* element_type, bool null_free, bool atomic, bool refined_type) { if (element_type->is_primitive_type()) { return ciTypeArrayKlass::make(element_type->basic_type()); } else { - return ciObjArrayKlass::make(element_type->as_klass()); + return ciObjArrayKlass::make(element_type->as_klass(), refined_type, null_free, atomic); } } +int ciArrayKlass::array_header_in_bytes() { + return get_ArrayKlass()->array_header_in_bytes(); +} + +ciInstance* ciArrayKlass::component_mirror_instance() const { + GUARDED_VM_ENTRY( + oop component_mirror = ArrayKlass::cast(get_Klass())->component_mirror(); + return CURRENT_ENV->get_instance(component_mirror); + ) +} + +bool ciArrayKlass::is_elem_null_free() const { + ArrayProperties props = properties(); + assert(props.is_valid(), "meaningless"); + return props.is_null_restricted(); +} + +bool ciArrayKlass::is_elem_atomic() const { + ArrayProperties props = properties(); + assert(props.is_valid(), 
"meaningless"); + return !props.is_non_atomic(); +} + +ArrayProperties ciArrayKlass::properties() const { + GUARDED_VM_ENTRY(return ArrayKlass::cast(get_Klass())->properties();) +} diff --git a/src/hotspot/share/ci/ciArrayKlass.hpp b/src/hotspot/share/ci/ciArrayKlass.hpp index a64d80b20f3f4..71b21996a7944 100644 --- a/src/hotspot/share/ci/ciArrayKlass.hpp +++ b/src/hotspot/share/ci/ciArrayKlass.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -26,6 +26,7 @@ #define SHARE_CI_CIARRAYKLASS_HPP #include "ci/ciKlass.hpp" +#include "oops/arrayKlass.hpp" // ciArrayKlass // @@ -48,14 +49,27 @@ class ciArrayKlass : public ciKlass { public: jint dimension() { return _dimension; } - ciType* element_type(); // JLS calls this the "component type" - ciType* base_element_type(); // JLS calls this the "element type" + ciType* element_type(); // JLS calls this the "component type", (T[] for T[][]) + ciType* base_element_type(); // JLS calls this the "element type", (T for T[][]) bool is_leaf_type(); // No subtypes of this array type. + bool is_refined() const { return !is_type_array_klass() && properties().is_valid(); } + // What kind of vmObject is this? bool is_array_klass() const { return true; } - static ciArrayKlass* make(ciType* element_type); + // The one-level type of the array elements. 
+ virtual ciKlass* element_klass() { return nullptr; } + + static ciArrayKlass* make(ciType* klass, bool null_free = false, bool atomic = false, bool refined_type = false); + + int array_header_in_bytes(); + ciInstance* component_mirror_instance() const; + + bool is_elem_null_free() const; + bool is_elem_atomic() const; + + ArrayProperties properties() const; }; #endif // SHARE_CI_CIARRAYKLASS_HPP diff --git a/src/hotspot/share/ci/ciClassList.hpp b/src/hotspot/share/ci/ciClassList.hpp index bce1e52e80b00..04b13d5cae923 100644 --- a/src/hotspot/share/ci/ciClassList.hpp +++ b/src/hotspot/share/ci/ciClassList.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -53,17 +53,22 @@ class ciMethodType; class ciArray; class ciObjArray; class ciTypeArray; +class ciFlatArray; class ciSymbol; class ciMetadata; class ciMethod; class ciMethodData; class ciReceiverTypeData; // part of ciMethodData class ciType; +class ciWrapper; class ciReturnAddress; class ciKlass; class ciInstanceKlass; +class ciInlineKlass; class ciArrayKlass; class ciObjArrayKlass; +class ciFlatArrayKlass; +class ciRefArrayKlass; class ciTypeArrayKlass; // Simulate Java Language style package-private access with @@ -105,15 +110,19 @@ friend class ciTypeEntries; \ friend class ciSpeculativeTrapData; \ friend class ciSymbol; \ friend class ciArray; \ +friend class ciFlatArray; \ friend class ciObjArray; \ friend class ciMetadata; \ friend class ciReplay; \ friend class ciTypeArray; \ friend class ciType; \ friend class ciReturnAddress; \ +friend class ciWrapper; \ friend class ciKlass; \ friend class ciInstanceKlass; \ +friend class ciInlineKlass; \ friend class ciArrayKlass; \ +friend class ciFlatArrayKlass; \ friend class ciObjArrayKlass; \ 
friend class ciTypeArrayKlass; \ diff --git a/src/hotspot/share/ci/ciConstant.cpp b/src/hotspot/share/ci/ciConstant.cpp index 234cd8171c46b..6e09bb7a25a10 100644 --- a/src/hotspot/share/ci/ciConstant.cpp +++ b/src/hotspot/share/ci/ciConstant.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -31,8 +31,29 @@ // // This class represents a constant value. +ciConstant ciConstant::make_zero_or_null(BasicType bt) { + switch (bt) { + case T_FLOAT: return ciConstant((jfloat).0f); + case T_DOUBLE: return ciConstant((jdouble).0); + case T_LONG: return ciConstant((jlong)0L); + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + return ciConstant(bt, 0); + case T_OBJECT: + case T_ARRAY: + return ciConstant(bt, CURRENT_ENV->get_object(nullptr)); + default: + ShouldNotReachHere(); + return ciConstant(); + } +} + // ------------------------------------------------------------------ // ciConstant::is_null_or_zero +// This assumes `this->is_valid()`, otherwise, `as_object` will assert. 
bool ciConstant::is_null_or_zero() const { if (!is_java_primitive(basic_type())) { return as_object()->is_null_object(); @@ -62,7 +83,7 @@ bool ciConstant::is_loaded() const { // ------------------------------------------------------------------ // ciConstant::print -void ciConstant::print() { +void ciConstant::print() const { tty->print("is_loaded()) { // Now make an array for it - return ciObjArrayKlass::make_impl(elem_klass); + return ciArrayKlass::make(elem_klass); } } @@ -1355,7 +1356,9 @@ void ciEnv::record_lambdaform(Thread* thread, oop form) { } // Check LambdaForm.names array - objArrayOop names = (objArrayOop)obj_field(form, "names"); + // The type of the array is Name[] and Name is an identity class, + // so the array is always an array of references + refArrayOop names = oop_cast<refArrayOop>(obj_field(form, "names")); if (names != nullptr) { RecordLocation lp0(this, "names"); int len = names->length(); diff --git a/src/hotspot/share/ci/ciEnv.hpp b/src/hotspot/share/ci/ciEnv.hpp index 8167697e84be7..4a844bd1f5154 100644 --- a/src/hotspot/share/ci/ciEnv.hpp +++ b/src/hotspot/share/ci/ciEnv.hpp @@ -201,8 +201,17 @@ class ciEnv : StackObj { } ciObjArrayKlass* get_obj_array_klass(Klass* o) { if (o == nullptr) return nullptr; + assert(o->is_unrefined_objArray_klass(), "must be exact"); return get_metadata(o)->as_obj_array_klass(); } + ciFlatArrayKlass* get_flat_array_klass(Klass* o) { + if (o == nullptr) return nullptr; + return get_metadata(o)->as_flat_array_klass(); + } + ciRefArrayKlass* get_ref_array_klass(Klass* o) { + if (o == nullptr) return nullptr; + return get_metadata(o)->as_ref_array_klass(); + } ciTypeArrayKlass* get_type_array_klass(Klass* o) { if (o == nullptr) return nullptr; return get_metadata(o)->as_type_array_klass(); @@ -503,6 +512,14 @@ class ciEnv : StackObj { void dump_compile_data(outputStream* out); void dump_replay_data_version(outputStream* out); + ciWrapper* make_early_larval_wrapper(ciType* type) const { + return 
_factory->make_early_larval_wrapper(type); + } + + ciWrapper* make_null_free_wrapper(ciType* type) const { + return _factory->make_null_free_wrapper(type); + } + const char *dyno_name(const InstanceKlass* ik) const; const char *replay_name(const InstanceKlass* ik) const; const char *replay_name(ciKlass* i) const; diff --git a/src/hotspot/share/ci/ciField.cpp b/src/hotspot/share/ci/ciField.cpp index 946fea5346fb5..34f70f5fdd878 100644 --- a/src/hotspot/share/ci/ciField.cpp +++ b/src/hotspot/share/ci/ciField.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -22,19 +22,25 @@ * */ +#include "ci/ciConstant.hpp" #include "ci/ciField.hpp" +#include "ci/ciInlineKlass.hpp" #include "ci/ciInstanceKlass.hpp" +#include "ci/ciSymbol.hpp" #include "ci/ciSymbols.hpp" #include "ci/ciUtilities.inline.hpp" #include "classfile/javaClasses.hpp" #include "classfile/vmClasses.hpp" #include "gc/shared/collectedHeap.inline.hpp" #include "interpreter/linkResolver.hpp" +#include "jvm_io.h" #include "oops/klass.inline.hpp" +#include "oops/layoutKind.hpp" #include "oops/oop.inline.hpp" #include "runtime/fieldDescriptor.inline.hpp" #include "runtime/handles.inline.hpp" #include "runtime/reflection.hpp" +#include "utilities/globalDefinitions.hpp" // ciField // @@ -70,7 +76,7 @@ // ------------------------------------------------------------------ // ciField::ciField ciField::ciField(ciInstanceKlass* klass, int index, Bytecodes::Code bc) : - _known_to_link_with_put(nullptr), _known_to_link_with_get(nullptr) { + _original_holder(nullptr), _is_flat(false), _known_to_link_with_put(nullptr), _known_to_link_with_get(nullptr) { ASSERT_IN_VM; CompilerThread *THREAD = CompilerThread::current(); @@ -102,6 +108,9 @@ 
ciField::ciField(ciInstanceKlass* klass, int index, Bytecodes::Code bc) : _type = ciType::make(field_type); } + _is_null_free = false; + _null_marker_offset = -1; + // Get the field's declared holder. // // Note: we actually create a ciInstanceKlass for this klass, @@ -213,6 +222,65 @@ ciField::ciField(fieldDescriptor *fd) : "bootstrap classes must not create & cache unshared fields"); } +// Special copy constructor used to flatten inline type fields by +// copying the fields of the inline type to a new holder klass. +ciField::ciField(ciField* declared_field, ciField* subfield) { + assert(subfield->holder()->is_inlinetype() || subfield->holder()->is_abstract(), "should only be used for inline type field flattening"); + assert(!subfield->is_flat(), "subfield must not be flat"); + assert(declared_field->is_flat(), "declared field must be flat"); + + _flags = declared_field->flags(); + _holder = declared_field->holder(); + _offset = declared_field->offset_in_bytes() + (subfield->offset_in_bytes() - declared_field->type()->as_inline_klass()->payload_offset()); + + ResourceMark rm; + char buffer[256]; + jio_snprintf(buffer, sizeof(buffer), "%s.%s", declared_field->name()->as_utf8(), subfield->name()->as_utf8()); + _name = ciSymbol::make(buffer); + + _signature = subfield->_signature; + _type = subfield->_type; + _is_constant = (declared_field->is_strict() && declared_field->is_final()) || declared_field->is_constant(); + _known_to_link_with_put = subfield->_known_to_link_with_put; + _known_to_link_with_get = subfield->_known_to_link_with_get; + _constant_value = ciConstant(); + + _is_flat = false; + _is_null_free = false; + _null_marker_offset = -1; + _original_holder = (subfield->_original_holder != nullptr) ? 
subfield->_original_holder : subfield->_holder; + _layout_kind = LayoutKind::UNKNOWN; +} + +// Constructor for the ciField of a null marker +ciField::ciField(ciField* declared_field) { + assert(declared_field->is_flat(), "declared field must be flat"); + assert(!declared_field->is_null_free(), "must have a null marker"); + + _flags = declared_field->flags(); + _holder = declared_field->holder(); + _offset = declared_field->null_marker_offset(); + + ResourceMark rm; + char buffer[256]; + jio_snprintf(buffer, sizeof(buffer), "%s.$nullMarker$", declared_field->name()->as_utf8()); + _name = ciSymbol::make(buffer); + + _signature = ciSymbols::bool_signature(); + _type = ciType::make(T_BOOLEAN); + + _is_constant = (declared_field->is_strict() && declared_field->is_final()) || declared_field->is_constant(); + _known_to_link_with_put = nullptr; + _known_to_link_with_get = nullptr; + _constant_value = ciConstant(); + + _is_flat = false; + _is_null_free = false; + _null_marker_offset = -1; + _original_holder = nullptr; + _layout_kind = LayoutKind::UNKNOWN; +} + static bool trust_final_nonstatic_fields(ciInstanceKlass* holder) { if (holder == nullptr) return false; @@ -231,6 +299,9 @@ static bool trust_final_nonstatic_fields(ciInstanceKlass* holder) { // can't be serialized, so there is no hacking of finals going on with them. if (holder->is_hidden()) return true; + // Trust final fields in inline type buffers + if (holder->is_inlinetype()) + return true; // Trust final fields in records if (holder->is_record()) return true; @@ -241,9 +312,19 @@ void ciField::initialize_from(fieldDescriptor* fd) { // Get the flags, offset, and canonical holder of the field. 
_flags = ciFlags(fd->access_flags(), fd->field_flags().is_stable(), fd->field_status().is_initialized_final_update()); _offset = fd->offset(); - Klass* field_holder = fd->field_holder(); + InstanceKlass* field_holder = fd->field_holder(); assert(field_holder != nullptr, "null field_holder"); _holder = CURRENT_ENV->get_instance_klass(field_holder); + _is_flat = fd->is_flat(); + _is_null_free = fd->is_null_free_inline_type(); + if (fd->has_null_marker()) { + InlineLayoutInfo* li = field_holder->inline_layout_info_adr(fd->index()); + _null_marker_offset = li->null_marker_offset(); + } else { + _null_marker_offset = -1; + } + _original_holder = nullptr; + _layout_kind = fd->is_flat() ? fd->layout_kind() : LayoutKind::UNKNOWN; // Check to see if the field is constant. Klass* k = _holder->get_Klass(); @@ -286,7 +367,7 @@ ciConstant ciField::constant_value() { if (_constant_value.basic_type() == T_ILLEGAL) { // Static fields are placed in mirror objects. ciInstance* mirror = _holder->java_mirror(); - _constant_value = mirror->field_value_impl(type()->basic_type(), offset_in_bytes()); + _constant_value = mirror->field_value_impl(this); } if (FoldStableValues && is_stable() && _constant_value.is_null_or_zero()) { return ciConstant(); @@ -316,7 +397,9 @@ ciType* ciField::compute_type() { } ciType* ciField::compute_type_impl() { - ciKlass* type = CURRENT_ENV->get_klass_by_name_impl(_holder, constantPoolHandle(), _signature, false); + // Use original holder for fields that came in through flattening + ciKlass* accessing_klass = (_original_holder != nullptr) ? _original_holder : _holder; + ciKlass* type = CURRENT_ENV->get_klass_by_name_impl(accessing_klass, constantPoolHandle(), _signature, false); if (!type->is_primitive_type() && is_shared()) { // We must not cache a pointer to an unshared type, in a shared field. 
bool type_is_also_shared = false; @@ -335,6 +418,10 @@ ciType* ciField::compute_type_impl() { return type; } +bool ciField::is_atomic() { + assert(is_flat(), "should not ask this property for non-flat field %s.%s", holder()->name()->as_utf8(), name()->as_utf8()); + return LayoutKindHelper::is_atomic_flat(_layout_kind) && !type()->as_inline_klass()->is_naturally_atomic(is_null_free()); +} // ------------------------------------------------------------------ // ciField::will_link @@ -379,6 +466,18 @@ bool ciField::will_link(ciMethod* accessing_method, fieldDescriptor result; LinkResolver::resolve_field(result, link_info, bc, ClassInitMode::dont_init, CHECK_AND_CLEAR_(false)); + // Strict statics may require tracking if their class is not fully initialized. + // For now we can bail out of the compiler and let the interpreter handle it. + if (is_static && result.is_strict_static_unset()) { + // If we left out this logic, we would get (a) spurious + // failures for C2 code because compiled putstatic would not write + // the "unset" bits, and (b) missed failures for too-early reads, + // since the compiled getstatic would not check the "unset" bits. + // Test C1 on with "-XX:TieredStopAtLevel=2 -Xcomp -Xbatch". + // Test C2 on with "-XX:-TieredCompilation -Xcomp -Xbatch". 
+ return false; + } + // update the hit-cache, unless there is a problem with memory scoping: if (accessing_method->holder()->is_shared() || !is_shared()) { if (is_put) { @@ -411,7 +510,7 @@ bool ciField::is_autobox_cache() { // ------------------------------------------------------------------ // ciField::print -void ciField::print() { +void ciField::print() const { tty->print("print(" constant_value="); _constant_value.print(); } + tty->print(" is_flat=%s", bool_to_str(_is_flat)); + tty->print(" is_null_free=%s", bool_to_str(_is_null_free)); + tty->print(" null_marker_offset=%d", _null_marker_offset); tty->print(">"); } diff --git a/src/hotspot/share/ci/ciField.hpp b/src/hotspot/share/ci/ciField.hpp index ffc8730705f7f..78451e1b0e2dd 100644 --- a/src/hotspot/share/ci/ciField.hpp +++ b/src/hotspot/share/ci/ciField.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -30,6 +30,7 @@ #include "ci/ciFlags.hpp" #include "ci/ciInstance.hpp" #include "ci/ciUtilities.hpp" +#include "oops/layoutKind.hpp" // ciField // @@ -44,11 +45,16 @@ class ciField : public ArenaObj { private: ciFlags _flags; ciInstanceKlass* _holder; + ciInstanceKlass* _original_holder; // For fields nested in flat fields ciSymbol* _name; ciSymbol* _signature; ciType* _type; int _offset; + LayoutKind _layout_kind; bool _is_constant; + bool _is_flat; + bool _is_null_free; + int _null_marker_offset; ciMethod* _known_to_link_with_put; ciInstanceKlass* _known_to_link_with_get; ciConstant _constant_value; @@ -58,6 +64,8 @@ class ciField : public ArenaObj { ciField(ciInstanceKlass* klass, int index, Bytecodes::Code bc); ciField(fieldDescriptor* fd); + ciField(ciField* declared_field, ciField* sudfield); + ciField(ciField* declared_field); // shared constructor code void initialize_from(fieldDescriptor* fd); @@ -91,6 +99,7 @@ class ciField : public ArenaObj { // In that case the declared holder of f would be B and // the canonical holder of f would be A. ciInstanceKlass* holder() const { return _holder; } + ciInstanceKlass* original_holder() const { return _original_holder; } // Name of this field? ciSymbol* name() const { return _name; } @@ -104,9 +113,6 @@ class ciField : public ArenaObj { // How is this field actually stored in memory? BasicType layout_type() { return type2field[(_type == nullptr) ? T_OBJECT : _type->basic_type()]; } - // How big is this field in memory? - int size_in_bytes() { return type2aelembytes(layout_type()); } - // What is the offset of this field? (Fields are aligned to the byte level.) 
int offset_in_bytes() const { assert(_offset >= 1, "illegal call to offset()"); @@ -169,6 +175,59 @@ class ciField : public ArenaObj { bool is_stable () const { return flags().is_stable(); } bool is_volatile () const { return flags().is_volatile(); } bool is_transient () const { return flags().is_transient(); } + bool is_strict () const { return flags().is_strict(); } + bool is_flat () const { return _is_flat; } + bool is_null_free () const { return _is_null_free; } + int null_marker_offset () const { return _null_marker_offset; } + LayoutKind layout_kind () const { return _layout_kind; } + + // Whether this field needs to act atomically. Note that it does not actually need accessing + // atomically. For example, if there cannot be racy accesses to this field, then it can be + // accessed in a non-atomic manner. Unless this field must be in observably immutable memory, + // this method must not depend on the fact that the field cannot be accessed racily (e.g. it is a + // strict final field), as if the holder object is flattened as a field that is not strict final, + // this property is lost. + // + // A slice of memory is observably immutable if all stores to it must happen before all loads + // from it. A typical example is when the memory is a strict field and its immediate holder is + // not a field inside another object. + // + // For example: + // value class A { + // int x; + // int y; + // } + // value class AHolder { + // A v; + // } + // class AHolderHolder { + // AHolder v; + // } + // The field AHolder.v is flattened in AHolder, but AHolder cannot be flattened in AHolderHolder + // because we cannot access AHolderHolder.v atomically. As a result, we can say that the field is + // non-atomic. In this case, AHolder.v has its layout being NULLABLE_NON_ATOMIC_FLAT, this + // prevents its holder from being flattened in observably mutable memory. 
+ // + // Another example: + // value class B { + // int v; + // } + // looselyconsistent value class BHolder { + // B v; + // byte b; + // } + // class BHolderHolder { + // null-free BHolder v; + // } + // The field BHolder.v is flattened in BHolder, and BHolder can be flattened further in + // BHolderHolder. In this case, while BHolder.v can be accessed in a non-atomic manner if BHolder + // is a standalone object, it must still be accessed atomically when it is a subfield in + // BHolderHolder.v. As a result, the field BHolder.v must still return true for this method, so + // that the compiler knows to access it correctly in all circumstances. Implementation-wise, + // BHolder.v has its layout being NULLABLE_ATOMIC_FLAT, which still allows its holder to be + // flattened in observably mutable memory. + bool is_atomic(); + // The field is modified outside of instance initializer methods // (or class/initializer methods if the field is static). bool has_initialized_final_update() const { return flags().has_initialized_final_update(); } @@ -178,7 +237,7 @@ class ciField : public ArenaObj { bool is_autobox_cache(); // Debugging output - void print(); + void print() const; void print_name_on(outputStream* st); }; diff --git a/src/hotspot/share/ci/ciFlags.cpp b/src/hotspot/share/ci/ciFlags.cpp index 5eade4a12c168..92bb8d0470543 100644 --- a/src/hotspot/share/ci/ciFlags.cpp +++ b/src/hotspot/share/ci/ciFlags.cpp @@ -40,15 +40,18 @@ void ciFlags::print_klass_flags(outputStream* st) { if (is_final()) { st->print(",final"); } - if (is_super()) { - st->print(",super"); - } if (is_interface()) { st->print(",interface"); } if (is_abstract()) { st->print(",abstract"); } + if (is_identity()) { + st->print(",identity"); + } + if (is_strict()) { + st->print(",strict"); + } } // ------------------------------------------------------------------ diff --git a/src/hotspot/share/ci/ciFlags.hpp b/src/hotspot/share/ci/ciFlags.hpp index 426f953611fee..7ea47173177c1 100644 --- 
a/src/hotspot/share/ci/ciFlags.hpp +++ b/src/hotspot/share/ci/ciFlags.hpp @@ -54,12 +54,15 @@ class ciFlags { bool is_static () const { return _flags.is_static(); } bool is_final () const { return _flags.is_final(); } bool is_synchronized () const { return _flags.is_synchronized(); } - bool is_super () const { return _flags.is_super(); } bool is_volatile () const { return _flags.is_volatile(); } bool is_transient () const { return _flags.is_transient(); } bool is_native () const { return _flags.is_native(); } bool is_interface () const { return _flags.is_interface(); } bool is_abstract () const { return _flags.is_abstract(); } + bool is_varargs () const { return _flags.is_varargs(); } + bool is_identity () const { return _flags.is_identity_class(); } + bool is_strict () const { return _flags.is_strict(); } + bool is_stable () const { return _stable; } // In case the current object represents a field, return true if // the field is modified outside of instance initializer methods diff --git a/src/hotspot/share/ci/ciFlatArray.cpp b/src/hotspot/share/ci/ciFlatArray.cpp new file mode 100644 index 0000000000000..0180fbdb86863 --- /dev/null +++ b/src/hotspot/share/ci/ciFlatArray.cpp @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "ci/ciArray.hpp" +#include "ci/ciConstant.hpp" +#include "ci/ciField.hpp" +#include "ci/ciFlatArray.hpp" +#include "ci/ciInlineKlass.hpp" +#include "ci/ciUtilities.inline.hpp" +#include "oops/oop.inline.hpp" + +// Null marker of an element, as a T_BOOLEAN constant. +// Returns T_ILLEGAL if there is no element at the given index. +ciConstant ciFlatArray::null_marker_of_element_by_index(int index) { + ciConstant nm = field_value(index, nullptr); + postcond(!nm.is_valid() || nm.basic_type() == T_BOOLEAN); + return nm; +} + +ciConstant ciFlatArray::null_marker_of_element_by_offset(intptr_t element_offset) { + FlatArrayKlass* faklass; + GUARDED_VM_ENTRY(faklass = FlatArrayKlass::cast(get_arrayOop()->klass());) + int lh = faklass->layout_helper(); + int shift = Klass::layout_helper_log2_element_size(lh); + intptr_t header = arrayOopDesc::base_offset_in_bytes(T_FLAT_ELEMENT); + intptr_t index = (element_offset - header) >> shift; + intptr_t offset = header + (index << shift); + if (offset != element_offset || index != (jint) index || index < 0 || index >= length()) { + return ciConstant(); + } + return null_marker_of_element_by_index((jint) index); +} + +ciConstant ciFlatArray::element_value_by_offset(intptr_t element_offset) { + FlatArrayKlass* faklass; + GUARDED_VM_ENTRY(faklass = FlatArrayKlass::cast(get_arrayOop()->klass());) + int lh = faklass->layout_helper(); + int shift = Klass::layout_helper_log2_element_size(lh); + intptr_t header = arrayOopDesc::base_offset_in_bytes(T_FLAT_ELEMENT); + intptr_t index = (element_offset - header) >> shift; + intptr_t offset = header + (index << 
shift); + if (offset != element_offset || index != (jint) index || index < 0 || index >= length()) { + return ciConstant(); + } + return element_value((jint) index); +} + +ciConstant ciFlatArray::field_value_by_offset(intptr_t field_offset) { + ciInlineKlass* elt_type = element_type()->as_inline_klass(); + FlatArrayKlass* faklass; + GUARDED_VM_ENTRY(faklass = FlatArrayKlass::cast(get_arrayOop()->klass());) + int lh = faklass->layout_helper(); + int shift = Klass::layout_helper_log2_element_size(lh); + intptr_t header = arrayOopDesc::base_offset_in_bytes(T_FLAT_ELEMENT); + intptr_t index = (field_offset - header) >> shift; + intptr_t element_offset = header + (index << shift); + int field_offset_in_element = (int)(field_offset - element_offset); + ciField* field = elt_type->get_field_by_offset(elt_type->payload_offset() + field_offset_in_element, false); + if (field == nullptr) { + if (field_offset_in_element != elt_type->null_marker_offset_in_payload()) { + return ciConstant(); + } + } + + if (index != (jint) index || index < 0 || index >= length()) { + return ciConstant(); + } + ciConstant elt = field_value((jint) index, field); + + return elt; +} + +ciConstant ciFlatArray::field_value(int index, ciField* field) { + auto get_field_from_object_constant = [field](const ciConstant& v) -> ciConstant { + ciObject* obj = v.as_object(); + if (obj->is_null_object()) { + if (field == nullptr) { + return ciConstant::make_zero_or_null(T_BOOLEAN); + } + return ciConstant::make_zero_or_null(field->type()->basic_type()); + } + // obj cannot be a ciArray since it is an element of a flat array, so it must be a value class, which arrays are not. 
+ ciInstance* inst = obj->as_instance(); + if (field == nullptr) { + return ciConstant(T_BOOLEAN, 1); + } + return inst->field_value(field); + }; + + BasicType elembt = element_basic_type(); + ciConstant value = check_constant_value_cache(index, elembt); + if (value.is_valid()) { + return get_field_from_object_constant(value); + } + GUARDED_VM_ENTRY( + value = element_value_impl(T_OBJECT, get_arrayOop(), index); + ) + + if (!value.is_valid()) { + return ciConstant(); + } + + add_to_constant_value_cache(index, value); + return get_field_from_object_constant(value); +} + diff --git a/src/hotspot/share/ci/ciFlatArray.hpp b/src/hotspot/share/ci/ciFlatArray.hpp new file mode 100644 index 0000000000000..b1ffdd50c2ce9 --- /dev/null +++ b/src/hotspot/share/ci/ciFlatArray.hpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2017, 2026, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef SHARE_VM_CI_CIFLATARRAY_HPP +#define SHARE_VM_CI_CIFLATARRAY_HPP + +#include "ci/ciArray.hpp" +#include "ci/ciClassList.hpp" + +// ciFlatArray +// +// This class represents a flatArrayOop in the HotSpot virtual machine. +class ciFlatArray : public ciArray { + CI_PACKAGE_ACCESS + +protected: + ciFlatArray(flatArrayHandle h_o) : ciArray(h_o) {} + + const char* type_string() override { return "ciFlatArray"; } + +public: + bool is_flat_array() const override { return true; } + bool is_flat() override { return true; } + + // Current value of an element at the specified offset. + // Returns T_ILLEGAL if there is no element at the given offset. + ciConstant element_value_by_offset(intptr_t element_offset) override; + ciConstant field_value_by_offset(intptr_t field_offset); + ciConstant field_value(int index, ciField* field); + ciConstant null_marker_of_element_by_offset(intptr_t element_offset); + ciConstant null_marker_of_element_by_index(int index); +}; + +#endif // SHARE_VM_CI_CIFLATARRAY_HPP diff --git a/src/hotspot/share/ci/ciFlatArrayKlass.hpp b/src/hotspot/share/ci/ciFlatArrayKlass.hpp new file mode 100644 index 0000000000000..197f1fecf5307 --- /dev/null +++ b/src/hotspot/share/ci/ciFlatArrayKlass.hpp @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_VM_CI_CIFLATARRAYKLASS_HPP +#define SHARE_VM_CI_CIFLATARRAYKLASS_HPP + +#include "ci/ciInlineKlass.hpp" +#include "ci/ciObjArrayKlass.hpp" +#include "oops/flatArrayKlass.hpp" + +// ciFlatArrayKlass +// +// This class represents a Klass* in the HotSpot virtual machine +// whose Klass part is a FlatArrayKlass. +class ciFlatArrayKlass : public ciObjArrayKlass { + CI_PACKAGE_ACCESS + friend class ciEnv; + +protected: + ciFlatArrayKlass(Klass* k) : ciObjArrayKlass(k) { + assert(k->is_flatArray_klass(), "wrong type"); + } + + const FlatArrayKlass* get_FlatArrayKlass() const { + return FlatArrayKlass::cast(get_Klass()); + } + + virtual const char* type_string() override { return "ciFlatArrayKlass"; } + +public: + LayoutKind layout_kind() const { return get_FlatArrayKlass()->layout_kind(); } + + jint max_elements() const { + return get_FlatArrayKlass()->max_elements(); + } + + int log2_element_size() { + return Klass::layout_helper_log2_element_size(layout_helper()); + } + + int element_byte_size() { return 1 << log2_element_size(); } + + // What kind of ciObject is this? 
+ virtual bool is_flat_array_klass() const override { return true; } + + virtual ciKlass* exact_klass() override { + assert(element_klass()->as_inline_klass()->exact_klass() != nullptr, "must have exact klass"); + return this; + } +}; + +#endif // SHARE_VM_CI_CIFLATARRAYKLASS_HPP diff --git a/src/hotspot/share/ci/ciInlineKlass.cpp b/src/hotspot/share/ci/ciInlineKlass.cpp new file mode 100644 index 0000000000000..12944ff035f11 --- /dev/null +++ b/src/hotspot/share/ci/ciInlineKlass.cpp @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2017, 2026, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "ci/ciConstant.hpp" +#include "ci/ciField.hpp" +#include "ci/ciInlineKlass.hpp" +#include "ci/ciUtilities.inline.hpp" +#include "oops/array.hpp" +#include "runtime/signature.hpp" +#include "utilities/globalDefinitions.hpp" + +// Offset of the first field in the inline type +int ciInlineKlass::payload_offset() const { + GUARDED_VM_ENTRY(return to_InlineKlass()->payload_offset();) +} + +// Could any array containing an instance of this value class ever be flat? +bool ciInlineKlass::maybe_flat_in_array() const { + GUARDED_VM_ENTRY(return to_InlineKlass()->maybe_flat_in_array();) +} + +// Are arrays containing an instance of this value class always flat? +bool ciInlineKlass::is_always_flat_in_array() const { + GUARDED_VM_ENTRY(return to_InlineKlass()->is_always_flat_in_array();) +} + +// Can this inline type be passed as multiple values? +bool ciInlineKlass::can_be_passed_as_fields() const { + GUARDED_VM_ENTRY(return to_InlineKlass()->can_be_passed_as_fields();) +} + +// Can this inline type be returned as multiple values? 
+bool ciInlineKlass::can_be_returned_as_fields() const { + GUARDED_VM_ENTRY(return to_InlineKlass()->can_be_returned_as_fields();) +} + +bool ciInlineKlass::is_empty() { + // Do not use InlineKlass::is_empty_inline_type here because it does + // consider the container empty even if fields of empty inline types + // are not flat + return nof_declared_nonstatic_fields() == 0; +} + +int ciInlineKlass::inline_arg_length() const { + VM_ENTRY_MARK; + return get_InlineKlass()->extended_sig()->length(); +} + +// When passing an inline type's fields as arguments, count the number +// of argument slots that are needed +int ciInlineKlass::inline_arg_slots() const { + VM_ENTRY_MARK; + const Array* sig_vk = get_InlineKlass()->extended_sig(); + int slots = 0; + for (int i = 0; i < sig_vk->length(); i++) { + BasicType bt = sig_vk->at(i)._bt; + if (bt == T_METADATA || bt == T_VOID) { + continue; + } + slots += type2size[bt]; + } + return slots; +} + +bool ciInlineKlass::contains_oops() const { + GUARDED_VM_ENTRY(return get_InlineKlass()->contains_oops();) +} + +int ciInlineKlass::oop_count() const { + GUARDED_VM_ENTRY(return get_InlineKlass()->nonstatic_oop_count();) +} + +address ciInlineKlass::pack_handler() const { + GUARDED_VM_ENTRY(return get_InlineKlass()->pack_handler();) +} + +address ciInlineKlass::unpack_handler() const { + GUARDED_VM_ENTRY(return get_InlineKlass()->unpack_handler();) +} + +InlineKlass* ciInlineKlass::get_InlineKlass() const { + GUARDED_VM_ENTRY(return to_InlineKlass();) +} + +bool ciInlineKlass::has_null_free_non_atomic_layout() const { + GUARDED_VM_ENTRY(return get_InlineKlass()->has_null_free_non_atomic_layout();) +} + +bool ciInlineKlass::has_null_free_atomic_layout() const { + GUARDED_VM_ENTRY(return get_InlineKlass()->has_null_free_atomic_layout();) +} + +bool ciInlineKlass::has_nullable_atomic_layout() const { + GUARDED_VM_ENTRY(return get_InlineKlass()->has_nullable_atomic_layout();) +} + +int ciInlineKlass::null_marker_offset_in_payload() const 
{ + GUARDED_VM_ENTRY(return get_InlineKlass()->null_marker_offset_in_payload();) +} + +// Convert size of atomic layout in bytes to corresponding BasicType +BasicType ciInlineKlass::atomic_size_to_basic_type(bool null_free) const { + VM_ENTRY_MARK + InlineKlass* vk = get_InlineKlass(); + assert(!null_free || vk->has_null_free_atomic_layout(), "No null-free atomic layout available"); + assert( null_free || vk->has_nullable_atomic_layout(), "No nullable atomic layout available"); + int size = null_free ? vk->null_free_atomic_size_in_bytes() : vk->nullable_atomic_size_in_bytes(); + BasicType bt = T_ILLEGAL; + if (size == sizeof(jlong)) { + bt = T_LONG; + } else if (size == sizeof(jint)) { + bt = T_INT; + } else if (size == sizeof(jshort)) { + bt = T_SHORT; + } else if (size == sizeof(jbyte)) { + bt = T_BYTE; + } else { + assert(false, "Unsupported size: %d", size); + } + return bt; +} + +bool ciInlineKlass::is_naturally_atomic(bool null_free) { + return null_free ? (nof_nonstatic_fields() <= 1) : (nof_nonstatic_fields() == 0); +} + +int ciInlineKlass::field_map_offset() const { + GUARDED_VM_ENTRY(return get_InlineKlass()->acmp_maps_offset();) +} + +ciConstant ciInlineKlass::get_field_map() const { + VM_ENTRY_MARK + InlineKlass* vk = get_InlineKlass(); + oop array = vk->java_mirror()->obj_field(vk->acmp_maps_offset()); + return ciConstant(T_ARRAY, CURRENT_ENV->get_object(array)); +} + +// All fields of this object are zero even if they are null-free. As a result, this object should +// only be used to reset the payload of fields or array elements and should not be leaked +// elsewhere. 
+ciConstant ciInlineKlass::get_null_reset_value() { + assert(is_initialized(), "null_reset_value is only allocated during initialization of %s", name()->as_utf8()); + VM_ENTRY_MARK + InlineKlass* vk = get_InlineKlass(); + oop null_reset_value = vk->null_reset_value(); + return ciConstant(T_OBJECT, CURRENT_ENV->get_object(null_reset_value)); +} + +ArrayDescription ciInlineKlass::array_description_of_array_properties(const ArrayProperties& requested_properties) { + GUARDED_VM_ENTRY(return ObjArrayKlass::array_layout_selection(get_InlineKlass(), requested_properties);) +} diff --git a/src/hotspot/share/ci/ciInlineKlass.hpp b/src/hotspot/share/ci/ciInlineKlass.hpp new file mode 100644 index 0000000000000..fbe7eea91ca64 --- /dev/null +++ b/src/hotspot/share/ci/ciInlineKlass.hpp @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2017, 2026, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef SHARE_VM_CI_CIINLINEKLASS_HPP +#define SHARE_VM_CI_CIINLINEKLASS_HPP + +#include "ci/ciInstanceKlass.hpp" +#include "oops/inlineKlass.hpp" + +// ciInlineKlass +// +// Specialized ciInstanceKlass for inline types. +class ciInlineKlass : public ciInstanceKlass { + CI_PACKAGE_ACCESS + +private: + + InlineKlass* to_InlineKlass() const { + return InlineKlass::cast(get_Klass()); + } + +protected: + ciInlineKlass(Klass* h_k) : ciInstanceKlass(h_k) { + assert(is_final(), "InlineKlass must be final"); + }; + + ciInlineKlass(ciSymbol* name, jobject loader) : + ciInstanceKlass(name, loader, T_OBJECT) {} + + const char* type_string() override { return "ciInlineKlass"; } + +public: + bool is_inlinetype() const override { return true; } + + // Inline type fields + int payload_offset() const; + + bool maybe_flat_in_array() const override; + bool is_always_flat_in_array() const; + + // Scalarized calling convention support: pass/return this inline type as its + // field components in the calling convention (registers/stack), not as a single oop. + // See InlineKlass::initialize_calling_convention for details. 
+ bool can_be_passed_as_fields() const; + bool can_be_returned_as_fields() const; + + bool is_empty(); + int inline_arg_length() const; + int inline_arg_slots() const; + bool contains_oops() const; + int oop_count() const; + address pack_handler() const; + address unpack_handler() const; + InlineKlass* get_InlineKlass() const; + bool has_null_free_non_atomic_layout() const; + bool has_null_free_atomic_layout() const; + bool has_nullable_atomic_layout() const; + int null_marker_offset_in_payload() const; + BasicType atomic_size_to_basic_type(bool null_free) const; + + bool is_naturally_atomic(bool null_free); + int field_map_offset() const; + ciConstant get_field_map() const; + ciConstant get_null_reset_value(); + ArrayDescription array_description_of_array_properties(const ArrayProperties&); +}; + +#endif // SHARE_VM_CI_CIINLINEKLASS_HPP diff --git a/src/hotspot/share/ci/ciInstance.cpp b/src/hotspot/share/ci/ciInstance.cpp index 9591298e3ab04..dd6976caa25b2 100644 --- a/src/hotspot/share/ci/ciInstance.cpp +++ b/src/hotspot/share/ci/ciInstance.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -24,13 +24,17 @@ #include "ci/ciConstant.hpp" #include "ci/ciField.hpp" +#include "ci/ciInlineKlass.hpp" #include "ci/ciInstance.hpp" #include "ci/ciInstanceKlass.hpp" #include "ci/ciNullObject.hpp" #include "ci/ciUtilities.inline.hpp" #include "classfile/javaClasses.inline.hpp" #include "classfile/vmClasses.hpp" +#include "oops/fieldStreams.hpp" +#include "oops/fieldStreams.inline.hpp" #include "oops/oop.inline.hpp" +#include "oops/valuePayload.inline.hpp" // ciInstance // @@ -58,43 +62,57 @@ ciType* ciInstance::java_mirror_type() { // ------------------------------------------------------------------ // ciInstance::field_value_impl -ciConstant ciInstance::field_value_impl(BasicType field_btype, int offset) { - ciConstant value = check_constant_value_cache(offset, field_btype); +ciConstant ciInstance::field_value_impl(ciField* field) { + BasicType field_bt = field->type()->basic_type(); + int offset = field->offset_in_bytes(); + ciConstant value = check_constant_value_cache(offset, field_bt); if (value.is_valid()) { return value; } VM_ENTRY_MARK; oop obj = get_oop(); assert(obj != nullptr, "bad oop"); - switch(field_btype) { - case T_BYTE: value = ciConstant(field_btype, obj->byte_field(offset)); break; - case T_CHAR: value = ciConstant(field_btype, obj->char_field(offset)); break; - case T_SHORT: value = ciConstant(field_btype, obj->short_field(offset)); break; - case T_BOOLEAN: value = ciConstant(field_btype, obj->bool_field(offset)); break; - case T_INT: value = ciConstant(field_btype, obj->int_field(offset)); break; + switch(field_bt) { + case T_BYTE: value = ciConstant(field_bt, obj->byte_field(offset)); break; + case T_CHAR: value = ciConstant(field_bt, obj->char_field(offset)); break; + case T_SHORT: value = ciConstant(field_bt, obj->short_field(offset)); break; + case T_BOOLEAN: value = ciConstant(field_bt, obj->bool_field(offset)); break; + case T_INT: value = ciConstant(field_bt, 
obj->int_field(offset)); break; case T_FLOAT: value = ciConstant(obj->float_field(offset)); break; case T_DOUBLE: value = ciConstant(obj->double_field(offset)); break; case T_LONG: value = ciConstant(obj->long_field(offset)); break; case T_OBJECT: // fall through case T_ARRAY: { - oop o = obj->obj_field(offset); + if (field->is_flat()) { + assert(field->is_atomic(), "do not query atomically a non-atomic flat field"); + InlineKlass* vk = field->type()->as_inline_klass()->get_InlineKlass(); + FlatValuePayload payload = FlatValuePayload::construct_from_parts(obj, offset, vk, field->layout_kind()); + oop res = payload.read(THREAD); + if (HAS_PENDING_EXCEPTION) { + CLEAR_PENDING_EXCEPTION; + return ciConstant(); + } + value = ciConstant(field_bt, CURRENT_ENV->get_object(res)); + } else { + oop o = obj->obj_field(offset); - // A field will be "constant" if it is known always to be - // a non-null reference to an instance of a particular class, - // or to a particular array. This can happen even if the instance - // or array is not perm. In such a case, an "unloaded" ciArray - // or ciInstance is created. The compiler may be able to use - // information about the object's class (which is exact) or length. + // A field will be "constant" if it is known always to be + // a non-null reference to an instance of a particular class, + // or to a particular array. This can happen even if the instance + // or array is not perm. In such a case, an "unloaded" ciArray + // or ciInstance is created. The compiler may be able to use + // information about the object's class (which is exact) or length. 
- if (o == nullptr) { - value = ciConstant(field_btype, ciNullObject::make()); - } else { - value = ciConstant(field_btype, CURRENT_ENV->get_object(o)); + if (o == nullptr) { + value = ciConstant(field_bt, ciNullObject::make()); + } else { + value = ciConstant(field_bt, CURRENT_ENV->get_object(o)); + } } break; } default: - fatal("no field value: %s", type2name(field_btype)); + fatal("no field value: %s", type2name(field_bt)); } add_to_constant_value_cache(offset, value); return value; @@ -103,12 +121,106 @@ ciConstant ciInstance::field_value_impl(BasicType field_btype, int offset) { // ------------------------------------------------------------------ // ciInstance::field_value // -// Constant value of a field. +// Constant value of a field of any kind: a declared field, or a leaf field. +// For a flat declared field, a cached copy of the value object is returned. +// +// Since stable fields can be treated as "constant" but are not really, we need +// to cache the value of fields so that the compiler will observe only one value +// per field. We also need to ensure that leaf fields from a single stable +// flat declared field will be observed to be consistent with each other. +// +// To do so, we need to always fetch the whole declared field containing the +// desired field. If we want a sub-field of a flat field, we then extract the field +// out of the cached copy, using sub_field_value. +// +// In the case we request a non-flat field, or a declared field (possibly flat), there +// is no sub-field to extract and sub_field_value will not be called. 
ciConstant ciInstance::field_value(ciField* field) { assert(is_loaded(), "invalid access - must be loaded"); assert(field->holder()->is_loaded(), "invalid access - holder must be loaded"); - assert(field->is_static() || klass()->is_subclass_of(field->holder()), "invalid access - must be subclass"); - return field_value_impl(field->type()->basic_type(), field->offset_in_bytes()); + assert(field->is_static() || field->holder()->is_inlinetype() || klass()->is_subclass_of(field->holder()), + "invalid access - must be subclass"); + ciInstanceKlass* klass = this->klass()->as_instance_klass(); + int containing_field_idx = klass->field_index_by_offset(field->offset_in_bytes()); + ciField* containing_field = klass->declared_nonstatic_field_at(containing_field_idx); + if (containing_field->is_flat() && !containing_field->is_atomic()) { + assert(field != containing_field, "do not ask for a non atomic declared field"); + return field_value_impl(field); + } + ciConstant containing_field_value = field_value_impl(containing_field); + if (!containing_field_value.is_valid()) { + return ciConstant(); + } + if (field->original_holder() == nullptr) { + return containing_field_value; + } + ciObject* obj = containing_field_value.as_object(); + if (obj->is_instance()) { + ciInstance* inst = obj->as_instance(); + // inst->klass() must be an inline klass since it is the value of a flat field. + ciInlineKlass* inst_klass = inst->klass()->as_inline_klass(); + ciField* field_in_value_klass = inst_klass->get_field_by_offset(inst_klass->payload_offset() + field->offset_in_bytes() - containing_field->offset_in_bytes(), false); + return inst->sub_field_value(field_in_value_klass); + } else if (obj->is_null_object()) { + return ciConstant::make_zero_or_null(field->type()->basic_type()); + } + // obj should not be an array since we are trying to get a field inside it + ShouldNotReachHere(); + return ciConstant(); +} + +// Extract a leaf field from a value object. 
+// +// This is used by field_value when getting the value of a sub-field. field_value +// will take care of getting the value of the declared field containing the requested +// field, and of caching (see the comment on field_value for why). But if we want the +// value of a sub-field, we need to extract it from the value of the declared field +// containing the said sub-field. This is what this function does. +// +// This is meant for internal used only. In particular, this function does not cache +// the result and must be called only on already cached values (to ensure consistency). +// field_value takes care of that. +ciConstant ciInstance::sub_field_value(ciField* field) { + precond(klass()->is_inlinetype()); + precond(!field->is_flat()); + int offset = field->offset_in_bytes(); + BasicType field_btype = field->type()->basic_type(); + + ciConstant value; + VM_ENTRY_MARK; + oop obj = get_oop(); + assert(obj != nullptr, "bad oop"); + switch(field_btype) { + case T_BYTE: value = ciConstant(field_btype, obj->byte_field(offset)); break; + case T_CHAR: value = ciConstant(field_btype, obj->char_field(offset)); break; + case T_SHORT: value = ciConstant(field_btype, obj->short_field(offset)); break; + case T_BOOLEAN: value = ciConstant(field_btype, obj->bool_field(offset)); break; + case T_INT: value = ciConstant(field_btype, obj->int_field(offset)); break; + case T_FLOAT: value = ciConstant(obj->float_field(offset)); break; + case T_DOUBLE: value = ciConstant(obj->double_field(offset)); break; + case T_LONG: value = ciConstant(obj->long_field(offset)); break; + case T_OBJECT: // fall through + case T_ARRAY: { + oop o = obj->obj_field(offset); + + // A field will be "constant" if it is known always to be + // a non-null reference to an instance of a particular class, + // or to a particular array. This can happen even if the instance + // or array is not perm. In such a case, an "unloaded" ciArray + // or ciInstance is created. 
The compiler may be able to use + // information about the object's class (which is exact) or length. + + if (o == nullptr) { + value = ciConstant(field_btype, ciNullObject::make()); + } else { + value = ciConstant(field_btype, CURRENT_ENV->get_object(o)); + } + break; + } + default: + fatal("no field value: %s", type2name(field_btype)); + } + return value; } // ------------------------------------------------------------------ diff --git a/src/hotspot/share/ci/ciInstance.hpp b/src/hotspot/share/ci/ciInstance.hpp index 5367d397c9723..706349707a2e6 100644 --- a/src/hotspot/share/ci/ciInstance.hpp +++ b/src/hotspot/share/ci/ciInstance.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -49,7 +49,8 @@ class ciInstance : public ciObject { void print_impl(outputStream* st); - ciConstant field_value_impl(BasicType field_btype, int offset); + ciConstant field_value_impl(ciField* field); + ciConstant sub_field_value(ciField* field); public: // If this object is a java mirror, return the corresponding type. 
diff --git a/src/hotspot/share/ci/ciInstanceKlass.cpp b/src/hotspot/share/ci/ciInstanceKlass.cpp index 9a2a6dcd8f2da..41adc7de96b6d 100644 --- a/src/hotspot/share/ci/ciInstanceKlass.cpp +++ b/src/hotspot/share/ci/ciInstanceKlass.cpp @@ -23,10 +23,12 @@ */ #include "ci/ciField.hpp" +#include "ci/ciInlineKlass.hpp" #include "ci/ciInstance.hpp" #include "ci/ciInstanceKlass.hpp" #include "ci/ciUtilities.inline.hpp" #include "classfile/javaClasses.hpp" +#include "classfile/systemDictionary.hpp" #include "classfile/vmClasses.hpp" #include "memory/allocation.hpp" #include "memory/allocation.inline.hpp" @@ -35,6 +37,7 @@ #include "oops/instanceKlass.inline.hpp" #include "oops/klass.inline.hpp" #include "oops/oop.inline.hpp" +#include "runtime/arguments.hpp" #include "runtime/fieldDescriptor.inline.hpp" #include "runtime/handles.inline.hpp" #include "runtime/jniHandles.inline.hpp" @@ -65,10 +68,11 @@ ciInstanceKlass::ciInstanceKlass(Klass* k) : _has_nonstatic_concrete_methods = ik->has_nonstatic_concrete_methods(); _is_hidden = ik->is_hidden(); _is_record = ik->is_record(); + _declared_nonstatic_fields = nullptr; // initialized lazily by compute_nonstatic_fields + _nonstatic_fields = nullptr; // initialized lazily by compute_nonstatic_fields _trust_final_fields = ik->trust_final_fields(); - _nonstatic_fields = nullptr; // initialized lazily by compute_nonstatic_fields: _has_injected_fields = -1; - _implementor = nullptr; // we will fill these lazily + _implementor = nullptr; // we will fill these lazily _transitive_interfaces = nullptr; // Ensure that the metadata wrapped by the ciMetadata is kept alive by GC. 
@@ -115,13 +119,15 @@ ciInstanceKlass::ciInstanceKlass(Klass* k) : // Version for unloaded classes: ciInstanceKlass::ciInstanceKlass(ciSymbol* name, - jobject loader) - : ciKlass(name, T_OBJECT) + jobject loader, + BasicType bt) + : ciKlass(name, bt) { assert(name->char_at(0) != JVM_SIGNATURE_ARRAY, "not an instance klass"); _init_state = (InstanceKlass::ClassState)0; _has_nonstatic_fields = false; - _nonstatic_fields = nullptr; + _declared_nonstatic_fields = nullptr; // initialized lazily by compute_nonstatic_fields + _nonstatic_fields = nullptr; // initialized lazily by compute_nonstatic_fields _has_injected_fields = -1; _is_hidden = false; _is_record = false; @@ -387,19 +393,18 @@ bool ciInstanceKlass::has_finalizable_subclass() { return Dependencies::find_finalizable_subclass(get_instanceKlass()) != nullptr; } -// ------------------------------------------------------------------ -// ciInstanceKlass::contains_field_offset -bool ciInstanceKlass::contains_field_offset(int offset) { +bool ciInstanceKlass::contains_field_offset(int offset) const { VM_ENTRY_MARK; return get_instanceKlass()->contains_field_offset(offset); } ciField* ciInstanceKlass::get_nonstatic_field_by_offset(const int field_offset) { for (int i = 0, len = nof_nonstatic_fields(); i < len; i++) { - ciField* field = _nonstatic_fields->at(i); + ciField* field = nonstatic_field_at(i); int field_off = field->offset_in_bytes(); - if (field_off == field_offset) + if (field_off == field_offset) { return field; + } } return nullptr; } @@ -410,6 +415,7 @@ ciField* ciInstanceKlass::get_field_by_offset(int field_offset, bool is_static) if (!is_static) { return get_nonstatic_field_by_offset(field_offset); } + VM_ENTRY_MARK; InstanceKlass* k = get_instanceKlass(); fieldDescriptor fd; @@ -420,6 +426,39 @@ ciField* ciInstanceKlass::get_field_by_offset(int field_offset, bool is_static) return field; } +ciField* ciInstanceKlass::get_non_flat_field_by_offset(int field_offset) { + for (int i = 0, len = 
nof_declared_nonstatic_fields(); i < len; i++) { + ciField* field = declared_nonstatic_field_at(i); + int field_off = field->offset_in_bytes(); + if (field_off == field_offset) { + return field; + } + } + return nullptr; +} + +int ciInstanceKlass::field_index_by_offset(int offset) { + int best_offset = 0; + int best_index = -1; + // Search the field with the given offset + for (int i = 0; i < nof_declared_nonstatic_fields(); ++i) { + int field_offset = declared_nonstatic_field_at(i)->offset_in_bytes(); + if (field_offset == offset) { + // Exact match + return i; + } else if (field_offset < offset && field_offset > best_offset) { + // No exact match. Save the index of the field with the closest offset that + // is smaller than the given field offset. This index corresponds to the + // flat field that holds the field we are looking for. + best_offset = field_offset; + best_index = i; + } + } + assert(best_index >= 0, "field not found"); + assert(best_offset == offset || declared_nonstatic_field_at(best_index)->type()->is_inlinetype(), "offset should match for non-inline types"); + return best_index; +} + // ------------------------------------------------------------------ // ciInstanceKlass::get_field_by_name ciField* ciInstanceKlass::get_field_by_name(ciSymbol* name, ciSymbol* signature, bool is_static) { @@ -434,6 +473,8 @@ ciField* ciInstanceKlass::get_field_by_name(ciSymbol* name, ciSymbol* signature, return field; } +const GrowableArray empty_field_array(0, MemTag::mtCompiler); + #ifdef ASSERT static void assert_injected_field(InternalFieldStream& fs) { assert(!fs.done(), "invarinat"); @@ -508,83 +549,120 @@ BasicType ciInstanceKlass::get_field_type_by_offset(const int field_offset, cons return type2field[make(field_type)->basic_type()]; } -// ------------------------------------------------------------------ -// ciInstanceKlass::compute_nonstatic_fields -int ciInstanceKlass::compute_nonstatic_fields() { +void ciInstanceKlass::compute_nonstatic_fields() { 
assert(is_loaded(), "must be loaded"); - if (_nonstatic_fields != nullptr) - return _nonstatic_fields->length(); + if (_nonstatic_fields != nullptr) { + assert(_declared_nonstatic_fields != nullptr, "must be initialized at the same time, class %s", name()->as_utf8()); + return; + } if (!has_nonstatic_fields()) { - Arena* arena = CURRENT_ENV->arena(); - _nonstatic_fields = new (arena) GrowableArray(arena, 0, 0, nullptr); - return 0; + _declared_nonstatic_fields = &empty_field_array; + _nonstatic_fields = &empty_field_array; + return; } assert(!is_java_lang_Object(), "bootstrap OK"); ciInstanceKlass* super = this->super(); - GrowableArray* super_fields = nullptr; - if (super != nullptr && super->has_nonstatic_fields()) { - int super_flen = super->nof_nonstatic_fields(); - super_fields = super->_nonstatic_fields; - assert(super_flen == 0 || super_fields != nullptr, "first get nof_fields"); - } + assert(super != nullptr, "must have a super class, current class: %s", name()->as_utf8()); + super->compute_nonstatic_fields(); + const GrowableArray* super_declared_fields = super->_declared_nonstatic_fields; + const GrowableArray* super_fields = super->_nonstatic_fields; + assert(super_declared_fields != nullptr && super_fields != nullptr, "must have been initialized, current class: %s, super class: %s", name()->as_utf8(), super->name()->as_utf8()); - GrowableArray* fields = nullptr; GUARDED_VM_ENTRY({ - fields = compute_nonstatic_fields_impl(super_fields); - }); - - if (fields == nullptr) { - // This can happen if this class (java.lang.Class) has invisible fields. 
- if (super_fields != nullptr) { - _nonstatic_fields = super_fields; - return super_fields->length(); - } else { - return 0; - } - } - - int flen = fields->length(); - - _nonstatic_fields = fields; - return flen; + compute_nonstatic_fields_impl(super_declared_fields, super_fields); + }); } -GrowableArray* -ciInstanceKlass::compute_nonstatic_fields_impl(GrowableArray* - super_fields) { +void ciInstanceKlass::compute_nonstatic_fields_impl(const GrowableArray* super_declared_fields, const GrowableArray* super_fields) { + assert(_declared_nonstatic_fields == nullptr && _nonstatic_fields == nullptr, "initialized already"); ASSERT_IN_VM; Arena* arena = CURRENT_ENV->arena(); - int flen = 0; - GrowableArray* fields = nullptr; - InstanceKlass* k = get_instanceKlass(); - for (JavaFieldStream fs(k); !fs.done(); fs.next()) { - if (fs.access_flags().is_static()) continue; - flen += 1; - } - // allocate the array: - if (flen == 0) { - return nullptr; // return nothing if none are locally declared + InstanceKlass* this_klass = get_instanceKlass(); + int declared_field_num = 0; + int field_num = 0; + for (JavaFieldStream fs(this_klass); !fs.done(); fs.next()) { + if (fs.access_flags().is_static()) { + continue; + } + + declared_field_num++; + + fieldDescriptor& fd = fs.field_descriptor(); + if (fd.is_flat()) { + InlineKlass* k = this_klass->get_inline_type_field_klass(fd.index()); + ciInlineKlass* vk = CURRENT_ENV->get_klass(k)->as_inline_klass(); + field_num += vk->nof_nonstatic_fields(); + field_num += fd.has_null_marker() ? 
1 : 0; + } else { + field_num++; + } } - if (super_fields != nullptr) { - flen += super_fields->length(); + + GrowableArray* tmp_declared_fields = nullptr; + if (declared_field_num != 0) { + tmp_declared_fields = new (arena) GrowableArray(arena, declared_field_num + super_declared_fields->length(), 0, nullptr); + tmp_declared_fields->appendAll(super_declared_fields); } - fields = new (arena) GrowableArray(arena, flen, 0, nullptr); - if (super_fields != nullptr) { - fields->appendAll(super_fields); + + GrowableArray* tmp_fields = nullptr; + if (field_num != 0) { + tmp_fields = new (arena) GrowableArray(arena, field_num + super_fields->length(), 0, nullptr); + tmp_fields->appendAll(super_fields); } - for (JavaFieldStream fs(k); !fs.done(); fs.next()) { - if (fs.access_flags().is_static()) continue; + // For later assertion + declared_field_num += super_declared_fields->length(); + field_num += super_fields->length(); + + for (JavaFieldStream fs(this_klass); !fs.done(); fs.next()) { + if (fs.access_flags().is_static()) { + continue; + } + fieldDescriptor& fd = fs.field_descriptor(); - ciField* field = new (arena) ciField(&fd); - fields->append(field); + ciField* declared_field = new (arena) ciField(&fd); + assert(tmp_declared_fields != nullptr, "should be initialized"); + tmp_declared_fields->append(declared_field); + + if (fd.is_flat()) { + // Flat fields are embedded + Klass* k = get_instanceKlass()->get_inline_type_field_klass(fd.index()); + ciInlineKlass* vk = CURRENT_ENV->get_klass(k)->as_inline_klass(); + // Iterate over fields of the flat inline type and copy them to 'this' + for (int i = 0; i < vk->nof_nonstatic_fields(); ++i) { + assert(tmp_fields != nullptr, "should be initialized"); + tmp_fields->append(new (arena) ciField(declared_field, vk->nonstatic_field_at(i))); + } + if (fd.has_null_marker()) { + assert(tmp_fields != nullptr, "should be initialized"); + tmp_fields->append(new (arena) ciField(declared_field)); + } + } else { + assert(tmp_fields != 
nullptr, "should be initialized"); + tmp_fields->append(declared_field); + } + } + + // Now sort them by offset, ascending. In principle, they could mix with superclass fields. + if (tmp_declared_fields != nullptr) { + assert(tmp_declared_fields->length() == declared_field_num, "sanity check failed for class: %s, number of declared fields: %d, expected: %d", + name()->as_utf8(), tmp_declared_fields->length(), declared_field_num); + _declared_nonstatic_fields = tmp_declared_fields; + } else { + _declared_nonstatic_fields = super_declared_fields; + } + + if (tmp_fields != nullptr) { + assert(tmp_fields->length() == field_num, "sanity check failed for class: %s, number of fields: %d, expected: %d", + name()->as_utf8(), tmp_fields->length(), field_num); + _nonstatic_fields = tmp_fields; + } else { + _nonstatic_fields = super_fields; } - assert(fields->length() == flen, "sanity"); - return fields; } bool ciInstanceKlass::compute_injected_fields_helper() { @@ -696,6 +774,23 @@ ciInstanceKlass* ciInstanceKlass::implementor() { return impl; } +bool ciInstanceKlass::can_be_inline_klass(bool is_exact) { + if (!Arguments::is_valhalla_enabled()) { + return false; + } + if (!is_loaded() || is_inlinetype()) { + // Not loaded or known to be an inline klass + return true; + } + if (!is_exact) { + // Not exact, check if this is a valid super for an inline klass + GUARDED_VM_ENTRY( + return !get_instanceKlass()->access_flags().is_identity_class() || is_java_lang_Object(); + ) + } + return false; +} + // Utility class for printing of the contents of the static fields for // use by compilation replay. It only prints out the information that // could be consumed by the compiler, so for primitive types it prints @@ -704,74 +799,127 @@ ciInstanceKlass* ciInstanceKlass::implementor() { // only value which statically unchangeable. For all other reference // types it simply prints out the dynamic type. 
-class StaticFinalFieldPrinter : public FieldClosure { +class StaticFieldPrinter : public FieldClosure { +protected: outputStream* _out; +public: + StaticFieldPrinter(outputStream* out) : + _out(out) { + } + void do_field_helper(fieldDescriptor* fd, oop obj, bool is_flat); +}; + +class StaticFinalFieldPrinter : public StaticFieldPrinter { const char* _holder; public: StaticFinalFieldPrinter(outputStream* out, const char* holder) : - _out(out), - _holder(holder) { + StaticFieldPrinter(out), _holder(holder) { } void do_field(fieldDescriptor* fd) { if (fd->is_final() && !fd->has_initial_value()) { ResourceMark rm; - oop mirror = fd->field_holder()->java_mirror(); - _out->print("staticfield %s %s %s ", _holder, fd->name()->as_quoted_ascii(), fd->signature()->as_quoted_ascii()); - BasicType field_type = fd->field_type(); - switch (field_type) { - case T_BYTE: _out->print_cr("%d", mirror->byte_field(fd->offset())); break; - case T_BOOLEAN: _out->print_cr("%d", mirror->bool_field(fd->offset())); break; - case T_SHORT: _out->print_cr("%d", mirror->short_field(fd->offset())); break; - case T_CHAR: _out->print_cr("%d", mirror->char_field(fd->offset())); break; - case T_INT: _out->print_cr("%d", mirror->int_field(fd->offset())); break; - case T_LONG: _out->print_cr(INT64_FORMAT, (int64_t)(mirror->long_field(fd->offset()))); break; - case T_FLOAT: { - float f = mirror->float_field(fd->offset()); - _out->print_cr("%d", *(int*)&f); - break; - } - case T_DOUBLE: { - double d = mirror->double_field(fd->offset()); - _out->print_cr(INT64_FORMAT, *(int64_t*)&d); - break; - } - case T_ARRAY: // fall-through - case T_OBJECT: { - oop value = mirror->obj_field_acquire(fd->offset()); - if (value == nullptr) { - if (field_type == T_ARRAY) { - _out->print("%d", -1); - } - _out->cr(); - } else if (value->is_instance()) { - assert(field_type == T_OBJECT, ""); - if (value->is_a(vmClasses::String_klass())) { - const char* ascii_value = java_lang_String::as_quoted_ascii(value); - 
_out->print_cr("\"%s\"", (ascii_value != nullptr) ? ascii_value : ""); - } else { - const char* klass_name = value->klass()->name()->as_quoted_ascii(); - _out->print_cr("%s", klass_name); - } - } else if (value->is_array()) { - typeArrayOop ta = (typeArrayOop)value; - _out->print("%d", ta->length()); - if (value->is_objArray()) { - objArrayOop oa = (objArrayOop)value; - const char* klass_name = value->klass()->name()->as_quoted_ascii(); - _out->print(" %s", klass_name); - } - _out->cr(); + InstanceKlass* holder = fd->field_holder(); + oop mirror = holder->java_mirror(); + _out->print("staticfield %s %s ", _holder, fd->name()->as_quoted_ascii()); + BasicType bt = fd->field_type(); + if (bt != T_OBJECT && bt != T_ARRAY) { + _out->print("%s ", fd->signature()->as_quoted_ascii()); + } + do_field_helper(fd, mirror, false); + _out->cr(); + } + } +}; + +class InlineTypeFieldPrinter : public StaticFieldPrinter { + oop _obj; +public: + InlineTypeFieldPrinter(outputStream* out, oop obj) : + StaticFieldPrinter(out), _obj(obj) { + } + void do_field(fieldDescriptor* fd) { + do_field_helper(fd, _obj, true); + _out->print(" "); + } +}; + +void StaticFieldPrinter::do_field_helper(fieldDescriptor* fd, oop mirror, bool is_flat) { + BasicType field_type = fd->field_type(); + switch (field_type) { + case T_BYTE: _out->print("%d", mirror->byte_field(fd->offset())); break; + case T_BOOLEAN: _out->print("%d", mirror->bool_field(fd->offset())); break; + case T_SHORT: _out->print("%d", mirror->short_field(fd->offset())); break; + case T_CHAR: _out->print("%d", mirror->char_field(fd->offset())); break; + case T_INT: _out->print("%d", mirror->int_field(fd->offset())); break; + case T_LONG: _out->print(INT64_FORMAT, (int64_t)(mirror->long_field(fd->offset()))); break; + case T_FLOAT: { + float f = mirror->float_field(fd->offset()); + _out->print("%d", *(int*)&f); + break; + } + case T_DOUBLE: { + double d = mirror->double_field(fd->offset()); + _out->print(INT64_FORMAT, *(int64_t*)&d); + 
break; + } + case T_ARRAY: // fall-through + case T_OBJECT: + if (!fd->is_null_free_inline_type()) { + _out->print("%s ", fd->signature()->as_quoted_ascii()); + oop value = mirror->obj_field_acquire(fd->offset()); + if (value == nullptr) { + if (field_type == T_ARRAY) { + _out->print("%d", -1); + } + _out->cr(); + } else if (value->is_instance()) { + assert(field_type == T_OBJECT, ""); + if (value->is_a(vmClasses::String_klass())) { + const char* ascii_value = java_lang_String::as_quoted_ascii(value); + _out->print("\"%s\"", (ascii_value != nullptr) ? ascii_value : ""); } else { - ShouldNotReachHere(); + const char* klass_name = value->klass()->name()->as_quoted_ascii(); + _out->print("%s", klass_name); } - break; - } - default: + } else if (value->is_array()) { + arrayOop a = (arrayOop)value; + _out->print("%d", a->length()); + if (value->is_objArray()) { + objArrayOop oa = (objArrayOop)value; + const char* klass_name = value->klass()->name()->as_quoted_ascii(); + _out->print(" %s", klass_name); + } + } else { ShouldNotReachHere(); } - } + break; + } else { + // handling of null free inline type + ResetNoHandleMark rnhm; + Thread* THREAD = Thread::current(); + SignatureStream ss(fd->signature(), false); + Symbol* name = ss.as_symbol(); + assert(!HAS_PENDING_EXCEPTION, "can resolve klass?"); + InstanceKlass* holder = fd->field_holder(); + InstanceKlass* k = SystemDictionary::find_instance_klass(THREAD, name, + Handle(THREAD, holder->class_loader())); + guarantee(k != nullptr && !HAS_PENDING_EXCEPTION, "can resolve klass?"); + InlineKlass* vk = InlineKlass::cast(k); + oop obj; + if (is_flat) { + int field_offset = fd->offset() - vk->payload_offset(); + obj = cast_to_oop(cast_from_oop
(mirror) + field_offset); + } else { + obj = mirror->obj_field_acquire(fd->offset()); + } + InlineTypeFieldPrinter print_field(_out, obj); + vk->do_nonstatic_fields(&print_field); + break; + } + default: + ShouldNotReachHere(); } -}; +} const char *ciInstanceKlass::replay_name() const { return CURRENT_ENV->replay_name(get_instanceKlass()); diff --git a/src/hotspot/share/ci/ciInstanceKlass.hpp b/src/hotspot/share/ci/ciInstanceKlass.hpp index 6e696668638d9..0f49fd1978196 100644 --- a/src/hotspot/share/ci/ciInstanceKlass.hpp +++ b/src/hotspot/share/ci/ciInstanceKlass.hpp @@ -69,7 +69,20 @@ class ciInstanceKlass : public ciKlass { ciInstance* _java_mirror; ciConstantPoolCache* _field_cache; // cached map index->field - GrowableArray* _nonstatic_fields; // ordered by JavaFieldStream + + // Fields declared in the bytecode (without nested fields in flat fields), + // ordered in JavaFieldStream order, with superclasses first (i.e. from lang.java.Object + // to most derived class). + const GrowableArray* _declared_nonstatic_fields; + + // Fields laid out in memory (flat fields are expanded into their components). The ciField object + // for each primitive component has the holder being this ciInstanceKlass or one of its + // superclasses. + // Fields are in the same order as in _declared_nonstatic_fields, but flat fields are replaced by + // the list of their own fields, ordered the same way (hierarchy traversed top-down, in + // JavaFieldStream order). + const GrowableArray* _nonstatic_fields; + int _has_injected_fields; // any non static injected fields? lazily initialized. 
// The possible values of the _implementor fall into following three cases: @@ -87,7 +100,7 @@ class ciInstanceKlass : public ciKlass { protected: ciInstanceKlass(Klass* k); - ciInstanceKlass(ciSymbol* name, jobject loader); + ciInstanceKlass(ciSymbol* name, jobject loader, BasicType bt = T_OBJECT); // for unloaded klasses InstanceKlass* get_instanceKlass() const { return InstanceKlass::cast(get_Klass()); @@ -108,8 +121,8 @@ class ciInstanceKlass : public ciKlass { InstanceKlass::ClassState compute_init_state(); bool compute_shared_has_subklass(); - int compute_nonstatic_fields(); - GrowableArray* compute_nonstatic_fields_impl(GrowableArray* super_fields); + void compute_nonstatic_fields(); + void compute_nonstatic_fields_impl(const GrowableArray* super_declared_fields, const GrowableArray* super_fields); bool compute_has_trusted_loader(); public: @@ -208,15 +221,26 @@ class ciInstanceKlass : public ciKlass { ciInstanceKlass* get_canonical_holder(int offset); ciField* get_field_by_offset(int field_offset, bool is_static); ciField* get_field_by_name(ciSymbol* name, ciSymbol* signature, bool is_static); + // Get field descriptor at field_offset ignoring flattening + ciField* get_non_flat_field_by_offset(int field_offset); + // Get the index of the declared field that contains this offset + int field_index_by_offset(int offset); + + // Total number of nonstatic fields (including inherited) + int nof_declared_nonstatic_fields() { + if (_declared_nonstatic_fields == nullptr) { + compute_nonstatic_fields(); + } + return _declared_nonstatic_fields->length(); + } ciField* get_injected_instance_field_by_name(ciSymbol* name, ciSymbol* signature); BasicType get_field_type_by_offset(int field_offset, bool is_static); - // total number of nonstatic fields (including inherited): int nof_nonstatic_fields() { - if (_nonstatic_fields == nullptr) - return compute_nonstatic_fields(); - else - return _nonstatic_fields->length(); + if (_nonstatic_fields == nullptr) { + 
compute_nonstatic_fields(); + } + return _nonstatic_fields->length(); } bool has_injected_fields() { @@ -228,7 +252,11 @@ class ciInstanceKlass : public ciKlass { bool has_object_fields() const; - // nth nonstatic field (presented by ascending address) + ciField* declared_nonstatic_field_at(int i) { + assert(_declared_nonstatic_fields != nullptr, "should be initialized"); + return _declared_nonstatic_fields->at(i); + } + ciField* nonstatic_field_at(int i) { assert(_nonstatic_fields != nullptr, ""); return _nonstatic_fields->at(i); @@ -239,7 +267,7 @@ class ciInstanceKlass : public ciKlass { bool has_class_initializer(); - bool contains_field_offset(int offset); + bool contains_field_offset(int offset) const; // Get the instance of java.lang.Class corresponding to // this klass. This instance is used for locking of @@ -249,9 +277,9 @@ class ciInstanceKlass : public ciKlass { // Java access flags bool is_public () { return flags().is_public(); } bool is_final () { return flags().is_final(); } - bool is_super () { return flags().is_super(); } bool is_interface () { return flags().is_interface(); } bool is_abstract () { return flags().is_abstract(); } + bool is_abstract_value_klass() { return is_abstract() && !flags().is_identity(); } ciMethod* find_method(ciSymbol* name, ciSymbol* signature); // Note: To find a method from name and type strings, use ciSymbol::make, @@ -267,6 +295,8 @@ class ciInstanceKlass : public ciKlass { return (impl != this ? impl : nullptr); } + virtual bool can_be_inline_klass(bool is_exact = false); + // Is the defining class loader of this class the default loader? 
bool uses_default_loader() const; diff --git a/src/hotspot/share/ci/ciKlass.cpp b/src/hotspot/share/ci/ciKlass.cpp index 0a0379af97e0a..11facf7b3047c 100644 --- a/src/hotspot/share/ci/ciKlass.cpp +++ b/src/hotspot/share/ci/ciKlass.cpp @@ -216,6 +216,15 @@ jint ciKlass::modifier_flags() { ) } +// ------------------------------------------------------------------ +// ciKlass::prototype_header +markWord ciKlass::prototype_header() const { + assert(is_loaded(), "not loaded"); + GUARDED_VM_ENTRY( + return get_Klass()->prototype_header(); + ) +} + // ------------------------------------------------------------------ // ciKlass::misc_flags klass_flags_t ciKlass::misc_flags() { @@ -233,6 +242,11 @@ void ciKlass::print_impl(outputStream* st) { st->print(" name="); print_name_on(st); st->print(" loaded=%s", (is_loaded() ? "true" : "false")); + GUARDED_VM_ENTRY( + if (is_flat_array_klass()) { + st->print(" layout_kind=%d", (int)((FlatArrayKlass*)get_Klass())->layout_kind()); + } + ) } // ------------------------------------------------------------------ diff --git a/src/hotspot/share/ci/ciKlass.hpp b/src/hotspot/share/ci/ciKlass.hpp index f95602b9717bf..04112c4334305 100644 --- a/src/hotspot/share/ci/ciKlass.hpp +++ b/src/hotspot/share/ci/ciKlass.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -27,6 +27,7 @@ #include "ci/ciType.hpp" #include "oops/klass.hpp" +#include "runtime/arguments.hpp" // ciKlass // @@ -44,8 +45,10 @@ class ciKlass : public ciType { friend class ciMethod; friend class ciMethodData; friend class ciObjArrayKlass; - friend class ciSignature; friend class ciReceiverTypeData; + friend class ciSignature; + friend class ciFlatArrayKlass; + friend class ciArrayKlass; private: ciSymbol* _name; @@ -104,6 +107,10 @@ class ciKlass : public ciType { return false; } + virtual bool can_be_inline_array_klass() { + return Arguments::is_valhalla_enabled() && is_java_lang_Object(); + } + bool is_in_encoding_range() { Klass* k = get_Klass(); bool is_in_encoding_range = CompressedKlassPointers::is_encodable(k); @@ -122,6 +129,8 @@ class ciKlass : public ciType { // Fetch modifier flags. jint modifier_flags(); + markWord prototype_header() const; + // Fetch Klass::misc_flags. klass_flags_t misc_flags(); diff --git a/src/hotspot/share/ci/ciMetadata.hpp b/src/hotspot/share/ci/ciMetadata.hpp index 8f24ebcfaf512..fc94950d09ca5 100644 --- a/src/hotspot/share/ci/ciMetadata.hpp +++ b/src/hotspot/share/ci/ciMetadata.hpp @@ -55,9 +55,14 @@ class ciMetadata: public ciBaseObject { virtual bool is_method_data() const { return false; } virtual bool is_klass() const { return false; } virtual bool is_instance_klass() const { return false; } + virtual bool is_inlinetype() const { return false; } virtual bool is_array_klass() const { return false; } virtual bool is_obj_array_klass() const { return false; } + virtual bool is_flat_array_klass() const { return false; } + virtual bool is_ref_array_klass() const { return false; } virtual bool is_type_array_klass() const { return false; } + virtual bool is_early_larval() const { return false; } + virtual bool maybe_flat_in_array() const { return false; } virtual void dump_replay_data(outputStream* st) { /* do nothing */ } ciMethod* as_method() { @@ 
-96,10 +101,22 @@ class ciMetadata: public ciBaseObject { assert(is_obj_array_klass(), "bad cast"); return (ciObjArrayKlass*)this; } + ciFlatArrayKlass* as_flat_array_klass() { + assert(is_flat_array_klass(), "bad cast"); + return (ciFlatArrayKlass*)this; + } + ciRefArrayKlass* as_ref_array_klass() { + assert(is_ref_array_klass(), "bad cast"); + return (ciRefArrayKlass*)this; + } ciTypeArrayKlass* as_type_array_klass() { assert(is_type_array_klass(), "bad cast"); return (ciTypeArrayKlass*)this; } + ciInlineKlass* as_inline_klass() { + assert(is_inlinetype(), "bad cast"); + return (ciInlineKlass*)this; + } Metadata* constant_encoding() { return _metadata; } diff --git a/src/hotspot/share/ci/ciMethod.cpp b/src/hotspot/share/ci/ciMethod.cpp index d3b8b8f13cc18..7b4220ec63679 100644 --- a/src/hotspot/share/ci/ciMethod.cpp +++ b/src/hotspot/share/ci/ciMethod.cpp @@ -33,6 +33,7 @@ #include "ci/ciSymbol.hpp" #include "ci/ciSymbols.hpp" #include "ci/ciUtilities.inline.hpp" +#include "classfile/vmIntrinsics.hpp" #include "compiler/abstractCompiler.hpp" #include "compiler/compilerDefinitions.inline.hpp" #include "compiler/compilerOracle.hpp" @@ -52,6 +53,7 @@ #include "prims/methodHandles.hpp" #include "runtime/deoptimization.hpp" #include "runtime/handles.inline.hpp" +#include "runtime/sharedRuntime.hpp" #include "utilities/bitMap.inline.hpp" #include "utilities/xmlstream.hpp" #ifdef COMPILER2 @@ -662,6 +664,71 @@ bool ciMethod::parameter_profiled_type(int i, ciKlass*& type, ProfilePtrKind& pt return false; } +// MDO updates are racy. C2 can observe the array type before the profiling code has updated the +// corresponding flat/null-free flags. If the array type is known, prefer the properties it provides. 
+static void update_flags_from_type(ciKlass* array_type, bool& flat_array, bool& null_free_array) { + if (array_type != nullptr) { + flat_array |= array_type->is_flat_array_klass(); + null_free_array |= array_type->as_array_klass()->is_elem_null_free(); + } +} + +bool ciMethod::array_access_profiled_type(int bci, ciKlass*& array_type, ciKlass*& element_type, ProfilePtrKind& element_ptr, bool &flat_array, bool &null_free_array) { + if (method_data() != nullptr && method_data()->is_mature()) { + ciProfileData* data = method_data()->bci_to_data(bci); + if (data != nullptr) { + if (data->is_ArrayLoadData()) { + ciArrayLoadData* array_access = (ciArrayLoadData*) data->as_ArrayLoadData(); + array_type = array_access->array()->valid_type(); + element_type = array_access->element()->valid_type(); + element_ptr = array_access->element()->ptr_kind(); + flat_array = array_access->flat_array(); + null_free_array = array_access->null_free_array(); + update_flags_from_type(array_type, flat_array, null_free_array); + return true; + } else if (data->is_ArrayStoreData()) { + ciArrayStoreData* array_access = (ciArrayStoreData*) data->as_ArrayStoreData(); + array_type = array_access->array()->valid_type(); + flat_array = array_access->flat_array(); + null_free_array = array_access->null_free_array(); + update_flags_from_type(array_type, flat_array, null_free_array); + ciCallProfile call_profile = call_profile_at_bci(bci); + if (call_profile.morphism() == 1) { + element_type = call_profile.receiver(0); + } else { + element_type = nullptr; + } + if (!array_access->null_seen()) { + element_ptr = ProfileNeverNull; + } else if (call_profile.count() == 0) { + element_ptr = ProfileAlwaysNull; + } else { + element_ptr = ProfileMaybeNull; + } + return true; + } + } + } + return false; +} + +bool ciMethod::acmp_profiled_type(int bci, ciKlass*& left_type, ciKlass*& right_type, ProfilePtrKind& left_ptr, ProfilePtrKind& right_ptr, bool &left_inline_type, bool &right_inline_type) { + if 
(method_data() != nullptr && method_data()->is_mature()) { + ciProfileData* data = method_data()->bci_to_data(bci); + if (data != nullptr && data->is_ACmpData()) { + ciACmpData* acmp = (ciACmpData*)data->as_ACmpData(); + left_type = acmp->left()->valid_type(); + right_type = acmp->right()->valid_type(); + left_ptr = acmp->left()->ptr_kind(); + right_ptr = acmp->right()->ptr_kind(); + left_inline_type = acmp->left_inline_type(); + right_inline_type = acmp->right_inline_type(); + return true; + } + } + return false; +} + // ------------------------------------------------------------------ // ciMethod::find_monomorphic_target @@ -974,10 +1041,13 @@ bool ciMethod::is_compiled_lambda_form() const { } // ------------------------------------------------------------------ -// ciMethod::is_object_initializer +// ciMethod::is_object_constructor // -bool ciMethod::is_object_initializer() const { - return name() == ciSymbols::object_initializer_name(); +bool ciMethod::is_object_constructor() const { + return (name() == ciSymbols::object_initializer_name() + && signature()->return_type()->is_void()); + // Note: We can't test is_static, because that would + // require the method to be loaded. Sometimes it isn't. 
} // ------------------------------------------------------------------ @@ -1529,6 +1599,42 @@ bool ciMethod::is_consistent_info(ciMethod* declared_method, ciMethod* resolved_ } // ------------------------------------------------------------------ + +bool ciMethod::is_scalarized_arg(int idx) const { + VM_ENTRY_MARK; + return get_Method()->is_scalarized_arg(idx); +} + +bool ciMethod::is_scalarized_buffer_arg(int idx) const { + VM_ENTRY_MARK; + return get_Method()->is_scalarized_buffer_arg(idx); +} + +bool ciMethod::has_scalarized_args() const { + GUARDED_VM_ENTRY(return get_Method()->has_scalarized_args();) +} + +const GrowableArray* ciMethod::get_sig_cc() const { + VM_ENTRY_MARK; + if (get_Method()->adapter() == nullptr) { + return nullptr; + } + return get_Method()->adapter()->get_sig_cc(); +} + +bool ciMethod::mismatch() const { + VM_ENTRY_MARK; + return get_Method()->mismatch(); +} + +bool ciMethod::c1_needs_stack_repair() const { + GUARDED_VM_ENTRY(return get_Method()->c1_needs_stack_repair();) +} + +bool ciMethod::c2_needs_stack_repair() const { + GUARDED_VM_ENTRY(return get_Method()->c2_needs_stack_repair();) +} + // ciMethod::is_old // // Return true for redefined methods @@ -1536,3 +1642,24 @@ bool ciMethod::is_old() const { ASSERT_IN_VM; return get_Method()->is_old(); } + +// A larval object can be passed into a constructor, or it can be passed into +// MethodHandle::linkToSpecial, which, in turn, will pass it into a constructor +bool ciMethod::receiver_maybe_larval() const { + bool res = is_object_constructor() || intrinsic_id() == vmIntrinsics::_linkToSpecial; + assert(!res || !is_scalarized_arg(0), "larval argument must not be passed as fields"); + return res; +} + +// Normally, a larval object cannot be returned. However, Unsafe::allocateInstance and +// DirectMethodHandle::allocateInstance return an uninitialized larval object, this is required for +// the construction of an object using the reflection API. 
+bool ciMethod::return_value_is_larval() const { + if (intrinsic_id() == vmIntrinsics::_allocateInstance) { + return true; + } + if (holder()->name()->equals(ciSymbols::java_lang_invoke_DirectMethodHandle()) && name()->equals(ciSymbols::allocateInstance_name())) { + return true; + } + return false; +} diff --git a/src/hotspot/share/ci/ciMethod.hpp b/src/hotspot/share/ci/ciMethod.hpp index eecd94275850a..70f75c02c1664 100644 --- a/src/hotspot/share/ci/ciMethod.hpp +++ b/src/hotspot/share/ci/ciMethod.hpp @@ -41,11 +41,13 @@ class MethodLiveness; class Arena; class BCEscapeAnalyzer; class InlineTree; +class SigEntry; class xmlStream; // Whether profiling found an oop to be always, never or sometimes // null enum ProfilePtrKind { + ProfileUnknownNull, ProfileAlwaysNull, ProfileNeverNull, ProfileMaybeNull @@ -198,7 +200,7 @@ class ciMethod : public ciMetadata { bool force_inline() const { return get_Method()->force_inline(); } bool dont_inline() const { return get_Method()->dont_inline(); } bool intrinsic_candidate() const { return get_Method()->intrinsic_candidate(); } - bool is_static_initializer() const { return get_Method()->is_static_initializer(); } + bool is_class_initializer() const { return get_Method()->is_class_initializer(); } bool changes_current_thread() const { return get_Method()->changes_current_thread(); } bool deprecated() const { return is_loaded() && get_Method()->deprecated(); } @@ -269,7 +271,11 @@ class ciMethod : public ciMetadata { bool argument_profiled_type(int bci, int i, ciKlass*& type, ProfilePtrKind& ptr_kind); bool parameter_profiled_type(int i, ciKlass*& type, ProfilePtrKind& ptr_kind); bool return_profiled_type(int bci, ciKlass*& type, ProfilePtrKind& ptr_kind); - + bool array_access_profiled_type(int bci, ciKlass*& array_type, ciKlass*& element_type, ProfilePtrKind& element_ptr, + bool& flat_array, bool& null_free); + bool acmp_profiled_type(int bci, ciKlass*& left_type, ciKlass*& right_type, + ProfilePtrKind& left_ptr, 
ProfilePtrKind& right_ptr, + bool& left_inline_type, bool& right_inline_type); ciField* get_field_at_bci( int bci, bool &will_link); ciMethod* get_method_at_bci(int bci, bool &will_link, ciSignature* *declared_signature); ciMethod* get_method_at_bci(int bci) { @@ -341,6 +347,7 @@ class ciMethod : public ciMetadata { bool is_native () const { return flags().is_native(); } bool is_interface () const { return flags().is_interface(); } bool is_abstract () const { return flags().is_abstract(); } + bool is_varargs () const { return flags().is_varargs(); } // Other flags bool is_final_method() const { return is_final() || holder()->is_final(); } @@ -357,8 +364,8 @@ class ciMethod : public ciMetadata { bool has_reserved_stack_access() const { return _has_reserved_stack_access; } bool is_boxing_method() const; bool is_unboxing_method() const; + bool is_object_constructor() const; bool is_vector_method() const; - bool is_object_initializer() const; bool is_scoped() const; bool is_old() const; @@ -383,6 +390,20 @@ class ciMethod : public ciMetadata { void print_short_name(outputStream* st = tty); static bool is_consistent_info(ciMethod* declared_method, ciMethod* resolved_method); + + // Support for the inline type calling convention + bool is_scalarized_arg(int idx) const; + bool is_scalarized_buffer_arg(int idx) const; + bool has_scalarized_args() const; + const GrowableArray* get_sig_cc() const; + bool mismatch() const; + bool c1_needs_stack_repair() const; + bool c2_needs_stack_repair() const; + + // Generally, a method cannot return a larval object or receive a larval argument. There are some + // exceptions. 
+ bool receiver_maybe_larval() const; + bool return_value_is_larval() const; }; #endif // SHARE_CI_CIMETHOD_HPP diff --git a/src/hotspot/share/ci/ciMethodData.cpp b/src/hotspot/share/ci/ciMethodData.cpp index 5e623e2b9655c..fe1cce9dadeef 100644 --- a/src/hotspot/share/ci/ciMethodData.cpp +++ b/src/hotspot/share/ci/ciMethodData.cpp @@ -24,6 +24,7 @@ #include "ci/ciMetadata.hpp" #include "ci/ciMethodData.hpp" +#include "ci/ciObjArrayKlass.hpp" #include "ci/ciReplay.hpp" #include "ci/ciUtilities.inline.hpp" #include "compiler/compiler_globals.hpp" @@ -337,12 +338,12 @@ void ciTypeStackSlotEntries::translate_type_data_from(const TypeStackSlotEntries } } -void ciReturnTypeEntry::translate_type_data_from(const ReturnTypeEntry* ret) { +void ciSingleTypeEntry::translate_type_data_from(const SingleTypeEntry* ret) { intptr_t k = ret->type(); Klass* klass = (Klass*)klass_part(k); if (klass == nullptr || !klass->is_loader_present_and_alive() || !is_klass_loaded(klass)) { // With concurrent class unloading, the MDO could have stale metadata; override it - set_type(ReturnTypeEntry::with_status((Klass*)nullptr, k)); + set_type(SingleTypeEntry::with_status((Klass*)nullptr, k)); } else { set_type(translate_klass(k)); } @@ -393,6 +394,12 @@ ciProfileData* ciMethodData::data_from(DataLayout* data_layout) { return new ciVirtualCallTypeData(data_layout); case DataLayout::parameters_type_data_tag: return new ciParametersTypeData(data_layout); + case DataLayout::array_store_data_tag: + return new ciArrayStoreData(data_layout); + case DataLayout::array_load_data_tag: + return new ciArrayLoadData(data_layout); + case DataLayout::acmp_data_tag: + return new ciACmpData(data_layout); }; } @@ -715,8 +722,15 @@ void ciMethodData::dump_replay_data_type_helper(outputStream* out, int round, in if (round == 0) { count++; } else { - out->print(" %d %s", (int)(dp_to_di(pdata->dp() + in_bytes(offset)) / sizeof(intptr_t)), - CURRENT_ENV->replay_name(k)); + if (k->is_obj_array_klass()) { + // We also 
record the array property to load the correct array class during replay compilation. + const ArrayProperties array_properties = k->as_obj_array_klass()->properties(); + out->print(" %d %s %d", static_cast(dp_to_di(pdata->dp() + in_bytes(offset)) / sizeof(intptr_t)), + CURRENT_ENV->replay_name(k), array_properties.value()); + } else { + out->print(" %d %s", static_cast(dp_to_di(pdata->dp() + in_bytes(offset)) / sizeof(intptr_t)), + CURRENT_ENV->replay_name(k)); + } } } } @@ -810,12 +824,29 @@ void ciMethodData::dump_replay_data(outputStream* out) { ciVirtualCallTypeData* call_type_data = (ciVirtualCallTypeData*)pdata; dump_replay_data_call_type_helper(out, round, count, call_type_data); } + } else if (pdata->is_CallTypeData()) { + ciCallTypeData* call_type_data = (ciCallTypeData*)pdata; + dump_replay_data_call_type_helper(out, round, count, call_type_data); + } else if (pdata->is_ArrayStoreData()) { + ciArrayStoreData* array_store_data = (ciArrayStoreData*)pdata; + dump_replay_data_type_helper(out, round, count, array_store_data, ciArrayStoreData::array_offset(), + array_store_data->array()->valid_type()); + dump_replay_data_receiver_type_helper(out, round, count, array_store_data); + } else if (pdata->is_ArrayLoadData()) { + ciArrayLoadData* array_load_data = (ciArrayLoadData*)pdata; + dump_replay_data_type_helper(out, round, count, array_load_data, ciArrayLoadData::array_offset(), + array_load_data->array()->valid_type()); + dump_replay_data_type_helper(out, round, count, array_load_data, ciArrayLoadData::element_offset(), + array_load_data->element()->valid_type()); + } else if (pdata->is_ACmpData()) { + ciACmpData* acmp_data = (ciACmpData*)pdata; + dump_replay_data_type_helper(out, round, count, acmp_data, ciACmpData::left_offset(), + acmp_data->left()->valid_type()); + dump_replay_data_type_helper(out, round, count, acmp_data, ciACmpData::right_offset(), + acmp_data->right()->valid_type()); } else if (pdata->is_ReceiverTypeData()) { ciReceiverTypeData* vdata = 
(ciReceiverTypeData*)pdata; dump_replay_data_receiver_type_helper(out, round, count, vdata); - } else if (pdata->is_CallTypeData()) { - ciCallTypeData* call_type_data = (ciCallTypeData*)pdata; - dump_replay_data_call_type_helper(out, round, count, call_type_data); } } if (parameters != nullptr) { @@ -898,7 +929,7 @@ void ciTypeStackSlotEntries::print_data_on(outputStream* st) const { } } -void ciReturnTypeEntry::print_data_on(outputStream* st) const { +void ciSingleTypeEntry::print_data_on(outputStream* st) const { _pd->tab(st); st->print("ret "); print_ciklass(st, type()); @@ -971,4 +1002,37 @@ void ciSpeculativeTrapData::print_data_on(outputStream* st, const char* extra) c method()->print_short_name(st); st->cr(); } + +void ciArrayStoreData::print_data_on(outputStream* st, const char* extra) const { + print_shared(st, "ciArrayStoreData", extra); + st->cr(); + tab(st, true); + st->print("array"); + array()->print_data_on(st); + tab(st, true); + st->print("element"); + rtd_super()->print_receiver_data_on(st); +} + +void ciArrayLoadData::print_data_on(outputStream* st, const char* extra) const { + print_shared(st, "ciArrayLoadData", extra); + st->cr(); + tab(st, true); + st->print("array"); + array()->print_data_on(st); + tab(st, true); + st->print("element"); + element()->print_data_on(st); +} + +void ciACmpData::print_data_on(outputStream* st, const char* extra) const { + BranchData::print_data_on(st, extra); + st->cr(); + tab(st, true); + st->print("left"); + left()->print_data_on(st); + tab(st, true); + st->print("right"); + right()->print_data_on(st); +} #endif diff --git a/src/hotspot/share/ci/ciMethodData.hpp b/src/hotspot/share/ci/ciMethodData.hpp index a43d011b77ea2..fca4f07099ffd 100644 --- a/src/hotspot/share/ci/ciMethodData.hpp +++ b/src/hotspot/share/ci/ciMethodData.hpp @@ -124,9 +124,9 @@ class ciTypeStackSlotEntries : public TypeStackSlotEntries, ciTypeEntries { #endif }; -class ciReturnTypeEntry : public ReturnTypeEntry, ciTypeEntries { +class 
ciSingleTypeEntry : public SingleTypeEntry, ciTypeEntries { public: - void translate_type_data_from(const ReturnTypeEntry* ret); + void translate_type_data_from(const SingleTypeEntry* ret); ciKlass* valid_type() const { return valid_ciklass(type()); @@ -146,7 +146,7 @@ class ciCallTypeData : public CallTypeData { ciCallTypeData(DataLayout* layout) : CallTypeData(layout) {} ciTypeStackSlotEntries* args() const { return (ciTypeStackSlotEntries*)CallTypeData::args(); } - ciReturnTypeEntry* ret() const { return (ciReturnTypeEntry*)CallTypeData::ret(); } + ciSingleTypeEntry* ret() const { return (ciSingleTypeEntry*)CallTypeData::ret(); } void translate_from(const ProfileData* data) { if (has_arguments()) { @@ -258,7 +258,7 @@ class ciVirtualCallTypeData : public VirtualCallTypeData { } ciTypeStackSlotEntries* args() const { return (ciTypeStackSlotEntries*)VirtualCallTypeData::args(); } - ciReturnTypeEntry* ret() const { return (ciReturnTypeEntry*)VirtualCallTypeData::ret(); } + ciSingleTypeEntry* ret() const { return (ciSingleTypeEntry*)VirtualCallTypeData::ret(); } // Copy & translate from oop based VirtualCallData virtual void translate_from(const ProfileData* data) { @@ -362,6 +362,63 @@ class ciSpeculativeTrapData : public SpeculativeTrapData { #endif }; +class ciArrayStoreData : public ArrayStoreData { + // Fake multiple inheritance... It's a ciReceiverTypeData also. 
+ ciReceiverTypeData* rtd_super() const { return (ciReceiverTypeData*) this; } + +public: + ciArrayStoreData(DataLayout* layout) : ArrayStoreData(layout) {} + + ciSingleTypeEntry* array() const { return (ciSingleTypeEntry*)ArrayStoreData::array(); } + + virtual void translate_from(const ProfileData* data) { + array()->translate_type_data_from(data->as_ArrayStoreData()->array()); + rtd_super()->translate_receiver_data_from(data); + } + + ciKlass* receiver(uint row) { + return rtd_super()->receiver(row); + } +#ifndef PRODUCT + void print_data_on(outputStream* st, const char* extra = nullptr) const; +#endif +}; + +class ciArrayLoadData : public ArrayLoadData { +public: + ciArrayLoadData(DataLayout* layout) : ArrayLoadData(layout) {} + + ciSingleTypeEntry* array() const { return (ciSingleTypeEntry*)ArrayLoadData::array(); } + ciSingleTypeEntry* element() const { return (ciSingleTypeEntry*)ArrayLoadData::element(); } + + virtual void translate_from(const ProfileData* data) { + array()->translate_type_data_from(data->as_ArrayLoadData()->array()); + element()->translate_type_data_from(data->as_ArrayLoadData()->element()); + } + +#ifndef PRODUCT + void print_data_on(outputStream* st, const char* extra = nullptr) const; +#endif +}; + + +class ciACmpData : public ACmpData { +public: + ciACmpData(DataLayout* layout) : ACmpData(layout) {} + + ciSingleTypeEntry* left() const { return (ciSingleTypeEntry*)ACmpData::left(); } + ciSingleTypeEntry* right() const { return (ciSingleTypeEntry*)ACmpData::right(); } + + virtual void translate_from(const ProfileData* data) { + left()->translate_type_data_from(data->as_ACmpData()->left()); + right()->translate_type_data_from(data->as_ACmpData()->right()); + } + +#ifndef PRODUCT + void print_data_on(outputStream* st, const char* extra = nullptr) const; +#endif +}; + // ciMethodData // // This class represents a MethodData* in the HotSpot virtual diff --git a/src/hotspot/share/ci/ciObjArray.cpp b/src/hotspot/share/ci/ciObjArray.cpp index 
2a485fb26182d..5bbf87b3f5964 100644 --- a/src/hotspot/share/ci/ciObjArray.cpp +++ b/src/hotspot/share/ci/ciObjArray.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -26,6 +26,7 @@ #include "ci/ciObjArray.hpp" #include "ci/ciUtilities.inline.hpp" #include "oops/objArrayOop.inline.hpp" +#include "oops/oopCast.inline.hpp" // ciObjArray // @@ -34,7 +35,8 @@ ciObject* ciObjArray::obj_at(int index) { VM_ENTRY_MARK; - objArrayOop array = get_objArrayOop(); + // The array should be a refArray, otherwise a ciFlatArray object would have been used + refArrayOop array = oop_cast(get_objArrayOop()); assert(index >= 0 && index < array->length(), "OOB access"); oop o = array->obj_at(index); if (o == nullptr) { diff --git a/src/hotspot/share/ci/ciObjArray.hpp b/src/hotspot/share/ci/ciObjArray.hpp index 4f1dfc501cd65..c1282469df321 100644 --- a/src/hotspot/share/ci/ciObjArray.hpp +++ b/src/hotspot/share/ci/ciObjArray.hpp @@ -50,6 +50,8 @@ class ciObjArray : public ciArray { bool is_obj_array() { return true; } ciObject* obj_at(int index); + + bool is_flat() { return false; } }; #endif // SHARE_CI_CIOBJARRAY_HPP diff --git a/src/hotspot/share/ci/ciObjArrayKlass.cpp b/src/hotspot/share/ci/ciObjArrayKlass.cpp index 191e4e67522ac..684322ad810d6 100644 --- a/src/hotspot/share/ci/ciObjArrayKlass.cpp +++ b/src/hotspot/share/ci/ciObjArrayKlass.cpp @@ -22,9 +22,12 @@ * */ +#include "ci/ciFlatArrayKlass.hpp" #include "ci/ciInstanceKlass.hpp" #include "ci/ciObjArrayKlass.hpp" +#include "ci/ciRefArrayKlass.hpp" #include "ci/ciSymbol.hpp" +#include "ci/ciUtilities.hpp" #include "ci/ciUtilities.inline.hpp" #include "oops/objArrayKlass.hpp" #include "runtime/signature.hpp" @@ -63,14 +66,16 @@ 
ciObjArrayKlass::ciObjArrayKlass(ciSymbol* array_name, int dimension) : ciArrayKlass(array_name, dimension, T_OBJECT) { - _base_element_klass = base_element_klass; - assert(_base_element_klass->is_instance_klass() || - _base_element_klass->is_type_array_klass(), "bad base klass"); - if (dimension == 1) { - _element_klass = base_element_klass; - } else { - _element_klass = nullptr; - } + _base_element_klass = base_element_klass; + assert(_base_element_klass->is_instance_klass() || + _base_element_klass->is_type_array_klass() || + _base_element_klass->is_flat_array_klass() || + _base_element_klass->is_ref_array_klass(), "bad base klass"); + if (dimension == 1) { + _element_klass = base_element_klass; + } else { + _element_klass = nullptr; + } } // ------------------------------------------------------------------ @@ -115,7 +120,6 @@ ciSymbol* ciObjArrayKlass::construct_array_name(ciSymbol* element_name, name[pos] = JVM_SIGNATURE_ARRAY; } Symbol* base_name_sym = element_name->get_symbol(); - if (Signature::is_array(base_name_sym) || Signature::has_envelope(base_name_sym)) { strncpy(&name[pos], (char*)element_name->base(), element_len); @@ -133,8 +137,7 @@ ciSymbol* ciObjArrayKlass::construct_array_name(ciSymbol* element_name, // ciObjArrayKlass::make_impl // // Implementation of make. 
-ciObjArrayKlass* ciObjArrayKlass::make_impl(ciKlass* element_klass) { - +ciObjArrayKlass* ciObjArrayKlass::make_impl(ciKlass* element_klass, bool refined_type, bool null_free, bool atomic) { if (element_klass->is_loaded()) { EXCEPTION_CONTEXT; // The element klass is loaded @@ -144,11 +147,32 @@ ciObjArrayKlass* ciObjArrayKlass::make_impl(ciKlass* element_klass) { CURRENT_THREAD_ENV->record_out_of_memory_failure(); return ciEnv::unloaded_ciobjarrayklass(); } - return CURRENT_THREAD_ENV->get_obj_array_klass(array); + if (!refined_type) { + return CURRENT_THREAD_ENV->get_obj_array_klass(array); + } + + assert(!null_free || element_klass->is_inlinetype(), "Only value class arrays can be null free"); + assert(atomic || element_klass->is_inlinetype(), "Only value class arrays can be non-atomic"); + + const ArrayProperties props = ArrayProperties::Default() + .with_null_restricted(null_free) + .with_non_atomic(!atomic); + + array = ObjArrayKlass::cast(array)->klass_with_properties(props, THREAD); + if (HAS_PENDING_EXCEPTION) { + CLEAR_PENDING_EXCEPTION; + CURRENT_THREAD_ENV->record_out_of_memory_failure(); + return ciEnv::unloaded_ciobjarrayklass(); + } + assert(array != nullptr, "klass_with_properties should return a klass or throw"); + if (array->is_flatArray_klass()) { + return CURRENT_THREAD_ENV->get_flat_array_klass(array); + } else { + return CURRENT_THREAD_ENV->get_ref_array_klass(array); + } } - // The array klass was unable to be made or the element klass was - // not loaded. + // The array klass was unable to be made or the element klass was not loaded. ciSymbol* array_name = construct_array_name(element_klass->name(), 1); if (array_name == ciEnv::unloaded_cisymbol()) { return ciEnv::unloaded_ciobjarrayklass(); @@ -162,27 +186,19 @@ ciObjArrayKlass* ciObjArrayKlass::make_impl(ciKlass* element_klass) { // ciObjArrayKlass::make // // Make an array klass corresponding to the specified primitive type. 
-ciObjArrayKlass* ciObjArrayKlass::make(ciKlass* element_klass) { - GUARDED_VM_ENTRY(return make_impl(element_klass);) +ciObjArrayKlass* ciObjArrayKlass::make(ciKlass* element_klass, bool refined_type, bool null_free, bool atomic) { + GUARDED_VM_ENTRY(return make_impl(element_klass, refined_type, null_free, atomic);) } -ciObjArrayKlass* ciObjArrayKlass::make(ciKlass* element_klass, int dims) { +ciArrayKlass* ciObjArrayKlass::make(ciKlass* element_klass, int dims) { ciKlass* klass = element_klass; for (int i = 0; i < dims; i++) { - klass = ciObjArrayKlass::make(klass); + klass = ciObjArrayKlass::make(klass, /* refined_type = */ false); } - return klass->as_obj_array_klass(); + return klass->as_array_klass(); } ciKlass* ciObjArrayKlass::exact_klass() { - ciType* base = base_element_type(); - if (base->is_instance_klass()) { - ciInstanceKlass* ik = base->as_instance_klass(); - if (ik->exact_klass() != nullptr) { - return this; - } - } else if (base->is_primitive_type()) { - return this; - } + // This cannot be an exact klass because the refined types subtype it return nullptr; } diff --git a/src/hotspot/share/ci/ciObjArrayKlass.hpp b/src/hotspot/share/ci/ciObjArrayKlass.hpp index 3fb37c5088c5c..b2f14bd8ed005 100644 --- a/src/hotspot/share/ci/ciObjArrayKlass.hpp +++ b/src/hotspot/share/ci/ciObjArrayKlass.hpp @@ -26,6 +26,7 @@ #define SHARE_CI_CIOBJARRAYKLASS_HPP #include "ci/ciArrayKlass.hpp" +#include "oops/objArrayKlass.hpp" // ciObjArrayKlass // @@ -46,10 +47,10 @@ class ciObjArrayKlass : public ciArrayKlass { int dimension); ObjArrayKlass* get_ObjArrayKlass() { - return (ObjArrayKlass*)get_Klass(); + return ObjArrayKlass::cast(get_Klass()); } - static ciObjArrayKlass* make_impl(ciKlass* element_klass); + static ciObjArrayKlass* make_impl(ciKlass* element_klass, bool refined_type = false, bool null_free = false, bool atomic = true); static ciSymbol* construct_array_name(ciSymbol* element_name, int dimension); @@ -68,10 +69,14 @@ class ciObjArrayKlass : public 
ciArrayKlass { // What kind of ciObject is this? bool is_obj_array_klass() const { return true; } - static ciObjArrayKlass* make(ciKlass* element_klass); - static ciObjArrayKlass* make(ciKlass* element_klass, int dims); + static ciObjArrayKlass* make(ciKlass* element_klass, bool refined_type = true, bool null_free = false, bool atomic = true); + static ciArrayKlass* make(ciKlass* element_klass, int dims); virtual ciKlass* exact_klass(); + + virtual bool can_be_inline_array_klass() { + return element_klass()->can_be_inline_klass(); + } }; #endif // SHARE_CI_CIOBJARRAYKLASS_HPP diff --git a/src/hotspot/share/ci/ciObject.hpp b/src/hotspot/share/ci/ciObject.hpp index 9b60639edb5d6..60992d6f37230 100644 --- a/src/hotspot/share/ci/ciObject.hpp +++ b/src/hotspot/share/ci/ciObject.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -59,6 +59,7 @@ class ciObject : public ciBaseObject { jobject _handle; ciKlass* _klass; +protected: // Cache constant value lookups to ensure that consistent values are observed during compilation. class ConstantValue { private: @@ -73,6 +74,7 @@ class ciObject : public ciBaseObject { ciConstant value() const { return _value; } }; +private: GrowableArray* _constant_values = nullptr; protected: @@ -128,6 +130,7 @@ class ciObject : public ciBaseObject { virtual bool is_array() { return false; } virtual bool is_obj_array() { return false; } virtual bool is_type_array() { return false; } + virtual bool is_flat_array() const { return false; } virtual bool is_native_entry_point()const { return false; } // Is this a type or value which has no associated class? 
@@ -182,6 +185,10 @@ class ciObject : public ciBaseObject { assert(is_type_array(), "bad cast"); return (ciTypeArray*)this; } + ciFlatArray* as_flat_array() { + assert(is_flat_array(), "bad cast"); + return (ciFlatArray*)this; + } // Print debugging output about this ciObject. void print(outputStream* st); diff --git a/src/hotspot/share/ci/ciObjectFactory.cpp b/src/hotspot/share/ci/ciObjectFactory.cpp index d3bef01f85253..12cfef50fadad 100644 --- a/src/hotspot/share/ci/ciObjectFactory.cpp +++ b/src/hotspot/share/ci/ciObjectFactory.cpp @@ -23,6 +23,9 @@ */ #include "ci/ciCallSite.hpp" +#include "ci/ciFlatArray.hpp" +#include "ci/ciFlatArrayKlass.hpp" +#include "ci/ciInlineKlass.hpp" #include "ci/ciInstance.hpp" #include "ci/ciInstanceKlass.hpp" #include "ci/ciMemberName.hpp" @@ -35,6 +38,7 @@ #include "ci/ciObjArrayKlass.hpp" #include "ci/ciObject.hpp" #include "ci/ciObjectFactory.hpp" +#include "ci/ciRefArrayKlass.hpp" #include "ci/ciReplay.hpp" #include "ci/ciSymbol.hpp" #include "ci/ciSymbols.hpp" @@ -168,7 +172,7 @@ void ciObjectFactory::init_shared_objects() { for (int i = T_BOOLEAN; i <= T_CONFLICT; i++) { BasicType t = (BasicType)i; - if (type2name(t) != nullptr && !is_reference_type(t) && + if (type2name(t) != nullptr && t != T_FLAT_ELEMENT && !is_reference_type(t) && t != T_NARROWOOP && t != T_NARROWKLASS) { ciType::_basic_types[t] = new (_arena) ciType(t); init_ident_of(ciType::_basic_types[t]); @@ -399,12 +403,15 @@ ciObject* ciObjectFactory::create_new_object(oop o) { return new (arena()) ciMethodType(h_i); else return new (arena()) ciInstance(h_i); - } else if (o->is_objArray()) { + } else if (o->is_refArray()) { objArrayHandle h_oa(THREAD, (objArrayOop)o); return new (arena()) ciObjArray(h_oa); } else if (o->is_typeArray()) { typeArrayHandle h_ta(THREAD, (typeArrayOop)o); return new (arena()) ciTypeArray(h_ta); + } else if (o->is_flatArray()) { + flatArrayHandle h_ta(THREAD, (flatArrayOop)o); + return new (arena()) ciFlatArray(h_ta); } // The oop is of 
some type not supported by the compiler interface. @@ -424,11 +431,19 @@ ciMetadata* ciObjectFactory::create_new_metadata(Metadata* o) { if (o->is_klass()) { Klass* k = (Klass*)o; - if (k->is_instance_klass()) { + if (k->is_inline_klass()) { + return new (arena()) ciInlineKlass(k); + } else if (k->is_instance_klass()) { assert(!ReplayCompiles || ciReplay::no_replay_state() || !ciReplay::is_klass_unresolved((InstanceKlass*)k), "must be whitelisted for replay compilation"); return new (arena()) ciInstanceKlass(k); } else if (k->is_objArray_klass()) { - return new (arena()) ciObjArrayKlass(k); + if (k->is_flatArray_klass()) { + return new (arena()) ciFlatArrayKlass(k); + } else if (k->is_refArray_klass()) { + return new (arena()) ciRefArrayKlass(k); + } else { + return new (arena()) ciObjArrayKlass(k); + } } else if (k->is_typeArray_klass()) { return new (arena()) ciTypeArrayKlass(k); } @@ -663,6 +678,18 @@ ciReturnAddress* ciObjectFactory::get_return_address(int bci) { return new_ret_addr; } +ciWrapper* ciObjectFactory::make_early_larval_wrapper(ciType* type) { + ciWrapper* wrapper = new (arena()) ciWrapper(type, ciWrapper::EarlyLarval); + init_ident_of(wrapper); + return wrapper; +} + +ciWrapper* ciObjectFactory::make_null_free_wrapper(ciType* type) { + ciWrapper* wrapper = new (arena()) ciWrapper(type, ciWrapper::NullFree); + init_ident_of(wrapper); + return wrapper; +} + // ------------------------------------------------------------------ // ciObjectFactory::init_ident_of void ciObjectFactory::init_ident_of(ciBaseObject* obj) { diff --git a/src/hotspot/share/ci/ciObjectFactory.hpp b/src/hotspot/share/ci/ciObjectFactory.hpp index c578aecb5647b..d495bd83e3b85 100644 --- a/src/hotspot/share/ci/ciObjectFactory.hpp +++ b/src/hotspot/share/ci/ciObjectFactory.hpp @@ -148,6 +148,9 @@ class ciObjectFactory : public ArenaObj { ciReturnAddress* get_return_address(int bci); + ciWrapper* make_early_larval_wrapper(ciType* type); + ciWrapper* make_null_free_wrapper(ciType* 
type); + GrowableArray* get_ci_metadata() { return &_ci_metadata; } // RedefineClasses support void metadata_do(MetadataClosure* f); diff --git a/src/hotspot/share/ci/ciRefArrayKlass.hpp b/src/hotspot/share/ci/ciRefArrayKlass.hpp new file mode 100644 index 0000000000000..6555fe8fe9a31 --- /dev/null +++ b/src/hotspot/share/ci/ciRefArrayKlass.hpp @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef SHARE_VM_CI_CIREFARRAYKLASS_HPP +#define SHARE_VM_CI_CIREFARRAYKLASS_HPP + +#include "ci/ciObjArrayKlass.hpp" + +// A ciRefArrayKlass represents the klass of a refined array in which the elements are stored as +// references +class ciRefArrayKlass : public ciObjArrayKlass { +private: + CI_PACKAGE_ACCESS + friend class ciEnv; + +protected: + ciRefArrayKlass(Klass* k) : ciObjArrayKlass(k) { + assert(k->is_refArray_klass(), "wrong type"); + } + + const char* type_string() override { return "ciRefArrayKlass"; } + +public: + bool is_ref_array_klass() const override { return true; } + ciKlass* exact_klass() override { return this; } +}; + +#endif // SHARE_VM_CI_CIREFARRAYKLASS_HPP diff --git a/src/hotspot/share/ci/ciReplay.cpp b/src/hotspot/share/ci/ciReplay.cpp index 35522e75877ba..be55687cca455 100644 --- a/src/hotspot/share/ci/ciReplay.cpp +++ b/src/hotspot/share/ci/ciReplay.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -38,12 +38,15 @@ #include "memory/allocation.inline.hpp" #include "memory/oopFactory.hpp" #include "memory/resourceArea.hpp" +#include "oops/arrayProperties.hpp" #include "oops/constantPool.inline.hpp" #include "oops/cpCache.inline.hpp" #include "oops/fieldStreams.inline.hpp" +#include "oops/inlineKlass.inline.hpp" #include "oops/klass.inline.hpp" #include "oops/method.inline.hpp" #include "oops/oop.inline.hpp" +#include "oops/oopCast.inline.hpp" #include "oops/resolvedIndyEntry.hpp" #include "prims/jvmtiExport.hpp" #include "prims/methodHandles.hpp" @@ -511,9 +514,9 @@ class CompileReplay : public StackObj { return k; } obj = ciReplay::obj_field(obj, field); - // array - if (obj != nullptr && obj->is_objArray()) { - objArrayOop arr = (objArrayOop)obj; + // TODO 8350865 I think we need to handle null-free/flat arrays here + if (obj != nullptr && obj->is_refArray()) { + refArrayOop arr = oop_cast(obj); int index = parse_int("index"); if (index >= arr->length()) { report_error("bad array index"); @@ -862,6 +865,12 @@ class CompileReplay : public StackObj { return; } Klass* k = parse_klass(CHECK); + if (had_error()) { + return; + } + if (_version >= 3 && k != nullptr && k->is_objArray_klass()) { + k = create_concrete_object_array_klass(ObjArrayKlass::cast(k), THREAD); + } rec->_classes_offsets[i] = offset; rec->_classes[i] = k; } @@ -882,6 +891,15 @@ class CompileReplay : public StackObj { } } + ObjArrayKlass* create_concrete_object_array_klass(ObjArrayKlass* obj_array_klass, TRAPS) { + ArrayProperties array_properties(checked_cast(parse_int("array_properties"))); + if (!Arguments::is_valhalla_enabled()) { + // Ignore Valhalla-specific properties + array_properties = ArrayProperties::Default(); + } + return obj_array_klass->klass_with_properties(array_properties, THREAD); + } + // instanceKlass // instanceKlass #