diff --git a/java/openjdk8-jre/Makefile b/java/openjdk8-jre/Makefile
index 2f8e4d59b53b..a0b4ec39d7f9 100644
--- a/java/openjdk8-jre/Makefile
+++ b/java/openjdk8-jre/Makefile
@@ -1,6 +1,5 @@
# $FreeBSD$
-PORTREVISION= 5
PKGNAMESUFFIX= ${JDK_MAJOR_VERSION}-jre
COMMENT= Java Runtime Environment ${JDK_MAJOR_VERSION}
diff --git a/java/openjdk8/Makefile b/java/openjdk8/Makefile
index e7ed0806c513..6438336ddf0f 100644
--- a/java/openjdk8/Makefile
+++ b/java/openjdk8/Makefile
@@ -2,7 +2,6 @@
PORTNAME= openjdk
PORTVERSION= ${JDK_MAJOR_VERSION}.${JDK_UPDATE_VERSION}.${JDK_BUILD_NUMBER:S/^0//}
-PORTREVISION?= 1
CATEGORIES= java devel
MASTER_SITES= http://download.java.net/openjdk/jdk${JDK_MAJOR_VERSION}/promoted/b${DIST_BUILD_NUMBER}/:jdk \
https://adopt-openjdk.ci.cloudbees.com/job/jtreg/${JTREG_JENKINS_BUILD}/artifact/:jtreg \
@@ -38,6 +37,15 @@ ONLY_FOR_ARCHS= i386 amd64
WRKSRC= ${WRKDIR}/${PORTNAME}
DOS2UNIX_FILES= jdk/src/share/classes/com/sun/org/apache/xml/internal/security/resource/xmlsecurity_en.properties \
+ jdk/src/bsd/doc/man/java.1 \
+ jdk/src/bsd/doc/man/javac.1 \
+ jdk/src/bsd/doc/man/javap.1 \
+ jdk/src/linux/doc/man/java.1 \
+ jdk/src/linux/doc/man/javac.1 \
+ jdk/src/linux/doc/man/javap.1 \
+ jdk/src/solaris/doc/sun/man/man1/java.1 \
+ jdk/src/solaris/doc/sun/man/man1/javac.1 \
+ jdk/src/solaris/doc/sun/man/man1/javap.1 \
nashorn/test/script/jfx.js
SHEBANG_FILES= configure
@@ -62,8 +70,8 @@ NO_CCACHE= yes
NOPRECIOUSMAKEVARS= yes
JDK_MAJOR_VERSION= 8
-JDK_UPDATE_VERSION= 11
-JDK_BUILD_NUMBER= 12
+JDK_UPDATE_VERSION= 25
+JDK_BUILD_NUMBER= 17
DIST_BUILD_NUMBER= 132
JTREG_VERSION= 4.1
JTREG_BUILD_NUMBER= b08
@@ -163,33 +171,20 @@ ICONV_LDFLAGS= -L${LOCALBASE}/lib ${ICONV_LIB}
.endif
post-extract:
- @${MV} -f ${WRKSRC}/hotspot/make/bsd/makefiles/mapfile-vers-debug \
- ${WRKSRC}/hotspot/make/bsd/makefiles/mapfile-vers-debug.macosx
- @${MV} -f ${WRKSRC}/hotspot/make/bsd/makefiles/mapfile-vers-product \
- ${WRKSRC}/hotspot/make/bsd/makefiles/mapfile-vers-product.macosx
- @${CP} -f ${WRKSRC}/hotspot/make/linux/makefiles/mapfile-vers-debug \
- ${WRKSRC}/hotspot/make/bsd/makefiles/mapfile-vers-debug
- @${CP} -f ${WRKSRC}/hotspot/make/linux/makefiles/mapfile-vers-product \
- ${WRKSRC}/hotspot/make/bsd/makefiles/mapfile-vers-product
- @${MKDIR} ${WRKSRC}/jdk/src/bsd/classes/java/net \
- ${WRKSRC}/jdk/src/bsd/classes/sun/nio/ch
- @${MV} -f ${WRKSRC}/jdk/src/macosx/classes/java/net/DefaultInterface.java \
- ${WRKSRC}/jdk/src/bsd/classes/java/net
- @${MV} -f ${WRKSRC}/jdk/src/macosx/classes/sun/nio/ch/DefaultSelectorProvider.java \
- ${WRKSRC}/jdk/src/bsd/classes/sun/nio/ch
- @${MV} -f ${WRKSRC}/jdk/src/macosx/classes/sun/nio/ch/KQueue*.java \
- ${WRKSRC}/jdk/src/solaris/classes/sun/nio/ch
- @${MV} -f ${WRKSRC}/jdk/src/macosx/native/sun/nio/ch/KQueue*.c \
- ${WRKSRC}/jdk/src/solaris/native/sun/nio/ch
+ @${CP} ${FILESDIR}/jdk-test-javax-imageio-plugins-jpeg-truncated.jpg \
+ ${WRKSRC}/jdk/test/javax/imageio/plugins/jpeg/truncated.jpg
+ @${CP} ${FILESDIR}/nashorn-test-script-jfx-flyingimage-golden-bsd.png \
+ ${WRKSRC}/nashorn/test/script/jfx/flyingimage/golden/bsd.png
+ @${CP} ${FILESDIR}/nashorn-test-script-jfx-kaleidoscope-golden-bsd.png \
+ ${WRKSRC}/nashorn/test/script/jfx/kaleidoscope/golden/bsd.png
+ @${CP} ${FILESDIR}/nashorn-test-script-jfx-spread-golden-bsd.png \
+ ${WRKSRC}/nashorn/test/script/jfx/spread/golden/bsd.png
+
+post-patch:
+ @${FIND} ${WRKSRC} -name '*.orig' -delete
@${RMDIR} ${WRKSRC}/jdk/src/macosx/classes/java/net \
${WRKSRC}/jdk/src/macosx/native/sun/nio/ch \
${WRKSRC}/jdk/src/macosx/native/sun/nio
- @${CP} -f ${WRKSRC}/jdk/src/solaris/classes/java/lang/UNIXProcess.java.bsd \
- ${WRKSRC}/jdk/src/solaris/classes/java/lang/UNIXProcess.java.macosx
- @${CP} -f ${FILESDIR}/jdk-test-javax-imageio-plugins-jpeg-truncated.jpg \
- ${WRKSRC}/jdk/test/javax/imageio/plugins/jpeg/truncated.jpg
-
-post-patch:
@${SED} -e 's|%%LOCALBASE%%|${LOCALBASE}|' \
${FILESDIR}/bsd.fontconfig.properties.in > \
${WRKSRC}/jdk/src/solaris/classes/sun/awt/fontconfigs/bsd.fontconfig.properties
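The post-patch sed above is plain placeholder templating; a standalone sketch
of the same step (hypothetical file names, assuming the default prefix):

    LOCALBASE=/usr/local
    sed -e "s|%%LOCALBASE%%|${LOCALBASE}|" \
        bsd.fontconfig.properties.in > bsd.fontconfig.properties

so the generated bsd.fontconfig.properties points at whatever prefix the port
was actually built with.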
diff --git a/java/openjdk8/files/nashorn-test-script-jfx-flyingimage-golden-bsd.png b/java/openjdk8/files/nashorn-test-script-jfx-flyingimage-golden-bsd.png
new file mode 100644
index 000000000000..ba72fe68406b
Binary files /dev/null and b/java/openjdk8/files/nashorn-test-script-jfx-flyingimage-golden-bsd.png differ
diff --git a/java/openjdk8/files/nashorn-test-script-jfx-kaleidoscope-golden-bsd.png b/java/openjdk8/files/nashorn-test-script-jfx-kaleidoscope-golden-bsd.png
new file mode 100644
index 000000000000..64d9499b0ff6
Binary files /dev/null and b/java/openjdk8/files/nashorn-test-script-jfx-kaleidoscope-golden-bsd.png differ
diff --git a/java/openjdk8/files/nashorn-test-script-jfx-spread-golden-bsd.png b/java/openjdk8/files/nashorn-test-script-jfx-spread-golden-bsd.png
new file mode 100644
index 000000000000..c288162329ae
Binary files /dev/null and b/java/openjdk8/files/nashorn-test-script-jfx-spread-golden-bsd.png differ
diff --git a/java/openjdk8/files/patch-8u20-b26 b/java/openjdk8/files/patch-8u20-b26
new file mode 100644
index 000000000000..764df3e62484
--- /dev/null
+++ b/java/openjdk8/files/patch-8u20-b26
@@ -0,0 +1,404059 @@
+--- ./.hgtags Tue Jun 03 14:19:17 2014 -0700
++++ ./.hgtags Wed Jul 30 03:50:56 2014 -0700
+@@ -245,12 +245,18 @@
+ 1e1f86d5d4e22c15a9bf9f1581acddb8c59abae2 jdk8-b121
+ 347009c5881668b1e44f64d56d4a96cb20a183fa jdk8-b122
+ ff1478785e43718146ffbce97e007f39c3bcaa32 jdk8-b123
++c330fa67c4daffdc86527f1a24941aa5a3500098 jdk8u20-b00
+ 790bbd46b2015e69ce301dae194c2b4141011f2d jdk8-b124
++78abb27c27d988a86e6c82b2cce03cdc04211127 jdk8u20-b01
+ 790bbd46b2015e69ce301dae194c2b4141011f2d jdk8-b125
+ 9ccce5bf1b0ea0d3f3fb871be069f305f9cea530 jdk8-b126
+ 2e2ffb9e4b690c63b32142861177390e0f2c40e9 jdk8-b127
+ 101e42de46869e6604fbf095e1666fbf07fcb93c jdk8-b128
+ 1e5fe865491300cd0c63261ecf8d34e621e1345c jdk8-b129
++cc868070f1959b849c8c3b867771fbdb07b9ba05 jdk8u20-b02
++6a3d3b7feab4d4a8252c63b4ce7d0fab106cf2f7 jdk8u20-b03
++7e1b01df280fb065c5953c48f54ac9d619ecbf1c jdk8u20-b04
++69e0af208dad70fdef65a89ab2c4c468ed9e24b8 jdk8u20-b05
+ 839546caab1285c7699a9c2aa1467f57c9ea7f30 jdk8-b130
+ 0c38dfecab2ad9f9b5b5edf54b991602147cd040 jdk8-b131
+ 2a8f4c022aa03e7916223f3291517dbcc38e07cd jdk8-b132
+@@ -268,6 +274,7 @@
+ dd3bd272ceedbd69fabafc531b6b1e056659f733 jdk8u5-b11
+ 3e05b6ae0a1e2bd7352462e9bf8e7262246fb77f jdk8u5-b12
+ d81e301cae70f1f95f4bb976ec053c915dee503a jdk8u5-b13
++19dd42ebf97c187fbf53884f45dca84274909c3e jdk8u5-b31
+ 397902f53444be14aa4e261cd47064fac82919c9 jdk8u11-b01
+ 6ffd41be920a3e63c5767f36ac725e9e3bf5ec50 jdk8u11-b02
+ 3078ab9b8d4ad37cf18bf6a1ed49c8015e70ec73 jdk8u11-b03
+@@ -279,3 +286,30 @@
+ 390084098df7bffecd0eb2318facc6f0f9a46b70 jdk8u11-b09
+ 6d324f36e2448f486d0caa67f70e5a6cf5ac6c0d jdk8u11-b10
+ f0b9fee1d40a6aae31be4780f70aba02148ec54c jdk8u11-b11
++e85bf9b28eb7f4098eeb25ba0e3afed34058ef09 jdk8u11-b12
++66b17e2403b04cfe98dc1cce270f15ed817d0336 jdk8u11-b31
++ae6a3aec6aa29509a0fd5f53709889b99b1e27da jdk8u20-b06
++cc868070f1959b849c8c3b867771fbdb07b9ba05 jdk8u20-b02
++6a3d3b7feab4d4a8252c63b4ce7d0fab106cf2f7 jdk8u20-b03
++7e1b01df280fb065c5953c48f54ac9d619ecbf1c jdk8u20-b04
++69e0af208dad70fdef65a89ab2c4c468ed9e24b8 jdk8u20-b05
++ae6a3aec6aa29509a0fd5f53709889b99b1e27da jdk8u20-b06
++6403ef94cb0db32d9221a5e8f09f3664cd7744dc jdk8u20-b07
++b7750b6ee1578fd5b2b1f6758f905b332503d8ed jdk8u20-b08
++d420eae635c42be98b166e1ce9c64fc0a8825529 jdk8u20-b09
++cc4ca2ff0afcfb932da6fa4fffdd01f08e4ff71b jdk8u20-b10
++61291eee163ab5bbe0b38b37b77673bac9bf5310 jdk8u20-b11
++2feecdcd7b677f3baf9df6e8ea46f7e08c7e4411 jdk8u20-b12
++26764db977ecb590cdee637d27996a87cdd9507e jdk8u20-b13
++548afd2496385263874c0ce970158764166b1156 jdk8u20-b14
++12a1fd80b05aa9c5daab22ca5fab8e2c9c3df61c jdk8u20-b15
++4095a7a49a9ea95f4c59f936acf45ca1f87b8fff jdk8u20-b16
++3a49a08a2e3991a10e6bec531e9dbfa7c503fcb4 jdk8u20-b17
++1695032e51faa36ed9c39b2817baa374ca361513 jdk8u20-b18
++2f40422f564b892a26cb04c62885bb5bc85984e3 jdk8u20-b19
++5b76ecd0cdcf899261da2c9965862771f6da4e26 jdk8u20-b20
++0dccc4aca1859b1ff7dca9db214f7f38c4ddbbce jdk8u20-b21
++6c1fb59fa5d7095d93a023553a949f873f324c6b jdk8u20-b22
++b14daf2459c5430dfe5d435483d6f424cff09584 jdk8u20-b23
++1710841b0229403f4af85eac8b68ea5065a26c81 jdk8u20-b24
++1710841b0229403f4af85eac8b68ea5065a26c81 jdk8u20-b25
+--- ./THIRD_PARTY_README Tue Jun 03 14:19:17 2014 -0700
++++ ./THIRD_PARTY_README Wed Jul 30 03:50:56 2014 -0700
+@@ -2,7 +2,7 @@
+ -----------------------------
+
+ %% This notice is provided with respect to ASM Bytecode Manipulation
+-Framework v5.0, which may be included with JRE 8, and JDK 8, and
++Framework v5.0.3, which may be included with JRE 8, and JDK 8, and
+ OpenJDK 8.
+
+ --- begin of LICENSE ---
+@@ -1471,7 +1471,7 @@
+ version 2.0.
+
+ The NSS libraries are supplied in executable form, built from unmodified
+-NSS source code labeled with the "NSS_3.13.1_RTM" release tag.
++NSS source code labeled with the "NSS_3_16_RTM" HG tag.
+
+ The NSS source code is available in the OpenJDK source code repository at:
+ jdk/test/sun/security/pkcs11/nss/src
+@@ -3349,14 +3349,14 @@
+
+ -------------------------------------------------------------------------------
+
+-%% This notice is provided with respect to zlib v1.2.5, which may be included
++%% This notice is provided with respect to zlib v1.2.8, which may be included
+ with JRE 8, JDK 8, and OpenJDK 8.
+
+ --- begin of LICENSE ---
+
+- version 1.2.5, July 18th, 2005
+-
+- Copyright (C) 1995-2005 Jean-loup Gailly and Mark Adler
++ version 1.2.8, April 28th, 2013
++
++ Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
+
+ This software is provided 'as-is', without any express or implied
+ warranty. In no event will the authors be held liable for any damages
+@@ -3382,11 +3382,11 @@
+ -------------------------------------------------------------------------------
+
+ %% This notice is provided with respect to the following which may be
+-included with JRE 8, JDK 8, and OpenJDK 8, except where noted:
+-
+- Apache Commons Math 2.2
+- Apache Derby 10.10.1.2 [included with JDK 8]
+- Apache Jakarta BCEL 5.2
++included with JRE 8, JDK 8, and OpenJDK 8.
++
++ Apache Commons Math 3.2
++ Apache Derby 10.10.1.3
++ Apache Jakarta BCEL 5.1
+ Apache Jakarta Regexp 1.4
+ Apache Santuario XML Security for Java 1.5.4
+ Apache Xalan-Java 2.7.1
+--- ./common/autoconf/boot-jdk.m4 Tue Jun 03 14:19:17 2014 -0700
++++ ./common/autoconf/boot-jdk.m4 Wed Jul 30 03:50:56 2014 -0700
+@@ -316,7 +316,7 @@
+
+ # Minimum amount of heap memory.
+ ADD_JVM_ARG_IF_OK([-Xms64M],boot_jdk_jvmargs,[$JAVA])
+- if test "x$OPENJDK_TARGET_OS" = "xmacosx"; then
++ if test "x$OPENJDK_TARGET_OS" = "xmacosx" || test "x$OPENJDK_TARGET_CPU" = "xppc64" ; then
+ # Why does macosx need more heap? Its the huge JDK batch.
+ ADD_JVM_ARG_IF_OK([-Xmx1600M],boot_jdk_jvmargs,[$JAVA])
+ else
+--- ./common/autoconf/build-aux/config.guess Tue Jun 03 14:19:17 2014 -0700
++++ ./common/autoconf/build-aux/config.guess Wed Jul 30 03:50:56 2014 -0700
+@@ -60,4 +60,30 @@
+ esac
+ fi
+
++# Test and fix architecture string on AIX
++# On AIX 'config.guess' returns 'powerpc' as architecture but 'powerpc' is
++# implicitly handled as a 32-bit architecture in 'platform.m4', so we check
++# for the kernel mode and rewrite it to 'powerpc64' if we're running in 64-bit mode.
++# The check could also be done with `/usr/sbin/prtconf | grep "Kernel Type" | grep "64-bit"`
++echo $OUT | grep powerpc-ibm-aix > /dev/null 2> /dev/null
++if test $? = 0; then
++ if [ -x /bin/getconf ] ; then
++ KERNEL_BITMODE=`getconf KERNEL_BITMODE`
++ if [ "$KERNEL_BITMODE" = "32" ]; then
++ KERNEL_BITMODE=""
++ fi
++ fi
++ OUT=powerpc$KERNEL_BITMODE`echo $OUT | sed -e 's/[^-]*//'`
++fi
++
++# Test and fix little endian PowerPC64.
++# TODO: should be handled by autoconf-config.guess.
++if [ "x$OUT" = x ]; then
++ if [ `uname -m` = ppc64le ]; then
++ if [ `uname -s` = Linux ]; then
++ OUT=powerpc64le-unknown-linux-gnu
++ fi
++ fi
++fi
++
+ echo $OUT
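A worked example of the AIX rewrite above, with a hypothetical triplet and a
64-bit kernel assumed:

    OUT=powerpc-ibm-aix7.1.0.0       # what config.guess reports by itself
    KERNEL_BITMODE=64                # getconf KERNEL_BITMODE on a 64-bit kernel
    OUT=powerpc$KERNEL_BITMODE`echo $OUT | sed -e 's/[^-]*//'`
    echo $OUT                        # -> powerpc64-ibm-aix7.1.0.0

The sed expression deletes everything up to the first '-', so only the CPU
field of the triplet is replaced.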
+--- ./common/autoconf/build-performance.m4 Tue Jun 03 14:19:17 2014 -0700
++++ ./common/autoconf/build-performance.m4 Wed Jul 30 03:50:56 2014 -0700
+@@ -41,6 +41,9 @@
+ # Looks like a MacOSX system
+ NUM_CORES=`/usr/sbin/system_profiler -detailLevel full SPHardwareDataType | grep 'Cores' | awk '{print [$]5}'`
+ FOUND_CORES=yes
++ elif test "x$OPENJDK_BUILD_OS" = xaix ; then
++ NUM_CORES=`/usr/sbin/prtconf | grep "^Number Of Processors" | awk '{ print [$]4 }'`
++ FOUND_CORES=yes
+ elif test -n "$NUMBER_OF_PROCESSORS"; then
+ # On windows, look in the env
+ NUM_CORES=$NUMBER_OF_PROCESSORS
+@@ -68,8 +71,8 @@
+ MEMORY_SIZE=`expr $MEMORY_SIZE / 1024`
+ FOUND_MEM=yes
+ elif test -x /usr/sbin/prtconf; then
+- # Looks like a Solaris system
+- MEMORY_SIZE=`/usr/sbin/prtconf | grep "Memory size" | awk '{ print [$]3 }'`
++ # Looks like a Solaris or AIX system
++ MEMORY_SIZE=`/usr/sbin/prtconf | grep "^Memory [[Ss]]ize" | awk '{ print [$]3 }'`
+ FOUND_MEM=yes
+ elif test -x /usr/sbin/system_profiler; then
+ # Looks like a MacOSX system
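The anchored grep has to cover both spellings of the prtconf output; an
illustration with sample lines (not captured from a real machine):

    # Solaris prints 'Memory size: 16384 Megabytes', AIX 'Memory Size: 16384 MB'
    printf 'Memory Size: 16384 MB\n' | grep '^Memory [Ss]ize' | awk '{ print $3 }'
    # -> 16384

The doubled brackets in the m4 source collapse to the single-bracket character
class seen here once autoconf has processed the file.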
+--- ./common/autoconf/configure.ac Tue Jun 03 14:19:17 2014 -0700
++++ ./common/autoconf/configure.ac Wed Jul 30 03:50:56 2014 -0700
+@@ -88,6 +88,7 @@
+
+ # These are needed to be able to create a configuration name (and thus the output directory)
+ JDKOPT_SETUP_JDK_VARIANT
++JDKOPT_SETUP_JVM_INTERPRETER
+ JDKOPT_SETUP_JVM_VARIANTS
+ JDKOPT_SETUP_DEBUG_LEVEL
+
+--- ./common/autoconf/generated-configure.sh Tue Jun 03 14:19:17 2014 -0700
++++ ./common/autoconf/generated-configure.sh Wed Jul 30 03:50:56 2014 -0700
+@@ -665,6 +665,7 @@
+ CFLAGS_DEBUG_SYMBOLS
+ ZIP_DEBUGINFO_FILES
+ ENABLE_DEBUG_SYMBOLS
++USING_BROKEN_SUSE_LD
+ COMPILER_SUPPORTS_TARGET_BITS_FLAG
+ ZERO_ARCHFLAG
+ LDFLAGS_CXX_JDK
+@@ -749,6 +750,7 @@
+ PROPER_COMPILER_CXX
+ POTENTIAL_CXX
+ TOOLS_DIR_CXX
++COMPILER_TARGET_BITS_FLAG
+ OBJEXT
+ EXEEXT
+ ac_ct_CC
+@@ -851,6 +853,7 @@
+ DEBUG_LEVEL
+ MACOSX_UNIVERSAL
+ INCLUDE_SA
++JVM_VARIANT_CORE
+ JVM_VARIANT_ZEROSHARK
+ JVM_VARIANT_ZERO
+ JVM_VARIANT_KERNEL
+@@ -858,6 +861,7 @@
+ JVM_VARIANT_CLIENT
+ JVM_VARIANT_SERVER
+ JVM_VARIANTS
++JVM_INTERPRETER
+ JDK_VARIANT
+ SET_OPENJDK
+ BUILD_LOG_WRAPPER
+@@ -1003,6 +1007,7 @@
+ with_devkit
+ enable_openjdk_only
+ with_jdk_variant
++with_jvm_interpreter
+ with_jvm_variants
+ enable_debug
+ with_debug_level
+@@ -1747,8 +1752,10 @@
+ --with-devkit use this directory as base for tools-dir and
+ sys-root (for cross-compiling)
+ --with-jdk-variant JDK variant to build (normal) [normal]
++ --with-jvm-interpreter JVM interpreter to build (template, cpp) [template]
+ --with-jvm-variants JVM variants (separated by commas) to build (server,
+- client, minimal1, kernel, zero, zeroshark) [server]
++ client, minimal1, kernel, zero, zeroshark, core)
++ [server]
+ --with-debug-level set the debug level (release, fastdebug, slowdebug)
+ [release]
+ --with-conf-name use this as the name of the configuration [generated
+@@ -3485,8 +3492,6 @@
+ If you put the resulting build in \"C:\Program Files\GnuWin32\", it will be found automatically."
+ fi
+ ;;
+- * )
+- break ;;
+ esac
+ }
+
+@@ -3512,8 +3517,6 @@
+ PKGHANDLER_COMMAND="sudo apt-get install libX11-dev libxext-dev libxrender-dev libxtst-dev libxt-dev" ;;
+ ccache)
+ PKGHANDLER_COMMAND="sudo apt-get install ccache" ;;
+- * )
+- break ;;
+ esac
+ }
+
+@@ -3535,8 +3538,6 @@
+ PKGHANDLER_COMMAND="sudo yum install libXtst-devel libXt-devel libXrender-devel" ;;
+ ccache)
+ PKGHANDLER_COMMAND="sudo yum install ccache" ;;
+- * )
+- break ;;
+ esac
+ }
+
+@@ -3586,6 +3587,8 @@
+
+
+
++
++
+ ###############################################################################
+ #
+ # Should we build only OpenJDK even if closed sources are present?
+@@ -3865,7 +3868,7 @@
+ #CUSTOM_AUTOCONF_INCLUDE
+
+ # Do not change or remove the following line, it is needed for consistency checks:
+-DATE_WHEN_GENERATED=1390334534
++DATE_WHEN_GENERATED=1397150809
+
+ ###############################################################################
+ #
+@@ -6784,6 +6787,11 @@
+ VAR_OS_API=winapi
+ VAR_OS_ENV=windows.msys
+ ;;
++ *aix*)
++ VAR_OS=aix
++ VAR_OS_API=posix
++ VAR_OS_ENV=aix
++ ;;
+ *)
+ as_fn_error $? "unsupported operating system $build_os" "$LINENO" 5
+ ;;
+@@ -6822,6 +6830,12 @@
+ VAR_CPU_BITS=64
+ VAR_CPU_ENDIAN=big
+ ;;
++ powerpc64le)
++ VAR_CPU=ppc64
++ VAR_CPU_ARCH=ppc
++ VAR_CPU_BITS=64
++ VAR_CPU_ENDIAN=little
++ ;;
+ s390)
+ VAR_CPU=s390
+ VAR_CPU_ARCH=s390
+@@ -6904,6 +6918,11 @@
+ VAR_OS_API=winapi
+ VAR_OS_ENV=windows.msys
+ ;;
++ *aix*)
++ VAR_OS=aix
++ VAR_OS_API=posix
++ VAR_OS_ENV=aix
++ ;;
+ *)
+ as_fn_error $? "unsupported operating system $host_os" "$LINENO" 5
+ ;;
+@@ -6942,6 +6961,12 @@
+ VAR_CPU_BITS=64
+ VAR_CPU_ENDIAN=big
+ ;;
++ powerpc64le)
++ VAR_CPU=ppc64
++ VAR_CPU_ARCH=ppc
++ VAR_CPU_BITS=64
++ VAR_CPU_ENDIAN=little
++ ;;
+ s390)
+ VAR_CPU=s390
+ VAR_CPU_ARCH=s390
+@@ -7810,6 +7835,37 @@
+ $as_echo "$JDK_VARIANT" >&6; }
+
+
++###############################################################################
++#
++# Check which interpreter of the JVM we want to build.
++# Currently we have:
++# template: Template interpreter (the default)
++# cpp : C++ interpreter
++{ $as_echo "$as_me:${as_lineno-$LINENO}: checking which interpreter of the JVM to build" >&5
++$as_echo_n "checking which interpreter of the JVM to build... " >&6; }
++
++# Check whether --with-jvm-interpreter was given.
++if test "${with_jvm_interpreter+set}" = set; then :
++ withval=$with_jvm_interpreter;
++fi
++
++
++if test "x$with_jvm_interpreter" = x; then
++ with_jvm_interpreter="template"
++fi
++
++JVM_INTERPRETER="$with_jvm_interpreter"
++
++if test "x$JVM_INTERPRETER" != xtemplate && test "x$JVM_INTERPRETER" != xcpp; then
++ as_fn_error $? "The available JVM interpreters are: template, cpp" "$LINENO" 5
++fi
++
++
++
++{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $with_jvm_interpreter" >&5
++$as_echo "$with_jvm_interpreter" >&6; }
++
++
+
+ ###############################################################################
+ #
+@@ -7822,6 +7878,7 @@
+ # ie normal interpreter and C1, only the serial GC, kernel jvmti etc
+ # zero: no machine code interpreter, no compiler
+ # zeroshark: zero interpreter and shark/llvm compiler backend
++# core: interpreter only, no compiler (only works on some platforms)
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking which variants of the JVM to build" >&5
+ $as_echo_n "checking which variants of the JVM to build... " >&6; }
+
+@@ -7836,10 +7893,10 @@
+ fi
+
+ JVM_VARIANTS=",$with_jvm_variants,"
+- TEST_VARIANTS=`$ECHO "$JVM_VARIANTS" | $SED -e 's/server,//' -e 's/client,//' -e 's/minimal1,//' -e 's/kernel,//' -e 's/zero,//' -e 's/zeroshark,//'`
++ TEST_VARIANTS=`$ECHO "$JVM_VARIANTS" | $SED -e 's/server,//' -e 's/client,//' -e 's/minimal1,//' -e 's/kernel,//' -e 's/zero,//' -e 's/zeroshark,//' -e 's/core,//'`
+
+ if test "x$TEST_VARIANTS" != "x,"; then
+- as_fn_error $? "The available JVM variants are: server, client, minimal1, kernel, zero, zeroshark" "$LINENO" 5
++ as_fn_error $? "The available JVM variants are: server, client, minimal1, kernel, zero, zeroshark, core" "$LINENO" 5
+ fi
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $with_jvm_variants" >&5
+ $as_echo "$with_jvm_variants" >&6; }
+@@ -7850,6 +7907,7 @@
+ JVM_VARIANT_KERNEL=`$ECHO "$JVM_VARIANTS" | $SED -e '/,kernel,/!s/.*/false/g' -e '/,kernel,/s/.*/true/g'`
+ JVM_VARIANT_ZERO=`$ECHO "$JVM_VARIANTS" | $SED -e '/,zero,/!s/.*/false/g' -e '/,zero,/s/.*/true/g'`
+ JVM_VARIANT_ZEROSHARK=`$ECHO "$JVM_VARIANTS" | $SED -e '/,zeroshark,/!s/.*/false/g' -e '/,zeroshark,/s/.*/true/g'`
++ JVM_VARIANT_CORE=`$ECHO "$JVM_VARIANTS" | $SED -e '/,core,/!s/.*/false/g' -e '/,core,/s/.*/true/g'`
+
+ if test "x$JVM_VARIANT_CLIENT" = xtrue; then
+ if test "x$OPENJDK_TARGET_CPU_BITS" = x64; then
+@@ -7869,7 +7927,7 @@
+
+ # Replace the commas with AND for use in the build directory name.
+ ANDED_JVM_VARIANTS=`$ECHO "$JVM_VARIANTS" | $SED -e 's/^,//' -e 's/,$//' -e 's/,/AND/'`
+- COUNT_VARIANTS=`$ECHO "$JVM_VARIANTS" | $SED -e 's/server,/1/' -e 's/client,/1/' -e 's/minimal1,/1/' -e 's/kernel,/1/' -e 's/zero,/1/' -e 's/zeroshark,/1/'`
++ COUNT_VARIANTS=`$ECHO "$JVM_VARIANTS" | $SED -e 's/server,/1/' -e 's/client,/1/' -e 's/minimal1,/1/' -e 's/kernel,/1/' -e 's/zero,/1/' -e 's/zeroshark,/1/' -e 's/core,/1/'`
+ if test "x$COUNT_VARIANTS" != "x,1"; then
+ BUILDING_MULTIPLE_JVM_VARIANTS=yes
+ else
+@@ -7884,6 +7942,7 @@
+
+
+
++
+ INCLUDE_SA=true
+ if test "x$JVM_VARIANT_ZERO" = xtrue ; then
+ INCLUDE_SA=false
+@@ -7891,6 +7950,9 @@
+ if test "x$JVM_VARIANT_ZEROSHARK" = xtrue ; then
+ INCLUDE_SA=false
+ fi
++ if test "x$VAR_CPU" = xppc64 ; then
++ INCLUDE_SA=false
++ fi
+
+
+ if test "x$OPENJDK_TARGET_OS" = "xmacosx"; then
+@@ -8006,6 +8068,10 @@
+ HOTSPOT_TARGET="$HOTSPOT_TARGET${HOTSPOT_DEBUG_LEVEL}shark "
+ fi
+
++ if test "x$JVM_VARIANT_CORE" = xtrue; then
++ HOTSPOT_TARGET="$HOTSPOT_TARGET${HOTSPOT_DEBUG_LEVEL}core "
++ fi
++
+ HOTSPOT_TARGET="$HOTSPOT_TARGET docs export_$HOTSPOT_EXPORT"
+
+ # On Macosx universal binaries are produced, but they only contain
+@@ -15857,8 +15923,6 @@
+ pkgutil_help $MISSING_DEPENDENCY ;;
+ pkgadd)
+ pkgadd_help $MISSING_DEPENDENCY ;;
+- * )
+- break ;;
+ esac
+
+ if test "x$PKGHANDLER_COMMAND" != x; then
+@@ -16037,7 +16101,7 @@
+ JVM_ARG_OK=false
+ fi
+
+- if test "x$OPENJDK_TARGET_OS" = "xmacosx"; then
++ if test "x$OPENJDK_TARGET_OS" = "xmacosx" || test "x$OPENJDK_TARGET_CPU" = "xppc64" ; then
+ # Why does macosx need more heap? Its the huge JDK batch.
+
+ $ECHO "Check if jvm arg is ok: -Xmx1600M" >&5
+@@ -19109,6 +19173,9 @@
+ COMPILER_CHECK_LIST="cl"
+ elif test "x$OPENJDK_TARGET_OS" = "xsolaris"; then
+ COMPILER_CHECK_LIST="cc gcc"
++ elif test "x$OPENJDK_TARGET_OS" = "xaix"; then
++ # Do not probe for cc on AIX.
++ COMPILER_CHECK_LIST="xlc_r"
+ else
+ COMPILER_CHECK_LIST="gcc cc"
+ fi
+@@ -19246,8 +19313,6 @@
+ pkgutil_help $MISSING_DEPENDENCY ;;
+ pkgadd)
+ pkgadd_help $MISSING_DEPENDENCY ;;
+- * )
+- break ;;
+ esac
+
+ if test "x$PKGHANDLER_COMMAND" != x; then
+@@ -19523,9 +19588,12 @@
+ $as_echo "$as_me: Rewriting CC to \"$new_complete\"" >&6;}
+ fi
+
+- { $as_echo "$as_me:${as_lineno-$LINENO}: checking resolved symbolic links for CC" >&5
++ TEST_COMPILER="$CC"
++ # Don't remove symbolic links on AIX because 'xlc_r' and 'xlC_r' may both be links
++ # to 'xlc', but it is crucial that we invoke the compiler with the right name!
++ if test "x$OPENJDK_BUILD_OS" != xaix; then
++ { $as_echo "$as_me:${as_lineno-$LINENO}: checking resolved symbolic links for CC" >&5
+ $as_echo_n "checking resolved symbolic links for CC... " >&6; }
+- TEST_COMPILER="$CC"
+
+ if test "x$OPENJDK_BUILD_OS" != xwindows; then
+ # Follow a chain of symbolic links. Use readlink
+@@ -19574,8 +19642,9 @@
+ fi
+ fi
+
+- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $TEST_COMPILER" >&5
++ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $TEST_COMPILER" >&5
+ $as_echo "$TEST_COMPILER" >&6; }
++ fi
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking if CC is disguised ccache" >&5
+ $as_echo_n "checking if CC is disguised ccache... " >&6; }
+
+@@ -20039,6 +20108,15 @@
+ COMPILER_VERSION=`$ECHO $COMPILER_VERSION_TEST | $SED -n "s/^.*[ ,\t]$COMPILER_NAME[ ,\t]\([1-9]\.[0-9][0-9]*\).*/\1/p"`
+ COMPILER_VENDOR="Sun Studio"
+ fi
++ elif test "x$OPENJDK_TARGET_OS" = xaix; then
++ COMPILER_VERSION_TEST=`$COMPILER -qversion 2>&1 | $TAIL -n 1`
++ $ECHO $COMPILER_VERSION_TEST | $GREP "^Version: " > /dev/null
++ if test $? -ne 0; then
++ as_fn_error $? "Failed to detect the compiler version of $COMPILER ...." "$LINENO" 5
++ else
++ COMPILER_VERSION=`$ECHO $COMPILER_VERSION_TEST | $SED -n 's/Version: \([0-9][0-9]\.[0-9][0-9]*\).*/\1/p'`
++ COMPILER_VENDOR='IBM'
++ fi
+ elif test "x$OPENJDK_TARGET_OS" = xwindows; then
+ # First line typically looks something like:
+ # Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 16.00.40219.01 for 80x86
+@@ -20680,6 +20758,14 @@
+ ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
+
++ # Option used to tell the compiler whether to create 32- or 64-bit executables
++ # Notice that CC contains the full compiler path at this point.
++ case $CC in
++ *xlc_r) COMPILER_TARGET_BITS_FLAG="-q";;
++ *) COMPILER_TARGET_BITS_FLAG="-m";;
++ esac
++
++
+ ### Locate C++ compiler (CXX)
+
+ if test "x$CXX" != x; then
+@@ -20688,6 +20774,9 @@
+ COMPILER_CHECK_LIST="cl"
+ elif test "x$OPENJDK_TARGET_OS" = "xsolaris"; then
+ COMPILER_CHECK_LIST="CC g++"
++ elif test "x$OPENJDK_TARGET_OS" = "xaix"; then
++ # Do not probe for CC on AIX.
++ COMPILER_CHECK_LIST="xlC_r"
+ else
+ COMPILER_CHECK_LIST="g++ CC"
+ fi
+@@ -20825,8 +20914,6 @@
+ pkgutil_help $MISSING_DEPENDENCY ;;
+ pkgadd)
+ pkgadd_help $MISSING_DEPENDENCY ;;
+- * )
+- break ;;
+ esac
+
+ if test "x$PKGHANDLER_COMMAND" != x; then
+@@ -21102,9 +21189,12 @@
+ $as_echo "$as_me: Rewriting CXX to \"$new_complete\"" >&6;}
+ fi
+
+- { $as_echo "$as_me:${as_lineno-$LINENO}: checking resolved symbolic links for CXX" >&5
++ TEST_COMPILER="$CXX"
++ # Don't remove symbolic links on AIX because 'xlc_r' and 'xlC_r' may both be links
++ # to 'xlc', but it is crucial that we invoke the compiler with the right name!
++ if test "x$OPENJDK_BUILD_OS" != xaix; then
++ { $as_echo "$as_me:${as_lineno-$LINENO}: checking resolved symbolic links for CXX" >&5
+ $as_echo_n "checking resolved symbolic links for CXX... " >&6; }
+- TEST_COMPILER="$CXX"
+
+ if test "x$OPENJDK_BUILD_OS" != xwindows; then
+ # Follow a chain of symbolic links. Use readlink
+@@ -21153,8 +21243,9 @@
+ fi
+ fi
+
+- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $TEST_COMPILER" >&5
++ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $TEST_COMPILER" >&5
+ $as_echo "$TEST_COMPILER" >&6; }
++ fi
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking if CXX is disguised ccache" >&5
+ $as_echo_n "checking if CXX is disguised ccache... " >&6; }
+
+@@ -21618,6 +21709,15 @@
+ COMPILER_VERSION=`$ECHO $COMPILER_VERSION_TEST | $SED -n "s/^.*[ ,\t]$COMPILER_NAME[ ,\t]\([1-9]\.[0-9][0-9]*\).*/\1/p"`
+ COMPILER_VENDOR="Sun Studio"
+ fi
++ elif test "x$OPENJDK_TARGET_OS" = xaix; then
++ COMPILER_VERSION_TEST=`$COMPILER -qversion 2>&1 | $TAIL -n 1`
++ $ECHO $COMPILER_VERSION_TEST | $GREP "^Version: " > /dev/null
++ if test $? -ne 0; then
++ as_fn_error $? "Failed to detect the compiler version of $COMPILER ...." "$LINENO" 5
++ else
++ COMPILER_VERSION=`$ECHO $COMPILER_VERSION_TEST | $SED -n 's/Version: \([0-9][0-9]\.[0-9][0-9]*\).*/\1/p'`
++ COMPILER_VENDOR='IBM'
++ fi
+ elif test "x$OPENJDK_TARGET_OS" = xwindows; then
+ # First line typically looks something like:
+ # Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 16.00.40219.01 for 80x86
+@@ -22823,6 +22923,8 @@
+ fi
+ if test "x$OPENJDK_TARGET_OS" = xmacosx; then
+ ARFLAGS="-r"
++ elif test "x$OPENJDK_TARGET_OS" = xaix; then
++ ARFLAGS="-X64"
+ else
+ ARFLAGS=""
+ fi
+@@ -28798,16 +28900,17 @@
+ # is made at runtime.)
+ #
+
+- if test "x$OPENJDK_TARGET_OS" = xsolaris; then
+- # Always specify -m flags on Solaris
++ if test "x$OPENJDK_TARGET_OS" = xsolaris || test "x$OPENJDK_TARGET_OS" = xaix; then
++ # Always specify -m flag on Solaris
++ # And -q on AIX because otherwise the compiler produces 32-bit objects by default
+
+ # When we add flags to the "official" CFLAGS etc, we need to
+ # keep track of these additions in ADDED_CFLAGS etc. These
+ # will later be checked to make sure only controlled additions
+ # have been made to CFLAGS etc.
+- ADDED_CFLAGS=" -m${OPENJDK_TARGET_CPU_BITS}"
+- ADDED_CXXFLAGS=" -m${OPENJDK_TARGET_CPU_BITS}"
+- ADDED_LDFLAGS=" -m${OPENJDK_TARGET_CPU_BITS}"
++ ADDED_CFLAGS=" ${COMPILER_TARGET_BITS_FLAG}${OPENJDK_TARGET_CPU_BITS}"
++ ADDED_CXXFLAGS=" ${COMPILER_TARGET_BITS_FLAG}${OPENJDK_TARGET_CPU_BITS}"
++ ADDED_LDFLAGS=" ${COMPILER_TARGET_BITS_FLAG}${OPENJDK_TARGET_CPU_BITS}"
+
+ CFLAGS="${CFLAGS}${ADDED_CFLAGS}"
+ CXXFLAGS="${CXXFLAGS}${ADDED_CXXFLAGS}"
+@@ -28825,9 +28928,9 @@
+ # keep track of these additions in ADDED_CFLAGS etc. These
+ # will later be checked to make sure only controlled additions
+ # have been made to CFLAGS etc.
+- ADDED_CFLAGS=" -m${OPENJDK_TARGET_CPU_BITS}"
+- ADDED_CXXFLAGS=" -m${OPENJDK_TARGET_CPU_BITS}"
+- ADDED_LDFLAGS=" -m${OPENJDK_TARGET_CPU_BITS}"
++ ADDED_CFLAGS=" ${COMPILER_TARGET_BITS_FLAG}${OPENJDK_TARGET_CPU_BITS}"
++ ADDED_CXXFLAGS=" ${COMPILER_TARGET_BITS_FLAG}${OPENJDK_TARGET_CPU_BITS}"
++ ADDED_LDFLAGS=" ${COMPILER_TARGET_BITS_FLAG}${OPENJDK_TARGET_CPU_BITS}"
+
+ CFLAGS="${CFLAGS}${ADDED_CFLAGS}"
+ CXXFLAGS="${CXXFLAGS}${ADDED_CXXFLAGS}"
+@@ -28901,20 +29004,85 @@
+
+
+
+- if test "x$SIZEOF_INT_P" != "x$ac_cv_sizeof_int_p"; then
+- # Workaround autoconf bug, see http://lists.gnu.org/archive/html/autoconf/2010-07/msg00004.html
+- SIZEOF_INT_P="$ac_cv_sizeof_int_p"
+- fi
+-
+- if test "x$SIZEOF_INT_P" = x; then
++ # AC_CHECK_SIZEOF defines 'ac_cv_sizeof_int_p' to hold the number of bytes used by an 'int*'
++ if test "x$ac_cv_sizeof_int_p" = x; then
+ # The test failed, lets stick to the assumed value.
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: The number of bits in the target could not be determined, using $OPENJDK_TARGET_CPU_BITS." >&5
+ $as_echo "$as_me: WARNING: The number of bits in the target could not be determined, using $OPENJDK_TARGET_CPU_BITS." >&2;}
+ else
+- TESTED_TARGET_CPU_BITS=`expr 8 \* $SIZEOF_INT_P`
++ TESTED_TARGET_CPU_BITS=`expr 8 \* $ac_cv_sizeof_int_p`
+
+ if test "x$TESTED_TARGET_CPU_BITS" != "x$OPENJDK_TARGET_CPU_BITS"; then
+- as_fn_error $? "The tested number of bits in the target ($TESTED_TARGET_CPU_BITS) differs from the number of bits expected to be found in the target ($OPENJDK_TARGET_CPU_BITS)" "$LINENO" 5
++ # This situation may happen on 64-bit platforms where the compiler by default only generates 32-bit objects
++ # Let's try to implicitly set the compiler's target architecture and retry the test
++ { $as_echo "$as_me:${as_lineno-$LINENO}: The tested number of bits in the target ($TESTED_TARGET_CPU_BITS) differs from the number of bits expected to be found in the target ($OPENJDK_TARGET_CPU_BITS)." >&5
++$as_echo "$as_me: The tested number of bits in the target ($TESTED_TARGET_CPU_BITS) differs from the number of bits expected to be found in the target ($OPENJDK_TARGET_CPU_BITS)." >&6;}
++ { $as_echo "$as_me:${as_lineno-$LINENO}: I'll retry after setting the platforms compiler target bits flag to ${COMPILER_TARGET_BITS_FLAG}${OPENJDK_TARGET_CPU_BITS}" >&5
++$as_echo "$as_me: I'll retry after setting the platforms compiler target bits flag to ${COMPILER_TARGET_BITS_FLAG}${OPENJDK_TARGET_CPU_BITS}" >&6;}
++
++ # When we add flags to the "official" CFLAGS etc, we need to
++ # keep track of these additions in ADDED_CFLAGS etc. These
++ # will later be checked to make sure only controlled additions
++ # have been made to CFLAGS etc.
++ ADDED_CFLAGS=" ${COMPILER_TARGET_BITS_FLAG}${OPENJDK_TARGET_CPU_BITS}"
++ ADDED_CXXFLAGS=" ${COMPILER_TARGET_BITS_FLAG}${OPENJDK_TARGET_CPU_BITS}"
++ ADDED_LDFLAGS=" ${COMPILER_TARGET_BITS_FLAG}${OPENJDK_TARGET_CPU_BITS}"
++
++ CFLAGS="${CFLAGS}${ADDED_CFLAGS}"
++ CXXFLAGS="${CXXFLAGS}${ADDED_CXXFLAGS}"
++ LDFLAGS="${LDFLAGS}${ADDED_LDFLAGS}"
++
++ CFLAGS_JDK="${CFLAGS_JDK}${ADDED_CFLAGS}"
++ CXXFLAGS_JDK="${CXXFLAGS_JDK}${ADDED_CXXFLAGS}"
++ LDFLAGS_JDK="${LDFLAGS_JDK}${ADDED_LDFLAGS}"
++
++
++ # We have to unset 'ac_cv_sizeof_int_p' first, otherwise AC_CHECK_SIZEOF will use the previously cached value!
++ unset ac_cv_sizeof_int_p
++ # And we have to undef the definition of SIZEOF_INT_P in confdefs.h by the previous invocation of AC_CHECK_SIZEOF
++ cat >>confdefs.h <<_ACEOF
++#undef SIZEOF_INT_P
++_ACEOF
++
++ # The cast to long int works around a bug in the HP C Compiler
++# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
++# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.
++# This bug is HP SR number 8606223364.
++{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of int *" >&5
++$as_echo_n "checking size of int *... " >&6; }
++if ${ac_cv_sizeof_int_p+:} false; then :
++ $as_echo_n "(cached) " >&6
++else
++ if ac_fn_cxx_compute_int "$LINENO" "(long int) (sizeof (int *))" "ac_cv_sizeof_int_p" "$ac_includes_default"; then :
++
++else
++ if test "$ac_cv_type_int_p" = yes; then
++ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
++$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
++as_fn_error 77 "cannot compute sizeof (int *)
++See \`config.log' for more details" "$LINENO" 5; }
++ else
++ ac_cv_sizeof_int_p=0
++ fi
++fi
++
++fi
++{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_int_p" >&5
++$as_echo "$ac_cv_sizeof_int_p" >&6; }
++
++
++
++cat >>confdefs.h <<_ACEOF
++#define SIZEOF_INT_P $ac_cv_sizeof_int_p
++_ACEOF
++
++
++
++ TESTED_TARGET_CPU_BITS=`expr 8 \* $ac_cv_sizeof_int_p`
++
++ if test "x$TESTED_TARGET_CPU_BITS" != "x$OPENJDK_TARGET_CPU_BITS"; then
++ as_fn_error $? "The tested number of bits in the target ($TESTED_TARGET_CPU_BITS) differs from the number of bits expected to be found in the target ($OPENJDK_TARGET_CPU_BITS)" "$LINENO" 5
++ fi
+ fi
+ fi
+
+@@ -29227,6 +29395,29 @@
+ POST_STRIP_CMD="$STRIP -x"
+ POST_MCS_CMD="$MCS -d -a \"JDK $FULL_VERSION\""
+ fi
++ if test "x$OPENJDK_TARGET_OS" = xaix; then
++ COMPILER_NAME=xlc
++ PICFLAG="-qpic=large"
++ LIBRARY_PREFIX=lib
++ SHARED_LIBRARY='lib$1.so'
++ STATIC_LIBRARY='lib$1.a'
++ SHARED_LIBRARY_FLAGS="-qmkshrobj"
++ SHARED_LIBRARY_SUFFIX='.so'
++ STATIC_LIBRARY_SUFFIX='.a'
++ OBJ_SUFFIX='.o'
++ EXE_SUFFIX=''
++ SET_SHARED_LIBRARY_NAME=''
++ SET_SHARED_LIBRARY_MAPFILE=''
++ C_FLAG_REORDER=''
++ CXX_FLAG_REORDER=''
++ SET_SHARED_LIBRARY_ORIGIN=''
++ SET_EXECUTABLE_ORIGIN=""
++ CFLAGS_JDK=""
++ CXXFLAGS_JDK=""
++ CFLAGS_JDKLIB_EXTRA=''
++ POST_STRIP_CMD="$STRIP -X32_64"
++ POST_MCS_CMD=""
++ fi
+ if test "x$OPENJDK_TARGET_OS" = xwindows; then
+ # If it is not gcc, then assume it is the MS Visual Studio compiler
+ COMPILER_NAME=cl
+@@ -29412,6 +29603,24 @@
+
+ CFLAGS_DEBUG_SYMBOLS="-g -xs"
+ CXXFLAGS_DEBUG_SYMBOLS="-g0 -xs"
++ ;;
++ xlc )
++ C_FLAG_DEPS="-qmakedep=gcc -MF"
++ CXX_FLAG_DEPS="-qmakedep=gcc -MF"
++ C_O_FLAG_HIGHEST="-O3"
++ C_O_FLAG_HI="-O3 -qstrict"
++ C_O_FLAG_NORM="-O2"
++ C_O_FLAG_NONE=""
++ CXX_O_FLAG_HIGHEST="-O3"
++ CXX_O_FLAG_HI="-O3 -qstrict"
++ CXX_O_FLAG_NORM="-O2"
++ CXX_O_FLAG_NONE=""
++ CFLAGS_DEBUG_SYMBOLS="-g"
++ CXXFLAGS_DEBUG_SYMBOLS="-g"
++ LDFLAGS_JDK="${LDFLAGS_JDK} -q64 -brtl -bnolibpath -liconv -bexpall"
++ CFLAGS_JDK="${CFLAGS_JDK} -qchars=signed -q64 -qfullpath -qsaveopt"
++ CXXFLAGS_JDK="${CXXFLAGS_JDK} -qchars=signed -q64 -qfullpath -qsaveopt"
++ ;;
+ esac
+ ;;
+ CL )
+@@ -29535,6 +29744,13 @@
+ LDFLAGS_JDK="$LDFLAGS_JDK -z defs -xildoff -ztext"
+ LDFLAGS_CXX_JDK="$LDFLAGS_CXX_JDK -norunpath -xnolib"
+ ;;
++ xlc )
++ CFLAGS_JDK="$CFLAGS_JDK -D_GNU_SOURCE -D_REENTRANT -D_LARGEFILE64_SOURCE -DSTDC"
++ CXXFLAGS_JDK="$CXXFLAGS_JDK -D_GNU_SOURCE -D_REENTRANT -D_LARGEFILE64_SOURCE -DSTDC"
++
++ LDFLAGS_JDK="$LDFLAGS_JDK"
++ LDFLAGS_CXX_JDK="$LDFLAGS_CXX_JDK"
++ ;;
+ cl )
+ CCXXFLAGS_JDK="$CCXXFLAGS $CCXXFLAGS_JDK -Zi -MD -Zc:wchar_t- -W3 -wd4800 \
+ -D_STATIC_CPPLIB -D_DISABLE_DEPRECATE_STATIC_CPPLIB -DWIN32_LEAN_AND_MEAN \
+@@ -29604,6 +29820,9 @@
+ if test "x$OPENJDK_TARGET_OS" = xsolaris; then
+ CCXXFLAGS_JDK="$CCXXFLAGS_JDK -DSOLARIS"
+ fi
++ if test "x$OPENJDK_TARGET_OS" = xaix; then
++ CCXXFLAGS_JDK="$CCXXFLAGS_JDK -DAIX -DPPC64"
++ fi
+ if test "x$OPENJDK_TARGET_OS" = xmacosx; then
+ CCXXFLAGS_JDK="$CCXXFLAGS_JDK -DMACOSX -D_ALLBSD_SOURCE -D_DARWIN_UNLIMITED_SELECT"
+ # Setting these parameters makes it an error to link to macosx APIs that are
+@@ -29736,10 +29955,10 @@
+ # ZERO_ARCHFLAG tells the compiler which mode to build for
+ case "${OPENJDK_TARGET_CPU}" in
+ s390)
+- ZERO_ARCHFLAG="-m31"
++ ZERO_ARCHFLAG="${COMPILER_TARGET_BITS_FLAG}31"
+ ;;
+ *)
+- ZERO_ARCHFLAG="-m${OPENJDK_TARGET_CPU_BITS}"
++ ZERO_ARCHFLAG="${COMPILER_TARGET_BITS_FLAG}${OPENJDK_TARGET_CPU_BITS}"
+ esac
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking if compiler supports \"$ZERO_ARCHFLAG\"" >&5
+@@ -29808,15 +30027,15 @@
+
+
+
+- # Check that the compiler supports -mX flags
++ # Check that the compiler supports -mX (or -qX on AIX) flags
+ # Set COMPILER_SUPPORTS_TARGET_BITS_FLAG to 'true' if it does
+
+- { $as_echo "$as_me:${as_lineno-$LINENO}: checking if compiler supports \"-m${OPENJDK_TARGET_CPU_BITS}\"" >&5
+-$as_echo_n "checking if compiler supports \"-m${OPENJDK_TARGET_CPU_BITS}\"... " >&6; }
++ { $as_echo "$as_me:${as_lineno-$LINENO}: checking if compiler supports \"${COMPILER_TARGET_BITS_FLAG}${OPENJDK_TARGET_CPU_BITS}\"" >&5
++$as_echo_n "checking if compiler supports \"${COMPILER_TARGET_BITS_FLAG}${OPENJDK_TARGET_CPU_BITS}\"... " >&6; }
+ supports=yes
+
+ saved_cflags="$CFLAGS"
+- CFLAGS="$CFLAGS -m${OPENJDK_TARGET_CPU_BITS}"
++ CFLAGS="$CFLAGS ${COMPILER_TARGET_BITS_FLAG}${OPENJDK_TARGET_CPU_BITS}"
+ ac_ext=c
+ ac_cpp='$CPP $CPPFLAGS'
+ ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+@@ -29842,7 +30061,7 @@
+ CFLAGS="$saved_cflags"
+
+ saved_cxxflags="$CXXFLAGS"
+- CXXFLAGS="$CXXFLAG -m${OPENJDK_TARGET_CPU_BITS}"
++ CXXFLAGS="$CXXFLAG ${COMPILER_TARGET_BITS_FLAG}${OPENJDK_TARGET_CPU_BITS}"
+ ac_ext=cpp
+ ac_cpp='$CXXCPP $CPPFLAGS'
+ ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+@@ -29878,6 +30097,27 @@
+
+
+
++ # Check for the broken SuSE 'ld' which complains 'Only anonymous version tag is allowed in executable.'
++ USING_BROKEN_SUSE_LD=no
++ if test "x$OPENJDK_TARGET_OS" = xlinux && test "x$GCC" = xyes; then
++ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for broken SuSE 'ld' which only understands anonymous version tags in executables" >&5
++$as_echo_n "checking for broken SuSE 'ld' which only understands anonymous version tags in executables... " >&6; }
++ echo "SUNWprivate_1.1 { local: *; };" > version-script.map
++ echo "int main() { }" > main.c
++ if $CXX -Xlinker -version-script=version-script.map main.c 2>&5 >&5; then
++ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
++$as_echo "no" >&6; }
++ USING_BROKEN_SUSE_LD=no
++ else
++ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
++$as_echo "yes" >&6; }
++ USING_BROKEN_SUSE_LD=yes
++ fi
++ rm -rf version-script.map main.c
++ fi
++
++
++
+ # Setup debug symbols (need objcopy from the toolchain for that)
+
+ #
+@@ -30034,6 +30274,16 @@
+ $as_echo "alsa pulse" >&6; }
+ fi
+
++ if test "x$OPENJDK_TARGET_OS" = xaix; then
++ { $as_echo "$as_me:${as_lineno-$LINENO}: checking what is not needed on AIX?" >&5
++$as_echo_n "checking what is not needed on AIX?... " >&6; }
++ ALSA_NOT_NEEDED=yes
++ PULSE_NOT_NEEDED=yes
++ { $as_echo "$as_me:${as_lineno-$LINENO}: result: alsa pulse" >&5
++$as_echo "alsa pulse" >&6; }
++ fi
++
++
+ if test "x$OPENJDK_TARGET_OS" = xwindows; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking what is not needed on Windows?" >&5
+ $as_echo_n "checking what is not needed on Windows?... " >&6; }
+@@ -30844,8 +31094,6 @@
+ pkgutil_help $MISSING_DEPENDENCY ;;
+ pkgadd)
+ pkgadd_help $MISSING_DEPENDENCY ;;
+- * )
+- break ;;
+ esac
+
+ if test "x$PKGHANDLER_COMMAND" != x; then
+@@ -30936,8 +31184,6 @@
+ pkgutil_help $MISSING_DEPENDENCY ;;
+ pkgadd)
+ pkgadd_help $MISSING_DEPENDENCY ;;
+- * )
+- break ;;
+ esac
+
+ if test "x$PKGHANDLER_COMMAND" != x; then
+@@ -31198,8 +31444,6 @@
+ pkgutil_help $MISSING_DEPENDENCY ;;
+ pkgadd)
+ pkgadd_help $MISSING_DEPENDENCY ;;
+- * )
+- break ;;
+ esac
+
+ if test "x$PKGHANDLER_COMMAND" != x; then
+@@ -33960,8 +34204,6 @@
+ pkgutil_help $MISSING_DEPENDENCY ;;
+ pkgadd)
+ pkgadd_help $MISSING_DEPENDENCY ;;
+- * )
+- break ;;
+ esac
+
+ if test "x$PKGHANDLER_COMMAND" != x; then
+@@ -34297,8 +34539,6 @@
+ pkgutil_help $MISSING_DEPENDENCY ;;
+ pkgadd)
+ pkgadd_help $MISSING_DEPENDENCY ;;
+- * )
+- break ;;
+ esac
+
+ if test "x$PKGHANDLER_COMMAND" != x; then
+@@ -34377,7 +34617,7 @@
+ fi
+
+ if test "x${with_alsa}" != x; then
+- ALSA_LIBS="-L${with_alsa}/lib -lalsa"
++ ALSA_LIBS="-L${with_alsa}/lib -lasound"
+ ALSA_CFLAGS="-I${with_alsa}/include"
+ ALSA_FOUND=yes
+ fi
+@@ -34386,7 +34626,7 @@
+ ALSA_FOUND=yes
+ fi
+ if test "x${with_alsa_lib}" != x; then
+- ALSA_LIBS="-L${with_alsa_lib} -lalsa"
++ ALSA_LIBS="-L${with_alsa_lib} -lasound"
+ ALSA_FOUND=yes
+ fi
+ if test "x$ALSA_FOUND" = xno; then
+@@ -34650,8 +34890,6 @@
+ pkgutil_help $MISSING_DEPENDENCY ;;
+ pkgadd)
+ pkgadd_help $MISSING_DEPENDENCY ;;
+- * )
+- break ;;
+ esac
+
+ if test "x$PKGHANDLER_COMMAND" != x; then
+@@ -35454,6 +35692,9 @@
+ # Looks like a MacOSX system
+ NUM_CORES=`/usr/sbin/system_profiler -detailLevel full SPHardwareDataType | grep 'Cores' | awk '{print $5}'`
+ FOUND_CORES=yes
++ elif test "x$OPENJDK_BUILD_OS" = xaix ; then
++ NUM_CORES=`/usr/sbin/prtconf | grep "^Number Of Processors" | awk '{ print $4 }'`
++ FOUND_CORES=yes
+ elif test -n "$NUMBER_OF_PROCESSORS"; then
+ # On windows, look in the env
+ NUM_CORES=$NUMBER_OF_PROCESSORS
+@@ -35498,8 +35739,8 @@
+ MEMORY_SIZE=`expr $MEMORY_SIZE / 1024`
+ FOUND_MEM=yes
+ elif test -x /usr/sbin/prtconf; then
+- # Looks like a Solaris system
+- MEMORY_SIZE=`/usr/sbin/prtconf | grep "Memory size" | awk '{ print $3 }'`
++ # Looks like a Solaris or AIX system
++ MEMORY_SIZE=`/usr/sbin/prtconf | grep "^Memory [Ss]ize" | awk '{ print $3 }'`
+ FOUND_MEM=yes
+ elif test -x /usr/sbin/system_profiler; then
+ # Looks like a MacOSX system
+@@ -37410,8 +37651,6 @@
+ pkgutil_help $MISSING_DEPENDENCY ;;
+ pkgadd)
+ pkgadd_help $MISSING_DEPENDENCY ;;
+- * )
+- break ;;
+ esac
+
+ if test "x$PKGHANDLER_COMMAND" != x; then
+--- ./common/autoconf/help.m4 Tue Jun 03 14:19:17 2014 -0700
++++ ./common/autoconf/help.m4 Wed Jul 30 03:50:56 2014 -0700
+@@ -52,8 +52,6 @@
+ pkgutil_help $MISSING_DEPENDENCY ;;
+ pkgadd)
+ pkgadd_help $MISSING_DEPENDENCY ;;
+- * )
+- break ;;
+ esac
+
+ if test "x$PKGHANDLER_COMMAND" != x; then
+@@ -92,8 +90,6 @@
+ If you put the resulting build in \"C:\Program Files\GnuWin32\", it will be found automatically."
+ fi
+ ;;
+- * )
+- break ;;
+ esac
+ }
+
+@@ -119,8 +115,6 @@
+ PKGHANDLER_COMMAND="sudo apt-get install libX11-dev libxext-dev libxrender-dev libxtst-dev libxt-dev" ;;
+ ccache)
+ PKGHANDLER_COMMAND="sudo apt-get install ccache" ;;
+- * )
+- break ;;
+ esac
+ }
+
+@@ -142,8 +136,6 @@
+ PKGHANDLER_COMMAND="sudo yum install libXtst-devel libXt-devel libXrender-devel" ;;
+ ccache)
+ PKGHANDLER_COMMAND="sudo yum install ccache" ;;
+- * )
+- break ;;
+ esac
+ }
+
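A note on the repeated removal of the '* ) break ;;' arms in this file and in
generated-configure.sh: a shell case statement needs no break, and POSIX
leaves 'break' outside of a loop unspecified, so strict shells can reject it
(that is my reading; the patch itself states no rationale). The surviving
shape is simply:

    case "$PKGHANDLER" in
      apt-get) PKGHANDLER_COMMAND="sudo apt-get install ccache" ;;
      yum)     PKGHANDLER_COMMAND="sudo yum install ccache" ;;
    esac   # unmatched input falls through; no default arm is required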
+--- ./common/autoconf/hotspot-spec.gmk.in Tue Jun 03 14:19:17 2014 -0700
++++ ./common/autoconf/hotspot-spec.gmk.in Wed Jul 30 03:50:56 2014 -0700
+@@ -91,6 +91,11 @@
+ ALT_OUTPUTDIR=$(HOTSPOT_OUTPUTDIR)
+ ALT_EXPORT_PATH=$(HOTSPOT_DIST)
+
++JVM_INTERPRETER:=@JVM_INTERPRETER@
++ifeq ($(JVM_INTERPRETER), cpp)
++ CC_INTERP=true
++endif
++
+ HOTSPOT_MAKE_ARGS:=@HOTSPOT_MAKE_ARGS@ @STATIC_CXX_SETTING@
+ # This is used from the libjvm build for C/C++ code.
+ HOTSPOT_BUILD_JOBS:=$(JOBS)
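With this plumbing in place, the interpreter choice becomes a configure-time
switch that hotspot-spec.gmk turns into CC_INTERP=true for the HotSpot make.
A hypothetical invocation combining the two knobs this patch introduces:

    bash ./configure --with-jvm-variants=core --with-jvm-interpreter=cpp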
+--- ./common/autoconf/jdk-options.m4 Tue Jun 03 14:19:17 2014 -0700
++++ ./common/autoconf/jdk-options.m4 Wed Jul 30 03:50:56 2014 -0700
+@@ -51,6 +51,33 @@
+ AC_MSG_RESULT([$JDK_VARIANT])
+ ])
+
++AC_DEFUN_ONCE([JDKOPT_SETUP_JVM_INTERPRETER],
++[
++###############################################################################
++#
++# Check which interpreter of the JVM we want to build.
++# Currently we have:
++# template: Template interpreter (the default)
++# cpp : C++ interpreter
++AC_MSG_CHECKING([which interpreter of the JVM to build])
++AC_ARG_WITH([jvm-interpreter], [AS_HELP_STRING([--with-jvm-interpreter],
++ [JVM interpreter to build (template, cpp) @<:@template@:>@])])
++
++if test "x$with_jvm_interpreter" = x; then
++ with_jvm_interpreter="template"
++fi
++
++JVM_INTERPRETER="$with_jvm_interpreter"
++
++if test "x$JVM_INTERPRETER" != xtemplate && test "x$JVM_INTERPRETER" != xcpp; then
++ AC_MSG_ERROR([The available JVM interpreters are: template, cpp])
++fi
++
++AC_SUBST(JVM_INTERPRETER)
++
++AC_MSG_RESULT([$with_jvm_interpreter])
++])
++
+ AC_DEFUN_ONCE([JDKOPT_SETUP_JVM_VARIANTS],
+ [
+
+@@ -65,19 +92,20 @@
+ # ie normal interpreter and C1, only the serial GC, kernel jvmti etc
+ # zero: no machine code interpreter, no compiler
+ # zeroshark: zero interpreter and shark/llvm compiler backend
++# core: interpreter only, no compiler (only works on some platforms)
+ AC_MSG_CHECKING([which variants of the JVM to build])
+ AC_ARG_WITH([jvm-variants], [AS_HELP_STRING([--with-jvm-variants],
+- [JVM variants (separated by commas) to build (server, client, minimal1, kernel, zero, zeroshark) @<:@server@:>@])])
++ [JVM variants (separated by commas) to build (server, client, minimal1, kernel, zero, zeroshark, core) @<:@server@:>@])])
+
+ if test "x$with_jvm_variants" = x; then
+ with_jvm_variants="server"
+ fi
+
+ JVM_VARIANTS=",$with_jvm_variants,"
+- TEST_VARIANTS=`$ECHO "$JVM_VARIANTS" | $SED -e 's/server,//' -e 's/client,//' -e 's/minimal1,//' -e 's/kernel,//' -e 's/zero,//' -e 's/zeroshark,//'`
++ TEST_VARIANTS=`$ECHO "$JVM_VARIANTS" | $SED -e 's/server,//' -e 's/client,//' -e 's/minimal1,//' -e 's/kernel,//' -e 's/zero,//' -e 's/zeroshark,//' -e 's/core,//'`
+
+ if test "x$TEST_VARIANTS" != "x,"; then
+- AC_MSG_ERROR([The available JVM variants are: server, client, minimal1, kernel, zero, zeroshark])
++ AC_MSG_ERROR([The available JVM variants are: server, client, minimal1, kernel, zero, zeroshark, core])
+ fi
+ AC_MSG_RESULT([$with_jvm_variants])
+
+@@ -87,6 +115,7 @@
+ JVM_VARIANT_KERNEL=`$ECHO "$JVM_VARIANTS" | $SED -e '/,kernel,/!s/.*/false/g' -e '/,kernel,/s/.*/true/g'`
+ JVM_VARIANT_ZERO=`$ECHO "$JVM_VARIANTS" | $SED -e '/,zero,/!s/.*/false/g' -e '/,zero,/s/.*/true/g'`
+ JVM_VARIANT_ZEROSHARK=`$ECHO "$JVM_VARIANTS" | $SED -e '/,zeroshark,/!s/.*/false/g' -e '/,zeroshark,/s/.*/true/g'`
++ JVM_VARIANT_CORE=`$ECHO "$JVM_VARIANTS" | $SED -e '/,core,/!s/.*/false/g' -e '/,core,/s/.*/true/g'`
+
+ if test "x$JVM_VARIANT_CLIENT" = xtrue; then
+ if test "x$OPENJDK_TARGET_CPU_BITS" = x64; then
+@@ -106,7 +135,7 @@
+
+ # Replace the commas with AND for use in the build directory name.
+ ANDED_JVM_VARIANTS=`$ECHO "$JVM_VARIANTS" | $SED -e 's/^,//' -e 's/,$//' -e 's/,/AND/'`
+- COUNT_VARIANTS=`$ECHO "$JVM_VARIANTS" | $SED -e 's/server,/1/' -e 's/client,/1/' -e 's/minimal1,/1/' -e 's/kernel,/1/' -e 's/zero,/1/' -e 's/zeroshark,/1/'`
++ COUNT_VARIANTS=`$ECHO "$JVM_VARIANTS" | $SED -e 's/server,/1/' -e 's/client,/1/' -e 's/minimal1,/1/' -e 's/kernel,/1/' -e 's/zero,/1/' -e 's/zeroshark,/1/' -e 's/core,/1/'`
+ if test "x$COUNT_VARIANTS" != "x,1"; then
+ BUILDING_MULTIPLE_JVM_VARIANTS=yes
+ else
+@@ -120,6 +149,7 @@
+ AC_SUBST(JVM_VARIANT_KERNEL)
+ AC_SUBST(JVM_VARIANT_ZERO)
+ AC_SUBST(JVM_VARIANT_ZEROSHARK)
++ AC_SUBST(JVM_VARIANT_CORE)
+
+ INCLUDE_SA=true
+ if test "x$JVM_VARIANT_ZERO" = xtrue ; then
+@@ -128,6 +158,9 @@
+ if test "x$JVM_VARIANT_ZEROSHARK" = xtrue ; then
+ INCLUDE_SA=false
+ fi
++ if test "x$VAR_CPU" = xppc64 ; then
++ INCLUDE_SA=false
++ fi
+ AC_SUBST(INCLUDE_SA)
+
+ if test "x$OPENJDK_TARGET_OS" = "xmacosx"; then
+@@ -236,6 +269,10 @@
+ HOTSPOT_TARGET="$HOTSPOT_TARGET${HOTSPOT_DEBUG_LEVEL}shark "
+ fi
+
++ if test "x$JVM_VARIANT_CORE" = xtrue; then
++ HOTSPOT_TARGET="$HOTSPOT_TARGET${HOTSPOT_DEBUG_LEVEL}core "
++ fi
++
+ HOTSPOT_TARGET="$HOTSPOT_TARGET docs export_$HOTSPOT_EXPORT"
+
+ # On Macosx universal binaries are produced, but they only contain
+--- ./common/autoconf/libraries.m4 Tue Jun 03 14:19:17 2014 -0700
++++ ./common/autoconf/libraries.m4 Wed Jul 30 03:50:56 2014 -0700
+@@ -43,6 +43,14 @@
+ AC_MSG_RESULT([alsa pulse])
+ fi
+
++ if test "x$OPENJDK_TARGET_OS" = xaix; then
++ AC_MSG_CHECKING([what is not needed on AIX?])
++ ALSA_NOT_NEEDED=yes
++ PULSE_NOT_NEEDED=yes
++ AC_MSG_RESULT([alsa pulse])
++ fi
++
++
+ if test "x$OPENJDK_TARGET_OS" = xwindows; then
+ AC_MSG_CHECKING([what is not needed on Windows?])
+ CUPS_NOT_NEEDED=yes
+@@ -554,7 +562,7 @@
+ fi
+
+ if test "x${with_alsa}" != x; then
+- ALSA_LIBS="-L${with_alsa}/lib -lalsa"
++ ALSA_LIBS="-L${with_alsa}/lib -lasound"
+ ALSA_CFLAGS="-I${with_alsa}/include"
+ ALSA_FOUND=yes
+ fi
+@@ -563,7 +571,7 @@
+ ALSA_FOUND=yes
+ fi
+ if test "x${with_alsa_lib}" != x; then
+- ALSA_LIBS="-L${with_alsa_lib} -lalsa"
++ ALSA_LIBS="-L${with_alsa_lib} -lasound"
+ ALSA_FOUND=yes
+ fi
+ if test "x$ALSA_FOUND" = xno; then
+--- ./common/autoconf/platform.m4 Tue Jun 03 14:19:17 2014 -0700
++++ ./common/autoconf/platform.m4 Wed Jul 30 03:50:56 2014 -0700
+@@ -60,6 +60,12 @@
+ VAR_CPU_BITS=64
+ VAR_CPU_ENDIAN=big
+ ;;
++ powerpc64le)
++ VAR_CPU=ppc64
++ VAR_CPU_ARCH=ppc
++ VAR_CPU_BITS=64
++ VAR_CPU_ENDIAN=little
++ ;;
+ s390)
+ VAR_CPU=s390
+ VAR_CPU_ARCH=s390
+@@ -126,6 +132,11 @@
+ VAR_OS_API=winapi
+ VAR_OS_ENV=windows.msys
+ ;;
++ *aix*)
++ VAR_OS=aix
++ VAR_OS_API=posix
++ VAR_OS_ENV=aix
++ ;;
+ *)
+ AC_MSG_ERROR([unsupported operating system $1])
+ ;;
+@@ -432,9 +443,9 @@
+ # keep track of these additions in ADDED_CFLAGS etc. These
+ # will later be checked to make sure only controlled additions
+ # have been made to CFLAGS etc.
+- ADDED_CFLAGS=" -m${OPENJDK_TARGET_CPU_BITS}"
+- ADDED_CXXFLAGS=" -m${OPENJDK_TARGET_CPU_BITS}"
+- ADDED_LDFLAGS=" -m${OPENJDK_TARGET_CPU_BITS}"
++ ADDED_CFLAGS=" ${COMPILER_TARGET_BITS_FLAG}${OPENJDK_TARGET_CPU_BITS}"
++ ADDED_CXXFLAGS=" ${COMPILER_TARGET_BITS_FLAG}${OPENJDK_TARGET_CPU_BITS}"
++ ADDED_LDFLAGS=" ${COMPILER_TARGET_BITS_FLAG}${OPENJDK_TARGET_CPU_BITS}"
+
+ CFLAGS="${CFLAGS}${ADDED_CFLAGS}"
+ CXXFLAGS="${CXXFLAGS}${ADDED_CXXFLAGS}"
+@@ -454,8 +465,9 @@
+ # is made at runtime.)
+ #
+
+- if test "x$OPENJDK_TARGET_OS" = xsolaris; then
+- # Always specify -m flags on Solaris
++ if test "x$OPENJDK_TARGET_OS" = xsolaris || test "x$OPENJDK_TARGET_OS" = xaix; then
++ # Always specify -m flag on Solaris
++ # And -q on AIX because otherwise the compiler produces 32-bit objects by default
+ PLATFORM_SET_COMPILER_TARGET_BITS_FLAGS
+ elif test "x$COMPILE_TYPE" = xreduced; then
+ if test "x$OPENJDK_TARGET_OS" != xwindows; then
+@@ -477,19 +489,34 @@
+
+ AC_CHECK_SIZEOF([int *], [1111])
+
+- if test "x$SIZEOF_INT_P" != "x$ac_cv_sizeof_int_p"; then
+- # Workaround autoconf bug, see http://lists.gnu.org/archive/html/autoconf/2010-07/msg00004.html
+- SIZEOF_INT_P="$ac_cv_sizeof_int_p"
+- fi
+-
+- if test "x$SIZEOF_INT_P" = x; then
++ # AC_CHECK_SIZEOF defines 'ac_cv_sizeof_int_p' to hold the number of bytes used by an 'int*'
++ if test "x$ac_cv_sizeof_int_p" = x; then
+ # The test failed, lets stick to the assumed value.
+ AC_MSG_WARN([The number of bits in the target could not be determined, using $OPENJDK_TARGET_CPU_BITS.])
+ else
+- TESTED_TARGET_CPU_BITS=`expr 8 \* $SIZEOF_INT_P`
++ TESTED_TARGET_CPU_BITS=`expr 8 \* $ac_cv_sizeof_int_p`
+
+ if test "x$TESTED_TARGET_CPU_BITS" != "x$OPENJDK_TARGET_CPU_BITS"; then
+- AC_MSG_ERROR([The tested number of bits in the target ($TESTED_TARGET_CPU_BITS) differs from the number of bits expected to be found in the target ($OPENJDK_TARGET_CPU_BITS)])
++ # This situation may happen on 64-bit platforms where the compiler by default only generates 32-bit objects
++ # Let's try to implicitly set the compiler's target architecture and retry the test
++ AC_MSG_NOTICE([The tested number of bits in the target ($TESTED_TARGET_CPU_BITS) differs from the number of bits expected to be found in the target ($OPENJDK_TARGET_CPU_BITS).])
++ AC_MSG_NOTICE([I'll retry after setting the platform's compiler target bits flag to ${COMPILER_TARGET_BITS_FLAG}${OPENJDK_TARGET_CPU_BITS}])
++ PLATFORM_SET_COMPILER_TARGET_BITS_FLAGS
++
++ # We have to unset 'ac_cv_sizeof_int_p' first, otherwise AC_CHECK_SIZEOF will use the previously cached value!
++ unset ac_cv_sizeof_int_p
++ # And we have to undef the definition of SIZEOF_INT_P in confdefs.h by the previous invocation of AC_CHECK_SIZEOF
++ cat >>confdefs.h <<_ACEOF
++#undef SIZEOF_INT_P
++_ACEOF
++
++ AC_CHECK_SIZEOF([int *], [1111])
++
++ TESTED_TARGET_CPU_BITS=`expr 8 \* $ac_cv_sizeof_int_p`
++
++ if test "x$TESTED_TARGET_CPU_BITS" != "x$OPENJDK_TARGET_CPU_BITS"; then
++ AC_MSG_ERROR([The tested number of bits in the target ($TESTED_TARGET_CPU_BITS) differs from the number of bits expected to be found in the target ($OPENJDK_TARGET_CPU_BITS)])
++ fi
+ fi
+ fi
+
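A worked example of the retry above, assuming an AIX xlc that emits 32-bit
objects by default on a 64-bit target:

    ac_cv_sizeof_int_p=4                            # first probe of sizeof(int *)
    TESTED_TARGET_CPU_BITS=`expr 8 \* $ac_cv_sizeof_int_p`   # -> 32, mismatch
    # append -q64 to CFLAGS/CXXFLAGS/LDFLAGS, unset the cache, probe again
    ac_cv_sizeof_int_p=8
    TESTED_TARGET_CPU_BITS=`expr 8 \* $ac_cv_sizeof_int_p`   # -> 64, accepted

Only a mismatch on the second probe is fatal.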
+--- ./common/autoconf/spec.gmk.in Tue Jun 03 14:19:17 2014 -0700
++++ ./common/autoconf/spec.gmk.in Wed Jul 30 03:50:56 2014 -0700
+@@ -208,6 +208,7 @@
+ JVM_VARIANT_KERNEL:=@JVM_VARIANT_KERNEL@
+ JVM_VARIANT_ZERO:=@JVM_VARIANT_ZERO@
+ JVM_VARIANT_ZEROSHARK:=@JVM_VARIANT_ZEROSHARK@
++JVM_VARIANT_CORE:=@JVM_VARIANT_CORE@
+
+ # Universal binaries on macosx
+ MACOSX_UNIVERSAL=@MACOSX_UNIVERSAL@
+@@ -297,6 +298,8 @@
+ COMPILER_TYPE:=@COMPILER_TYPE@
+ COMPILER_NAME:=@COMPILER_NAME@
+
++# Option used to tell the compiler whether to create 32- or 64-bit executables
++COMPILER_TARGET_BITS_FLAG:=@COMPILER_TARGET_BITS_FLAG@
+ COMPILER_SUPPORTS_TARGET_BITS_FLAG=@COMPILER_SUPPORTS_TARGET_BITS_FLAG@
+
+ CC_OUT_OPTION:=@CC_OUT_OPTION@
+@@ -340,6 +343,11 @@
+ # The linker can be gcc or ld on posix systems, or link.exe on windows systems.
+ LD:=@FIXPATH@ @LD@
+
++# The linker on older SuSE distros (e.g. on SLES 10) complains with:
++# "Invalid version tag `SUNWprivate_1.1'. Only anonymous version tag is allowed in executable."
++# if fed a version script which contains named tags.
++USING_BROKEN_SUSE_LD:=@USING_BROKEN_SUSE_LD@
++
+ # LDFLAGS used to link the jdk native libraries (C-code)
+ LDFLAGS_JDKLIB:=@LDFLAGS_JDKLIB@
+ LDFLAGS_JDKLIB_SUFFIX:=@LDFLAGS_JDKLIB_SUFFIX@
+--- ./common/autoconf/toolchain.m4 Tue Jun 03 14:19:17 2014 -0700
++++ ./common/autoconf/toolchain.m4 Wed Jul 30 03:50:56 2014 -0700
+@@ -44,6 +44,15 @@
+ COMPILER_VERSION=`$ECHO $COMPILER_VERSION_TEST | $SED -n "s/^.*@<:@ ,\t@:>@$COMPILER_NAME@<:@ ,\t@:>@\(@<:@1-9@:>@\.@<:@0-9@:>@@<:@0-9@:>@*\).*/\1/p"`
+ COMPILER_VENDOR="Sun Studio"
+ fi
++ elif test "x$OPENJDK_TARGET_OS" = xaix; then
++ COMPILER_VERSION_TEST=`$COMPILER -qversion 2>&1 | $TAIL -n 1`
++ $ECHO $COMPILER_VERSION_TEST | $GREP "^Version: " > /dev/null
++ if test $? -ne 0; then
++ AC_MSG_ERROR([Failed to detect the compiler version of $COMPILER ....])
++ else
++ COMPILER_VERSION=`$ECHO $COMPILER_VERSION_TEST | $SED -n 's/Version: \(@<:@0-9@:>@@<:@0-9@:>@\.@<:@0-9@:>@@<:@0-9@:>@*\).*/\1/p'`
++ COMPILER_VENDOR='IBM'
++ fi
+ elif test "x$OPENJDK_TARGET_OS" = xwindows; then
+ # First line typically looks something like:
+ # Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 16.00.40219.01 for 80x86
+@@ -137,10 +146,14 @@
+ AC_MSG_ERROR([Could not find a $COMPILER_NAME compiler. $HELP_MSG])
+ fi
+ BASIC_FIXUP_EXECUTABLE($1)
+- AC_MSG_CHECKING([resolved symbolic links for $1])
+ TEST_COMPILER="[$]$1"
+- BASIC_REMOVE_SYMBOLIC_LINKS(TEST_COMPILER)
+- AC_MSG_RESULT([$TEST_COMPILER])
++ # Don't remove symbolic links on AIX because 'xlc_r' and 'xlC_r' may both be links
++ # to 'xlc', but it is crucial that we invoke the compiler with the right name!
++ if test "x$OPENJDK_BUILD_OS" != xaix; then
++ AC_MSG_CHECKING([resolved symbolic links for $1])
++ BASIC_REMOVE_SYMBOLIC_LINKS(TEST_COMPILER)
++ AC_MSG_RESULT([$TEST_COMPILER])
++ fi
+ AC_MSG_CHECKING([if $1 is disguised ccache])
+
+ COMPILER_BASENAME=`$BASENAME "$TEST_COMPILER"`
+@@ -254,6 +267,9 @@
+ COMPILER_CHECK_LIST="cl"
+ elif test "x$OPENJDK_TARGET_OS" = "xsolaris"; then
+ COMPILER_CHECK_LIST="cc gcc"
++ elif test "x$OPENJDK_TARGET_OS" = "xaix"; then
++ # Do not probe for cc on AIX.
++ COMPILER_CHECK_LIST="xlc_r"
+ else
+ COMPILER_CHECK_LIST="gcc cc"
+ fi
+@@ -262,6 +278,14 @@
+ # Now that we have resolved CC ourself, let autoconf have its go at it
+ AC_PROG_CC([$CC])
+
++ # Option used to tell the compiler whether to create 32- or 64-bit executables
++ # Notice that CC contains the full compiler path at this point.
++ case $CC in
++ *xlc_r) COMPILER_TARGET_BITS_FLAG="-q";;
++ *) COMPILER_TARGET_BITS_FLAG="-m";;
++ esac
++ AC_SUBST(COMPILER_TARGET_BITS_FLAG)
++
+ ### Locate C++ compiler (CXX)
+
+ if test "x$CXX" != x; then
+@@ -270,6 +294,9 @@
+ COMPILER_CHECK_LIST="cl"
+ elif test "x$OPENJDK_TARGET_OS" = "xsolaris"; then
+ COMPILER_CHECK_LIST="CC g++"
++ elif test "x$OPENJDK_TARGET_OS" = "xaix"; then
++ # Do not probe for CC on AIX.
++ COMPILER_CHECK_LIST="xlC_r"
+ else
+ COMPILER_CHECK_LIST="g++ CC"
+ fi
+@@ -311,6 +338,8 @@
+ fi
+ if test "x$OPENJDK_TARGET_OS" = xmacosx; then
+ ARFLAGS="-r"
++ elif test "x$OPENJDK_TARGET_OS" = xaix; then
++ ARFLAGS="-X64"
+ else
+ ARFLAGS=""
+ fi
+@@ -554,6 +583,29 @@
+ POST_STRIP_CMD="$STRIP -x"
+ POST_MCS_CMD="$MCS -d -a \"JDK $FULL_VERSION\""
+ fi
++ if test "x$OPENJDK_TARGET_OS" = xaix; then
++ COMPILER_NAME=xlc
++ PICFLAG="-qpic=large"
++ LIBRARY_PREFIX=lib
++ SHARED_LIBRARY='lib[$]1.so'
++ STATIC_LIBRARY='lib[$]1.a'
++ SHARED_LIBRARY_FLAGS="-qmkshrobj"
++ SHARED_LIBRARY_SUFFIX='.so'
++ STATIC_LIBRARY_SUFFIX='.a'
++ OBJ_SUFFIX='.o'
++ EXE_SUFFIX=''
++ SET_SHARED_LIBRARY_NAME=''
++ SET_SHARED_LIBRARY_MAPFILE=''
++ C_FLAG_REORDER=''
++ CXX_FLAG_REORDER=''
++ SET_SHARED_LIBRARY_ORIGIN=''
++ SET_EXECUTABLE_ORIGIN=""
++ CFLAGS_JDK=""
++ CXXFLAGS_JDK=""
++ CFLAGS_JDKLIB_EXTRA=''
++ POST_STRIP_CMD="$STRIP -X32_64"
++ POST_MCS_CMD=""
++ fi
+ if test "x$OPENJDK_TARGET_OS" = xwindows; then
+ # If it is not gcc, then assume it is the MS Visual Studio compiler
+ COMPILER_NAME=cl
+@@ -730,6 +782,24 @@
+
+ CFLAGS_DEBUG_SYMBOLS="-g -xs"
+ CXXFLAGS_DEBUG_SYMBOLS="-g0 -xs"
++ ;;
++ xlc )
++ C_FLAG_DEPS="-qmakedep=gcc -MF"
++ CXX_FLAG_DEPS="-qmakedep=gcc -MF"
++ C_O_FLAG_HIGHEST="-O3"
++ C_O_FLAG_HI="-O3 -qstrict"
++ C_O_FLAG_NORM="-O2"
++ C_O_FLAG_NONE=""
++ CXX_O_FLAG_HIGHEST="-O3"
++ CXX_O_FLAG_HI="-O3 -qstrict"
++ CXX_O_FLAG_NORM="-O2"
++ CXX_O_FLAG_NONE=""
++ CFLAGS_DEBUG_SYMBOLS="-g"
++ CXXFLAGS_DEBUG_SYMBOLS="-g"
++ LDFLAGS_JDK="${LDFLAGS_JDK} -q64 -brtl -bnolibpath -liconv -bexpall"
++ CFLAGS_JDK="${CFLAGS_JDK} -qchars=signed -q64 -qfullpath -qsaveopt"
++ CXXFLAGS_JDK="${CXXFLAGS_JDK} -qchars=signed -q64 -qfullpath -qsaveopt"
++ ;;
+ esac
+ ;;
+ CL )
+@@ -840,6 +910,13 @@
+ LDFLAGS_JDK="$LDFLAGS_JDK -z defs -xildoff -ztext"
+ LDFLAGS_CXX_JDK="$LDFLAGS_CXX_JDK -norunpath -xnolib"
+ ;;
++ xlc )
++ CFLAGS_JDK="$CFLAGS_JDK -D_GNU_SOURCE -D_REENTRANT -D_LARGEFILE64_SOURCE -DSTDC"
++ CXXFLAGS_JDK="$CXXFLAGS_JDK -D_GNU_SOURCE -D_REENTRANT -D_LARGEFILE64_SOURCE -DSTDC"
++
++ LDFLAGS_JDK="$LDFLAGS_JDK"
++ LDFLAGS_CXX_JDK="$LDFLAGS_CXX_JDK"
++ ;;
+ cl )
+ CCXXFLAGS_JDK="$CCXXFLAGS $CCXXFLAGS_JDK -Zi -MD -Zc:wchar_t- -W3 -wd4800 \
+ -D_STATIC_CPPLIB -D_DISABLE_DEPRECATE_STATIC_CPPLIB -DWIN32_LEAN_AND_MEAN \
+@@ -909,6 +986,9 @@
+ if test "x$OPENJDK_TARGET_OS" = xsolaris; then
+ CCXXFLAGS_JDK="$CCXXFLAGS_JDK -DSOLARIS"
+ fi
++ if test "x$OPENJDK_TARGET_OS" = xaix; then
++ CCXXFLAGS_JDK="$CCXXFLAGS_JDK -DAIX -DPPC64"
++ fi
+ if test "x$OPENJDK_TARGET_OS" = xmacosx; then
+ CCXXFLAGS_JDK="$CCXXFLAGS_JDK -DMACOSX -D_ALLBSD_SOURCE -D_DARWIN_UNLIMITED_SELECT"
+ # Setting these parameters makes it an error to link to macosx APIs that are
+@@ -1076,20 +1156,38 @@
+ # ZERO_ARCHFLAG tells the compiler which mode to build for
+ case "${OPENJDK_TARGET_CPU}" in
+ s390)
+- ZERO_ARCHFLAG="-m31"
++ ZERO_ARCHFLAG="${COMPILER_TARGET_BITS_FLAG}31"
+ ;;
+ *)
+- ZERO_ARCHFLAG="-m${OPENJDK_TARGET_CPU_BITS}"
++ ZERO_ARCHFLAG="${COMPILER_TARGET_BITS_FLAG}${OPENJDK_TARGET_CPU_BITS}"
+ esac
+ TOOLCHAIN_COMPILER_CHECK_ARGUMENTS([$ZERO_ARCHFLAG], [], [ZERO_ARCHFLAG=""])
+ AC_SUBST(ZERO_ARCHFLAG)
+
+- # Check that the compiler supports -mX flags
++ # Check that the compiler supports -mX (or -qX on AIX) flags
+ # Set COMPILER_SUPPORTS_TARGET_BITS_FLAG to 'true' if it does
+- TOOLCHAIN_COMPILER_CHECK_ARGUMENTS([-m${OPENJDK_TARGET_CPU_BITS}],
++ TOOLCHAIN_COMPILER_CHECK_ARGUMENTS([${COMPILER_TARGET_BITS_FLAG}${OPENJDK_TARGET_CPU_BITS}],
+ [COMPILER_SUPPORTS_TARGET_BITS_FLAG=true],
+ [COMPILER_SUPPORTS_TARGET_BITS_FLAG=false])
+ AC_SUBST(COMPILER_SUPPORTS_TARGET_BITS_FLAG)
++
++
++ # Check for broken SuSE 'ld' for which 'Only anonymous version tag is allowed in executable.'
++ USING_BROKEN_SUSE_LD=no
++ if test "x$OPENJDK_TARGET_OS" = xlinux && test "x$GCC" = xyes; then
++ AC_MSG_CHECKING([for broken SuSE 'ld' which only understands anonymous version tags in executables])
++ echo "SUNWprivate_1.1 { local: *; };" > version-script.map
++ echo "int main() { }" > main.c
++ if $CXX -Xlinker -version-script=version-script.map main.c 2>&AS_MESSAGE_LOG_FD >&AS_MESSAGE_LOG_FD; then
++ AC_MSG_RESULT(no)
++ USING_BROKEN_SUSE_LD=no
++ else
++ AC_MSG_RESULT(yes)
++ USING_BROKEN_SUSE_LD=yes
++ fi
++ rm -rf version-script.map main.c
++ fi
++ AC_SUBST(USING_BROKEN_SUSE_LD)
+ ])
+
+ # Setup the JTREG paths
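The USING_BROKEN_SUSE_LD probe added above works by linking a trivial program against a named version-script tag; the affected SuSE ld rejects any non-anonymous version tag when producing an executable. A standalone sketch of the same test (the file names and the g++ fallback are illustrative):

```sh
#!/bin/sh
# Minimal re-run of the configure probe: link a trivial program with a
# named version tag and see whether the linker accepts it.
cat > version-script.map <<'EOF'
SUNWprivate_1.1 { local: *; };
EOF
echo 'int main() { return 0; }' > main.c

if ${CXX:-g++} -Xlinker -version-script=version-script.map main.c -o conftest 2>/dev/null; then
    echo 'USING_BROKEN_SUSE_LD=no'
else
    echo 'USING_BROKEN_SUSE_LD=yes'
fi
rm -f version-script.map main.c conftest
```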
+--- ./make/Javadoc.gmk Tue Jun 03 14:19:17 2014 -0700
++++ ./make/Javadoc.gmk Wed Jul 30 03:50:56 2014 -0700
+@@ -72,6 +72,7 @@
+ TREEAPI_FIRST_COPYRIGHT_YEAR = 2005
+ JNLP_FIRST_COPYRIGHT_YEAR = 1998
+ PLUGIN2_FIRST_COPYRIGHT_YEAR = 2007
++JDKNET_FIRST_COPYRIGHT_YEAR = 2014
+
+ # Oracle name
+ FULL_COMPANY_NAME = Oracle and/or its affiliates
+@@ -102,10 +103,7 @@
+ DOCSDIR_URL = {@docroot}/$(GET2DOCSDIR)
+
+ # Url to copyright html file
+-COPYRIGHT_URL-7 = $(DOCSDIR_URL)/legal/cpyr.html
+-# This isn't added in old build yet.
+-#COPYRIGHT_URL-8 = $(DOCSDIR_URL)/legal/cpyr.html
+-COPYRIGHT_URL = $(COPYRIGHT_URL-$(JDK_MINOR_VERSION))
++COPYRIGHT_URL = $(DOCSDIR_URL)/legal/cpyr.html
+
+ # Url to bug filing site
+ BUG_SUBMIT_URL = http://bugreport.sun.com/bugreport/
+@@ -1147,6 +1145,57 @@
+
+ #############################################################
+ #
++# jdk.net docs
++#
++
++ALL_OTHER_TARGETS += jdknetdocs
++
++JDKNET_DOCDIR := $(JRE_API_DOCSDIR)/net/socketoptions/spec
++JDKNET2COREAPI := ../../../$(JDKJRE2COREAPI)
++JDKNET_DOCTITLE := jdk.net API
++JDKNET_WINDOWTITLE := jdk.net API
++JDKNET_HEADER := jdk.net API
++JDKNET_BOTTOM := $(call CommonBottom,$(JDKNET_FIRST_COPYRIGHT_YEAR))
++JDKNET_PKGS := jdk.net
++
++JDKNET_INDEX_HTML = $(JDKNET_DOCDIR)/index.html
++JDKNET_OPTIONS_FILE = $(DOCSTMPDIR)/jdknet.options
++JDKNET_PACKAGES_FILE = $(DOCSTMPDIR)/jdknet.packages
++
++jdknetdocs: $(JDKNET_INDEX_HTML)
++
++# Set relative location to core api document root
++$(JDKNET_INDEX_HTML): GET2DOCSDIR=$(JDKNET2COREAPI)/..
++
++# Run javadoc if the index file is out of date or missing
++$(JDKNET_INDEX_HTML): $(JDKNET_OPTIONS_FILE) $(JDKNET_PACKAGES_FILE) coredocs
++ $(prep-javadoc)
++ $(call JavadocSummary,$(JDKNET_OPTIONS_FILE),$(JDKNET_PACKAGES_FILE))
++ $(JAVADOC_CMD) -d $(@D) \
++ @$(JDKNET_OPTIONS_FILE) @$(JDKNET_PACKAGES_FILE)
++
++# Create file with javadoc options in it
++$(JDKNET_OPTIONS_FILE):
++ $(prep-target)
++ @($(call OptionOnly,$(COMMON_JAVADOCFLAGS)) ; \
++ $(call OptionOnly,-Xdoclint:none) ; \
++ $(call OptionPair,-sourcepath,$(RELEASEDOCS_SOURCEPATH)) ; \
++ $(call OptionPair,-encoding,ascii) ; \
++ $(call OptionOnly,-nodeprecatedlist) ; \
++ $(call OptionPair,-doctitle,$(JDKNET_DOCTITLE)) ; \
++ $(call OptionPair,-windowtitle,$(JDKNET_WINDOWTITLE) $(DRAFT_WINTITLE)); \
++ $(call OptionPair,-header,$(JDKNET_HEADER)$(DRAFT_HEADER)); \
++ $(call OptionPair,-bottom,$(JDKNET_BOTTOM)$(DRAFT_BOTTOM)); \
++ $(call OptionTrip,-linkoffline,$(JDKNET2COREAPI),$(COREAPI_DOCSDIR)/); \
++ ) >> $@
++
++# Create a file with the package names in it
++$(JDKNET_PACKAGES_FILE): $(DIRECTORY_CACHE) $(call PackageDependencies,$(JDKNET_PKGS))
++ $(prep-target)
++ $(call PackageFilter,$(JDKNET_PKGS))
++
++#############################################################
++#
+ # Get a cache of all the directories
+
+ $(DIRECTORY_CACHE): $(ALL_EXISTING_SOURCE_DIRS)
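The jdknetdocs target added above follows the same shape as the other documentation targets: the javadoc options and the package list are written to two files, which are then handed to javadoc as @-argument files. Stripped of the make machinery, the call reduces to something like the sketch below (output directory, sourcepath, and the trimmed-down options file are illustrative):

```sh
# Hand-rolled equivalent of the jdknetdocs javadoc invocation; the real
# build generates these files under $(DOCSTMPDIR) with more options.
cat > jdknet.options <<'EOF'
-Xdoclint:none
-nodeprecatedlist
-encoding ascii
-doctitle 'jdk.net API'
-windowtitle 'jdk.net API'
EOF
echo 'jdk.net' > jdknet.packages

javadoc -d api/net/socketoptions/spec -sourcepath jdk/src/share/classes \
    @jdknet.options @jdknet.packages
```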
+--- ./make/common/JavaCompilation.gmk Tue Jun 03 14:19:17 2014 -0700
++++ ./make/common/JavaCompilation.gmk Wed Jul 30 03:50:56 2014 -0700
+@@ -163,11 +163,12 @@
+
+ # The capture contents macro finds all files (matching the patterns, typically
+ # .class and .prp) that are newer than the jar-file, ie the new content to be put into the jar.
++ # NOTICE: please leave the parentheses space separated otherwise the AIX build will break!
+ $1_CAPTURE_CONTENTS=$$(foreach src,$$($1_SRCS), \
+- (($(FIND) $$(src) -type f -a \( $$($1_FIND_PATTERNS) \) -a -newer $$@ $$($1_GREP_INCLUDES) \
++ ( ( $(FIND) $$(src) -type f -a \( $$($1_FIND_PATTERNS) \) -a -newer $$@ $$($1_GREP_INCLUDES) \
+ $$($1_GREP_EXCLUDES) | $(SED) 's|$$(src)/||g' && \
+- $(ECHO) $$(subst $$(src)/,,$$($1_EXTRA_FILES))) > \
+- $$(src)/_the.$$($1_JARNAME)_contents) $$(NEWLINE))
++ $(ECHO) $$(subst $$(src)/,,$$($1_EXTRA_FILES) ) ) > \
++ $$(src)/_the.$$($1_JARNAME)_contents) $$(NEWLINE) )
+ # The capture metainf macro finds all files below the META-INF directory that are newer than the jar-file.
+ ifeq (,$$($1_SKIP_METAINF))
+ $1_CAPTURE_METAINF =$$(foreach src,$$($1_SRCS),($(FIND) $$(src)/META-INF -type f -a -newer $$@ 2> /dev/null | $(SED) 's|$$(src)/||g' >> $$(src)/_the.$$($1_JARNAME)_contents ) $$(NEWLINE))
+@@ -176,19 +177,20 @@
+ # tells us what to remove from the jar-file.
+ $1_CAPTURE_DELETES=$$(foreach src,$$($1_SRCS),($(FIND) $$(src) -name _the.package.deleted -newer $$@ -exec $(SED) 's|$$(src)||g' \{\} >> $$($1_DELETES_FILE) \;) $$(NEWLINE))
+ # The update contents macro updates the jar file with the previously capture contents.
+- # xargs is used to trim the whitespace from the contents file, to see if it is empty.
++ # Use 'wc -w' to see if the contents file is empty.
+ $1_UPDATE_CONTENTS=$$(foreach src,$$($1_SRCS), \
+ (cd $$(src) && \
+- if [ -n "`$(CAT) _the.$$($1_JARNAME)_contents | $(XARGS)`" ]; then \
++ if [ "`$(WC) -w _the.$$($1_JARNAME)_contents | $(AWK) '{ print $$$$1 }'`" -gt "0" ]; then \
+ $(ECHO) " updating" `$(WC) -l _the.$$($1_JARNAME)_contents | $(AWK) '{ print $$$$1 }'` files && \
+ $(JAR) $$($1_JAR_UPDATE_OPTIONS) $$@ @_the.$$($1_JARNAME)_contents; \
+ fi) $$(NEWLINE))
+ # The s-variants of the above macros are used when the jar is created from scratch.
++ # NOTICE: please leave the parentheses space separated otherwise the AIX build will break!
+ $1_SCAPTURE_CONTENTS=$$(foreach src,$$($1_SRCS), \
+- (($(FIND) $$(src) -type f -a \( $$($1_FIND_PATTERNS) \) $$($1_GREP_INCLUDES) \
++ ( ( $(FIND) $$(src) -type f -a \( $$($1_FIND_PATTERNS) \) $$($1_GREP_INCLUDES) \
+ $$($1_GREP_EXCLUDES) | $(SED) 's|$$(src)/||g' && \
+- $$(subst $$(src)/,,$(ECHO) $$($1_EXTRA_FILES))) > \
+- $$(src)/_the.$$($1_JARNAME)_contents) $$(NEWLINE))
++ $$(subst $$(src)/,,$(ECHO) $$($1_EXTRA_FILES) ) ) > \
++ $$(src)/_the.$$($1_JARNAME)_contents) $$(NEWLINE) )
+
+ ifeq (,$$($1_SKIP_METAINF))
+ $1_SCAPTURE_METAINF=$$(foreach src,$$($1_SRCS), \
+@@ -527,16 +529,16 @@
+ # When building in batch, put headers in a temp dir to filter out those that actually
+ # changed before copying them to the real header dir.
+ ifneq (,$$($1_HEADERS))
+- $1_HEADERS_ARG := -h $$($1_HEADERS).tmp
++ $1_HEADERS_ARG := -h $$($1_HEADERS).$1.tmp
+
+ $$($1_HEADERS)/_the.$1_headers: $$($1_BIN)/_the.$1_batch
+ $(MKDIR) -p $$(@D)
+- for f in `ls $$($1_HEADERS).tmp`; do \
+- if [ ! -f "$$($1_HEADERS)/$$$$f" ] || [ "`$(DIFF) $$($1_HEADERS)/$$$$f $$($1_HEADERS).tmp/$$$$f`" != "" ]; then \
+- $(CP) -f $$($1_HEADERS).tmp/$$$$f $$($1_HEADERS)/$$$$f; \
++ for f in `ls $$($1_HEADERS).$1.tmp`; do \
++ if [ ! -f "$$($1_HEADERS)/$$$$f" ] || [ "`$(DIFF) $$($1_HEADERS)/$$$$f $$($1_HEADERS).$1.tmp/$$$$f`" != "" ]; then \
++ $(CP) -f $$($1_HEADERS).$1.tmp/$$$$f $$($1_HEADERS)/$$$$f; \
+ fi; \
+ done
+- $(RM) -r $$($1_HEADERS).tmp
++ $(RM) -r $$($1_HEADERS).$1.tmp
+ $(TOUCH) $$@
+
+ $1 += $$($1_HEADERS)/_the.$1_headers
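Two separate fixes are folded into the JavaCompilation.gmk hunks above: the parentheses in the $(FIND) pipelines are space-padded because back-to-back (( reportedly breaks the AIX build, and the "is the captured contents file empty?" test switches from piping through xargs to counting words with wc -w. Outside of make, where the quadruple-escaped $$$$1 collapses back to $1, the new test looks like this:

```sh
# A file containing only whitespace has a word count of 0, so 'wc -w'
# is a cheap way to ask whether there is anything to stuff into the jar.
printf '  \n\n' > _the.demo_contents
if [ "`wc -w _the.demo_contents | awk '{ print $1 }'`" -gt "0" ]; then
    echo "updating `wc -l _the.demo_contents | awk '{ print $1 }'` files"
else
    echo "nothing to update"
fi
```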
+--- ./make/common/NON_CORE_PKGS.gmk Tue Jun 03 14:19:17 2014 -0700
++++ ./make/common/NON_CORE_PKGS.gmk Wed Jul 30 03:50:56 2014 -0700
+@@ -94,7 +94,8 @@
+ com.apple.eio
+ endif
+
+-JDK_PKGS = jdk
++JDK_PKGS = jdk \
++ jdk.net
+
+ # non-core packages in rt.jar
+ NON_CORE_PKGS = $(DOMAPI_PKGS) \
+--- ./make/common/NativeCompilation.gmk Tue Jun 03 14:19:17 2014 -0700
++++ ./make/common/NativeCompilation.gmk Wed Jul 30 03:50:56 2014 -0700
+@@ -501,7 +501,7 @@
+ # Generating a static library, ie object file archive.
+ $$($1_TARGET) : $$($1_EXPECTED_OBJS) $$($1_RES)
+ $$(call ARCHIVING_MSG,$$($1_LIBRARY))
+- $(AR) $$($1_AR_FLAGS) $(AR_OUT_OPTION)$$($1_TARGET) $$($1_EXPECTED_OBJS) \
++ $(AR) $$($1_ARFLAGS) $(AR_OUT_OPTION)$$($1_TARGET) $$($1_EXPECTED_OBJS) \
+ $$($1_RES) $$($1_LDFLAGS_SUFFIX) $$($1_EXTRA_LDFLAGS_SUFFIX)
+ endif
+
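The NativeCompilation.gmk hunk above is a variable-name fix: the archiving rule expanded $$($1_AR_FLAGS) while the per-library flags are stored in $$($1_ARFLAGS), so the archiver flags presumably never reached $(AR). That matters on AIX, where toolchain.m4 now sets ARFLAGS="-X64"; the native ar(1) runs in 32-bit object mode by default and refuses 64-bit XCOFF members without it. A hedged illustration (object and archive names are hypothetical):

```sh
# AIX ar(1) needs -X64 (or -X32_64) before it will touch 64-bit objects.
ar -X64 -r -v libdemo.a foo.o bar.o   # create/update the archive
ar -X64 -t libdemo.a                  # list members to verify
```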
+--- ./corba/.hgtags Tue Jun 03 14:19:19 2014 -0700
++++ ./corba/.hgtags Wed Jul 30 03:51:08 2014 -0700
+@@ -245,7 +245,9 @@
+ a7d3638deb2f4e33217b1ecf889479e90f9e5b50 jdk8-b121
+ 0cd687347540b1d14e9cc653ba6af5f6807eb379 jdk8-b122
+ 1ecd4619f60c2432120821e805e64bdb45de66cc jdk8-b123
++afecd2878aee29c2d0282d2c6b3ba56e91b0b2de jdk8u20-b00
+ 7b45151c7a05764b87ca088ba70448d39de6d41f jdk8-b124
++7547c6a90a9e06b11ec1dca943b30c09c1665aa8 jdk8u20-b01
+ 7b45151c7a05764b87ca088ba70448d39de6d41f jdk8-b125
+ 8ceb68fd9e107767edf949c6b3ac9d425ca93cac jdk8-b126
+ b8c71dae05574f8eca7ca6d703b16b450850b033 jdk8-b127
+@@ -268,6 +270,7 @@
+ a2f7b36bfc1bc8df033fe5721b48fac1c3928a5b jdk8u5-b11
+ 475b96f6d8cecf720ca9fd6d332dd4bafb0f654c jdk8u5-b12
+ 897088ef059f53a8aa73267985666ad319223064 jdk8u5-b13
++0355626e88639a9b51b209f26f48dee28c924c72 jdk8u5-b31
+ e2cfebaf3b9d0eae06c2d5ee7669180f1723f897 jdk8u11-b01
+ 152d1b91e5c5dfc940cccef1bfeae60a6604032a jdk8u11-b02
+ 60b073836311720d4f013d4493af2729ebe663f6 jdk8u11-b03
+@@ -279,3 +282,32 @@
+ f846c0c1c330545b8a85fc05a36fa95f48757525 jdk8u11-b09
+ 3e4b895d06e8d292f7338aa2155849173722062f jdk8u11-b10
+ ce324096c5626997634df1e5fa68f206909431ab jdk8u11-b11
++c4d817051c6a620a4f748e9e057423a774f172c7 jdk8u11-b12
++c79def3415b9c36e925d71d247de6bf60240a29b jdk8u11-b31
++bfafb13aac1c8b2d9184d59ec510b45d965b7667 jdk8u20-b02
++9059a1c857044ad5ce7564ddb71a064364f8fcf5 jdk8u20-b03
++abe5b0157c367a72f9059269ca633ecfe15732d2 jdk8u20-b04
++17d296953274eb110f284035ddb6c588f8b61823 jdk8u20-b05
++0683ee308085785d0455f4153e764e062843f178 jdk8-b130
++5e5c8f0c45dd75a08089586ca50835393f00c2cb jdk8-b131
++84fed37bbe640666bfc022c2e8b9fde468de35d2 jdk8-b132
++bcdc679d86aa752ccb62f6ecb182ff10ea09dce1 jdk8u20-b06
++32b9c4f0ab3c6d33f70724b775cb9d12c004be6d jdk8u20-b07
++4e4a75376185ca1a712cc9fef5a340e6927cf5e2 jdk8u20-b08
++0344396d09b0e20d4a8d1bdff9f129250a60f365 jdk8u20-b09
++e930c4fa31586b0f21887f7b50fba927550f17fb jdk8u20-b10
++9a6092124c7c926d380a5f3b0f899fd1427c6e69 jdk8u20-b11
++673829390271e51f8bc442ffbd4726833a7b1c06 jdk8u20-b12
++d9985581e2a4973b48817103cd006f70863bc776 jdk8u20-b13
++1680797c869e6e6df965c83a854fe7633ab94aa7 jdk8u20-b14
++5949c13039299a0052343e81bb7da2e2068282fa jdk8u20-b15
++db1b47c1102e31fe649e0ca13a85dc4f13281df4 jdk8u20-b16
++90a428aa799d843cfc923031bd79f9e3896b49fa jdk8u20-b17
++2c5113522ce5b887ce060b6accf225095349fc3c jdk8u20-b18
++b078cb18ef95fe1afeacab70c2c313b6abbc959c jdk8u20-b19
++aca1d25d10812c86024d9dbb7ec529876cca55e8 jdk8u20-b20
++7d1e0f0b63f1d66c77924d8b2a1accdf8f7480db jdk8u20-b21
++7677bf14d105ca23ab045f5041ceb19ee88b86c6 jdk8u20-b22
++919405d7316dfcbddee5ad8dd08905916df88e04 jdk8u20-b23
++34c930eaa6b591621afde05ca2e24571c91cdc9b jdk8u20-b24
++34c930eaa6b591621afde05ca2e24571c91cdc9b jdk8u20-b25
+--- ./corba/THIRD_PARTY_README Tue Jun 03 14:19:19 2014 -0700
++++ ./corba/THIRD_PARTY_README Wed Jul 30 03:51:08 2014 -0700
+@@ -2,7 +2,7 @@
+ -----------------------------
+
+ %% This notice is provided with respect to ASM Bytecode Manipulation
+-Framework v5.0, which may be included with JRE 8, and JDK 8, and
++Framework v5.0.3, which may be included with JRE 8, and JDK 8, and
+ OpenJDK 8.
+
+ --- begin of LICENSE ---
+@@ -1471,7 +1471,7 @@
+ version 2.0.
+
+ The NSS libraries are supplied in executable form, built from unmodified
+-NSS source code labeled with the "NSS_3.13.1_RTM" release tag.
++NSS source code labeled with the "NSS_3_16_RTM" HG tag.
+
+ The NSS source code is available in the OpenJDK source code repository at:
+ jdk/test/sun/security/pkcs11/nss/src
+@@ -3349,14 +3349,14 @@
+
+ -------------------------------------------------------------------------------
+
+-%% This notice is provided with respect to zlib v1.2.5, which may be included
++%% This notice is provided with respect to zlib v1.2.8, which may be included
+ with JRE 8, JDK 8, and OpenJDK 8.
+
+ --- begin of LICENSE ---
+
+- version 1.2.5, July 18th, 2005
+-
+- Copyright (C) 1995-2005 Jean-loup Gailly and Mark Adler
++ version 1.2.8, April 28th, 2013
++
++ Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
+
+ This software is provided 'as-is', without any express or implied
+ warranty. In no event will the authors be held liable for any damages
+@@ -3382,11 +3382,11 @@
+ -------------------------------------------------------------------------------
+
+ %% This notice is provided with respect to the following which may be
+-included with JRE 8, JDK 8, and OpenJDK 8, except where noted:
+-
+- Apache Commons Math 2.2
+- Apache Derby 10.10.1.2 [included with JDK 8]
+- Apache Jakarta BCEL 5.2
++included with JRE 8, JDK 8, and OpenJDK 8.
++
++ Apache Commons Math 3.2
++ Apache Derby 10.10.1.3
++ Apache Jakarta BCEL 5.1
+ Apache Jakarta Regexp 1.4
+ Apache Santuario XML Security for Java 1.5.4
+ Apache Xalan-Java 2.7.1
+--- ./corba/src/share/classes/com/sun/corba/se/impl/orbutil/resources/sunorb_ja.properties Tue Jun 03 14:19:19 2014 -0700
++++ ./corba/src/share/classes/com/sun/corba/se/impl/orbutil/resources/sunorb_ja.properties Wed Jul 30 03:51:08 2014 -0700
+@@ -1,5 +1,5 @@
+ #
+-# Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
++# Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved.
+ # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ #
+ # This code is free software; you can redistribute it and/or modify it
+@@ -95,7 +95,7 @@
+ bootstrap.exception=\u30D7\u30ED\u30D1\u30C6\u30A3\u3092\u30D5\u30A1\u30A4\u30EB{0}\u306B\u4FDD\u5B58\u4E2D\u306B\u4F8B\u5916\u3092\u6355\u6349\u3057\u307E\u3057\u305F: \u4F8B\u5916{1}
+
+ tnameserv.exception=\u30DD\u30FC\u30C8{0}\u3067\u30D6\u30FC\u30C8\u30B9\u30C8\u30E9\u30C3\u30D7\u30FB\u30B5\u30FC\u30D3\u30B9\u3092\u8D77\u52D5\u4E2D\u306B\u4F8B\u5916\u3092\u6355\u6349\u3057\u307E\u3057\u305F
+-tnameserv.usage=\u30B3\u30DE\u30F3\u30C9\u30E9\u30A4\u30F3\u5F15\u6570-ORBInitialPort\u3092\u4F7F\u7528\u3057\u3066\u5225\u306E\u30DD\u30FC\u30C8\u3092\u4F7F\u7528\u3057\u3066\u304F\u3060\u3055\u3044
++tnameserv.usage=\u30B3\u30DE\u30F3\u30C9\u884C\u5F15\u6570-ORBInitialPort\u3092\u4F7F\u7528\u3057\u3066\u5225\u306E\u30DD\u30FC\u30C8\u3092\u4F7F\u7528\u3057\u3066\u304F\u3060\u3055\u3044
+ tnameserv.invalidhostoption=ORBInitialHost\u306FNameService\u306B\u6709\u52B9\u306A\u30AA\u30D7\u30B7\u30E7\u30F3\u3067\u306F\u3042\u308A\u307E\u305B\u3093
+ tnameserv.orbinitialport0=ORBInitialPort 0\u306FNameService\u306B\u6709\u52B9\u306A\u30AA\u30D7\u30B7\u30E7\u30F3\u3067\u306F\u3042\u308A\u307E\u305B\u3093
+ tnameserv.hs1=\u521D\u671F\u30CD\u30FC\u30DF\u30F3\u30B0\u30FB\u30B3\u30F3\u30C6\u30AD\u30B9\u30C8:\n{0}
+--- ./corba/src/share/classes/com/sun/corba/se/impl/orbutil/resources/sunorb_pt_BR.properties Tue Jun 03 14:19:19 2014 -0700
++++ ./corba/src/share/classes/com/sun/corba/se/impl/orbutil/resources/sunorb_pt_BR.properties Wed Jul 30 03:51:08 2014 -0700
+@@ -1,5 +1,5 @@
+ #
+-# Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
++# Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved.
+ # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ #
+ # This code is free software; you can redistribute it and/or modify it
+@@ -23,7 +23,7 @@
+ # questions.
+ #
+
+-orbd.usage=Uso: {0} \n\nem que inclui:\n -port porta de ativa\u00E7\u00E3o na qual o ORBD deve ser iniciado, default 1049 (opcional)\n -defaultdb diret\u00F3rio dos arquivos ORBD, default "./orb.db" (opcional)\n -serverid id do servidor para ORBD, default 1 (opcional)\n -ORBInitialPort porta inicial (obrigat\u00F3rio)\n -ORBInitialHost nome de host inicial (obrigat\u00F3rio)\n
++orbd.usage=Uso: {0} \n\nem que inclui:\n -port porta de ativa\u00E7\u00E3o na qual o ORBD deve ser iniciado, padr\u00E3o 1049 (opcional)\n -defaultdb diret\u00F3rio dos arquivos ORBD, padr\u00E3o "./orb.db" (opcional)\n -serverid id do servidor para ORBD, padr\u00E3o 1 (opcional)\n -ORBInitialPort porta inicial (obrigat\u00F3rio)\n -ORBInitialHost nome de host inicial (obrigat\u00F3rio)\n
+
+ servertool.usage=Uso: {0} \n\nem que inclui:\n -ORBInitialPort porta inicial (obrigat\u00F3rio)\n -ORBInitialHost nome de host inicial (obrigat\u00F3rio)\n
+ servertool.banner=\n\nBem-vindo \u00E0 Ferramenta de Servidor IDL Java \ninsira os comandos no prompt \n
+--- ./corba/src/share/classes/com/sun/corba/se/impl/transport/SocketOrChannelAcceptorImpl.java Tue Jun 03 14:19:19 2014 -0700
++++ ./corba/src/share/classes/com/sun/corba/se/impl/transport/SocketOrChannelAcceptorImpl.java Wed Jul 30 03:51:08 2014 -0700
+@@ -253,6 +253,13 @@
+ // registered with the selector. Otherwise if the bytes
+ // are read on the connection it will attempt a time stamp
+ // but the cache will be null, resulting in NPE.
++
++ // A connection needs to be timestamped before it is put into the cache.
++ // Otherwise the newly created connection (with a 0 timestamp) could be
++ // incorrectly reclaimed by a concurrent reclaim() call, or, if no events
++ // ever arrive on the connection, it could be reclaimed by an upcoming
++ // reclaim() call.
++ getConnectionCache().stampTime(connection);
+ getConnectionCache().put(this, connection);
+
+ if (connection.shouldRegisterServerReadEvent()) {
+--- ./corba/src/share/classes/org/omg/CORBA/ORB.java Tue Jun 03 14:19:19 2014 -0700
++++ ./corba/src/share/classes/org/omg/CORBA/ORB.java Wed Jul 30 03:51:08 2014 -0700
+@@ -291,28 +291,12 @@
+ (className.equals("com.sun.corba.se.impl.orb.ORBSingleton"))) {
+ singleton = new com.sun.corba.se.impl.orb.ORBSingleton();
+ } else {
+- singleton = create_impl_with_systemclassloader(className);
++ singleton = create_impl(className);
+ }
+ }
+ return singleton;
+ }
+
+- private static ORB create_impl_with_systemclassloader(String className) {
+-
+- try {
+- ReflectUtil.checkPackageAccess(className);
+- ClassLoader cl = ClassLoader.getSystemClassLoader();
+- Class orbBaseClass = org.omg.CORBA.ORB.class;
+- Class<?> singletonOrbClass = Class.forName(className, true, cl).asSubclass(orbBaseClass);
+- return (ORB)singletonOrbClass.newInstance();
+- } catch (Throwable ex) {
+- SystemException systemException = new INITIALIZE(
+- "can't instantiate default ORB implementation " + className);
+- systemException.initCause(ex);
+- throw systemException;
+- }
+- }
+-
+ private static ORB create_impl(String className) {
+ ClassLoader cl = Thread.currentThread().getContextClassLoader();
+ if (cl == null)
+--- ./hotspot/.hgtags Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/.hgtags Wed Jul 30 03:51:43 2014 -0700
+@@ -408,6 +408,8 @@
+ 55fb97c4c58d6ed4db8ec02a382ba518d9265815 hs25-b65
+ d3521d8e562a782f66fc0dfdebeffba2c7e3471d jdk8-b122
+ 591135a7d6f96c0ef281d078cee9a8d8c342d45c jdk8-b123
++c89630a122b43d0eabd78b74f6498a1c3cf04ca3 jdk8u20-b00
++c89630a122b43d0eabd78b74f6498a1c3cf04ca3 hs25.20-b00
+ 9b9816164447214f21b06ccf646893c281c76a42 hs25-b66
+ df333ee12bba67e2e928f8ce1da37afd9bf95b48 jdk8-b124
+ 3585183c191aa6b4d0375ea659515335e1804417 hs25-b67
+@@ -436,6 +438,7 @@
+ 17a75e692af397532e2b296b24f6b9b6c239c633 jdk8u5-b11
+ 9b289963cb9a14636fbe8faaa2dd6d3678464a7b jdk8u5-b12
+ 8a67179106085689906732013a282efeeb9bd5f4 jdk8u5-b13
++5c7ef8e396835b82c0460b73f23cac86ba34846f jdk8u5-b31
+ f0d759a6a2309a1c149d530b29db24eda885f267 jdk8u11-b01
+ 3c079aebb516765784dd8097887daadda5a76ac1 jdk8u11-b02
+ 0037e964ce486c009984171f004259263628079f jdk8u11-b03
+@@ -447,3 +450,53 @@
+ 34de1e8eeabbcc6e690f92766fd619beb9f3f049 jdk8u11-b09
+ 7e4ae023277bef5b82361fd985262f4009eb2fe8 jdk8u11-b10
+ e6b7384074325d5a4ede728d6928ecb7f1cc1326 jdk8u11-b11
++78df957d46ebd98ba5bb68f4d9654c8bea3f1587 jdk8u11-b12
++13f04650aa09df696d62a1912febe25fe4a64082 jdk8u11-b31
++412d3b5fe90e54c0ff9d9ac7374b98607c561d5a hs25.20-b01
++4638c4d7ff106db0f29ef7f18b128dd7e69bc470 hs25.20-b02
++e56d11f8cc2158d4280f80e56d196193349c150a hs25.20-b03
++757fe22ae90681e2b6cff50699c5abbe2563dd2c jdk8u20-b01
++9c2ddd17626e375554044a3082a6dc5e68184ed9 jdk8u20-b02
++ecf3678d5736a645aea893b525a9eb5fa1a8e072 hs25.20-b04
++51e1bb81df8680bd237630323de5e0704fb25607 jdk8u20-b03
++54436d3b2a915ff50a8d6b34f61d5afb45be7bb6 hs25.20-b05
++d4e18f0633c662588cc0875be7759721c7d85af4 jdk8u20-b04
++57eb3e69397e9d5818c5fdaef65b47d9b03f7f88 jdk8u20-b05
++804f89b6ff46728d60a69e9a338e63f362f7ac68 hs25.20-b06
++c3d92e04873788275eeebec6bcd2948cdbd143a7 jdk8u20-b06
++39eae002499704438142e78f5e0e24d46d0b266f hs25.20-b07
++f0ea4d3df1299b6c958e1a72f892c695fca055ad jdk8u20-b07
++2627c7be4279478b880d7f643a252d185e4915ec hs25.20-b08
++e9ffa408f7af28205a7114ca78bce29846f5a8df jdk8u20-b08
++5186bc5047c1725888ed99f423bdfaa116e05abe hs25.20-b09
++4d73f1e99f97d1444e16ee5ef4634eb2129969ad jdk8u20-b09
++27a9e6a96a8ced7b7ee892d5d0f1a735b9010abb hs25.20-b10
++300e2c5eeb2710de3630d14ffe4592214633dbff jdk8u20-b10
++70dc2c030c69470a5d9099b7f54e4cfef89276fd jdk8u20-b11
++b6a2ba7d3ea7259a76c8ff1ec22fac9094494c1c hs25.20-b11
++3c291bc2aa7c58efb1219701f38c41731609e595 hs25.20-b12
++18ae0dac7620474547aa1721bc3fd748af07b8b5 jdk8u20-b12
++47951595af60460a479b8574622375bfbf5c8ed2 jdk8u20-b13
++798f5b02be897151fdad44d695446088b1cca6b1 hs25.20-b13
++28bbbecff5f08c1e343fc0c40923c05d86b7cf82 hs25.20-b14
++c20d8a452574c85c8fc1f7f2d4e788cd6b156bc9 jdk8u20-b14
++87bdb86f0aedbd9b9ef8e9999b273114c8be4748 hs25.20-b15
++8c785f9bde6f603cbd13eecd2ee6acd699b376f8 jdk8u20-b15
++50e5d560367b94275a91d5d579c32f1164eb5fa5 hs25.20-b16
++c36ef639e6d3c2d238f4e4f8b2f5803a60de8be8 jdk8u20-b16
++ee8b934668694dba5dc0ac039f8d56e52499c0f9 hs25.20-b17
++8ea4732884ccd5586f0afe9478b80add90231455 jdk8u20-b17
++b685b4e870b159ea5731984199d275879d427038 hs25.20-b18
++11159d7ec80462a422e39c9b3a39ae932923622d jdk8u20-b18
++3e1cec358ab95ef985f821219104141b9ffda83f hs25.20-b19
++b15553cde967dfd7781a4a5c669e4cb7db734317 jdk8u20-b19
++4f18dea0312d601d0515976bc0c643ea7acc829d hs25.20-b20
++9e4d27da4ac04c6e19291087f7c68a5c5803c7ca jdk8u20-b20
++4828415ebbf11e205dcc08e97ad5ae7dd03522f9 jdk8u20-b21
++e4a6e7f1b90b85270aee1c54edaca3ef737082f1 hs25.20-b21
++f7429096a202cab5c36a0f20dea33c554026010f jdk8u20-b22
++7c56530b11496459e66cb9ea933035002311672c hs25.20-b22
++f09d1f6a401e25a54dad44bb7bea482e47558af5 jdk8u20-b23
++42ddd0bbcb6630fe463ec9bc1893c838d5edff1b jdk8u20-b24
++00cf2b6f51b9560b01030e8f4c28c466f0b21fe3 hs25.20-b23
++19408d5fd31c25ce60c43dd33e92b96e8df4a4ea jdk8u20-b25
+--- ./hotspot/THIRD_PARTY_README Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/THIRD_PARTY_README Wed Jul 30 03:51:43 2014 -0700
+@@ -2,7 +2,7 @@
+ -----------------------------
+
+ %% This notice is provided with respect to ASM Bytecode Manipulation
+-Framework v5.0, which may be included with JRE 8, and JDK 8, and
++Framework v5.0.3, which may be included with JRE 8, and JDK 8, and
+ OpenJDK 8.
+
+ --- begin of LICENSE ---
+@@ -1471,7 +1471,7 @@
+ version 2.0.
+
+ The NSS libraries are supplied in executable form, built from unmodified
+-NSS source code labeled with the "NSS_3.13.1_RTM" release tag.
++NSS source code labeled with the "NSS_3_16_RTM" HG tag.
+
+ The NSS source code is available in the OpenJDK source code repository at:
+ jdk/test/sun/security/pkcs11/nss/src
+@@ -3349,14 +3349,14 @@
+
+ -------------------------------------------------------------------------------
+
+-%% This notice is provided with respect to zlib v1.2.5, which may be included
++%% This notice is provided with respect to zlib v1.2.8, which may be included
+ with JRE 8, JDK 8, and OpenJDK 8.
+
+ --- begin of LICENSE ---
+
+- version 1.2.5, July 18th, 2005
+-
+- Copyright (C) 1995-2005 Jean-loup Gailly and Mark Adler
++ version 1.2.8, April 28th, 2013
++
++ Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
+
+ This software is provided 'as-is', without any express or implied
+ warranty. In no event will the authors be held liable for any damages
+@@ -3382,11 +3382,11 @@
+ -------------------------------------------------------------------------------
+
+ %% This notice is provided with respect to the following which may be
+-included with JRE 8, JDK 8, and OpenJDK 8, except where noted:
+-
+- Apache Commons Math 2.2
+- Apache Derby 10.10.1.2 [included with JDK 8]
+- Apache Jakarta BCEL 5.2
++included with JRE 8, JDK 8, and OpenJDK 8.
++
++ Apache Commons Math 3.2
++ Apache Derby 10.10.1.3
++ Apache Jakarta BCEL 5.1
+ Apache Jakarta Regexp 1.4
+ Apache Santuario XML Security for Java 1.5.4
+ Apache Xalan-Java 2.7.1
+--- ./hotspot/agent/src/os/bsd/MacosxDebuggerLocal.m Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/agent/src/os/bsd/MacosxDebuggerLocal.m Wed Jul 30 03:51:43 2014 -0700
+@@ -95,7 +95,9 @@
+ #define CHECK_EXCEPTION_CLEAR_(value) if ((*env)->ExceptionOccurred(env)) { (*env)->ExceptionClear(env); return value; }
+
+ static void throw_new_debugger_exception(JNIEnv* env, const char* errMsg) {
+- (*env)->ThrowNew(env, (*env)->FindClass(env, "sun/jvm/hotspot/debugger/DebuggerException"), errMsg);
++ jclass exceptionClass = (*env)->FindClass(env, "sun/jvm/hotspot/debugger/DebuggerException");
++ CHECK_EXCEPTION;
++ (*env)->ThrowNew(env, exceptionClass, errMsg);
+ }
+
+ static struct ps_prochandle* get_proc_handle(JNIEnv* env, jobject this_obj) {
+@@ -129,6 +131,7 @@
+ JNIEXPORT void JNICALL
+ Java_sun_jvm_hotspot_debugger_bsd_BsdDebuggerLocal_init0(JNIEnv *env, jclass cls) {
+ symbolicatorID = (*env)->GetFieldID(env, cls, "symbolicator", "J");
++ CHECK_EXCEPTION;
+ taskID = (*env)->GetFieldID(env, cls, "task", "J");
+ CHECK_EXCEPTION;
+
+@@ -236,13 +239,16 @@
+ (JNIEnv *env, jobject this_obj, jlong addr) {
+ uintptr_t offset;
+ const char* sym = NULL;
++ jstring sym_string;
+
+ struct ps_prochandle* ph = get_proc_handle(env, this_obj);
+ if (ph != NULL && ph->core != NULL) {
+ sym = symbol_for_pc(ph, (uintptr_t) addr, &offset);
+ if (sym == NULL) return 0;
++ sym_string = (*env)->NewStringUTF(env, sym);
++ CHECK_EXCEPTION_(0);
+ return (*env)->CallObjectMethod(env, this_obj, createClosestSymbol_ID,
+- (*env)->NewStringUTF(env, sym), (jlong)offset);
++ sym_string, (jlong)offset);
+ }
+ return 0;
+ }
+@@ -749,11 +755,14 @@
+ const char* name;
+ jobject loadObject;
+ jobject loadObjectList;
++ jstring nameString;
+
+ base = get_lib_base(ph, i);
+ name = get_lib_name(ph, i);
++ nameString = (*env)->NewStringUTF(env, name);
++ CHECK_EXCEPTION;
+ loadObject = (*env)->CallObjectMethod(env, this_obj, createLoadObject_ID,
+- (*env)->NewStringUTF(env, name), (jlong)0, (jlong)base);
++ nameString, (jlong)0, (jlong)base);
+ CHECK_EXCEPTION;
+ loadObjectList = (*env)->GetObjectField(env, this_obj, loadObjectList_ID);
+ CHECK_EXCEPTION;
+--- ./hotspot/agent/src/os/linux/libproc.h Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/agent/src/os/linux/libproc.h Wed Jul 30 03:51:43 2014 -0700
+@@ -34,19 +34,7 @@
+ #include "libproc_md.h"
+ #endif
+
+-#if defined(sparc) || defined(sparcv9)
+-/*
+- If _LP64 is defined ptrace.h should be taken from /usr/include/asm-sparc64
+- otherwise it should be from /usr/include/asm-sparc
+- These two files define pt_regs structure differently
+-*/
+-#ifdef _LP64
+-#include "asm-sparc64/ptrace.h"
+-#else
+-#include "asm-sparc/ptrace.h"
+-#endif
+-
+-#endif //sparc or sparcv9
++#include <asm/ptrace.h>
+
+ /************************************************************************************
+
+@@ -80,7 +68,7 @@
+ *************************************************************************************/
+
+
+-#if defined(sparc) || defined(sparcv9)
++#if defined(sparc) || defined(sparcv9) || defined(ppc64)
+ #define user_regs_struct pt_regs
+ #endif
+
+--- ./hotspot/agent/src/share/classes/sun/jvm/hotspot/ci/ciEnv.java Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/agent/src/share/classes/sun/jvm/hotspot/ci/ciEnv.java Wed Jul 30 03:51:43 2014 -0700
+@@ -95,9 +95,15 @@
+ int entryBci = task.osrBci();
+ int compLevel = task.compLevel();
+ Klass holder = method.getMethodHolder();
+- out.println("compile " + holder.getName().asString() + " " +
+- OopUtilities.escapeString(method.getName().asString()) + " " +
+- method.getSignature().asString() + " " +
+- entryBci + " " + compLevel);
++ out.print("compile " + holder.getName().asString() + " " +
++ OopUtilities.escapeString(method.getName().asString()) + " " +
++ method.getSignature().asString() + " " +
++ entryBci + " " + compLevel);
++ Compile compiler = compilerData();
++ if (compiler != null) {
++ // Dump inlining data.
++ compiler.dumpInlineData(out);
++ }
++ out.println();
+ }
+ }
+--- ./hotspot/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/G1CollectedHeap.java Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/G1CollectedHeap.java Wed Jul 30 03:51:43 2014 -0700
+@@ -51,9 +51,9 @@
+ static private CIntegerField summaryBytesUsedField;
+ // G1MonitoringSupport* _g1mm;
+ static private AddressField g1mmField;
+- // MasterOldRegionSet _old_set;
++ // HeapRegionSet _old_set;
+ static private long oldSetFieldOffset;
+- // MasterHumongousRegionSet _humongous_set;
++ // HeapRegionSet _humongous_set;
+ static private long humongousSetFieldOffset;
+
+ static {
+--- ./hotspot/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/HeapRegionSetBase.java Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/HeapRegionSetBase.java Wed Jul 30 03:51:43 2014 -0700
+@@ -40,12 +40,8 @@
+ // Mirror class for HeapRegionSetBase. Represents a group of regions.
+
+ public class HeapRegionSetBase extends VMObject {
+- // uint _length;
+- static private CIntegerField lengthField;
+- // uint _region_num;
+- static private CIntegerField regionNumField;
+- // size_t _total_used_bytes;
+- static private CIntegerField totalUsedBytesField;
++
++ static private long countField;
+
+ static {
+ VM.registerVMInitializedObserver(new Observer() {
+@@ -58,21 +54,13 @@
+ static private synchronized void initialize(TypeDataBase db) {
+ Type type = db.lookupType("HeapRegionSetBase");
+
+- lengthField = type.getCIntegerField("_length");
+- regionNumField = type.getCIntegerField("_region_num");
+- totalUsedBytesField = type.getCIntegerField("_total_used_bytes");
++ countField = type.getField("_count").getOffset();
+ }
+
+- public long length() {
+- return lengthField.getValue(addr);
+- }
+
+- public long regionNum() {
+- return regionNumField.getValue(addr);
+- }
+-
+- public long totalUsedBytes() {
+- return totalUsedBytesField.getValue(addr);
++ public HeapRegionSetCount count() {
++ Address countFieldAddr = addr.addOffsetTo(countField);
++ return (HeapRegionSetCount) VMObjectFactory.newObject(HeapRegionSetCount.class, countFieldAddr);
+ }
+
+ public HeapRegionSetBase(Address addr) {
+--- ./hotspot/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/HeapRegionSetCount.java Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/HeapRegionSetCount.java Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,73 @@
++/*
++ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++package sun.jvm.hotspot.gc_implementation.g1;
++
++import java.util.Iterator;
++import java.util.Observable;
++import java.util.Observer;
++
++import sun.jvm.hotspot.debugger.Address;
++import sun.jvm.hotspot.runtime.VM;
++import sun.jvm.hotspot.runtime.VMObject;
++import sun.jvm.hotspot.runtime.VMObjectFactory;
++import sun.jvm.hotspot.types.AddressField;
++import sun.jvm.hotspot.types.CIntegerField;
++import sun.jvm.hotspot.types.Type;
++import sun.jvm.hotspot.types.TypeDataBase;
++
++// Mirror class for HeapRegionSetCount. Represents a group of regions.
++
++public class HeapRegionSetCount extends VMObject {
++
++ static private CIntegerField lengthField;
++ static private CIntegerField capacityField;
++
++ static {
++ VM.registerVMInitializedObserver(new Observer() {
++ public void update(Observable o, Object data) {
++ initialize(VM.getVM().getTypeDataBase());
++ }
++ });
++ }
++
++ static private synchronized void initialize(TypeDataBase db) {
++ Type type = db.lookupType("HeapRegionSetCount");
++
++ lengthField = type.getCIntegerField("_length");
++ capacityField = type.getCIntegerField("_capacity");
++ }
++
++ public long length() {
++ return lengthField.getValue(addr);
++ }
++
++ public long capacity() {
++ return capacityField.getValue(addr);
++ }
++
++ public HeapRegionSetCount(Address addr) {
++ super(addr);
++ }
++}
+--- ./hotspot/agent/src/share/classes/sun/jvm/hotspot/opto/Compile.java Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/agent/src/share/classes/sun/jvm/hotspot/opto/Compile.java Wed Jul 30 03:51:43 2014 -0700
+@@ -25,6 +25,7 @@
+ package sun.jvm.hotspot.opto;
+
+ import java.util.*;
++import java.io.PrintStream;
+ import sun.jvm.hotspot.ci.*;
+ import sun.jvm.hotspot.debugger.*;
+ import sun.jvm.hotspot.runtime.*;
+@@ -92,4 +93,13 @@
+ }
+ return null;
+ }
++
++ public void dumpInlineData(PrintStream out) {
++ InlineTree inlTree = ilt();
++ if (inlTree != null) {
++ out.print(" inline " + inlTree.count());
++ inlTree.dumpReplayData(out);
++ }
++ }
++
+ }
+--- ./hotspot/agent/src/share/classes/sun/jvm/hotspot/opto/InlineTree.java Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/agent/src/share/classes/sun/jvm/hotspot/opto/InlineTree.java Wed Jul 30 03:51:43 2014 -0700
+@@ -87,6 +87,11 @@
+ return GrowableArray.create(addr, inlineTreeConstructor);
+ }
+
++ public int inlineLevel() {
++ JVMState jvms = callerJvms();
++ return (jvms != null) ? jvms.depth() : 0;
++ }
++
+ public void printImpl(PrintStream st, int indent) {
+ for (int i = 0; i < indent; i++) st.print(" ");
+ st.printf(" @ %d ", callerBci());
+@@ -101,4 +106,28 @@
+ public void print(PrintStream st) {
+ printImpl(st, 2);
+ }
++
++ // Count number of nodes in this subtree
++ public int count() {
++ int result = 1;
++ GrowableArray<InlineTree> subt = subtrees();
++ for (int i = 0 ; i < subt.length(); i++) {
++ result += subt.at(i).count();
++ }
++ return result;
++ }
++
++ public void dumpReplayData(PrintStream out) {
++ out.printf(" %d %d ", inlineLevel(), callerBci());
++ Method method = (Method)method().getMetadata();
++ Klass holder = method.getMethodHolder();
++ out.print(holder.getName().asString() + " " +
++ OopUtilities.escapeString(method.getName().asString()) + " " +
++ method.getSignature().asString());
++
++ GrowableArray<InlineTree> subt = subtrees();
++ for (int i = 0 ; i < subt.length(); i++) {
++ subt.at(i).dumpReplayData(out);
++ }
++ }
+ }
+--- ./hotspot/agent/src/share/classes/sun/jvm/hotspot/opto/JVMState.java Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/agent/src/share/classes/sun/jvm/hotspot/opto/JVMState.java Wed Jul 30 03:51:43 2014 -0700
+@@ -88,6 +88,10 @@
+ return (int)bciField.getValue(getAddress());
+ }
+
++ public int depth() {
++ return (int)depthField.getValue(getAddress());
++ }
++
+ public JVMState caller() {
+ return create(callerField.getValue(getAddress()));
+ }
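Taken together, the ciEnv, Compile, InlineTree, and JVMState changes above extend the serviceability agent's replay output: each compile line now gains an " inline <node-count>" tail followed by one "<depth> <bci> <class> <method> <signature>" tuple per inline-tree node. Files in this format feed HotSpot's compilation-replay machinery; a hedged invocation (replay.txt is illustrative, and whether the replay flags are available depends on the build flavor):

```sh
# Replay a recorded compilation from an SA-produced replay file.
java -XX:+UnlockDiagnosticVMOptions -XX:+ReplayCompiles \
    -XX:ReplayDataFile=replay.txt -version
```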
+--- ./hotspot/agent/src/share/classes/sun/jvm/hotspot/tools/HeapSummary.java Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/agent/src/share/classes/sun/jvm/hotspot/tools/HeapSummary.java Wed Jul 30 03:51:43 2014 -0700
+@@ -114,7 +114,8 @@
+ long survivorRegionNum = g1mm.survivorRegionNum();
+ HeapRegionSetBase oldSet = g1h.oldSet();
+ HeapRegionSetBase humongousSet = g1h.humongousSet();
+- long oldRegionNum = oldSet.regionNum() + humongousSet.regionNum();
++ long oldRegionNum = oldSet.count().length()
++ + humongousSet.count().capacity() / HeapRegion.grainBytes();
+ printG1Space("G1 Heap:", g1h.n_regions(),
+ g1h.used(), g1h.capacity());
+ System.out.println("G1 Young Generation:");
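With HeapRegionSetBase reduced to a HeapRegionSetCount, the HeapSummary fix above recomputes the old-region figure as the old set's length plus the humongous set's capacity divided by the G1 region size. The tool attaches to a live VM; a sketch for a JDK 8 layout (the install path and pid are illustrative, and attaching requires a matching JDK and sufficient privileges):

```sh
JAVA_HOME=/usr/local/openjdk8    # illustrative install path
$JAVA_HOME/bin/java -cp $JAVA_HOME/lib/sa-jdi.jar \
    sun.jvm.hotspot.tools.HeapSummary 12345   # 12345 = target JVM pid
```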
+--- ./hotspot/agent/src/share/classes/sun/jvm/hotspot/utilities/Hashtable.java Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/agent/src/share/classes/sun/jvm/hotspot/utilities/Hashtable.java Wed Jul 30 03:51:43 2014 -0700
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+@@ -61,8 +61,9 @@
+ long h = 0;
+ int s = 0;
+ int len = buf.length;
++ // Emulate the unsigned int in java_lang_String::hash_code
+ while (len-- > 0) {
+- h = 31*h + (0xFFL & buf[s]);
++ h = 31*h + (0xFFFFFFFFL & buf[s]);
+ s++;
+ }
+ return h & 0xFFFFFFFFL;
+--- ./hotspot/make/Makefile Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/make/Makefile Wed Jul 30 03:51:43 2014 -0700
+@@ -87,6 +87,7 @@
+ # Typical C1/C2 targets made available with this Makefile
+ C1_VM_TARGETS=product1 fastdebug1 optimized1 debug1
+ C2_VM_TARGETS=product fastdebug optimized debug
++CORE_VM_TARGETS=productcore fastdebugcore optimizedcore debugcore
+ ZERO_VM_TARGETS=productzero fastdebugzero optimizedzero debugzero
+ SHARK_VM_TARGETS=productshark fastdebugshark optimizedshark debugshark
+ MINIMAL1_VM_TARGETS=productminimal1 fastdebugminimal1 debugminimal1
+@@ -136,6 +137,12 @@
+ all_debugshark: debugshark docs export_debug
+ all_optimizedshark: optimizedshark docs export_optimized
+
++allcore: all_productcore all_fastdebugcore
++all_productcore: productcore docs export_product
++all_fastdebugcore: fastdebugcore docs export_fastdebug
++all_debugcore: debugcore docs export_debug
++all_optimizedcore: optimizedcore docs export_optimized
++
+ # Do everything
+ world: all create_jdk
+
+@@ -154,6 +161,7 @@
+ # Output directories
+ C1_DIR =$(OUTPUTDIR)/$(VM_PLATFORM)_compiler1
+ C2_DIR =$(OUTPUTDIR)/$(VM_PLATFORM)_compiler2
++CORE_DIR =$(OUTPUTDIR)/$(VM_PLATFORM)_core
+ MINIMAL1_DIR=$(OUTPUTDIR)/$(VM_PLATFORM)_minimal1
+ ZERO_DIR =$(OUTPUTDIR)/$(VM_PLATFORM)_zero
+ SHARK_DIR =$(OUTPUTDIR)/$(VM_PLATFORM)_shark
+@@ -167,6 +175,10 @@
+ $(CD) $(GAMMADIR)/make; \
+ $(MAKE) BUILD_DIR=$(C2_DIR) BUILD_FLAVOR=$@ VM_TARGET=$@ generic_build2 $(ALT_OUT)
+
++$(CORE_VM_TARGETS):
++ $(CD) $(GAMMADIR)/make; \
++ $(MAKE) BUILD_DIR=$(CORE_DIR) BUILD_FLAVOR=$(@:%core=%) VM_TARGET=$@ generic_buildcore $(ALT_OUT)
++
+ $(ZERO_VM_TARGETS):
+ $(CD) $(GAMMADIR)/make; \
+ $(MAKE) BUILD_DIR=$(ZERO_DIR) BUILD_FLAVOR=$(@:%zero=%) VM_TARGET=$@ generic_buildzero $(ALT_OUT)
+@@ -228,6 +240,20 @@
+ $(MAKE_ARGS) $(VM_TARGET)
+ endif
+
++generic_buildcore: $(HOTSPOT_SCRIPT)
++ifeq ($(HS_ARCH),ppc)
++ ifeq ($(ARCH_DATA_MODEL),64)
++ $(MKDIR) -p $(OUTPUTDIR)
++ $(CD) $(OUTPUTDIR); \
++ $(MAKE) -f $(ABS_OS_MAKEFILE) \
++ $(MAKE_ARGS) $(VM_TARGET)
++ else
++ @$(ECHO) "No ($(VM_TARGET)) for ppc ARCH_DATA_MODEL=$(ARCH_DATA_MODEL)"
++ endif
++else
++ @$(ECHO) "No ($(VM_TARGET)) for $(HS_ARCH)"
++endif
++
+ generic_buildzero: $(HOTSPOT_SCRIPT)
+ $(MKDIR) -p $(OUTPUTDIR)
+ $(CD) $(OUTPUTDIR); \
+@@ -287,6 +313,7 @@
+ DOCS_DIR=$(OUTPUTDIR)/$(VM_PLATFORM)_docs
+ C1_BUILD_DIR =$(C1_DIR)/$(BUILD_FLAVOR)
+ C2_BUILD_DIR =$(C2_DIR)/$(BUILD_FLAVOR)
++CORE_BUILD_DIR =$(CORE_DIR)/$(BUILD_FLAVOR)
+ MINIMAL1_BUILD_DIR=$(MINIMAL1_DIR)/$(BUILD_FLAVOR)
+ ZERO_BUILD_DIR =$(ZERO_DIR)/$(BUILD_FLAVOR)
+ SHARK_BUILD_DIR =$(SHARK_DIR)/$(BUILD_FLAVOR)
+@@ -464,6 +491,28 @@
+ $(install-dir)
+ endif
+
++# Core
++ifeq ($(JVM_VARIANT_CORE), true)
++# Common
++$(EXPORT_LIB_DIR)/%.jar: $(CORE_BUILD_DIR)/../generated/%.jar
++ $(install-file)
++$(EXPORT_INCLUDE_DIR)/%: $(CORE_BUILD_DIR)/../generated/jvmtifiles/%
++ $(install-file)
++# Unix
++$(EXPORT_JRE_LIB_ARCH_DIR)/%.$(LIBRARY_SUFFIX): $(CORE_BUILD_DIR)/%.$(LIBRARY_SUFFIX)
++ $(install-file)
++$(EXPORT_JRE_LIB_ARCH_DIR)/%.debuginfo: $(CORE_BUILD_DIR)/%.debuginfo
++ $(install-file)
++$(EXPORT_JRE_LIB_ARCH_DIR)/%.diz: $(CORE_BUILD_DIR)/%.diz
++ $(install-file)
++$(EXPORT_SERVER_DIR)/%.$(LIBRARY_SUFFIX): $(CORE_BUILD_DIR)/%.$(LIBRARY_SUFFIX)
++ $(install-file)
++$(EXPORT_SERVER_DIR)/%.debuginfo: $(CORE_BUILD_DIR)/%.debuginfo
++ $(install-file)
++$(EXPORT_SERVER_DIR)/%.diz: $(CORE_BUILD_DIR)/%.diz
++ $(install-file)
++endif
++
+ # Shark
+ ifeq ($(JVM_VARIANT_ZEROSHARK), true)
+ # Common
+@@ -531,6 +580,7 @@
+ clean_build:
+ $(RM) -r $(C1_DIR)
+ $(RM) -r $(C2_DIR)
++ $(RM) -r $(CORE_DIR)
+ $(RM) -r $(ZERO_DIR)
+ $(RM) -r $(SHARK_DIR)
+ $(RM) -r $(MINIMAL1_DIR)
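The hotspot/make/Makefile changes above thread a new "core" VM flavor (interpreter-only, built without the C1/C2 compilers) through the target, build-directory, export, and clean machinery, with generic_buildcore gated to ppc64 builds; this is the variant the AIX/PowerPC port bootstraps with. Once wired in, it is driven like any other flavor (a sketch, run from hotspot/make):

```sh
cd hotspot/make
make productcore         # optimized core build into *_core/product
make all_fastdebugcore   # fastdebug core build plus docs and export
make clean_build         # now also removes the *_core output tree
```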
+--- ./hotspot/make/aix/Makefile Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/make/aix/Makefile Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,381 @@
++#
++# Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
++# Copyright 2012, 2013 SAP AG. All rights reserved.
++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++#
++# This code is free software; you can redistribute it and/or modify it
++# under the terms of the GNU General Public License version 2 only, as
++# published by the Free Software Foundation.
++#
++# This code is distributed in the hope that it will be useful, but WITHOUT
++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++# version 2 for more details (a copy is included in the LICENSE file that
++# accompanied this code).
++#
++# You should have received a copy of the GNU General Public License version
++# 2 along with this work; if not, write to the Free Software Foundation,
++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++#
++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++# or visit www.oracle.com if you need additional information or have any
++# questions.
++#
++#
++
++# This makefile creates a build tree and lights off a build.
++# You can go back into the build tree and perform rebuilds or
++# incremental builds as desired. Be sure to reestablish
++# environment variable settings for LD_LIBRARY_PATH and JAVA_HOME.
++
++# The make process now relies on java and javac. These can be
++# specified either implicitly on the PATH, by setting the
++# (JDK-inherited) ALT_BOOTDIR environment variable to full path to a
++# JDK in which bin/java and bin/javac are present and working (e.g.,
++# /usr/local/java/jdk1.3/solaris), or via the (JDK-inherited)
++# default BOOTDIR path value. Note that one of ALT_BOOTDIR
++# or BOOTDIR has to be set. We do *not* search javac, javah, rmic etc.
++# from the PATH.
++#
++# One can set ALT_BOOTDIR or BOOTDIR to point to a jdk that runs on
++# an architecture that differs from the target architecture, as long
++# as the bootstrap jdk runs under the same flavor of OS as the target
++# (i.e., if the target is linux, point to a jdk that runs on a linux
++# box). In order to use such a bootstrap jdk, set the make variable
++# REMOTE to the desired remote command mechanism, e.g.,
++#
++# make REMOTE="rsh -l me myotherlinuxbox"
++
++# Along with VM, Serviceability Agent (SA) is built for SA/JDI binding.
++# JDI binding on SA produces two binaries:
++# 1. sa-jdi.jar - This is built before building libjvm.so
++# Please refer to ./makefiles/sa.make
++# 2. libsa.so - Native library for SA - This is built after
++# libjsig.so (signal interposition library)
++# Please refer to ./makefiles/vm.make
++# If $(GAMMADIR)/agent dir is not present, SA components are not built.
++
++# No tests on AIX.
++TEST_IN_BUILD=false
++
++ifeq ($(GAMMADIR),)
++include ../../make/defs.make
++else
++include $(GAMMADIR)/make/defs.make
++endif
++include $(GAMMADIR)/make/$(OSNAME)/makefiles/rules.make
++
++ifndef CC_INTERP
++ ifndef FORCE_TIERED
++ FORCE_TIERED=1
++ endif
++endif
++# C1 is not ported on ppc64(le), so we cannot build a tiered VM:
++ifneq (,$(filter $(ARCH),ppc64 ppc64le))
++ FORCE_TIERED=0
++endif
++
++ifdef LP64
++ ifeq ("$(filter $(LP64_ARCH),$(BUILDARCH))","")
++ _JUNK_ := $(shell echo >&2 \
++ $(OSNAME) $(ARCH) "*** ERROR: this platform does not support 64-bit compilers!")
++ @exit 1
++ endif
++endif
++
++# we need to set up LP64 correctly to satisfy sanity checks in adlc
++ifneq ("$(filter $(LP64_ARCH),$(BUILDARCH))","")
++ MFLAGS += " LP64=1 "
++endif
++
++# pass USE_SUNCC further, through MFLAGS
++ifdef USE_SUNCC
++ MFLAGS += " USE_SUNCC=1 "
++endif
++
++# The following renders pathnames in generated Makefiles valid on
++# machines other than the machine containing the build tree.
++#
++# For example, let's say my build tree lives on /files12 on
++# exact.east.sun.com. This logic will cause GAMMADIR to begin with
++# /net/exact/files12/...
++#
++# We only do this on SunOS variants, for a couple of reasons:
++# * It is extremely rare that source trees exist on other systems
++# * It has been claimed that the Linux automounter is flakey, so
++# changing GAMMADIR in a way that exercises the automounter could
++# prove to be a source of unreliability in the build process.
++# Obviously, this Makefile is only relevant on SunOS boxes to begin
++# with, but the SunOS conditionalization will make it easier to
++# combine Makefiles in the future (assuming we ever do that).
++
++ifeq ($(OSNAME),solaris)
++
++ # prepend current directory to relative pathnames.
++ NEW_GAMMADIR := \
++ $(shell echo $(GAMMADIR) | \
++ sed -e "s=^\([^/].*\)=$(shell pwd)/\1=" \
++ )
++ unexport NEW_GAMMADIR
++
++ # If NEW_GAMMADIR doesn't already start with "/net/":
++ ifeq ($(strip $(filter /net/%,$(NEW_GAMMADIR))),)
++ # prepend /net/$(HOST)
++ # remove /net/$(HOST) if name already began with /home/
++ # remove /net/$(HOST) if name already began with /java/
++ # remove /net/$(HOST) if name already began with /lab/
++ NEW_GAMMADIR := \
++ $(shell echo $(NEW_GAMMADIR) | \
++ sed -e "s=^\(.*\)=/net/$(HOST)\1=" \
++ -e "s=^/net/$(HOST)/home/=/home/=" \
++ -e "s=^/net/$(HOST)/java/=/java/=" \
++ -e "s=^/net/$(HOST)/lab/=/lab/=" \
++ )
++ # Don't use the new value for GAMMADIR unless a file with the new
++ # name actually exists.
++ ifneq ($(wildcard $(NEW_GAMMADIR)),)
++ GAMMADIR := $(NEW_GAMMADIR)
++ endif
++ endif
++
++endif
++
++# BUILDARCH is set to "zero" for Zero builds. VARIANTARCH
++# is used to give the build directories meaningful names.
++VARIANTARCH = $(subst i386,i486,$(ZERO_LIBARCH))
++
++# There is a (semi-) regular correspondence between make targets and actions:
++#
++# Target Tree Type Build Dir
++#
++# debug compiler2 __compiler2/debug
++# fastdebug compiler2 __compiler2/fastdebug
++# optimized compiler2 __compiler2/optimized
++# product compiler2 __compiler2/product
++#
++# debug1 compiler1 __compiler1/debug
++# fastdebug1 compiler1 __compiler1/fastdebug
++# optimized1 compiler1 __compiler1/optimized
++# product1 compiler1 __compiler1/product
++#
++# debugcore core __core/debug
++# fastdebugcore core __core/fastdebug
++# optimizedcore core __core/optimized
++# productcore core __core/product
++#
++# debugzero zero __zero/debug
++# fastdebugzero zero __zero/fastdebug
++# optimizedzero zero __zero/optimized
++# productzero zero __zero/product
++#
++# debugshark shark __shark/debug
++# fastdebugshark shark __shark/fastdebug
++# optimizedshark shark __shark/optimized
++# productshark shark __shark/product
++#
++# fastdebugminimal1 minimal1 __minimal1/fastdebug
++# productminimal1 minimal1 __minimal1/product
++#
++# What you get with each target:
++#
++# debug* - debug compile with asserts enabled
++# fastdebug* - optimized compile, but with asserts enabled
++# optimized* - optimized compile, no asserts
++# product* - the shippable thing: optimized compile, no asserts, -DPRODUCT
++
++# This target list needs to be coordinated with the usage message
++# in the build.sh script:
++TARGETS = debug fastdebug optimized product
++
++ifeq ($(findstring true, $(JVM_VARIANT_ZERO) $(JVM_VARIANT_ZEROSHARK)), true)
++ SUBDIR_DOCS = $(OSNAME)_$(VARIANTARCH)_docs
++else
++ SUBDIR_DOCS = $(OSNAME)_$(BUILDARCH)_docs
++endif
++SUBDIRS_C1 = $(addprefix $(OSNAME)_$(BUILDARCH)_compiler1/,$(TARGETS))
++SUBDIRS_C2 = $(addprefix $(OSNAME)_$(BUILDARCH)_compiler2/,$(TARGETS))
++SUBDIRS_TIERED = $(addprefix $(OSNAME)_$(BUILDARCH)_tiered/,$(TARGETS))
++SUBDIRS_CORE = $(addprefix $(OSNAME)_$(BUILDARCH)_core/,$(TARGETS))
++SUBDIRS_ZERO = $(addprefix $(OSNAME)_$(VARIANTARCH)_zero/,$(TARGETS))
++SUBDIRS_SHARK = $(addprefix $(OSNAME)_$(VARIANTARCH)_shark/,$(TARGETS))
++SUBDIRS_MINIMAL1 = $(addprefix $(OSNAME)_$(BUILDARCH)_minimal1/,$(TARGETS))
++
++TARGETS_C2 = $(TARGETS)
++TARGETS_C1 = $(addsuffix 1,$(TARGETS))
++TARGETS_TIERED = $(addsuffix tiered,$(TARGETS))
++TARGETS_CORE = $(addsuffix core,$(TARGETS))
++TARGETS_ZERO = $(addsuffix zero,$(TARGETS))
++TARGETS_SHARK = $(addsuffix shark,$(TARGETS))
++TARGETS_MINIMAL1 = $(addsuffix minimal1,$(TARGETS))
++
++BUILDTREE_MAKE = $(GAMMADIR)/make/$(OSNAME)/makefiles/buildtree.make
++BUILDTREE_VARS = GAMMADIR=$(GAMMADIR) OS_FAMILY=$(OSNAME) SRCARCH=$(SRCARCH) BUILDARCH=$(BUILDARCH) LIBARCH=$(LIBARCH)
++BUILDTREE_VARS += HOTSPOT_RELEASE_VERSION=$(HOTSPOT_RELEASE_VERSION) HOTSPOT_BUILD_VERSION=$(HOTSPOT_BUILD_VERSION) JRE_RELEASE_VERSION=$(JRE_RELEASE_VERSION)
++BUILDTREE_VARS += ENABLE_FULL_DEBUG_SYMBOLS=$(ENABLE_FULL_DEBUG_SYMBOLS) OBJCOPY=$(OBJCOPY) STRIP_POLICY=$(STRIP_POLICY) ZIP_DEBUGINFO_FILES=$(ZIP_DEBUGINFO_FILES) ZIPEXE=$(ZIPEXE)
++
++BUILDTREE = $(MAKE) -f $(BUILDTREE_MAKE) $(BUILDTREE_VARS)
++
++#-------------------------------------------------------------------------------
++
++# Could make everything by default, but that would take a while.
++all:
++ @echo "Try '$(MAKE) ...' where is one or more of"
++ @echo " $(TARGETS_C2)"
++ @echo " $(TARGETS_C1)"
++ @echo " $(TARGETS_CORE)"
++ @echo " $(TARGETS_ZERO)"
++ @echo " $(TARGETS_SHARK)"
++ @echo " $(TARGETS_MINIMAL1)"
++
++checks: check_os_version check_j2se_version
++
++# We do not want people accidentally building on old systems (e.g. Linux 2.2.x,
++# Solaris 2.5.1, 2.6).
++# Disable this check by setting DISABLE_HOTSPOT_OS_VERSION_CHECK=ok.
++
++SUPPORTED_OS_VERSION = AIX
++OS_VERSION := $(shell uname -a)
++EMPTY_IF_NOT_SUPPORTED = $(filter $(SUPPORTED_OS_VERSION),$(OS_VERSION))
++
++check_os_version:
++ifeq ($(DISABLE_HOTSPOT_OS_VERSION_CHECK)$(EMPTY_IF_NOT_SUPPORTED),)
++ $(QUIETLY) >&2 echo "*** This OS is not supported:" `uname -a`; exit 1;
++endif
++
++# jvmti.make requires XSLT (J2SE 1.4.x or newer):
++XSLT_CHECK = $(REMOTE) $(RUN.JAVAP) javax.xml.transform.TransformerFactory
++# If not found then fail fast.
++check_j2se_version:
++ $(QUIETLY) $(XSLT_CHECK) > /dev/null 2>&1; \
++ if [ $$? -ne 0 ]; then \
++ $(REMOTE) $(RUN.JAVA) -version; \
++ echo "*** An XSLT processor (J2SE 1.4.x or newer) is required" \
++ "to bootstrap this build" 1>&2; \
++ exit 1; \
++ fi
++
++$(SUBDIRS_TIERED): $(BUILDTREE_MAKE)
++ $(QUIETLY) $(MAKE) -f $(GAMMADIR)/make/$(OSNAME)/Makefile checks
++ $(BUILDTREE) VARIANT=tiered
++
++$(SUBDIRS_C2): $(BUILDTREE_MAKE)
++ifeq ($(FORCE_TIERED),1)
++ $(QUIETLY) $(MAKE) -f $(GAMMADIR)/make/$(OSNAME)/Makefile checks
++ $(BUILDTREE) VARIANT=tiered FORCE_TIERED=1
++else
++ $(QUIETLY) $(MAKE) -f $(GAMMADIR)/make/$(OSNAME)/Makefile checks
++ $(BUILDTREE) VARIANT=compiler2
++endif
++
++$(SUBDIRS_C1): $(BUILDTREE_MAKE)
++ $(QUIETLY) $(MAKE) -f $(GAMMADIR)/make/$(OSNAME)/Makefile checks
++ $(BUILDTREE) VARIANT=compiler1
++
++$(SUBDIRS_CORE): $(BUILDTREE_MAKE)
++ $(QUIETLY) $(MAKE) -f $(GAMMADIR)/make/$(OSNAME)/Makefile checks
++ $(BUILDTREE) VARIANT=core
++
++$(SUBDIRS_ZERO): $(BUILDTREE_MAKE) platform_zero
++ $(QUIETLY) $(MAKE) -f $(GAMMADIR)/make/$(OSNAME)/Makefile checks
++ $(BUILDTREE) VARIANT=zero VARIANTARCH=$(VARIANTARCH)
++
++$(SUBDIRS_SHARK): $(BUILDTREE_MAKE) platform_zero
++ $(QUIETLY) $(MAKE) -f $(GAMMADIR)/make/$(OSNAME)/Makefile checks
++ $(BUILDTREE) VARIANT=shark VARIANTARCH=$(VARIANTARCH)
++
++$(SUBDIRS_MINIMAL1): $(BUILDTREE_MAKE)
++ $(QUIETLY) $(MAKE) -f $(GAMMADIR)/make/$(OSNAME)/Makefile checks
++ $(BUILDTREE) VARIANT=minimal1
++
++
++platform_zero: $(GAMMADIR)/make/$(OSNAME)/platform_zero.in
++ $(SED) 's/@ZERO_ARCHDEF@/$(ZERO_ARCHDEF)/g;s/@ZERO_LIBARCH@/$(ZERO_LIBARCH)/g;' < $< > $@
++
++# Define INSTALL=y at command line to automatically copy JVM into JAVA_HOME
++
++$(TARGETS_C2): $(SUBDIRS_C2)
++ cd $(OSNAME)_$(BUILDARCH)_compiler2/$@ && $(MAKE) $(MFLAGS)
++ifdef INSTALL
++ cd $(OSNAME)_$(BUILDARCH)_compiler2/$@ && $(MAKE) $(MFLAGS) install
++endif
++
++$(TARGETS_TIERED): $(SUBDIRS_TIERED)
++ cd $(OSNAME)_$(BUILDARCH)_tiered/$(patsubst %tiered,%,$@) && $(MAKE) $(MFLAGS)
++ifdef INSTALL
++ cd $(OSNAME)_$(BUILDARCH)_tiered/$(patsubst %tiered,%,$@) && $(MAKE) $(MFLAGS) install
++endif
++
++$(TARGETS_C1): $(SUBDIRS_C1)
++ cd $(OSNAME)_$(BUILDARCH)_compiler1/$(patsubst %1,%,$@) && $(MAKE) $(MFLAGS)
++ifdef INSTALL
++ cd $(OSNAME)_$(BUILDARCH)_compiler1/$(patsubst %1,%,$@) && $(MAKE) $(MFLAGS) install
++endif
++
++$(TARGETS_CORE): $(SUBDIRS_CORE)
++ cd $(OSNAME)_$(BUILDARCH)_core/$(patsubst %core,%,$@) && $(MAKE) $(MFLAGS)
++ifdef INSTALL
++ cd $(OSNAME)_$(BUILDARCH)_core/$(patsubst %core,%,$@) && $(MAKE) $(MFLAGS) install
++endif
++
++$(TARGETS_ZERO): $(SUBDIRS_ZERO)
++ cd $(OSNAME)_$(VARIANTARCH)_zero/$(patsubst %zero,%,$@) && $(MAKE) $(MFLAGS)
++ifdef INSTALL
++ cd $(OSNAME)_$(VARIANTARCH)_zero/$(patsubst %zero,%,$@) && $(MAKE) $(MFLAGS) install
++endif
++
++$(TARGETS_SHARK): $(SUBDIRS_SHARK)
++ cd $(OSNAME)_$(VARIANTARCH)_shark/$(patsubst %shark,%,$@) && $(MAKE) $(MFLAGS)
++ifdef INSTALL
++ cd $(OSNAME)_$(VARIANTARCH)_shark/$(patsubst %shark,%,$@) && $(MAKE) $(MFLAGS) install
++endif
++
++$(TARGETS_MINIMAL1): $(SUBDIRS_MINIMAL1)
++ cd $(OSNAME)_$(BUILDARCH)_minimal1/$(patsubst %minimal1,%,$@) && $(MAKE) $(MFLAGS)
++ifdef INSTALL
++ cd $(OSNAME)_$(BUILDARCH)_minimal1/$(patsubst %minimal1,%,$@) && $(MAKE) $(MFLAGS) install
++endif
++
++# Just build the tree, and nothing else:
++tree: $(SUBDIRS_C2)
++tree1: $(SUBDIRS_C1)
++treecore: $(SUBDIRS_CORE)
++treezero: $(SUBDIRS_ZERO)
++treeshark: $(SUBDIRS_SHARK)
++treeminimal1: $(SUBDIRS_MINIMAL1)
++
++# Doc target. This is the same for all build options.
++# Hence create a docs directory beside ...$(ARCH)_[...]
++# We specify 'BUILD_FLAVOR=product' so that the proper
++# ENABLE_FULL_DEBUG_SYMBOLS value is used.
++docs: checks
++ $(QUIETLY) mkdir -p $(SUBDIR_DOCS)
++ $(MAKE) -f $(GAMMADIR)/make/$(OSNAME)/makefiles/jvmti.make $(MFLAGS) $(BUILDTREE_VARS) JvmtiOutDir=$(SUBDIR_DOCS) BUILD_FLAVOR=product jvmtidocs
++
++# Synonyms for win32-like targets.
++compiler2: debug product
++
++compiler1: debug1 product1
++
++core: debugcore productcore
++
++zero: debugzero productzero
++
++shark: debugshark productshark
++
++clean_docs:
++ rm -rf $(SUBDIR_DOCS)
++
++clean_compiler1 clean_compiler2 clean_core clean_zero clean_shark clean_minimal1:
++ rm -rf $(OSNAME)_$(BUILDARCH)_$(subst clean_,,$@)
++
++clean: clean_compiler2 clean_compiler1 clean_core clean_zero clean_shark clean_minimal1 clean_docs
++
++include $(GAMMADIR)/make/cscope.make
++
++#-------------------------------------------------------------------------------
++
++.PHONY: $(TARGETS_C2) $(TARGETS_C1) $(TARGETS_CORE) $(TARGETS_ZERO) $(TARGETS_SHARK) $(TARGETS_MINIMAL1)
++.PHONY: tree tree1 treecore treezero treeshark treeminimal1
++.PHONY: all compiler1 compiler2 core zero shark
++.PHONY: clean clean_compiler1 clean_compiler2 clean_core clean_zero clean_shark clean_minimal1 docs clean_docs
++.PHONY: checks check_os_version check_j2se_version
+--- ./hotspot/make/aix/adlc_updater Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/make/aix/adlc_updater Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,20 @@
++#! /bin/sh
++#
++# This file is used by adlc.make to selectively update generated
++# adlc files. Because source and target directories are relative
++# paths, this file is copied to the target build directory before
++# use.
++#
++# adlc-updater
++#
++fix_lines() {
++ # repair bare #line directives in $1 to refer to $2
++ awk < $1 > $1+ '
++ /^#line 999999$/ {print "#line " (NR+1) " \"" F2 "\""; next}
++ {print}
++ ' F2=$2
++ mv $1+ $1
++}
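++# e.g. a bare "#line 999999" on line 41 of the generated file is rewritten to
++# '#line 42 "<target path>"' so the directive names the file's final location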
++fix_lines $2/$1 $3/$1
++[ -f $3/$1 ] && cmp -s $2/$1 $3/$1 || \
++( [ -f $3/$1 ] && echo Updating $3/$1 ; touch $2/made-change ; mv $2/$1 $3/$1 )
+--- ./hotspot/make/aix/makefiles/adjust-mflags.sh Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/make/aix/makefiles/adjust-mflags.sh Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,87 @@
++#! /bin/sh
++#
++# Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++#
++# This code is free software; you can redistribute it and/or modify it
++# under the terms of the GNU General Public License version 2 only, as
++# published by the Free Software Foundation.
++#
++# This code is distributed in the hope that it will be useful, but WITHOUT
++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++# version 2 for more details (a copy is included in the LICENSE file that
++# accompanied this code).
++#
++# You should have received a copy of the GNU General Public License version
++# 2 along with this work; if not, write to the Free Software Foundation,
++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++#
++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++# or visit www.oracle.com if you need additional information or have any
++# questions.
++#
++#
++
++# This script is used only from top.make.
++# The macro $(MFLAGS-adjusted) calls this script to
++# adjust the "-j" arguments to take into account
++# the HOTSPOT_BUILD_JOBS variable. The default
++# handling of the "-j" argument by gnumake does
++# not meet our needs, so we must adjust it ourselves.
++
++# This argument adjustment applies to two recursive
++# calls to "$(MAKE) $(MFLAGS-adjusted)" in top.make.
++# One invokes adlc.make, and the other invokes vm.make.
++# The adjustment propagates the desired concurrency
++# level down to the sub-make (of the adlc or vm).
++# The default behavior of gnumake is to run all
++# sub-makes without concurrency ("-j1").
++
++# Also, we use a make variable rather than an explicit
++# "-j" argument to control this setting, so that
++# the concurrency setting (which must be tuned separately
++# for each MP system) can be set via an environment variable.
++# The recommended setting is 1.5x to 2x the number of available
++# CPUs on the MP system, which is large enough to keep the CPUs
++# busy (even though some jobs may be I/O bound) but, we may presume,
++# not so large as to overflow the system's swap space.
++
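++# Worked example (illustrative): with MFLAGS="-k -j4" and
++# HOTSPOT_BUILD_JOBS=8, the sed script below rewrites " -k -j4" to
++# " -k -j8"; with HOTSPOT_BUILD_JOBS unset, the default_build_jobs
++# value is substituted instead.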
++set -eu
++
++default_build_jobs=4
++
++case $# in
++[12]) true;;
++*) >&2 echo "Usage: $0 ${MFLAGS} ${HOTSPOT_BUILD_JOBS}"; exit 2;;
++esac
++
++MFLAGS=$1
++HOTSPOT_BUILD_JOBS=${2-}
++
++# Normalize any -jN argument to the form " -j${HBJ}"
++MFLAGS=`
++ echo "$MFLAGS" \
++ | sed '
++ s/^-/ -/
++ s/ -\([^ ][^ ]*\)j/ -\1 -j/
++ s/ -j[0-9][0-9]*/ -j/
++ s/ -j\([^ ]\)/ -j -\1/
++ s/ -j/ -j'${HOTSPOT_BUILD_JOBS:-${default_build_jobs}}'/
++ ' `
++
++case ${HOTSPOT_BUILD_JOBS} in \
++
++'') case ${MFLAGS} in
++ *\ -j*)
++ >&2 echo "# Note: -jN is ineffective for setting parallelism in this makefile."
++ >&2 echo "# please set HOTSPOT_BUILD_JOBS=${default_build_jobs} in the command line or environment."
++ esac;;
++
++?*) case ${MFLAGS} in
++ *\ -j*) true;;
++ *) MFLAGS="-j${HOTSPOT_BUILD_JOBS} ${MFLAGS}";;
++ esac;;
++esac
++
++echo "${MFLAGS}"
+--- ./hotspot/make/aix/makefiles/adlc.make Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/make/aix/makefiles/adlc.make Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,231 @@
++#
++# Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++#
++# This code is free software; you can redistribute it and/or modify it
++# under the terms of the GNU General Public License version 2 only, as
++# published by the Free Software Foundation.
++#
++# This code is distributed in the hope that it will be useful, but WITHOUT
++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++# version 2 for more details (a copy is included in the LICENSE file that
++# accompanied this code).
++#
++# You should have received a copy of the GNU General Public License version
++# 2 along with this work; if not, write to the Free Software Foundation,
++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++#
++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++# or visit www.oracle.com if you need additional information or have any
++# questions.
++#
++#
++
++# This makefile (adlc.make) is included from the adlc.make in the
++# build directories.
++# It knows how to compile, link, and run the adlc.
++
++include $(GAMMADIR)/make/$(Platform_os_family)/makefiles/rules.make
++
++# #########################################################################
++
++# OUTDIR must be the same as AD_Dir = $(GENERATED)/adfiles in top.make:
++GENERATED = ../generated
++OUTDIR = $(GENERATED)/adfiles
++
++ARCH = $(Platform_arch)
++OS = $(Platform_os_family)
++
++SOURCE.AD = $(OUTDIR)/$(OS)_$(Platform_arch_model).ad
++
++ifeq ("${Platform_arch_model}", "${Platform_arch}")
++ SOURCES.AD = \
++ $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad)
++else
++ SOURCES.AD = \
++ $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \
++ $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch).ad)
++endif
++
++EXEC = $(OUTDIR)/adlc
++
++# set VPATH so make knows where to look for source files
++Src_Dirs_V += $(GAMMADIR)/src/share/vm/adlc
++VPATH += $(Src_Dirs_V:%=%:)
++
++# set INCLUDES for C preprocessor
++Src_Dirs_I += $(GAMMADIR)/src/share/vm/adlc $(GENERATED)
++INCLUDES += $(Src_Dirs_I:%=-I%)
++
++# set flags for adlc compilation
++CXXFLAGS = $(SYSDEFS) $(INCLUDES)
++
++# Force assertions on.
++CXXFLAGS += -DASSERT
++
++# CFLAGS_WARN holds compiler options to suppress/enable warnings.
++# Suppress warnings (for now)
++CFLAGS_WARN = -w
++CFLAGS += $(CFLAGS_WARN)
++
++OBJECTNAMES = \
++ adlparse.o \
++ archDesc.o \
++ arena.o \
++ dfa.o \
++ dict2.o \
++ filebuff.o \
++ forms.o \
++ formsopt.o \
++ formssel.o \
++ main.o \
++ adlc-opcodes.o \
++ output_c.o \
++ output_h.o \
++
++OBJECTS = $(OBJECTNAMES:%=$(OUTDIR)/%)
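++# (substitution reference: each name is prefixed with $(OUTDIR), e.g.
++# adlparse.o becomes ../generated/adfiles/adlparse.o)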
++
++GENERATEDNAMES = \
++ ad_$(Platform_arch_model).cpp \
++ ad_$(Platform_arch_model).hpp \
++ ad_$(Platform_arch_model)_clone.cpp \
++ ad_$(Platform_arch_model)_expand.cpp \
++ ad_$(Platform_arch_model)_format.cpp \
++ ad_$(Platform_arch_model)_gen.cpp \
++ ad_$(Platform_arch_model)_misc.cpp \
++ ad_$(Platform_arch_model)_peephole.cpp \
++ ad_$(Platform_arch_model)_pipeline.cpp \
++ adGlobals_$(Platform_arch_model).hpp \
++ dfa_$(Platform_arch_model).cpp \
++
++GENERATEDFILES = $(GENERATEDNAMES:%=$(OUTDIR)/%)
++
++# #########################################################################
++
++all: $(EXEC)
++
++$(EXEC) : $(OBJECTS)
++ @echo Making adlc
++ $(QUIETLY) $(HOST.LINK_NOPROF.CXX) -o $(EXEC) $(OBJECTS)
++
++# Random dependencies:
++$(OBJECTS): opcodes.hpp classes.hpp adlc.hpp adlcVMDeps.hpp adlparse.hpp archDesc.hpp arena.hpp dict2.hpp filebuff.hpp forms.hpp formsopt.hpp formssel.hpp
++
++# The source files refer to ostream.h, which sparcworks calls iostream.h
++$(OBJECTS): ostream.h
++
++ostream.h :
++	@echo >$@ '#include <iostream>'
++
++dump:
++ : OUTDIR=$(OUTDIR)
++ : OBJECTS=$(OBJECTS)
++ : products = $(GENERATEDFILES)
++
++all: $(GENERATEDFILES)
++
++$(GENERATEDFILES): refresh_adfiles
++
++# Get a unique temporary directory name, so multiple makes can run in parallel.
++# Note that product files are updated via "mv", which is atomic.
++TEMPDIR := $(OUTDIR)/mktmp$(shell echo $$$$)
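++# (e.g. a make whose shell reports PID 1234 works in $(OUTDIR)/mktmp1234)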
++
++# Debuggable by default
++CFLAGS += -g
++
++# Pass -D flags into ADLC.
++ADLCFLAGS += $(SYSDEFS)
++
++# Note "+="; it is a hook so flags.make can add more flags, like -g or -DFOO.
++ADLCFLAGS += -q -T
++
++# Normally, debugging is done directly on the ad_*.cpp files.
++# But -g will put #line directives in those files pointing back to .ad.
++# Some builds of gcc 3.2 have a bug that gets tickled by the extra #line directives,
++# so skip it for 3.2 and earlier.
++ifneq "$(shell expr \( $(CC_VER_MAJOR) \> 3 \) \| \( \( $(CC_VER_MAJOR) = 3 \) \& \( $(CC_VER_MINOR) \>= 3 \) \))" "0"
++ADLCFLAGS += -g
++endif
++
++ifdef LP64
++ADLCFLAGS += -D_LP64
++else
++ADLCFLAGS += -U_LP64
++endif
++
++#
++# adlc_updater is a simple sh script, under SCCS control. It is
++# used to selectively update generated adlc files. This should
++# provide a nice compilation speed improvement.
++#
++ADLC_UPDATER_DIRECTORY = $(GAMMADIR)/make/$(OS)
++ADLC_UPDATER = adlc_updater
++$(ADLC_UPDATER): $(ADLC_UPDATER_DIRECTORY)/$(ADLC_UPDATER)
++ $(QUIETLY) cp $< $@; chmod +x $@
++
++# This action refreshes all generated adlc files simultaneously.
++# The way it works is this:
++# 1) create a scratch directory to work in.
++# 2) if the current working directory does not have $(ADLC_UPDATER), copy it.
++# 3) run the compiled adlc executable. This will create new adlc files in the scratch directory.
++# 4) call $(ADLC_UPDATER) on each generated adlc file. It will selectively update changed or missing files.
++# 5) If we actually updated any files, echo a notice.
++#
++refresh_adfiles: $(EXEC) $(SOURCE.AD) $(ADLC_UPDATER)
++ @rm -rf $(TEMPDIR); mkdir $(TEMPDIR)
++ $(QUIETLY) $(EXEC) $(ADLCFLAGS) $(SOURCE.AD) \
++ -c$(TEMPDIR)/ad_$(Platform_arch_model).cpp -h$(TEMPDIR)/ad_$(Platform_arch_model).hpp -a$(TEMPDIR)/dfa_$(Platform_arch_model).cpp -v$(TEMPDIR)/adGlobals_$(Platform_arch_model).hpp \
++ || { rm -rf $(TEMPDIR); exit 1; }
++ $(QUIETLY) ./$(ADLC_UPDATER) ad_$(Platform_arch_model).cpp $(TEMPDIR) $(OUTDIR)
++ $(QUIETLY) ./$(ADLC_UPDATER) ad_$(Platform_arch_model).hpp $(TEMPDIR) $(OUTDIR)
++ $(QUIETLY) ./$(ADLC_UPDATER) ad_$(Platform_arch_model)_clone.cpp $(TEMPDIR) $(OUTDIR)
++ $(QUIETLY) ./$(ADLC_UPDATER) ad_$(Platform_arch_model)_expand.cpp $(TEMPDIR) $(OUTDIR)
++ $(QUIETLY) ./$(ADLC_UPDATER) ad_$(Platform_arch_model)_format.cpp $(TEMPDIR) $(OUTDIR)
++ $(QUIETLY) ./$(ADLC_UPDATER) ad_$(Platform_arch_model)_gen.cpp $(TEMPDIR) $(OUTDIR)
++ $(QUIETLY) ./$(ADLC_UPDATER) ad_$(Platform_arch_model)_misc.cpp $(TEMPDIR) $(OUTDIR)
++ $(QUIETLY) ./$(ADLC_UPDATER) ad_$(Platform_arch_model)_peephole.cpp $(TEMPDIR) $(OUTDIR)
++ $(QUIETLY) ./$(ADLC_UPDATER) ad_$(Platform_arch_model)_pipeline.cpp $(TEMPDIR) $(OUTDIR)
++ $(QUIETLY) ./$(ADLC_UPDATER) adGlobals_$(Platform_arch_model).hpp $(TEMPDIR) $(OUTDIR)
++ $(QUIETLY) ./$(ADLC_UPDATER) dfa_$(Platform_arch_model).cpp $(TEMPDIR) $(OUTDIR)
++ $(QUIETLY) [ -f $(TEMPDIR)/made-change ] \
++ || echo "Rescanned $(SOURCE.AD) but encountered no changes."
++ $(QUIETLY) rm -rf $(TEMPDIR)
++
++
++# #########################################################################
++
++$(SOURCE.AD): $(SOURCES.AD)
++ $(QUIETLY) $(PROCESS_AD_FILES) $(SOURCES.AD) > $(SOURCE.AD)
++
++#PROCESS_AD_FILES = cat
++# Pass through #line directives, in case user enables -g option above:
++PROCESS_AD_FILES = awk '{ \
++ if (CUR_FN != FILENAME) { CUR_FN=FILENAME; NR_BASE=NR-1; need_lineno=1 } \
++ if (need_lineno && $$0 !~ /\/\//) \
++ { print "\n\n\#line " (NR-NR_BASE) " \"" FILENAME "\""; need_lineno=0 }; \
++ print }'
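++# e.g. at the first non-comment line of each input .ad file this emits a
++# directive of the form:  #line 1 "<path to that .ad file>"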
++
++$(OUTDIR)/%.o: %.cpp
++ @echo Compiling $<
++ $(QUIETLY) $(REMOVE_TARGET)
++ $(QUIETLY) $(HOST.COMPILE.CXX) -o $@ $< $(COMPILE_DONE)
++
++# Some object files are given a prefix, to disambiguate
++# them from objects of the same name built for the VM.
++$(OUTDIR)/adlc-%.o: %.cpp
++ @echo Compiling $<
++ $(QUIETLY) $(REMOVE_TARGET)
++ $(QUIETLY) $(HOST.COMPILE.CXX) -o $@ $< $(COMPILE_DONE)
++
++# #########################################################################
++
++clean:
++ rm $(OBJECTS)
++
++cleanall:
++ rm $(OBJECTS) $(EXEC)
++
++# #########################################################################
++
++.PHONY: all dump refresh_adfiles clean cleanall
+--- ./hotspot/make/aix/makefiles/build_vm_def.sh Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/make/aix/makefiles/build_vm_def.sh Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,18 @@
++#!/bin/sh
++
++# If we're cross compiling use that path for nm
++if [ "$CROSS_COMPILE_ARCH" != "" ]; then
++NM=$ALT_COMPILER_PATH/nm
++else
++# On AIX we must avoid picking up the 'nm' from GNU binutils, which may be
++# installed under /opt/freeware/bin, so we use an absolute path here.
++NM=/usr/bin/nm
++fi
++
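++# nm prints lines like "<addr> D gHotSpotVMStructs"; the awk below keeps the
++# exported VM symbols and emits them as mapfile entries of the form "\tname;"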
++$NM -X64 -B -C $* \
++ | awk '{
++ if (($2="d" || $2="D") && ($3 ~ /^__vft/ || $3 ~ /^gHotSpotVM/)) print "\t" $3 ";"
++ if ($3 ~ /^UseSharedSpaces$/) print "\t" $3 ";"
++ if ($3 ~ /^SharedArchivePath__9Arguments$/) print "\t" $3 ";"
++ }' \
++ | sort -u
+--- ./hotspot/make/aix/makefiles/buildtree.make Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/make/aix/makefiles/buildtree.make Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,364 @@
++#
++# Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
++# Copyright 2012, 2013 SAP AG. All rights reserved.
++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++#
++# This code is free software; you can redistribute it and/or modify it
++# under the terms of the GNU General Public License version 2 only, as
++# published by the Free Software Foundation.
++#
++# This code is distributed in the hope that it will be useful, but WITHOUT
++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++# version 2 for more details (a copy is included in the LICENSE file that
++# accompanied this code).
++#
++# You should have received a copy of the GNU General Public License version
++# 2 along with this work; if not, write to the Free Software Foundation,
++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++#
++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++# or visit www.oracle.com if you need additional information or have any
++# questions.
++#
++#
++
++# Usage:
++#
++# $(MAKE) -f buildtree.make SRCARCH=srcarch BUILDARCH=buildarch LIBARCH=libarch
++# GAMMADIR=dir OS_FAMILY=os VARIANT=variant
++#
++# The macros ARCH, GAMMADIR, OS_FAMILY and VARIANT must be defined in the
++# environment or on the command-line:
++#
++# ARCH - sparc, i486, ... HotSpot cpu and os_cpu source directory
++# BUILDARCH - build directory
++# LIBARCH - the corresponding directory in JDK/JRE
++# GAMMADIR - top of workspace
++# OS_FAMILY - operating system
++# VARIANT - core, compiler1, compiler2, or tiered
++# HOTSPOT_RELEASE_VERSION - <major>.<minor>-b<nn> (11.0-b07)
++# HOTSPOT_BUILD_VERSION - internal, internal-$(USER_RELEASE_SUFFIX) or empty
++# JRE_RELEASE_VERSION - <major>.<minor>.<micro> (1.7.0)
++#
++# Builds the directory trees with makefiles plus some convenience files in
++# each directory:
++#
++# Makefile - for "make foo"
++# flags.make - with macro settings
++# vm.make - to support making "$(MAKE) -v vm.make" in makefiles
++# adlc.make -
++# trace.make - generate tracing event and type definitions
++# jvmti.make - generate JVMTI bindings from the spec (JSR-163)
++# sa.make - generate SA jar file and natives
++#
++# The makefiles are split this way so that "make foo" will run faster by not
++# having to read the dependency files for the vm.
++
++-include $(SPEC)
++include $(GAMMADIR)/make/scm.make
++include $(GAMMADIR)/make/defs.make
++include $(GAMMADIR)/make/altsrc.make
++
++
++# 'gmake MAKE_VERBOSE=y' or 'gmake QUIETLY=' gives all the gory details.
++QUIETLY$(MAKE_VERBOSE) = @
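++# (with MAKE_VERBOSE=y this assigns to QUIETLYy instead, leaving QUIETLY
++# itself empty so every command line is echoed)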
++
++ifeq ($(findstring true, $(JVM_VARIANT_ZERO) $(JVM_VARIANT_ZEROSHARK)), true)
++ PLATFORM_FILE = $(shell dirname $(shell dirname $(shell pwd)))/platform_zero
++else
++ ifdef USE_SUNCC
++ PLATFORM_FILE = $(GAMMADIR)/make/$(OS_FAMILY)/platform_$(BUILDARCH).suncc
++ else
++ PLATFORM_FILE = $(GAMMADIR)/make/$(OS_FAMILY)/platform_$(BUILDARCH)
++ endif
++endif
++
++# Allow overriding of the arch part of the directory but default
++# to BUILDARCH if nothing is specified
++ifeq ($(VARIANTARCH),)
++ VARIANTARCH=$(BUILDARCH)
++endif
++
++ifdef FORCE_TIERED
++ifeq ($(VARIANT),tiered)
++PLATFORM_DIR = $(OS_FAMILY)_$(VARIANTARCH)_compiler2
++else
++PLATFORM_DIR = $(OS_FAMILY)_$(VARIANTARCH)_$(VARIANT)
++endif
++else
++PLATFORM_DIR = $(OS_FAMILY)_$(VARIANTARCH)_$(VARIANT)
++endif
++
++#
++# We do two levels of exclusion in the shared directory.
++# TOPLEVEL excludes are pruned, they are not recursively searched,
++# but lower level directories can be named without fear of collision.
++# ALWAYS excludes are excluded at any level in the directory tree.
++#
++
++ALWAYS_EXCLUDE_DIRS = $(SCM_DIRS)
++
++ifeq ($(VARIANT),tiered)
++TOPLEVEL_EXCLUDE_DIRS = $(ALWAYS_EXCLUDE_DIRS) -o -name adlc -o -name agent
++else
++ifeq ($(VARIANT),compiler2)
++TOPLEVEL_EXCLUDE_DIRS = $(ALWAYS_EXCLUDE_DIRS) -o -name adlc -o -name c1 -o -name agent
++else
++# compiler1 and core use the same exclude list
++TOPLEVEL_EXCLUDE_DIRS = $(ALWAYS_EXCLUDE_DIRS) -o -name adlc -o -name opto -o -name libadt -o -name agent
++endif
++endif
++
++# Get things from the platform file.
++COMPILER = $(shell sed -n 's/^compiler[ ]*=[ ]*//p' $(PLATFORM_FILE))
++
++SIMPLE_DIRS = \
++ $(PLATFORM_DIR)/generated/dependencies \
++ $(PLATFORM_DIR)/generated/adfiles \
++ $(PLATFORM_DIR)/generated/jvmtifiles \
++ $(PLATFORM_DIR)/generated/tracefiles
++
++TARGETS = debug fastdebug optimized product
++SUBMAKE_DIRS = $(addprefix $(PLATFORM_DIR)/,$(TARGETS))
++
++# For dependencies and recursive makes.
++BUILDTREE_MAKE = $(GAMMADIR)/make/$(OS_FAMILY)/makefiles/buildtree.make
++
++BUILDTREE_TARGETS = Makefile flags.make flags_vm.make vm.make adlc.make jvmti.make trace.make sa.make
++
++BUILDTREE_VARS = GAMMADIR=$(GAMMADIR) OS_FAMILY=$(OS_FAMILY) \
++ SRCARCH=$(SRCARCH) BUILDARCH=$(BUILDARCH) LIBARCH=$(LIBARCH) VARIANT=$(VARIANT)
++
++# Define variables to be set in flags.make.
++# Default values are set in make/defs.make.
++ifeq ($(HOTSPOT_BUILD_VERSION),)
++ HS_BUILD_VER=$(HOTSPOT_RELEASE_VERSION)
++else
++ HS_BUILD_VER=$(HOTSPOT_RELEASE_VERSION)-$(HOTSPOT_BUILD_VERSION)
++endif
++# Set BUILD_USER from system-dependent hints: $LOGNAME, $(whoami)
++ifndef HOTSPOT_BUILD_USER
++ HOTSPOT_BUILD_USER := $(shell echo $$LOGNAME)
++endif
++ifndef HOTSPOT_BUILD_USER
++ HOTSPOT_BUILD_USER := $(shell whoami)
++endif
++# Define HOTSPOT_VM_DISTRO based on settings in make/openjdk_distro
++# or make/hotspot_distro.
++ifndef HOTSPOT_VM_DISTRO
++ ifeq ($(call if-has-altsrc,$(HS_COMMON_SRC)/,true,false),true)
++ include $(GAMMADIR)/make/hotspot_distro
++ else
++ include $(GAMMADIR)/make/openjdk_distro
++ endif
++endif
++
++# For a hotspot-only build, and/or when OPENJDK isn't passed down, we need to set OPENJDK.
++ifndef OPENJDK
++ ifneq ($(call if-has-altsrc,$(HS_COMMON_SRC)/,true,false),true)
++ OPENJDK=true
++ endif
++endif
++
++BUILDTREE_VARS += HOTSPOT_RELEASE_VERSION=$(HS_BUILD_VER) HOTSPOT_BUILD_VERSION= JRE_RELEASE_VERSION=$(JRE_RELEASE_VERSION)
++
++BUILDTREE = \
++ $(MAKE) -f $(BUILDTREE_MAKE) $(BUILDTREE_TARGETS) $(BUILDTREE_VARS)
++
++BUILDTREE_COMMENT = echo "\# Generated by $(BUILDTREE_MAKE)"
++
++all: $(SUBMAKE_DIRS)
++
++# Run make in each subdirectory recursively.
++$(SUBMAKE_DIRS): $(SIMPLE_DIRS) FORCE
++ $(QUIETLY) [ -d $@ ] || { mkdir -p $@; }
++ $(QUIETLY) cd $@ && $(BUILDTREE) TARGET=$(@F)
++ $(QUIETLY) touch $@
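++# (the trailing touch bumps the directory timestamp, presumably so targets
++# that depend on these trees see them as freshly made)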
++
++$(SIMPLE_DIRS):
++ $(QUIETLY) mkdir -p $@
++
++# Convenience macro which takes a source relative path, applies $(1) to the
++# absolute path, and then replaces $(GAMMADIR) in the result with a
++# literal "$(GAMMADIR)/" suitable for inclusion in a Makefile.
++gamma-path=$(subst $(GAMMADIR),\$$(GAMMADIR),$(call $(1),$(HS_COMMON_SRC)/$(2)))
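++# e.g. $(call gamma-path,commonsrc,os/posix/vm) typically yields the literal
++# text "$(GAMMADIR)/src/os/posix/vm" for the generated flags.make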
++
++# This bit is needed to enable local rebuilds.
++# Unless the makefile itself sets LP64, any environmental
++# setting of LP64 will interfere with the build.
++LP64_SETTING/32 = LP64 = \#empty
++LP64_SETTING/64 = LP64 = 1
++
++DATA_MODE/ppc64 = 64
++
++DATA_MODE = $(DATA_MODE/$(BUILDARCH))
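++# (on ppc64 DATA_MODE is 64, so flags.make will carry "LP64 = 1")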
++
++flags.make: $(BUILDTREE_MAKE) ../shared_dirs.lst
++ @echo Creating $@ ...
++ $(QUIETLY) ( \
++ $(BUILDTREE_COMMENT); \
++ echo; \
++ echo "Platform_file = $(PLATFORM_FILE)" | sed 's|$(GAMMADIR)|$$(GAMMADIR)|'; \
++ sed -n '/=/s/^ */Platform_/p' < $(PLATFORM_FILE); \
++ echo; \
++ echo "GAMMADIR = $(GAMMADIR)"; \
++ echo "HS_ALT_MAKE = $(HS_ALT_MAKE)"; \
++ echo "OSNAME = $(OSNAME)"; \
++ echo "SYSDEFS = \$$(Platform_sysdefs)"; \
++ echo "SRCARCH = $(SRCARCH)"; \
++ echo "BUILDARCH = $(BUILDARCH)"; \
++ echo "LIBARCH = $(LIBARCH)"; \
++ echo "TARGET = $(TARGET)"; \
++ echo "HS_BUILD_VER = $(HS_BUILD_VER)"; \
++ echo "JRE_RELEASE_VER = $(JRE_RELEASE_VERSION)"; \
++ echo "SA_BUILD_VERSION = $(HS_BUILD_VER)"; \
++ echo "HOTSPOT_BUILD_USER = $(HOTSPOT_BUILD_USER)"; \
++ echo "HOTSPOT_VM_DISTRO = $(HOTSPOT_VM_DISTRO)"; \
++ echo "OPENJDK = $(OPENJDK)"; \
++ echo "$(LP64_SETTING/$(DATA_MODE))"; \
++ echo; \
++ echo "# Used for platform dispatching"; \
++ echo "TARGET_DEFINES = -DTARGET_OS_FAMILY_\$$(Platform_os_family)"; \
++ echo "TARGET_DEFINES += -DTARGET_ARCH_\$$(Platform_arch)"; \
++ echo "TARGET_DEFINES += -DTARGET_ARCH_MODEL_\$$(Platform_arch_model)"; \
++ echo "TARGET_DEFINES += -DTARGET_OS_ARCH_\$$(Platform_os_arch)"; \
++ echo "TARGET_DEFINES += -DTARGET_OS_ARCH_MODEL_\$$(Platform_os_arch_model)"; \
++ echo "TARGET_DEFINES += -DTARGET_COMPILER_\$$(Platform_compiler)"; \
++ echo "CFLAGS += \$$(TARGET_DEFINES)"; \
++ echo; \
++ echo "Src_Dirs_V = \\"; \
++ sed 's/$$/ \\/;s|$(GAMMADIR)|$$(GAMMADIR)|' ../shared_dirs.lst; \
++ echo "$(call gamma-path,altsrc,cpu/$(SRCARCH)/vm) \\"; \
++ echo "$(call gamma-path,commonsrc,cpu/$(SRCARCH)/vm) \\"; \
++ echo "$(call gamma-path,altsrc,os_cpu/$(OS_FAMILY)_$(SRCARCH)/vm) \\"; \
++ echo "$(call gamma-path,commonsrc,os_cpu/$(OS_FAMILY)_$(SRCARCH)/vm) \\"; \
++ echo "$(call gamma-path,altsrc,os/$(OS_FAMILY)/vm) \\"; \
++ echo "$(call gamma-path,commonsrc,os/$(OS_FAMILY)/vm) \\"; \
++ echo "$(call gamma-path,altsrc,os/posix/vm) \\"; \
++ echo "$(call gamma-path,commonsrc,os/posix/vm)"; \
++ echo; \
++ echo "Src_Dirs_I = \\"; \
++ echo "$(call gamma-path,altsrc,share/vm/prims) \\"; \
++ echo "$(call gamma-path,commonsrc,share/vm/prims) \\"; \
++ echo "$(call gamma-path,altsrc,share/vm) \\"; \
++ echo "$(call gamma-path,commonsrc,share/vm) \\"; \
++ echo "$(call gamma-path,altsrc,share/vm/precompiled) \\"; \
++ echo "$(call gamma-path,commonsrc,share/vm/precompiled) \\"; \
++ echo "$(call gamma-path,altsrc,cpu/$(SRCARCH)/vm) \\"; \
++ echo "$(call gamma-path,commonsrc,cpu/$(SRCARCH)/vm) \\"; \
++ echo "$(call gamma-path,altsrc,os_cpu/$(OS_FAMILY)_$(SRCARCH)/vm) \\"; \
++ echo "$(call gamma-path,commonsrc,os_cpu/$(OS_FAMILY)_$(SRCARCH)/vm) \\"; \
++ echo "$(call gamma-path,altsrc,os/$(OS_FAMILY)/vm) \\"; \
++ echo "$(call gamma-path,commonsrc,os/$(OS_FAMILY)/vm) \\"; \
++ echo "$(call gamma-path,altsrc,os/posix/vm) \\"; \
++ echo "$(call gamma-path,commonsrc,os/posix/vm)"; \
++ [ -n "$(CFLAGS_BROWSE)" ] && \
++ echo && echo "CFLAGS_BROWSE = $(CFLAGS_BROWSE)"; \
++ [ -n "$(ENABLE_FULL_DEBUG_SYMBOLS)" ] && \
++ echo && echo "ENABLE_FULL_DEBUG_SYMBOLS = $(ENABLE_FULL_DEBUG_SYMBOLS)"; \
++ [ -n "$(OBJCOPY)" ] && \
++ echo && echo "OBJCOPY = $(OBJCOPY)"; \
++ [ -n "$(STRIP_POLICY)" ] && \
++ echo && echo "STRIP_POLICY = $(STRIP_POLICY)"; \
++ [ -n "$(ZIP_DEBUGINFO_FILES)" ] && \
++ echo && echo "ZIP_DEBUGINFO_FILES = $(ZIP_DEBUGINFO_FILES)"; \
++ [ -n "$(ZIPEXE)" ] && \
++ echo && echo "ZIPEXE = $(ZIPEXE)"; \
++ [ -n "$(HOTSPOT_EXTRA_SYSDEFS)" ] && \
++ echo && \
++ echo "HOTSPOT_EXTRA_SYSDEFS\$$(HOTSPOT_EXTRA_SYSDEFS) = $(HOTSPOT_EXTRA_SYSDEFS)" && \
++ echo "SYSDEFS += \$$(HOTSPOT_EXTRA_SYSDEFS)"; \
++ [ -n "$(INCLUDE_TRACE)" ] && \
++ echo && echo "INCLUDE_TRACE = $(INCLUDE_TRACE)"; \
++ echo; \
++ [ -n "$(SPEC)" ] && \
++ echo "include $(SPEC)"; \
++ echo "include \$$(GAMMADIR)/make/$(OS_FAMILY)/makefiles/$(VARIANT).make"; \
++ echo "include \$$(GAMMADIR)/make/excludeSrc.make"; \
++ echo "include \$$(GAMMADIR)/make/$(OS_FAMILY)/makefiles/$(COMPILER).make"; \
++ ) > $@
++
++flags_vm.make: $(BUILDTREE_MAKE) ../shared_dirs.lst
++ @echo Creating $@ ...
++ $(QUIETLY) ( \
++ $(BUILDTREE_COMMENT); \
++ echo; \
++ echo "include \$$(GAMMADIR)/make/$(OS_FAMILY)/makefiles/$(TARGET).make"; \
++ ) > $@
++
++../shared_dirs.lst: $(BUILDTREE_MAKE) $(GAMMADIR)/src/share/vm
++ @echo Creating directory list $@
++ $(QUIETLY) if [ -d $(HS_ALT_SRC)/share/vm ]; then \
++ find $(HS_ALT_SRC)/share/vm/* -prune \
++ -type d \! \( $(TOPLEVEL_EXCLUDE_DIRS) \) -exec find {} \
++ \( $(ALWAYS_EXCLUDE_DIRS) \) -prune -o -type d -print \; > $@; \
++ fi;
++ $(QUIETLY) find $(HS_COMMON_SRC)/share/vm/* -prune \
++ -type d \! \( $(TOPLEVEL_EXCLUDE_DIRS) \) -exec find {} \
++ \( $(ALWAYS_EXCLUDE_DIRS) \) -prune -o -type d -print \; >> $@
++
++Makefile: $(BUILDTREE_MAKE)
++ @echo Creating $@ ...
++ $(QUIETLY) ( \
++ $(BUILDTREE_COMMENT); \
++ echo; \
++ echo include flags.make; \
++ echo; \
++ echo "include \$$(GAMMADIR)/make/$(OS_FAMILY)/makefiles/top.make"; \
++ ) > $@
++
++vm.make: $(BUILDTREE_MAKE)
++ @echo Creating $@ ...
++ $(QUIETLY) ( \
++ $(BUILDTREE_COMMENT); \
++ echo; \
++ echo include flags.make; \
++ echo include flags_vm.make; \
++ echo; \
++ echo "include \$$(GAMMADIR)/make/$(OS_FAMILY)/makefiles/$(@F)"; \
++ ) > $@
++
++adlc.make: $(BUILDTREE_MAKE)
++ @echo Creating $@ ...
++ $(QUIETLY) ( \
++ $(BUILDTREE_COMMENT); \
++ echo; \
++ echo include flags.make; \
++ echo; \
++ echo "include \$$(GAMMADIR)/make/$(OS_FAMILY)/makefiles/$(@F)"; \
++ ) > $@
++
++jvmti.make: $(BUILDTREE_MAKE)
++ @echo Creating $@ ...
++ $(QUIETLY) ( \
++ $(BUILDTREE_COMMENT); \
++ echo; \
++ echo include flags.make; \
++ echo; \
++ echo "include \$$(GAMMADIR)/make/$(OS_FAMILY)/makefiles/$(@F)"; \
++ ) > $@
++
++trace.make: $(BUILDTREE_MAKE)
++ @echo Creating $@ ...
++ $(QUIETLY) ( \
++ $(BUILDTREE_COMMENT); \
++ echo; \
++ echo include flags.make; \
++ echo; \
++ echo "include \$$(GAMMADIR)/make/$(OS_FAMILY)/makefiles/$(@F)"; \
++ ) > $@
++
++sa.make: $(BUILDTREE_MAKE)
++ @echo Creating $@ ...
++ $(QUIETLY) ( \
++ $(BUILDTREE_COMMENT); \
++ echo; \
++ echo include flags.make; \
++ echo; \
++ echo "include \$$(GAMMADIR)/make/$(OS_FAMILY)/makefiles/$(@F)"; \
++ ) > $@
++
++FORCE:
++
++.PHONY: all FORCE
+--- ./hotspot/make/aix/makefiles/compiler2.make Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/make/aix/makefiles/compiler2.make Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,32 @@
++#
++# Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
++# Copyright 2012, 2013 SAP AG. All rights reserved.
++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++#
++# This code is free software; you can redistribute it and/or modify it
++# under the terms of the GNU General Public License version 2 only, as
++# published by the Free Software Foundation.
++#
++# This code is distributed in the hope that it will be useful, but WITHOUT
++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++# version 2 for more details (a copy is included in the LICENSE file that
++# accompanied this code).
++#
++# You should have received a copy of the GNU General Public License version
++# 2 along with this work; if not, write to the Free Software Foundation,
++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++#
++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++# or visit www.oracle.com if you need additional information or have any
++# questions.
++#
++#
++
++# Sets make macros for making server version of VM
++
++TYPE=COMPILER2
++
++VM_SUBDIR = server
++
++CFLAGS += -DCOMPILER2
+--- ./hotspot/make/aix/makefiles/core.make Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/make/aix/makefiles/core.make Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,33 @@
++#
++# Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++#
++# This code is free software; you can redistribute it and/or modify it
++# under the terms of the GNU General Public License version 2 only, as
++# published by the Free Software Foundation.
++#
++# This code is distributed in the hope that it will be useful, but WITHOUT
++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++# version 2 for more details (a copy is included in the LICENSE file that
++# accompanied this code).
++#
++# You should have received a copy of the GNU General Public License version
++# 2 along with this work; if not, write to the Free Software Foundation,
++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++#
++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++# or visit www.oracle.com if you need additional information or have any
++# questions.
++#
++#
++
++# Sets make macros for making core version of VM
++
++# Select which files to use (in top.make)
++TYPE=CORE
++
++# There is no "core" directory in JDK. Install core build in server directory.
++VM_SUBDIR = server
++
++# Note: macros.hpp defines CORE
+--- ./hotspot/make/aix/makefiles/debug.make Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/make/aix/makefiles/debug.make Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,41 @@
++#
++# Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
++# Copyright 2012, 2013 SAP AG. All rights reserved.
++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++#
++# This code is free software; you can redistribute it and/or modify it
++# under the terms of the GNU General Public License version 2 only, as
++# published by the Free Software Foundation.
++#
++# This code is distributed in the hope that it will be useful, but WITHOUT
++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++# version 2 for more details (a copy is included in the LICENSE file that
++# accompanied this code).
++#
++# You should have received a copy of the GNU General Public License version
++# 2 along with this work; if not, write to the Free Software Foundation,
++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++#
++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++# or visit www.oracle.com if you need additional information or have any
++# questions.
++#
++#
++
++# Sets make macros for making debug version of VM
++
++# Compiler specific DEBUG_CFLAGS are passed in from gcc.make, sparcWorks.make
++DEBUG_CFLAGS/DEFAULT= $(DEBUG_CFLAGS)
++DEBUG_CFLAGS/BYFILE = $(DEBUG_CFLAGS/$@)$(DEBUG_CFLAGS/DEFAULT$(DEBUG_CFLAGS/$@))
++CFLAGS += $(DEBUG_CFLAGS/BYFILE)
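++# (BYFILE idiom: when DEBUG_CFLAGS/<object> is set, the first term supplies
++# it and the second expands an undefined DEBUG_CFLAGS/DEFAULT<value>, i.e.
++# nothing; when unset, the second term is plain DEBUG_CFLAGS/DEFAULT.
++# An illustrative per-file override: DEBUG_CFLAGS/os_aix.o = -g)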
++
++# Set the environment variable HOTSPARC_GENERIC to "true"
++# to inhibit the effect of the previous line on CFLAGS.
++
++# Linker mapfile
++MAPFILE = $(GAMMADIR)/make/aix/makefiles/mapfile-vers-debug
++
++VERSION = debug
++SYSDEFS += -DASSERT -DDEBUG
++PICFLAGS = DEFAULT
+--- ./hotspot/make/aix/makefiles/defs.make Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/make/aix/makefiles/defs.make Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,231 @@
++#
++# Copyright (c) 2006, 2013, Oracle and/or its affiliates. All rights reserved.
++# Copyright 2012, 2013 SAP AG. All rights reserved.
++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++#
++# This code is free software; you can redistribute it and/or modify it
++# under the terms of the GNU General Public License version 2 only, as
++# published by the Free Software Foundation.
++#
++# This code is distributed in the hope that it will be useful, but WITHOUT
++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++# version 2 for more details (a copy is included in the LICENSE file that
++# accompanied this code).
++#
++# You should have received a copy of the GNU General Public License version
++# 2 along with this work; if not, write to the Free Software Foundation,
++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++#
++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++# or visit www.oracle.com if you need additional information or have any
++# questions.
++#
++#
++
++# The common definitions for hotspot AIX builds.
++# Include the top level defs.make under make directory instead of this one.
++# This file is included into make/defs.make.
++
++SLASH_JAVA ?= /java
++
++# Need PLATFORM (os-arch combo names) for jdk and hotspot, plus libarch name
++#ARCH:=$(shell uname -m)
++PATH_SEP = :
++ifeq ($(LP64), 1)
++ ARCH_DATA_MODEL ?= 64
++else
++ ARCH_DATA_MODEL ?= 32
++endif
++
++ifeq ($(ARCH_DATA_MODEL), 64)
++ ARCH = ppc64
++else
++ ARCH = ppc
++endif
++
++# PPC
++ifeq ($(ARCH), ppc)
++ #ARCH_DATA_MODEL = 32
++ PLATFORM = aix-ppc
++ VM_PLATFORM = aix_ppc
++ HS_ARCH = ppc
++endif
++
++# PPC64
++ifeq ($(ARCH), ppc64)
++ #ARCH_DATA_MODEL = 64
++ MAKE_ARGS += LP64=1
++ PLATFORM = aix-ppc64
++ VM_PLATFORM = aix_ppc64
++ HS_ARCH = ppc
++endif
++
++# On 32-bit AIX we build both server and client; on 64-bit, just server.
++ifeq ($(JVM_VARIANTS),)
++ ifeq ($(ARCH_DATA_MODEL), 32)
++ JVM_VARIANTS:=client,server
++ JVM_VARIANT_CLIENT:=true
++ JVM_VARIANT_SERVER:=true
++ else
++ JVM_VARIANTS:=server
++ JVM_VARIANT_SERVER:=true
++ endif
++endif
++
++# Determine whether HotSpot is being built with JDK6 or an earlier version.
++JDK6_OR_EARLIER=0
++ifeq "$(shell expr \( '$(JDK_MAJOR_VERSION)' != '' \& '$(JDK_MINOR_VERSION)' != '' \& '$(JDK_MICRO_VERSION)' != '' \))" "1"
++ # if the longer variable names (newer build style) are set, then check those
++ ifeq "$(shell expr \( $(JDK_MAJOR_VERSION) = 1 \& $(JDK_MINOR_VERSION) \< 7 \))" "1"
++ JDK6_OR_EARLIER=1
++ endif
++else
++ # the longer variables aren't set so check the shorter variable names
++ ifeq "$(shell expr \( '$(JDK_MAJOR_VER)' = 1 \& '$(JDK_MINOR_VER)' \< 7 \))" "1"
++ JDK6_OR_EARLIER=1
++ endif
++endif
++
++ifeq ($(JDK6_OR_EARLIER),0)
++ # Full Debug Symbols is supported on JDK7 or newer.
++ # The Full Debug Symbols (FDS) default for BUILD_FLAVOR == product
++ # builds is enabled with debug info files ZIP'ed to save space. For
++ # BUILD_FLAVOR != product builds, FDS is always enabled, after all a
++ # debug build without debug info isn't very useful.
++ # The ZIP_DEBUGINFO_FILES option only has meaning when FDS is enabled.
++ #
++ # If you invoke a build with FULL_DEBUG_SYMBOLS=0, then FDS will be
++ # disabled for a BUILD_FLAVOR == product build.
++ #
++ # Note: Use of a different variable name for the FDS override option
++ # versus the FDS enabled check is intentional (FULL_DEBUG_SYMBOLS
++ # versus ENABLE_FULL_DEBUG_SYMBOLS). For auto build systems that pass
++ # in options via environment variables, use of distinct variables
++ # prevents strange behaviours. For example, in a BUILD_FLAVOR !=
++ # product build, the FULL_DEBUG_SYMBOLS environment variable will be
++ # 0, but the ENABLE_FULL_DEBUG_SYMBOLS make variable will be 1. If
++ # the same variable name is used, then different values can be picked
++ # up by different parts of the build. Just to be clear, we only need
++ # two variable names because the incoming option value can be
++ # overridden in some situations, e.g., a BUILD_FLAVOR != product
++ # build.
++
++  # Due to the multiple sub-make processes that occur, this logic gets
++ # executed multiple times. We reduce the noise by at least checking that
++ # BUILD_FLAVOR has been set.
++ ifneq ($(BUILD_FLAVOR),)
++ ifeq ($(BUILD_FLAVOR), product)
++ FULL_DEBUG_SYMBOLS ?= 1
++ ENABLE_FULL_DEBUG_SYMBOLS = $(FULL_DEBUG_SYMBOLS)
++ else
++ # debug variants always get Full Debug Symbols (if available)
++ ENABLE_FULL_DEBUG_SYMBOLS = 1
++ endif
++ _JUNK_ := $(shell \
++ echo >&2 "INFO: ENABLE_FULL_DEBUG_SYMBOLS=$(ENABLE_FULL_DEBUG_SYMBOLS)")
++ # since objcopy is optional, we set ZIP_DEBUGINFO_FILES later
++
++ ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
++ # Default OBJCOPY comes from GNU Binutils on Linux
++ ifeq ($(CROSS_COMPILE_ARCH),)
++ DEF_OBJCOPY=/usr/bin/objcopy
++ else
++ # Assume objcopy is part of the cross-compilation toolset
++ ifneq ($(ALT_COMPILER_PATH),)
++ DEF_OBJCOPY=$(ALT_COMPILER_PATH)/objcopy
++ endif
++ endif
++ OBJCOPY=$(shell test -x $(DEF_OBJCOPY) && echo $(DEF_OBJCOPY))
++ ifneq ($(ALT_OBJCOPY),)
++ _JUNK_ := $(shell echo >&2 "INFO: ALT_OBJCOPY=$(ALT_OBJCOPY)")
++ OBJCOPY=$(shell test -x $(ALT_OBJCOPY) && echo $(ALT_OBJCOPY))
++ endif
++
++ ifeq ($(OBJCOPY),)
++ _JUNK_ := $(shell \
++ echo >&2 "INFO: no objcopy cmd found so cannot create .debuginfo files. You may need to set ALT_OBJCOPY.")
++ ENABLE_FULL_DEBUG_SYMBOLS=0
++ _JUNK_ := $(shell \
++ echo >&2 "INFO: ENABLE_FULL_DEBUG_SYMBOLS=$(ENABLE_FULL_DEBUG_SYMBOLS)")
++ else
++ _JUNK_ := $(shell \
++ echo >&2 "INFO: $(OBJCOPY) cmd found so will create .debuginfo files.")
++
++ # Library stripping policies for .debuginfo configs:
++ # all_strip - strips everything from the library
++ # min_strip - strips most stuff from the library; leaves minimum symbols
++ # no_strip - does not strip the library at all
++ #
++ # Oracle security policy requires "all_strip". A waiver was granted on
++ # 2011.09.01 that permits using "min_strip" in the Java JDK and Java JRE.
++ #
++ # Currently, STRIP_POLICY is only used when Full Debug Symbols is enabled.
++ #
++ STRIP_POLICY ?= min_strip
++
++ _JUNK_ := $(shell \
++ echo >&2 "INFO: STRIP_POLICY=$(STRIP_POLICY)")
++
++ ZIP_DEBUGINFO_FILES ?= 1
++
++ _JUNK_ := $(shell \
++ echo >&2 "INFO: ZIP_DEBUGINFO_FILES=$(ZIP_DEBUGINFO_FILES)")
++ endif
++ endif # ENABLE_FULL_DEBUG_SYMBOLS=1
++ endif # BUILD_FLAVOR
++endif # JDK6_OR_EARLIER
++
++# unused JDK_INCLUDE_SUBDIR=aix
++
++# Library suffix
++LIBRARY_SUFFIX=so
++
++EXPORT_LIST += $(EXPORT_DOCS_DIR)/platform/jvmti/jvmti.html
++
++# client and server subdirectories have symbolic links to ../libjsig.so
++EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libjsig.$(LIBRARY_SUFFIX)
++#ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
++# ifeq ($(ZIP_DEBUGINFO_FILES),1)
++# EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libjsig.diz
++# else
++# EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libjsig.debuginfo
++# endif
++#endif
++EXPORT_SERVER_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/server
++EXPORT_CLIENT_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/client
++EXPORT_MINIMAL_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/minimal
++
++ifeq ($(findstring true, $(JVM_VARIANT_SERVER) $(JVM_VARIANT_ZERO) $(JVM_VARIANT_ZEROSHARK) $(JVM_VARIANT_CORE)), true)
++ EXPORT_LIST += $(EXPORT_SERVER_DIR)/Xusage.txt
++ EXPORT_LIST += $(EXPORT_SERVER_DIR)/libjvm.$(LIBRARY_SUFFIX)
++# ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
++# ifeq ($(ZIP_DEBUGINFO_FILES),1)
++# EXPORT_LIST += $(EXPORT_SERVER_DIR)/libjvm.diz
++# else
++# EXPORT_LIST += $(EXPORT_SERVER_DIR)/libjvm.debuginfo
++# endif
++# endif
++endif
++
++ifeq ($(JVM_VARIANT_CLIENT),true)
++ EXPORT_LIST += $(EXPORT_CLIENT_DIR)/Xusage.txt
++ EXPORT_LIST += $(EXPORT_CLIENT_DIR)/libjvm.$(LIBRARY_SUFFIX)
++# ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
++# ifeq ($(ZIP_DEBUGINFO_FILES),1)
++# EXPORT_LIST += $(EXPORT_CLIENT_DIR)/libjvm.diz
++# else
++# EXPORT_LIST += $(EXPORT_CLIENT_DIR)/libjvm.debuginfo
++# endif
++# endif
++endif
++
++# Serviceability Agent binaries
++# No SA support for PPC or Zero
++ADD_SA_BINARIES/ppc =
++ADD_SA_BINARIES/ppc64 =
++ADD_SA_BINARIES/zero =
++
++EXPORT_LIST += $(ADD_SA_BINARIES/$(HS_ARCH))
++
++
+--- ./hotspot/make/aix/makefiles/dtrace.make Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/make/aix/makefiles/dtrace.make Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,27 @@
++#
++# Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++#
++# This code is free software; you can redistribute it and/or modify it
++# under the terms of the GNU General Public License version 2 only, as
++# published by the Free Software Foundation.
++#
++# This code is distributed in the hope that it will be useful, but WITHOUT
++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++# version 2 for more details (a copy is included in the LICENSE file that
++# accompanied this code).
++#
++# You should have received a copy of the GNU General Public License version
++# 2 along with this work; if not, write to the Free Software Foundation,
++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++#
++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++# or visit www.oracle.com if you need additional information or have any
++# questions.
++#
++#
++
++# AIX, like Linux, does not build jvm_db
++LIBJVM_DB =
++
+--- ./hotspot/make/aix/makefiles/fastdebug.make Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/make/aix/makefiles/fastdebug.make Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,73 @@
++#
++# Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
++# Copyright 2012, 2013 SAP AG. All rights reserved.
++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++#
++# This code is free software; you can redistribute it and/or modify it
++# under the terms of the GNU General Public License version 2 only, as
++# published by the Free Software Foundation.
++#
++# This code is distributed in the hope that it will be useful, but WITHOUT
++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++# version 2 for more details (a copy is included in the LICENSE file that
++# accompanied this code).
++#
++# You should have received a copy of the GNU General Public License version
++# 2 along with this work; if not, write to the Free Software Foundation,
++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++#
++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++# or visit www.oracle.com if you need additional information or have any
++# questions.
++#
++#
++
++# Sets make macros for making debug version of VM
++
++# Compiler specific OPT_CFLAGS are passed in from gcc.make, sparcWorks.make
++# Pare down optimization to -O2 if xlC V10.1 is in use.
++OPT_CFLAGS/DEFAULT= $(OPT_CFLAGS) $(QV10_OPT_CONSERVATIVE)
++OPT_CFLAGS/BYFILE = $(OPT_CFLAGS/$@)$(OPT_CFLAGS/DEFAULT$(OPT_CFLAGS/$@))
++
++# (OPT_CFLAGS/SLOWER is also available, to alter compilation of buggy files)
++
++ifeq ($(BUILDARCH), ia64)
++ # Bug in GCC, causes hang. -O1 will override the -O3 specified earlier
++ OPT_CFLAGS/callGenerator.o += -O1
++ OPT_CFLAGS/ciTypeFlow.o += -O1
++ OPT_CFLAGS/compile.o += -O1
++ OPT_CFLAGS/concurrentMarkSweepGeneration.o += -O1
++ OPT_CFLAGS/doCall.o += -O1
++ OPT_CFLAGS/generateOopMap.o += -O1
++ OPT_CFLAGS/generateOptoStub.o += -O1
++ OPT_CFLAGS/graphKit.o += -O1
++ OPT_CFLAGS/instanceKlass.o += -O1
++ OPT_CFLAGS/interpreterRT_ia64.o += -O1
++ OPT_CFLAGS/output.o += -O1
++ OPT_CFLAGS/parse1.o += -O1
++ OPT_CFLAGS/runtime.o += -O1
++ OPT_CFLAGS/synchronizer.o += -O1
++endif
++
++
++# If you set HOTSPARC_GENERIC=yes, you disable all OPT_CFLAGS settings
++CFLAGS$(HOTSPARC_GENERIC) += $(OPT_CFLAGS/BYFILE)
++
++# Set the environment variable HOTSPARC_GENERIC to "true"
++# to inhibit the effect of the previous line on CFLAGS.
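++# (with HOTSPARC_GENERIC=true the "+=" above appends to a variable named
++# CFLAGStrue, which nothing reads, so CFLAGS itself stays untouched)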
++
++# Linker mapfile
++MAPFILE = $(GAMMADIR)/make/aix/makefiles/mapfile-vers-debug
++
++# xlc 10.1 parameters for IPA linkage:
++# - remove IPA linkage altogether; it does not seem to benefit performance,
++#   but it increases the code footprint.
++# - this is a debug build in the end, so the extra effort for IPA linkage
++#   is not justified.
++LFLAGS_QIPA=
++
++G_SUFFIX = _g
++VERSION = optimized
++SYSDEFS += -DASSERT -DFASTDEBUG
++PICFLAGS = DEFAULT
+--- ./hotspot/make/aix/makefiles/jsig.make Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/make/aix/makefiles/jsig.make Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,87 @@
++#
++# Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
++# Copyright 2012, 2013 SAP AG. All rights reserved.
++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++#
++# This code is free software; you can redistribute it and/or modify it
++# under the terms of the GNU General Public License version 2 only, as
++# published by the Free Software Foundation.
++#
++# This code is distributed in the hope that it will be useful, but WITHOUT
++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++# version 2 for more details (a copy is included in the LICENSE file that
++# accompanied this code).
++#
++# You should have received a copy of the GNU General Public License version
++# 2 along with this work; if not, write to the Free Software Foundation,
++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++#
++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++# or visit www.oracle.com if you need additional information or have any
++# questions.
++#
++#
++
++# Rules to build signal interposition library, used by vm.make
++
++# libjsig.so: signal interposition library
++JSIG = jsig
++LIBJSIG = lib$(JSIG).so
++
++LIBJSIG_DEBUGINFO = lib$(JSIG).debuginfo
++LIBJSIG_DIZ = lib$(JSIG).diz
++
++JSIGSRCDIR = $(GAMMADIR)/src/os/$(Platform_os_family)/vm
++
++DEST_JSIG = $(JDK_LIBDIR)/$(LIBJSIG)
++DEST_JSIG_DEBUGINFO = $(JDK_LIBDIR)/$(LIBJSIG_DEBUGINFO)
++DEST_JSIG_DIZ = $(JDK_LIBDIR)/$(LIBJSIG_DIZ)
++
++LIBJSIG_MAPFILE = $(MAKEFILES_DIR)/mapfile-vers-jsig
++
++# On Linux we really don't want a mapfile: this library is small and
++# preloaded using LD_PRELOAD, so making functions private would cause
++# problems with interposing. See CR 6466665.
++# LFLAGS_JSIG += $(MAPFLAG:FILENAME=$(LIBJSIG_MAPFILE))
++
++LFLAGS_JSIG += -D_GNU_SOURCE -D_REENTRANT $(LDFLAGS_HASH_STYLE)
++
++LFLAGS_JSIG += $(BIN_UTILS)
++
++# DEBUG_BINARIES overrides everything, use full -g debug information
++ifeq ($(DEBUG_BINARIES), true)
++ JSIG_DEBUG_CFLAGS = -g
++endif
++
++$(LIBJSIG): $(JSIGSRCDIR)/jsig.c $(LIBJSIG_MAPFILE)
++ @echo Making signal interposition lib...
++ $(QUIETLY) $(CXX) $(SYMFLAG) $(ARCHFLAG) $(SHARED_FLAG) $(PICFLAG) \
++ $(LFLAGS_JSIG) $(JSIG_DEBUG_CFLAGS) -o $@ $< -ldl
++
++#ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
++# $(QUIETLY) $(OBJCOPY) --only-keep-debug $@ $(LIBJSIG_DEBUGINFO)
++# $(QUIETLY) $(OBJCOPY) --add-gnu-debuglink=$(LIBJSIG_DEBUGINFO) $@
++# ifeq ($(STRIP_POLICY),all_strip)
++# $(QUIETLY) $(STRIP) $@
++# else
++# ifeq ($(STRIP_POLICY),min_strip)
++# $(QUIETLY) $(STRIP) -g $@
++# # implied else here is no stripping at all
++# endif
++# endif
++# ifeq ($(ZIP_DEBUGINFO_FILES),1)
++# $(ZIPEXE) -q -y $(LIBJSIG_DIZ) $(LIBJSIG_DEBUGINFO)
++# $(RM) $(LIBJSIG_DEBUGINFO)
++# endif
++#endif
++
++install_jsig: $(LIBJSIG)
++ @echo "Copying $(LIBJSIG) to $(DEST_JSIG)"
++ $(QUIETLY) test -f $(LIBJSIG_DEBUGINFO) && \
++ cp -f $(LIBJSIG_DEBUGINFO) $(DEST_JSIG_DEBUGINFO)
++ $(QUIETLY) test -f $(LIBJSIG_DIZ) && \
++ cp -f $(LIBJSIG_DIZ) $(DEST_JSIG_DIZ)
++ $(QUIETLY) cp -f $(LIBJSIG) $(DEST_JSIG) && echo "Done"
++
++.PHONY: install_jsig
+--- ./hotspot/make/aix/makefiles/jvmti.make Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/make/aix/makefiles/jvmti.make Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,118 @@
++#
++# Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
++# Copyright 2012, 2013 SAP AG. All rights reserved.
++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++#
++# This code is free software; you can redistribute it and/or modify it
++# under the terms of the GNU General Public License version 2 only, as
++# published by the Free Software Foundation.
++#
++# This code is distributed in the hope that it will be useful, but WITHOUT
++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++# version 2 for more details (a copy is included in the LICENSE file that
++# accompanied this code).
++#
++# You should have received a copy of the GNU General Public License version
++# 2 along with this work; if not, write to the Free Software Foundation,
++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++#
++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++# or visit www.oracle.com if you need additional information or have any
++# questions.
++#
++#
++
++# This makefile (jvmti.make) is included from the jvmti.make in the
++# build directories.
++#
++# It knows how to build and run the tools to generate jvmti.
++
++include $(GAMMADIR)/make/aix/makefiles/rules.make
++
++# #########################################################################
++
++TOPDIR = $(shell echo `pwd`)
++GENERATED = $(TOPDIR)/../generated
++JvmtiOutDir = $(GENERATED)/jvmtifiles
++
++JvmtiSrcDir = $(GAMMADIR)/src/share/vm/prims
++InterpreterSrcDir = $(GAMMADIR)/src/share/vm/interpreter
++
++# set VPATH so make knows where to look for source files
++Src_Dirs_V += $(JvmtiSrcDir)
++VPATH += $(Src_Dirs_V:%=%:)
++
++JvmtiGeneratedNames = \
++ jvmtiEnv.hpp \
++ jvmtiEnter.cpp \
++ jvmtiEnterTrace.cpp \
++ jvmtiEnvRecommended.cpp \
++ bytecodeInterpreterWithChecks.cpp \
++ jvmti.h \
++
++JvmtiEnvFillSource = $(JvmtiSrcDir)/jvmtiEnvFill.java
++JvmtiEnvFillClass = $(JvmtiOutDir)/jvmtiEnvFill.class
++
++JvmtiGenSource = $(JvmtiSrcDir)/jvmtiGen.java
++JvmtiGenClass = $(JvmtiOutDir)/jvmtiGen.class
++
++JvmtiGeneratedFiles = $(JvmtiGeneratedNames:%=$(JvmtiOutDir)/%)
++
++XSLT = $(QUIETLY) $(REMOTE) $(RUN.JAVA) -classpath $(JvmtiOutDir) jvmtiGen
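++# (the XSLT driver is the jvmtiGen class compiled into $(JvmtiOutDir) by the
++# $(JvmtiGenClass) rule below)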
++
++.PHONY: all jvmtidocs clean cleanall
++
++# #########################################################################
++
++all: $(JvmtiGeneratedFiles)
++
++both = $(JvmtiGenClass) $(JvmtiSrcDir)/jvmti.xml $(JvmtiSrcDir)/jvmtiLib.xsl
++
++$(JvmtiGenClass): $(JvmtiGenSource)
++ $(QUIETLY) $(REMOTE) $(COMPILE.JAVAC) -d $(JvmtiOutDir) $(JvmtiGenSource)
++
++$(JvmtiEnvFillClass): $(JvmtiEnvFillSource)
++ $(QUIETLY) $(REMOTE) $(COMPILE.JAVAC) -d $(JvmtiOutDir) $(JvmtiEnvFillSource)
++
++$(JvmtiOutDir)/jvmtiEnter.cpp: $(both) $(JvmtiSrcDir)/jvmtiEnter.xsl
++ @echo Generating $@
++ $(XSLT) -IN $(JvmtiSrcDir)/jvmti.xml -XSL $(JvmtiSrcDir)/jvmtiEnter.xsl -OUT $(JvmtiOutDir)/jvmtiEnter.cpp -PARAM interface jvmti
++
++$(JvmtiOutDir)/bytecodeInterpreterWithChecks.cpp: $(JvmtiGenClass) $(InterpreterSrcDir)/bytecodeInterpreter.cpp $(InterpreterSrcDir)/bytecodeInterpreterWithChecks.xml $(InterpreterSrcDir)/bytecodeInterpreterWithChecks.xsl
++ @echo Generating $@
++ $(XSLT) -IN $(InterpreterSrcDir)/bytecodeInterpreterWithChecks.xml -XSL $(InterpreterSrcDir)/bytecodeInterpreterWithChecks.xsl -OUT $(JvmtiOutDir)/bytecodeInterpreterWithChecks.cpp
++
++$(JvmtiOutDir)/jvmtiEnterTrace.cpp: $(both) $(JvmtiSrcDir)/jvmtiEnter.xsl
++ @echo Generating $@
++ $(XSLT) -IN $(JvmtiSrcDir)/jvmti.xml -XSL $(JvmtiSrcDir)/jvmtiEnter.xsl -OUT $(JvmtiOutDir)/jvmtiEnterTrace.cpp -PARAM interface jvmti -PARAM trace Trace
++
++$(JvmtiOutDir)/jvmtiEnvRecommended.cpp: $(both) $(JvmtiSrcDir)/jvmtiEnv.xsl $(JvmtiSrcDir)/jvmtiEnv.cpp $(JvmtiEnvFillClass)
++ @echo Generating $@
++ $(XSLT) -IN $(JvmtiSrcDir)/jvmti.xml -XSL $(JvmtiSrcDir)/jvmtiEnv.xsl -OUT $(JvmtiOutDir)/jvmtiEnvStub.cpp
++ $(QUIETLY) $(REMOTE) $(RUN.JAVA) -classpath $(JvmtiOutDir) jvmtiEnvFill $(JvmtiSrcDir)/jvmtiEnv.cpp $(JvmtiOutDir)/jvmtiEnvStub.cpp $(JvmtiOutDir)/jvmtiEnvRecommended.cpp
++
++$(JvmtiOutDir)/jvmtiEnv.hpp: $(both) $(JvmtiSrcDir)/jvmtiHpp.xsl
++ @echo Generating $@
++ $(XSLT) -IN $(JvmtiSrcDir)/jvmti.xml -XSL $(JvmtiSrcDir)/jvmtiHpp.xsl -OUT $(JvmtiOutDir)/jvmtiEnv.hpp
++
++$(JvmtiOutDir)/jvmti.h: $(both) $(JvmtiSrcDir)/jvmtiH.xsl
++ @echo Generating $@
++ $(XSLT) -IN $(JvmtiSrcDir)/jvmti.xml -XSL $(JvmtiSrcDir)/jvmtiH.xsl -OUT $(JvmtiOutDir)/jvmti.h
++
++jvmtidocs: $(JvmtiOutDir)/jvmti.html
++
++$(JvmtiOutDir)/jvmti.html: $(both) $(JvmtiSrcDir)/jvmti.xsl
++ @echo Generating $@
++ $(XSLT) -IN $(JvmtiSrcDir)/jvmti.xml -XSL $(JvmtiSrcDir)/jvmti.xsl -OUT $(JvmtiOutDir)/jvmti.html
++
++# #########################################################################
++
++clean :
++ rm $(JvmtiGenClass) $(JvmtiEnvFillClass) $(JvmtiGeneratedFiles)
++
++cleanall :
++ rm $(JvmtiGenClass) $(JvmtiEnvFillClass) $(JvmtiGeneratedFiles)
++
++# #########################################################################
++
+--- ./hotspot/make/aix/makefiles/mapfile-vers-debug Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/make/aix/makefiles/mapfile-vers-debug Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,275 @@
++#
++# Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++#
++# This code is free software; you can redistribute it and/or modify it
++# under the terms of the GNU General Public License version 2 only, as
++# published by the Free Software Foundation.
++#
++# This code is distributed in the hope that it will be useful, but WITHOUT
++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++# version 2 for more details (a copy is included in the LICENSE file that
++# accompanied this code).
++#
++# You should have received a copy of the GNU General Public License version
++# 2 along with this work; if not, write to the Free Software Foundation,
++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++#
++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++# or visit www.oracle.com if you need additional information or have any
++# questions.
++#
++#
++
++# Define public interface.
++
++SUNWprivate_1.1 {
++ global:
++ # JNI
++ JNI_CreateJavaVM;
++ JNI_GetCreatedJavaVMs;
++ JNI_GetDefaultJavaVMInitArgs;
++
++ # JVM
++ JVM_Accept;
++ JVM_ActiveProcessorCount;
++ JVM_AllocateNewArray;
++ JVM_AllocateNewObject;
++ JVM_ArrayCopy;
++ JVM_AssertionStatusDirectives;
++ JVM_Available;
++ JVM_Bind;
++ JVM_ClassDepth;
++ JVM_ClassLoaderDepth;
++ JVM_Clone;
++ JVM_Close;
++ JVM_CX8Field;
++ JVM_CompileClass;
++ JVM_CompileClasses;
++ JVM_CompilerCommand;
++ JVM_Connect;
++ JVM_ConstantPoolGetClassAt;
++ JVM_ConstantPoolGetClassAtIfLoaded;
++ JVM_ConstantPoolGetDoubleAt;
++ JVM_ConstantPoolGetFieldAt;
++ JVM_ConstantPoolGetFieldAtIfLoaded;
++ JVM_ConstantPoolGetFloatAt;
++ JVM_ConstantPoolGetIntAt;
++ JVM_ConstantPoolGetLongAt;
++ JVM_ConstantPoolGetMethodAt;
++ JVM_ConstantPoolGetMethodAtIfLoaded;
++ JVM_ConstantPoolGetMemberRefInfoAt;
++ JVM_ConstantPoolGetSize;
++ JVM_ConstantPoolGetStringAt;
++ JVM_ConstantPoolGetUTF8At;
++ JVM_CountStackFrames;
++ JVM_CurrentClassLoader;
++ JVM_CurrentLoadedClass;
++ JVM_CurrentThread;
++ JVM_CurrentTimeMillis;
++ JVM_DefineClass;
++ JVM_DefineClassWithSource;
++ JVM_DefineClassWithSourceCond;
++ JVM_DesiredAssertionStatus;
++ JVM_DisableCompiler;
++ JVM_DoPrivileged;
++ JVM_DTraceGetVersion;
++ JVM_DTraceActivate;
++ JVM_DTraceIsProbeEnabled;
++ JVM_DTraceIsSupported;
++ JVM_DTraceDispose;
++ JVM_DumpAllStacks;
++ JVM_DumpThreads;
++ JVM_EnableCompiler;
++ JVM_Exit;
++ JVM_FillInStackTrace;
++ JVM_FindClassFromClass;
++ JVM_FindClassFromClassLoader;
++ JVM_FindClassFromBootLoader;
++ JVM_FindLibraryEntry;
++ JVM_FindLoadedClass;
++ JVM_FindPrimitiveClass;
++ JVM_FindSignal;
++ JVM_FreeMemory;
++ JVM_GC;
++ JVM_GetAllThreads;
++ JVM_GetArrayElement;
++ JVM_GetArrayLength;
++ JVM_GetCPClassNameUTF;
++ JVM_GetCPFieldClassNameUTF;
++ JVM_GetCPFieldModifiers;
++ JVM_GetCPFieldNameUTF;
++ JVM_GetCPFieldSignatureUTF;
++ JVM_GetCPMethodClassNameUTF;
++ JVM_GetCPMethodModifiers;
++ JVM_GetCPMethodNameUTF;
++ JVM_GetCPMethodSignatureUTF;
++ JVM_GetCallerClass;
++ JVM_GetClassAccessFlags;
++ JVM_GetClassAnnotations;
++ JVM_GetClassCPEntriesCount;
++ JVM_GetClassCPTypes;
++ JVM_GetClassConstantPool;
++ JVM_GetClassContext;
++ JVM_GetClassDeclaredConstructors;
++ JVM_GetClassDeclaredFields;
++ JVM_GetClassDeclaredMethods;
++ JVM_GetClassFieldsCount;
++ JVM_GetClassInterfaces;
++ JVM_GetClassLoader;
++ JVM_GetClassMethodsCount;
++ JVM_GetClassModifiers;
++ JVM_GetClassName;
++ JVM_GetClassNameUTF;
++ JVM_GetClassSignature;
++ JVM_GetClassSigners;
++ JVM_GetClassTypeAnnotations;
++ JVM_GetComponentType;
++ JVM_GetDeclaredClasses;
++ JVM_GetDeclaringClass;
++ JVM_GetEnclosingMethodInfo;
++ JVM_GetFieldAnnotations;
++ JVM_GetFieldIxModifiers;
++ JVM_GetFieldTypeAnnotations;
++ JVM_GetHostName;
++ JVM_GetInheritedAccessControlContext;
++ JVM_GetInterfaceVersion;
++ JVM_GetLastErrorString;
++ JVM_GetManagement;
++ JVM_GetMethodAnnotations;
++ JVM_GetMethodDefaultAnnotationValue;
++ JVM_GetMethodIxArgsSize;
++ JVM_GetMethodIxByteCode;
++ JVM_GetMethodIxByteCodeLength;
++ JVM_GetMethodIxExceptionIndexes;
++ JVM_GetMethodIxExceptionTableEntry;
++ JVM_GetMethodIxExceptionTableLength;
++ JVM_GetMethodIxExceptionsCount;
++ JVM_GetMethodIxLocalsCount;
++ JVM_GetMethodIxMaxStack;
++ JVM_GetMethodIxModifiers;
++ JVM_GetMethodIxNameUTF;
++ JVM_GetMethodIxSignatureUTF;
++ JVM_GetMethodParameterAnnotations;
++ JVM_GetMethodParameters;
++ JVM_GetMethodTypeAnnotations;
++ JVM_GetPrimitiveArrayElement;
++ JVM_GetProtectionDomain;
++ JVM_GetSockName;
++ JVM_GetSockOpt;
++ JVM_GetStackAccessControlContext;
++ JVM_GetStackTraceDepth;
++ JVM_GetStackTraceElement;
++ JVM_GetSystemPackage;
++ JVM_GetSystemPackages;
++ JVM_GetTemporaryDirectory;
++ JVM_GetThreadStateNames;
++ JVM_GetThreadStateValues;
++ JVM_GetVersionInfo;
++ JVM_Halt;
++ JVM_HoldsLock;
++ JVM_IHashCode;
++ JVM_InitAgentProperties;
++ JVM_InitProperties;
++ JVM_InitializeCompiler;
++ JVM_InitializeSocketLibrary;
++ JVM_InternString;
++ JVM_Interrupt;
++ JVM_InvokeMethod;
++ JVM_IsArrayClass;
++ JVM_IsConstructorIx;
++ JVM_IsInterface;
++ JVM_IsInterrupted;
++ JVM_IsNaN;
++ JVM_IsPrimitiveClass;
++ JVM_IsSameClassPackage;
++ JVM_IsSilentCompiler;
++ JVM_IsSupportedJNIVersion;
++ JVM_IsThreadAlive;
++ JVM_IsVMGeneratedMethodIx;
++ JVM_LatestUserDefinedLoader;
++ JVM_Listen;
++ JVM_LoadClass0;
++ JVM_LoadLibrary;
++ JVM_Lseek;
++ JVM_MaxObjectInspectionAge;
++ JVM_MaxMemory;
++ JVM_MonitorNotify;
++ JVM_MonitorNotifyAll;
++ JVM_MonitorWait;
++ JVM_NanoTime;
++ JVM_NativePath;
++ JVM_NewArray;
++ JVM_NewInstanceFromConstructor;
++ JVM_NewMultiArray;
++ JVM_OnExit;
++ JVM_Open;
++ JVM_RaiseSignal;
++ JVM_RawMonitorCreate;
++ JVM_RawMonitorDestroy;
++ JVM_RawMonitorEnter;
++ JVM_RawMonitorExit;
++ JVM_Read;
++ JVM_Recv;
++ JVM_RecvFrom;
++ JVM_RegisterSignal;
++ JVM_ReleaseUTF;
++ JVM_ResolveClass;
++ JVM_ResumeThread;
++ JVM_Send;
++ JVM_SendTo;
++ JVM_SetArrayElement;
++ JVM_SetClassSigners;
++ JVM_SetLength;
++ JVM_SetNativeThreadName;
++ JVM_SetPrimitiveArrayElement;
++ JVM_SetProtectionDomain;
++ JVM_SetSockOpt;
++ JVM_SetThreadPriority;
++ JVM_Sleep;
++ JVM_Socket;
++ JVM_SocketAvailable;
++ JVM_SocketClose;
++ JVM_SocketShutdown;
++ JVM_StartThread;
++ JVM_StopThread;
++ JVM_SuspendThread;
++ JVM_SupportsCX8;
++ JVM_Sync;
++ JVM_Timeout;
++ JVM_TotalMemory;
++ JVM_TraceInstructions;
++ JVM_TraceMethodCalls;
++ JVM_UnloadLibrary;
++ JVM_Write;
++ JVM_Yield;
++ JVM_handle_linux_signal;
++
++ # debug JVM
++ JVM_AccessVMBooleanFlag;
++ JVM_AccessVMIntFlag;
++ JVM_VMBreakPoint;
++
++ # miscellaneous functions
++ jio_fprintf;
++ jio_printf;
++ jio_snprintf;
++ jio_vfprintf;
++ jio_vsnprintf;
++ fork1;
++ numa_warn;
++ numa_error;
++
++ # Needed because there is no JVM interface for this.
++ sysThreadAvailableStackWithSlack;
++
++ # This is for Forte Analyzer profiling support.
++ AsyncGetCallTrace;
++
++ # INSERT VTABLE SYMBOLS HERE
++
++ local:
++ *;
++};
++
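++# For illustration: in a linker version script of this form, only symbols
++# listed under "global:" are exported; the catch-all "local: *;" hides
++# everything else. A minimal script with the same shape:
++#
++#   SUNWprivate_1.1 {
++#     global:
++#       JNI_CreateJavaVM;   # exported
++#     local:
++#       *;                  # all other symbols stay hidden
++#   };
++#
++# The "INSERT VTABLE SYMBOLS HERE" marker is filled in at build time by the
++# mapfile rule in vm.make.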
+--- ./hotspot/make/aix/makefiles/mapfile-vers-jsig Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/make/aix/makefiles/mapfile-vers-jsig Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,38 @@
++#
++# Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++#
++# This code is free software; you can redistribute it and/or modify it
++# under the terms of the GNU General Public License version 2 only, as
++# published by the Free Software Foundation.
++#
++# This code is distributed in the hope that it will be useful, but WITHOUT
++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++# version 2 for more details (a copy is included in the LICENSE file that
++# accompanied this code).
++#
++# You should have received a copy of the GNU General Public License version
++# 2 along with this work; if not, write to the Free Software Foundation,
++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++#
++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++# or visit www.oracle.com if you need additional information or have any
++# questions.
++#
++#
++
++# Define library interface.
++
++SUNWprivate_1.1 {
++ global:
++ JVM_begin_signal_setting;
++ JVM_end_signal_setting;
++ JVM_get_libjsig_version;
++ JVM_get_signal_action;
++ sigaction;
++ signal;
++ sigset;
++ local:
++ *;
++};
+--- ./hotspot/make/aix/makefiles/mapfile-vers-product Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/make/aix/makefiles/mapfile-vers-product Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,268 @@
++#
++# Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++#
++# This code is free software; you can redistribute it and/or modify it
++# under the terms of the GNU General Public License version 2 only, as
++# published by the Free Software Foundation.
++#
++# This code is distributed in the hope that it will be useful, but WITHOUT
++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++# version 2 for more details (a copy is included in the LICENSE file that
++# accompanied this code).
++#
++# You should have received a copy of the GNU General Public License version
++# 2 along with this work; if not, write to the Free Software Foundation,
++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++#
++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++# or visit www.oracle.com if you need additional information or have any
++# questions.
++#
++#
++
++# Define public interface.
++
++SUNWprivate_1.1 {
++ global:
++ # JNI
++ JNI_CreateJavaVM;
++ JNI_GetCreatedJavaVMs;
++ JNI_GetDefaultJavaVMInitArgs;
++
++ # JVM
++ JVM_Accept;
++ JVM_ActiveProcessorCount;
++ JVM_AllocateNewArray;
++ JVM_AllocateNewObject;
++ JVM_ArrayCopy;
++ JVM_AssertionStatusDirectives;
++ JVM_Available;
++ JVM_Bind;
++ JVM_ClassDepth;
++ JVM_ClassLoaderDepth;
++ JVM_Clone;
++ JVM_Close;
++ JVM_CX8Field;
++ JVM_CompileClass;
++ JVM_CompileClasses;
++ JVM_CompilerCommand;
++ JVM_Connect;
++ JVM_ConstantPoolGetClassAt;
++ JVM_ConstantPoolGetClassAtIfLoaded;
++ JVM_ConstantPoolGetDoubleAt;
++ JVM_ConstantPoolGetFieldAt;
++ JVM_ConstantPoolGetFieldAtIfLoaded;
++ JVM_ConstantPoolGetFloatAt;
++ JVM_ConstantPoolGetIntAt;
++ JVM_ConstantPoolGetLongAt;
++ JVM_ConstantPoolGetMethodAt;
++ JVM_ConstantPoolGetMethodAtIfLoaded;
++ JVM_ConstantPoolGetMemberRefInfoAt;
++ JVM_ConstantPoolGetSize;
++ JVM_ConstantPoolGetStringAt;
++ JVM_ConstantPoolGetUTF8At;
++ JVM_CountStackFrames;
++ JVM_CurrentClassLoader;
++ JVM_CurrentLoadedClass;
++ JVM_CurrentThread;
++ JVM_CurrentTimeMillis;
++ JVM_DefineClass;
++ JVM_DefineClassWithSource;
++ JVM_DefineClassWithSourceCond;
++ JVM_DesiredAssertionStatus;
++ JVM_DisableCompiler;
++ JVM_DoPrivileged;
++ JVM_DTraceGetVersion;
++ JVM_DTraceActivate;
++ JVM_DTraceIsProbeEnabled;
++ JVM_DTraceIsSupported;
++ JVM_DTraceDispose;
++ JVM_DumpAllStacks;
++ JVM_DumpThreads;
++ JVM_EnableCompiler;
++ JVM_Exit;
++ JVM_FillInStackTrace;
++ JVM_FindClassFromClass;
++ JVM_FindClassFromClassLoader;
++ JVM_FindClassFromBootLoader;
++ JVM_FindLibraryEntry;
++ JVM_FindLoadedClass;
++ JVM_FindPrimitiveClass;
++ JVM_FindSignal;
++ JVM_FreeMemory;
++ JVM_GC;
++ JVM_GetAllThreads;
++ JVM_GetArrayElement;
++ JVM_GetArrayLength;
++ JVM_GetCPClassNameUTF;
++ JVM_GetCPFieldClassNameUTF;
++ JVM_GetCPFieldModifiers;
++ JVM_GetCPFieldNameUTF;
++ JVM_GetCPFieldSignatureUTF;
++ JVM_GetCPMethodClassNameUTF;
++ JVM_GetCPMethodModifiers;
++ JVM_GetCPMethodNameUTF;
++ JVM_GetCPMethodSignatureUTF;
++ JVM_GetCallerClass;
++ JVM_GetClassAccessFlags;
++ JVM_GetClassAnnotations;
++ JVM_GetClassCPEntriesCount;
++ JVM_GetClassCPTypes;
++ JVM_GetClassConstantPool;
++ JVM_GetClassContext;
++ JVM_GetClassDeclaredConstructors;
++ JVM_GetClassDeclaredFields;
++ JVM_GetClassDeclaredMethods;
++ JVM_GetClassFieldsCount;
++ JVM_GetClassInterfaces;
++ JVM_GetClassLoader;
++ JVM_GetClassMethodsCount;
++ JVM_GetClassModifiers;
++ JVM_GetClassName;
++ JVM_GetClassNameUTF;
++ JVM_GetClassSignature;
++ JVM_GetClassSigners;
++ JVM_GetClassTypeAnnotations;
++ JVM_GetComponentType;
++ JVM_GetDeclaredClasses;
++ JVM_GetDeclaringClass;
++ JVM_GetEnclosingMethodInfo;
++ JVM_GetFieldAnnotations;
++ JVM_GetFieldIxModifiers;
++ JVM_GetHostName;
++ JVM_GetInheritedAccessControlContext;
++ JVM_GetInterfaceVersion;
++ JVM_GetLastErrorString;
++ JVM_GetManagement;
++ JVM_GetMethodAnnotations;
++ JVM_GetMethodDefaultAnnotationValue;
++ JVM_GetMethodIxArgsSize;
++ JVM_GetMethodIxByteCode;
++ JVM_GetMethodIxByteCodeLength;
++ JVM_GetMethodIxExceptionIndexes;
++ JVM_GetMethodIxExceptionTableEntry;
++ JVM_GetMethodIxExceptionTableLength;
++ JVM_GetMethodIxExceptionsCount;
++ JVM_GetMethodIxLocalsCount;
++ JVM_GetMethodIxMaxStack;
++ JVM_GetMethodIxModifiers;
++ JVM_GetMethodIxNameUTF;
++ JVM_GetMethodIxSignatureUTF;
++ JVM_GetMethodParameterAnnotations;
++ JVM_GetMethodParameters;
++ JVM_GetPrimitiveArrayElement;
++ JVM_GetProtectionDomain;
++ JVM_GetSockName;
++ JVM_GetSockOpt;
++ JVM_GetStackAccessControlContext;
++ JVM_GetStackTraceDepth;
++ JVM_GetStackTraceElement;
++ JVM_GetSystemPackage;
++ JVM_GetSystemPackages;
++ JVM_GetTemporaryDirectory;
++ JVM_GetThreadStateNames;
++ JVM_GetThreadStateValues;
++ JVM_GetVersionInfo;
++ JVM_Halt;
++ JVM_HoldsLock;
++ JVM_IHashCode;
++ JVM_InitAgentProperties;
++ JVM_InitProperties;
++ JVM_InitializeCompiler;
++ JVM_InitializeSocketLibrary;
++ JVM_InternString;
++ JVM_Interrupt;
++ JVM_InvokeMethod;
++ JVM_IsArrayClass;
++ JVM_IsConstructorIx;
++ JVM_IsInterface;
++ JVM_IsInterrupted;
++ JVM_IsNaN;
++ JVM_IsPrimitiveClass;
++ JVM_IsSameClassPackage;
++ JVM_IsSilentCompiler;
++ JVM_IsSupportedJNIVersion;
++ JVM_IsThreadAlive;
++ JVM_IsVMGeneratedMethodIx;
++ JVM_LatestUserDefinedLoader;
++ JVM_Listen;
++ JVM_LoadClass0;
++ JVM_LoadLibrary;
++ JVM_Lseek;
++ JVM_MaxObjectInspectionAge;
++ JVM_MaxMemory;
++ JVM_MonitorNotify;
++ JVM_MonitorNotifyAll;
++ JVM_MonitorWait;
++ JVM_NanoTime;
++ JVM_NativePath;
++ JVM_NewArray;
++ JVM_NewInstanceFromConstructor;
++ JVM_NewMultiArray;
++ JVM_OnExit;
++ JVM_Open;
++ JVM_RaiseSignal;
++ JVM_RawMonitorCreate;
++ JVM_RawMonitorDestroy;
++ JVM_RawMonitorEnter;
++ JVM_RawMonitorExit;
++ JVM_Read;
++ JVM_Recv;
++ JVM_RecvFrom;
++ JVM_RegisterSignal;
++ JVM_ReleaseUTF;
++ JVM_ResolveClass;
++ JVM_ResumeThread;
++ JVM_Send;
++ JVM_SendTo;
++ JVM_SetArrayElement;
++ JVM_SetClassSigners;
++ JVM_SetLength;
++ JVM_SetNativeThreadName;
++ JVM_SetPrimitiveArrayElement;
++ JVM_SetProtectionDomain;
++ JVM_SetSockOpt;
++ JVM_SetThreadPriority;
++ JVM_Sleep;
++ JVM_Socket;
++ JVM_SocketAvailable;
++ JVM_SocketClose;
++ JVM_SocketShutdown;
++ JVM_StartThread;
++ JVM_StopThread;
++ JVM_SuspendThread;
++ JVM_SupportsCX8;
++ JVM_Sync;
++ JVM_Timeout;
++ JVM_TotalMemory;
++ JVM_TraceInstructions;
++ JVM_TraceMethodCalls;
++ JVM_UnloadLibrary;
++ JVM_Write;
++ JVM_Yield;
++ JVM_handle_linux_signal;
++
++ # miscellaneous functions
++ jio_fprintf;
++ jio_printf;
++ jio_snprintf;
++ jio_vfprintf;
++ jio_vsnprintf;
++ fork1;
++ numa_warn;
++ numa_error;
++
++ # Needed because there is no JVM interface for this.
++ sysThreadAvailableStackWithSlack;
++
++ # This is for Forte Analyzer profiling support.
++ AsyncGetCallTrace;
++
++ # INSERT VTABLE SYMBOLS HERE
++
++ local:
++ *;
++};
++
+--- ./hotspot/make/aix/makefiles/ppc64.make Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/make/aix/makefiles/ppc64.make Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,94 @@
++#
++# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved.
++# Copyright 2012, 2013 SAP AG. All rights reserved.
++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++#
++# This code is free software; you can redistribute it and/or modify it
++# under the terms of the GNU General Public License version 2 only, as
++# published by the Free Software Foundation.
++#
++# This code is distributed in the hope that it will be useful, but WITHOUT
++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++# version 2 for more details (a copy is included in the LICENSE file that
++# accompanied this code).
++#
++# You should have received a copy of the GNU General Public License version
++# 2 along with this work; if not, write to the Free Software Foundation,
++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++#
++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++# or visit www.oracle.com if you need additional information or have any
++# questions.
++#
++#
++
++# Produce 64-bit object files.
++CFLAGS += -q64
++
++# Balanced tuning for recent versions of the POWER architecture (if supported by xlc).
++QTUNE=$(if $(CXX_SUPPORTS_BALANCED_TUNING),balanced,pwr5)
++
++# Try to speed up the interpreter: use ppc64 instructions and inline
++# glue code for external functions.
++OPT_CFLAGS += -qarch=ppc64 -qtune=$(QTUNE) -qinlglue
++
++# We need variable length arrays
++CFLAGS += -qlanglvl=c99vla
++# Just to check for unwanted macro redefinitions
++CFLAGS += -qlanglvl=noredefmac
++
++# Suppress those "implicit private" warnings xlc gives.
++# - The omitted keyword "private" is assumed for base class "...".
++CFLAGS += -qsuppress=1540-0198
++
++# Suppress the following numerous warnings:
++# - 1540-1090 (I) The destructor of "..." might not be called.
++# - 1500-010: (W) WARNING in ...: Infinite loop. Program may not stop.
++# There are several infinite loops in the vm, suppress.
++CFLAGS += -qsuppress=1540-1090 -qsuppress=1500-010
++
++# Suppress
++# - 1540-1088 (W) The exception specification is being ignored.
++# caused by throw() in declaration of new() in nmethod.hpp.
++CFLAGS += -qsuppress=1540-1088
++
++# Turn off floating-point optimizations that may alter program semantics
++OPT_CFLAGS += -qstrict
++
++# Disable aggressive optimizations for functions in sharedRuntimeTrig.cpp
++# and sharedRuntimeTrans.cpp on ppc64.
++# -qstrict turns off the following optimizations:
++# * Performing code motion and scheduling on computations such as loads
++# and floating-point computations that may trigger an exception.
++# * Relaxing conformance to IEEE rules.
++# * Reassociating floating-point expressions.
++# When using '-qstrict' there still remains one problem
++# in javasoft.sqe.tests.api.java.lang.Math.sin5Tests when run in compile-all
++# mode, so don't optimize sharedRuntimeTrig.cpp at all.
++OPT_CFLAGS/sharedRuntimeTrig.o = $(OPT_CFLAGS/NOOPT)
++OPT_CFLAGS/sharedRuntimeTrans.o = $(OPT_CFLAGS/NOOPT)
++
++# xlc 10.01 parameters for ipa compile.
++QIPA_COMPILE=$(if $(CXX_IS_V10),-qipa)
++
++# Xlc 10.1 parameters for aggressive optimization:
++# - qhot=level=1: Most aggressive loop optimizations.
++# - qignerrno: Assume errno is not modified by system calls.
++# - qinline: Inline method calls. No suboptions for c++ compiles.
++# - qxflag=ASMMIDCOALFIX: Activate fix for -O3 problem in interpreter loop.
++# - qxflag=asmfastsync: Activate fix for performance problem with inline assembler with memory clobber.
++QV10_OPT=$(if $(CXX_IS_V10),-qxflag=ASMMIDCOALFIX -qxflag=asmfastsync)
++QV10_OPT_AGGRESSIVE=$(if $(CXX_IS_V10),-qhot=level=1 -qignerrno -qinline)
++QV10_OPT_CONSERVATIVE=$(if $(CXX_IS_V10),-qhot=level=1 -qignerrno -qinline)
++
++# Disallow inlining for synchronizer.cpp, but perform O3 optimizations.
++OPT_CFLAGS/synchronizer.o = $(OPT_CFLAGS) -qnoinline
++
++# Set all the xlC V10.1 options here.
++OPT_CFLAGS += $(QIPA_COMPILE) $(QV10_OPT) $(QV10_OPT_AGGRESSIVE)
++
++export OBJECT_MODE=64
++
++# Also build the launcher as a 64-bit executable.
++LAUNCHERFLAGS += -q64
+--- ./hotspot/make/aix/makefiles/product.make Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/make/aix/makefiles/product.make Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,58 @@
++#
++# Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
++# Copyright 2012, 2013 SAP AG. All rights reserved.
++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++#
++# This code is free software; you can redistribute it and/or modify it
++# under the terms of the GNU General Public License version 2 only, as
++# published by the Free Software Foundation.
++#
++# This code is distributed in the hope that it will be useful, but WITHOUT
++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++# version 2 for more details (a copy is included in the LICENSE file that
++# accompanied this code).
++#
++# You should have received a copy of the GNU General Public License version
++# 2 along with this work; if not, write to the Free Software Foundation,
++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++#
++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++# or visit www.oracle.com if you need additional information or have any
++# questions.
++#
++#
++
++# Sets make macros for making optimized version of Gamma VM
++# (This is the "product", not the "release" version.)
++
++# Compiler specific OPT_CFLAGS are passed in from gcc.make, sparcWorks.make
++OPT_CFLAGS/DEFAULT= $(OPT_CFLAGS)
++OPT_CFLAGS/BYFILE = $(OPT_CFLAGS/$@)$(OPT_CFLAGS/DEFAULT$(OPT_CFLAGS/$@))
++
++# (OPT_CFLAGS/SLOWER is also available, to alter compilation of buggy files)
++
++# If you set HOTSPARC_GENERIC=yes, you disable all OPT_CFLAGS settings
++CFLAGS$(HOTSPARC_GENERIC) += $(OPT_CFLAGS/BYFILE)
++
++# Set the environment variable HOTSPARC_GENERIC to "true"
++# to inhibit the effect of the previous line on CFLAGS.
++
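++# A worked expansion of the BYFILE trick above: when building synchronizer.o,
++# for which ppc64.make sets OPT_CFLAGS/synchronizer.o,
++#   $(OPT_CFLAGS/$@)                      -> the per-file flags (non-empty)
++#   $(OPT_CFLAGS/DEFAULT$(OPT_CFLAGS/$@)) -> an undefined variable, i.e. empty
++# so only the per-file flags apply. For every other object $(OPT_CFLAGS/$@)
++# is empty and the second term collapses to $(OPT_CFLAGS/DEFAULT).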
++# Linker mapfile
++MAPFILE = $(GAMMADIR)/make/aix/makefiles/mapfile-vers-product
++
++# Remove ipa linkage altogether. Does not seem to benefit performance, but increases code footprint.
++LFLAGS_QIPA=
++
++SYSDEFS += -DPRODUCT
++VERSION = optimized
++
++# Use -g to strip the library, as -x would discard its symbol table; -x is
++# fine for executables.
++# Note: these macros are not used in .debuginfo configs
++STRIP_LIBJVM = $(STRIP) -g $@ || exit 1;
++STRIP_AOUT = $(STRIP) -x $@ || exit 1;
++
++# If we can create .debuginfo files, then the VM is stripped in vm.make
++# and this macro is not used.
++# LINK_LIB.CXX/POST_HOOK += $(STRIP_$(LINK_INTO))
+--- ./hotspot/make/aix/makefiles/rules.make Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/make/aix/makefiles/rules.make Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,203 @@
++#
++# Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++#
++# This code is free software; you can redistribute it and/or modify it
++# under the terms of the GNU General Public License version 2 only, as
++# published by the Free Software Foundation.
++#
++# This code is distributed in the hope that it will be useful, but WITHOUT
++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++# version 2 for more details (a copy is included in the LICENSE file that
++# accompanied this code).
++#
++# You should have received a copy of the GNU General Public License version
++# 2 along with this work; if not, write to the Free Software Foundation,
++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++#
++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++# or visit www.oracle.com if you need additional information or have any
++# questions.
++#
++#
++
++# Common rules/macros for the vm, adlc.
++
++# Tell make that .cpp is important
++.SUFFIXES: .cpp $(SUFFIXES)
++
++DEMANGLER = c++filt
++DEMANGLE = $(DEMANGLER) < $@ > .$@ && mv -f .$@ $@
++
++# $(CC) is the c compiler (cc/gcc), $(CXX) is the c++ compiler (CC/g++).
++CC_COMPILE = $(CC) $(CXXFLAGS) $(CFLAGS)
++CXX_COMPILE = $(CXX) $(CXXFLAGS) $(CFLAGS)
++
++AS.S = $(AS) $(ASFLAGS)
++
++COMPILE.CC = $(CC_COMPILE) -c
++GENASM.CC = $(CC_COMPILE) -S
++LINK.CC = $(CC) $(LFLAGS) $(AOUT_FLAGS) $(PROF_AOUT_FLAGS)
++LINK_LIB.CC = $(CC) $(LFLAGS) $(SHARED_FLAG)
++PREPROCESS.CC = $(CC_COMPILE) -E
++
++COMPILE.CXX = $(CXX_COMPILE) -c
++GENASM.CXX = $(CXX_COMPILE) -S
++LINK.CXX = $(CXX) $(LFLAGS) $(AOUT_FLAGS) $(PROF_AOUT_FLAGS)
++LINK_NOPROF.CXX = $(CXX) $(LFLAGS) $(AOUT_FLAGS)
++LINK_LIB.CXX = $(CXX) $(LFLAGS) $(SHARED_FLAG)
++PREPROCESS.CXX = $(CXX_COMPILE) -E
++
++# cross compiling the jvm with c2 requires host compilers to build
++# adlc tool
++
++HOST.CXX_COMPILE = $(HOSTCXX) $(CXXFLAGS) $(CFLAGS)
++HOST.COMPILE.CXX = $(HOST.CXX_COMPILE) -c
++HOST.LINK_NOPROF.CXX = $(HOSTCXX) $(LFLAGS) $(AOUT_FLAGS)
++
++
++# Effect of REMOVE_TARGET is to delete out-of-date files during "gnumake -k".
++REMOVE_TARGET = rm -f $@
++
++# Note use of ALT_BOOTDIR to explicitly specify location of java and
++# javac; this is the same environment variable used in the J2SE build
++# process for overriding the default spec, which is BOOTDIR.
++# Note also that we fall back to using JAVA_HOME if neither of these is
++# specified.
++
++ifdef ALT_BOOTDIR
++
++RUN.JAVA = $(ALT_BOOTDIR)/bin/java
++RUN.JAVAP = $(ALT_BOOTDIR)/bin/javap
++RUN.JAVAH = $(ALT_BOOTDIR)/bin/javah
++RUN.JAR = $(ALT_BOOTDIR)/bin/jar
++COMPILE.JAVAC = $(ALT_BOOTDIR)/bin/javac
++COMPILE.RMIC = $(ALT_BOOTDIR)/bin/rmic
++BOOT_JAVA_HOME = $(ALT_BOOTDIR)
++
++else
++
++ifdef BOOTDIR
++
++RUN.JAVA = $(BOOTDIR)/bin/java
++RUN.JAVAP = $(BOOTDIR)/bin/javap
++RUN.JAVAH = $(BOOTDIR)/bin/javah
++RUN.JAR = $(BOOTDIR)/bin/jar
++COMPILE.JAVAC = $(BOOTDIR)/bin/javac
++COMPILE.RMIC = $(BOOTDIR)/bin/rmic
++BOOT_JAVA_HOME = $(BOOTDIR)
++
++else
++
++ifdef JAVA_HOME
++
++RUN.JAVA = $(JAVA_HOME)/bin/java
++RUN.JAVAP = $(JAVA_HOME)/bin/javap
++RUN.JAVAH = $(JAVA_HOME)/bin/javah
++RUN.JAR = $(JAVA_HOME)/bin/jar
++COMPILE.JAVAC = $(JAVA_HOME)/bin/javac
++COMPILE.RMIC = $(JAVA_HOME)/bin/rmic
++BOOT_JAVA_HOME = $(JAVA_HOME)
++
++else
++
++# take from the PATH, if ALT_BOOTDIR, BOOTDIR and JAVA_HOME are not defined
++# note that this is to support hotspot build without SA. To build
++# SA along with hotspot, you need to define ALT_BOOTDIR, BOOTDIR or JAVA_HOME
++
++RUN.JAVA = java
++RUN.JAVAP = javap
++RUN.JAVAH = javah
++RUN.JAR = jar
++COMPILE.JAVAC = javac
++COMPILE.RMIC = rmic
++
++endif
++endif
++endif
++
++COMPILE.JAVAC += $(BOOTSTRAP_JAVAC_FLAGS)
++
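++# Usage sketch (the path is hypothetical): to build against an explicit boot
++# JDK rather than whatever java is first on the PATH, pass ALT_BOOTDIR on
++# the command line, e.g.
++#
++#   gmake ALT_BOOTDIR=/usr/lib/jvm/java-1.7.0
++#
++# BOOTDIR and JAVA_HOME are consulted, in that order, when it is unset.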
++SUM = /usr/bin/sum
++
++# 'gmake MAKE_VERBOSE=y' gives all the gory details.
++QUIETLY$(MAKE_VERBOSE) = @
++RUN.JAR$(MAKE_VERBOSE) += >/dev/null
++
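++# Spelled out: with MAKE_VERBOSE unset, the line above defines QUIETLY = @,
++# so recipe commands are not echoed; with MAKE_VERBOSE=y it defines the
++# never-read variable QUIETLYy instead, leaving QUIETLY empty and every
++# command visible. RUN.JAR gains its >/dev/null redirect the same way.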
++# Settings for javac
++BOOT_SOURCE_LANGUAGE_VERSION = 6
++BOOT_TARGET_CLASS_VERSION = 6
++JAVAC_FLAGS = -g -encoding ascii
++BOOTSTRAP_JAVAC_FLAGS = $(JAVAC_FLAGS) -source $(BOOT_SOURCE_LANGUAGE_VERSION) -target $(BOOT_TARGET_CLASS_VERSION)
++
++# With parallel makes, print a message at the end of compilation.
++ifeq ($(findstring j,$(MFLAGS)),j)
++COMPILE_DONE = && { echo Done with $<; }
++endif
++
++# Include $(NONPIC_OBJ_FILES) definition
++ifndef LP64
++include $(GAMMADIR)/make/pic.make
++endif
++
++include $(GAMMADIR)/make/altsrc.make
++
++# The non-PIC object files are only generated for 32 bit platforms.
++ifdef LP64
++%.o: %.cpp
++ @echo Compiling $<
++ $(QUIETLY) $(REMOVE_TARGET)
++ $(QUIETLY) $(COMPILE.CXX) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE)
++else
++%.o: %.cpp
++ @echo Compiling $<
++ $(QUIETLY) $(REMOVE_TARGET)
++ $(QUIETLY) $(if $(findstring $@, $(NONPIC_OBJ_FILES)), \
++ $(subst $(VM_PICFLAG), ,$(COMPILE.CXX)) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE), \
++ $(COMPILE.CXX) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE))
++endif
++
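++# For illustration: if an object (say interp_masm_ppc_64.o, a hypothetical
++# entry) appears in $(NONPIC_OBJ_FILES) from pic.make, $(findstring ...) is
++# non-empty and the first branch compiles it with $(VM_PICFLAG) stripped out
++# of COMPILE.CXX; every other object keeps the PIC flag.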
++%.o: %.s
++ @echo Assembling $<
++ $(QUIETLY) $(REMOVE_TARGET)
++ $(QUIETLY) $(AS.S) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE)
++
++%.s: %.cpp
++ @echo Generating assembly for $<
++ $(QUIETLY) $(GENASM.CXX) -o $@ $<
++ $(QUIETLY) $(DEMANGLE) $(COMPILE_DONE)
++
++# Intermediate files (for debugging macros)
++%.i: %.cpp
++ @echo Preprocessing $< to $@
++ $(QUIETLY) $(PREPROCESS.CXX) $< > $@ $(COMPILE_DONE)
++
++# Override gnumake built-in rules which do sccs get operations badly.
++# (They put the checked out code in the current directory, not in the
++# directory of the original file.) Since this is a symptom of a teamware
++# failure, and since not all problems can be detected by gnumake due
++# to incomplete dependency checking... just complain and stop.
++%:: s.%
++ @echo "========================================================="
++ @echo File $@
++ @echo is out of date with respect to its SCCS file.
++ @echo This file may be from an unresolved Teamware conflict.
++ @echo This is also a symptom of a Teamware bringover/putback failure
++ @echo in which SCCS files are updated but not checked out.
++ @echo Check for other out of date files in your workspace.
++ @echo "========================================================="
++ @exit 666
++
++%:: SCCS/s.%
++ @echo "========================================================="
++ @echo File $@
++ @echo is out of date with respect to its SCCS file.
++ @echo This file may be from an unresolved Teamware conflict.
++ @echo This is also a symptom of a Teamware bringover/putback failure
++ @echo in which SCCS files are updated but not checked out.
++ @echo Check for other out of date files in your workspace.
++ @echo "========================================================="
++ @exit 666
++
++.PHONY: default
+--- ./hotspot/make/aix/makefiles/sa.make Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/make/aix/makefiles/sa.make Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,116 @@
++#
++# Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
++# Copyright 2012, 2013 SAP AG. All rights reserved.
++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++#
++# This code is free software; you can redistribute it and/or modify it
++# under the terms of the GNU General Public License version 2 only, as
++# published by the Free Software Foundation.
++#
++# This code is distributed in the hope that it will be useful, but WITHOUT
++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++# version 2 for more details (a copy is included in the LICENSE file that
++# accompanied this code).
++#
++# You should have received a copy of the GNU General Public License version
++# 2 along with this work; if not, write to the Free Software Foundation,
++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++#
++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++# or visit www.oracle.com if you need additional information or have any
++# questions.
++#
++#
++
++# This makefile (sa.make) is included from the sa.make in the
++# build directories.
++
++# This makefile is used to build Serviceability Agent java code
++# and generate JNI header file for native methods.
++
++include $(GAMMADIR)/make/aix/makefiles/rules.make
++
++include $(GAMMADIR)/make/defs.make
++
++AGENT_DIR = $(GAMMADIR)/agent
++
++include $(GAMMADIR)/make/sa.files
++
++TOPDIR = $(shell echo `pwd`)
++GENERATED = $(TOPDIR)/../generated
++
++# tools.jar is needed by the JDI - SA binding
++SA_CLASSPATH = $(BOOT_JAVA_HOME)/lib/tools.jar
++
++# TODO: if it's a modules image, check if SA module is installed.
++MODULELIB_PATH= $(BOOT_JAVA_HOME)/lib/modules
++
++AGENT_FILES_LIST := $(GENERATED)/agent.classes.list
++
++SA_CLASSDIR = $(GENERATED)/saclasses
++
++SA_BUILD_VERSION_PROP = "sun.jvm.hotspot.runtime.VM.saBuildVersion=$(SA_BUILD_VERSION)"
++
++SA_PROPERTIES = $(SA_CLASSDIR)/sa.properties
++
++# if $(AGENT_DIR) does not exist, we don't build SA
++# also, we don't build SA on Itanium, PowerPC, ARM or zero.
++
++all:
++ if [ -d $(AGENT_DIR) -a "$(SRCARCH)" != "ia64" \
++ -a "$(SRCARCH)" != "arm" \
++ -a "$(SRCARCH)" != "ppc" \
++ -a "$(SRCARCH)" != "zero" ] ; then \
++ $(MAKE) -f sa.make $(GENERATED)/sa-jdi.jar; \
++ fi
++
++$(GENERATED)/sa-jdi.jar: $(AGENT_FILES)
++ $(QUIETLY) echo "Making $@"
++ $(QUIETLY) if [ "$(BOOT_JAVA_HOME)" = "" ]; then \
++ echo "ALT_BOOTDIR, BOOTDIR or JAVA_HOME needs to be defined to build SA"; \
++ exit 1; \
++ fi
++ $(QUIETLY) if [ ! -f $(SA_CLASSPATH) -a ! -d $(MODULELIB_PATH) ] ; then \
++ echo "Missing $(SA_CLASSPATH) file. Use 1.6.0 or later version of JDK";\
++ echo ""; \
++ exit 1; \
++ fi
++ $(QUIETLY) if [ ! -d $(SA_CLASSDIR) ] ; then \
++ mkdir -p $(SA_CLASSDIR); \
++ fi
++# Note: When indented, make tries to execute the '$(shell' comment.
++# In some environments, cmd processors have limited line length.
++# To prevent the javac invocation in the next block from using
++# a very long cmd line, we use javac's @file-list option. We
++# generate the file lists using make's built-in 'foreach' control
++# flow which also avoids cmd processor line length issues. Since
++# the 'foreach' is done as part of make's macro expansion phase,
++# the initialization of the lists is also done in the same phase
++# using '$(shell rm ...' instead of using the more traditional
++# 'rm ...' rule.
++ $(shell rm -rf $(AGENT_FILES_LIST))
++# gnumake 3.78.1 does not accept the *'s that
++# are in AGENT_FILES, so use the shell to expand them.
++# Be extra careful not to produce overly long command lines in the shell!
++ $(foreach file,$(AGENT_FILES),$(shell ls -1 $(file) >> $(AGENT_FILES_LIST)))
++ $(QUIETLY) $(REMOTE) $(COMPILE.JAVAC) -classpath $(SA_CLASSPATH) -sourcepath $(AGENT_SRC_DIR) -d $(SA_CLASSDIR) @$(AGENT_FILES_LIST)
++ $(QUIETLY) $(REMOTE) $(COMPILE.RMIC) -classpath $(SA_CLASSDIR) -d $(SA_CLASSDIR) sun.jvm.hotspot.debugger.remote.RemoteDebuggerServer
++ $(QUIETLY) echo "$(SA_BUILD_VERSION_PROP)" > $(SA_PROPERTIES)
++ $(QUIETLY) rm -f $(SA_CLASSDIR)/sun/jvm/hotspot/utilities/soql/sa.js
++ $(QUIETLY) cp $(AGENT_SRC_DIR)/sun/jvm/hotspot/utilities/soql/sa.js $(SA_CLASSDIR)/sun/jvm/hotspot/utilities/soql
++ $(QUIETLY) mkdir -p $(SA_CLASSDIR)/sun/jvm/hotspot/ui/resources
++ $(QUIETLY) rm -f $(SA_CLASSDIR)/sun/jvm/hotspot/ui/resources/*
++ $(QUIETLY) cp $(AGENT_SRC_DIR)/sun/jvm/hotspot/ui/resources/*.png $(SA_CLASSDIR)/sun/jvm/hotspot/ui/resources/
++ $(QUIETLY) cp -r $(AGENT_SRC_DIR)/images/* $(SA_CLASSDIR)/
++ $(QUIETLY) $(REMOTE) $(RUN.JAR) cf $@ -C $(SA_CLASSDIR)/ .
++ $(QUIETLY) $(REMOTE) $(RUN.JAR) uf $@ -C $(AGENT_SRC_DIR) META-INF/services/com.sun.jdi.connect.Connector
++ $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.debugger.x86.X86ThreadContext
++ $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.debugger.ia64.IA64ThreadContext
++ $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.debugger.amd64.AMD64ThreadContext
++ $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.debugger.sparc.SPARCThreadContext
++
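++# The @file-list mechanism above, for illustration: javac treats an argument
++# beginning with '@' as a file holding one source path per line, so the
++# compile step is equivalent to
++#
++#   javac -classpath $(SA_CLASSPATH) -d $(SA_CLASSDIR) `cat agent.classes.list`
++#
++# without ever assembling a command line long enough to hit a shell limit.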
++clean:
++ rm -rf $(SA_CLASSDIR)
++ rm -rf $(GENERATED)/sa-jdi.jar
++ rm -rf $(AGENT_FILES_LIST)
+--- ./hotspot/make/aix/makefiles/saproc.make Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/make/aix/makefiles/saproc.make Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,117 @@
++#
++# Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
++# Copyright 2012, 2013 SAP AG. All rights reserved.
++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++#
++# This code is free software; you can redistribute it and/or modify it
++# under the terms of the GNU General Public License version 2 only, as
++# published by the Free Software Foundation.
++#
++# This code is distributed in the hope that it will be useful, but WITHOUT
++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++# version 2 for more details (a copy is included in the LICENSE file that
++# accompanied this code).
++#
++# You should have received a copy of the GNU General Public License version
++# 2 along with this work; if not, write to the Free Software Foundation,
++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++#
++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++# or visit www.oracle.com if you need additional information or have any
++# questions.
++#
++#
++include $(GAMMADIR)/make/defs.make
++
++# Rules to build serviceability agent library, used by vm.make
++
++# libsaproc.so: serviceability agent
++
++SAPROC = saproc
++LIBSAPROC = lib$(SAPROC).so
++
++LIBSAPROC_DEBUGINFO = lib$(SAPROC).debuginfo
++LIBSAPROC_DIZ = lib$(SAPROC).diz
++
++AGENT_DIR = $(GAMMADIR)/agent
++
++SASRCDIR = $(AGENT_DIR)/src/os/$(Platform_os_family)
++
++SASRCFILES = $(SASRCDIR)/salibelf.c \
++ $(SASRCDIR)/symtab.c \
++ $(SASRCDIR)/libproc_impl.c \
++ $(SASRCDIR)/ps_proc.c \
++ $(SASRCDIR)/ps_core.c \
++ $(SASRCDIR)/LinuxDebuggerLocal.c \
++
++SAMAPFILE = $(SASRCDIR)/mapfile
++
++DEST_SAPROC = $(JDK_LIBDIR)/$(LIBSAPROC)
++DEST_SAPROC_DEBUGINFO = $(JDK_LIBDIR)/$(LIBSAPROC_DEBUGINFO)
++DEST_SAPROC_DIZ = $(JDK_LIBDIR)/$(LIBSAPROC_DIZ)
++
++# DEBUG_BINARIES overrides everything, use full -g debug information
++ifeq ($(DEBUG_BINARIES), true)
++ SA_DEBUG_CFLAGS = -g
++endif
++
++# if $(AGENT_DIR) does not exist, we don't build SA
++# also, we don't build SA on Itanium, PPC, ARM or zero.
++
++ifneq ($(wildcard $(AGENT_DIR)),)
++ifneq ($(filter-out ia64 arm ppc zero,$(SRCARCH)),)
++ BUILDLIBSAPROC = $(LIBSAPROC)
++endif
++endif
++
++
++SA_LFLAGS = $(MAPFLAG:FILENAME=$(SAMAPFILE)) $(LDFLAGS_HASH_STYLE)
++
++$(LIBSAPROC): $(SASRCFILES) $(SAMAPFILE)
++ $(QUIETLY) if [ "$(BOOT_JAVA_HOME)" = "" ]; then \
++ echo "ALT_BOOTDIR, BOOTDIR or JAVA_HOME needs to be defined to build SA"; \
++ exit 1; \
++ fi
++ @echo Making SA debugger back-end...
++ $(QUIETLY) $(CC) -D$(BUILDARCH) -D_GNU_SOURCE \
++ -D_FILE_OFFSET_BITS=64 \
++ $(SYMFLAG) $(ARCHFLAG) $(SHARED_FLAG) $(PICFLAG) \
++ $(BIN_UTILS) \
++ -I$(SASRCDIR) \
++ -I$(GENERATED) \
++ -I$(BOOT_JAVA_HOME)/include \
++ -I$(BOOT_JAVA_HOME)/include/$(Platform_os_family) \
++ $(SASRCFILES) \
++ $(SA_LFLAGS) \
++ $(SA_DEBUG_CFLAGS) \
++ -o $@ \
++ -lthread_db
++ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
++ $(QUIETLY) $(OBJCOPY) --only-keep-debug $@ $(LIBSAPROC_DEBUGINFO)
++ $(QUIETLY) $(OBJCOPY) --add-gnu-debuglink=$(LIBSAPROC_DEBUGINFO) $@
++ ifeq ($(STRIP_POLICY),all_strip)
++ $(QUIETLY) $(STRIP) $@
++ else
++ ifeq ($(STRIP_POLICY),min_strip)
++ $(QUIETLY) $(STRIP) -g $@
++ # implied else here is no stripping at all
++ endif
++ endif
++ ifeq ($(ZIP_DEBUGINFO_FILES),1)
++ $(ZIPEXE) -q -y $(LIBSAPROC_DIZ) $(LIBSAPROC_DEBUGINFO)
++ $(RM) $(LIBSAPROC_DEBUGINFO)
++ endif
++endif
++
++install_saproc: $(BUILDLIBSAPROC)
++ $(QUIETLY) if [ -e $(LIBSAPROC) ] ; then \
++ echo "Copying $(LIBSAPROC) to $(DEST_SAPROC)"; \
++ test -f $(LIBSAPROC_DEBUGINFO) && \
++ cp -f $(LIBSAPROC_DEBUGINFO) $(DEST_SAPROC_DEBUGINFO); \
++ test -f $(LIBSAPROC_DIZ) && \
++ cp -f $(LIBSAPROC_DIZ) $(DEST_SAPROC_DIZ); \
++ cp -f $(LIBSAPROC) $(DEST_SAPROC) && echo "Done"; \
++ fi
++
++.PHONY: install_saproc
+--- ./hotspot/make/aix/makefiles/top.make Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/make/aix/makefiles/top.make Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,144 @@
++#
++# Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++#
++# This code is free software; you can redistribute it and/or modify it
++# under the terms of the GNU General Public License version 2 only, as
++# published by the Free Software Foundation.
++#
++# This code is distributed in the hope that it will be useful, but WITHOUT
++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++# version 2 for more details (a copy is included in the LICENSE file that
++# accompanied this code).
++#
++# You should have received a copy of the GNU General Public License version
++# 2 along with this work; if not, write to the Free Software Foundation,
++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++#
++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++# or visit www.oracle.com if you need additional information or have any
++# questions.
++#
++#
++
++# top.make is included in the Makefile in the build directories.
++# It DOES NOT include the vm dependency info in order to be faster.
++# Its main job is to implement the incremental form of make lists.
++# It also:
++# -builds and runs adlc via adlc.make
++# -generates JVMTI source and docs via jvmti.make (JSR-163)
++# -generates sa-jdi.jar (JDI binding to core files)
++
++# It assumes the following flags are set:
++# CFLAGS Platform_file, Src_Dirs_I, Src_Dirs_V, SYSDEFS, AOUT, Obj_Files
++
++# -- D. Ungar (5/97) from a file by Bill Bush
++
++# Don't override the built-in $(MAKE).
++# Instead, use "gmake" (or "gnumake") from the command line. --Rose
++#MAKE = gmake
++
++include $(GAMMADIR)/make/altsrc.make
++
++TOPDIR = $(shell echo `pwd`)
++GENERATED = $(TOPDIR)/../generated
++VM = $(GAMMADIR)/src/share/vm
++Plat_File = $(Platform_file)
++CDG = cd $(GENERATED);
++
++ifneq ($(USE_PRECOMPILED_HEADER),0)
++UpdatePCH = $(MAKE) -f vm.make $(PRECOMPILED_HEADER) $(MFLAGS)
++else
++UpdatePCH = \# precompiled header is not used
++endif
++
++Cached_plat = $(GENERATED)/platform.current
++
++AD_Dir = $(GENERATED)/adfiles
++ADLC = $(AD_Dir)/adlc
++AD_Spec = $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(Platform_arch)/vm/$(Platform_arch_model).ad)
++AD_Src = $(call altsrc-replace,$(HS_COMMON_SRC)/share/vm/adlc)
++AD_Names = ad_$(Platform_arch_model).hpp ad_$(Platform_arch_model).cpp
++AD_Files = $(AD_Names:%=$(AD_Dir)/%)
++
++# AD_Files_If_Required/COMPILER1 = ad_stuff
++AD_Files_If_Required/COMPILER2 = ad_stuff
++AD_Files_If_Required/TIERED = ad_stuff
++AD_Files_If_Required = $(AD_Files_If_Required/$(TYPE))
++
++# Weird argument adjustment for "gnumake -j..."
++adjust-mflags = $(GENERATED)/adjust-mflags
++MFLAGS-adjusted = -r `$(adjust-mflags) "$(MFLAGS)" "$(HOTSPOT_BUILD_JOBS)"`
++
++
++# default target: update lists, make vm
++# done in stages to force sequential order with parallel make
++#
++
++default: vm_build_preliminaries the_vm
++ @echo All done.
++
++# This is an explicit dependency for the sake of parallel makes.
++vm_build_preliminaries: checks $(Cached_plat) $(AD_Files_If_Required) trace_stuff jvmti_stuff sa_stuff
++ @# We need a null action here, so implicit rules don't get consulted.
++
++$(Cached_plat): $(Plat_File)
++ $(CDG) cp $(Plat_File) $(Cached_plat)
++
++# make AD files as necessary
++ad_stuff: $(Cached_plat) $(adjust-mflags)
++ @$(MAKE) -f adlc.make $(MFLAGS-adjusted)
++
++# generate JVMTI files from the spec
++jvmti_stuff: $(Cached_plat) $(adjust-mflags)
++ @$(MAKE) -f jvmti.make $(MFLAGS-adjusted)
++
++# generate trace files
++trace_stuff: jvmti_stuff $(Cached_plat) $(adjust-mflags)
++ @$(MAKE) -f trace.make $(MFLAGS-adjusted)
++
++# generate SA jar files and native header
++sa_stuff:
++ @$(MAKE) -f sa.make $(MFLAGS-adjusted)
++
++# and the VM: must use other makefile with dependencies included
++
++# We have to go to great lengths to get control over the -jN argument
++# to the recursive invocation of vm.make. The problem is that gnumake
++# resets -jN to -j1 for recursive runs. (How helpful.)
++# Note that the user must specify the desired parallelism level via a
++# command-line or environment variable named HOTSPOT_BUILD_JOBS.
++$(adjust-mflags): $(GAMMADIR)/make/$(Platform_os_family)/makefiles/adjust-mflags.sh
++ @+rm -f $@ $@+
++ @+cat $< > $@+
++ @+chmod +x $@+
++ @+mv $@+ $@
++
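++# Usage sketch: parallelism must be requested explicitly, e.g.
++#
++#   gmake -j4 HOTSPOT_BUILD_JOBS=4
++#
++# adjust-mflags.sh then rewrites the inherited MFLAGS so that the -j4
++# survives the recursive $(MAKE) -f vm.make invocation below.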
++the_vm: vm_build_preliminaries $(adjust-mflags)
++ @$(UpdatePCH)
++ @$(MAKE) -f vm.make $(MFLAGS-adjusted)
++
++install gamma: the_vm
++ @$(MAKE) -f vm.make $@
++
++# next rules support "make foo.[ois]"
++
++%.o %.i %.s:
++ $(UpdatePCH)
++ $(MAKE) -f vm.make $(MFLAGS) $@
++ #$(MAKE) -f vm.make $@
++
++# this should force everything to be rebuilt
++clean:
++ rm -f $(GENERATED)/*.class
++ $(MAKE) -f vm.make $(MFLAGS) clean
++
++# just in case it doesn't, this should do it
++realclean:
++ $(MAKE) -f vm.make $(MFLAGS) clean
++ rm -fr $(GENERATED)
++
++.PHONY: default vm_build_preliminaries
++.PHONY: lists ad_stuff jvmti_stuff sa_stuff the_vm clean realclean
++.PHONY: checks check_os_version install
+--- ./hotspot/make/aix/makefiles/trace.make Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/make/aix/makefiles/trace.make Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,120 @@
++#
++# Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++#
++# This code is free software; you can redistribute it and/or modify it
++# under the terms of the GNU General Public License version 2 only, as
++# published by the Free Software Foundation.
++#
++# This code is distributed in the hope that it will be useful, but WITHOUT
++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++# version 2 for more details (a copy is included in the LICENSE file that
++# accompanied this code).
++#
++# You should have received a copy of the GNU General Public License version
++# 2 along with this work; if not, write to the Free Software Foundation,
++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++#
++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++# or visit www.oracle.com if you need additional information or have any
++# questions.
++#
++#
++
++# This makefile (trace.make) is included from the trace.make in the
++# build directories.
++#
++# It knows how to build and run the tools to generate trace files.
++
++include $(GAMMADIR)/make/linux/makefiles/rules.make
++include $(GAMMADIR)/make/altsrc.make
++
++# #########################################################################
++
++HAS_ALT_SRC:=$(shell if [ -d $(HS_ALT_SRC)/share/vm/trace ]; then \
++ echo "true"; else echo "false";\
++ fi)
++
++TOPDIR = $(shell echo `pwd`)
++GENERATED = $(TOPDIR)/../generated
++JvmtiOutDir = $(GENERATED)/jvmtifiles
++TraceOutDir = $(GENERATED)/tracefiles
++
++TraceAltSrcDir = $(HS_ALT_SRC)/share/vm/trace
++TraceSrcDir = $(HS_COMMON_SRC)/share/vm/trace
++
++# set VPATH so make knows where to look for source files
++Src_Dirs_V += $(TraceSrcDir) $(TraceAltSrcDir)
++VPATH += $(Src_Dirs_V:%=%:)
++
++TraceGeneratedNames = \
++ traceEventClasses.hpp \
++ traceEventIds.hpp \
++ traceTypes.hpp
++
++ifeq ($(HAS_ALT_SRC), true)
++TraceGeneratedNames += \
++ traceRequestables.hpp \
++ traceEventControl.hpp
++
++ifneq ($(INCLUDE_TRACE), false)
++TraceGeneratedNames += traceProducer.cpp
++endif
++
++endif
++
++TraceGeneratedFiles = $(TraceGeneratedNames:%=$(TraceOutDir)/%)
++
++XSLT = $(REMOTE) $(RUN.JAVA) -classpath $(JvmtiOutDir) jvmtiGen
++
++XML_DEPS = $(TraceSrcDir)/trace.xml $(TraceSrcDir)/tracetypes.xml \
++ $(TraceSrcDir)/trace.dtd $(TraceSrcDir)/xinclude.mod
++ifeq ($(HAS_ALT_SRC), true)
++ XML_DEPS += $(TraceAltSrcDir)/traceevents.xml
++endif
++
++.PHONY: all clean cleanall
++
++# #########################################################################
++
++all: $(TraceGeneratedFiles)
++
++GENERATE_CODE= \
++ $(QUIETLY) echo Generating $@; \
++ $(XSLT) -IN $(word 1,$^) -XSL $(word 2,$^) -OUT $@; \
++ test -f $@
++
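++# A worked example of the macro above: in the traceEventIds.hpp rule below,
++# $(word 1,$^) is $(TraceSrcDir)/trace.xml and $(word 2,$^) is
++# $(TraceSrcDir)/traceEventIds.xsl, so GENERATE_CODE expands to roughly
++#
++#   $(XSLT) -IN .../trace.xml -XSL .../traceEventIds.xsl \
++#       -OUT $(TraceOutDir)/traceEventIds.hpp
++#
++# with a trailing test -f to fail the build if no output was produced.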
++$(TraceOutDir)/traceEventIds.hpp: $(TraceSrcDir)/trace.xml $(TraceSrcDir)/traceEventIds.xsl $(XML_DEPS)
++ $(GENERATE_CODE)
++
++$(TraceOutDir)/traceTypes.hpp: $(TraceSrcDir)/trace.xml $(TraceSrcDir)/traceTypes.xsl $(XML_DEPS)
++ $(GENERATE_CODE)
++
++ifeq ($(HAS_ALT_SRC), false)
++
++$(TraceOutDir)/traceEventClasses.hpp: $(TraceSrcDir)/trace.xml $(TraceSrcDir)/traceEventClasses.xsl $(XML_DEPS)
++ $(GENERATE_CODE)
++
++else
++
++$(TraceOutDir)/traceEventClasses.hpp: $(TraceSrcDir)/trace.xml $(TraceAltSrcDir)/traceEventClasses.xsl $(XML_DEPS)
++ $(GENERATE_CODE)
++
++$(TraceOutDir)/traceProducer.cpp: $(TraceSrcDir)/trace.xml $(TraceAltSrcDir)/traceProducer.xsl $(XML_DEPS)
++ $(GENERATE_CODE)
++
++$(TraceOutDir)/traceRequestables.hpp: $(TraceSrcDir)/trace.xml $(TraceAltSrcDir)/traceRequestables.xsl $(XML_DEPS)
++ $(GENERATE_CODE)
++
++$(TraceOutDir)/traceEventControl.hpp: $(TraceSrcDir)/trace.xml $(TraceAltSrcDir)/traceEventControl.xsl $(XML_DEPS)
++ $(GENERATE_CODE)
++
++endif
++
++# #########################################################################
++
++clean cleanall:
++ rm $(TraceGeneratedFiles)
++
++
+--- ./hotspot/make/aix/makefiles/vm.make Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/make/aix/makefiles/vm.make Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,377 @@
++#
++# Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
++# Copyright 2012, 2013 SAP AG. All rights reserved.
++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++#
++# This code is free software; you can redistribute it and/or modify it
++# under the terms of the GNU General Public License version 2 only, as
++# published by the Free Software Foundation.
++#
++# This code is distributed in the hope that it will be useful, but WITHOUT
++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++# version 2 for more details (a copy is included in the LICENSE file that
++# accompanied this code).
++#
++# You should have received a copy of the GNU General Public License version
++# 2 along with this work; if not, write to the Free Software Foundation,
++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++#
++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++# or visit www.oracle.com if you need additional information or have any
++# questions.
++#
++#
++
++# Rules to build JVM and related libraries, included from vm.make in the build
++# directory.
++
++# Common build rules.
++MAKEFILES_DIR=$(GAMMADIR)/make/$(Platform_os_family)/makefiles
++include $(MAKEFILES_DIR)/rules.make
++include $(GAMMADIR)/make/altsrc.make
++
++default: build
++
++#----------------------------------------------------------------------
++# Defs
++
++GENERATED = ../generated
++DEP_DIR = $(GENERATED)/dependencies
++
++# reads the generated files defining the set of .o's and the .o .h dependencies
++-include $(DEP_DIR)/*.d
++
++# read machine-specific adjustments (%%% should do this via buildtree.make?)
++ifeq ($(findstring true, $(JVM_VARIANT_ZERO) $(JVM_VARIANT_ZEROSHARK)), true)
++ include $(MAKEFILES_DIR)/zeroshark.make
++else
++ include $(MAKEFILES_DIR)/$(BUILDARCH).make
++endif
++
++# set VPATH so make knows where to look for source files
++# Src_Dirs_V is everything in src/share/vm/*, plus the right os/*/vm and cpu/*/vm
++# The adfiles directory contains ad_<arch>.[ch]pp.
++# The jvmtifiles directory contains jvmti*.[ch]pp
++Src_Dirs_V += $(GENERATED)/adfiles $(GENERATED)/jvmtifiles $(GENERATED)/tracefiles
++VPATH += $(Src_Dirs_V:%=%:)
++
++# set INCLUDES for C preprocessor.
++Src_Dirs_I += $(GENERATED)
++# The order is important for the precompiled headers to work.
++INCLUDES += $(PRECOMPILED_HEADER_DIR:%=-I%) $(Src_Dirs_I:%=-I%)
++
++# SYMFLAG is used by {jsig,saproc}.make
++ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
++ # always build with debug info when we can create .debuginfo files
++ SYMFLAG = -g
++else
++ ifeq (${VERSION}, debug)
++ SYMFLAG = -g
++ else
++ SYMFLAG =
++ endif
++endif
++
++# HOTSPOT_RELEASE_VERSION and HOTSPOT_BUILD_VERSION are defined
++# in $(GAMMADIR)/make/defs.make
++ifeq ($(HOTSPOT_BUILD_VERSION),)
++ BUILD_VERSION = -DHOTSPOT_RELEASE_VERSION="\"$(HOTSPOT_RELEASE_VERSION)\""
++else
++ BUILD_VERSION = -DHOTSPOT_RELEASE_VERSION="\"$(HOTSPOT_RELEASE_VERSION)-$(HOTSPOT_BUILD_VERSION)\""
++endif
++
++# The following variables are defined in the generated flags.make file.
++BUILD_VERSION = -DHOTSPOT_RELEASE_VERSION="\"$(HS_BUILD_VER)\""
++JRE_VERSION = -DJRE_RELEASE_VERSION="\"$(JRE_RELEASE_VER)\""
++HS_LIB_ARCH = -DHOTSPOT_LIB_ARCH=\"$(LIBARCH)\"
++BUILD_TARGET = -DHOTSPOT_BUILD_TARGET="\"$(TARGET)\""
++BUILD_USER = -DHOTSPOT_BUILD_USER="\"$(HOTSPOT_BUILD_USER)\""
++VM_DISTRO = -DHOTSPOT_VM_DISTRO="\"$(HOTSPOT_VM_DISTRO)\""
++
++CXXFLAGS = \
++ ${SYSDEFS} \
++ ${INCLUDES} \
++ ${BUILD_VERSION} \
++ ${BUILD_TARGET} \
++ ${BUILD_USER} \
++ ${HS_LIB_ARCH} \
++ ${VM_DISTRO}
++
++# This is VERY important! The version define must only be supplied to vm_version.o
++# If not, ccache will not re-use the cache at all, since the version string might contain
++# a time and date.
++CXXFLAGS/vm_version.o += ${JRE_VERSION}
++
++CXXFLAGS/BYFILE = $(CXXFLAGS/$@)
++
++# File specific flags
++CXXFLAGS += $(CXXFLAGS/BYFILE)
++
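++# Spelled out: CXXFLAGS/BYFILE resolves per target, so when vm_version.o is
++# being compiled it picks up $(CXXFLAGS/vm_version.o) -- the JRE_VERSION
++# define -- and expands to nothing for every other object. That keeps the
++# date-bearing version string out of the ccache hash for the rest of the VM.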
++
++# CFLAGS_WARN holds compiler options to suppress/enable warnings.
++CFLAGS += $(CFLAGS_WARN/BYFILE)
++
++# Do not use C++ exception handling
++CFLAGS += $(CFLAGS/NOEX)
++
++# Extra flags from gnumake's invocation or environment
++CFLAGS += $(EXTRA_CFLAGS)
++LFLAGS += $(EXTRA_CFLAGS)
++
++# Don't set executable bit on stack segment
++# the same could be done by separate execstack command
++#LFLAGS += -Xlinker -z -Xlinker noexecstack
++
++LIBS += -lm -ldl -lpthread
++
++# By default, link the *.o into the library, not the executable.
++LINK_INTO$(LINK_INTO) = LIBJVM
++
++JDK_LIBDIR = $(JAVA_HOME)/jre/lib/$(LIBARCH)
++
++#----------------------------------------------------------------------
++# jvm_db & dtrace
++include $(MAKEFILES_DIR)/dtrace.make
++
++#----------------------------------------------------------------------
++# JVM
++
++JVM = jvm
++LIBJVM = lib$(JVM).so
++
++CFLAGS += -DALLOW_OPERATOR_NEW_USAGE
++
++LIBJVM_DEBUGINFO = lib$(JVM).debuginfo
++LIBJVM_DIZ = lib$(JVM).diz
++
++SPECIAL_PATHS:=adlc c1 gc_implementation opto shark libadt
++
++SOURCE_PATHS=\
++ $(shell find $(HS_COMMON_SRC)/share/vm/* -type d \! \
++ \( -name DUMMY $(foreach dir,$(SPECIAL_PATHS),-o -name $(dir)) \))
++SOURCE_PATHS+=$(HS_COMMON_SRC)/os/$(Platform_os_family)/vm
++SOURCE_PATHS+=$(HS_COMMON_SRC)/os/posix/vm
++SOURCE_PATHS+=$(HS_COMMON_SRC)/cpu/$(SRCARCH)/vm
++SOURCE_PATHS+=$(HS_COMMON_SRC)/os_cpu/$(Platform_os_family)_$(SRCARCH)/vm
++
++CORE_PATHS=$(foreach path,$(SOURCE_PATHS),$(call altsrc,$(path)) $(path))
++CORE_PATHS+=$(GENERATED)/jvmtifiles $(GENERATED)/tracefiles
++
++ifneq ($(INCLUDE_TRACE), false)
++CORE_PATHS+=$(shell if [ -d $(HS_ALT_SRC)/share/vm/jfr ]; then \
++ find $(HS_ALT_SRC)/share/vm/jfr -type d; \
++ fi)
++endif
++
++COMPILER1_PATHS := $(call altsrc,$(HS_COMMON_SRC)/share/vm/c1)
++COMPILER1_PATHS += $(HS_COMMON_SRC)/share/vm/c1
++
++COMPILER2_PATHS := $(call altsrc,$(HS_COMMON_SRC)/share/vm/opto)
++COMPILER2_PATHS += $(call altsrc,$(HS_COMMON_SRC)/share/vm/libadt)
++COMPILER2_PATHS += $(HS_COMMON_SRC)/share/vm/opto
++COMPILER2_PATHS += $(HS_COMMON_SRC)/share/vm/libadt
++COMPILER2_PATHS += $(GENERATED)/adfiles
++
++SHARK_PATHS := $(GAMMADIR)/src/share/vm/shark
++
++# Include dirs per type.
++Src_Dirs/CORE := $(CORE_PATHS)
++Src_Dirs/COMPILER1 := $(CORE_PATHS) $(COMPILER1_PATHS)
++Src_Dirs/COMPILER2 := $(CORE_PATHS) $(COMPILER2_PATHS)
++Src_Dirs/TIERED := $(CORE_PATHS) $(COMPILER1_PATHS) $(COMPILER2_PATHS)
++Src_Dirs/ZERO := $(CORE_PATHS)
++Src_Dirs/SHARK := $(CORE_PATHS) $(SHARK_PATHS)
++Src_Dirs := $(Src_Dirs/$(TYPE))
++
++COMPILER2_SPECIFIC_FILES := opto libadt bcEscapeAnalyzer.cpp c2_\* runtime_\*
++COMPILER1_SPECIFIC_FILES := c1_\*
++SHARK_SPECIFIC_FILES := shark
++ZERO_SPECIFIC_FILES := zero
++
++# Always exclude these.
++Src_Files_EXCLUDE += jsig.c jvmtiEnvRecommended.cpp jvmtiEnvStub.cpp
++
++# Exclude per type.
++Src_Files_EXCLUDE/CORE := $(COMPILER1_SPECIFIC_FILES) $(COMPILER2_SPECIFIC_FILES) $(ZERO_SPECIFIC_FILES) $(SHARK_SPECIFIC_FILES) ciTypeFlow.cpp
++Src_Files_EXCLUDE/COMPILER1 := $(COMPILER2_SPECIFIC_FILES) $(ZERO_SPECIFIC_FILES) $(SHARK_SPECIFIC_FILES) ciTypeFlow.cpp
++Src_Files_EXCLUDE/COMPILER2 := $(COMPILER1_SPECIFIC_FILES) $(ZERO_SPECIFIC_FILES) $(SHARK_SPECIFIC_FILES)
++Src_Files_EXCLUDE/TIERED := $(ZERO_SPECIFIC_FILES) $(SHARK_SPECIFIC_FILES)
++Src_Files_EXCLUDE/ZERO := $(COMPILER1_SPECIFIC_FILES) $(COMPILER2_SPECIFIC_FILES) $(SHARK_SPECIFIC_FILES) ciTypeFlow.cpp
++Src_Files_EXCLUDE/SHARK := $(COMPILER1_SPECIFIC_FILES) $(COMPILER2_SPECIFIC_FILES) $(ZERO_SPECIFIC_FILES)
++
++Src_Files_EXCLUDE += $(Src_Files_EXCLUDE/$(TYPE))
++
++# Disable ELF decoder on AIX (AIX uses XCOFF).
++Src_Files_EXCLUDE += decoder_elf.cpp elfFile.cpp elfStringTable.cpp elfSymbolTable.cpp elfFuncDescTable.cpp
++
++# Special handling of arch model.
++ifeq ($(Platform_arch_model), x86_32)
++Src_Files_EXCLUDE += \*x86_64\*
++endif
++ifeq ($(Platform_arch_model), x86_64)
++Src_Files_EXCLUDE += \*x86_32\*
++endif
++
++# Locate all source files in the given directory, excluding files in Src_Files_EXCLUDE.
++define findsrc
++ $(notdir $(shell find $(1)/. ! -name . -prune \
++ -a \( -name \*.c -o -name \*.cpp -o -name \*.s \) \
++ -a ! \( -name DUMMY $(addprefix -o -name ,$(Src_Files_EXCLUDE)) \)))
++endef
++
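++# Example use of the define above (the directory is illustrative):
++#
++#   $(call findsrc,$(HS_COMMON_SRC)/share/vm/runtime)
++#
++# yields the bare names of every *.c, *.cpp and *.s file directly in that
++# directory, minus anything matching Src_Files_EXCLUDE; the -prune keeps
++# find from descending into subdirectories.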
++Src_Files := $(foreach e,$(Src_Dirs),$(call findsrc,$(e)))
++
++Obj_Files = $(sort $(addsuffix .o,$(basename $(Src_Files))))
++
++JVM_OBJ_FILES = $(Obj_Files)
++
++vm_version.o: $(filter-out vm_version.o,$(JVM_OBJ_FILES))
++
++mapfile : $(MAPFILE) vm.def
++ rm -f $@
++ awk '{ if ($$0 ~ "INSERT VTABLE SYMBOLS HERE") \
++ { system ("cat vm.def"); } \
++ else \
++ { print $$0 } \
++ }' > $@ < $(MAPFILE)
++
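++# What the awk above does, by example: given a $(MAPFILE) containing the
++# marker line "# INSERT VTABLE SYMBOLS HERE" (see mapfile-vers-product),
++# the generated mapfile is identical except that the marker is replaced by
++# the contents of vm.def -- the vtable symbols that build_vm_def.sh
++# extracts from the object files.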
++mapfile_reorder : mapfile $(REORDERFILE)
++ rm -f $@
++ cat $^ > $@
++
++vm.def: $(Res_Files) $(Obj_Files)
++ sh $(GAMMADIR)/make/aix/makefiles/build_vm_def.sh *.o > $@
++
++ifeq ($(JVM_VARIANT_ZEROSHARK), true)
++ STATIC_CXX = false
++else
++ ifeq ($(ZERO_LIBARCH), ppc64)
++ STATIC_CXX = false
++ else
++ STATIC_CXX = true
++ endif
++endif
++
++ifeq ($(LINK_INTO),AOUT)
++ LIBJVM.o =
++ LIBJVM_MAPFILE =
++ LIBS_VM = $(LIBS)
++else
++ LIBJVM.o = $(JVM_OBJ_FILES)
++ LIBJVM_MAPFILE$(LDNOMAP) = mapfile_reorder
++ LFLAGS_VM$(LDNOMAP) += $(MAPFLAG:FILENAME=$(LIBJVM_MAPFILE))
++# xlC_r ignores the -o= syntax
++# LFLAGS_VM += $(SONAMEFLAG:SONAME=$(LIBJVM))
++
++ # JVM is statically linked with libgcc[_s] and libstdc++; this is needed to
++ # get around library dependency and compatibility issues. Must use gcc not
++ # g++ to link.
++ LIBS_VM += $(STATIC_STDCXX) $(LIBS)
++endif
++
++LINK_VM = $(LINK_LIB.CXX)
++
++# Create a loadmap for libjvm.so by default; it helps in diagnosing some problems.
++LFLAGS_VM += -bloadmap:libjvm.loadmap
++
++# rule for building precompiled header
++$(PRECOMPILED_HEADER):
++ $(QUIETLY) echo Generating precompiled header $@
++ $(QUIETLY) mkdir -p $(PRECOMPILED_HEADER_DIR)
++ $(QUIETLY) $(COMPILE.CXX) $(DEPFLAGS) -x c++-header $(PRECOMPILED_HEADER_SRC) -o $@ $(COMPILE_DONE)
++
++# making the library:
++
++ifneq ($(JVM_BASE_ADDR),)
++# By default the shared library is linked at base address 0. Modify the
++# linker script if the JVM prefers a different base location. This could
++# also be implemented with 'prelink -r', but 'prelink' is not (yet)
++# available on our build platform (AS-2.1).
++LD_SCRIPT = libjvm.so.lds
++$(LD_SCRIPT): $(LIBJVM_MAPFILE)
++ $(QUIETLY) { \
++ rm -rf $@; \
++ $(LINK_VM) -Wl,--verbose $(LFLAGS_VM) 2>&1 | \
++ sed -e '/^======/,/^======/!d' \
++ -e '/^======/d' \
++ -e 's/0\( + SIZEOF_HEADERS\)/$(JVM_BASE_ADDR)\1/' \
++ > $@; \
++ }
++LD_SCRIPT_FLAG = -Wl,-T,$(LD_SCRIPT)
++endif
++
++# With more recent Red Hat releases (or cutting-edge Fedora), if SELinux is
++# enabled, the runtime linker will refuse to apply the text relocations to
++# libjvm.so because it is built as a non-PIC DSO. To work around that, we
++# run chcon on libjvm.so after it is built. See bug 6538311 for details.
++$(LIBJVM): $(LIBJVM.o) $(LIBJVM_MAPFILE) $(LD_SCRIPT)
++ $(QUIETLY) { \
++ echo Linking vm...; \
++ $(LINK_LIB.CXX/PRE_HOOK) \
++ $(LINK_VM) $(LD_SCRIPT_FLAG) \
++ $(LFLAGS_VM) -o $@ $(sort $(LIBJVM.o)) $(LIBS_VM); \
++ $(LINK_LIB.CXX/POST_HOOK) \
++ rm -f $@.1; ln -s $@ $@.1; \
++ }
++# No security contexts on AIX
++# if [ \"$(CROSS_COMPILE_ARCH)\" = \"\" ] ; then \
++# if [ -x /usr/sbin/selinuxenabled ] ; then \
++# /usr/sbin/selinuxenabled; \
++# if [ $$? = 0 ] ; then \
++# /usr/bin/chcon -t textrel_shlib_t $@; \
++# if [ $$? != 0 ]; then \
++# echo "ERROR: Cannot chcon $@"; \
++# fi \
++# fi \
++# fi \
++# fi \
++# }
++
++#ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
++# $(QUIETLY) $(OBJCOPY) --only-keep-debug $@ $(LIBJVM_DEBUGINFO)
++# $(QUIETLY) $(OBJCOPY) --add-gnu-debuglink=$(LIBJVM_DEBUGINFO) $@
++# ifeq ($(STRIP_POLICY),all_strip)
++# $(QUIETLY) $(STRIP) $@
++# else
++# ifeq ($(STRIP_POLICY),min_strip)
++# $(QUIETLY) $(STRIP) -g $@
++# # implied else here is no stripping at all
++# endif
++# endif
++# ifeq ($(ZIP_DEBUGINFO_FILES),1)
++# $(ZIPEXE) -q -y $(LIBJVM_DIZ) $(LIBJVM_DEBUGINFO)
++# $(RM) $(LIBJVM_DEBUGINFO)
++# endif
++#endif
++
++DEST_SUBDIR = $(JDK_LIBDIR)/$(VM_SUBDIR)
++DEST_JVM = $(DEST_SUBDIR)/$(LIBJVM)
++DEST_JVM_DEBUGINFO = $(DEST_SUBDIR)/$(LIBJVM_DEBUGINFO)
++DEST_JVM_DIZ = $(DEST_SUBDIR)/$(LIBJVM_DIZ)
++
++install_jvm: $(LIBJVM)
++ @echo "Copying $(LIBJVM) to $(DEST_JVM)"
++ $(QUIETLY) test -f $(LIBJVM_DEBUGINFO) && \
++ cp -f $(LIBJVM_DEBUGINFO) $(DEST_JVM_DEBUGINFO)
++ $(QUIETLY) test -f $(LIBJVM_DIZ) && \
++ cp -f $(LIBJVM_DIZ) $(DEST_JVM_DIZ)
++ $(QUIETLY) cp -f $(LIBJVM) $(DEST_JVM) && echo "Done"
++
++#----------------------------------------------------------------------
++# Other files
++
++# Signal interposition library
++include $(MAKEFILES_DIR)/jsig.make
++
++# Serviceability agent
++include $(MAKEFILES_DIR)/saproc.make
++
++#----------------------------------------------------------------------
++
++build: $(LIBJVM) $(LAUNCHER) $(LIBJSIG) $(LIBJVM_DB) $(BUILDLIBSAPROC)
++
++install: install_jvm install_jsig install_saproc
++
++.PHONY: default build install install_jvm
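For readers following the new vm.make: the per-TYPE exclude lists and the
findsrc macro above reduce to one find(1) invocation per source directory,
with Src_Files_EXCLUDE expanded into extra '-o -name' clauses. A minimal
stand-alone sketch of the same pattern (the 'src' directory and the exclude
entries are hypothetical, not taken from the port):

    # Per-type exclusion, composed with plain '+=' as in vm.make.
    TYPE := COMPILER2
    Src_Files_EXCLUDE := jsig.c
    Src_Files_EXCLUDE/COMPILER2 := c1_\*
    Src_Files_EXCLUDE += $(Src_Files_EXCLUDE/$(TYPE))

    # Same shape as findsrc: list sources one level deep, pruning
    # anything named in Src_Files_EXCLUDE.
    define findsrc
      $(notdir $(shell find $(1)/. ! -name . -prune \
          -a \( -name \*.c -o -name \*.cpp -o -name \*.s \) \
          -a ! \( -name DUMMY $(addprefix -o -name ,$(Src_Files_EXCLUDE)) \)))
    endef

    Src_Files := $(call findsrc,src)
    Obj_Files := $(sort $(addsuffix .o,$(basename $(Src_Files))))
    $(info $(Obj_Files))
    all: ;

Keeping the excludes as flat word lists is what lets every build TYPE
compose its own set with nothing more than '+='.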
+--- ./hotspot/make/aix/makefiles/xlc.make Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/make/aix/makefiles/xlc.make Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,159 @@
++#
++# Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
++# Copyright (c) 2012, 2013 SAP. All rights reserved.
++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++#
++# This code is free software; you can redistribute it and/or modify it
++# under the terms of the GNU General Public License version 2 only, as
++# published by the Free Software Foundation.
++#
++# This code is distributed in the hope that it will be useful, but WITHOUT
++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++# version 2 for more details (a copy is included in the LICENSE file that
++# accompanied this code).
++#
++# You should have received a copy of the GNU General Public License version
++# 2 along with this work; if not, write to the Free Software Foundation,
++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++#
++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++# or visit www.oracle.com if you need additional information or have any
++# questions.
++#
++#
++
++#------------------------------------------------------------------------
++# CC, CXX & AS
++
++# Set compiler explicitly
++CXX = $(COMPILER_PATH)xlC_r
++CC = $(COMPILER_PATH)xlc_r
++HOSTCXX = $(CXX)
++HOSTCC = $(CC)
++
++AS = $(CC) -c
++
++# get xlc version
++CXX_VERSION := $(shell $(CXX) -qversion 2>&1 | sed -n 's/.*Version: \([0-9.]*\)/\1/p')
++
++# xlc 08.00.0000.0023 and higher supports -qtune=balanced
++CXX_SUPPORTS_BALANCED_TUNING=$(shell if [ $(subst .,,$(CXX_VERSION)) -ge 080000000023 ] ; then echo "true" ; fi)
++# xlc 10.01 is used with aggressive optimizations to boost performance
++CXX_IS_V10=$(shell if [ $(subst .,,$(CXX_VERSION)) -ge 100100000000 ] ; then echo "true" ; fi)
++
++# Check for precompiled header support.
++
++# Switch off precompiled header support. Neither xlC 8.0 nor xlC 10.0
++# supports precompiled headers. Both "understand" the command line switches
++# "-qusepcomp" and "-qgenpcomp", but when we specify them the following
++# message is printed:
++# "1506-755 (W) The -qusepcomp option is not supported in this release."
++USE_PRECOMPILED_HEADER = 0
++ifneq ($(USE_PRECOMPILED_HEADER),0)
++PRECOMPILED_HEADER_DIR=.
++PRECOMPILED_HEADER_SRC=$(GAMMADIR)/src/share/vm/precompiled/precompiled.hpp
++PRECOMPILED_HEADER=$(PRECOMPILED_HEADER_DIR)/precompiled.hpp.gch
++endif
++
++
++#------------------------------------------------------------------------
++# Compiler flags
++
++# position-independent code
++PICFLAG = -qpic=large
++
++VM_PICFLAG/LIBJVM = $(PICFLAG)
++VM_PICFLAG/AOUT =
++VM_PICFLAG = $(VM_PICFLAG/$(LINK_INTO))
++
++CFLAGS += $(VM_PICFLAG)
++CFLAGS += -qnortti
++CFLAGS += -qnoeh
++
++CFLAGS += -D_REENTRANT
++# no xlc counterpart for -fcheck-new
++# CFLAGS += -fcheck-new
++
++ARCHFLAG = -q64
++
++CFLAGS += $(ARCHFLAG)
++AOUT_FLAGS += $(ARCHFLAG)
++LFLAGS += $(ARCHFLAG)
++ASFLAGS += $(ARCHFLAG)
++
++# Use C++ Interpreter
++ifdef CC_INTERP
++ CFLAGS += -DCC_INTERP
++endif
++
++# Keep temporary files (.ii, .s)
++# no counterpart on xlc for -save-temps, -pipe
++
++# Compiler warnings would normally be treated as errors;
++# here we do not treat warnings as errors.
++# WARNINGS_ARE_ERRORS = -Werror
++# Except for a few acceptable ones
++# ACCEPTABLE_WARNINGS = -Wpointer-arith -Wconversion -Wsign-compare
++# CFLAGS_WARN/DEFAULT = $(WARNINGS_ARE_ERRORS) $(ACCEPTABLE_WARNINGS)
++CFLAGS_WARN/COMMON =
++CFLAGS_WARN/DEFAULT = $(CFLAGS_WARN/COMMON) $(EXTRA_WARNINGS)
++# Special cases
++CFLAGS_WARN/BYFILE = $(CFLAGS_WARN/$@)$(CFLAGS_WARN/DEFAULT$(CFLAGS_WARN/$@))
++
++# The flags to use for an optimized build
++OPT_CFLAGS += -O3
++
++# Hotspot relies on non-strict aliasing; turn this optimization off.
++OPT_CFLAGS += -qalias=noansi
++
++OPT_CFLAGS/NOOPT=-qnoopt
++
++DEPFLAGS = -qmakedep=gcc -MF $(DEP_DIR)/$(@:%=%.d)
++
++#------------------------------------------------------------------------
++# Linker flags
++
++# Statically link libstdc++.so; this works with gcc but is ignored by g++.
++STATIC_STDCXX = -Wl,-lC_r
++
++# Enable linker optimization
++# no counterpart on xlc for this
++# LFLAGS += -Xlinker -O1
++
++# Use $(MAPFLAG:FILENAME=real_file_name) to specify a map file.
++# MAPFLAG = -Xlinker --version-script=FILENAME
++
++# Build shared library
++SHARED_FLAG = -q64 -b64 -bexpall -G -bnoentry -qmkshrobj -brtl -bnolibpath
++
++#------------------------------------------------------------------------
++# Debug flags
++
++# Always compile with '-g' to get symbols in the stack traces in the hs_err file.
++DEBUG_CFLAGS += -g
++FASTDEBUG_CFLAGS += -g
++OPT_CFLAGS += -g
++
++# DEBUG_BINARIES overrides everything; use full -g debug information.
++ifeq ($(DEBUG_BINARIES), true)
++ DEBUG_CFLAGS = -g
++ CFLAGS += $(DEBUG_CFLAGS)
++endif
++
++# If we are building HEADLESS, pass on to VM
++# so it can set the java.awt.headless property
++ifdef HEADLESS
++CFLAGS += -DHEADLESS
++endif
++
++# If we are building Embedded for a small device,
++# favor code space over speed.
++ifdef MINIMIZE_RAM_USAGE
++CFLAGS += -DMINIMIZE_RAM_USAGE
++endif
++
++ifdef CROSS_COMPILE_ARCH
++ STRIP = $(ALT_COMPILER_PATH)/strip
++else
++ STRIP = strip
++endif
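A note on the version probes above: they delete the dots from the xlC
version string and compare what remains as one large decimal number, which
is only sound because -qversion zero-pads every component to a fixed
width. The same comparison in plain shell (the version string here is a
made-up example, not probed from a compiler):

    cxx_version='08.00.0000.0023'   # as 'xlC_r -qversion' would print it
    if [ "$(printf '%s' "$cxx_version" | tr -d .)" -ge 080000000023 ]; then
        echo 'CXX_SUPPORTS_BALANCED_TUNING=true'
    fi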
+--- ./hotspot/make/aix/platform_ppc64 Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/make/aix/platform_ppc64 Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,17 @@
++os_family = aix
++
++arch = ppc
++
++arch_model = ppc_64
++
++os_arch = aix_ppc
++
++os_arch_model = aix_ppc_64
++
++lib_arch = ppc64
++
++compiler = xlc
++
++gnu_dis_arch = ppc64
++
++sysdefs = -DAIX -DPPC64
+--- ./hotspot/make/bsd/makefiles/mapfile-vers-debug Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/make/bsd/makefiles/mapfile-vers-debug Wed Jul 30 03:51:43 2014 -0700
+@@ -161,6 +161,7 @@
+ _JVM_GetStackTraceElement
+ _JVM_GetSystemPackage
+ _JVM_GetSystemPackages
++ _JVM_GetTemporaryDirectory
+ _JVM_GetThreadStateNames
+ _JVM_GetThreadStateValues
+ _JVM_GetVersionInfo
+--- ./hotspot/make/bsd/makefiles/mapfile-vers-product Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/make/bsd/makefiles/mapfile-vers-product Wed Jul 30 03:51:43 2014 -0700
+@@ -161,6 +161,7 @@
+ _JVM_GetStackTraceElement
+ _JVM_GetSystemPackage
+ _JVM_GetSystemPackages
++ _JVM_GetTemporaryDirectory
+ _JVM_GetThreadStateNames
+ _JVM_GetThreadStateValues
+ _JVM_GetVersionInfo
+--- ./hotspot/make/bsd/makefiles/universal.gmk Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/make/bsd/makefiles/universal.gmk Wed Jul 30 03:51:43 2014 -0700
+@@ -1,5 +1,5 @@
+ #
+-# Copyright (c) 2006, 2013, Oracle and/or its affiliates. All rights reserved.
++# Copyright (c) 2006, 2014, Oracle and/or its affiliates. All rights reserved.
+ # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ #
+ # This code is free software; you can redistribute it and/or modify it
+@@ -74,19 +74,21 @@
+
+
+ # Replace arch specific binaries with universal binaries
++# Do not touch jre/lib/{client,server}/libjsig.$(LIBRARY_SUFFIX)
++# That symbolic link belongs to the 'jdk' build.
+ export_universal:
+ $(RM) -r $(EXPORT_PATH)/jre/lib/{i386,amd64}
+ $(RM) -r $(JDK_IMAGE_DIR)/jre/lib/{i386,amd64}
+- $(RM) $(JDK_IMAGE_DIR)/jre/lib/{client,server}/libjsig.$(LIBRARY_SUFFIX)
+ ($(CD) $(EXPORT_PATH) && \
+ $(TAR) -cf - *) | \
+ ($(CD) $(JDK_IMAGE_DIR) && $(TAR) -xpf -)
+
+
+ # Overlay universal binaries
++# Do not touch jre/lib/{client,server}/libjsig.$(LIBRARY_SUFFIX)
++# That symbolic link belongs to the 'jdk' build.
+ copy_universal:
+ $(RM) -r $(JDK_IMAGE_DIR)$(COPY_SUBDIR)/jre/lib/{i386,amd64}
+- $(RM) $(JDK_IMAGE_DIR)$(COPY_SUBDIR)/jre/lib/{client,server}/libjsig.$(LIBRARY_SUFFIX)
+ ($(CD) $(EXPORT_PATH)$(COPY_SUBDIR) && \
+ $(TAR) -cf - *) | \
+ ($(CD) $(JDK_IMAGE_DIR)$(COPY_SUBDIR) && $(TAR) -xpf -)
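The export/copy targets above merge one tree into another with the classic
tar pipe, which overwrites files in place while preserving permissions and
symlinks; that is also why the libjsig symlink no longer needs to be
deleted first. The idiom in isolation (both directory names are
placeholders):

    src=/path/to/export.image    # placeholder source tree
    dst=/path/to/jdk.image       # placeholder destination tree
    # Pack the source tree to stdout and unpack it over the destination;
    # -p preserves permissions, and existing entries are replaced one by
    # one instead of the target tree being removed and recreated.
    (cd "$src" && tar -cf - *) | (cd "$dst" && tar -xpf -)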
+--- ./hotspot/make/defs.make Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/make/defs.make Wed Jul 30 03:51:43 2014 -0700
+@@ -176,11 +176,15 @@
+ HOST := $(shell uname -n)
+ endif
+
+-# If not SunOS, not Linux and not BSD, assume Windows
++# If not SunOS, not Linux, not BSD and not AIX, assume Windows
+ ifneq ($(OS), Linux)
+ ifneq ($(OS), SunOS)
+ ifneq ($(OS), bsd)
+- OSNAME=windows
++ ifneq ($(OS), AIX)
++ OSNAME=windows
++ else
++ OSNAME=aix
++ endif
+ else
+ OSNAME=bsd
+ endif
+@@ -269,7 +273,7 @@
+
+ # Use uname output for SRCARCH, but deal with platform differences. If ARCH
+ # is not explicitly listed below, it is treated as x86.
+- SRCARCH = $(ARCH/$(filter sparc sparc64 ia64 amd64 x86_64 arm ppc zero,$(ARCH)))
++ SRCARCH = $(ARCH/$(filter sparc sparc64 ia64 amd64 x86_64 arm ppc ppc64 zero,$(ARCH)))
+ ARCH/ = x86
+ ARCH/sparc = sparc
+ ARCH/sparc64= sparc
+@@ -295,6 +299,11 @@
+ BUILDARCH = sparcv9
+ endif
+ endif
++ ifeq ($(BUILDARCH), ppc)
++ ifdef LP64
++ BUILDARCH = ppc64
++ endif
++ endif
+
+ # LIBARCH is 1:1 mapping from BUILDARCH
+ LIBARCH = $(LIBARCH/$(BUILDARCH))
+@@ -303,12 +312,12 @@
+ LIBARCH/sparc = sparc
+ LIBARCH/sparcv9 = sparcv9
+ LIBARCH/ia64 = ia64
+- LIBARCH/ppc64 = ppc
++ LIBARCH/ppc64 = ppc64
+ LIBARCH/ppc = ppc
+ LIBARCH/arm = arm
+ LIBARCH/zero = $(ZERO_LIBARCH)
+
+- LP64_ARCH = sparcv9 amd64 ia64 zero
++ LP64_ARCH = sparcv9 amd64 ia64 ppc64 zero
+ endif
+
+ # Required make macro settings for all platforms
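To see what the defs.make hunks change, trace one value through the arch
variables: uname supplies ARCH, SRCARCH picks the source-directory family,
BUILDARCH refines it with the data model, and LIBARCH names the jre/lib
subdirectory. A toy trace for the new ppc64 case (a reduced sketch, not
the full tables from defs.make):

    ARCH       := ppc64            # as reported by 'uname -m'
    ARCH/ppc64 := ppc              # sources live under src/cpu/ppc
    SRCARCH    := $(ARCH/$(filter ppc64,$(ARCH)))
    LP64       := 1
    BUILDARCH  := $(SRCARCH)
    ifeq ($(BUILDARCH), ppc)
      ifdef LP64
        BUILDARCH := ppc64         # ppc plus LP64 becomes ppc64
      endif
    endif
    LIBARCH/ppc64 := ppc64         # now jre/lib/ppc64, no longer 'ppc'
    LIBARCH       := $(LIBARCH/$(BUILDARCH))
    $(info SRCARCH=$(SRCARCH) BUILDARCH=$(BUILDARCH) LIBARCH=$(LIBARCH))
    all: ;

With the hunks above applied, the trace prints SRCARCH=ppc BUILDARCH=ppc64
LIBARCH=ppc64, which is why the libraries now land in jre/lib/ppc64.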
+--- ./hotspot/make/excludeSrc.make Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/make/excludeSrc.make Wed Jul 30 03:51:43 2014 -0700
+@@ -1,5 +1,5 @@
+ #
+-# Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
++# Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ #
+ # This code is free software; you can redistribute it and/or modify it
+@@ -86,10 +86,11 @@
+ concurrentMark.cpp concurrentMarkThread.cpp dirtyCardQueue.cpp g1AllocRegion.cpp \
+ g1BlockOffsetTable.cpp g1CardCounts.cpp g1CollectedHeap.cpp g1CollectorPolicy.cpp \
+ g1ErgoVerbose.cpp g1GCPhaseTimes.cpp g1HRPrinter.cpp g1HotCardCache.cpp g1Log.cpp \
+- g1MMUTracker.cpp g1MarkSweep.cpp g1MemoryPool.cpp g1MonitoringSupport.cpp \
+- g1RemSet.cpp g1RemSetSummary.cpp g1SATBCardTableModRefBS.cpp g1_globals.cpp heapRegion.cpp \
++ g1MMUTracker.cpp g1MarkSweep.cpp g1MemoryPool.cpp g1MonitoringSupport.cpp g1OopClosures.cpp \
++ g1RemSet.cpp g1RemSetSummary.cpp g1SATBCardTableModRefBS.cpp g1StringDedup.cpp g1StringDedupStat.cpp \
++ g1StringDedupTable.cpp g1StringDedupThread.cpp g1StringDedupQueue.cpp g1_globals.cpp heapRegion.cpp \
+ g1BiasedArray.cpp heapRegionRemSet.cpp heapRegionSeq.cpp heapRegionSet.cpp heapRegionSets.cpp \
+- ptrQueue.cpp satbQueue.cpp sparsePRT.cpp survRateGroup.cpp vm_operations_g1.cpp \
++ ptrQueue.cpp satbQueue.cpp sparsePRT.cpp survRateGroup.cpp vm_operations_g1.cpp g1CodeCacheRemSet.cpp \
+ adjoiningGenerations.cpp adjoiningVirtualSpaces.cpp asPSOldGen.cpp asPSYoungGen.cpp \
+ cardTableExtension.cpp gcTaskManager.cpp gcTaskThread.cpp objectStartArray.cpp \
+ parallelScavengeHeap.cpp parMarkBitMap.cpp pcTasks.cpp psAdaptiveSizePolicy.cpp \
+--- ./hotspot/make/hotspot_version Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/make/hotspot_version Wed Jul 30 03:51:43 2014 -0700
+@@ -34,8 +34,8 @@
+ HOTSPOT_VM_COPYRIGHT=Copyright 2014
+
+ HS_MAJOR_VER=25
+-HS_MINOR_VER=11
+-HS_BUILD_NUMBER=03
++HS_MINOR_VER=20
++HS_BUILD_NUMBER=23
+
+ JDK_MAJOR_VER=1
+ JDK_MINOR_VER=8
+--- ./hotspot/make/jprt.properties Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/make/jprt.properties Wed Jul 30 03:51:43 2014 -0700
+@@ -33,7 +33,7 @@
+
+ # This tells jprt what default release we want to build
+
+-jprt.hotspot.default.release=jdk8
++jprt.hotspot.default.release=jdk8u20
+
+ jprt.tools.default.release=${jprt.submit.option.release?${jprt.submit.option.release}:${jprt.hotspot.default.release}}
+
+@@ -47,70 +47,65 @@
+ # sparc etc.
+
+ # Define the Solaris platforms we want for the various releases
+-jprt.my.solaris.sparcv9.jdk8=solaris_sparcv9_5.10
++jprt.my.solaris.sparcv9.jdk8u20=solaris_sparcv9_5.10
+ jprt.my.solaris.sparcv9.jdk7=solaris_sparcv9_5.10
+ jprt.my.solaris.sparcv9.jdk7u8=${jprt.my.solaris.sparcv9.jdk7}
+ jprt.my.solaris.sparcv9=${jprt.my.solaris.sparcv9.${jprt.tools.default.release}}
+
+-jprt.my.solaris.x64.jdk8=solaris_x64_5.10
++jprt.my.solaris.x64.jdk8u20=solaris_x64_5.10
+ jprt.my.solaris.x64.jdk7=solaris_x64_5.10
+ jprt.my.solaris.x64.jdk7u8=${jprt.my.solaris.x64.jdk7}
+ jprt.my.solaris.x64=${jprt.my.solaris.x64.${jprt.tools.default.release}}
+
+-jprt.my.linux.i586.jdk8=linux_i586_2.6
++jprt.my.linux.i586.jdk8u20=linux_i586_2.6
+ jprt.my.linux.i586.jdk7=linux_i586_2.6
+ jprt.my.linux.i586.jdk7u8=${jprt.my.linux.i586.jdk7}
+ jprt.my.linux.i586=${jprt.my.linux.i586.${jprt.tools.default.release}}
+
+-jprt.my.linux.x64.jdk8=linux_x64_2.6
++jprt.my.linux.x64.jdk8u20=linux_x64_2.6
+ jprt.my.linux.x64.jdk7=linux_x64_2.6
+ jprt.my.linux.x64.jdk7u8=${jprt.my.linux.x64.jdk7}
+ jprt.my.linux.x64=${jprt.my.linux.x64.${jprt.tools.default.release}}
+
+-jprt.my.linux.ppc.jdk8=linux_ppc_2.6
++jprt.my.linux.ppc.jdk8u20=linux_ppc_2.6
+ jprt.my.linux.ppc.jdk7=linux_ppc_2.6
+ jprt.my.linux.ppc.jdk7u8=${jprt.my.linux.ppc.jdk7}
+ jprt.my.linux.ppc=${jprt.my.linux.ppc.${jprt.tools.default.release}}
+
+-jprt.my.linux.ppcv2.jdk8=linux_ppcv2_2.6
++jprt.my.linux.ppcv2.jdk8u20=linux_ppcv2_2.6
+ jprt.my.linux.ppcv2.jdk7=linux_ppcv2_2.6
+ jprt.my.linux.ppcv2.jdk7u8=${jprt.my.linux.ppcv2.jdk7}
+ jprt.my.linux.ppcv2=${jprt.my.linux.ppcv2.${jprt.tools.default.release}}
+
+-jprt.my.linux.ppcsflt.jdk8=linux_ppcsflt_2.6
+-jprt.my.linux.ppcsflt.jdk7=linux_ppcsflt_2.6
+-jprt.my.linux.ppcsflt.jdk7u8=${jprt.my.linux.ppcsflt.jdk7}
+-jprt.my.linux.ppcsflt=${jprt.my.linux.ppcsflt.${jprt.tools.default.release}}
+-
+-jprt.my.linux.armvfpsflt.jdk8=linux_armvfpsflt_2.6
++jprt.my.linux.armvfpsflt.jdk8u20=linux_armvfpsflt_2.6
+ jprt.my.linux.armvfpsflt=${jprt.my.linux.armvfpsflt.${jprt.tools.default.release}}
+
+-jprt.my.linux.armvfphflt.jdk8=linux_armvfphflt_2.6
++jprt.my.linux.armvfphflt.jdk8u20=linux_armvfphflt_2.6
+ jprt.my.linux.armvfphflt=${jprt.my.linux.armvfphflt.${jprt.tools.default.release}}
+
+ # The ARM GP vfp-sflt build is not currently supported
+-#jprt.my.linux.armvs.jdk8=linux_armvs_2.6
++#jprt.my.linux.armvs.jdk8u20=linux_armvs_2.6
+ #jprt.my.linux.armvs=${jprt.my.linux.armvs.${jprt.tools.default.release}}
+
+-jprt.my.linux.armvh.jdk8=linux_armvh_2.6
++jprt.my.linux.armvh.jdk8u20=linux_armvh_2.6
+ jprt.my.linux.armvh=${jprt.my.linux.armvh.${jprt.tools.default.release}}
+
+-jprt.my.linux.armsflt.jdk8=linux_armsflt_2.6
++jprt.my.linux.armsflt.jdk8u20=linux_armsflt_2.6
+ jprt.my.linux.armsflt.jdk7=linux_armsflt_2.6
+ jprt.my.linux.armsflt.jdk7u8=${jprt.my.linux.armsflt.jdk7}
+ jprt.my.linux.armsflt=${jprt.my.linux.armsflt.${jprt.tools.default.release}}
+
+-jprt.my.macosx.x64.jdk8=macosx_x64_10.7
++jprt.my.macosx.x64.jdk8u20=macosx_x64_10.7
+ jprt.my.macosx.x64.jdk7=macosx_x64_10.7
+ jprt.my.macosx.x64.jdk7u8=${jprt.my.macosx.x64.jdk7}
+ jprt.my.macosx.x64=${jprt.my.macosx.x64.${jprt.tools.default.release}}
+
+-jprt.my.windows.i586.jdk8=windows_i586_6.1
++jprt.my.windows.i586.jdk8u20=windows_i586_6.1
+ jprt.my.windows.i586.jdk7=windows_i586_6.1
+ jprt.my.windows.i586.jdk7u8=${jprt.my.windows.i586.jdk7}
+ jprt.my.windows.i586=${jprt.my.windows.i586.${jprt.tools.default.release}}
+
+-jprt.my.windows.x64.jdk8=windows_x64_6.1
++jprt.my.windows.x64.jdk8u20=windows_x64_6.1
+ jprt.my.windows.x64.jdk7=windows_x64_6.1
+ jprt.my.windows.x64.jdk7u8=${jprt.my.windows.x64.jdk7}
+ jprt.my.windows.x64=${jprt.my.windows.x64.${jprt.tools.default.release}}
+@@ -135,7 +130,6 @@
+ ${jprt.my.linux.i586}-{productEmb|fastdebugEmb}, \
+ ${jprt.my.linux.ppc}-{productEmb|fastdebugEmb}, \
+ ${jprt.my.linux.ppcv2}-{productEmb|fastdebugEmb}, \
+- ${jprt.my.linux.ppcsflt}-{productEmb|fastdebugEmb}, \
+ ${jprt.my.linux.armvfpsflt}-{productEmb|fastdebugEmb}, \
+ ${jprt.my.linux.armvfphflt}-{productEmb|fastdebugEmb}, \
+ ${jprt.my.linux.armsflt}-{productEmb|fastdebugEmb}
+@@ -143,7 +137,7 @@
+ jprt.build.targets.all=${jprt.build.targets.standard}, \
+ ${jprt.build.targets.embedded}, ${jprt.build.targets.open}
+
+-jprt.build.targets.jdk8=${jprt.build.targets.all}
++jprt.build.targets.jdk8u20=${jprt.build.targets.all}
+ jprt.build.targets.jdk7=${jprt.build.targets.all}
+ jprt.build.targets.jdk7u8=${jprt.build.targets.all}
+ jprt.build.targets=${jprt.build.targets.${jprt.tools.default.release}}
+@@ -349,7 +343,7 @@
+ ${jprt.my.windows.i586.test.targets}, \
+ ${jprt.my.windows.x64.test.targets}
+
+-jprt.test.targets.jdk8=${jprt.test.targets.standard}
++jprt.test.targets.jdk8u20=${jprt.test.targets.standard}
+ jprt.test.targets.jdk7=${jprt.test.targets.standard}
+ jprt.test.targets.jdk7u8=${jprt.test.targets.jdk7}
+ jprt.test.targets=${jprt.test.targets.${jprt.tools.default.release}}
+@@ -399,7 +393,7 @@
+ jprt.make.rule.test.targets.embedded = \
+ ${jprt.make.rule.test.targets.standard.client}
+
+-jprt.make.rule.test.targets.jdk8=${jprt.make.rule.test.targets.standard}
++jprt.make.rule.test.targets.jdk8u20=${jprt.make.rule.test.targets.standard}
+ jprt.make.rule.test.targets.jdk7=${jprt.make.rule.test.targets.standard}
+ jprt.make.rule.test.targets.jdk7u8=${jprt.make.rule.test.targets.jdk7}
+ jprt.make.rule.test.targets=${jprt.make.rule.test.targets.${jprt.tools.default.release}}
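Every block in jprt.properties follows the same computed-name pattern: the
unsuffixed property resolves through ${...${jprt.tools.default.release}},
so bumping the default release re-points all platform definitions at once.
GNU Make offers the same indirection through computed variable names; a
tiny analogue with illustrative names:

    release              := jdk8u20
    my.linux.x64.jdk8u20 := linux_x64_2.6
    my.linux.x64.jdk7    := linux_x64_2.6
    my.linux.x64         := $(my.linux.x64.$(release))
    $(info my.linux.x64=$(my.linux.x64))   # prints linux_x64_2.6
    all: ;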
+--- ./hotspot/make/linux/Makefile Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/make/linux/Makefile Wed Jul 30 03:51:43 2014 -0700
+@@ -66,6 +66,10 @@
+ FORCE_TIERED=1
+ endif
+ endif
++# C1 is not ported to ppc64, so we cannot build a tiered VM:
++ifeq ($(ARCH),ppc64)
++ FORCE_TIERED=0
++endif
+
+ ifdef LP64
+ ifeq ("$(filter $(LP64_ARCH),$(BUILDARCH))","")
+--- ./hotspot/make/linux/makefiles/buildtree.make Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/make/linux/makefiles/buildtree.make Wed Jul 30 03:51:43 2014 -0700
+@@ -193,6 +193,7 @@
+ DATA_MODE/sparc = 32
+ DATA_MODE/sparcv9 = 64
+ DATA_MODE/amd64 = 64
++DATA_MODE/ppc64 = 64
+
+ DATA_MODE = $(DATA_MODE/$(BUILDARCH))
+
+--- ./hotspot/make/linux/makefiles/defs.make Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/make/linux/makefiles/defs.make Wed Jul 30 03:51:43 2014 -0700
+@@ -33,6 +33,11 @@
+ # ARCH can be set explicitly in spec.gmk
+ ifndef ARCH
+ ARCH := $(shell uname -m)
++  # Fold little-endian PowerPC64 into big-endian (if ARCH is set in
++ # hotspot-spec.gmk, this will be done by the configure script).
++ ifeq ($(ARCH),ppc64le)
++ ARCH := ppc64
++ endif
+ endif
+
+ PATH_SEP ?= :
+@@ -120,6 +125,15 @@
+ HS_ARCH = ppc
+ endif
+
++# PPC64
++ifeq ($(ARCH), ppc64)
++ ARCH_DATA_MODEL = 64
++ MAKE_ARGS += LP64=1
++ PLATFORM = linux-ppc64
++ VM_PLATFORM = linux_ppc64
++ HS_ARCH = ppc
++endif
++
+ # On 32 bit linux we build server and client, on 64 bit just server.
+ ifeq ($(JVM_VARIANTS),)
+ ifeq ($(ARCH_DATA_MODEL), 32)
+@@ -255,7 +269,7 @@
+ EXPORT_CLIENT_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/client
+ EXPORT_MINIMAL_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/minimal
+
+-ifeq ($(findstring true, $(JVM_VARIANT_SERVER) $(JVM_VARIANT_ZERO) $(JVM_VARIANT_ZEROSHARK)), true)
++ifeq ($(findstring true, $(JVM_VARIANT_SERVER) $(JVM_VARIANT_ZERO) $(JVM_VARIANT_ZEROSHARK) $(JVM_VARIANT_CORE)), true)
+ EXPORT_LIST += $(EXPORT_SERVER_DIR)/Xusage.txt
+ EXPORT_LIST += $(EXPORT_SERVER_DIR)/libjvm.$(LIBRARY_SUFFIX)
+ ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
+--- ./hotspot/make/linux/makefiles/gcc.make Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/make/linux/makefiles/gcc.make Wed Jul 30 03:51:43 2014 -0700
+@@ -181,6 +181,7 @@
+ ifndef E500V2
+ ARCHFLAG/ppc = -mcpu=powerpc
+ endif
++ARCHFLAG/ppc64 = -m64
+
+ CFLAGS += $(ARCHFLAG)
+ AOUT_FLAGS += $(ARCHFLAG)
+@@ -346,6 +347,7 @@
+ DEBUG_CFLAGS/amd64 = -g
+ DEBUG_CFLAGS/arm = -g
+ DEBUG_CFLAGS/ppc = -g
++ DEBUG_CFLAGS/ppc64 = -g
+ DEBUG_CFLAGS += $(DEBUG_CFLAGS/$(BUILDARCH))
+ ifeq ($(DEBUG_CFLAGS/$(BUILDARCH)),)
+ ifeq ($(USE_CLANG), true)
+@@ -361,6 +363,7 @@
+ FASTDEBUG_CFLAGS/amd64 = -g
+ FASTDEBUG_CFLAGS/arm = -g
+ FASTDEBUG_CFLAGS/ppc = -g
++ FASTDEBUG_CFLAGS/ppc64 = -g
+ FASTDEBUG_CFLAGS += $(DEBUG_CFLAGS/$(BUILDARCH))
+ ifeq ($(FASTDEBUG_CFLAGS/$(BUILDARCH)),)
+ ifeq ($(USE_CLANG), true)
+@@ -375,6 +378,7 @@
+ OPT_CFLAGS/amd64 = -g
+ OPT_CFLAGS/arm = -g
+ OPT_CFLAGS/ppc = -g
++ OPT_CFLAGS/ppc64 = -g
+ OPT_CFLAGS += $(OPT_CFLAGS/$(BUILDARCH))
+ ifeq ($(OPT_CFLAGS/$(BUILDARCH)),)
+ ifeq ($(USE_CLANG), true)
+--- ./hotspot/make/linux/makefiles/mapfile-vers-debug Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/make/linux/makefiles/mapfile-vers-debug Wed Jul 30 03:51:43 2014 -0700
+@@ -122,7 +122,7 @@
+ JVM_GetClassModifiers;
+ JVM_GetClassName;
+ JVM_GetClassNameUTF;
+- JVM_GetClassSignature;
++ JVM_GetClassSignature;
+ JVM_GetClassSigners;
+ JVM_GetClassTypeAnnotations;
+ JVM_GetComponentType;
+@@ -163,6 +163,7 @@
+ JVM_GetStackTraceElement;
+ JVM_GetSystemPackage;
+ JVM_GetSystemPackages;
++ JVM_GetTemporaryDirectory;
+ JVM_GetThreadStateNames;
+ JVM_GetThreadStateValues;
+ JVM_GetVersionInfo;
+--- ./hotspot/make/linux/makefiles/mapfile-vers-product Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/make/linux/makefiles/mapfile-vers-product Wed Jul 30 03:51:43 2014 -0700
+@@ -163,6 +163,7 @@
+ JVM_GetStackTraceElement;
+ JVM_GetSystemPackage;
+ JVM_GetSystemPackages;
++ JVM_GetTemporaryDirectory;
+ JVM_GetThreadStateNames;
+ JVM_GetThreadStateValues;
+ JVM_GetVersionInfo;
+--- ./hotspot/make/linux/makefiles/ppc64.make Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/make/linux/makefiles/ppc64.make Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,51 @@
++#
++# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved.
++# Copyright 2012, 2013 SAP AG. All rights reserved.
++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++#
++# This code is free software; you can redistribute it and/or modify it
++# under the terms of the GNU General Public License version 2 only, as
++# published by the Free Software Foundation.
++#
++# This code is distributed in the hope that it will be useful, but WITHOUT
++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++# version 2 for more details (a copy is included in the LICENSE file that
++# accompanied this code).
++#
++# You should have received a copy of the GNU General Public License version
++# 2 along with this work; if not, write to the Free Software Foundation,
++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++#
++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++# or visit www.oracle.com if you need additional information or have any
++# questions.
++#
++#
++
++# Make C code know it is on a 64-bit platform.
++CFLAGS += -D_LP64=1
++
++ifeq ($(origin OPENJDK_TARGET_CPU_ENDIAN),undefined)
++  # This can happen during a standalone hotspot build. Set endianness from
++ # uname. We assume build and target machines are the same.
++ OPENJDK_TARGET_CPU_ENDIAN:=$(if $(filter ppc64le,$(shell uname -m)),little,big)
++endif
++
++ifeq ($(filter $(OPENJDK_TARGET_CPU_ENDIAN),big little),)
++ $(error OPENJDK_TARGET_CPU_ENDIAN value should be 'big' or 'little')
++endif
++
++ifeq ($(OPENJDK_TARGET_CPU_ENDIAN),big)
++  # Fixes the `relocation truncated to fit' error for gcc 4.1.
++ CFLAGS += -mminimal-toc
++
++  # Use ppc64 instructions, but schedule for power5.
++ CFLAGS += -mcpu=powerpc64 -mtune=power5 -minsert-sched-nops=regroup_exact -mno-multiple -mno-string
++else
++  # Little-endian machines use the ELFv2 ABI.
++ CFLAGS += -DVM_LITTLE_ENDIAN -DABI_ELFv2
++
++  # Use Power8; it is the first CPU to support PPC64 LE with the ELFv2 ABI.
++ CFLAGS += -mcpu=power7 -mtune=power8 -minsert-sched-nops=regroup_exact -mno-multiple -mno-string
++endif
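The endianness probe at the top of ppc64.make is a standard GNU Make
idiom: $(filter) returns the word only when it matches, and $(if) treats
any non-empty result as true. Reduced to a stand-alone sketch, with
MACHINE standing in for $(shell uname -m) so both branches can be tried:

    MACHINE ?= ppc64le
    OPENJDK_TARGET_CPU_ENDIAN := $(if $(filter ppc64le,$(MACHINE)),little,big)
    ifeq ($(filter $(OPENJDK_TARGET_CPU_ENDIAN),big little),)
      $(error OPENJDK_TARGET_CPU_ENDIAN value should be 'big' or 'little')
    endif
    $(info $(MACHINE) is $(OPENJDK_TARGET_CPU_ENDIAN)-endian)
    all: ;

Running 'make MACHINE=ppc64' prints 'big' and exercises the other branch;
the $(filter) guard mirrors the sanity check in the hunk above.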
+--- ./hotspot/make/linux/makefiles/zeroshark.make Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/make/linux/makefiles/zeroshark.make Wed Jul 30 03:51:43 2014 -0700
+@@ -25,6 +25,9 @@
+
+ # Setup common to Zero (non-Shark) and Shark versions of VM
+
++# Override this from the main file because some versions of llvm do not like -Wundef.
++WARNING_FLAGS = -Wpointer-arith -Wsign-compare -Wunused-function -Wunused-value
++
+ # The copied fdlibm routines in sharedRuntimeTrig.o must not be optimized
+ OPT_CFLAGS/sharedRuntimeTrig.o = $(OPT_CFLAGS/NOOPT)
+ # The copied fdlibm routines in sharedRuntimeTrans.o must not be optimized
+--- ./hotspot/make/linux/platform_ppc Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/make/linux/platform_ppc Wed Jul 30 03:51:43 2014 -0700
+@@ -2,11 +2,11 @@
+
+ arch = ppc
+
+-arch_model = ppc
++arch_model = ppc_32
+
+ os_arch = linux_ppc
+
+-os_arch_model = linux_ppc
++os_arch_model = linux_ppc_32
+
+ lib_arch = ppc
+
+@@ -14,4 +14,4 @@
+
+ gnu_dis_arch = ppc
+
+-sysdefs = -DLINUX -D_GNU_SOURCE -DPPC
++sysdefs = -DLINUX -D_GNU_SOURCE -DPPC32
+--- ./hotspot/make/linux/platform_ppc64 Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/make/linux/platform_ppc64 Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,17 @@
++os_family = linux
++
++arch = ppc
++
++arch_model = ppc_64
++
++os_arch = linux_ppc
++
++os_arch_model = linux_ppc_64
++
++lib_arch = ppc64
++
++compiler = gcc
++
++gnu_dis_arch = ppc64
++
++sysdefs = -DLINUX -D_GNU_SOURCE -DPPC64
+--- ./hotspot/make/solaris/makefiles/mapfile-vers Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/make/solaris/makefiles/mapfile-vers Wed Jul 30 03:51:43 2014 -0700
+@@ -163,6 +163,7 @@
+ JVM_GetStackTraceElement;
+ JVM_GetSystemPackage;
+ JVM_GetSystemPackages;
++ JVM_GetTemporaryDirectory;
+ JVM_GetThreadStateNames;
+ JVM_GetThreadStateValues;
+ JVM_GetVersionInfo;
+--- ./hotspot/make/windows/makefiles/defs.make Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/make/windows/makefiles/defs.make Wed Jul 30 03:51:43 2014 -0700
+@@ -260,7 +260,6 @@
+ EXPORT_LIST += $(EXPORT_SERVER_DIR)/jvm.map
+ endif
+ endif
+- EXPORT_LIST += $(EXPORT_LIB_DIR)/jvm.lib
+ endif
+ ifeq ($(JVM_VARIANT_CLIENT),true)
+ EXPORT_LIST += $(EXPORT_CLIENT_DIR)/Xusage.txt
+@@ -275,6 +274,8 @@
+ endif
+ endif
+
++EXPORT_LIST += $(EXPORT_LIB_DIR)/jvm.lib
++
+ ifeq ($(BUILD_WIN_SA), 1)
+ EXPORT_LIST += $(EXPORT_JRE_BIN_DIR)/sawindbg.$(LIBRARY_SUFFIX)
+ ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
+--- ./hotspot/src/cpu/ppc/vm/assembler_ppc.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/assembler_ppc.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,700 @@
++/*
++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2014 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "asm/assembler.inline.hpp"
++#include "gc_interface/collectedHeap.inline.hpp"
++#include "interpreter/interpreter.hpp"
++#include "memory/cardTableModRefBS.hpp"
++#include "memory/resourceArea.hpp"
++#include "prims/methodHandles.hpp"
++#include "runtime/biasedLocking.hpp"
++#include "runtime/interfaceSupport.hpp"
++#include "runtime/objectMonitor.hpp"
++#include "runtime/os.hpp"
++#include "runtime/sharedRuntime.hpp"
++#include "runtime/stubRoutines.hpp"
++#include "utilities/macros.hpp"
++#if INCLUDE_ALL_GCS
++#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
++#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
++#include "gc_implementation/g1/heapRegion.hpp"
++#endif // INCLUDE_ALL_GCS
++
++#ifdef PRODUCT
++#define BLOCK_COMMENT(str) // nothing
++#else
++#define BLOCK_COMMENT(str) block_comment(str)
++#endif
++
++int AbstractAssembler::code_fill_byte() {
++ return 0x00; // illegal instruction 0x00000000
++}
++
++void Assembler::print_instruction(int inst) {
++ Unimplemented();
++}
++
++// Patch instruction `inst' at offset `inst_pos' to refer to
++// `dest_pos' and return the resulting instruction. We should have
++// pcs, not offsets, but since all is relative, it will work out fine.
++int Assembler::patched_branch(int dest_pos, int inst, int inst_pos) {
++ int m = 0; // mask for displacement field
++ int v = 0; // new value for displacement field
++
++ switch (inv_op_ppc(inst)) {
++ case b_op: m = li(-1); v = li(disp(dest_pos, inst_pos)); break;
++ case bc_op: m = bd(-1); v = bd(disp(dest_pos, inst_pos)); break;
++ default: ShouldNotReachHere();
++ }
++  return (inst & ~m) | v;
++}
++
++// Return the offset, relative to _code_begin, of the destination of
++// the branch inst at offset pos.
++int Assembler::branch_destination(int inst, int pos) {
++ int r = 0;
++ switch (inv_op_ppc(inst)) {
++ case b_op: r = bxx_destination_offset(inst, pos); break;
++ case bc_op: r = inv_bd_field(inst, pos); break;
++ default: ShouldNotReachHere();
++ }
++ return r;
++}
++
++// Low-level one-instruction andi macro.
++void Assembler::andi(Register a, Register s, const int ui16) {
++ assert(is_uimm(ui16, 16), "must be 16-bit unsigned immediate");
++ if (is_power_of_2_long(((jlong) ui16)+1)) {
++ // pow2minus1
++ clrldi(a, s, 64-log2_long((((jlong) ui16)+1)));
++ } else if (is_power_of_2_long((jlong) ui16)) {
++ // pow2
++ rlwinm(a, s, 0, 31-log2_long((jlong) ui16), 31-log2_long((jlong) ui16));
++ } else if (is_power_of_2_long((jlong)-ui16)) {
++ // negpow2
++ clrrdi(a, s, log2_long((jlong)-ui16));
++ } else {
++ andi_(a, s, ui16);
++ }
++}
++
++// RegisterOrConstant version.
++void Assembler::ld(Register d, RegisterOrConstant roc, Register s1) {
++ if (roc.is_constant()) {
++ if (s1 == noreg) {
++ int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true);
++ Assembler::ld(d, simm16_rest, d);
++ } else if (is_simm(roc.as_constant(), 16)) {
++ Assembler::ld(d, roc.as_constant(), s1);
++ } else {
++ load_const_optimized(d, roc.as_constant());
++ Assembler::ldx(d, d, s1);
++ }
++ } else {
++ if (s1 == noreg)
++ Assembler::ld(d, 0, roc.as_register());
++ else
++ Assembler::ldx(d, roc.as_register(), s1);
++ }
++}
++
++void Assembler::lwa(Register d, RegisterOrConstant roc, Register s1) {
++ if (roc.is_constant()) {
++ if (s1 == noreg) {
++ int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true);
++ Assembler::lwa(d, simm16_rest, d);
++ } else if (is_simm(roc.as_constant(), 16)) {
++ Assembler::lwa(d, roc.as_constant(), s1);
++ } else {
++ load_const_optimized(d, roc.as_constant());
++ Assembler::lwax(d, d, s1);
++ }
++ } else {
++ if (s1 == noreg)
++ Assembler::lwa(d, 0, roc.as_register());
++ else
++ Assembler::lwax(d, roc.as_register(), s1);
++ }
++}
++
++void Assembler::lwz(Register d, RegisterOrConstant roc, Register s1) {
++ if (roc.is_constant()) {
++ if (s1 == noreg) {
++ int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true);
++ Assembler::lwz(d, simm16_rest, d);
++ } else if (is_simm(roc.as_constant(), 16)) {
++ Assembler::lwz(d, roc.as_constant(), s1);
++ } else {
++ load_const_optimized(d, roc.as_constant());
++ Assembler::lwzx(d, d, s1);
++ }
++ } else {
++ if (s1 == noreg)
++ Assembler::lwz(d, 0, roc.as_register());
++ else
++ Assembler::lwzx(d, roc.as_register(), s1);
++ }
++}
++
++void Assembler::lha(Register d, RegisterOrConstant roc, Register s1) {
++ if (roc.is_constant()) {
++ if (s1 == noreg) {
++ int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true);
++ Assembler::lha(d, simm16_rest, d);
++ } else if (is_simm(roc.as_constant(), 16)) {
++ Assembler::lha(d, roc.as_constant(), s1);
++ } else {
++ load_const_optimized(d, roc.as_constant());
++ Assembler::lhax(d, d, s1);
++ }
++ } else {
++ if (s1 == noreg)
++ Assembler::lha(d, 0, roc.as_register());
++ else
++ Assembler::lhax(d, roc.as_register(), s1);
++ }
++}
++
++void Assembler::lhz(Register d, RegisterOrConstant roc, Register s1) {
++ if (roc.is_constant()) {
++ if (s1 == noreg) {
++ int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true);
++ Assembler::lhz(d, simm16_rest, d);
++ } else if (is_simm(roc.as_constant(), 16)) {
++ Assembler::lhz(d, roc.as_constant(), s1);
++ } else {
++ load_const_optimized(d, roc.as_constant());
++ Assembler::lhzx(d, d, s1);
++ }
++ } else {
++ if (s1 == noreg)
++ Assembler::lhz(d, 0, roc.as_register());
++ else
++ Assembler::lhzx(d, roc.as_register(), s1);
++ }
++}
++
++void Assembler::lbz(Register d, RegisterOrConstant roc, Register s1) {
++ if (roc.is_constant()) {
++ if (s1 == noreg) {
++ int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true);
++ Assembler::lbz(d, simm16_rest, d);
++ } else if (is_simm(roc.as_constant(), 16)) {
++ Assembler::lbz(d, roc.as_constant(), s1);
++ } else {
++ load_const_optimized(d, roc.as_constant());
++ Assembler::lbzx(d, d, s1);
++ }
++ } else {
++ if (s1 == noreg)
++ Assembler::lbz(d, 0, roc.as_register());
++ else
++ Assembler::lbzx(d, roc.as_register(), s1);
++ }
++}
++
++void Assembler::std(Register d, RegisterOrConstant roc, Register s1, Register tmp) {
++ if (roc.is_constant()) {
++ if (s1 == noreg) {
++ guarantee(tmp != noreg, "Need tmp reg to encode large constants");
++ int simm16_rest = load_const_optimized(tmp, roc.as_constant(), noreg, true);
++ Assembler::std(d, simm16_rest, tmp);
++ } else if (is_simm(roc.as_constant(), 16)) {
++ Assembler::std(d, roc.as_constant(), s1);
++ } else {
++ guarantee(tmp != noreg, "Need tmp reg to encode large constants");
++ load_const_optimized(tmp, roc.as_constant());
++ Assembler::stdx(d, tmp, s1);
++ }
++ } else {
++ if (s1 == noreg)
++ Assembler::std(d, 0, roc.as_register());
++ else
++ Assembler::stdx(d, roc.as_register(), s1);
++ }
++}
++
++void Assembler::stw(Register d, RegisterOrConstant roc, Register s1, Register tmp) {
++ if (roc.is_constant()) {
++ if (s1 == noreg) {
++ guarantee(tmp != noreg, "Need tmp reg to encode large constants");
++ int simm16_rest = load_const_optimized(tmp, roc.as_constant(), noreg, true);
++ Assembler::stw(d, simm16_rest, tmp);
++ } else if (is_simm(roc.as_constant(), 16)) {
++ Assembler::stw(d, roc.as_constant(), s1);
++ } else {
++ guarantee(tmp != noreg, "Need tmp reg to encode large constants");
++ load_const_optimized(tmp, roc.as_constant());
++ Assembler::stwx(d, tmp, s1);
++ }
++ } else {
++ if (s1 == noreg)
++ Assembler::stw(d, 0, roc.as_register());
++ else
++ Assembler::stwx(d, roc.as_register(), s1);
++ }
++}
++
++void Assembler::sth(Register d, RegisterOrConstant roc, Register s1, Register tmp) {
++ if (roc.is_constant()) {
++ if (s1 == noreg) {
++ guarantee(tmp != noreg, "Need tmp reg to encode large constants");
++ int simm16_rest = load_const_optimized(tmp, roc.as_constant(), noreg, true);
++ Assembler::sth(d, simm16_rest, tmp);
++ } else if (is_simm(roc.as_constant(), 16)) {
++ Assembler::sth(d, roc.as_constant(), s1);
++ } else {
++ guarantee(tmp != noreg, "Need tmp reg to encode large constants");
++ load_const_optimized(tmp, roc.as_constant());
++ Assembler::sthx(d, tmp, s1);
++ }
++ } else {
++ if (s1 == noreg)
++ Assembler::sth(d, 0, roc.as_register());
++ else
++ Assembler::sthx(d, roc.as_register(), s1);
++ }
++}
++
++void Assembler::stb(Register d, RegisterOrConstant roc, Register s1, Register tmp) {
++ if (roc.is_constant()) {
++ if (s1 == noreg) {
++ guarantee(tmp != noreg, "Need tmp reg to encode large constants");
++ int simm16_rest = load_const_optimized(tmp, roc.as_constant(), noreg, true);
++ Assembler::stb(d, simm16_rest, tmp);
++ } else if (is_simm(roc.as_constant(), 16)) {
++ Assembler::stb(d, roc.as_constant(), s1);
++ } else {
++ guarantee(tmp != noreg, "Need tmp reg to encode large constants");
++ load_const_optimized(tmp, roc.as_constant());
++ Assembler::stbx(d, tmp, s1);
++ }
++ } else {
++ if (s1 == noreg)
++ Assembler::stb(d, 0, roc.as_register());
++ else
++ Assembler::stbx(d, roc.as_register(), s1);
++ }
++}
++
++void Assembler::add(Register d, RegisterOrConstant roc, Register s1) {
++ if (roc.is_constant()) {
++ intptr_t c = roc.as_constant();
++ assert(is_simm(c, 16), "too big");
++ addi(d, s1, (int)c);
++ }
++ else add(d, roc.as_register(), s1);
++}
++
++void Assembler::subf(Register d, RegisterOrConstant roc, Register s1) {
++ if (roc.is_constant()) {
++ intptr_t c = roc.as_constant();
++ assert(is_simm(-c, 16), "too big");
++ addi(d, s1, (int)-c);
++ }
++ else subf(d, roc.as_register(), s1);
++}
++
++void Assembler::cmpd(ConditionRegister d, RegisterOrConstant roc, Register s1) {
++ if (roc.is_constant()) {
++ intptr_t c = roc.as_constant();
++ assert(is_simm(c, 16), "too big");
++ cmpdi(d, s1, (int)c);
++ }
++ else cmpd(d, roc.as_register(), s1);
++}
++
++// Load a 64-bit constant. Patchable.
++void Assembler::load_const(Register d, long x, Register tmp) {
++ // 64-bit value: x = xa xb xc xd
++ int xa = (x >> 48) & 0xffff;
++ int xb = (x >> 32) & 0xffff;
++ int xc = (x >> 16) & 0xffff;
++ int xd = (x >> 0) & 0xffff;
++ if (tmp == noreg) {
++ Assembler::lis( d, (int)(short)xa);
++ Assembler::ori( d, d, (unsigned int)xb);
++ Assembler::sldi(d, d, 32);
++ Assembler::oris(d, d, (unsigned int)xc);
++ Assembler::ori( d, d, (unsigned int)xd);
++ } else {
++ // exploit instruction level parallelism if we have a tmp register
++ assert_different_registers(d, tmp);
++ Assembler::lis(tmp, (int)(short)xa);
++ Assembler::lis(d, (int)(short)xc);
++ Assembler::ori(tmp, tmp, (unsigned int)xb);
++ Assembler::ori(d, d, (unsigned int)xd);
++ Assembler::insrdi(d, tmp, 32, 0);
++ }
++}
++
++// Load a 64-bit constant, optimized, not identifiable.
++// Tmp can be used to increase ILP. Set return_simm16_rest=true to get a
++// 16-bit immediate offset.
++int Assembler::load_const_optimized(Register d, long x, Register tmp, bool return_simm16_rest) {
++ // Avoid accidentally trying to use R0 for indexed addressing.
++ assert(d != R0, "R0 not allowed");
++ assert_different_registers(d, tmp);
++
++ short xa, xb, xc, xd; // Four 16-bit chunks of const.
++ long rem = x; // Remaining part of const.
++
++ xd = rem & 0xFFFF; // Lowest 16-bit chunk.
++ rem = (rem >> 16) + ((unsigned short)xd >> 15); // Compensation for sign extend.
++
++ if (rem == 0) { // opt 1: simm16
++ li(d, xd);
++ return 0;
++ }
++
++ xc = rem & 0xFFFF; // Next 16-bit chunk.
++ rem = (rem >> 16) + ((unsigned short)xc >> 15); // Compensation for sign extend.
++
++ if (rem == 0) { // opt 2: simm32
++ lis(d, xc);
++ } else { // High 32 bits needed.
++
++ if (tmp != noreg) { // opt 3: We have a temp reg.
++ // No carry propagation between xc and higher chunks here (use logical instructions).
++ xa = (x >> 48) & 0xffff;
++ xb = (x >> 32) & 0xffff; // No sign compensation, we use lis+ori or li to allow usage of R0.
++ bool load_xa = (xa != 0) || (xb < 0);
++ bool return_xd = false;
++
++ if (load_xa) { lis(tmp, xa); }
++ if (xc) { lis(d, xc); }
++ if (load_xa) {
++ if (xb) { ori(tmp, tmp, (unsigned short)xb); } // No addi, we support tmp == R0.
++ } else {
++ li(tmp, xb); // non-negative
++ }
++ if (xc) {
++ if (return_simm16_rest && xd >= 0) { return_xd = true; } // >= 0 to avoid carry propagation after insrdi/rldimi.
++ else if (xd) { addi(d, d, xd); }
++ } else {
++ li(d, xd);
++ }
++ insrdi(d, tmp, 32, 0);
++ return return_xd ? xd : 0; // non-negative
++ }
++
++ xb = rem & 0xFFFF; // Next 16-bit chunk.
++ rem = (rem >> 16) + ((unsigned short)xb >> 15); // Compensation for sign extend.
++
++ xa = rem & 0xFFFF; // Highest 16-bit chunk.
++
++ // opt 4: avoid adding 0
++ if (xa) { // Highest 16-bit needed?
++ lis(d, xa);
++ if (xb) { addi(d, d, xb); }
++ } else {
++ li(d, xb);
++ }
++ sldi(d, d, 32);
++ if (xc) { addis(d, d, xc); }
++ }
++
++ // opt 5: Return offset to be inserted into following instruction.
++ if (return_simm16_rest) return xd;
++
++ if (xd) { addi(d, d, xd); }
++ return 0;
++}
++
++#ifndef PRODUCT
++// Test of ppc assembler.
++void Assembler::test_asm() {
++ // PPC 1, section 3.3.8, Fixed-Point Arithmetic Instructions
++ addi( R0, R1, 10);
++ addis( R5, R2, 11);
++ addic_( R3, R31, 42);
++ subfic( R21, R12, 2112);
++ add( R3, R2, R1);
++ add_( R11, R22, R30);
++ subf( R7, R6, R5);
++ subf_( R8, R9, R4);
++ addc( R11, R12, R13);
++ addc_( R14, R14, R14);
++ subfc( R15, R16, R17);
++ subfc_( R18, R20, R19);
++ adde( R20, R22, R24);
++ adde_( R29, R27, R26);
++ subfe( R28, R1, R0);
++ subfe_( R21, R11, R29);
++ neg( R21, R22);
++ neg_( R13, R23);
++ mulli( R0, R11, -31);
++ mulld( R1, R18, R21);
++ mulld_( R2, R17, R22);
++ mullw( R3, R16, R23);
++ mullw_( R4, R15, R24);
++ divd( R5, R14, R25);
++ divd_( R6, R13, R26);
++ divw( R7, R12, R27);
++ divw_( R8, R11, R28);
++
++ li( R3, -4711);
++
++ // PPC 1, section 3.3.9, Fixed-Point Compare Instructions
++ cmpi( CCR7, 0, R27, 4711);
++ cmp( CCR0, 1, R14, R11);
++ cmpli( CCR5, 1, R17, 45);
++ cmpl( CCR3, 0, R9, R10);
++
++ cmpwi( CCR7, R27, 4711);
++ cmpw( CCR0, R14, R11);
++ cmplwi( CCR5, R17, 45);
++ cmplw( CCR3, R9, R10);
++
++ cmpdi( CCR7, R27, 4711);
++ cmpd( CCR0, R14, R11);
++ cmpldi( CCR5, R17, 45);
++ cmpld( CCR3, R9, R10);
++
++ // PPC 1, section 3.3.11, Fixed-Point Logical Instructions
++ andi_( R4, R5, 0xff);
++ andis_( R12, R13, 0x7b51);
++ ori( R1, R4, 13);
++ oris( R3, R5, 177);
++ xori( R7, R6, 51);
++ xoris( R29, R0, 1);
++ andr( R17, R21, R16);
++ and_( R3, R5, R15);
++ orr( R2, R1, R9);
++ or_( R17, R15, R11);
++ xorr( R19, R18, R10);
++ xor_( R31, R21, R11);
++ nand( R5, R7, R3);
++ nand_( R3, R1, R0);
++ nor( R2, R3, R5);
++ nor_( R3, R6, R8);
++ andc( R25, R12, R11);
++ andc_( R24, R22, R21);
++ orc( R20, R10, R12);
++ orc_( R22, R2, R13);
++
++ nop();
++
++ // PPC 1, section 3.3.12, Fixed-Point Rotate and Shift Instructions
++ sld( R5, R6, R8);
++ sld_( R3, R5, R9);
++ slw( R2, R1, R10);
++ slw_( R6, R26, R16);
++ srd( R16, R24, R8);
++ srd_( R21, R14, R7);
++ srw( R22, R25, R29);
++ srw_( R5, R18, R17);
++ srad( R7, R11, R0);
++ srad_( R9, R13, R1);
++ sraw( R7, R15, R2);
++ sraw_( R4, R17, R3);
++ sldi( R3, R18, 63);
++ sldi_( R2, R20, 30);
++ slwi( R1, R21, 30);
++ slwi_( R7, R23, 8);
++ srdi( R0, R19, 2);
++ srdi_( R12, R24, 5);
++ srwi( R13, R27, 6);
++ srwi_( R14, R29, 7);
++ sradi( R15, R30, 9);
++ sradi_( R16, R31, 19);
++ srawi( R17, R31, 15);
++ srawi_( R18, R31, 12);
++
++ clrrdi( R3, R30, 5);
++ clrldi( R9, R10, 11);
++
++ rldicr( R19, R20, 13, 15);
++ rldicr_(R20, R20, 16, 14);
++ rldicl( R21, R21, 30, 33);
++ rldicl_(R22, R1, 20, 25);
++ rlwinm( R23, R2, 25, 10, 11);
++ rlwinm_(R24, R3, 12, 13, 14);
++
++ // PPC 1, section 3.3.2 Fixed-Point Load Instructions
++ lwzx( R3, R5, R7);
++ lwz( R11, 0, R1);
++ lwzu( R31, -4, R11);
++
++ lwax( R3, R5, R7);
++ lwa( R31, -4, R11);
++ lhzx( R3, R5, R7);
++ lhz( R31, -4, R11);
++ lhzu( R31, -4, R11);
++
++
++ lhax( R3, R5, R7);
++ lha( R31, -4, R11);
++ lhau( R11, 0, R1);
++
++ lbzx( R3, R5, R7);
++ lbz( R31, -4, R11);
++ lbzu( R11, 0, R1);
++
++ ld( R31, -4, R11);
++ ldx( R3, R5, R7);
++ ldu( R31, -4, R11);
++
++ // PPC 1, section 3.3.3 Fixed-Point Store Instructions
++ stwx( R3, R5, R7);
++ stw( R31, -4, R11);
++ stwu( R11, 0, R1);
++
++ sthx( R3, R5, R7 );
++ sth( R31, -4, R11);
++ sthu( R31, -4, R11);
++
++ stbx( R3, R5, R7);
++ stb( R31, -4, R11);
++ stbu( R31, -4, R11);
++
++ std( R31, -4, R11);
++ stdx( R3, R5, R7);
++ stdu( R31, -4, R11);
++
++ // PPC 1, section 3.3.13 Move To/From System Register Instructions
++ mtlr( R3);
++ mflr( R3);
++ mtctr( R3);
++ mfctr( R3);
++ mtcrf( 0xff, R15);
++ mtcr( R15);
++ mtcrf( 0x03, R15);
++ mtcr( R15);
++ mfcr( R15);
++
++ // PPC 1, section 2.4.1 Branch Instructions
++ Label lbl1, lbl2, lbl3;
++ bind(lbl1);
++
++ b(pc());
++ b(pc() - 8);
++ b(lbl1);
++ b(lbl2);
++ b(lbl3);
++
++ bl(pc() - 8);
++ bl(lbl1);
++ bl(lbl2);
++
++ bcl(4, 10, pc() - 8);
++ bcl(4, 10, lbl1);
++ bcl(4, 10, lbl2);
++
++ bclr( 4, 6, 0);
++ bclrl(4, 6, 0);
++
++ bind(lbl2);
++
++ bcctr( 4, 6, 0);
++ bcctrl(4, 6, 0);
++
++ blt(CCR0, lbl2);
++ bgt(CCR1, lbl2);
++ beq(CCR2, lbl2);
++ bso(CCR3, lbl2);
++ bge(CCR4, lbl2);
++ ble(CCR5, lbl2);
++ bne(CCR6, lbl2);
++ bns(CCR7, lbl2);
++
++ bltl(CCR0, lbl2);
++ bgtl(CCR1, lbl2);
++ beql(CCR2, lbl2);
++ bsol(CCR3, lbl2);
++ bgel(CCR4, lbl2);
++ blel(CCR5, lbl2);
++ bnel(CCR6, lbl2);
++ bnsl(CCR7, lbl2);
++ blr();
++
++ sync();
++ icbi( R1, R2);
++ dcbst(R2, R3);
++
++ // FLOATING POINT instructions ppc.
++ // PPC 1, section 4.6.2 Floating-Point Load Instructions
++ lfs( F1, -11, R3);
++ lfsu(F2, 123, R4);
++ lfsx(F3, R5, R6);
++ lfd( F4, 456, R7);
++ lfdu(F5, 789, R8);
++ lfdx(F6, R10, R11);
++
++ // PPC 1, section 4.6.3 Floating-Point Store Instructions
++ stfs( F7, 876, R12);
++ stfsu( F8, 543, R13);
++ stfsx( F9, R14, R15);
++ stfd( F10, 210, R16);
++ stfdu( F11, 111, R17);
++ stfdx( F12, R18, R19);
++
++ // PPC 1, section 4.6.4 Floating-Point Move Instructions
++ fmr( F13, F14);
++ fmr_( F14, F15);
++ fneg( F16, F17);
++ fneg_( F18, F19);
++ fabs( F20, F21);
++ fabs_( F22, F23);
++ fnabs( F24, F25);
++ fnabs_(F26, F27);
++
++ // PPC 1, section 4.6.5.1 Floating-Point Elementary Arithmetic
++ // Instructions
++ fadd( F28, F29, F30);
++ fadd_( F31, F0, F1);
++ fadds( F2, F3, F4);
++ fadds_(F5, F6, F7);
++ fsub( F8, F9, F10);
++ fsub_( F11, F12, F13);
++ fsubs( F14, F15, F16);
++ fsubs_(F17, F18, F19);
++ fmul( F20, F21, F22);
++ fmul_( F23, F24, F25);
++ fmuls( F26, F27, F28);
++ fmuls_(F29, F30, F31);
++ fdiv( F0, F1, F2);
++ fdiv_( F3, F4, F5);
++ fdivs( F6, F7, F8);
++ fdivs_(F9, F10, F11);
++
++ // PPC 1, section 4.6.6 Floating-Point Rounding and Conversion
++ // Instructions
++ frsp( F12, F13);
++ fctid( F14, F15);
++ fctidz(F16, F17);
++ fctiw( F18, F19);
++ fctiwz(F20, F21);
++ fcfid( F22, F23);
++
++ // PPC 1, section 4.6.7 Floating-Point Compare Instructions
++ fcmpu( CCR7, F24, F25);
++
++ tty->print_cr("\ntest_asm disassembly (0x%lx 0x%lx):", code()->insts_begin(), code()->insts_end());
++ code()->decode();
++}
++
++#endif // !PRODUCT
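Two idioms from assembler_ppc.cpp can be checked host-side in plain C++:
the patched_branch mask trick, where the field encoder applied to -1
yields a full-field mask so that clearing with ~m and OR-ing in v swaps
the displacement, and the sign-compensated 16-bit decomposition behind
load_const_optimized. A sketch under stated assumptions: encode() is a
stand-in resembling the I-form LI field, not a HotSpot API, and arithmetic
right shift on negative values is assumed, as the HotSpot code itself
assumes.

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Stand-in for HotSpot's li()/bd() field encoders: encode(-1) sets
    // every bit of the field, so one encoder builds both mask and value.
    static uint32_t encode(int32_t disp) {
      return ((uint32_t)disp << 2) & 0x03FFFFFCu;  // I-form LI-like field
    }

    static uint32_t patch_field(uint32_t inst, int32_t new_disp) {
      uint32_t m = encode(-1);        // mask covering the whole field
      uint32_t v = encode(new_disp);  // new field contents
      return (inst & ~m) | v;         // same shape as patched_branch
    }

    // Sign-compensated split of x into 16-bit chunks xd (lowest) .. xa,
    // the arithmetic behind load_const_optimized: a negative chunk is
    // sign-extended by addi/lis, so the next chunk absorbs the carry.
    static void split64(int64_t x, short& xa, short& xb, short& xc, short& xd) {
      int64_t rem = x;
      xd = (short)(rem & 0xFFFF); rem = (rem >> 16) + ((unsigned short)xd >> 15);
      xc = (short)(rem & 0xFFFF); rem = (rem >> 16) + ((unsigned short)xc >> 15);
      xb = (short)(rem & 0xFFFF); rem = (rem >> 16) + ((unsigned short)xb >> 15);
      xa = (short)(rem & 0xFFFF);
    }

    int main() {
      uint32_t inst = 0x48000000u | encode(0x10);  // 'b'-like opcode, old disp
      assert(patch_field(inst, 0x40) == (0x48000000u | encode(0x40)));

      const int64_t x = 0x123456789abcdef0LL;
      short xa, xb, xc, xd;
      split64(x, xa, xb, xc, xd);
      // Reassemble the way lis/addis/addi would: each chunk sign-extended
      // and added at its position; unsigned arithmetic keeps the
      // wrap-around well defined.
      uint64_t y = ((uint64_t)(int64_t)xa << 48) + ((uint64_t)(int64_t)xb << 32)
                 + ((uint64_t)(int64_t)xc << 16) + (uint64_t)(int64_t)xd;
      assert(y == (uint64_t)x);
      std::printf("chunks %04hx %04hx %04hx %04hx -> %016llx\n",
                  (unsigned short)xa, (unsigned short)xb,
                  (unsigned short)xc, (unsigned short)xd,
                  (unsigned long long)y);
      return 0;
    }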
+--- ./hotspot/src/cpu/ppc/vm/assembler_ppc.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/assembler_ppc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,1976 @@
++/*
++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_PPC_VM_ASSEMBLER_PPC_HPP
++#define CPU_PPC_VM_ASSEMBLER_PPC_HPP
++
++#include "asm/register.hpp"
++
++// Address is an abstraction used to represent a memory location
++// as used in assembler instructions.
++// PPC instructions grok either baseReg + indexReg or baseReg + disp.
++// So far we do not use this, as the simplification offered by this
++// class is low on PPC with its simple addressing modes. Use
++// RegisterOrConstant to represent an offset.
++class Address VALUE_OBJ_CLASS_SPEC {
++};
++
++class AddressLiteral VALUE_OBJ_CLASS_SPEC {
++ private:
++ address _address;
++ RelocationHolder _rspec;
++
++ RelocationHolder rspec_from_rtype(relocInfo::relocType rtype, address addr) {
++ switch (rtype) {
++ case relocInfo::external_word_type:
++ return external_word_Relocation::spec(addr);
++ case relocInfo::internal_word_type:
++ return internal_word_Relocation::spec(addr);
++ case relocInfo::opt_virtual_call_type:
++ return opt_virtual_call_Relocation::spec();
++ case relocInfo::static_call_type:
++ return static_call_Relocation::spec();
++ case relocInfo::runtime_call_type:
++ return runtime_call_Relocation::spec();
++ case relocInfo::none:
++ return RelocationHolder();
++ default:
++ ShouldNotReachHere();
++ return RelocationHolder();
++ }
++ }
++
++ protected:
++ // creation
++ AddressLiteral() : _address(NULL), _rspec(NULL) {}
++
++ public:
++ AddressLiteral(address addr, RelocationHolder const& rspec)
++ : _address(addr),
++ _rspec(rspec) {}
++
++ AddressLiteral(address addr, relocInfo::relocType rtype = relocInfo::none)
++ : _address((address) addr),
++ _rspec(rspec_from_rtype(rtype, (address) addr)) {}
++
++ AddressLiteral(oop* addr, relocInfo::relocType rtype = relocInfo::none)
++ : _address((address) addr),
++ _rspec(rspec_from_rtype(rtype, (address) addr)) {}
++
++ intptr_t value() const { return (intptr_t) _address; }
++
++ const RelocationHolder& rspec() const { return _rspec; }
++};
++
++// Argument is an abstraction used to represent an outgoing
++// actual argument or an incoming formal parameter, whether
++// it resides in memory or in a register, in a manner consistent
++// with the PPC Application Binary Interface, or ABI. This is
++// often referred to as the native or C calling convention.
++
++class Argument VALUE_OBJ_CLASS_SPEC {
++ private:
++ int _number; // The number of the argument.
++ public:
++ enum {
++ // Only 8 registers may contain integer parameters.
++ n_register_parameters = 8,
++ // Can have up to 8 floating registers.
++ n_float_register_parameters = 8,
++
++ // PPC C calling conventions.
++ // The first eight arguments are passed in int regs if they are int.
++ n_int_register_parameters_c = 8,
++ // The first thirteen float arguments are passed in float regs.
++ n_float_register_parameters_c = 13,
++    // Only the first 8 parameters are not placed on the stack. AIX disassembly
++ // shows that xlC places all float args after argument 8 on the stack AND
++ // in a register. This is not documented, but we follow this convention, too.
++ n_regs_not_on_stack_c = 8,
++ };
++ // creation
++ Argument(int number) : _number(number) {}
++
++ int number() const { return _number; }
++
++ // Locating register-based arguments:
++ bool is_register() const { return _number < n_register_parameters; }
++
++ Register as_register() const {
++ assert(is_register(), "must be a register argument");
++ return as_Register(number() + R3_ARG1->encoding());
++ }
++};
++
++#if !defined(ABI_ELFv2)
++// A ppc64 function descriptor.
++struct FunctionDescriptor VALUE_OBJ_CLASS_SPEC {
++ private:
++ address _entry;
++ address _toc;
++ address _env;
++
++ public:
++ inline address entry() const { return _entry; }
++ inline address toc() const { return _toc; }
++ inline address env() const { return _env; }
++
++ inline void set_entry(address entry) { _entry = entry; }
++ inline void set_toc( address toc) { _toc = toc; }
++ inline void set_env( address env) { _env = env; }
++
++ inline static ByteSize entry_offset() { return byte_offset_of(FunctionDescriptor, _entry); }
++ inline static ByteSize toc_offset() { return byte_offset_of(FunctionDescriptor, _toc); }
++ inline static ByteSize env_offset() { return byte_offset_of(FunctionDescriptor, _env); }
++
++ // Friend functions can be called without loading toc and env.
++ enum {
++ friend_toc = 0xcafe,
++ friend_env = 0xc0de
++ };
++
++ inline bool is_friend_function() const {
++ return (toc() == (address) friend_toc) && (env() == (address) friend_env);
++ }
++
++ // Constructor for stack-allocated instances.
++ FunctionDescriptor() {
++ _entry = (address) 0xbad;
++ _toc = (address) 0xbad;
++ _env = (address) 0xbad;
++ }
++};
++#endif
++
++class Assembler : public AbstractAssembler {
++ protected:
++ // Displacement routines
++ static void print_instruction(int inst);
++ static int patched_branch(int dest_pos, int inst, int inst_pos);
++ static int branch_destination(int inst, int pos);
++
++ friend class AbstractAssembler;
++
++ // Code patchers need various routines like inv_wdisp()
++ friend class NativeInstruction;
++ friend class NativeGeneralJump;
++ friend class Relocation;
++
++ public:
++
++ enum shifts {
++ XO_21_29_SHIFT = 2,
++ XO_21_30_SHIFT = 1,
++ XO_27_29_SHIFT = 2,
++ XO_30_31_SHIFT = 0,
++ SPR_5_9_SHIFT = 11u, // SPR_5_9 field in bits 11 -- 15
++ SPR_0_4_SHIFT = 16u, // SPR_0_4 field in bits 16 -- 20
++ RS_SHIFT = 21u, // RS field in bits 21 -- 25
++ OPCODE_SHIFT = 26u, // opcode in bits 26 -- 31
++ };
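++
++ // Worked example: the primary opcode occupies the six most significant
++ // bits of the 32-bit instruction word, so ADDI_OPCODE below is
++ // 14u << OPCODE_SHIFT == 0x38000000; complete instructions are formed by
++ // OR-ing register and immediate fields into the remaining bits.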
++
++ enum opcdxos_masks {
++ XL_FORM_OPCODE_MASK = (63u << OPCODE_SHIFT) | (1023u << 1),
++ ADDI_OPCODE_MASK = (63u << OPCODE_SHIFT),
++ ADDIS_OPCODE_MASK = (63u << OPCODE_SHIFT),
++ BXX_OPCODE_MASK = (63u << OPCODE_SHIFT),
++ BCXX_OPCODE_MASK = (63u << OPCODE_SHIFT),
++ // trap instructions
++ TDI_OPCODE_MASK = (63u << OPCODE_SHIFT),
++ TWI_OPCODE_MASK = (63u << OPCODE_SHIFT),
++ TD_OPCODE_MASK = (63u << OPCODE_SHIFT) | (1023u << 1),
++ TW_OPCODE_MASK = (63u << OPCODE_SHIFT) | (1023u << 1),
++ LD_OPCODE_MASK = (63u << OPCODE_SHIFT) | (3u << XO_30_31_SHIFT), // DS-FORM
++ STD_OPCODE_MASK = LD_OPCODE_MASK,
++ STDU_OPCODE_MASK = STD_OPCODE_MASK,
++ STDX_OPCODE_MASK = (63u << OPCODE_SHIFT) | (1023u << 1),
++ STDUX_OPCODE_MASK = STDX_OPCODE_MASK,
++ STW_OPCODE_MASK = (63u << OPCODE_SHIFT),
++ STWU_OPCODE_MASK = STW_OPCODE_MASK,
++ STWX_OPCODE_MASK = (63u << OPCODE_SHIFT) | (1023u << 1),
++ STWUX_OPCODE_MASK = STWX_OPCODE_MASK,
++ MTCTR_OPCODE_MASK = ~(31u << RS_SHIFT),
++ ORI_OPCODE_MASK = (63u << OPCODE_SHIFT),
++ ORIS_OPCODE_MASK = (63u << OPCODE_SHIFT),
++ RLDICR_OPCODE_MASK = (63u << OPCODE_SHIFT) | (7u << XO_27_29_SHIFT)
++ };
++
++ enum opcdxos {
++ ADD_OPCODE = (31u << OPCODE_SHIFT | 266u << 1),
++ ADDC_OPCODE = (31u << OPCODE_SHIFT | 10u << 1),
++ ADDI_OPCODE = (14u << OPCODE_SHIFT),
++ ADDIS_OPCODE = (15u << OPCODE_SHIFT),
++ ADDIC__OPCODE = (13u << OPCODE_SHIFT),
++ ADDE_OPCODE = (31u << OPCODE_SHIFT | 138u << 1),
++ SUBF_OPCODE = (31u << OPCODE_SHIFT | 40u << 1),
++ SUBFC_OPCODE = (31u << OPCODE_SHIFT | 8u << 1),
++ SUBFE_OPCODE = (31u << OPCODE_SHIFT | 136u << 1),
++ SUBFIC_OPCODE = (8u << OPCODE_SHIFT),
++ SUBFZE_OPCODE = (31u << OPCODE_SHIFT | 200u << 1),
++ DIVW_OPCODE = (31u << OPCODE_SHIFT | 491u << 1),
++ MULLW_OPCODE = (31u << OPCODE_SHIFT | 235u << 1),
++ MULHW_OPCODE = (31u << OPCODE_SHIFT | 75u << 1),
++ MULHWU_OPCODE = (31u << OPCODE_SHIFT | 11u << 1),
++ MULLI_OPCODE = (7u << OPCODE_SHIFT),
++ AND_OPCODE = (31u << OPCODE_SHIFT | 28u << 1),
++ ANDI_OPCODE = (28u << OPCODE_SHIFT),
++ ANDIS_OPCODE = (29u << OPCODE_SHIFT),
++ ANDC_OPCODE = (31u << OPCODE_SHIFT | 60u << 1),
++ ORC_OPCODE = (31u << OPCODE_SHIFT | 412u << 1),
++ OR_OPCODE = (31u << OPCODE_SHIFT | 444u << 1),
++ ORI_OPCODE = (24u << OPCODE_SHIFT),
++ ORIS_OPCODE = (25u << OPCODE_SHIFT),
++ XOR_OPCODE = (31u << OPCODE_SHIFT | 316u << 1),
++ XORI_OPCODE = (26u << OPCODE_SHIFT),
++ XORIS_OPCODE = (27u << OPCODE_SHIFT),
++
++ NEG_OPCODE = (31u << OPCODE_SHIFT | 104u << 1),
++
++ RLWINM_OPCODE = (21u << OPCODE_SHIFT),
++ CLRRWI_OPCODE = RLWINM_OPCODE,
++ CLRLWI_OPCODE = RLWINM_OPCODE,
++
++ RLWIMI_OPCODE = (20u << OPCODE_SHIFT),
++
++ SLW_OPCODE = (31u << OPCODE_SHIFT | 24u << 1),
++ SLWI_OPCODE = RLWINM_OPCODE,
++ SRW_OPCODE = (31u << OPCODE_SHIFT | 536u << 1),
++ SRWI_OPCODE = RLWINM_OPCODE,
++ SRAW_OPCODE = (31u << OPCODE_SHIFT | 792u << 1),
++ SRAWI_OPCODE = (31u << OPCODE_SHIFT | 824u << 1),
++
++ CMP_OPCODE = (31u << OPCODE_SHIFT | 0u << 1),
++ CMPI_OPCODE = (11u << OPCODE_SHIFT),
++ CMPL_OPCODE = (31u << OPCODE_SHIFT | 32u << 1),
++ CMPLI_OPCODE = (10u << OPCODE_SHIFT),
++
++ ISEL_OPCODE = (31u << OPCODE_SHIFT | 15u << 1),
++
++ MTLR_OPCODE = (31u << OPCODE_SHIFT | 467u << 1 | 8 << SPR_0_4_SHIFT),
++ MFLR_OPCODE = (31u << OPCODE_SHIFT | 339u << 1 | 8 << SPR_0_4_SHIFT),
++
++ MTCRF_OPCODE = (31u << OPCODE_SHIFT | 144u << 1),
++ MFCR_OPCODE = (31u << OPCODE_SHIFT | 19u << 1),
++ MCRF_OPCODE = (19u << OPCODE_SHIFT | 0u << 1),
++
++ // condition register logic instructions
++ CRAND_OPCODE = (19u << OPCODE_SHIFT | 257u << 1),
++ CRNAND_OPCODE = (19u << OPCODE_SHIFT | 225u << 1),
++ CROR_OPCODE = (19u << OPCODE_SHIFT | 449u << 1),
++ CRXOR_OPCODE = (19u << OPCODE_SHIFT | 193u << 1),
++ CRNOR_OPCODE = (19u << OPCODE_SHIFT | 33u << 1),
++ CREQV_OPCODE = (19u << OPCODE_SHIFT | 289u << 1),
++ CRANDC_OPCODE = (19u << OPCODE_SHIFT | 129u << 1),
++ CRORC_OPCODE = (19u << OPCODE_SHIFT | 417u << 1),
++
++ BCLR_OPCODE = (19u << OPCODE_SHIFT | 16u << 1),
++ BXX_OPCODE = (18u << OPCODE_SHIFT),
++ BCXX_OPCODE = (16u << OPCODE_SHIFT),
++
++ // CTR-related opcodes
++ BCCTR_OPCODE = (19u << OPCODE_SHIFT | 528u << 1),
++ MTCTR_OPCODE = (31u << OPCODE_SHIFT | 467u << 1 | 9 << SPR_0_4_SHIFT),
++ MFCTR_OPCODE = (31u << OPCODE_SHIFT | 339u << 1 | 9 << SPR_0_4_SHIFT),
++
++
++ LWZ_OPCODE = (32u << OPCODE_SHIFT),
++ LWZX_OPCODE = (31u << OPCODE_SHIFT | 23u << 1),
++ LWZU_OPCODE = (33u << OPCODE_SHIFT),
++
++ LHA_OPCODE = (42u << OPCODE_SHIFT),
++ LHAX_OPCODE = (31u << OPCODE_SHIFT | 343u << 1),
++ LHAU_OPCODE = (43u << OPCODE_SHIFT),
++
++ LHZ_OPCODE = (40u << OPCODE_SHIFT),
++ LHZX_OPCODE = (31u << OPCODE_SHIFT | 279u << 1),
++ LHZU_OPCODE = (41u << OPCODE_SHIFT),
++
++ LBZ_OPCODE = (34u << OPCODE_SHIFT),
++ LBZX_OPCODE = (31u << OPCODE_SHIFT | 87u << 1),
++ LBZU_OPCODE = (35u << OPCODE_SHIFT),
++
++ STW_OPCODE = (36u << OPCODE_SHIFT),
++ STWX_OPCODE = (31u << OPCODE_SHIFT | 151u << 1),
++ STWU_OPCODE = (37u << OPCODE_SHIFT),
++ STWUX_OPCODE = (31u << OPCODE_SHIFT | 183u << 1),
++
++ STH_OPCODE = (44u << OPCODE_SHIFT),
++ STHX_OPCODE = (31u << OPCODE_SHIFT | 407u << 1),
++ STHU_OPCODE = (45u << OPCODE_SHIFT),
++
++ STB_OPCODE = (38u << OPCODE_SHIFT),
++ STBX_OPCODE = (31u << OPCODE_SHIFT | 215u << 1),
++ STBU_OPCODE = (39u << OPCODE_SHIFT),
++
++ EXTSB_OPCODE = (31u << OPCODE_SHIFT | 954u << 1),
++ EXTSH_OPCODE = (31u << OPCODE_SHIFT | 922u << 1),
++ EXTSW_OPCODE = (31u << OPCODE_SHIFT | 986u << 1), // X-FORM
++
++ // 32 bit opcode encodings
++
++ LWA_OPCODE = (58u << OPCODE_SHIFT | 2u << XO_30_31_SHIFT), // DS-FORM
++ LWAX_OPCODE = (31u << OPCODE_SHIFT | 341u << XO_21_30_SHIFT), // X-FORM
++
++ CNTLZW_OPCODE = (31u << OPCODE_SHIFT | 26u << XO_21_30_SHIFT), // X-FORM
++
++ // 64 bit opcode encodings
++
++ LD_OPCODE = (58u << OPCODE_SHIFT | 0u << XO_30_31_SHIFT), // DS-FORM
++ LDU_OPCODE = (58u << OPCODE_SHIFT | 1u << XO_30_31_SHIFT), // DS-FORM
++ LDX_OPCODE = (31u << OPCODE_SHIFT | 21u << XO_21_30_SHIFT), // X-FORM
++
++ STD_OPCODE = (62u << OPCODE_SHIFT | 0u << XO_30_31_SHIFT), // DS-FORM
++ STDU_OPCODE = (62u << OPCODE_SHIFT | 1u << XO_30_31_SHIFT), // DS-FORM
++ STDUX_OPCODE = (31u << OPCODE_SHIFT | 181u << 1), // X-FORM
++ STDX_OPCODE = (31u << OPCODE_SHIFT | 149u << XO_21_30_SHIFT), // X-FORM
++
++ RLDICR_OPCODE = (30u << OPCODE_SHIFT | 1u << XO_27_29_SHIFT), // MD-FORM
++ RLDICL_OPCODE = (30u << OPCODE_SHIFT | 0u << XO_27_29_SHIFT), // MD-FORM
++ RLDIC_OPCODE = (30u << OPCODE_SHIFT | 2u << XO_27_29_SHIFT), // MD-FORM
++ RLDIMI_OPCODE = (30u << OPCODE_SHIFT | 3u << XO_27_29_SHIFT), // MD-FORM
++
++ SRADI_OPCODE = (31u << OPCODE_SHIFT | 413u << XO_21_29_SHIFT), // XS-FORM
++
++ SLD_OPCODE = (31u << OPCODE_SHIFT | 27u << 1), // X-FORM
++ SRD_OPCODE = (31u << OPCODE_SHIFT | 539u << 1), // X-FORM
++ SRAD_OPCODE = (31u << OPCODE_SHIFT | 794u << 1), // X-FORM
++
++ MULLD_OPCODE = (31u << OPCODE_SHIFT | 233u << 1), // XO-FORM
++ MULHD_OPCODE = (31u << OPCODE_SHIFT | 73u << 1), // XO-FORM
++ MULHDU_OPCODE = (31u << OPCODE_SHIFT | 9u << 1), // XO-FORM
++ DIVD_OPCODE = (31u << OPCODE_SHIFT | 489u << 1), // XO-FORM
++
++ CNTLZD_OPCODE = (31u << OPCODE_SHIFT | 58u << XO_21_30_SHIFT), // X-FORM
++ NAND_OPCODE = (31u << OPCODE_SHIFT | 476u << XO_21_30_SHIFT), // X-FORM
++ NOR_OPCODE = (31u << OPCODE_SHIFT | 124u << XO_21_30_SHIFT), // X-FORM
++
++
++ // opcodes only used for floating arithmetic
++ FADD_OPCODE = (63u << OPCODE_SHIFT | 21u << 1),
++ FADDS_OPCODE = (59u << OPCODE_SHIFT | 21u << 1),
++ FCMPU_OPCODE = (63u << OPCODE_SHIFT | 00u << 1),
++ FDIV_OPCODE = (63u << OPCODE_SHIFT | 18u << 1),
++ FDIVS_OPCODE = (59u << OPCODE_SHIFT | 18u << 1),
++ FMR_OPCODE = (63u << OPCODE_SHIFT | 72u << 1),
++ // These are special Power6 opcodes, reused for "lfdepx" and "stfdepx"
++ // on Power7. Do not use.
++ // MFFGPR_OPCODE = (31u << OPCODE_SHIFT | 607u << 1),
++ // MFTGPR_OPCODE = (31u << OPCODE_SHIFT | 735u << 1),
++ CMPB_OPCODE = (31u << OPCODE_SHIFT | 508 << 1),
++ POPCNTB_OPCODE = (31u << OPCODE_SHIFT | 122 << 1),
++ POPCNTW_OPCODE = (31u << OPCODE_SHIFT | 378 << 1),
++ POPCNTD_OPCODE = (31u << OPCODE_SHIFT | 506 << 1),
++ FABS_OPCODE = (63u << OPCODE_SHIFT | 264u << 1),
++ FNABS_OPCODE = (63u << OPCODE_SHIFT | 136u << 1),
++ FMUL_OPCODE = (63u << OPCODE_SHIFT | 25u << 1),
++ FMULS_OPCODE = (59u << OPCODE_SHIFT | 25u << 1),
++ FNEG_OPCODE = (63u << OPCODE_SHIFT | 40u << 1),
++ FSUB_OPCODE = (63u << OPCODE_SHIFT | 20u << 1),
++ FSUBS_OPCODE = (59u << OPCODE_SHIFT | 20u << 1),
++
++ // PPC64-internal FPU conversion opcodes
++ FCFID_OPCODE = (63u << OPCODE_SHIFT | 846u << 1),
++ FCFIDS_OPCODE = (59u << OPCODE_SHIFT | 846u << 1),
++ FCTID_OPCODE = (63u << OPCODE_SHIFT | 814u << 1),
++ FCTIDZ_OPCODE = (63u << OPCODE_SHIFT | 815u << 1),
++ FCTIW_OPCODE = (63u << OPCODE_SHIFT | 14u << 1),
++ FCTIWZ_OPCODE = (63u << OPCODE_SHIFT | 15u << 1),
++ FRSP_OPCODE = (63u << OPCODE_SHIFT | 12u << 1),
++
++ // WARNING: Using fmadd results in a non-compliant VM. Some floating-point
++ // TCK tests will fail.
++ FMADD_OPCODE = (59u << OPCODE_SHIFT | 29u << 1),
++ DMADD_OPCODE = (63u << OPCODE_SHIFT | 29u << 1),
++ FMSUB_OPCODE = (59u << OPCODE_SHIFT | 28u << 1),
++ DMSUB_OPCODE = (63u << OPCODE_SHIFT | 28u << 1),
++ FNMADD_OPCODE = (59u << OPCODE_SHIFT | 31u << 1),
++ DNMADD_OPCODE = (63u << OPCODE_SHIFT | 31u << 1),
++ FNMSUB_OPCODE = (59u << OPCODE_SHIFT | 30u << 1),
++ DNMSUB_OPCODE = (63u << OPCODE_SHIFT | 30u << 1),
++
++ LFD_OPCODE = (50u << OPCODE_SHIFT | 00u << 1),
++ LFDU_OPCODE = (51u << OPCODE_SHIFT | 00u << 1),
++ LFDX_OPCODE = (31u << OPCODE_SHIFT | 599u << 1),
++ LFS_OPCODE = (48u << OPCODE_SHIFT | 00u << 1),
++ LFSU_OPCODE = (49u << OPCODE_SHIFT | 00u << 1),
++ LFSX_OPCODE = (31u << OPCODE_SHIFT | 535u << 1),
++
++ STFD_OPCODE = (54u << OPCODE_SHIFT | 00u << 1),
++ STFDU_OPCODE = (55u << OPCODE_SHIFT | 00u << 1),
++ STFDX_OPCODE = (31u << OPCODE_SHIFT | 727u << 1),
++ STFS_OPCODE = (52u << OPCODE_SHIFT | 00u << 1),
++ STFSU_OPCODE = (53u << OPCODE_SHIFT | 00u << 1),
++ STFSX_OPCODE = (31u << OPCODE_SHIFT | 663u << 1),
++
++ FSQRT_OPCODE = (63u << OPCODE_SHIFT | 22u << 1), // A-FORM
++ FSQRTS_OPCODE = (59u << OPCODE_SHIFT | 22u << 1), // A-FORM
++
++ // Vector instruction support for >= Power6
++ // Vector Storage Access
++ LVEBX_OPCODE = (31u << OPCODE_SHIFT | 7u << 1),
++ LVEHX_OPCODE = (31u << OPCODE_SHIFT | 39u << 1),
++ LVEWX_OPCODE = (31u << OPCODE_SHIFT | 71u << 1),
++ LVX_OPCODE = (31u << OPCODE_SHIFT | 103u << 1),
++ LVXL_OPCODE = (31u << OPCODE_SHIFT | 359u << 1),
++ STVEBX_OPCODE = (31u << OPCODE_SHIFT | 135u << 1),
++ STVEHX_OPCODE = (31u << OPCODE_SHIFT | 167u << 1),
++ STVEWX_OPCODE = (31u << OPCODE_SHIFT | 199u << 1),
++ STVX_OPCODE = (31u << OPCODE_SHIFT | 231u << 1),
++ STVXL_OPCODE = (31u << OPCODE_SHIFT | 487u << 1),
++ LVSL_OPCODE = (31u << OPCODE_SHIFT | 6u << 1),
++ LVSR_OPCODE = (31u << OPCODE_SHIFT | 38u << 1),
++
++ // Vector Permute and Formatting
++ VPKPX_OPCODE = (4u << OPCODE_SHIFT | 782u ),
++ VPKSHSS_OPCODE = (4u << OPCODE_SHIFT | 398u ),
++ VPKSWSS_OPCODE = (4u << OPCODE_SHIFT | 462u ),
++ VPKSHUS_OPCODE = (4u << OPCODE_SHIFT | 270u ),
++ VPKSWUS_OPCODE = (4u << OPCODE_SHIFT | 334u ),
++ VPKUHUM_OPCODE = (4u << OPCODE_SHIFT | 14u ),
++ VPKUWUM_OPCODE = (4u << OPCODE_SHIFT | 78u ),
++ VPKUHUS_OPCODE = (4u << OPCODE_SHIFT | 142u ),
++ VPKUWUS_OPCODE = (4u << OPCODE_SHIFT | 206u ),
++ VUPKHPX_OPCODE = (4u << OPCODE_SHIFT | 846u ),
++ VUPKHSB_OPCODE = (4u << OPCODE_SHIFT | 526u ),
++ VUPKHSH_OPCODE = (4u << OPCODE_SHIFT | 590u ),
++ VUPKLPX_OPCODE = (4u << OPCODE_SHIFT | 974u ),
++ VUPKLSB_OPCODE = (4u << OPCODE_SHIFT | 654u ),
++ VUPKLSH_OPCODE = (4u << OPCODE_SHIFT | 718u ),
++
++ VMRGHB_OPCODE = (4u << OPCODE_SHIFT | 12u ),
++ VMRGHW_OPCODE = (4u << OPCODE_SHIFT | 140u ),
++ VMRGHH_OPCODE = (4u << OPCODE_SHIFT | 76u ),
++ VMRGLB_OPCODE = (4u << OPCODE_SHIFT | 268u ),
++ VMRGLW_OPCODE = (4u << OPCODE_SHIFT | 396u ),
++ VMRGLH_OPCODE = (4u << OPCODE_SHIFT | 332u ),
++
++ VSPLT_OPCODE = (4u << OPCODE_SHIFT | 524u ),
++ VSPLTH_OPCODE = (4u << OPCODE_SHIFT | 588u ),
++ VSPLTW_OPCODE = (4u << OPCODE_SHIFT | 652u ),
++ VSPLTISB_OPCODE= (4u << OPCODE_SHIFT | 780u ),
++ VSPLTISH_OPCODE= (4u << OPCODE_SHIFT | 844u ),
++ VSPLTISW_OPCODE= (4u << OPCODE_SHIFT | 908u ),
++
++ VPERM_OPCODE = (4u << OPCODE_SHIFT | 43u ),
++ VSEL_OPCODE = (4u << OPCODE_SHIFT | 42u ),
++
++ VSL_OPCODE = (4u << OPCODE_SHIFT | 452u ),
++ VSLDOI_OPCODE = (4u << OPCODE_SHIFT | 44u ),
++ VSLO_OPCODE = (4u << OPCODE_SHIFT | 1036u ),
++ VSR_OPCODE = (4u << OPCODE_SHIFT | 708u ),
++ VSRO_OPCODE = (4u << OPCODE_SHIFT | 1100u ),
++
++ // Vector Integer
++ VADDCUW_OPCODE = (4u << OPCODE_SHIFT | 384u ),
++ VADDSHS_OPCODE = (4u << OPCODE_SHIFT | 832u ),
++ VADDSBS_OPCODE = (4u << OPCODE_SHIFT | 768u ),
++ VADDSWS_OPCODE = (4u << OPCODE_SHIFT | 896u ),
++ VADDUBM_OPCODE = (4u << OPCODE_SHIFT | 0u ),
++ VADDUWM_OPCODE = (4u << OPCODE_SHIFT | 128u ),
++ VADDUHM_OPCODE = (4u << OPCODE_SHIFT | 64u ),
++ VADDUBS_OPCODE = (4u << OPCODE_SHIFT | 512u ),
++ VADDUWS_OPCODE = (4u << OPCODE_SHIFT | 640u ),
++ VADDUHS_OPCODE = (4u << OPCODE_SHIFT | 576u ),
++ VSUBCUW_OPCODE = (4u << OPCODE_SHIFT | 1408u ),
++ VSUBSHS_OPCODE = (4u << OPCODE_SHIFT | 1856u ),
++ VSUBSBS_OPCODE = (4u << OPCODE_SHIFT | 1792u ),
++ VSUBSWS_OPCODE = (4u << OPCODE_SHIFT | 1920u ),
++ VSUBUBM_OPCODE = (4u << OPCODE_SHIFT | 1024u ),
++ VSUBUWM_OPCODE = (4u << OPCODE_SHIFT | 1152u ),
++ VSUBUHM_OPCODE = (4u << OPCODE_SHIFT | 1088u ),
++ VSUBUBS_OPCODE = (4u << OPCODE_SHIFT | 1536u ),
++ VSUBUWS_OPCODE = (4u << OPCODE_SHIFT | 1664u ),
++ VSUBUHS_OPCODE = (4u << OPCODE_SHIFT | 1600u ),
++
++ VMULESB_OPCODE = (4u << OPCODE_SHIFT | 776u ),
++ VMULEUB_OPCODE = (4u << OPCODE_SHIFT | 520u ),
++ VMULESH_OPCODE = (4u << OPCODE_SHIFT | 840u ),
++ VMULEUH_OPCODE = (4u << OPCODE_SHIFT | 584u ),
++ VMULOSB_OPCODE = (4u << OPCODE_SHIFT | 264u ),
++ VMULOUB_OPCODE = (4u << OPCODE_SHIFT | 8u ),
++ VMULOSH_OPCODE = (4u << OPCODE_SHIFT | 328u ),
++ VMULOUH_OPCODE = (4u << OPCODE_SHIFT | 72u ),
++ VMHADDSHS_OPCODE=(4u << OPCODE_SHIFT | 32u ),
++ VMHRADDSHS_OPCODE=(4u << OPCODE_SHIFT | 33u ),
++ VMLADDUHM_OPCODE=(4u << OPCODE_SHIFT | 34u ),
++ VMSUBUHM_OPCODE= (4u << OPCODE_SHIFT | 36u ),
++ VMSUMMBM_OPCODE= (4u << OPCODE_SHIFT | 37u ),
++ VMSUMSHM_OPCODE= (4u << OPCODE_SHIFT | 40u ),
++ VMSUMSHS_OPCODE= (4u << OPCODE_SHIFT | 41u ),
++ VMSUMUHM_OPCODE= (4u << OPCODE_SHIFT | 38u ),
++ VMSUMUHS_OPCODE= (4u << OPCODE_SHIFT | 39u ),
++
++ VSUMSWS_OPCODE = (4u << OPCODE_SHIFT | 1928u ),
++ VSUM2SWS_OPCODE= (4u << OPCODE_SHIFT | 1672u ),
++ VSUM4SBS_OPCODE= (4u << OPCODE_SHIFT | 1800u ),
++ VSUM4UBS_OPCODE= (4u << OPCODE_SHIFT | 1544u ),
++ VSUM4SHS_OPCODE= (4u << OPCODE_SHIFT | 1608u ),
++
++ VAVGSB_OPCODE = (4u << OPCODE_SHIFT | 1282u ),
++ VAVGSW_OPCODE = (4u << OPCODE_SHIFT | 1410u ),
++ VAVGSH_OPCODE = (4u << OPCODE_SHIFT | 1346u ),
++ VAVGUB_OPCODE = (4u << OPCODE_SHIFT | 1026u ),
++ VAVGUW_OPCODE = (4u << OPCODE_SHIFT | 1154u ),
++ VAVGUH_OPCODE = (4u << OPCODE_SHIFT | 1090u ),
++
++ VMAXSB_OPCODE = (4u << OPCODE_SHIFT | 258u ),
++ VMAXSW_OPCODE = (4u << OPCODE_SHIFT | 386u ),
++ VMAXSH_OPCODE = (4u << OPCODE_SHIFT | 322u ),
++ VMAXUB_OPCODE = (4u << OPCODE_SHIFT | 2u ),
++ VMAXUW_OPCODE = (4u << OPCODE_SHIFT | 130u ),
++ VMAXUH_OPCODE = (4u << OPCODE_SHIFT | 66u ),
++ VMINSB_OPCODE = (4u << OPCODE_SHIFT | 770u ),
++ VMINSW_OPCODE = (4u << OPCODE_SHIFT | 898u ),
++ VMINSH_OPCODE = (4u << OPCODE_SHIFT | 834u ),
++ VMINUB_OPCODE = (4u << OPCODE_SHIFT | 514u ),
++ VMINUW_OPCODE = (4u << OPCODE_SHIFT | 642u ),
++ VMINUH_OPCODE = (4u << OPCODE_SHIFT | 578u ),
++
++ VCMPEQUB_OPCODE= (4u << OPCODE_SHIFT | 6u ),
++ VCMPEQUH_OPCODE= (4u << OPCODE_SHIFT | 70u ),
++ VCMPEQUW_OPCODE= (4u << OPCODE_SHIFT | 134u ),
++ VCMPGTSH_OPCODE= (4u << OPCODE_SHIFT | 838u ),
++ VCMPGTSB_OPCODE= (4u << OPCODE_SHIFT | 774u ),
++ VCMPGTSW_OPCODE= (4u << OPCODE_SHIFT | 902u ),
++ VCMPGTUB_OPCODE= (4u << OPCODE_SHIFT | 518u ),
++ VCMPGTUH_OPCODE= (4u << OPCODE_SHIFT | 582u ),
++ VCMPGTUW_OPCODE= (4u << OPCODE_SHIFT | 646u ),
++
++ VAND_OPCODE = (4u << OPCODE_SHIFT | 1028u ),
++ VANDC_OPCODE = (4u << OPCODE_SHIFT | 1092u ),
++ VNOR_OPCODE = (4u << OPCODE_SHIFT | 1284u ),
++ VOR_OPCODE = (4u << OPCODE_SHIFT | 1156u ),
++ VXOR_OPCODE = (4u << OPCODE_SHIFT | 1220u ),
++ VRLB_OPCODE = (4u << OPCODE_SHIFT | 4u ),
++ VRLW_OPCODE = (4u << OPCODE_SHIFT | 132u ),
++ VRLH_OPCODE = (4u << OPCODE_SHIFT | 68u ),
++ VSLB_OPCODE = (4u << OPCODE_SHIFT | 260u ),
++ VSKW_OPCODE = (4u << OPCODE_SHIFT | 388u ), // encoding of vslw
++ VSLH_OPCODE = (4u << OPCODE_SHIFT | 324u ),
++ VSRB_OPCODE = (4u << OPCODE_SHIFT | 516u ),
++ VSRW_OPCODE = (4u << OPCODE_SHIFT | 644u ),
++ VSRH_OPCODE = (4u << OPCODE_SHIFT | 580u ),
++ VSRAB_OPCODE = (4u << OPCODE_SHIFT | 772u ),
++ VSRAW_OPCODE = (4u << OPCODE_SHIFT | 900u ),
++ VSRAH_OPCODE = (4u << OPCODE_SHIFT | 836u ),
++
++ // Vector Floating-Point
++ // not implemented yet
++
++ // Vector Status and Control
++ MTVSCR_OPCODE = (4u << OPCODE_SHIFT | 1604u ),
++ MFVSCR_OPCODE = (4u << OPCODE_SHIFT | 1540u ),
++
++ // Icache and dcache related instructions
++ DCBA_OPCODE = (31u << OPCODE_SHIFT | 758u << 1),
++ DCBZ_OPCODE = (31u << OPCODE_SHIFT | 1014u << 1),
++ DCBST_OPCODE = (31u << OPCODE_SHIFT | 54u << 1),
++ DCBF_OPCODE = (31u << OPCODE_SHIFT | 86u << 1),
++
++ DCBT_OPCODE = (31u << OPCODE_SHIFT | 278u << 1),
++ DCBTST_OPCODE = (31u << OPCODE_SHIFT | 246u << 1),
++ ICBI_OPCODE = (31u << OPCODE_SHIFT | 982u << 1),
++
++ // Instruction synchronization
++ ISYNC_OPCODE = (19u << OPCODE_SHIFT | 150u << 1),
++ // Memory barriers
++ SYNC_OPCODE = (31u << OPCODE_SHIFT | 598u << 1),
++ EIEIO_OPCODE = (31u << OPCODE_SHIFT | 854u << 1),
++
++ // Trap instructions
++ TDI_OPCODE = (2u << OPCODE_SHIFT),
++ TWI_OPCODE = (3u << OPCODE_SHIFT),
++ TD_OPCODE = (31u << OPCODE_SHIFT | 68u << 1),
++ TW_OPCODE = (31u << OPCODE_SHIFT | 4u << 1),
++
++ // Atomics.
++ LWARX_OPCODE = (31u << OPCODE_SHIFT | 20u << 1),
++ LDARX_OPCODE = (31u << OPCODE_SHIFT | 84u << 1),
++ STWCX_OPCODE = (31u << OPCODE_SHIFT | 150u << 1),
++ STDCX_OPCODE = (31u << OPCODE_SHIFT | 214u << 1)
++
++ };
++
++ // Trap instructions TO bits
++ enum trap_to_bits {
++ // single bits
++ traptoLessThanSigned = 1 << 4, // 0, left end
++ traptoGreaterThanSigned = 1 << 3,
++ traptoEqual = 1 << 2,
++ traptoLessThanUnsigned = 1 << 1,
++ traptoGreaterThanUnsigned = 1 << 0, // 4, right end
++
++ // compound ones
++ traptoUnconditional = (traptoLessThanSigned |
++ traptoGreaterThanSigned |
++ traptoEqual |
++ traptoLessThanUnsigned |
++ traptoGreaterThanUnsigned)
++ };
++
++ // Branch hints BH field
++ enum branch_hint_bh {
++ // bclr cases:
++ bhintbhBCLRisReturn = 0,
++ bhintbhBCLRisNotReturnButSame = 1,
++ bhintbhBCLRisNotPredictable = 3,
++
++ // bcctr cases:
++ bhintbhBCCTRisNotReturnButSame = 0,
++ bhintbhBCCTRisNotPredictable = 3
++ };
++
++ // Branch prediction hints AT field
++ enum branch_hint_at {
++ bhintatNoHint = 0, // at=00
++ bhintatIsNotTaken = 2, // at=10
++ bhintatIsTaken = 3 // at=11
++ };
++
++ // Branch prediction hints
++ enum branch_hint_concept {
++ // Use the same encoding as branch_hint_at to simplify the code.
++ bhintNoHint = bhintatNoHint,
++ bhintIsNotTaken = bhintatIsNotTaken,
++ bhintIsTaken = bhintatIsTaken
++ };
++
++ // Used in BO field of branch instruction.
++ enum branch_condition {
++ bcondCRbiIs0 = 4, // bo=001at
++ bcondCRbiIs1 = 12, // bo=011at
++ bcondAlways = 20 // bo=10100
++ };
++
++ // Branch condition with combined prediction hints.
++ enum branch_condition_with_hint {
++ bcondCRbiIs0_bhintNoHint = bcondCRbiIs0 | bhintatNoHint,
++ bcondCRbiIs0_bhintIsNotTaken = bcondCRbiIs0 | bhintatIsNotTaken,
++ bcondCRbiIs0_bhintIsTaken = bcondCRbiIs0 | bhintatIsTaken,
++ bcondCRbiIs1_bhintNoHint = bcondCRbiIs1 | bhintatNoHint,
++ bcondCRbiIs1_bhintIsNotTaken = bcondCRbiIs1 | bhintatIsNotTaken,
++ bcondCRbiIs1_bhintIsTaken = bcondCRbiIs1 | bhintatIsTaken,
++ };
++
++ // Elemental Memory Barriers (>=Power 8)
++ enum Elemental_Membar_mask_bits {
++ StoreStore = 1 << 0,
++ StoreLoad = 1 << 1,
++ LoadStore = 1 << 2,
++ LoadLoad = 1 << 3
++ };
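++
++ // Usage sketch: with elemental_membar() (declared below), an
++ // acquire-style barrier could be expressed as
++ //   elemental_membar(LoadLoad | LoadStore);
++ // and OR-ing all four bits gives a full two-way barrier.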
++
++ // Branch prediction hints.
++ inline static int add_bhint_to_boint(const int bhint, const int boint) {
++ switch (boint) {
++ case bcondCRbiIs0:
++ case bcondCRbiIs1:
++ // branch_hint and branch_hint_at have same encodings
++ assert( (int)bhintNoHint == (int)bhintatNoHint
++ && (int)bhintIsNotTaken == (int)bhintatIsNotTaken
++ && (int)bhintIsTaken == (int)bhintatIsTaken,
++ "wrong encodings");
++ assert((bhint & 0x03) == bhint, "wrong encodings");
++ return (boint & ~0x03) | bhint;
++ case bcondAlways:
++ // no branch_hint
++ return boint;
++ default:
++ ShouldNotReachHere();
++ return 0;
++ }
++ }
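++
++ // Example: add_bhint_to_boint(bhintIsTaken, bcondCRbiIs1) returns
++ // (12 & ~0x03) | 3 == 15 == bcondCRbiIs1_bhintIsTaken.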
++
++ // Extract bcond from boint.
++ inline static int inv_boint_bcond(const int boint) {
++ int r_bcond = boint & ~0x03;
++ assert(r_bcond == bcondCRbiIs0 ||
++ r_bcond == bcondCRbiIs1 ||
++ r_bcond == bcondAlways,
++ "bad branch condition");
++ return r_bcond;
++ }
++
++ // Extract bhint from boint.
++ inline static int inv_boint_bhint(const int boint) {
++ int r_bhint = boint & 0x03;
++ assert(r_bhint == bhintatNoHint ||
++ r_bhint == bhintatIsNotTaken ||
++ r_bhint == bhintatIsTaken,
++ "bad branch hint");
++ return r_bhint;
++ }
++
++ // Calculate opposite of given bcond.
++ inline static int opposite_bcond(const int bcond) {
++ switch (bcond) {
++ case bcondCRbiIs0:
++ return bcondCRbiIs1;
++ case bcondCRbiIs1:
++ return bcondCRbiIs0;
++ default:
++ ShouldNotReachHere();
++ return 0;
++ }
++ }
++
++ // Calculate opposite of given bhint.
++ inline static int opposite_bhint(const int bhint) {
++ switch (bhint) {
++ case bhintatNoHint:
++ return bhintatNoHint;
++ case bhintatIsNotTaken:
++ return bhintatIsTaken;
++ case bhintatIsTaken:
++ return bhintatIsNotTaken;
++ default:
++ ShouldNotReachHere();
++ return 0;
++ }
++ }
++
++ // PPC branch instructions
++ enum ppcops {
++ b_op = 18,
++ bc_op = 16,
++ bcr_op = 19
++ };
++
++ enum Condition {
++ negative = 0,
++ less = 0,
++ positive = 1,
++ greater = 1,
++ zero = 2,
++ equal = 2,
++ summary_overflow = 3,
++ };
++
++ public:
++ // Helper functions for groups of instructions
++
++ enum Predict { pt = 1, pn = 0 }; // pt = predict taken
++
++ // instruction must start at passed address
++ static int instr_len(unsigned char *instr) { return BytesPerInstWord; }
++
++ // instruction must be left-justified in argument
++ static int instr_len(unsigned long instr) { return BytesPerInstWord; }
++
++ // length of the longest instruction
++ static int instr_maxlen() { return BytesPerInstWord; }
++
++ // Test if x is within signed immediate range for nbits.
++ static bool is_simm(int x, unsigned int nbits) {
++ assert(0 < nbits && nbits < 32, "out of bounds");
++ const int min = -( ((int)1) << nbits-1 );
++ const int maxplus1 = ( ((int)1) << nbits-1 );
++ return min <= x && x < maxplus1;
++ }
++
++ static bool is_simm(jlong x, unsigned int nbits) {
++ assert(0 < nbits && nbits < 64, "out of bounds");
++ const jlong min = -( ((jlong)1) << nbits-1 );
++ const jlong maxplus1 = ( ((jlong)1) << nbits-1 );
++ return min <= x && x < maxplus1;
++ }
++
++ // Test if x is within unsigned immediate range for nbits
++ static bool is_uimm(int x, unsigned int nbits) {
++ assert(0 < nbits && nbits < 32, "out of bounds");
++ const int maxplus1 = ( ((int)1) << nbits );
++ return 0 <= x && x < maxplus1;
++ }
++
++ static bool is_uimm(jlong x, unsigned int nbits) {
++ assert(0 < nbits && nbits < 64, "out of bounds");
++ const jlong maxplus1 = ( ((jlong)1) << nbits );
++ return 0 <= x && x < maxplus1;
++ }
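++
++ // Examples: is_simm(-32768, 16) and is_simm(32767, 16) hold, while
++ // is_simm(32768, 16) does not; is_uimm(65535, 16) holds and
++ // is_uimm(-1, 16) does not.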
++
++ protected:
++ // helpers
++
++ // X is supposed to fit in a field "nbits" wide
++ // and be sign-extended. Check the range.
++ static void assert_signed_range(intptr_t x, int nbits) {
++ assert(nbits == 32 || (-(1 << nbits-1) <= x && x < (1 << nbits-1)),
++ "value out of range");
++ }
++
++ static void assert_signed_word_disp_range(intptr_t x, int nbits) {
++ assert((x & 3) == 0, "not word aligned");
++ assert_signed_range(x, nbits + 2);
++ }
++
++ static void assert_unsigned_const(int x, int nbits) {
++ assert(juint(x) < juint(1 << nbits), "unsigned constant out of range");
++ }
++
++ static int fmask(juint hi_bit, juint lo_bit) {
++ assert(hi_bit >= lo_bit && hi_bit < 32, "bad bits");
++ return (1 << ( hi_bit-lo_bit + 1 )) - 1;
++ }
++
++ // inverse of u_field
++ static int inv_u_field(int x, int hi_bit, int lo_bit) {
++ juint r = juint(x) >> lo_bit;
++ r &= fmask(hi_bit, lo_bit);
++ return int(r);
++ }
++
++ // signed version: extract from field and sign-extend
++ static int inv_s_field_ppc(int x, int hi_bit, int lo_bit) {
++ x = x << (31-hi_bit);
++ x = x >> (31-hi_bit+lo_bit);
++ return x;
++ }
++
++ static int u_field(int x, int hi_bit, int lo_bit) {
++ assert((x & ~fmask(hi_bit, lo_bit)) == 0, "value out of range");
++ int r = x << lo_bit;
++ assert(inv_u_field(r, hi_bit, lo_bit) == x, "just checking");
++ return r;
++ }
++
++ // Same as u_field for signed values
++ static int s_field(int x, int hi_bit, int lo_bit) {
++ int nbits = hi_bit - lo_bit + 1;
++ assert(nbits == 32 || (-(1 << nbits-1) <= x && x < (1 << nbits-1)),
++ "value out of range");
++ x &= fmask(hi_bit, lo_bit);
++ int r = x << lo_bit;
++ return r;
++ }
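++
++ // Round-trip example: u_field(5, 25, 21) places the value 5 into bits
++ // 21..25 (i.e. 5 << 21), and inv_u_field(5 << 21, 25, 21) recovers 5.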
++
++ // inv_op for ppc instructions
++ static int inv_op_ppc(int x) { return inv_u_field(x, 31, 26); }
++
++ // Determine target address from li, bd field of branch instruction.
++ static intptr_t inv_li_field(int x) {
++ intptr_t r = inv_s_field_ppc(x, 25, 2);
++ r = (r << 2);
++ return r;
++ }
++ static intptr_t inv_bd_field(int x, intptr_t pos) {
++ intptr_t r = inv_s_field_ppc(x, 15, 2);
++ r = (r << 2) + pos;
++ return r;
++ }
++
++ #define inv_opp_u_field(x, hi_bit, lo_bit) inv_u_field(x, 31-(lo_bit), 31-(hi_bit))
++ #define inv_opp_s_field(x, hi_bit, lo_bit) inv_s_field_ppc(x, 31-(lo_bit), 31-(hi_bit))
++ // Extract instruction fields from instruction words.
++ public:
++ static int inv_ra_field(int x) { return inv_opp_u_field(x, 15, 11); }
++ static int inv_rb_field(int x) { return inv_opp_u_field(x, 20, 16); }
++ static int inv_rt_field(int x) { return inv_opp_u_field(x, 10, 6); }
++ static int inv_rta_field(int x) { return inv_opp_u_field(x, 15, 11); }
++ static int inv_rs_field(int x) { return inv_opp_u_field(x, 10, 6); }
++ // The ds field uses opp_s_field(x, 31, 16), but its lowest 2 bits must be 0.
++ // inv_ds_field therefore extracts the range (x, 29, 16) and shifts left by 2,
++ // so the lowest two bits of the result are 0.
++ static int inv_ds_field(int x) { return inv_opp_s_field(x, 29, 16) << 2; }
++ static int inv_d1_field(int x) { return inv_opp_s_field(x, 31, 16); }
++ static int inv_si_field(int x) { return inv_opp_s_field(x, 31, 16); }
++ static int inv_to_field(int x) { return inv_opp_u_field(x, 10, 6); }
++ static int inv_lk_field(int x) { return inv_opp_u_field(x, 31, 31); }
++ static int inv_bo_field(int x) { return inv_opp_u_field(x, 10, 6); }
++ static int inv_bi_field(int x) { return inv_opp_u_field(x, 15, 11); }
++
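++ // Decoding example: the word 0x38640008 is `addi r3, r4, 8' (primary
++ // opcode 14): inv_rt_field yields 3, inv_ra_field yields 4 and
++ // inv_si_field yields 8.
++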
++ #define opp_u_field(x, hi_bit, lo_bit) u_field(x, 31-(lo_bit), 31-(hi_bit))
++ #define opp_s_field(x, hi_bit, lo_bit) s_field(x, 31-(lo_bit), 31-(hi_bit))
++
++ // instruction fields
++ static int aa( int x) { return opp_u_field(x, 30, 30); }
++ static int ba( int x) { return opp_u_field(x, 15, 11); }
++ static int bb( int x) { return opp_u_field(x, 20, 16); }
++ static int bc( int x) { return opp_u_field(x, 25, 21); }
++ static int bd( int x) { return opp_s_field(x, 29, 16); }
++ static int bf( ConditionRegister cr) { return bf(cr->encoding()); }
++ static int bf( int x) { return opp_u_field(x, 8, 6); }
++ static int bfa(ConditionRegister cr) { return bfa(cr->encoding()); }
++ static int bfa( int x) { return opp_u_field(x, 13, 11); }
++ static int bh( int x) { return opp_u_field(x, 20, 19); }
++ static int bi( int x) { return opp_u_field(x, 15, 11); }
++ static int bi0(ConditionRegister cr, Condition c) { return (cr->encoding() << 2) | c; }
++ static int bo( int x) { return opp_u_field(x, 10, 6); }
++ static int bt( int x) { return opp_u_field(x, 10, 6); }
++ static int d1( int x) { return opp_s_field(x, 31, 16); }
++ static int ds( int x) { assert((x & 0x3) == 0, "unaligned offset"); return opp_s_field(x, 31, 16); }
++ static int eh( int x) { return opp_u_field(x, 31, 31); }
++ static int flm( int x) { return opp_u_field(x, 14, 7); }
++ static int fra( FloatRegister r) { return fra(r->encoding());}
++ static int frb( FloatRegister r) { return frb(r->encoding());}
++ static int frc( FloatRegister r) { return frc(r->encoding());}
++ static int frs( FloatRegister r) { return frs(r->encoding());}
++ static int frt( FloatRegister r) { return frt(r->encoding());}
++ static int fra( int x) { return opp_u_field(x, 15, 11); }
++ static int frb( int x) { return opp_u_field(x, 20, 16); }
++ static int frc( int x) { return opp_u_field(x, 25, 21); }
++ static int frs( int x) { return opp_u_field(x, 10, 6); }
++ static int frt( int x) { return opp_u_field(x, 10, 6); }
++ static int fxm( int x) { return opp_u_field(x, 19, 12); }
++ static int l10( int x) { return opp_u_field(x, 10, 10); }
++ static int l15( int x) { return opp_u_field(x, 15, 15); }
++ static int l910( int x) { return opp_u_field(x, 10, 9); }
++ static int e1215( int x) { return opp_u_field(x, 15, 12); }
++ static int lev( int x) { return opp_u_field(x, 26, 20); }
++ static int li( int x) { return opp_s_field(x, 29, 6); }
++ static int lk( int x) { return opp_u_field(x, 31, 31); }
++ static int mb2125( int x) { return opp_u_field(x, 25, 21); }
++ static int me2630( int x) { return opp_u_field(x, 30, 26); }
++ static int mb2126( int x) { return opp_u_field(((x & 0x1f) << 1) | ((x & 0x20) >> 5), 26, 21); }
++ static int me2126( int x) { return mb2126(x); }
++ static int nb( int x) { return opp_u_field(x, 20, 16); }
++ //static int opcd( int x) { return opp_u_field(x, 5, 0); } // is contained in our opcodes
++ static int oe( int x) { return opp_u_field(x, 21, 21); }
++ static int ra( Register r) { return ra(r->encoding()); }
++ static int ra( int x) { return opp_u_field(x, 15, 11); }
++ static int rb( Register r) { return rb(r->encoding()); }
++ static int rb( int x) { return opp_u_field(x, 20, 16); }
++ static int rc( int x) { return opp_u_field(x, 31, 31); }
++ static int rs( Register r) { return rs(r->encoding()); }
++ static int rs( int x) { return opp_u_field(x, 10, 6); }
++ // We do not want to use R0 in memory accesses, because it reads as the value 0 there.
++ static int ra0mem( Register r) { assert(r != R0, "cannot use register R0 in memory access"); return ra(r); }
++ static int ra0mem( int x) { assert(x != 0, "cannot use register 0 in memory access"); return ra(x); }
++
++ // register r is target
++ static int rt( Register r) { return rs(r); }
++ static int rt( int x) { return rs(x); }
++ static int rta( Register r) { return ra(r); }
++ static int rta0mem( Register r) { rta(r); return ra0mem(r); }
++
++ static int sh1620( int x) { return opp_u_field(x, 20, 16); }
++ static int sh30( int x) { return opp_u_field(x, 30, 30); }
++ static int sh162030( int x) { return sh1620(x & 0x1f) | sh30((x & 0x20) >> 5); }
++ static int si( int x) { return opp_s_field(x, 31, 16); }
++ static int spr( int x) { return opp_u_field(x, 20, 11); }
++ static int sr( int x) { return opp_u_field(x, 15, 12); }
++ static int tbr( int x) { return opp_u_field(x, 20, 11); }
++ static int th( int x) { return opp_u_field(x, 10, 7); }
++ static int thct( int x) { assert((x&8) == 0, "must be valid cache specification"); return th(x); }
++ static int thds( int x) { assert((x&8) == 8, "must be valid stream specification"); return th(x); }
++ static int to( int x) { return opp_u_field(x, 10, 6); }
++ static int u( int x) { return opp_u_field(x, 19, 16); }
++ static int ui( int x) { return opp_u_field(x, 31, 16); }
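++
++ // Encoding sketch: the emitters in the companion .inline.hpp presumably
++ // compose these fields with the opcodes above, e.g. `addi d, a, si16' as
++ //   ADDI_OPCODE | rt(d) | ra(a) | simm(si16, 16)
++ // which for d = R3, a = R4, si16 = 8 yields the word 0x38640008 from the
++ // decoding example above.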
++
++ // Support vector instructions for >= Power6.
++ static int vra( int x) { return opp_u_field(x, 15, 11); }
++ static int vrb( int x) { return opp_u_field(x, 20, 16); }
++ static int vrc( int x) { return opp_u_field(x, 25, 21); }
++ static int vrs( int x) { return opp_u_field(x, 10, 6); }
++ static int vrt( int x) { return opp_u_field(x, 10, 6); }
++
++ static int vra( VectorRegister r) { return vra(r->encoding());}
++ static int vrb( VectorRegister r) { return vrb(r->encoding());}
++ static int vrc( VectorRegister r) { return vrc(r->encoding());}
++ static int vrs( VectorRegister r) { return vrs(r->encoding());}
++ static int vrt( VectorRegister r) { return vrt(r->encoding());}
++
++ static int vsplt_uim( int x) { return opp_u_field(x, 15, 12); } // for vsplt* instructions
++ static int vsplti_sim(int x) { return opp_u_field(x, 15, 11); } // for vsplti* instructions
++ static int vsldoi_shb(int x) { return opp_u_field(x, 25, 22); } // for vsldoi instruction
++ static int vcmp_rc( int x) { return opp_u_field(x, 21, 21); } // for vcmp* instructions
++
++ //static int xo1( int x) { return opp_u_field(x, 29, 21); }// is contained in our opcodes
++ //static int xo2( int x) { return opp_u_field(x, 30, 21); }// is contained in our opcodes
++ //static int xo3( int x) { return opp_u_field(x, 30, 22); }// is contained in our opcodes
++ //static int xo4( int x) { return opp_u_field(x, 30, 26); }// is contained in our opcodes
++ //static int xo5( int x) { return opp_u_field(x, 29, 27); }// is contained in our opcodes
++ //static int xo6( int x) { return opp_u_field(x, 30, 27); }// is contained in our opcodes
++ //static int xo7( int x) { return opp_u_field(x, 31, 30); }// is contained in our opcodes
++
++ protected:
++ // Compute relative address for branch.
++ static intptr_t disp(intptr_t x, intptr_t off) {
++ int xx = x - off;
++ xx = xx >> 2;
++ return xx;
++ }
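++
++ // Example: for a branch at off == 0x100 targeting x == 0x140,
++ // disp(0x140, 0x100) == (0x40 >> 2) == 16 instruction words.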
++
++ public:
++ // signed immediate, in low bits, nbits long
++ static int simm(int x, int nbits) {
++ assert_signed_range(x, nbits);
++ return x & ((1 << nbits) - 1);
++ }
++
++ // unsigned immediate, in low bits, nbits long
++ static int uimm(int x, int nbits) {
++ assert_unsigned_const(x, nbits);
++ return x & ((1 << nbits) - 1);
++ }
++
++ static void set_imm(int* instr, short s) {
++ // imm is always in the lower 16 bits of the instruction,
++ // so this is endian-neutral. Same for the get_imm below.
++ uint32_t w = *(uint32_t *)instr;
++ *instr = (int)((w & ~0x0000FFFF) | (s & 0x0000FFFF));
++ }
++
++ static int get_imm(address a, int instruction_number) {
++ return (short)((int *)a)[instruction_number];
++ }
++
++ static inline int hi16_signed( int x) { return (int)(int16_t)(x >> 16); }
++ static inline int lo16_unsigned(int x) { return x & 0xffff; }
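++
++ // Examples: for x == 0x12348765, hi16_signed(x) == 0x1234 and
++ // lo16_unsigned(x) == 0x8765. Since hi16_signed(0xFFFF8000) == -1, a
++ // lis/ori pair built from these halves rematerializes the sign-extended
++ // 32-bit value.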
++
++ protected:
++
++ // Extract the top 32 bits in a 64 bit word.
++ static int32_t hi32(int64_t x) {
++ int32_t r = int32_t((uint64_t)x >> 32);
++ return r;
++ }
++
++ public:
++
++ static inline unsigned int align_addr(unsigned int addr, unsigned int a) {
++ return ((addr + (a - 1)) & ~(a - 1));
++ }
++
++ static inline bool is_aligned(unsigned int addr, unsigned int a) {
++ return (0 == addr % a);
++ }
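++
++ // Examples: align_addr(13, 8) == 16 and align_addr(16, 8) == 16;
++ // is_aligned(16, 8) holds while is_aligned(13, 8) does not.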
++
++ void flush() {
++ AbstractAssembler::flush();
++ }
++
++ inline void emit_int32(int); // shadows AbstractAssembler::emit_int32
++ inline void emit_data(int);
++ inline void emit_data(int, RelocationHolder const&);
++ inline void emit_data(int, relocInfo::relocType rtype);
++
++ // Emit an address.
++ inline address emit_addr(const address addr = NULL);
++
++#if !defined(ABI_ELFv2)
++ // Emit a function descriptor with the specified entry point, TOC,
++ // and ENV. If the entry point is NULL, the descriptor will point
++ // just past the descriptor.
++ // Use values from friend functions as defaults.
++ inline address emit_fd(address entry = NULL,
++ address toc = (address) FunctionDescriptor::friend_toc,
++ address env = (address) FunctionDescriptor::friend_env);
++#endif
++
++ /////////////////////////////////////////////////////////////////////////////////////
++ // PPC instructions
++ /////////////////////////////////////////////////////////////////////////////////////
++
++ // Memory instructions use r0 as a hard-coded 0, e.g. to simulate loading
++ // immediates. The normal instruction encoders enforce that r0 is not
++ // passed to them. Use either the extended-mnemonic encoders or the special
++ // ra0 versions.
++
++ // Issue an illegal instruction.
++ inline void illtrap();
++ static inline bool is_illtrap(int x);
++
++ // PPC 1, section 3.3.8, Fixed-Point Arithmetic Instructions
++ inline void addi( Register d, Register a, int si16);
++ inline void addis(Register d, Register a, int si16);
++ private:
++ inline void addi_r0ok( Register d, Register a, int si16);
++ inline void addis_r0ok(Register d, Register a, int si16);
++ public:
++ inline void addic_( Register d, Register a, int si16);
++ inline void subfic( Register d, Register a, int si16);
++ inline void add( Register d, Register a, Register b);
++ inline void add_( Register d, Register a, Register b);
++ inline void subf( Register d, Register a, Register b); // d = b - a "Sub_from", as in ppc spec.
++ inline void sub( Register d, Register a, Register b); // d = a - b Swap operands of subf for readability.
++ inline void subf_( Register d, Register a, Register b);
++ inline void addc( Register d, Register a, Register b);
++ inline void addc_( Register d, Register a, Register b);
++ inline void subfc( Register d, Register a, Register b);
++ inline void subfc_( Register d, Register a, Register b);
++ inline void adde( Register d, Register a, Register b);
++ inline void adde_( Register d, Register a, Register b);
++ inline void subfe( Register d, Register a, Register b);
++ inline void subfe_( Register d, Register a, Register b);
++ inline void neg( Register d, Register a);
++ inline void neg_( Register d, Register a);
++ inline void mulli( Register d, Register a, int si16);
++ inline void mulld( Register d, Register a, Register b);
++ inline void mulld_( Register d, Register a, Register b);
++ inline void mullw( Register d, Register a, Register b);
++ inline void mullw_( Register d, Register a, Register b);
++ inline void mulhw( Register d, Register a, Register b);
++ inline void mulhw_( Register d, Register a, Register b);
++ inline void mulhd( Register d, Register a, Register b);
++ inline void mulhd_( Register d, Register a, Register b);
++ inline void mulhdu( Register d, Register a, Register b);
++ inline void mulhdu_(Register d, Register a, Register b);
++ inline void divd( Register d, Register a, Register b);
++ inline void divd_( Register d, Register a, Register b);
++ inline void divw( Register d, Register a, Register b);
++ inline void divw_( Register d, Register a, Register b);
++
++ // extended mnemonics
++ inline void li( Register d, int si16);
++ inline void lis( Register d, int si16);
++ inline void addir(Register d, int si16, Register a);
++
++ static bool is_addi(int x) {
++ return ADDI_OPCODE == (x & ADDI_OPCODE_MASK);
++ }
++ static bool is_addis(int x) {
++ return ADDIS_OPCODE == (x & ADDIS_OPCODE_MASK);
++ }
++ static bool is_bxx(int x) {
++ return BXX_OPCODE == (x & BXX_OPCODE_MASK);
++ }
++ static bool is_b(int x) {
++ return BXX_OPCODE == (x & BXX_OPCODE_MASK) && inv_lk_field(x) == 0;
++ }
++ static bool is_bl(int x) {
++ return BXX_OPCODE == (x & BXX_OPCODE_MASK) && inv_lk_field(x) == 1;
++ }
++ static bool is_bcxx(int x) {
++ return BCXX_OPCODE == (x & BCXX_OPCODE_MASK);
++ }
++ static bool is_bxx_or_bcxx(int x) {
++ return is_bxx(x) || is_bcxx(x);
++ }
++ static bool is_bctrl(int x) {
++ return x == 0x4e800421;
++ }
++ static bool is_bctr(int x) {
++ return x == 0x4e800420;
++ }
++ static bool is_bclr(int x) {
++ return BCLR_OPCODE == (x & XL_FORM_OPCODE_MASK);
++ }
++ static bool is_li(int x) {
++ return is_addi(x) && inv_ra_field(x)==0;
++ }
++ static bool is_lis(int x) {
++ return is_addis(x) && inv_ra_field(x)==0;
++ }
++ static bool is_mtctr(int x) {
++ return MTCTR_OPCODE == (x & MTCTR_OPCODE_MASK);
++ }
++ static bool is_ld(int x) {
++ return LD_OPCODE == (x & LD_OPCODE_MASK);
++ }
++ static bool is_std(int x) {
++ return STD_OPCODE == (x & STD_OPCODE_MASK);
++ }
++ static bool is_stdu(int x) {
++ return STDU_OPCODE == (x & STDU_OPCODE_MASK);
++ }
++ static bool is_stdx(int x) {
++ return STDX_OPCODE == (x & STDX_OPCODE_MASK);
++ }
++ static bool is_stdux(int x) {
++ return STDUX_OPCODE == (x & STDUX_OPCODE_MASK);
++ }
++ static bool is_stwx(int x) {
++ return STWX_OPCODE == (x & STWX_OPCODE_MASK);
++ }
++ static bool is_stwux(int x) {
++ return STWUX_OPCODE == (x & STWUX_OPCODE_MASK);
++ }
++ static bool is_stw(int x) {
++ return STW_OPCODE == (x & STW_OPCODE_MASK);
++ }
++ static bool is_stwu(int x) {
++ return STWU_OPCODE == (x & STWU_OPCODE_MASK);
++ }
++ static bool is_ori(int x) {
++ return ORI_OPCODE == (x & ORI_OPCODE_MASK);
++ }
++ static bool is_oris(int x) {
++ return ORIS_OPCODE == (x & ORIS_OPCODE_MASK);
++ }
++ static bool is_rldicr(int x) {
++ return (RLDICR_OPCODE == (x & RLDICR_OPCODE_MASK));
++ }
++ static bool is_nop(int x) {
++ return x == 0x60000000;
++ }
++ // endgroup opcode for Power6
++ static bool is_endgroup(int x) {
++ return is_ori(x) && inv_ra_field(x) == 1 && inv_rs_field(x) == 1 && inv_d1_field(x) == 0;
++ }
++
++
++ private:
++ // PPC 1, section 3.3.9, Fixed-Point Compare Instructions
++ inline void cmpi( ConditionRegister bf, int l, Register a, int si16);
++ inline void cmp( ConditionRegister bf, int l, Register a, Register b);
++ inline void cmpli(ConditionRegister bf, int l, Register a, int ui16);
++ inline void cmpl( ConditionRegister bf, int l, Register a, Register b);
++
++ public:
++ // extended mnemonics of Compare Instructions
++ inline void cmpwi( ConditionRegister crx, Register a, int si16);
++ inline void cmpdi( ConditionRegister crx, Register a, int si16);
++ inline void cmpw( ConditionRegister crx, Register a, Register b);
++ inline void cmpd( ConditionRegister crx, Register a, Register b);
++ inline void cmplwi(ConditionRegister crx, Register a, int ui16);
++ inline void cmpldi(ConditionRegister crx, Register a, int ui16);
++ inline void cmplw( ConditionRegister crx, Register a, Register b);
++ inline void cmpld( ConditionRegister crx, Register a, Register b);
++
++ inline void isel( Register d, Register a, Register b, int bc);
++ // Convenience version taking a condition register, a condition code and an invert flag. Omit b to keep the old value.
++ inline void isel( Register d, ConditionRegister cr, Condition cc, bool inv, Register a, Register b = noreg);
++ // Set d = 0 if (cr.cc) equals 1, otherwise b.
++ inline void isel_0( Register d, ConditionRegister cr, Condition cc, Register b = noreg);
++
++ // PPC 1, section 3.3.11, Fixed-Point Logical Instructions
++ void andi( Register a, Register s, int ui16); // optimized version
++ inline void andi_( Register a, Register s, int ui16);
++ inline void andis_( Register a, Register s, int ui16);
++ inline void ori( Register a, Register s, int ui16);
++ inline void oris( Register a, Register s, int ui16);
++ inline void xori( Register a, Register s, int ui16);
++ inline void xoris( Register a, Register s, int ui16);
++ inline void andr( Register a, Register s, Register b); // suffixed by 'r' as 'and' is C++ keyword
++ inline void and_( Register a, Register s, Register b);
++ // Turn or(rx,rx,rx) into a nop and avoid accidentally emitting an
++ // SMT-priority change instruction (see SMT instructions below).
++ inline void or_unchecked(Register a, Register s, Register b);
++ inline void orr( Register a, Register s, Register b); // suffixed by 'r' as 'or' is C++ keyword
++ inline void or_( Register a, Register s, Register b);
++ inline void xorr( Register a, Register s, Register b); // suffixed by 'r' as 'xor' is C++ keyword
++ inline void xor_( Register a, Register s, Register b);
++ inline void nand( Register a, Register s, Register b);
++ inline void nand_( Register a, Register s, Register b);
++ inline void nor( Register a, Register s, Register b);
++ inline void nor_( Register a, Register s, Register b);
++ inline void andc( Register a, Register s, Register b);
++ inline void andc_( Register a, Register s, Register b);
++ inline void orc( Register a, Register s, Register b);
++ inline void orc_( Register a, Register s, Register b);
++ inline void extsb( Register a, Register s);
++ inline void extsh( Register a, Register s);
++ inline void extsw( Register a, Register s);
++
++ // extended mnemonics
++ inline void nop();
++ // NOP for FP and BR units (different versions to allow them to be in one group)
++ inline void fpnop0();
++ inline void fpnop1();
++ inline void brnop0();
++ inline void brnop1();
++ inline void brnop2();
++
++ inline void mr( Register d, Register s);
++ inline void ori_opt( Register d, int ui16);
++ inline void oris_opt(Register d, int ui16);
++
++ // endgroup opcode for Power6
++ inline void endgroup();
++
++ // count instructions
++ inline void cntlzw( Register a, Register s);
++ inline void cntlzw_( Register a, Register s);
++ inline void cntlzd( Register a, Register s);
++ inline void cntlzd_( Register a, Register s);
++
++ // PPC 1, section 3.3.12, Fixed-Point Rotate and Shift Instructions
++ inline void sld( Register a, Register s, Register b);
++ inline void sld_( Register a, Register s, Register b);
++ inline void slw( Register a, Register s, Register b);
++ inline void slw_( Register a, Register s, Register b);
++ inline void srd( Register a, Register s, Register b);
++ inline void srd_( Register a, Register s, Register b);
++ inline void srw( Register a, Register s, Register b);
++ inline void srw_( Register a, Register s, Register b);
++ inline void srad( Register a, Register s, Register b);
++ inline void srad_( Register a, Register s, Register b);
++ inline void sraw( Register a, Register s, Register b);
++ inline void sraw_( Register a, Register s, Register b);
++ inline void sradi( Register a, Register s, int sh6);
++ inline void sradi_( Register a, Register s, int sh6);
++ inline void srawi( Register a, Register s, int sh5);
++ inline void srawi_( Register a, Register s, int sh5);
++
++ // extended mnemonics for Shift Instructions
++ inline void sldi( Register a, Register s, int sh6);
++ inline void sldi_( Register a, Register s, int sh6);
++ inline void slwi( Register a, Register s, int sh5);
++ inline void slwi_( Register a, Register s, int sh5);
++ inline void srdi( Register a, Register s, int sh6);
++ inline void srdi_( Register a, Register s, int sh6);
++ inline void srwi( Register a, Register s, int sh5);
++ inline void srwi_( Register a, Register s, int sh5);
++
++ inline void clrrdi( Register a, Register s, int ui6);
++ inline void clrrdi_( Register a, Register s, int ui6);
++ inline void clrldi( Register a, Register s, int ui6);
++ inline void clrldi_( Register a, Register s, int ui6);
++ inline void clrlsldi(Register a, Register s, int clrl6, int shl6);
++ inline void clrlsldi_(Register a, Register s, int clrl6, int shl6);
++ inline void extrdi( Register a, Register s, int n, int b);
++ // testbit with condition register
++ inline void testbitdi(ConditionRegister cr, Register a, Register s, int ui6);
++
++ // rotate instructions
++ inline void rotldi( Register a, Register s, int n);
++ inline void rotrdi( Register a, Register s, int n);
++ inline void rotlwi( Register a, Register s, int n);
++ inline void rotrwi( Register a, Register s, int n);
++
++ // Rotate Instructions
++ inline void rldic( Register a, Register s, int sh6, int mb6);
++ inline void rldic_( Register a, Register s, int sh6, int mb6);
++ inline void rldicr( Register a, Register s, int sh6, int mb6);
++ inline void rldicr_( Register a, Register s, int sh6, int mb6);
++ inline void rldicl( Register a, Register s, int sh6, int mb6);
++ inline void rldicl_( Register a, Register s, int sh6, int mb6);
++ inline void rlwinm( Register a, Register s, int sh5, int mb5, int me5);
++ inline void rlwinm_( Register a, Register s, int sh5, int mb5, int me5);
++ inline void rldimi( Register a, Register s, int sh6, int mb6);
++ inline void rldimi_( Register a, Register s, int sh6, int mb6);
++ inline void rlwimi( Register a, Register s, int sh5, int mb5, int me5);
++ inline void insrdi( Register a, Register s, int n, int b);
++ inline void insrwi( Register a, Register s, int n, int b);
++
++ // PPC 1, section 3.3.2 Fixed-Point Load Instructions
++ // 4 bytes
++ inline void lwzx( Register d, Register s1, Register s2);
++ inline void lwz( Register d, int si16, Register s1);
++ inline void lwzu( Register d, int si16, Register s1);
++
++ // 4 bytes
++ inline void lwax( Register d, Register s1, Register s2);
++ inline void lwa( Register d, int si16, Register s1);
++
++ // 2 bytes
++ inline void lhzx( Register d, Register s1, Register s2);
++ inline void lhz( Register d, int si16, Register s1);
++ inline void lhzu( Register d, int si16, Register s1);
++
++ // 2 bytes
++ inline void lhax( Register d, Register s1, Register s2);
++ inline void lha( Register d, int si16, Register s1);
++ inline void lhau( Register d, int si16, Register s1);
++
++ // 1 byte
++ inline void lbzx( Register d, Register s1, Register s2);
++ inline void lbz( Register d, int si16, Register s1);
++ inline void lbzu( Register d, int si16, Register s1);
++
++ // 8 bytes
++ inline void ldx( Register d, Register s1, Register s2);
++ inline void ld( Register d, int si16, Register s1);
++ inline void ldu( Register d, int si16, Register s1);
++
++ // PPC 1, section 3.3.3 Fixed-Point Store Instructions
++ inline void stwx( Register d, Register s1, Register s2);
++ inline void stw( Register d, int si16, Register s1);
++ inline void stwu( Register d, int si16, Register s1);
++
++ inline void sthx( Register d, Register s1, Register s2);
++ inline void sth( Register d, int si16, Register s1);
++ inline void sthu( Register d, int si16, Register s1);
++
++ inline void stbx( Register d, Register s1, Register s2);
++ inline void stb( Register d, int si16, Register s1);
++ inline void stbu( Register d, int si16, Register s1);
++
++ inline void stdx( Register d, Register s1, Register s2);
++ inline void std( Register d, int si16, Register s1);
++ inline void stdu( Register d, int si16, Register s1);
++ inline void stdux(Register s, Register a, Register b);
++
++ // PPC 1, section 3.3.13 Move To/From System Register Instructions
++ inline void mtlr( Register s1);
++ inline void mflr( Register d);
++ inline void mtctr(Register s1);
++ inline void mfctr(Register d);
++ inline void mtcrf(int fxm, Register s);
++ inline void mfcr( Register d);
++ inline void mcrf( ConditionRegister crd, ConditionRegister cra);
++ inline void mtcr( Register s);
++
++ // PPC 1, section 2.4.1 Branch Instructions
++ inline void b( address a, relocInfo::relocType rt = relocInfo::none);
++ inline void b( Label& L);
++ inline void bl( address a, relocInfo::relocType rt = relocInfo::none);
++ inline void bl( Label& L);
++ inline void bc( int boint, int biint, address a, relocInfo::relocType rt = relocInfo::none);
++ inline void bc( int boint, int biint, Label& L);
++ inline void bcl(int boint, int biint, address a, relocInfo::relocType rt = relocInfo::none);
++ inline void bcl(int boint, int biint, Label& L);
++
++ inline void bclr( int boint, int biint, int bhint, relocInfo::relocType rt = relocInfo::none);
++ inline void bclrl( int boint, int biint, int bhint, relocInfo::relocType rt = relocInfo::none);
++ inline void bcctr( int boint, int biint, int bhint = bhintbhBCCTRisNotReturnButSame,
++ relocInfo::relocType rt = relocInfo::none);
++ inline void bcctrl(int boint, int biint, int bhint = bhintbhBCLRisReturn,
++ relocInfo::relocType rt = relocInfo::none);
++
++ // helper functions for b, bcxx
++ inline bool is_within_range_of_b(address a, address pc);
++ inline bool is_within_range_of_bcxx(address a, address pc);
++
++ // get the destination of a bxx branch (b, bl, ba, bla)
++ static inline address bxx_destination(address baddr);
++ static inline address bxx_destination(int instr, address pc);
++ static inline intptr_t bxx_destination_offset(int instr, intptr_t bxx_pos);
++
++ // extended mnemonics for branch instructions
++ inline void blt(ConditionRegister crx, Label& L);
++ inline void bgt(ConditionRegister crx, Label& L);
++ inline void beq(ConditionRegister crx, Label& L);
++ inline void bso(ConditionRegister crx, Label& L);
++ inline void bge(ConditionRegister crx, Label& L);
++ inline void ble(ConditionRegister crx, Label& L);
++ inline void bne(ConditionRegister crx, Label& L);
++ inline void bns(ConditionRegister crx, Label& L);
++
++ // Branch instructions with static prediction hints.
++ inline void blt_predict_taken( ConditionRegister crx, Label& L);
++ inline void bgt_predict_taken( ConditionRegister crx, Label& L);
++ inline void beq_predict_taken( ConditionRegister crx, Label& L);
++ inline void bso_predict_taken( ConditionRegister crx, Label& L);
++ inline void bge_predict_taken( ConditionRegister crx, Label& L);
++ inline void ble_predict_taken( ConditionRegister crx, Label& L);
++ inline void bne_predict_taken( ConditionRegister crx, Label& L);
++ inline void bns_predict_taken( ConditionRegister crx, Label& L);
++ inline void blt_predict_not_taken(ConditionRegister crx, Label& L);
++ inline void bgt_predict_not_taken(ConditionRegister crx, Label& L);
++ inline void beq_predict_not_taken(ConditionRegister crx, Label& L);
++ inline void bso_predict_not_taken(ConditionRegister crx, Label& L);
++ inline void bge_predict_not_taken(ConditionRegister crx, Label& L);
++ inline void ble_predict_not_taken(ConditionRegister crx, Label& L);
++ inline void bne_predict_not_taken(ConditionRegister crx, Label& L);
++ inline void bns_predict_not_taken(ConditionRegister crx, Label& L);
++
++ // for use in conjunction with testbitdi:
++ inline void btrue( ConditionRegister crx, Label& L);
++ inline void bfalse(ConditionRegister crx, Label& L);
++
++ inline void bltl(ConditionRegister crx, Label& L);
++ inline void bgtl(ConditionRegister crx, Label& L);
++ inline void beql(ConditionRegister crx, Label& L);
++ inline void bsol(ConditionRegister crx, Label& L);
++ inline void bgel(ConditionRegister crx, Label& L);
++ inline void blel(ConditionRegister crx, Label& L);
++ inline void bnel(ConditionRegister crx, Label& L);
++ inline void bnsl(ConditionRegister crx, Label& L);
++
++ // extended mnemonics for Branch Instructions via LR
++ // We use `blr' for returns.
++ inline void blr(relocInfo::relocType rt = relocInfo::none);
++
++ // extended mnemonics for Branch Instructions with CTR
++ // bdnz means `decrement CTR and jump to L if CTR is not zero'
++ inline void bdnz(Label& L);
++ // Decrement and branch if result is zero.
++ inline void bdz(Label& L);
++ // we use `bctr[l]' for jumps/calls in function descriptor glue
++ // code, e.g. calls to runtime functions
++ inline void bctr( relocInfo::relocType rt = relocInfo::none);
++ inline void bctrl(relocInfo::relocType rt = relocInfo::none);
++ // conditional jumps/branches via CTR
++ inline void beqctr( ConditionRegister crx, relocInfo::relocType rt = relocInfo::none);
++ inline void beqctrl(ConditionRegister crx, relocInfo::relocType rt = relocInfo::none);
++ inline void bnectr( ConditionRegister crx, relocInfo::relocType rt = relocInfo::none);
++ inline void bnectrl(ConditionRegister crx, relocInfo::relocType rt = relocInfo::none);
++
++ // condition register logic instructions
++ inline void crand( int d, int s1, int s2);
++ inline void crnand(int d, int s1, int s2);
++ inline void cror( int d, int s1, int s2);
++ inline void crxor( int d, int s1, int s2);
++ inline void crnor( int d, int s1, int s2);
++ inline void creqv( int d, int s1, int s2);
++ inline void crandc(int d, int s1, int s2);
++ inline void crorc( int d, int s1, int s2);
++
++ // icache and dcache related instructions
++ inline void icbi( Register s1, Register s2);
++ //inline void dcba(Register s1, Register s2); // Instruction for embedded processor only.
++ inline void dcbz( Register s1, Register s2);
++ inline void dcbst( Register s1, Register s2);
++ inline void dcbf( Register s1, Register s2);
++
++ enum ct_cache_specification {
++ ct_primary_cache = 0,
++ ct_secondary_cache = 2
++ };
++ // dcache read hint
++ inline void dcbt( Register s1, Register s2);
++ inline void dcbtct( Register s1, Register s2, int ct);
++ inline void dcbtds( Register s1, Register s2, int ds);
++ // dcache write hint
++ inline void dcbtst( Register s1, Register s2);
++ inline void dcbtstct(Register s1, Register s2, int ct);
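++
++ // Usage sketch (illustrative only; register names are placeholders):
++ //   a->dcbtct(R_base, R_offs, ct_primary_cache); // prefetch line into L1 for reading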
++
++ // machine barrier instructions:
++ //
++ // - sync two-way memory barrier, aka fence
++ // - lwsync orders Store|Store,
++ // Load|Store,
++ // Load|Load,
++ // but not Store|Load
++ // - eieio orders memory accesses for device memory (only)
++ // - isync invalidates speculatively executed instructions
++ // From the Power ISA 2.06 documentation:
++ // "[...] an isync instruction prevents the execution of
++ // instructions following the isync until instructions
++ // preceding the isync have completed, [...]"
++ // From IBM's AIX assembler reference:
++ // "The isync [...] instruction causes the processor to
++ // refetch any instructions that might have been fetched
++ // prior to the isync instruction. The instruction isync
++ // causes the processor to wait for all previous instructions
++ // to complete. Then any instructions already fetched are
++ // discarded and instruction processing continues in the
++ // environment established by the previous instructions."
++ //
++ // semantic barrier instructions:
++ // (as defined in orderAccess.hpp)
++ //
++ // - release orders Store|Store, (maps to lwsync)
++ // Load|Store
++ // - acquire orders Load|Store, (maps to lwsync)
++ // Load|Load
++ // - fence orders Store|Store, (maps to sync)
++ // Load|Store,
++ // Load|Load,
++ // Store|Load
++ //
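++ // A minimal sketch of the mapping above (hypothetical helper names,
++ // not members of this class):
++ //   void release() { lwsync(); }  // before a publishing store
++ //   void acquire() { lwsync(); }  // after a consuming load
++ //   void fence()   { sync();   }  // full two-way barrier
++ //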
++ private:
++ inline void sync(int l);
++ public:
++ inline void sync();
++ inline void lwsync();
++ inline void ptesync();
++ inline void eieio();
++ inline void isync();
++ inline void elemental_membar(int e); // Elemental Memory Barriers (>=Power 8)
++
++ // atomics
++ inline void lwarx_unchecked(Register d, Register a, Register b, int eh1 = 0);
++ inline void ldarx_unchecked(Register d, Register a, Register b, int eh1 = 0);
++ inline bool lxarx_hint_exclusive_access();
++ inline void lwarx( Register d, Register a, Register b, bool hint_exclusive_access = false);
++ inline void ldarx( Register d, Register a, Register b, bool hint_exclusive_access = false);
++ inline void stwcx_( Register s, Register a, Register b);
++ inline void stdcx_( Register s, Register a, Register b);
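++
++ // Usage sketch (illustrative only; registers are placeholders): an
++ // atomic 32-bit add built from the reservation pair above:
++ //   Label retry;
++ //   a->bind(retry);
++ //   a->lwarx(R_val, R_addr);      // load word and set reservation
++ //   a->add(R_val, R_val, R_inc);  // compute new value
++ //   a->stwcx_(R_val, R_addr);     // store iff reservation still held
++ //   a->bne(CCR0, retry);          // stwcx_ sets CCR0; retry on failure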
++
++ // Instructions for adjusting thread priority for simultaneous
++ // multithreading (SMT) on Power5.
++ private:
++ inline void smt_prio_very_low();
++ inline void smt_prio_medium_high();
++ inline void smt_prio_high();
++
++ public:
++ inline void smt_prio_low();
++ inline void smt_prio_medium_low();
++ inline void smt_prio_medium();
++
++ // trap instructions
++ inline void twi_0(Register a); // for load with acquire semantics use load+twi_0+isync (trap can't occur)
++ // NOT FOR DIRECT USE!!
++ protected:
++ inline void tdi_unchecked(int tobits, Register a, int si16);
++ inline void twi_unchecked(int tobits, Register a, int si16);
++ inline void tdi( int tobits, Register a, int si16); // asserts UseSIGTRAP
++ inline void twi( int tobits, Register a, int si16); // asserts UseSIGTRAP
++ inline void td( int tobits, Register a, Register b); // asserts UseSIGTRAP
++ inline void tw( int tobits, Register a, Register b); // asserts UseSIGTRAP
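++
++ // Usage sketch (illustrative only) of the load+twi_0+isync idiom
++ // mentioned above: the never-taken trap consumes the loaded value,
++ // so the following isync cannot complete before the load, which
++ // yields acquire semantics.
++ //   a->ld(R_val, offs, R_base);
++ //   a->twi_0(R_val);
++ //   a->isync();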
++
++ static bool is_tdi(int x, int tobits, int ra, int si16) {
++ return (TDI_OPCODE == (x & TDI_OPCODE_MASK))
++ && (tobits == inv_to_field(x))
++ && (ra == -1/*any reg*/ || ra == inv_ra_field(x))
++ && (si16 == inv_si_field(x));
++ }
++
++ static bool is_twi(int x, int tobits, int ra, int si16) {
++ return (TWI_OPCODE == (x & TWI_OPCODE_MASK))
++ && (tobits == inv_to_field(x))
++ && (ra == -1/*any reg*/ || ra == inv_ra_field(x))
++ && (si16 == inv_si_field(x));
++ }
++
++ static bool is_twi(int x, int tobits, int ra) {
++ return (TWI_OPCODE == (x & TWI_OPCODE_MASK))
++ && (tobits == inv_to_field(x))
++ && (ra == -1/*any reg*/ || ra == inv_ra_field(x));
++ }
++
++ static bool is_td(int x, int tobits, int ra, int rb) {
++ return (TD_OPCODE == (x & TD_OPCODE_MASK))
++ && (tobits == inv_to_field(x))
++ && (ra == -1/*any reg*/ || ra == inv_ra_field(x))
++ && (rb == -1/*any reg*/ || rb == inv_rb_field(x));
++ }
++
++ static bool is_tw(int x, int tobits, int ra, int rb) {
++ return (TW_OPCODE == (x & TW_OPCODE_MASK))
++ && (tobits == inv_to_field(x))
++ && (ra == -1/*any reg*/ || ra == inv_ra_field(x))
++ && (rb == -1/*any reg*/ || rb == inv_rb_field(x));
++ }
++
++ public:
++ // PPC floating point instructions
++ // PPC 1, section 4.6.2 Floating-Point Load Instructions
++ inline void lfs( FloatRegister d, int si16, Register a);
++ inline void lfsu( FloatRegister d, int si16, Register a);
++ inline void lfsx( FloatRegister d, Register a, Register b);
++ inline void lfd( FloatRegister d, int si16, Register a);
++ inline void lfdu( FloatRegister d, int si16, Register a);
++ inline void lfdx( FloatRegister d, Register a, Register b);
++
++ // PPC 1, section 4.6.3 Floating-Point Store Instructions
++ inline void stfs( FloatRegister s, int si16, Register a);
++ inline void stfsu( FloatRegister s, int si16, Register a);
++ inline void stfsx( FloatRegister s, Register a, Register b);
++ inline void stfd( FloatRegister s, int si16, Register a);
++ inline void stfdu( FloatRegister s, int si16, Register a);
++ inline void stfdx( FloatRegister s, Register a, Register b);
++
++ // PPC 1, section 4.6.4 Floating-Point Move Instructions
++ inline void fmr( FloatRegister d, FloatRegister b);
++ inline void fmr_( FloatRegister d, FloatRegister b);
++
++ // inline void mffgpr( FloatRegister d, Register b);
++ // inline void mftgpr( Register d, FloatRegister b);
++ inline void cmpb( Register a, Register s, Register b);
++ inline void popcntb(Register a, Register s);
++ inline void popcntw(Register a, Register s);
++ inline void popcntd(Register a, Register s);
++
++ inline void fneg( FloatRegister d, FloatRegister b);
++ inline void fneg_( FloatRegister d, FloatRegister b);
++ inline void fabs( FloatRegister d, FloatRegister b);
++ inline void fabs_( FloatRegister d, FloatRegister b);
++ inline void fnabs( FloatRegister d, FloatRegister b);
++ inline void fnabs_(FloatRegister d, FloatRegister b);
++
++ // PPC 1, section 4.6.5.1 Floating-Point Elementary Arithmetic Instructions
++ inline void fadd( FloatRegister d, FloatRegister a, FloatRegister b);
++ inline void fadd_( FloatRegister d, FloatRegister a, FloatRegister b);
++ inline void fadds( FloatRegister d, FloatRegister a, FloatRegister b);
++ inline void fadds_(FloatRegister d, FloatRegister a, FloatRegister b);
++ inline void fsub( FloatRegister d, FloatRegister a, FloatRegister b);
++ inline void fsub_( FloatRegister d, FloatRegister a, FloatRegister b);
++ inline void fsubs( FloatRegister d, FloatRegister a, FloatRegister b);
++ inline void fsubs_(FloatRegister d, FloatRegister a, FloatRegister b);
++ inline void fmul( FloatRegister d, FloatRegister a, FloatRegister c);
++ inline void fmul_( FloatRegister d, FloatRegister a, FloatRegister c);
++ inline void fmuls( FloatRegister d, FloatRegister a, FloatRegister c);
++ inline void fmuls_(FloatRegister d, FloatRegister a, FloatRegister c);
++ inline void fdiv( FloatRegister d, FloatRegister a, FloatRegister b);
++ inline void fdiv_( FloatRegister d, FloatRegister a, FloatRegister b);
++ inline void fdivs( FloatRegister d, FloatRegister a, FloatRegister b);
++ inline void fdivs_(FloatRegister d, FloatRegister a, FloatRegister b);
++
++ // PPC 1, section 4.6.6 Floating-Point Rounding and Conversion Instructions
++ inline void frsp( FloatRegister d, FloatRegister b);
++ inline void fctid( FloatRegister d, FloatRegister b);
++ inline void fctidz(FloatRegister d, FloatRegister b);
++ inline void fctiw( FloatRegister d, FloatRegister b);
++ inline void fctiwz(FloatRegister d, FloatRegister b);
++ inline void fcfid( FloatRegister d, FloatRegister b);
++ inline void fcfids(FloatRegister d, FloatRegister b);
++
++ // PPC 1, section 4.6.7 Floating-Point Compare Instructions
++ inline void fcmpu( ConditionRegister crx, FloatRegister a, FloatRegister b);
++
++ inline void fsqrt( FloatRegister d, FloatRegister b);
++ inline void fsqrts(FloatRegister d, FloatRegister b);
++
++ // Vector instructions for >= Power6.
++ inline void lvebx( VectorRegister d, Register s1, Register s2);
++ inline void lvehx( VectorRegister d, Register s1, Register s2);
++ inline void lvewx( VectorRegister d, Register s1, Register s2);
++ inline void lvx( VectorRegister d, Register s1, Register s2);
++ inline void lvxl( VectorRegister d, Register s1, Register s2);
++ inline void stvebx( VectorRegister d, Register s1, Register s2);
++ inline void stvehx( VectorRegister d, Register s1, Register s2);
++ inline void stvewx( VectorRegister d, Register s1, Register s2);
++ inline void stvx( VectorRegister d, Register s1, Register s2);
++ inline void stvxl( VectorRegister d, Register s1, Register s2);
++ inline void lvsl( VectorRegister d, Register s1, Register s2);
++ inline void lvsr( VectorRegister d, Register s1, Register s2);
++ inline void vpkpx( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vpkshss( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vpkswss( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vpkshus( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vpkswus( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vpkuhum( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vpkuwum( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vpkuhus( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vpkuwus( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vupkhpx( VectorRegister d, VectorRegister b);
++ inline void vupkhsb( VectorRegister d, VectorRegister b);
++ inline void vupkhsh( VectorRegister d, VectorRegister b);
++ inline void vupklpx( VectorRegister d, VectorRegister b);
++ inline void vupklsb( VectorRegister d, VectorRegister b);
++ inline void vupklsh( VectorRegister d, VectorRegister b);
++ inline void vmrghb( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vmrghw( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vmrghh( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vmrglb( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vmrglw( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vmrglh( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vsplt( VectorRegister d, int ui4, VectorRegister b);
++ inline void vsplth( VectorRegister d, int ui3, VectorRegister b);
++ inline void vspltw( VectorRegister d, int ui2, VectorRegister b);
++ inline void vspltisb( VectorRegister d, int si5);
++ inline void vspltish( VectorRegister d, int si5);
++ inline void vspltisw( VectorRegister d, int si5);
++ inline void vperm( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c);
++ inline void vsel( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c);
++ inline void vsl( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vsldoi( VectorRegister d, VectorRegister a, VectorRegister b, int si4);
++ inline void vslo( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vsr( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vsro( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vaddcuw( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vaddshs( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vaddsbs( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vaddsws( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vaddubm( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vadduwm( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vadduhm( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vaddubs( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vadduws( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vadduhs( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vsubcuw( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vsubshs( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vsubsbs( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vsubsws( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vsububm( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vsubuwm( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vsubuhm( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vsububs( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vsubuws( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vsubuhs( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vmulesb( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vmuleub( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vmulesh( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vmuleuh( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vmulosb( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vmuloub( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vmulosh( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vmulouh( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vmhaddshs(VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c);
++ inline void vmhraddshs(VectorRegister d,VectorRegister a, VectorRegister b, VectorRegister c);
++ inline void vmladduhm(VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c);
++ inline void vmsubuhm( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c);
++ inline void vmsummbm( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c);
++ inline void vmsumshm( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c);
++ inline void vmsumshs( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c);
++ inline void vmsumuhm( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c);
++ inline void vmsumuhs( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c);
++ inline void vsumsws( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vsum2sws( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vsum4sbs( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vsum4ubs( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vsum4shs( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vavgsb( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vavgsw( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vavgsh( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vavgub( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vavguw( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vavguh( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vmaxsb( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vmaxsw( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vmaxsh( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vmaxub( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vmaxuw( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vmaxuh( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vminsb( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vminsw( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vminsh( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vminub( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vminuw( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vminuh( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vcmpequb( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vcmpequh( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vcmpequw( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vcmpgtsh( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vcmpgtsb( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vcmpgtsw( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vcmpgtub( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vcmpgtuh( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vcmpgtuw( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vcmpequb_(VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vcmpequh_(VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vcmpequw_(VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vcmpgtsh_(VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vcmpgtsb_(VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vcmpgtsw_(VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vcmpgtub_(VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vcmpgtuh_(VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vcmpgtuw_(VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vand( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vandc( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vnor( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vor( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vxor( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vrlb( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vrlw( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vrlh( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vslb( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vskw( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vslh( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vsrb( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vsrw( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vsrh( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vsrab( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vsraw( VectorRegister d, VectorRegister a, VectorRegister b);
++ inline void vsrah( VectorRegister d, VectorRegister a, VectorRegister b);
++ // Vector Floating-Point not implemented yet
++ inline void mtvscr( VectorRegister b);
++ inline void mfvscr( VectorRegister d);
++
++ // The following encoders use r0 as the second operand. These
++ // instructions read r0 as '0'.
++ inline void lwzx( Register d, Register s2);
++ inline void lwz( Register d, int si16);
++ inline void lwax( Register d, Register s2);
++ inline void lwa( Register d, int si16);
++ inline void lhzx( Register d, Register s2);
++ inline void lhz( Register d, int si16);
++ inline void lhax( Register d, Register s2);
++ inline void lha( Register d, int si16);
++ inline void lbzx( Register d, Register s2);
++ inline void lbz( Register d, int si16);
++ inline void ldx( Register d, Register s2);
++ inline void ld( Register d, int si16);
++ inline void stwx( Register d, Register s2);
++ inline void stw( Register d, int si16);
++ inline void sthx( Register d, Register s2);
++ inline void sth( Register d, int si16);
++ inline void stbx( Register d, Register s2);
++ inline void stb( Register d, int si16);
++ inline void stdx( Register d, Register s2);
++ inline void std( Register d, int si16);
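++
++ // Usage sketch (illustrative only): since the omitted base register
++ // reads as '0', these short forms address absolute (low) memory, e.g.
++ //   a->lwz(R_dst, si16); // load word from address si16 (sign-extended)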
++
++ // PPC 2, section 3.2.1 Instruction Cache Instructions
++ inline void icbi( Register s2);
++ // PPC 2, section 3.2.2 Data Cache Instructions
++ //inline void dcba( Register s2); // Instruction for embedded processor only.
++ inline void dcbz( Register s2);
++ inline void dcbst( Register s2);
++ inline void dcbf( Register s2);
++ // dcache read hint
++ inline void dcbt( Register s2);
++ inline void dcbtct( Register s2, int ct);
++ inline void dcbtds( Register s2, int ds);
++ // dcache write hint
++ inline void dcbtst( Register s2);
++ inline void dcbtstct(Register s2, int ct);
++
++ // Atomics: use ra0mem to disallow R0 as base.
++ inline void lwarx_unchecked(Register d, Register b, int eh1);
++ inline void ldarx_unchecked(Register d, Register b, int eh1);
++ inline void lwarx( Register d, Register b, bool hint_exclusive_access);
++ inline void ldarx( Register d, Register b, bool hint_exclusive_access);
++ inline void stwcx_(Register s, Register b);
++ inline void stdcx_(Register s, Register b);
++ inline void lfs( FloatRegister d, int si16);
++ inline void lfsx( FloatRegister d, Register b);
++ inline void lfd( FloatRegister d, int si16);
++ inline void lfdx( FloatRegister d, Register b);
++ inline void stfs( FloatRegister s, int si16);
++ inline void stfsx( FloatRegister s, Register b);
++ inline void stfd( FloatRegister s, int si16);
++ inline void stfdx( FloatRegister s, Register b);
++ inline void lvebx( VectorRegister d, Register s2);
++ inline void lvehx( VectorRegister d, Register s2);
++ inline void lvewx( VectorRegister d, Register s2);
++ inline void lvx( VectorRegister d, Register s2);
++ inline void lvxl( VectorRegister d, Register s2);
++ inline void stvebx(VectorRegister d, Register s2);
++ inline void stvehx(VectorRegister d, Register s2);
++ inline void stvewx(VectorRegister d, Register s2);
++ inline void stvx( VectorRegister d, Register s2);
++ inline void stvxl( VectorRegister d, Register s2);
++ inline void lvsl( VectorRegister d, Register s2);
++ inline void lvsr( VectorRegister d, Register s2);
++
++ // RegisterOrConstant versions.
++ // These emitters choose between the two-register form and the
++ // register-plus-immediate form, depending on the content of roc.
++ // If the constant is not encodable as an immediate, instructions to
++ // load the constant are emitted beforehand. Store instructions need a
++ // tmp reg if the constant is not encodable as an immediate.
++ // Size unpredictable.
++ void ld( Register d, RegisterOrConstant roc, Register s1 = noreg);
++ void lwa( Register d, RegisterOrConstant roc, Register s1 = noreg);
++ void lwz( Register d, RegisterOrConstant roc, Register s1 = noreg);
++ void lha( Register d, RegisterOrConstant roc, Register s1 = noreg);
++ void lhz( Register d, RegisterOrConstant roc, Register s1 = noreg);
++ void lbz( Register d, RegisterOrConstant roc, Register s1 = noreg);
++ void std( Register d, RegisterOrConstant roc, Register s1 = noreg, Register tmp = noreg);
++ void stw( Register d, RegisterOrConstant roc, Register s1 = noreg, Register tmp = noreg);
++ void sth( Register d, RegisterOrConstant roc, Register s1 = noreg, Register tmp = noreg);
++ void stb( Register d, RegisterOrConstant roc, Register s1 = noreg, Register tmp = noreg);
++ void add( Register d, RegisterOrConstant roc, Register s1);
++ void subf(Register d, RegisterOrConstant roc, Register s1);
++ void cmpd(ConditionRegister d, RegisterOrConstant roc, Register s1);
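++
++ // Usage sketch (illustrative only; names are placeholders): one call
++ // site covers both the indexed and the displacement form:
++ //   a->ld(R_dst, RegisterOrConstant(R_offs), R_base); // emits ldx
++ //   a->ld(R_dst, RegisterOrConstant(8), R_base);      // emits ld with si16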
++
++
++ // Emit several instructions to load a 64 bit constant. This issues a fixed
++ // instruction pattern so that the constant can be patched later on.
++ enum {
++ load_const_size = 5 * BytesPerInstWord
++ };
++ void load_const(Register d, long a, Register tmp = noreg);
++ inline void load_const(Register d, void* a, Register tmp = noreg);
++ inline void load_const(Register d, Label& L, Register tmp = noreg);
++ inline void load_const(Register d, AddressLiteral& a, Register tmp = noreg);
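++
++ // The fixed pattern is, in sketch form (shown without a tmp register;
++ // the tmp variant interleaves the halves for better ILP):
++ //   lis d, ..; ori d, d, ..; sldi d, d, 32; oris d, d, ..; ori d, d, ..
++ // i.e. five instructions, matching load_const_size above.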
++
++ // Load a 64 bit constant, optimized; unlike load_const, the emitted
++ // pattern is not identifiable and hence cannot be patched later.
++ // Tmp can be used to increase ILP. Set return_simm16_rest = true to get a
++ // 16 bit immediate offset: this is useful if the offset can be encoded in
++ // a succeeding instruction.
++ int load_const_optimized(Register d, long a, Register tmp = noreg, bool return_simm16_rest = false);
++ inline int load_const_optimized(Register d, void* a, Register tmp = noreg, bool return_simm16_rest = false) {
++ return load_const_optimized(d, (long)(unsigned long)a, tmp, return_simm16_rest);
++ }
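++
++ // Usage sketch (illustrative only): fold the low 16 bits of a constant
++ // address into the displacement of a subsequent load:
++ //   int rest = a->load_const_optimized(R_tmp, addr, noreg, true);
++ //   a->ld(R_dst, rest, R_tmp);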
++
++ // Creation
++ Assembler(CodeBuffer* code) : AbstractAssembler(code) {
++#ifdef CHECK_DELAY
++ delay_state = no_delay;
++#endif
++ }
++
++ // Testing
++#ifndef PRODUCT
++ void test_asm();
++#endif
++};
++
++
++#endif // CPU_PPC_VM_ASSEMBLER_PPC_HPP
+--- ./hotspot/src/cpu/ppc/vm/assembler_ppc.inline.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/assembler_ppc.inline.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,823 @@
++/*
++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2014 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_PPC_VM_ASSEMBLER_PPC_INLINE_HPP
++#define CPU_PPC_VM_ASSEMBLER_PPC_INLINE_HPP
++
++#include "asm/assembler.inline.hpp"
++#include "asm/codeBuffer.hpp"
++#include "code/codeCache.hpp"
++
++inline void Assembler::emit_int32(int x) {
++ AbstractAssembler::emit_int32(x);
++}
++
++inline void Assembler::emit_data(int x) {
++ emit_int32(x);
++}
++
++inline void Assembler::emit_data(int x, relocInfo::relocType rtype) {
++ relocate(rtype);
++ emit_int32(x);
++}
++
++inline void Assembler::emit_data(int x, RelocationHolder const& rspec) {
++ relocate(rspec);
++ emit_int32(x);
++}
++
++// Emit an address
++inline address Assembler::emit_addr(const address addr) {
++ address start = pc();
++ emit_address(addr);
++ return start;
++}
++
++#if !defined(ABI_ELFv2)
++// Emit a function descriptor with the specified entry point, TOC, and
++// ENV. If the entry point is NULL, the entry point is set to the
++// address just past the descriptor.
++inline address Assembler::emit_fd(address entry, address toc, address env) {
++ FunctionDescriptor* fd = (FunctionDescriptor*)pc();
++
++ assert(sizeof(FunctionDescriptor) == 3*sizeof(address), "function descriptor size");
++
++ (void)emit_addr();
++ (void)emit_addr();
++ (void)emit_addr();
++
++ fd->set_entry(entry == NULL ? pc() : entry);
++ fd->set_toc(toc);
++ fd->set_env(env);
++
++ return (address)fd;
++}
++#endif
++
++// Issue an illegal instruction. 0 is guaranteed to be an illegal instruction.
++inline void Assembler::illtrap() { Assembler::emit_int32(0); }
++inline bool Assembler::is_illtrap(int x) { return x == 0; }
++
++// PPC 1, section 3.3.8, Fixed-Point Arithmetic Instructions
++inline void Assembler::addi( Register d, Register a, int si16) { assert(a != R0, "r0 not allowed"); addi_r0ok( d, a, si16); }
++inline void Assembler::addis( Register d, Register a, int si16) { assert(a != R0, "r0 not allowed"); addis_r0ok(d, a, si16); }
++inline void Assembler::addi_r0ok(Register d,Register a,int si16) { emit_int32(ADDI_OPCODE | rt(d) | ra(a) | simm(si16, 16)); }
++inline void Assembler::addis_r0ok(Register d,Register a,int si16) { emit_int32(ADDIS_OPCODE | rt(d) | ra(a) | simm(si16, 16)); }
++inline void Assembler::addic_( Register d, Register a, int si16) { emit_int32(ADDIC__OPCODE | rt(d) | ra(a) | simm(si16, 16)); }
++inline void Assembler::subfic( Register d, Register a, int si16) { emit_int32(SUBFIC_OPCODE | rt(d) | ra(a) | simm(si16, 16)); }
++inline void Assembler::add( Register d, Register a, Register b) { emit_int32(ADD_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); }
++inline void Assembler::add_( Register d, Register a, Register b) { emit_int32(ADD_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }
++inline void Assembler::subf( Register d, Register a, Register b) { emit_int32(SUBF_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); }
++inline void Assembler::sub( Register d, Register a, Register b) { subf(d, b, a); }
++inline void Assembler::subf_( Register d, Register a, Register b) { emit_int32(SUBF_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }
++inline void Assembler::addc( Register d, Register a, Register b) { emit_int32(ADDC_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); }
++inline void Assembler::addc_( Register d, Register a, Register b) { emit_int32(ADDC_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }
++inline void Assembler::subfc( Register d, Register a, Register b) { emit_int32(SUBFC_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); }
++inline void Assembler::subfc_( Register d, Register a, Register b) { emit_int32(SUBFC_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }
++inline void Assembler::adde( Register d, Register a, Register b) { emit_int32(ADDE_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); }
++inline void Assembler::adde_( Register d, Register a, Register b) { emit_int32(ADDE_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }
++inline void Assembler::subfe( Register d, Register a, Register b) { emit_int32(SUBFE_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); }
++inline void Assembler::subfe_( Register d, Register a, Register b) { emit_int32(SUBFE_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }
++inline void Assembler::neg( Register d, Register a) { emit_int32(NEG_OPCODE | rt(d) | ra(a) | oe(0) | rc(0)); }
++inline void Assembler::neg_( Register d, Register a) { emit_int32(NEG_OPCODE | rt(d) | ra(a) | oe(0) | rc(1)); }
++inline void Assembler::mulli( Register d, Register a, int si16) { emit_int32(MULLI_OPCODE | rt(d) | ra(a) | simm(si16, 16)); }
++inline void Assembler::mulld( Register d, Register a, Register b) { emit_int32(MULLD_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); }
++inline void Assembler::mulld_( Register d, Register a, Register b) { emit_int32(MULLD_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }
++inline void Assembler::mullw( Register d, Register a, Register b) { emit_int32(MULLW_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); }
++inline void Assembler::mullw_( Register d, Register a, Register b) { emit_int32(MULLW_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }
++inline void Assembler::mulhw( Register d, Register a, Register b) { emit_int32(MULHW_OPCODE | rt(d) | ra(a) | rb(b) | rc(0)); }
++inline void Assembler::mulhw_( Register d, Register a, Register b) { emit_int32(MULHW_OPCODE | rt(d) | ra(a) | rb(b) | rc(1)); }
++inline void Assembler::mulhd( Register d, Register a, Register b) { emit_int32(MULHD_OPCODE | rt(d) | ra(a) | rb(b) | rc(0)); }
++inline void Assembler::mulhd_( Register d, Register a, Register b) { emit_int32(MULHD_OPCODE | rt(d) | ra(a) | rb(b) | rc(1)); }
++inline void Assembler::mulhdu( Register d, Register a, Register b) { emit_int32(MULHDU_OPCODE | rt(d) | ra(a) | rb(b) | rc(0)); }
++inline void Assembler::mulhdu_(Register d, Register a, Register b) { emit_int32(MULHDU_OPCODE | rt(d) | ra(a) | rb(b) | rc(1)); }
++inline void Assembler::divd( Register d, Register a, Register b) { emit_int32(DIVD_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); }
++inline void Assembler::divd_( Register d, Register a, Register b) { emit_int32(DIVD_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }
++inline void Assembler::divw( Register d, Register a, Register b) { emit_int32(DIVW_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); }
++inline void Assembler::divw_( Register d, Register a, Register b) { emit_int32(DIVW_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }
++
++// extended mnemonics
++inline void Assembler::li( Register d, int si16) { Assembler::addi_r0ok( d, R0, si16); }
++inline void Assembler::lis( Register d, int si16) { Assembler::addis_r0ok(d, R0, si16); }
++inline void Assembler::addir(Register d, int si16, Register a) { Assembler::addi(d, a, si16); }
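++
++// Usage sketch (illustrative only): a 32-bit constant is materialized
++// from two 16-bit halves, e.g. for 0x12345678:
++//   lis(d, 0x1234);      // d = 0x12340000
++//   ori(d, d, 0x5678);   // d = 0x12345678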
++
++// PPC 1, section 3.3.9, Fixed-Point Compare Instructions
++inline void Assembler::cmpi( ConditionRegister f, int l, Register a, int si16) { emit_int32( CMPI_OPCODE | bf(f) | l10(l) | ra(a) | simm(si16,16)); }
++inline void Assembler::cmp( ConditionRegister f, int l, Register a, Register b) { emit_int32( CMP_OPCODE | bf(f) | l10(l) | ra(a) | rb(b)); }
++inline void Assembler::cmpli( ConditionRegister f, int l, Register a, int ui16) { emit_int32( CMPLI_OPCODE | bf(f) | l10(l) | ra(a) | uimm(ui16,16)); }
++inline void Assembler::cmpl( ConditionRegister f, int l, Register a, Register b) { emit_int32( CMPL_OPCODE | bf(f) | l10(l) | ra(a) | rb(b)); }
++
++// extended mnemonics of Compare Instructions
++inline void Assembler::cmpwi( ConditionRegister crx, Register a, int si16) { Assembler::cmpi( crx, 0, a, si16); }
++inline void Assembler::cmpdi( ConditionRegister crx, Register a, int si16) { Assembler::cmpi( crx, 1, a, si16); }
++inline void Assembler::cmpw( ConditionRegister crx, Register a, Register b) { Assembler::cmp( crx, 0, a, b); }
++inline void Assembler::cmpd( ConditionRegister crx, Register a, Register b) { Assembler::cmp( crx, 1, a, b); }
++inline void Assembler::cmplwi(ConditionRegister crx, Register a, int ui16) { Assembler::cmpli(crx, 0, a, ui16); }
++inline void Assembler::cmpldi(ConditionRegister crx, Register a, int ui16) { Assembler::cmpli(crx, 1, a, ui16); }
++inline void Assembler::cmplw( ConditionRegister crx, Register a, Register b) { Assembler::cmpl( crx, 0, a, b); }
++inline void Assembler::cmpld( ConditionRegister crx, Register a, Register b) { Assembler::cmpl( crx, 1, a, b); }
++
++inline void Assembler::isel(Register d, Register a, Register b, int c) { guarantee(VM_Version::has_isel(), "opcode not supported on this hardware");
++ emit_int32(ISEL_OPCODE | rt(d) | ra(a) | rb(b) | bc(c)); }
++
++// PPC 1, section 3.3.11, Fixed-Point Logical Instructions
++inline void Assembler::andi_( Register a, Register s, int ui16) { emit_int32(ANDI_OPCODE | rta(a) | rs(s) | uimm(ui16, 16)); }
++inline void Assembler::andis_( Register a, Register s, int ui16) { emit_int32(ANDIS_OPCODE | rta(a) | rs(s) | uimm(ui16, 16)); }
++inline void Assembler::ori( Register a, Register s, int ui16) { emit_int32(ORI_OPCODE | rta(a) | rs(s) | uimm(ui16, 16)); }
++inline void Assembler::oris( Register a, Register s, int ui16) { emit_int32(ORIS_OPCODE | rta(a) | rs(s) | uimm(ui16, 16)); }
++inline void Assembler::xori( Register a, Register s, int ui16) { emit_int32(XORI_OPCODE | rta(a) | rs(s) | uimm(ui16, 16)); }
++inline void Assembler::xoris( Register a, Register s, int ui16) { emit_int32(XORIS_OPCODE | rta(a) | rs(s) | uimm(ui16, 16)); }
++inline void Assembler::andr( Register a, Register s, Register b) { emit_int32(AND_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); }
++inline void Assembler::and_( Register a, Register s, Register b) { emit_int32(AND_OPCODE | rta(a) | rs(s) | rb(b) | rc(1)); }
++
++inline void Assembler::or_unchecked(Register a, Register s, Register b){ emit_int32(OR_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); }
++inline void Assembler::orr( Register a, Register s, Register b) { if (a==s && s==b) { Assembler::nop(); } else { Assembler::or_unchecked(a,s,b); } }
++inline void Assembler::or_( Register a, Register s, Register b) { emit_int32(OR_OPCODE | rta(a) | rs(s) | rb(b) | rc(1)); }
++inline void Assembler::xorr( Register a, Register s, Register b) { emit_int32(XOR_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); }
++inline void Assembler::xor_( Register a, Register s, Register b) { emit_int32(XOR_OPCODE | rta(a) | rs(s) | rb(b) | rc(1)); }
++inline void Assembler::nand( Register a, Register s, Register b) { emit_int32(NAND_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); }
++inline void Assembler::nand_( Register a, Register s, Register b) { emit_int32(NAND_OPCODE | rta(a) | rs(s) | rb(b) | rc(1)); }
++inline void Assembler::nor( Register a, Register s, Register b) { emit_int32(NOR_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); }
++inline void Assembler::nor_( Register a, Register s, Register b) { emit_int32(NOR_OPCODE | rta(a) | rs(s) | rb(b) | rc(1)); }
++inline void Assembler::andc( Register a, Register s, Register b) { emit_int32(ANDC_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); }
++inline void Assembler::andc_( Register a, Register s, Register b) { emit_int32(ANDC_OPCODE | rta(a) | rs(s) | rb(b) | rc(1)); }
++inline void Assembler::orc( Register a, Register s, Register b) { emit_int32(ORC_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); }
++inline void Assembler::orc_( Register a, Register s, Register b) { emit_int32(ORC_OPCODE | rta(a) | rs(s) | rb(b) | rc(1)); }
++inline void Assembler::extsb( Register a, Register s) { emit_int32(EXTSB_OPCODE | rta(a) | rs(s) | rc(0)); }
++inline void Assembler::extsh( Register a, Register s) { emit_int32(EXTSH_OPCODE | rta(a) | rs(s) | rc(0)); }
++inline void Assembler::extsw( Register a, Register s) { emit_int32(EXTSW_OPCODE | rta(a) | rs(s) | rc(0)); }
++
++// extended mnemonics
++inline void Assembler::nop() { Assembler::ori(R0, R0, 0); }
++// NOP for FP and BR units (different versions to allow them to be in one group)
++inline void Assembler::fpnop0() { Assembler::fmr(F30, F30); }
++inline void Assembler::fpnop1() { Assembler::fmr(F31, F31); }
++inline void Assembler::brnop0() { Assembler::mcrf(CCR2, CCR2); }
++inline void Assembler::brnop1() { Assembler::mcrf(CCR3, CCR3); }
++inline void Assembler::brnop2() { Assembler::mcrf(CCR4, CCR4); }
++
++inline void Assembler::mr( Register d, Register s) { Assembler::orr(d, s, s); }
++inline void Assembler::ori_opt( Register d, int ui16) { if (ui16!=0) Assembler::ori( d, d, ui16); }
++inline void Assembler::oris_opt(Register d, int ui16) { if (ui16!=0) Assembler::oris(d, d, ui16); }
++
++inline void Assembler::endgroup() { Assembler::ori(R1, R1, 0); }
++
++// count instructions
++inline void Assembler::cntlzw( Register a, Register s) { emit_int32(CNTLZW_OPCODE | rta(a) | rs(s) | rc(0)); }
++inline void Assembler::cntlzw_( Register a, Register s) { emit_int32(CNTLZW_OPCODE | rta(a) | rs(s) | rc(1)); }
++inline void Assembler::cntlzd( Register a, Register s) { emit_int32(CNTLZD_OPCODE | rta(a) | rs(s) | rc(0)); }
++inline void Assembler::cntlzd_( Register a, Register s) { emit_int32(CNTLZD_OPCODE | rta(a) | rs(s) | rc(1)); }
++
++// PPC 1, section 3.3.12, Fixed-Point Rotate and Shift Instructions
++inline void Assembler::sld( Register a, Register s, Register b) { emit_int32(SLD_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); }
++inline void Assembler::sld_( Register a, Register s, Register b) { emit_int32(SLD_OPCODE | rta(a) | rs(s) | rb(b) | rc(1)); }
++inline void Assembler::slw( Register a, Register s, Register b) { emit_int32(SLW_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); }
++inline void Assembler::slw_( Register a, Register s, Register b) { emit_int32(SLW_OPCODE | rta(a) | rs(s) | rb(b) | rc(1)); }
++inline void Assembler::srd( Register a, Register s, Register b) { emit_int32(SRD_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); }
++inline void Assembler::srd_( Register a, Register s, Register b) { emit_int32(SRD_OPCODE | rta(a) | rs(s) | rb(b) | rc(1)); }
++inline void Assembler::srw( Register a, Register s, Register b) { emit_int32(SRW_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); }
++inline void Assembler::srw_( Register a, Register s, Register b) { emit_int32(SRW_OPCODE | rta(a) | rs(s) | rb(b) | rc(1)); }
++inline void Assembler::srad( Register a, Register s, Register b) { emit_int32(SRAD_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); }
++inline void Assembler::srad_( Register a, Register s, Register b) { emit_int32(SRAD_OPCODE | rta(a) | rs(s) | rb(b) | rc(1)); }
++inline void Assembler::sraw( Register a, Register s, Register b) { emit_int32(SRAW_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); }
++inline void Assembler::sraw_( Register a, Register s, Register b) { emit_int32(SRAW_OPCODE | rta(a) | rs(s) | rb(b) | rc(1)); }
++inline void Assembler::sradi( Register a, Register s, int sh6) { emit_int32(SRADI_OPCODE | rta(a) | rs(s) | sh162030(sh6) | rc(0)); }
++inline void Assembler::sradi_( Register a, Register s, int sh6) { emit_int32(SRADI_OPCODE | rta(a) | rs(s) | sh162030(sh6) | rc(1)); }
++inline void Assembler::srawi( Register a, Register s, int sh5) { emit_int32(SRAWI_OPCODE | rta(a) | rs(s) | sh1620(sh5) | rc(0)); }
++inline void Assembler::srawi_( Register a, Register s, int sh5) { emit_int32(SRAWI_OPCODE | rta(a) | rs(s) | sh1620(sh5) | rc(1)); }
++
++// extended mnemonics for Shift Instructions
++inline void Assembler::sldi( Register a, Register s, int sh6) { Assembler::rldicr(a, s, sh6, 63-sh6); }
++inline void Assembler::sldi_( Register a, Register s, int sh6) { Assembler::rldicr_(a, s, sh6, 63-sh6); }
++inline void Assembler::slwi( Register a, Register s, int sh5) { Assembler::rlwinm(a, s, sh5, 0, 31-sh5); }
++inline void Assembler::slwi_( Register a, Register s, int sh5) { Assembler::rlwinm_(a, s, sh5, 0, 31-sh5); }
++inline void Assembler::srdi( Register a, Register s, int sh6) { Assembler::rldicl(a, s, 64-sh6, sh6); }
++inline void Assembler::srdi_( Register a, Register s, int sh6) { Assembler::rldicl_(a, s, 64-sh6, sh6); }
++inline void Assembler::srwi( Register a, Register s, int sh5) { Assembler::rlwinm(a, s, 32-sh5, sh5, 31); }
++inline void Assembler::srwi_( Register a, Register s, int sh5) { Assembler::rlwinm_(a, s, 32-sh5, sh5, 31); }
++
++inline void Assembler::clrrdi( Register a, Register s, int ui6) { Assembler::rldicr(a, s, 0, 63-ui6); }
++inline void Assembler::clrrdi_( Register a, Register s, int ui6) { Assembler::rldicr_(a, s, 0, 63-ui6); }
++inline void Assembler::clrldi( Register a, Register s, int ui6) { Assembler::rldicl(a, s, 0, ui6); }
++inline void Assembler::clrldi_( Register a, Register s, int ui6) { Assembler::rldicl_(a, s, 0, ui6); }
++inline void Assembler::clrlsldi( Register a, Register s, int clrl6, int shl6) { Assembler::rldic( a, s, shl6, clrl6-shl6); }
++inline void Assembler::clrlsldi_(Register a, Register s, int clrl6, int shl6) { Assembler::rldic_(a, s, shl6, clrl6-shl6); }
++inline void Assembler::extrdi( Register a, Register s, int n, int b){ Assembler::rldicl(a, s, b+n, 64-n); }
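++
++// Worked example (for reference): sldi(a, s, 8) expands to
++// rldicr(a, s, 8, 55), i.e. rotate left by 8 with a mask that clears
++// the low 8 bits (mask end = 63 - 8).
++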
++// testbit with condition register.
++inline void Assembler::testbitdi(ConditionRegister cr, Register a, Register s, int ui6) {
++ if (cr == CCR0) {
++ Assembler::rldicr_(a, s, 63-ui6, 0);
++ } else {
++ Assembler::rldicr(a, s, 63-ui6, 0);
++ Assembler::cmpdi(cr, a, 0);
++ }
++}
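++
++// Usage sketch (illustrative only), together with btrue/bfalse below:
++//   testbitdi(CCR0, R_tmp, R_src, 3); // test bit 3 (lsb-numbered) of R_src
++//   btrue(CCR0, L_bit_set);           // taken iff that bit is 1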
++
++// rotate instructions
++inline void Assembler::rotldi( Register a, Register s, int n) { Assembler::rldicl(a, s, n, 0); }
++inline void Assembler::rotrdi( Register a, Register s, int n) { Assembler::rldicl(a, s, 64-n, 0); }
++inline void Assembler::rotlwi( Register a, Register s, int n) { Assembler::rlwinm(a, s, n, 0, 31); }
++inline void Assembler::rotrwi( Register a, Register s, int n) { Assembler::rlwinm(a, s, 32-n, 0, 31); }
++
++inline void Assembler::rldic( Register a, Register s, int sh6, int mb6) { emit_int32(RLDIC_OPCODE | rta(a) | rs(s) | sh162030(sh6) | mb2126(mb6) | rc(0)); }
++inline void Assembler::rldic_( Register a, Register s, int sh6, int mb6) { emit_int32(RLDIC_OPCODE | rta(a) | rs(s) | sh162030(sh6) | mb2126(mb6) | rc(1)); }
++inline void Assembler::rldicr( Register a, Register s, int sh6, int mb6) { emit_int32(RLDICR_OPCODE | rta(a) | rs(s) | sh162030(sh6) | mb2126(mb6) | rc(0)); }
++inline void Assembler::rldicr_( Register a, Register s, int sh6, int mb6) { emit_int32(RLDICR_OPCODE | rta(a) | rs(s) | sh162030(sh6) | mb2126(mb6) | rc(1)); }
++inline void Assembler::rldicl( Register a, Register s, int sh6, int me6) { emit_int32(RLDICL_OPCODE | rta(a) | rs(s) | sh162030(sh6) | me2126(me6) | rc(0)); }
++inline void Assembler::rldicl_( Register a, Register s, int sh6, int me6) { emit_int32(RLDICL_OPCODE | rta(a) | rs(s) | sh162030(sh6) | me2126(me6) | rc(1)); }
++inline void Assembler::rlwinm( Register a, Register s, int sh5, int mb5, int me5){ emit_int32(RLWINM_OPCODE | rta(a) | rs(s) | sh1620(sh5) | mb2125(mb5) | me2630(me5) | rc(0)); }
++inline void Assembler::rlwinm_( Register a, Register s, int sh5, int mb5, int me5){ emit_int32(RLWINM_OPCODE | rta(a) | rs(s) | sh1620(sh5) | mb2125(mb5) | me2630(me5) | rc(1)); }
++inline void Assembler::rldimi( Register a, Register s, int sh6, int mb6) { emit_int32(RLDIMI_OPCODE | rta(a) | rs(s) | sh162030(sh6) | mb2126(mb6) | rc(0)); }
++inline void Assembler::rlwimi( Register a, Register s, int sh5, int mb5, int me5){ emit_int32(RLWIMI_OPCODE | rta(a) | rs(s) | sh1620(sh5) | mb2125(mb5) | me2630(me5) | rc(0)); }
++inline void Assembler::rldimi_( Register a, Register s, int sh6, int mb6) { emit_int32(RLDIMI_OPCODE | rta(a) | rs(s) | sh162030(sh6) | mb2126(mb6) | rc(1)); }
++inline void Assembler::insrdi( Register a, Register s, int n, int b) { Assembler::rldimi(a, s, 64-(b+n), b); }
++inline void Assembler::insrwi( Register a, Register s, int n, int b) { Assembler::rlwimi(a, s, 32-(b+n), b, b+n-1); }
++
++// PPC 1, section 3.3.2 Fixed-Point Load Instructions
++inline void Assembler::lwzx( Register d, Register s1, Register s2) { emit_int32(LWZX_OPCODE | rt(d) | ra0mem(s1) | rb(s2));}
++inline void Assembler::lwz( Register d, int si16, Register s1) { emit_int32(LWZ_OPCODE | rt(d) | d1(si16) | ra0mem(s1));}
++inline void Assembler::lwzu( Register d, int si16, Register s1) { assert(d != s1, "according to ibm manual"); emit_int32(LWZU_OPCODE | rt(d) | d1(si16) | rta0mem(s1));}
++
++inline void Assembler::lwax( Register d, Register s1, Register s2) { emit_int32(LWAX_OPCODE | rt(d) | ra0mem(s1) | rb(s2));}
++inline void Assembler::lwa( Register d, int si16, Register s1) { emit_int32(LWA_OPCODE | rt(d) | ds(si16) | ra0mem(s1));}
++
++inline void Assembler::lhzx( Register d, Register s1, Register s2) { emit_int32(LHZX_OPCODE | rt(d) | ra0mem(s1) | rb(s2));}
++inline void Assembler::lhz( Register d, int si16, Register s1) { emit_int32(LHZ_OPCODE | rt(d) | d1(si16) | ra0mem(s1));}
++inline void Assembler::lhzu( Register d, int si16, Register s1) { assert(d != s1, "according to ibm manual"); emit_int32(LHZU_OPCODE | rt(d) | d1(si16) | rta0mem(s1));}
++
++inline void Assembler::lhax( Register d, Register s1, Register s2) { emit_int32(LHAX_OPCODE | rt(d) | ra0mem(s1) | rb(s2));}
++inline void Assembler::lha( Register d, int si16, Register s1) { emit_int32(LHA_OPCODE | rt(d) | d1(si16) | ra0mem(s1));}
++inline void Assembler::lhau( Register d, int si16, Register s1) { assert(d != s1, "according to ibm manual"); emit_int32(LHAU_OPCODE | rt(d) | d1(si16) | rta0mem(s1));}
++
++inline void Assembler::lbzx( Register d, Register s1, Register s2) { emit_int32(LBZX_OPCODE | rt(d) | ra0mem(s1) | rb(s2));}
++inline void Assembler::lbz( Register d, int si16, Register s1) { emit_int32(LBZ_OPCODE | rt(d) | d1(si16) | ra0mem(s1));}
++inline void Assembler::lbzu( Register d, int si16, Register s1) { assert(d != s1, "according to ibm manual"); emit_int32(LBZU_OPCODE | rt(d) | d1(si16) | rta0mem(s1));}
++
++inline void Assembler::ld( Register d, int si16, Register s1) { emit_int32(LD_OPCODE | rt(d) | ds(si16) | ra0mem(s1));}
++inline void Assembler::ldx( Register d, Register s1, Register s2) { emit_int32(LDX_OPCODE | rt(d) | ra0mem(s1) | rb(s2));}
++inline void Assembler::ldu( Register d, int si16, Register s1) { assert(d != s1, "according to ibm manual"); emit_int32(LDU_OPCODE | rt(d) | ds(si16) | rta0mem(s1));}
++
++// PPC 1, section 3.3.3 Fixed-Point Store Instructions
++inline void Assembler::stwx( Register d, Register s1, Register s2) { emit_int32(STWX_OPCODE | rs(d) | ra0mem(s1) | rb(s2));}
++inline void Assembler::stw( Register d, int si16, Register s1) { emit_int32(STW_OPCODE | rs(d) | d1(si16) | ra0mem(s1));}
++inline void Assembler::stwu( Register d, int si16, Register s1) { emit_int32(STWU_OPCODE | rs(d) | d1(si16) | rta0mem(s1));}
++
++inline void Assembler::sthx( Register d, Register s1, Register s2) { emit_int32(STHX_OPCODE | rs(d) | ra0mem(s1) | rb(s2));}
++inline void Assembler::sth( Register d, int si16, Register s1) { emit_int32(STH_OPCODE | rs(d) | d1(si16) | ra0mem(s1));}
++inline void Assembler::sthu( Register d, int si16, Register s1) { emit_int32(STHU_OPCODE | rs(d) | d1(si16) | rta0mem(s1));}
++
++inline void Assembler::stbx( Register d, Register s1, Register s2) { emit_int32(STBX_OPCODE | rs(d) | ra0mem(s1) | rb(s2));}
++inline void Assembler::stb( Register d, int si16, Register s1) { emit_int32(STB_OPCODE | rs(d) | d1(si16) | ra0mem(s1));}
++inline void Assembler::stbu( Register d, int si16, Register s1) { emit_int32(STBU_OPCODE | rs(d) | d1(si16) | rta0mem(s1));}
++
++inline void Assembler::std( Register d, int si16, Register s1) { emit_int32(STD_OPCODE | rs(d) | ds(si16) | ra0mem(s1));}
++inline void Assembler::stdx( Register d, Register s1, Register s2) { emit_int32(STDX_OPCODE | rs(d) | ra0mem(s1) | rb(s2));}
++inline void Assembler::stdu( Register d, int si16, Register s1) { emit_int32(STDU_OPCODE | rs(d) | ds(si16) | rta0mem(s1));}
++inline void Assembler::stdux(Register s, Register a, Register b) { emit_int32(STDUX_OPCODE| rs(s) | rta0mem(a) | rb(b));}
++
++// PPC 1, section 3.3.13 Move To/From System Register Instructions
++inline void Assembler::mtlr( Register s1) { emit_int32(MTLR_OPCODE | rs(s1)); }
++inline void Assembler::mflr( Register d ) { emit_int32(MFLR_OPCODE | rt(d)); }
++inline void Assembler::mtctr(Register s1) { emit_int32(MTCTR_OPCODE | rs(s1)); }
++inline void Assembler::mfctr(Register d ) { emit_int32(MFCTR_OPCODE | rt(d)); }
++inline void Assembler::mtcrf(int afxm, Register s){ emit_int32(MTCRF_OPCODE | fxm(afxm) | rs(s)); }
++inline void Assembler::mfcr( Register d ) { emit_int32(MFCR_OPCODE | rt(d)); }
++inline void Assembler::mcrf( ConditionRegister crd, ConditionRegister cra)
++ { emit_int32(MCRF_OPCODE | bf(crd) | bfa(cra)); }
++inline void Assembler::mtcr( Register s) { Assembler::mtcrf(0xff, s); }
++
++// SAP JVM 2006-02-13 PPC branch instruction.
++// PPC 1, section 2.4.1 Branch Instructions
++inline void Assembler::b( address a, relocInfo::relocType rt) { emit_data(BXX_OPCODE| li(disp( intptr_t(a), intptr_t(pc()))) |aa(0)|lk(0), rt); }
++inline void Assembler::b( Label& L) { b( target(L)); }
++inline void Assembler::bl(address a, relocInfo::relocType rt) { emit_data(BXX_OPCODE| li(disp( intptr_t(a), intptr_t(pc()))) |aa(0)|lk(1), rt); }
++inline void Assembler::bl(Label& L) { bl(target(L)); }
++inline void Assembler::bc( int boint, int biint, address a, relocInfo::relocType rt) { emit_data(BCXX_OPCODE| bo(boint) | bi(biint) | bd(disp( intptr_t(a), intptr_t(pc()))) | aa(0) | lk(0), rt); }
++inline void Assembler::bc( int boint, int biint, Label& L) { bc(boint, biint, target(L)); }
++inline void Assembler::bcl(int boint, int biint, address a, relocInfo::relocType rt) { emit_data(BCXX_OPCODE| bo(boint) | bi(biint) | bd(disp( intptr_t(a), intptr_t(pc()))) | aa(0) | lk(1), rt); }
++inline void Assembler::bcl(int boint, int biint, Label& L) { bcl(boint, biint, target(L)); }
++
++inline void Assembler::bclr( int boint, int biint, int bhint, relocInfo::relocType rt) { emit_data(BCLR_OPCODE | bo(boint) | bi(biint) | bh(bhint) | aa(0) | lk(0), rt); }
++inline void Assembler::bclrl( int boint, int biint, int bhint, relocInfo::relocType rt) { emit_data(BCLR_OPCODE | bo(boint) | bi(biint) | bh(bhint) | aa(0) | lk(1), rt); }
++inline void Assembler::bcctr( int boint, int biint, int bhint, relocInfo::relocType rt) { emit_data(BCCTR_OPCODE| bo(boint) | bi(biint) | bh(bhint) | aa(0) | lk(0), rt); }
++inline void Assembler::bcctrl(int boint, int biint, int bhint, relocInfo::relocType rt) { emit_data(BCCTR_OPCODE| bo(boint) | bi(biint) | bh(bhint) | aa(0) | lk(1), rt); }
++
++// helper function for b
++inline bool Assembler::is_within_range_of_b(address a, address pc) {
++ // Guard against illegal branch targets, e.g. -1 (see CompiledStaticCall and ad-file).
++ if ((((uint64_t)a) & 0x3) != 0) return false;
++
++ const int range = 1 << (29-6); // li field is from bit 6 to bit 29.
++ int value = disp(intptr_t(a), intptr_t(pc));
++ bool result = -range <= value && value < range-1;
++#ifdef ASSERT
++ if (result) li(value); // Assert that value is in correct range.
++#endif
++ return result;
++}
++
++// helper functions for bcxx.
++inline bool Assembler::is_within_range_of_bcxx(address a, address pc) {
++ // Guard against illegal branch targets, e.g. -1 (see CompiledStaticCall and ad-file).
++ if ((((uint64_t)a) & 0x3) != 0) return false;
++
++ const int range = 1 << (29-16); // bd field is from bit 16 to bit 29.
++ int value = disp(intptr_t(a), intptr_t(pc));
++ bool result = -range <= value && value < range-1;
++#ifdef ASSERT
++ if (result) bd(value); // Assert that value is in correct range.
++#endif
++ return result;
++}
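++
++// For reference: the 24-bit li field holds a signed displacement in
++// instruction words, so b/bl reach roughly +/-32 MB; the 14-bit bd
++// field of bcxx reaches roughly +/-32 KB.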
++
++// Get the destination of a bxx branch (b, bl, ba, bla).
++address Assembler::bxx_destination(address baddr) { return bxx_destination(*(int*)baddr, baddr); }
++address Assembler::bxx_destination(int instr, address pc) { return (address)bxx_destination_offset(instr, (intptr_t)pc); }
++intptr_t Assembler::bxx_destination_offset(int instr, intptr_t bxx_pos) {
++ intptr_t displ = inv_li_field(instr);
++ return bxx_pos + displ;
++}
++
++// Extended mnemonics for Branch Instructions
++inline void Assembler::blt(ConditionRegister crx, Label& L) { Assembler::bc(bcondCRbiIs1, bi0(crx, less), L); }
++inline void Assembler::bgt(ConditionRegister crx, Label& L) { Assembler::bc(bcondCRbiIs1, bi0(crx, greater), L); }
++inline void Assembler::beq(ConditionRegister crx, Label& L) { Assembler::bc(bcondCRbiIs1, bi0(crx, equal), L); }
++inline void Assembler::bso(ConditionRegister crx, Label& L) { Assembler::bc(bcondCRbiIs1, bi0(crx, summary_overflow), L); }
++inline void Assembler::bge(ConditionRegister crx, Label& L) { Assembler::bc(bcondCRbiIs0, bi0(crx, less), L); }
++inline void Assembler::ble(ConditionRegister crx, Label& L) { Assembler::bc(bcondCRbiIs0, bi0(crx, greater), L); }
++inline void Assembler::bne(ConditionRegister crx, Label& L) { Assembler::bc(bcondCRbiIs0, bi0(crx, equal), L); }
++inline void Assembler::bns(ConditionRegister crx, Label& L) { Assembler::bc(bcondCRbiIs0, bi0(crx, summary_overflow), L); }
++
++// Branch instructions with static prediction hints.
++inline void Assembler::blt_predict_taken (ConditionRegister crx, Label& L) { bc(bcondCRbiIs1_bhintIsTaken, bi0(crx, less), L); }
++inline void Assembler::bgt_predict_taken (ConditionRegister crx, Label& L) { bc(bcondCRbiIs1_bhintIsTaken, bi0(crx, greater), L); }
++inline void Assembler::beq_predict_taken (ConditionRegister crx, Label& L) { bc(bcondCRbiIs1_bhintIsTaken, bi0(crx, equal), L); }
++inline void Assembler::bso_predict_taken (ConditionRegister crx, Label& L) { bc(bcondCRbiIs1_bhintIsTaken, bi0(crx, summary_overflow), L); }
++inline void Assembler::bge_predict_taken (ConditionRegister crx, Label& L) { bc(bcondCRbiIs0_bhintIsTaken, bi0(crx, less), L); }
++inline void Assembler::ble_predict_taken (ConditionRegister crx, Label& L) { bc(bcondCRbiIs0_bhintIsTaken, bi0(crx, greater), L); }
++inline void Assembler::bne_predict_taken (ConditionRegister crx, Label& L) { bc(bcondCRbiIs0_bhintIsTaken, bi0(crx, equal), L); }
++inline void Assembler::bns_predict_taken (ConditionRegister crx, Label& L) { bc(bcondCRbiIs0_bhintIsTaken, bi0(crx, summary_overflow), L); }
++inline void Assembler::blt_predict_not_taken(ConditionRegister crx, Label& L) { bc(bcondCRbiIs1_bhintIsNotTaken, bi0(crx, less), L); }
++inline void Assembler::bgt_predict_not_taken(ConditionRegister crx, Label& L) { bc(bcondCRbiIs1_bhintIsNotTaken, bi0(crx, greater), L); }
++inline void Assembler::beq_predict_not_taken(ConditionRegister crx, Label& L) { bc(bcondCRbiIs1_bhintIsNotTaken, bi0(crx, equal), L); }
++inline void Assembler::bso_predict_not_taken(ConditionRegister crx, Label& L) { bc(bcondCRbiIs1_bhintIsNotTaken, bi0(crx, summary_overflow), L); }
++inline void Assembler::bge_predict_not_taken(ConditionRegister crx, Label& L) { bc(bcondCRbiIs0_bhintIsNotTaken, bi0(crx, less), L); }
++inline void Assembler::ble_predict_not_taken(ConditionRegister crx, Label& L) { bc(bcondCRbiIs0_bhintIsNotTaken, bi0(crx, greater), L); }
++inline void Assembler::bne_predict_not_taken(ConditionRegister crx, Label& L) { bc(bcondCRbiIs0_bhintIsNotTaken, bi0(crx, equal), L); }
++inline void Assembler::bns_predict_not_taken(ConditionRegister crx, Label& L) { bc(bcondCRbiIs0_bhintIsNotTaken, bi0(crx, summary_overflow), L); }
++
++// For use in conjunction with testbitdi:
++inline void Assembler::btrue( ConditionRegister crx, Label& L) { Assembler::bne(crx, L); }
++inline void Assembler::bfalse(ConditionRegister crx, Label& L) { Assembler::beq(crx, L); }
++
++inline void Assembler::bltl(ConditionRegister crx, Label& L) { Assembler::bcl(bcondCRbiIs1, bi0(crx, less), L); }
++inline void Assembler::bgtl(ConditionRegister crx, Label& L) { Assembler::bcl(bcondCRbiIs1, bi0(crx, greater), L); }
++inline void Assembler::beql(ConditionRegister crx, Label& L) { Assembler::bcl(bcondCRbiIs1, bi0(crx, equal), L); }
++inline void Assembler::bsol(ConditionRegister crx, Label& L) { Assembler::bcl(bcondCRbiIs1, bi0(crx, summary_overflow), L); }
++inline void Assembler::bgel(ConditionRegister crx, Label& L) { Assembler::bcl(bcondCRbiIs0, bi0(crx, less), L); }
++inline void Assembler::blel(ConditionRegister crx, Label& L) { Assembler::bcl(bcondCRbiIs0, bi0(crx, greater), L); }
++inline void Assembler::bnel(ConditionRegister crx, Label& L) { Assembler::bcl(bcondCRbiIs0, bi0(crx, equal), L); }
++inline void Assembler::bnsl(ConditionRegister crx, Label& L) { Assembler::bcl(bcondCRbiIs0, bi0(crx, summary_overflow), L); }
++
++// Extended mnemonics for Branch Instructions via LR.
++// We use `blr' for returns.
++inline void Assembler::blr(relocInfo::relocType rt) { Assembler::bclr(bcondAlways, 0, bhintbhBCLRisReturn, rt); }
++
++// Extended mnemonics for Branch Instructions with CTR.
++// Bdnz means `decrement CTR and jump to L if CTR is not zero'.
++inline void Assembler::bdnz(Label& L) { Assembler::bc(16, 0, L); }
++// Decrement and branch if result is zero.
++inline void Assembler::bdz(Label& L) { Assembler::bc(18, 0, L); }
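++// Sketch of the usual counted-loop idiom built on bdnz (register and label
++// names are placeholders; mtctr is defined elsewhere in this assembler):
++//   mtctr(Rcount);       // load the iteration count into CTR
++//   bind(Lloop);
++//   ...loop body...
++//   bdnz(Lloop);         // decrement CTR and branch back while CTR != 0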
++// We use `bctr[l]' for jumps/calls in function descriptor glue
++// code, e.g. for calls to runtime functions.
++inline void Assembler::bctr( relocInfo::relocType rt) { Assembler::bcctr(bcondAlways, 0, bhintbhBCCTRisNotReturnButSame, rt); }
++inline void Assembler::bctrl(relocInfo::relocType rt) { Assembler::bcctrl(bcondAlways, 0, bhintbhBCCTRisNotReturnButSame, rt); }
++// Conditional jumps/branches via CTR.
++inline void Assembler::beqctr( ConditionRegister crx, relocInfo::relocType rt) { Assembler::bcctr( bcondCRbiIs1, bi0(crx, equal), bhintbhBCCTRisNotReturnButSame, rt); }
++inline void Assembler::beqctrl(ConditionRegister crx, relocInfo::relocType rt) { Assembler::bcctrl(bcondCRbiIs1, bi0(crx, equal), bhintbhBCCTRisNotReturnButSame, rt); }
++inline void Assembler::bnectr( ConditionRegister crx, relocInfo::relocType rt) { Assembler::bcctr( bcondCRbiIs0, bi0(crx, equal), bhintbhBCCTRisNotReturnButSame, rt); }
++inline void Assembler::bnectrl(ConditionRegister crx, relocInfo::relocType rt) { Assembler::bcctrl(bcondCRbiIs0, bi0(crx, equal), bhintbhBCCTRisNotReturnButSame, rt); }
++
++// Condition register logic instructions.
++inline void Assembler::crand( int d, int s1, int s2) { emit_int32(CRAND_OPCODE | bt(d) | ba(s1) | bb(s2)); }
++inline void Assembler::crnand(int d, int s1, int s2) { emit_int32(CRNAND_OPCODE | bt(d) | ba(s1) | bb(s2)); }
++inline void Assembler::cror( int d, int s1, int s2) { emit_int32(CROR_OPCODE | bt(d) | ba(s1) | bb(s2)); }
++inline void Assembler::crxor( int d, int s1, int s2) { emit_int32(CRXOR_OPCODE | bt(d) | ba(s1) | bb(s2)); }
++inline void Assembler::crnor( int d, int s1, int s2) { emit_int32(CRNOR_OPCODE | bt(d) | ba(s1) | bb(s2)); }
++inline void Assembler::creqv( int d, int s1, int s2) { emit_int32(CREQV_OPCODE | bt(d) | ba(s1) | bb(s2)); }
++inline void Assembler::crandc(int d, int s1, int s2) { emit_int32(CRANDC_OPCODE | bt(d) | ba(s1) | bb(s2)); }
++inline void Assembler::crorc( int d, int s1, int s2) { emit_int32(CRORC_OPCODE | bt(d) | ba(s1) | bb(s2)); }
++
++// Conditional move (>= Power7)
++inline void Assembler::isel(Register d, ConditionRegister cr, Condition cc, bool inv, Register a, Register b) {
++ if (b == noreg) {
++ b = d; // Can be omitted if old value should be kept in "else" case.
++ }
++ Register first = a;
++ Register second = b;
++ if (inv) {
++ first = b;
++ second = a; // exchange
++ }
++ assert(first != R0, "r0 not allowed");
++ isel(d, first, second, bi0(cr, cc));
++}
++inline void Assembler::isel_0(Register d, ConditionRegister cr, Condition cc, Register b) {
++ if (b == noreg) {
++ b = d; // Can be omitted if old value should be kept in "else" case.
++ }
++ isel(d, R0, b, bi0(cr, cc));
++}
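++// Usage sketch (register names hypothetical): d = crx.eq ? a : b can be
++// written as isel(d, crx, Assembler::equal, false, a, b); passing noreg
++// for b keeps the previous value of d in the "else" case.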
++
++// PPC 2, section 3.2.1 Instruction Cache Instructions
++inline void Assembler::icbi( Register s1, Register s2) { emit_int32( ICBI_OPCODE | ra0mem(s1) | rb(s2) ); }
++// PPC 2, section 3.2.2 Data Cache Instructions
++//inline void Assembler::dcba( Register s1, Register s2) { emit_int32( DCBA_OPCODE | ra0mem(s1) | rb(s2) ); }
++inline void Assembler::dcbz( Register s1, Register s2) { emit_int32( DCBZ_OPCODE | ra0mem(s1) | rb(s2) ); }
++inline void Assembler::dcbst( Register s1, Register s2) { emit_int32( DCBST_OPCODE | ra0mem(s1) | rb(s2) ); }
++inline void Assembler::dcbf( Register s1, Register s2) { emit_int32( DCBF_OPCODE | ra0mem(s1) | rb(s2) ); }
++// dcache read hint
++inline void Assembler::dcbt( Register s1, Register s2) { emit_int32( DCBT_OPCODE | ra0mem(s1) | rb(s2) ); }
++inline void Assembler::dcbtct( Register s1, Register s2, int ct) { emit_int32( DCBT_OPCODE | ra0mem(s1) | rb(s2) | thct(ct)); }
++inline void Assembler::dcbtds( Register s1, Register s2, int ds) { emit_int32( DCBT_OPCODE | ra0mem(s1) | rb(s2) | thds(ds)); }
++// dcache write hint
++inline void Assembler::dcbtst( Register s1, Register s2) { emit_int32( DCBTST_OPCODE | ra0mem(s1) | rb(s2) ); }
++inline void Assembler::dcbtstct(Register s1, Register s2, int ct) { emit_int32( DCBTST_OPCODE | ra0mem(s1) | rb(s2) | thct(ct)); }
++
++// machine barrier instructions:
++inline void Assembler::sync(int a) { emit_int32( SYNC_OPCODE | l910(a)); }
++inline void Assembler::sync() { Assembler::sync(0); }
++inline void Assembler::lwsync() { Assembler::sync(1); }
++inline void Assembler::ptesync() { Assembler::sync(2); }
++inline void Assembler::eieio() { emit_int32( EIEIO_OPCODE); }
++inline void Assembler::isync() { emit_int32( ISYNC_OPCODE); }
++inline void Assembler::elemental_membar(int e) { assert(0 < e && e < 16, "invalid encoding"); emit_int32( SYNC_OPCODE | e1215(e)); }
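++// Usage note (informal): sync (L=0) is a full two-way fence; lwsync (L=1)
++// orders everything except store-load and serves as the usual
++// acquire/release barrier; ptesync (L=2) is for page-table updates;
++// eieio orders accesses to caching-inhibited (device) storage; isync
++// discards prefetched instructions and completes acquire sequences.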
++
++// atomics
++// Use ra0mem to disallow R0 as base.
++inline void Assembler::lwarx_unchecked(Register d, Register a, Register b, int eh1) { emit_int32( LWARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); }
++inline void Assembler::ldarx_unchecked(Register d, Register a, Register b, int eh1) { emit_int32( LDARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); }
++inline bool Assembler::lxarx_hint_exclusive_access() { return VM_Version::has_lxarxeh(); }
++inline void Assembler::lwarx( Register d, Register a, Register b, bool hint_exclusive_access) { lwarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
++inline void Assembler::ldarx( Register d, Register a, Register b, bool hint_exclusive_access) { ldarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
++inline void Assembler::stwcx_(Register s, Register a, Register b) { emit_int32( STWCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); }
++inline void Assembler::stdcx_(Register s, Register a, Register b) { emit_int32( STDCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); }
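++// Sketch of the canonical load-reserved/store-conditional update loop these
++// primitives support (register and label names are placeholders):
++//   bind(Lretry);
++//   lwarx(Rold, Rbase, Roff);    // load word and take a reservation
++//   ...compute Rnew from Rold...
++//   stwcx_(Rnew, Rbase, Roff);   // store only if still reserved; sets CR0
++//   bne(CCR0, Lretry);           // reservation was lost -> retry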
++
++// Instructions for adjusting thread priority
++// for simultaneous multithreading (SMT) on POWER5.
++inline void Assembler::smt_prio_very_low() { Assembler::or_unchecked(R31, R31, R31); }
++inline void Assembler::smt_prio_low() { Assembler::or_unchecked(R1, R1, R1); }
++inline void Assembler::smt_prio_medium_low() { Assembler::or_unchecked(R6, R6, R6); }
++inline void Assembler::smt_prio_medium() { Assembler::or_unchecked(R2, R2, R2); }
++inline void Assembler::smt_prio_medium_high() { Assembler::or_unchecked(R5, R5, R5); }
++inline void Assembler::smt_prio_high() { Assembler::or_unchecked(R3, R3, R3); }
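++// These encode as "or rx,rx,rx", an architectural no-op whose register
++// number the SMT hardware decodes as a thread-priority hint.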
++
++inline void Assembler::twi_0(Register a) { twi_unchecked(0, a, 0);}
++
++// trap instructions
++inline void Assembler::tdi_unchecked(int tobits, Register a, int si16){ emit_int32( TDI_OPCODE | to(tobits) | ra(a) | si(si16)); }
++inline void Assembler::twi_unchecked(int tobits, Register a, int si16){ emit_int32( TWI_OPCODE | to(tobits) | ra(a) | si(si16)); }
++inline void Assembler::tdi(int tobits, Register a, int si16) { assert(UseSIGTRAP, "precondition"); tdi_unchecked(tobits, a, si16); }
++inline void Assembler::twi(int tobits, Register a, int si16) { assert(UseSIGTRAP, "precondition"); twi_unchecked(tobits, a, si16); }
++inline void Assembler::td( int tobits, Register a, Register b) { assert(UseSIGTRAP, "precondition"); emit_int32( TD_OPCODE | to(tobits) | ra(a) | rb(b)); }
++inline void Assembler::tw( int tobits, Register a, Register b) { assert(UseSIGTRAP, "precondition"); emit_int32( TW_OPCODE | to(tobits) | ra(a) | rb(b)); }
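++// Note: twi_0 encodes "twi 0,a,0", which never traps; it merely creates a
++// data dependency on register a (useful for load/isync ordering). The
++// checked variants assert UseSIGTRAP because the trap they raise is
++// expected to be handled by the VM's signal handler.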
++
++// FLOATING POINT instructions (PPC).
++// PPC 1, section 4.6.2 Floating-Point Load Instructions
++// Use ra0mem instead of ra in some instructions below.
++inline void Assembler::lfs( FloatRegister d, int si16, Register a) { emit_int32( LFS_OPCODE | frt(d) | ra0mem(a) | simm(si16,16)); }
++inline void Assembler::lfsu(FloatRegister d, int si16, Register a) { emit_int32( LFSU_OPCODE | frt(d) | ra(a) | simm(si16,16)); }
++inline void Assembler::lfsx(FloatRegister d, Register a, Register b) { emit_int32( LFSX_OPCODE | frt(d) | ra0mem(a) | rb(b)); }
++inline void Assembler::lfd( FloatRegister d, int si16, Register a) { emit_int32( LFD_OPCODE | frt(d) | ra0mem(a) | simm(si16,16)); }
++inline void Assembler::lfdu(FloatRegister d, int si16, Register a) { emit_int32( LFDU_OPCODE | frt(d) | ra(a) | simm(si16,16)); }
++inline void Assembler::lfdx(FloatRegister d, Register a, Register b) { emit_int32( LFDX_OPCODE | frt(d) | ra0mem(a) | rb(b)); }
++
++// PPC 1, section 4.6.3 Floating-Point Store Instructions
++// Use ra0mem instead of ra in some instructions below.
++inline void Assembler::stfs( FloatRegister s, int si16, Register a) { emit_int32( STFS_OPCODE | frs(s) | ra0mem(a) | simm(si16,16)); }
++inline void Assembler::stfsu(FloatRegister s, int si16, Register a) { emit_int32( STFSU_OPCODE | frs(s) | ra(a) | simm(si16,16)); }
++inline void Assembler::stfsx(FloatRegister s, Register a, Register b){ emit_int32( STFSX_OPCODE | frs(s) | ra0mem(a) | rb(b)); }
++inline void Assembler::stfd( FloatRegister s, int si16, Register a) { emit_int32( STFD_OPCODE | frs(s) | ra0mem(a) | simm(si16,16)); }
++inline void Assembler::stfdu(FloatRegister s, int si16, Register a) { emit_int32( STFDU_OPCODE | frs(s) | ra(a) | simm(si16,16)); }
++inline void Assembler::stfdx(FloatRegister s, Register a, Register b){ emit_int32( STFDX_OPCODE | frs(s) | ra0mem(a) | rb(b)); }
++
++// PPC 1, section 4.6.4 Floating-Point Move Instructions
++inline void Assembler::fmr( FloatRegister d, FloatRegister b) { emit_int32( FMR_OPCODE | frt(d) | frb(b) | rc(0)); }
++inline void Assembler::fmr_(FloatRegister d, FloatRegister b) { emit_int32( FMR_OPCODE | frt(d) | frb(b) | rc(1)); }
++
++// These are special Power6 opcodes, reused for "lfdepx" and "stfdepx"
++// on Power7. Do not use.
++//inline void Assembler::mffgpr( FloatRegister d, Register b) { emit_int32( MFFGPR_OPCODE | frt(d) | rb(b) | rc(0)); }
++//inline void Assembler::mftgpr( Register d, FloatRegister b) { emit_int32( MFTGPR_OPCODE | rt(d) | frb(b) | rc(0)); }
++// cmpb and popcntb are used to detect the PPC processor version.
++inline void Assembler::cmpb( Register a, Register s, Register b) { guarantee(VM_Version::has_cmpb(), "opcode not supported on this hardware");
++ emit_int32( CMPB_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); }
++inline void Assembler::popcntb(Register a, Register s) { guarantee(VM_Version::has_popcntb(), "opcode not supported on this hardware");
++ emit_int32( POPCNTB_OPCODE | rta(a) | rs(s)); };
++inline void Assembler::popcntw(Register a, Register s) { guarantee(VM_Version::has_popcntw(), "opcode not supported on this hardware");
++ emit_int32( POPCNTW_OPCODE | rta(a) | rs(s)); };
++inline void Assembler::popcntd(Register a, Register s) { emit_int32( POPCNTD_OPCODE | rta(a) | rs(s)); };
++
++inline void Assembler::fneg( FloatRegister d, FloatRegister b) { emit_int32( FNEG_OPCODE | frt(d) | frb(b) | rc(0)); }
++inline void Assembler::fneg_( FloatRegister d, FloatRegister b) { emit_int32( FNEG_OPCODE | frt(d) | frb(b) | rc(1)); }
++inline void Assembler::fabs( FloatRegister d, FloatRegister b) { emit_int32( FABS_OPCODE | frt(d) | frb(b) | rc(0)); }
++inline void Assembler::fabs_( FloatRegister d, FloatRegister b) { emit_int32( FABS_OPCODE | frt(d) | frb(b) | rc(1)); }
++inline void Assembler::fnabs( FloatRegister d, FloatRegister b) { emit_int32( FNABS_OPCODE | frt(d) | frb(b) | rc(0)); }
++inline void Assembler::fnabs_(FloatRegister d, FloatRegister b) { emit_int32( FNABS_OPCODE | frt(d) | frb(b) | rc(1)); }
++
++// PPC 1, section 4.6.5.1 Floating-Point Elementary Arithmetic Instructions
++inline void Assembler::fadd( FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FADD_OPCODE | frt(d) | fra(a) | frb(b) | rc(0)); }
++inline void Assembler::fadd_( FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FADD_OPCODE | frt(d) | fra(a) | frb(b) | rc(1)); }
++inline void Assembler::fadds( FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FADDS_OPCODE | frt(d) | fra(a) | frb(b) | rc(0)); }
++inline void Assembler::fadds_(FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FADDS_OPCODE | frt(d) | fra(a) | frb(b) | rc(1)); }
++inline void Assembler::fsub( FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FSUB_OPCODE | frt(d) | fra(a) | frb(b) | rc(0)); }
++inline void Assembler::fsub_( FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FSUB_OPCODE | frt(d) | fra(a) | frb(b) | rc(1)); }
++inline void Assembler::fsubs( FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FSUBS_OPCODE | frt(d) | fra(a) | frb(b) | rc(0)); }
++inline void Assembler::fsubs_(FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FSUBS_OPCODE | frt(d) | fra(a) | frb(b) | rc(1)); }
++inline void Assembler::fmul( FloatRegister d, FloatRegister a, FloatRegister c) { emit_int32( FMUL_OPCODE | frt(d) | fra(a) | frc(c) | rc(0)); }
++inline void Assembler::fmul_( FloatRegister d, FloatRegister a, FloatRegister c) { emit_int32( FMUL_OPCODE | frt(d) | fra(a) | frc(c) | rc(1)); }
++inline void Assembler::fmuls( FloatRegister d, FloatRegister a, FloatRegister c) { emit_int32( FMULS_OPCODE | frt(d) | fra(a) | frc(c) | rc(0)); }
++inline void Assembler::fmuls_(FloatRegister d, FloatRegister a, FloatRegister c) { emit_int32( FMULS_OPCODE | frt(d) | fra(a) | frc(c) | rc(1)); }
++inline void Assembler::fdiv( FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FDIV_OPCODE | frt(d) | fra(a) | frb(b) | rc(0)); }
++inline void Assembler::fdiv_( FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FDIV_OPCODE | frt(d) | fra(a) | frb(b) | rc(1)); }
++inline void Assembler::fdivs( FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FDIVS_OPCODE | frt(d) | fra(a) | frb(b) | rc(0)); }
++inline void Assembler::fdivs_(FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FDIVS_OPCODE | frt(d) | fra(a) | frb(b) | rc(1)); }
++
++// PPC 1, section 4.6.6 Floating-Point Rounding and Conversion Instructions
++inline void Assembler::frsp( FloatRegister d, FloatRegister b) { emit_int32( FRSP_OPCODE | frt(d) | frb(b) | rc(0)); }
++inline void Assembler::fctid( FloatRegister d, FloatRegister b) { emit_int32( FCTID_OPCODE | frt(d) | frb(b) | rc(0)); }
++inline void Assembler::fctidz(FloatRegister d, FloatRegister b) { emit_int32( FCTIDZ_OPCODE | frt(d) | frb(b) | rc(0)); }
++inline void Assembler::fctiw( FloatRegister d, FloatRegister b) { emit_int32( FCTIW_OPCODE | frt(d) | frb(b) | rc(0)); }
++inline void Assembler::fctiwz(FloatRegister d, FloatRegister b) { emit_int32( FCTIWZ_OPCODE | frt(d) | frb(b) | rc(0)); }
++inline void Assembler::fcfid( FloatRegister d, FloatRegister b) { emit_int32( FCFID_OPCODE | frt(d) | frb(b) | rc(0)); }
++inline void Assembler::fcfids(FloatRegister d, FloatRegister b) { guarantee(VM_Version::has_fcfids(), "opcode not supported on this hardware");
++ emit_int32( FCFIDS_OPCODE | frt(d) | frb(b) | rc(0)); }
++
++// PPC 1, section 4.6.7 Floating-Point Compare Instructions
++inline void Assembler::fcmpu( ConditionRegister crx, FloatRegister a, FloatRegister b) { emit_int32( FCMPU_OPCODE | bf(crx) | fra(a) | frb(b)); }
++
++// PPC 1, section 5.2.1 Floating-Point Arithmetic Instructions
++inline void Assembler::fsqrt( FloatRegister d, FloatRegister b) { guarantee(VM_Version::has_fsqrt(), "opcode not supported on this hardware");
++ emit_int32( FSQRT_OPCODE | frt(d) | frb(b) | rc(0)); }
++inline void Assembler::fsqrts(FloatRegister d, FloatRegister b) { guarantee(VM_Version::has_fsqrts(), "opcode not supported on this hardware");
++ emit_int32( FSQRTS_OPCODE | frt(d) | frb(b) | rc(0)); }
++
++// Vector instructions for >= Power6.
++inline void Assembler::lvebx( VectorRegister d, Register s1, Register s2) { emit_int32( LVEBX_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); }
++inline void Assembler::lvehx( VectorRegister d, Register s1, Register s2) { emit_int32( LVEHX_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); }
++inline void Assembler::lvewx( VectorRegister d, Register s1, Register s2) { emit_int32( LVEWX_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); }
++inline void Assembler::lvx( VectorRegister d, Register s1, Register s2) { emit_int32( LVX_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); }
++inline void Assembler::lvxl( VectorRegister d, Register s1, Register s2) { emit_int32( LVXL_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); }
++inline void Assembler::stvebx(VectorRegister d, Register s1, Register s2) { emit_int32( STVEBX_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); }
++inline void Assembler::stvehx(VectorRegister d, Register s1, Register s2) { emit_int32( STVEHX_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); }
++inline void Assembler::stvewx(VectorRegister d, Register s1, Register s2) { emit_int32( STVEWX_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); }
++inline void Assembler::stvx( VectorRegister d, Register s1, Register s2) { emit_int32( STVX_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); }
++inline void Assembler::stvxl( VectorRegister d, Register s1, Register s2) { emit_int32( STVXL_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); }
++inline void Assembler::lvsl( VectorRegister d, Register s1, Register s2) { emit_int32( LVSL_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); }
++inline void Assembler::lvsr( VectorRegister d, Register s1, Register s2) { emit_int32( LVSR_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); }
++
++inline void Assembler::vpkpx( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKPX_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vpkshss( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKSHSS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vpkswss( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKSWSS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vpkshus( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKSHUS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vpkswus( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKSWUS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vpkuhum( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKUHUM_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vpkuwum( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKUWUM_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vpkuhus( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKUHUS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vpkuwus( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKUWUS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vupkhpx( VectorRegister d, VectorRegister b) { emit_int32( VUPKHPX_OPCODE | vrt(d) | vrb(b)); }
++inline void Assembler::vupkhsb( VectorRegister d, VectorRegister b) { emit_int32( VUPKHSB_OPCODE | vrt(d) | vrb(b)); }
++inline void Assembler::vupkhsh( VectorRegister d, VectorRegister b) { emit_int32( VUPKHSH_OPCODE | vrt(d) | vrb(b)); }
++inline void Assembler::vupklpx( VectorRegister d, VectorRegister b) { emit_int32( VUPKLPX_OPCODE | vrt(d) | vrb(b)); }
++inline void Assembler::vupklsb( VectorRegister d, VectorRegister b) { emit_int32( VUPKLSB_OPCODE | vrt(d) | vrb(b)); }
++inline void Assembler::vupklsh( VectorRegister d, VectorRegister b) { emit_int32( VUPKLSH_OPCODE | vrt(d) | vrb(b)); }
++inline void Assembler::vmrghb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMRGHB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vmrghw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMRGHW_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vmrghh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMRGHH_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vmrglb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMRGLB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vmrglw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMRGLW_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vmrglh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMRGLH_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vsplt( VectorRegister d, int ui4, VectorRegister b) { emit_int32( VSPLT_OPCODE | vrt(d) | vsplt_uim(uimm(ui4,4)) | vrb(b)); }
++inline void Assembler::vsplth( VectorRegister d, int ui3, VectorRegister b) { emit_int32( VSPLTH_OPCODE | vrt(d) | vsplt_uim(uimm(ui3,3)) | vrb(b)); }
++inline void Assembler::vspltw( VectorRegister d, int ui2, VectorRegister b) { emit_int32( VSPLTW_OPCODE | vrt(d) | vsplt_uim(uimm(ui2,2)) | vrb(b)); }
++inline void Assembler::vspltisb(VectorRegister d, int si5) { emit_int32( VSPLTISB_OPCODE| vrt(d) | vsplti_sim(simm(si5,5))); }
++inline void Assembler::vspltish(VectorRegister d, int si5) { emit_int32( VSPLTISH_OPCODE| vrt(d) | vsplti_sim(simm(si5,5))); }
++inline void Assembler::vspltisw(VectorRegister d, int si5) { emit_int32( VSPLTISW_OPCODE| vrt(d) | vsplti_sim(simm(si5,5))); }
++inline void Assembler::vperm( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c){ emit_int32( VPERM_OPCODE | vrt(d) | vra(a) | vrb(b) | vrc(c)); }
++inline void Assembler::vsel( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c){ emit_int32( VSEL_OPCODE | vrt(d) | vra(a) | vrb(b) | vrc(c)); }
++inline void Assembler::vsl( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSL_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vsldoi( VectorRegister d, VectorRegister a, VectorRegister b, int si4) { emit_int32( VSLDOI_OPCODE| vrt(d) | vra(a) | vrb(b) | vsldoi_shb(simm(si4,4))); }
++inline void Assembler::vslo( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSLO_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vsr( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSR_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vsro( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSRO_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vaddcuw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDCUW_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vaddshs( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDSHS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vaddsbs( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDSBS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vaddsws( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDSWS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vaddubm( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDUBM_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vadduwm( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDUWM_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vadduhm( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDUHM_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vaddubs( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDUBS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vadduws( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDUWS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vadduhs( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDUHS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vsubcuw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBCUW_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vsubshs( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBSHS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vsubsbs( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBSBS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vsubsws( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBSWS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vsububm( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBUBM_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vsubuwm( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBUWM_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vsubuhm( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBUHM_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vsububs( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBUBS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vsubuws( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBUWS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vsubuhs( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBUHS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vmulesb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULESB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vmuleub( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULEUB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vmulesh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULESH_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vmuleuh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULEUH_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vmulosb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULOSB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vmuloub( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULOUB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vmulosh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULOSH_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vmulouh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULOUH_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vmhaddshs(VectorRegister d,VectorRegister a, VectorRegister b, VectorRegister c) { emit_int32( VMHADDSHS_OPCODE | vrt(d) | vra(a) | vrb(b)| vrc(c)); }
++inline void Assembler::vmhraddshs(VectorRegister d,VectorRegister a,VectorRegister b, VectorRegister c) { emit_int32( VMHRADDSHS_OPCODE| vrt(d) | vra(a) | vrb(b)| vrc(c)); }
++inline void Assembler::vmladduhm(VectorRegister d,VectorRegister a, VectorRegister b, VectorRegister c) { emit_int32( VMLADDUHM_OPCODE | vrt(d) | vra(a) | vrb(b)| vrc(c)); }
++inline void Assembler::vmsubuhm(VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c) { emit_int32( VMSUBUHM_OPCODE | vrt(d) | vra(a) | vrb(b)| vrc(c)); }
++inline void Assembler::vmsummbm(VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c) { emit_int32( VMSUMMBM_OPCODE | vrt(d) | vra(a) | vrb(b)| vrc(c)); }
++inline void Assembler::vmsumshm(VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c) { emit_int32( VMSUMSHM_OPCODE | vrt(d) | vra(a) | vrb(b)| vrc(c)); }
++inline void Assembler::vmsumshs(VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c) { emit_int32( VMSUMSHS_OPCODE | vrt(d) | vra(a) | vrb(b)| vrc(c)); }
++inline void Assembler::vmsumuhm(VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c) { emit_int32( VMSUMUHM_OPCODE | vrt(d) | vra(a) | vrb(b)| vrc(c)); }
++inline void Assembler::vmsumuhs(VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c) { emit_int32( VMSUMUHS_OPCODE | vrt(d) | vra(a) | vrb(b)| vrc(c)); }
++inline void Assembler::vsumsws( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUMSWS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vsum2sws(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUM2SWS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vsum4sbs(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUM4SBS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vsum4ubs(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUM4UBS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vsum4shs(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUM4SHS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vavgsb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VAVGSB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vavgsw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VAVGSW_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vavgsh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VAVGSH_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vavgub( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VAVGUB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vavguw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VAVGUW_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vavguh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VAVGUH_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vmaxsb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMAXSB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vmaxsw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMAXSW_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vmaxsh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMAXSH_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vmaxub( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMAXUB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vmaxuw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMAXUW_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vmaxuh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMAXUH_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vminsb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMINSB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vminsw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMINSW_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vminsh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMINSH_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vminub( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMINUB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vminuw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMINUW_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vminuh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMINUH_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vcmpequb(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPEQUB_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); }
++inline void Assembler::vcmpequh(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPEQUH_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); }
++inline void Assembler::vcmpequw(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPEQUW_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); }
++inline void Assembler::vcmpgtsh(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPGTSH_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); }
++inline void Assembler::vcmpgtsb(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPGTSB_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); }
++inline void Assembler::vcmpgtsw(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPGTSW_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); }
++inline void Assembler::vcmpgtub(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPGTUB_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); }
++inline void Assembler::vcmpgtuh(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPGTUH_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); }
++inline void Assembler::vcmpgtuw(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPGTUW_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); }
++inline void Assembler::vcmpequb_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPEQUB_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); }
++inline void Assembler::vcmpequh_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPEQUH_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); }
++inline void Assembler::vcmpequw_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPEQUW_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); }
++inline void Assembler::vcmpgtsh_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPGTSH_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); }
++inline void Assembler::vcmpgtsb_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPGTSB_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); }
++inline void Assembler::vcmpgtsw_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPGTSW_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); }
++inline void Assembler::vcmpgtub_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPGTUB_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); }
++inline void Assembler::vcmpgtuh_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPGTUH_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); }
++inline void Assembler::vcmpgtuw_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPGTUW_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); }
++inline void Assembler::vand( VectorRegister d, VectorRegister a, VectorRegister b) { guarantee(VM_Version::has_vand(), "opcode not supported on this hardware");
++ emit_int32( VAND_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vandc( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VANDC_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vnor( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VNOR_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vor( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VOR_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vxor( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VXOR_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vrlb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VRLB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vrlw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VRLW_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vrlh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VRLH_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vslb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSLB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vskw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSKW_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vslh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSLH_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vsrb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSRB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vsrw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSRW_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vsrh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSRH_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vsrab( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSRAB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vsraw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSRAW_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::vsrah( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSRAH_OPCODE | vrt(d) | vra(a) | vrb(b)); }
++inline void Assembler::mtvscr( VectorRegister b) { emit_int32( MTVSCR_OPCODE | vrb(b)); }
++inline void Assembler::mfvscr( VectorRegister d) { emit_int32( MFVSCR_OPCODE | vrt(d)); }
++
++// ra0 version (the RA base operand is omitted and encoded as 0).
++inline void Assembler::lwzx( Register d, Register s2) { emit_int32( LWZX_OPCODE | rt(d) | rb(s2));}
++inline void Assembler::lwz( Register d, int si16 ) { emit_int32( LWZ_OPCODE | rt(d) | d1(si16));}
++inline void Assembler::lwax( Register d, Register s2) { emit_int32( LWAX_OPCODE | rt(d) | rb(s2));}
++inline void Assembler::lwa( Register d, int si16 ) { emit_int32( LWA_OPCODE | rt(d) | ds(si16));}
++inline void Assembler::lhzx( Register d, Register s2) { emit_int32( LHZX_OPCODE | rt(d) | rb(s2));}
++inline void Assembler::lhz( Register d, int si16 ) { emit_int32( LHZ_OPCODE | rt(d) | d1(si16));}
++inline void Assembler::lhax( Register d, Register s2) { emit_int32( LHAX_OPCODE | rt(d) | rb(s2));}
++inline void Assembler::lha( Register d, int si16 ) { emit_int32( LHA_OPCODE | rt(d) | d1(si16));}
++inline void Assembler::lbzx( Register d, Register s2) { emit_int32( LBZX_OPCODE | rt(d) | rb(s2));}
++inline void Assembler::lbz( Register d, int si16 ) { emit_int32( LBZ_OPCODE | rt(d) | d1(si16));}
++inline void Assembler::ld( Register d, int si16 ) { emit_int32( LD_OPCODE | rt(d) | ds(si16));}
++inline void Assembler::ldx( Register d, Register s2) { emit_int32( LDX_OPCODE | rt(d) | rb(s2));}
++inline void Assembler::stwx( Register d, Register s2) { emit_int32( STWX_OPCODE | rs(d) | rb(s2));}
++inline void Assembler::stw( Register d, int si16 ) { emit_int32( STW_OPCODE | rs(d) | d1(si16));}
++inline void Assembler::sthx( Register d, Register s2) { emit_int32( STHX_OPCODE | rs(d) | rb(s2));}
++inline void Assembler::sth( Register d, int si16 ) { emit_int32( STH_OPCODE | rs(d) | d1(si16));}
++inline void Assembler::stbx( Register d, Register s2) { emit_int32( STBX_OPCODE | rs(d) | rb(s2));}
++inline void Assembler::stb( Register d, int si16 ) { emit_int32( STB_OPCODE | rs(d) | d1(si16));}
++inline void Assembler::std( Register d, int si16 ) { emit_int32( STD_OPCODE | rs(d) | ds(si16));}
++inline void Assembler::stdx( Register d, Register s2) { emit_int32( STDX_OPCODE | rs(d) | rb(s2));}
++
++// ra0 version
++inline void Assembler::icbi( Register s2) { emit_int32( ICBI_OPCODE | rb(s2) ); }
++//inline void Assembler::dcba( Register s2) { emit_int32( DCBA_OPCODE | rb(s2) ); }
++inline void Assembler::dcbz( Register s2) { emit_int32( DCBZ_OPCODE | rb(s2) ); }
++inline void Assembler::dcbst( Register s2) { emit_int32( DCBST_OPCODE | rb(s2) ); }
++inline void Assembler::dcbf( Register s2) { emit_int32( DCBF_OPCODE | rb(s2) ); }
++inline void Assembler::dcbt( Register s2) { emit_int32( DCBT_OPCODE | rb(s2) ); }
++inline void Assembler::dcbtct( Register s2, int ct) { emit_int32( DCBT_OPCODE | rb(s2) | thct(ct)); }
++inline void Assembler::dcbtds( Register s2, int ds) { emit_int32( DCBT_OPCODE | rb(s2) | thds(ds)); }
++inline void Assembler::dcbtst( Register s2) { emit_int32( DCBTST_OPCODE | rb(s2) ); }
++inline void Assembler::dcbtstct(Register s2, int ct) { emit_int32( DCBTST_OPCODE | rb(s2) | thct(ct)); }
++
++// ra0 version
++inline void Assembler::lwarx_unchecked(Register d, Register b, int eh1) { emit_int32( LWARX_OPCODE | rt(d) | rb(b) | eh(eh1)); }
++inline void Assembler::ldarx_unchecked(Register d, Register b, int eh1) { emit_int32( LDARX_OPCODE | rt(d) | rb(b) | eh(eh1)); }
++inline void Assembler::lwarx( Register d, Register b, bool hint_exclusive_access){ lwarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
++inline void Assembler::ldarx( Register d, Register b, bool hint_exclusive_access){ ldarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
++inline void Assembler::stwcx_(Register s, Register b) { emit_int32( STWCX_OPCODE | rs(s) | rb(b) | rc(1)); }
++inline void Assembler::stdcx_(Register s, Register b) { emit_int32( STDCX_OPCODE | rs(s) | rb(b) | rc(1)); }
++
++// ra0 version
++inline void Assembler::lfs( FloatRegister d, int si16) { emit_int32( LFS_OPCODE | frt(d) | simm(si16,16)); }
++inline void Assembler::lfsx(FloatRegister d, Register b) { emit_int32( LFSX_OPCODE | frt(d) | rb(b)); }
++inline void Assembler::lfd( FloatRegister d, int si16) { emit_int32( LFD_OPCODE | frt(d) | simm(si16,16)); }
++inline void Assembler::lfdx(FloatRegister d, Register b) { emit_int32( LFDX_OPCODE | frt(d) | rb(b)); }
++
++// ra0 version
++inline void Assembler::stfs( FloatRegister s, int si16) { emit_int32( STFS_OPCODE | frs(s) | simm(si16, 16)); }
++inline void Assembler::stfsx(FloatRegister s, Register b) { emit_int32( STFSX_OPCODE | frs(s) | rb(b)); }
++inline void Assembler::stfd( FloatRegister s, int si16) { emit_int32( STFD_OPCODE | frs(s) | simm(si16, 16)); }
++inline void Assembler::stfdx(FloatRegister s, Register b) { emit_int32( STFDX_OPCODE | frs(s) | rb(b)); }
++
++// ra0 version
++inline void Assembler::lvebx( VectorRegister d, Register s2) { emit_int32( LVEBX_OPCODE | vrt(d) | rb(s2)); }
++inline void Assembler::lvehx( VectorRegister d, Register s2) { emit_int32( LVEHX_OPCODE | vrt(d) | rb(s2)); }
++inline void Assembler::lvewx( VectorRegister d, Register s2) { emit_int32( LVEWX_OPCODE | vrt(d) | rb(s2)); }
++inline void Assembler::lvx( VectorRegister d, Register s2) { emit_int32( LVX_OPCODE | vrt(d) | rb(s2)); }
++inline void Assembler::lvxl( VectorRegister d, Register s2) { emit_int32( LVXL_OPCODE | vrt(d) | rb(s2)); }
++inline void Assembler::stvebx(VectorRegister d, Register s2) { emit_int32( STVEBX_OPCODE | vrt(d) | rb(s2)); }
++inline void Assembler::stvehx(VectorRegister d, Register s2) { emit_int32( STVEHX_OPCODE | vrt(d) | rb(s2)); }
++inline void Assembler::stvewx(VectorRegister d, Register s2) { emit_int32( STVEWX_OPCODE | vrt(d) | rb(s2)); }
++inline void Assembler::stvx( VectorRegister d, Register s2) { emit_int32( STVX_OPCODE | vrt(d) | rb(s2)); }
++inline void Assembler::stvxl( VectorRegister d, Register s2) { emit_int32( STVXL_OPCODE | vrt(d) | rb(s2)); }
++inline void Assembler::lvsl( VectorRegister d, Register s2) { emit_int32( LVSL_OPCODE | vrt(d) | rb(s2)); }
++inline void Assembler::lvsr( VectorRegister d, Register s2) { emit_int32( LVSR_OPCODE | vrt(d) | rb(s2)); }
++
++inline void Assembler::load_const(Register d, void* x, Register tmp) {
++ load_const(d, (long)x, tmp);
++}
++
++// Load a 64 bit constant encoded by a `Label'. This works for bound
++// labels as well as unbound ones. For unbound labels, the code will
++// be patched as soon as the label gets bound.
++inline void Assembler::load_const(Register d, Label& L, Register tmp) {
++ load_const(d, target(L), tmp);
++}
++
++// Load a 64-bit constant given by an AddressLiteral; the result is patchable.
++inline void Assembler::load_const(Register d, AddressLiteral& a, Register tmp) {
++ assert(d != R0, "R0 not allowed");
++ // First relocate (we don't change the offset in the RelocationHolder,
++ // just pass a.rspec()), then delegate to load_const(Register, long).
++ relocate(a.rspec());
++ load_const(d, (long)a.value(), tmp);
++}
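++// Usage sketch (names hypothetical): materializing the address of a runtime
++// entry could look like load_const(Rtmp, (void*)entry, noreg). For an
++// AddressLiteral the relocation is recorded before emission, so the
++// constant stays patchable in the generated code.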
++
++
++#endif // CPU_PPC_VM_ASSEMBLER_PPC_INLINE_HPP
+--- ./hotspot/src/cpu/ppc/vm/bytecodeInterpreter_ppc.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/bytecodeInterpreter_ppc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,106 @@
++/*
++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_PPC_VM_BYTECODEINTERPRETER_PPC_HPP
++#define CPU_PPC_VM_BYTECODEINTERPRETER_PPC_HPP
++
++// Platform-specific definitions for the C++-based interpreter.
++#define LOTS_OF_REGS /* Lets interpreter use plenty of registers */
++
++private:
++
++  // The bottom of the stack is saved after frame manager setup so that it
++  // can be restored easily after a recursive interpreter call returns.
++ intptr_t* _frame_bottom; // Saved bottom of frame manager frame.
++ address _last_Java_pc; // Pc to return to in frame manager.
++ intptr_t* _last_Java_fp; // frame pointer
++ intptr_t* _last_Java_sp; // stack pointer
++  interpreterState _self_link;          // Previous interpreter state (sometimes points to self).
++ double _native_fresult; // Save result of native calls that might return floats.
++ intptr_t _native_lresult; // Save result of native calls that might return handle/longs.
++
++public:
++ address last_Java_pc(void) { return _last_Java_pc; }
++ intptr_t* last_Java_fp(void) { return _last_Java_fp; }
++
++ static ByteSize native_lresult_offset() {
++ return byte_offset_of(BytecodeInterpreter, _native_lresult);
++ }
++
++ static ByteSize native_fresult_offset() {
++ return byte_offset_of(BytecodeInterpreter, _native_fresult);
++ }
++
++ static void pd_layout_interpreterState(interpreterState istate, address last_Java_pc, intptr_t* last_Java_fp);
++
++#define SET_LAST_JAVA_FRAME() THREAD->frame_anchor()->set(istate->_last_Java_sp, istate->_last_Java_pc);
++#define RESET_LAST_JAVA_FRAME() THREAD->frame_anchor()->clear();
++
++
++// Macros for accessing the stack.
++#undef STACK_INT
++#undef STACK_FLOAT
++#undef STACK_ADDR
++#undef STACK_OBJECT
++#undef STACK_DOUBLE
++#undef STACK_LONG
++
++// JavaStack Implementation
++#define STACK_SLOT(offset) ((address) &topOfStack[-(offset)])
++#define STACK_INT(offset) (*((jint*) &topOfStack[-(offset)]))
++#define STACK_FLOAT(offset) (*((jfloat *) &topOfStack[-(offset)]))
++#define STACK_OBJECT(offset) (*((oop *) &topOfStack [-(offset)]))
++#define STACK_DOUBLE(offset) (((VMJavaVal64*) &topOfStack[-(offset)])->d)
++#define STACK_LONG(offset) (((VMJavaVal64 *) &topOfStack[-(offset)])->l)
++
++#define SET_STACK_SLOT(value, offset) (*(intptr_t*)&topOfStack[-(offset)] = *(intptr_t*)(value))
++#define SET_STACK_ADDR(value, offset) (*((address *)&topOfStack[-(offset)]) = (value))
++#define SET_STACK_INT(value, offset) (*((jint *)&topOfStack[-(offset)]) = (value))
++#define SET_STACK_FLOAT(value, offset) (*((jfloat *)&topOfStack[-(offset)]) = (value))
++#define SET_STACK_OBJECT(value, offset) (*((oop *)&topOfStack[-(offset)]) = (value))
++#define SET_STACK_DOUBLE(value, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->d = (value))
++#define SET_STACK_DOUBLE_FROM_ADDR(addr, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->d = \
++ ((VMJavaVal64*)(addr))->d)
++#define SET_STACK_LONG(value, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->l = (value))
++#define SET_STACK_LONG_FROM_ADDR(addr, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->l = \
++ ((VMJavaVal64*)(addr))->l)
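++// Example (informal): topOfStack points at the next free slot, so the value
++// on top of the expression stack is read with STACK_INT(-1), while a push
++// stores via SET_STACK_INT(v, 0) and then advances topOfStack.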
++// JavaLocals implementation
++
++#define LOCALS_SLOT(offset) ((intptr_t*)&locals[-(offset)])
++#define LOCALS_ADDR(offset) ((address)locals[-(offset)])
++#define LOCALS_INT(offset) (*(jint*)&(locals[-(offset)]))
++#define LOCALS_OBJECT(offset) (cast_to_oop(locals[-(offset)]))
++#define LOCALS_LONG_AT(offset) (((address)&locals[-((offset) + 1)]))
++#define LOCALS_DOUBLE_AT(offset) (((address)&locals[-((offset) + 1)]))
++
++#define SET_LOCALS_SLOT(value, offset) (*(intptr_t*)&locals[-(offset)] = *(intptr_t *)(value))
++#define SET_LOCALS_INT(value, offset) (*((jint *)&locals[-(offset)]) = (value))
++#define SET_LOCALS_DOUBLE(value, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->d = (value))
++#define SET_LOCALS_LONG(value, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->l = (value))
++#define SET_LOCALS_DOUBLE_FROM_ADDR(addr, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->d = \
++ ((VMJavaVal64*)(addr))->d)
++
++
++#endif // CPU_PPC_VM_BYTECODEINTERPRETER_PPC_HPP
+--- ./hotspot/src/cpu/ppc/vm/bytecodeInterpreter_ppc.inline.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/bytecodeInterpreter_ppc.inline.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,290 @@
++/*
++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_PPC_VM_BYTECODEINTERPRETER_PPC_INLINE_HPP
++#define CPU_PPC_VM_BYTECODEINTERPRETER_PPC_INLINE_HPP
++
++#ifdef CC_INTERP
++
++// Inline interpreter functions for ppc.
++
++#include <math.h> // for fmod(), used by VMfloatRem and VMdoubleRem below
++
++inline jfloat BytecodeInterpreter::VMfloatAdd(jfloat op1, jfloat op2) { return op1 + op2; }
++inline jfloat BytecodeInterpreter::VMfloatSub(jfloat op1, jfloat op2) { return op1 - op2; }
++inline jfloat BytecodeInterpreter::VMfloatMul(jfloat op1, jfloat op2) { return op1 * op2; }
++inline jfloat BytecodeInterpreter::VMfloatDiv(jfloat op1, jfloat op2) { return op1 / op2; }
++inline jfloat BytecodeInterpreter::VMfloatRem(jfloat op1, jfloat op2) { return (jfloat)fmod((double)op1, (double)op2); }
++
++inline jfloat BytecodeInterpreter::VMfloatNeg(jfloat op) { return -op; }
++
++inline int32_t BytecodeInterpreter::VMfloatCompare(jfloat op1, jfloat op2, int32_t direction) {
++ return ( op1 < op2 ? -1 :
++ op1 > op2 ? 1 :
++ op1 == op2 ? 0 :
++ (direction == -1 || direction == 1) ? direction : 0);
++
++}
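++// Note: if either operand is NaN none of the three comparisons holds, so
++// `direction' is returned: +1 implements fcmpg and -1 implements fcmpl,
++// matching the JVM specification.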
++
++inline void BytecodeInterpreter::VMmemCopy64(uint32_t to[2], const uint32_t from[2]) {
++ to[0] = from[0]; to[1] = from[1];
++}
++
++// The long operations depend on compiler support for "long long" on ppc.
++
++inline jlong BytecodeInterpreter::VMlongAdd(jlong op1, jlong op2) {
++ return op1 + op2;
++}
++
++inline jlong BytecodeInterpreter::VMlongAnd(jlong op1, jlong op2) {
++ return op1 & op2;
++}
++
++inline jlong BytecodeInterpreter::VMlongDiv(jlong op1, jlong op2) {
++ if (op1 == min_jlong && op2 == -1) return op1;
++ return op1 / op2;
++}
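++// The guard covers the only overflowing case of long division: the true
++// quotient of min_jlong / -1 (2^63) is unrepresentable, and Java defines
++// the result to be min_jlong itself.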
++
++inline jlong BytecodeInterpreter::VMlongMul(jlong op1, jlong op2) {
++ return op1 * op2;
++}
++
++inline jlong BytecodeInterpreter::VMlongOr(jlong op1, jlong op2) {
++ return op1 | op2;
++}
++
++inline jlong BytecodeInterpreter::VMlongSub(jlong op1, jlong op2) {
++ return op1 - op2;
++}
++
++inline jlong BytecodeInterpreter::VMlongXor(jlong op1, jlong op2) {
++ return op1 ^ op2;
++}
++
++inline jlong BytecodeInterpreter::VMlongRem(jlong op1, jlong op2) {
++ if (op1 == min_jlong && op2 == -1) return 0;
++ return op1 % op2;
++}
++
++inline jlong BytecodeInterpreter::VMlongUshr(jlong op1, jint op2) {
++ return ((uint64_t) op1) >> (op2 & 0x3F);
++}
++
++inline jlong BytecodeInterpreter::VMlongShr(jlong op1, jint op2) {
++ return op1 >> (op2 & 0x3F);
++}
++
++inline jlong BytecodeInterpreter::VMlongShl(jlong op1, jint op2) {
++ return op1 << (op2 & 0x3F);
++}
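++// As required by the JLS, long shift distances are masked to the low six
++// bits (op2 & 0x3F); a shift by 65 therefore behaves like a shift by 1.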
++
++inline jlong BytecodeInterpreter::VMlongNeg(jlong op) {
++ return -op;
++}
++
++inline jlong BytecodeInterpreter::VMlongNot(jlong op) {
++ return ~op;
++}
++
++inline int32_t BytecodeInterpreter::VMlongLtz(jlong op) {
++ return (op <= 0);
++}
++
++inline int32_t BytecodeInterpreter::VMlongGez(jlong op) {
++ return (op >= 0);
++}
++
++inline int32_t BytecodeInterpreter::VMlongEqz(jlong op) {
++ return (op == 0);
++}
++
++inline int32_t BytecodeInterpreter::VMlongEq(jlong op1, jlong op2) {
++ return (op1 == op2);
++}
++
++inline int32_t BytecodeInterpreter::VMlongNe(jlong op1, jlong op2) {
++ return (op1 != op2);
++}
++
++inline int32_t BytecodeInterpreter::VMlongGe(jlong op1, jlong op2) {
++ return (op1 >= op2);
++}
++
++inline int32_t BytecodeInterpreter::VMlongLe(jlong op1, jlong op2) {
++ return (op1 <= op2);
++}
++
++inline int32_t BytecodeInterpreter::VMlongLt(jlong op1, jlong op2) {
++ return (op1 < op2);
++}
++
++inline int32_t BytecodeInterpreter::VMlongGt(jlong op1, jlong op2) {
++ return (op1 > op2);
++}
++
++inline int32_t BytecodeInterpreter::VMlongCompare(jlong op1, jlong op2) {
++ return (VMlongLt(op1, op2) ? -1 : VMlongGt(op1, op2) ? 1 : 0);
++}
++
++// Long conversions
++
++inline jdouble BytecodeInterpreter::VMlong2Double(jlong val) {
++ return (jdouble) val;
++}
++
++inline jfloat BytecodeInterpreter::VMlong2Float(jlong val) {
++ return (jfloat) val;
++}
++
++inline jint BytecodeInterpreter::VMlong2Int(jlong val) {
++ return (jint) val;
++}
++
++// Double Arithmetic
++
++inline jdouble BytecodeInterpreter::VMdoubleAdd(jdouble op1, jdouble op2) {
++ return op1 + op2;
++}
++
++inline jdouble BytecodeInterpreter::VMdoubleDiv(jdouble op1, jdouble op2) {
++ return op1 / op2;
++}
++
++inline jdouble BytecodeInterpreter::VMdoubleMul(jdouble op1, jdouble op2) {
++ return op1 * op2;
++}
++
++inline jdouble BytecodeInterpreter::VMdoubleNeg(jdouble op) {
++ return -op;
++}
++
++inline jdouble BytecodeInterpreter::VMdoubleRem(jdouble op1, jdouble op2) {
++ return fmod(op1, op2);
++}
++
++inline jdouble BytecodeInterpreter::VMdoubleSub(jdouble op1, jdouble op2) {
++ return op1 - op2;
++}
++
++inline int32_t BytecodeInterpreter::VMdoubleCompare(jdouble op1, jdouble op2, int32_t direction) {
++ return ( op1 < op2 ? -1 :
++ op1 > op2 ? 1 :
++ op1 == op2 ? 0 :
++ (direction == -1 || direction == 1) ? direction : 0);
++}
++
++// Double Conversions
++
++inline jfloat BytecodeInterpreter::VMdouble2Float(jdouble val) {
++ return (jfloat) val;
++}
++
++// Float Conversions
++
++inline jdouble BytecodeInterpreter::VMfloat2Double(jfloat op) {
++ return (jdouble) op;
++}
++
++// Integer Arithmetic
++
++inline jint BytecodeInterpreter::VMintAdd(jint op1, jint op2) {
++ return op1 + op2;
++}
++
++inline jint BytecodeInterpreter::VMintAnd(jint op1, jint op2) {
++ return op1 & op2;
++}
++
++inline jint BytecodeInterpreter::VMintDiv(jint op1, jint op2) {
++ /* it's possible we could catch this special case implicitly */
++ if ((juint)op1 == 0x80000000 && op2 == -1) return op1;
++ else return op1 / op2;
++}
++
++inline jint BytecodeInterpreter::VMintMul(jint op1, jint op2) {
++ return op1 * op2;
++}
++
++inline jint BytecodeInterpreter::VMintNeg(jint op) {
++ return -op;
++}
++
++inline jint BytecodeInterpreter::VMintOr(jint op1, jint op2) {
++ return op1 | op2;
++}
++
++inline jint BytecodeInterpreter::VMintRem(jint op1, jint op2) {
++ /* it's possible we could catch this special case implicitly */
++ if ((juint)op1 == 0x80000000 && op2 == -1) return 0;
++ else return op1 % op2;
++}
++
++inline jint BytecodeInterpreter::VMintShl(jint op1, jint op2) {
++ return op1 << (op2 & 0x1f);
++}
++
++inline jint BytecodeInterpreter::VMintShr(jint op1, jint op2) {
++ return op1 >> (op2 & 0x1f);
++}
++
++inline jint BytecodeInterpreter::VMintSub(jint op1, jint op2) {
++ return op1 - op2;
++}
++
++inline juint BytecodeInterpreter::VMintUshr(jint op1, jint op2) {
++ return ((juint) op1) >> (op2 & 0x1f);
++}
++
++inline jint BytecodeInterpreter::VMintXor(jint op1, jint op2) {
++ return op1 ^ op2;
++}
++
++inline jdouble BytecodeInterpreter::VMint2Double(jint val) {
++ return (jdouble) val;
++}
++
++inline jfloat BytecodeInterpreter::VMint2Float(jint val) {
++ return (jfloat) val;
++}
++
++inline jlong BytecodeInterpreter::VMint2Long(jint val) {
++ return (jlong) val;
++}
++
++inline jchar BytecodeInterpreter::VMint2Char(jint val) {
++ return (jchar) val;
++}
++
++inline jshort BytecodeInterpreter::VMint2Short(jint val) {
++ return (jshort) val;
++}
++
++inline jbyte BytecodeInterpreter::VMint2Byte(jint val) {
++ return (jbyte) val;
++}
++
++#endif // CC_INTERP
++
++#endif // CPU_PPC_VM_BYTECODEINTERPRETER_PPC_INLINE_HPP
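
[Editor's note] A quick standalone illustration (plain C++, outside HotSpot; the function names are mine) of why the min_jlong guards in VMlongDiv/VMlongRem above are needed: Java pins Long.MIN_VALUE / -1 to Long.MIN_VALUE and Long.MIN_VALUE % -1 to 0, whereas the same signed division is undefined behavior in C/C++ and, depending on the CPU, may trap or return an unspecified value.

    #include <cstdint>
    #include <cassert>

    static const int64_t kMinI64 = INT64_MIN;

    // Mirrors the VMlongDiv/VMlongRem guards: sidestep the one input pair
    // (INT64_MIN, -1) for which C++ signed division is undefined.
    int64_t java_ldiv(int64_t a, int64_t b) {
      if (a == kMinI64 && b == -1) return a;  // Java wraps: MIN / -1 == MIN
      return a / b;                           // b == 0 throws in Java; not modeled here
    }

    int64_t java_lrem(int64_t a, int64_t b) {
      if (a == kMinI64 && b == -1) return 0;  // Java: MIN % -1 == 0
      return a % b;
    }

    int main() {
      assert(java_ldiv(kMinI64, -1) == kMinI64);
      assert(java_lrem(kMinI64, -1) == 0);
      assert(java_ldiv(7, -2) == -3);  // both Java and C++ truncate toward zero
      return 0;
    }
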
+--- ./hotspot/src/cpu/ppc/vm/bytecodes_ppc.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/bytecodes_ppc.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,31 @@
++/*
++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "interpreter/bytecodes.hpp"
++
++void Bytecodes::pd_initialize() {
++ // No ppc specific initialization.
++}
+--- ./hotspot/src/cpu/ppc/vm/bytecodes_ppc.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/bytecodes_ppc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,31 @@
++/*
++ * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_PPC_VM_BYTECODES_PPC_HPP
++#define CPU_PPC_VM_BYTECODES_PPC_HPP
++
++// No ppc64 specific bytecodes
++
++#endif // CPU_PPC_VM_BYTECODES_PPC_HPP
+--- ./hotspot/src/cpu/ppc/vm/bytes_ppc.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/bytes_ppc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,281 @@
++/*
++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_PPC_VM_BYTES_PPC_HPP
++#define CPU_PPC_VM_BYTES_PPC_HPP
++
++#include "memory/allocation.hpp"
++
++class Bytes: AllStatic {
++ public:
++ // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering
++ // PowerPC needs to check for alignment.
++
++ // Can I count on address always being a pointer to an unsigned char? Yes.
++
++#if defined(VM_LITTLE_ENDIAN)
++
++ // Returns true if the byte ordering used by Java is different from the native
++ // byte ordering of the underlying machine; for example, true for Intel x86,
++ // false for Solaris on SPARC.
++ static inline bool is_Java_byte_ordering_different() { return true; }
++
++ // Forward declarations of the compiler-dependent implementation
++ static inline u2 swap_u2(u2 x);
++ static inline u4 swap_u4(u4 x);
++ static inline u8 swap_u8(u8 x);
++
++ static inline u2 get_native_u2(address p) {
++ return (intptr_t(p) & 1) == 0
++ ? *(u2*)p
++ : ( u2(p[1]) << 8 )
++ | ( u2(p[0]) );
++ }
++
++ static inline u4 get_native_u4(address p) {
++ switch (intptr_t(p) & 3) {
++ case 0: return *(u4*)p;
++
++ case 2: return ( u4( ((u2*)p)[1] ) << 16 )
++ | ( u4( ((u2*)p)[0] ) );
++
++ default: return ( u4(p[3]) << 24 )
++ | ( u4(p[2]) << 16 )
++ | ( u4(p[1]) << 8 )
++ | u4(p[0]);
++ }
++ }
++
++ static inline u8 get_native_u8(address p) {
++ switch (intptr_t(p) & 7) {
++ case 0: return *(u8*)p;
++
++ case 4: return ( u8( ((u4*)p)[1] ) << 32 )
++ | ( u8( ((u4*)p)[0] ) );
++
++ case 2: return ( u8( ((u2*)p)[3] ) << 48 )
++ | ( u8( ((u2*)p)[2] ) << 32 )
++ | ( u8( ((u2*)p)[1] ) << 16 )
++ | ( u8( ((u2*)p)[0] ) );
++
++ default: return ( u8(p[7]) << 56 )
++ | ( u8(p[6]) << 48 )
++ | ( u8(p[5]) << 40 )
++ | ( u8(p[4]) << 32 )
++ | ( u8(p[3]) << 24 )
++ | ( u8(p[2]) << 16 )
++ | ( u8(p[1]) << 8 )
++ | u8(p[0]);
++ }
++ }
++
++
++
++ static inline void put_native_u2(address p, u2 x) {
++ if ( (intptr_t(p) & 1) == 0 ) *(u2*)p = x;
++ else {
++ p[1] = x >> 8;
++ p[0] = x;
++ }
++ }
++
++ static inline void put_native_u4(address p, u4 x) {
++ switch ( intptr_t(p) & 3 ) {
++ case 0: *(u4*)p = x;
++ break;
++
++ case 2: ((u2*)p)[1] = x >> 16;
++ ((u2*)p)[0] = x;
++ break;
++
++ default: ((u1*)p)[3] = x >> 24;
++ ((u1*)p)[2] = x >> 16;
++ ((u1*)p)[1] = x >> 8;
++ ((u1*)p)[0] = x;
++ break;
++ }
++ }
++
++ static inline void put_native_u8(address p, u8 x) {
++ switch ( intptr_t(p) & 7 ) {
++ case 0: *(u8*)p = x;
++ break;
++
++ case 4: ((u4*)p)[1] = x >> 32;
++ ((u4*)p)[0] = x;
++ break;
++
++ case 2: ((u2*)p)[3] = x >> 48;
++ ((u2*)p)[2] = x >> 32;
++ ((u2*)p)[1] = x >> 16;
++ ((u2*)p)[0] = x;
++ break;
++
++ default: ((u1*)p)[7] = x >> 56;
++ ((u1*)p)[6] = x >> 48;
++ ((u1*)p)[5] = x >> 40;
++ ((u1*)p)[4] = x >> 32;
++ ((u1*)p)[3] = x >> 24;
++ ((u1*)p)[2] = x >> 16;
++ ((u1*)p)[1] = x >> 8;
++ ((u1*)p)[0] = x;
++ }
++ }
++
++ // Efficient reading and writing of unaligned unsigned data in Java byte ordering
++ // (i.e. big-endian ordering). On this little-endian configuration the byte order
++ // must be reversed, hence the swap_u* calls.
++ static inline u2 get_Java_u2(address p) { return swap_u2(get_native_u2(p)); }
++ static inline u4 get_Java_u4(address p) { return swap_u4(get_native_u4(p)); }
++ static inline u8 get_Java_u8(address p) { return swap_u8(get_native_u8(p)); }
++
++ static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, swap_u2(x)); }
++ static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, swap_u4(x)); }
++ static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, swap_u8(x)); }
++
++#else // !defined(VM_LITTLE_ENDIAN)
++
++ // Returns true if the byte ordering used by Java is different from the native
++ // byte ordering of the underlying machine; for example, true for Intel x86,
++ // false for Solaris on SPARC.
++ static inline bool is_Java_byte_ordering_different() { return false; }
++
++ // Thus, a swap between native and Java ordering is always a no-op:
++ static inline u2 swap_u2(u2 x) { return x; }
++ static inline u4 swap_u4(u4 x) { return x; }
++ static inline u8 swap_u8(u8 x) { return x; }
++
++ static inline u2 get_native_u2(address p) {
++ return (intptr_t(p) & 1) == 0
++ ? *(u2*)p
++ : ( u2(p[0]) << 8 )
++ | ( u2(p[1]) );
++ }
++
++ static inline u4 get_native_u4(address p) {
++ switch (intptr_t(p) & 3) {
++ case 0: return *(u4*)p;
++
++ case 2: return ( u4( ((u2*)p)[0] ) << 16 )
++ | ( u4( ((u2*)p)[1] ) );
++
++ default: return ( u4(p[0]) << 24 )
++ | ( u4(p[1]) << 16 )
++ | ( u4(p[2]) << 8 )
++ | u4(p[3]);
++ }
++ }
++
++ static inline u8 get_native_u8(address p) {
++ switch (intptr_t(p) & 7) {
++ case 0: return *(u8*)p;
++
++ case 4: return ( u8( ((u4*)p)[0] ) << 32 )
++ | ( u8( ((u4*)p)[1] ) );
++
++ case 2: return ( u8( ((u2*)p)[0] ) << 48 )
++ | ( u8( ((u2*)p)[1] ) << 32 )
++ | ( u8( ((u2*)p)[2] ) << 16 )
++ | ( u8( ((u2*)p)[3] ) );
++
++ default: return ( u8(p[0]) << 56 )
++ | ( u8(p[1]) << 48 )
++ | ( u8(p[2]) << 40 )
++ | ( u8(p[3]) << 32 )
++ | ( u8(p[4]) << 24 )
++ | ( u8(p[5]) << 16 )
++ | ( u8(p[6]) << 8 )
++ | u8(p[7]);
++ }
++ }
++
++
++
++ static inline void put_native_u2(address p, u2 x) {
++ if ( (intptr_t(p) & 1) == 0 ) { *(u2*)p = x; }
++ else {
++ p[0] = x >> 8;
++ p[1] = x;
++ }
++ }
++
++ static inline void put_native_u4(address p, u4 x) {
++ switch ( intptr_t(p) & 3 ) {
++ case 0: *(u4*)p = x;
++ break;
++
++ case 2: ((u2*)p)[0] = x >> 16;
++ ((u2*)p)[1] = x;
++ break;
++
++ default: ((u1*)p)[0] = x >> 24;
++ ((u1*)p)[1] = x >> 16;
++ ((u1*)p)[2] = x >> 8;
++ ((u1*)p)[3] = x;
++ break;
++ }
++ }
++
++ static inline void put_native_u8(address p, u8 x) {
++ switch ( intptr_t(p) & 7 ) {
++ case 0: *(u8*)p = x;
++ break;
++
++ case 4: ((u4*)p)[0] = x >> 32;
++ ((u4*)p)[1] = x;
++ break;
++
++ case 2: ((u2*)p)[0] = x >> 48;
++ ((u2*)p)[1] = x >> 32;
++ ((u2*)p)[2] = x >> 16;
++ ((u2*)p)[3] = x;
++ break;
++
++ default: ((u1*)p)[0] = x >> 56;
++ ((u1*)p)[1] = x >> 48;
++ ((u1*)p)[2] = x >> 40;
++ ((u1*)p)[3] = x >> 32;
++ ((u1*)p)[4] = x >> 24;
++ ((u1*)p)[5] = x >> 16;
++ ((u1*)p)[6] = x >> 8;
++ ((u1*)p)[7] = x;
++ }
++ }
++
++ // Efficient reading and writing of unaligned unsigned data in Java byte ordering (i.e. big-endian ordering)
++ // (no byte-order reversal is needed since Power CPUs are big-endian oriented).
++ static inline u2 get_Java_u2(address p) { return get_native_u2(p); }
++ static inline u4 get_Java_u4(address p) { return get_native_u4(p); }
++ static inline u8 get_Java_u8(address p) { return get_native_u8(p); }
++
++ static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, x); }
++ static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, x); }
++ static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, x); }
++
++#endif // VM_LITTLE_ENDIAN
++};
++
++#if defined(TARGET_OS_ARCH_linux_ppc)
++#include "bytes_linux_ppc.inline.hpp"
++#endif
++
++#endif // CPU_PPC_VM_BYTES_PPC_HPP
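
[Editor's note] For readers unfamiliar with the alignment dance above: the "default:" byte-by-byte cases are the portable fallback, and assembling the bytes explicitly also fixes the endianness in one go. A minimal host-side sketch (my own function name, not HotSpot's), runnable on any machine:

    #include <cstdint>
    #include <cstdio>

    // Reads a 32-bit value in Java (big-endian) order from a possibly
    // unaligned pointer, one byte at a time -- the same fallback the
    // "default:" cases above use. Byte-wise access is endian- and
    // alignment-safe on any host.
    static uint32_t get_java_u4(const unsigned char* p) {
      return (uint32_t(p[0]) << 24) | (uint32_t(p[1]) << 16) |
             (uint32_t(p[2]) << 8)  |  uint32_t(p[3]);
    }

    int main() {
      unsigned char buf[] = { 0xCA, 0xFE, 0xBA, 0xBE, 0x00 };
      std::printf("%08x\n", (unsigned)get_java_u4(buf));      // cafebabe
      std::printf("%08x\n", (unsigned)get_java_u4(buf + 1));  // unaligned: febabe00
      return 0;
    }
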
+--- ./hotspot/src/cpu/ppc/vm/c2_globals_ppc.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/c2_globals_ppc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,98 @@
++/*
++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2014 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_PPC_VM_C2_GLOBALS_PPC_HPP
++#define CPU_PPC_VM_C2_GLOBALS_PPC_HPP
++
++#include "utilities/globalDefinitions.hpp"
++#include "utilities/macros.hpp"
++
++// Sets the default values for platform dependent flags used by the server compiler.
++// (see c2_globals.hpp).
++
++define_pd_global(bool, BackgroundCompilation, true);
++define_pd_global(bool, CICompileOSR, true);
++define_pd_global(bool, InlineIntrinsics, true);
++define_pd_global(bool, PreferInterpreterNativeStubs, false);
++define_pd_global(bool, ProfileTraps, true);
++define_pd_global(bool, UseOnStackReplacement, true);
++define_pd_global(bool, ProfileInterpreter, true);
++define_pd_global(bool, TieredCompilation, false);
++define_pd_global(intx, CompileThreshold, 10000);
++define_pd_global(intx, BackEdgeThreshold, 140000);
++
++define_pd_global(intx, OnStackReplacePercentage, 140);
++define_pd_global(intx, ConditionalMoveLimit, 3);
++define_pd_global(intx, FLOATPRESSURE, 28);
++define_pd_global(intx, FreqInlineSize, 175);
++define_pd_global(intx, MinJumpTableSize, 10);
++define_pd_global(intx, INTPRESSURE, 25);
++define_pd_global(intx, InteriorEntryAlignment, 16);
++define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K));
++define_pd_global(intx, RegisterCostAreaRatio, 16000);
++define_pd_global(bool, UseTLAB, true);
++define_pd_global(bool, ResizeTLAB, true);
++define_pd_global(intx, LoopUnrollLimit, 60);
++
++// Peephole and CISC spilling both break the graph, and so make the
++// scheduler sick.
++define_pd_global(bool, OptoPeephole, false);
++define_pd_global(bool, UseCISCSpill, false);
++define_pd_global(bool, OptoBundling, false);
++// GL:
++// Detected a problem with unscaled compressed oops and
++// narrow_oop_use_complex_address() == false.
++// -Djava.io.tmpdir=./tmp -jar SPECjvm2008.jar -ikv -wt 3 -it 3
++// -bt 1 --base compiler.sunflow
++// fails in Lower.visitIf->translate->translate->translate and
++// throws an unexpected NPE. A load and a store seem to be
++// reordered. The Java source reads:
++// loc = x.f
++// x.f = 0
++// NullCheck loc
++// While assembler reads:
++// x.f = 0
++// loc = x.f
++// NullCheck loc
++define_pd_global(bool, OptoScheduling, false);
++
++define_pd_global(intx, InitialCodeCacheSize, 2048*K); // Integral multiple of CodeCacheExpansionSize
++define_pd_global(intx, ReservedCodeCacheSize, 256*M);
++define_pd_global(intx, CodeCacheExpansionSize, 64*K);
++
++// Ergonomics related flags
++define_pd_global(uint64_t,MaxRAM, 4ULL*G);
++define_pd_global(uintx, CodeCacheMinBlockLength, 4);
++define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K);
++
++define_pd_global(bool, TrapBasedRangeChecks, true);
++
++// Heap related flags
++define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M));
++
++// Ergonomics related flags
++define_pd_global(bool, NeverActAsServerClassMachine, false);
++
++#endif // CPU_PPC_VM_C2_GLOBALS_PPC_HPP
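
[Editor's note] These define_pd_global entries only pick the compiled-in defaults for the PPC server compiler; any of the product flags can still be overridden at launch. A usage sketch with standard HotSpot flags:

    java -XX:CompileThreshold=5000 -XX:ReservedCodeCacheSize=128m -XX:+PrintFlagsFinal -version

-XX:+PrintFlagsFinal prints the effective value of every flag, which is a convenient way to confirm which of these platform defaults actually took effect on a given build.
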
+--- ./hotspot/src/cpu/ppc/vm/c2_init_ppc.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/c2_init_ppc.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,48 @@
++/*
++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "opto/compile.hpp"
++#include "opto/node.hpp"
++#include "runtime/globals.hpp"
++#include "utilities/debug.hpp"
++
++// processor dependent initialization for ppc
++
++void Compile::pd_compiler2_init() {
++
++ // Power7 and later
++ if (PowerArchitecturePPC64 > 6) {
++ if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
++ FLAG_SET_ERGO(bool, UsePopCountInstruction, true);
++ }
++ }
++
++ if (PowerArchitecturePPC64 == 6) {
++ if (FLAG_IS_DEFAULT(InsertEndGroupPPC64)) {
++ FLAG_SET_ERGO(bool, InsertEndGroupPPC64, true);
++ }
++ }
++}
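
[Editor's note] The FLAG_IS_DEFAULT / FLAG_SET_ERGO pairing above is the standard HotSpot ergonomics idiom: tune a flag only when the user has left it at its default. A toy model of the idea (hypothetical names, not HotSpot's actual flag machinery):

    #include <cstdio>

    // Hypothetical stand-in for a HotSpot flag: it remembers whether it
    // still holds its compiled-in default.
    struct BoolFlag {
      bool value;
      bool is_default;
    };

    // Ergonomic override: change the value only when the user has not
    // already set it on the command line (mirrors the FLAG_IS_DEFAULT +
    // FLAG_SET_ERGO pattern above).
    static void set_ergo(BoolFlag& f, bool v) {
      if (f.is_default) { f.value = v; f.is_default = false; }
    }

    int main() {
      BoolFlag use_popcount = { false, true };   // still at its default
      BoolFlag user_choice  = { false, false };  // user set it explicitly
      set_ergo(use_popcount, true);  // upgraded: the CPU supports it
      set_ergo(user_choice, true);   // left alone: the user decided
      std::printf("%d %d\n", use_popcount.value, user_choice.value);  // 1 0
      return 0;
    }
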
+--- ./hotspot/src/cpu/ppc/vm/codeBuffer_ppc.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/codeBuffer_ppc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,35 @@
++/*
++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_PPC_VM_CODEBUFFER_PPC_HPP
++#define CPU_PPC_VM_CODEBUFFER_PPC_HPP
++
++private:
++ void pd_initialize() {}
++
++public:
++ void flush_bundle(bool start_new_bundle) {}
++
++#endif // CPU_PPC_VM_CODEBUFFER_PPC_HPP
+--- ./hotspot/src/cpu/ppc/vm/compiledIC_ppc.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/compiledIC_ppc.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,261 @@
++/*
++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "asm/macroAssembler.inline.hpp"
++#include "code/compiledIC.hpp"
++#include "code/icBuffer.hpp"
++#include "code/nmethod.hpp"
++#include "memory/resourceArea.hpp"
++#include "runtime/mutexLocker.hpp"
++#include "runtime/safepoint.hpp"
++#ifdef COMPILER2
++#include "opto/matcher.hpp"
++#endif
++
++// Release the CompiledICHolder* associated with this call site if there is one.
++void CompiledIC::cleanup_call_site(virtual_call_Relocation* call_site) {
++ // This call site might have become stale so inspect it carefully.
++ NativeCall* call = nativeCall_at(call_site->addr());
++ if (is_icholder_entry(call->destination())) {
++ NativeMovConstReg* value = nativeMovConstReg_at(call_site->cached_value());
++ InlineCacheBuffer::queue_for_release((CompiledICHolder*)value->data());
++ }
++}
++
++bool CompiledIC::is_icholder_call_site(virtual_call_Relocation* call_site) {
++ // This call site might have become stale so inspect it carefully.
++ NativeCall* call = nativeCall_at(call_site->addr());
++ return is_icholder_entry(call->destination());
++}
++
++//-----------------------------------------------------------------------------
++// High-level access to an inline cache. Guaranteed to be MT-safe.
++
++CompiledIC::CompiledIC(nmethod* nm, NativeCall* call)
++ : _ic_call(call)
++{
++ address ic_call = call->instruction_address();
++
++ assert(ic_call != NULL, "ic_call address must be set");
++ assert(nm != NULL, "must pass nmethod");
++ assert(nm->contains(ic_call), "must be in nmethod");
++
++ // Search for the ic_call at the given address.
++ RelocIterator iter(nm, ic_call, ic_call+1);
++ bool ret = iter.next();
++ assert(ret == true, "relocInfo must exist at this address");
++ assert(iter.addr() == ic_call, "must find ic_call");
++ if (iter.type() == relocInfo::virtual_call_type) {
++ virtual_call_Relocation* r = iter.virtual_call_reloc();
++ _is_optimized = false;
++ _value = nativeMovConstReg_at(r->cached_value());
++ } else {
++ assert(iter.type() == relocInfo::opt_virtual_call_type, "must be a virtual call");
++ _is_optimized = true;
++ _value = NULL;
++ }
++}
++
++// ----------------------------------------------------------------------------
++
++// A PPC CompiledStaticCall looks like this:
++//
++// >>>> consts
++//
++// [call target1]
++// [IC cache]
++// [call target2]
++//
++// <<<< consts
++// >>>> insts
++//
++// bl offset16 -+ -+ ??? // How many bits available?
++// | |
++// <<<< insts | |
++// >>>> stubs | |
++// | |- trampoline_stub_Reloc
++// trampoline stub: | <-+
++// r2 = toc |
++// r2 = [r2 + offset] | // Load call target1 from const section
++// mtctr r2 |
++// bctr |- static_stub_Reloc
++// comp_to_interp_stub: <---+
++// r1 = toc
++// ICreg = [r1 + IC_offset] // Load IC from const section
++// r1 = [r1 + offset] // Load call target2 from const section
++// mtctr r1
++// bctr
++//
++// <<<< stubs
++//
++// The call instruction in the code either
++// - branches directly to a compiled method if offset encodable in instruction
++// - branches to the trampoline stub if offset to compiled method not encodable
++// - branches to the compiled_to_interp stub if target interpreted
++//
++// Further there are three relocations from the loads to the constants in
++// the constant section.
++//
++// The use of r1 vs. r2 in the stubs makes it possible to distinguish them.
++
++const int IC_pos_in_java_to_interp_stub = 8;
++#define __ _masm.
++void CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf) {
++#ifdef COMPILER2
++ // Get the mark within main instrs section which is set to the address of the call.
++ address call_addr = cbuf.insts_mark();
++
++ // Note that the code buffer's insts_mark is always relative to insts.
++ // That's why we must use the macroassembler to generate a stub.
++ MacroAssembler _masm(&cbuf);
++
++ // Start the stub.
++ address stub = __ start_a_stub(CompiledStaticCall::to_interp_stub_size());
++ if (stub == NULL) {
++ Compile::current()->env()->record_out_of_memory_failure();
++ return;
++ }
++
++ // For java_to_interp stubs we use R11_scratch1 as scratch register
++ // and in call trampoline stubs we use R12_scratch2. This way we
++ // can distinguish them (see is_NativeCallTrampolineStub_at()).
++ Register reg_scratch = R11_scratch1;
++
++ // Create a static stub relocation which relates this stub
++ // with the call instruction at insts_call_instruction_offset in the
++ // instructions code-section.
++ __ relocate(static_stub_Relocation::spec(call_addr));
++ const int stub_start_offset = __ offset();
++
++ // Now, create the stub's code:
++ // - load the TOC
++ // - load the inline cache oop from the constant pool
++ // - load the call target from the constant pool
++ // - call
++ __ calculate_address_from_global_toc(reg_scratch, __ method_toc());
++ AddressLiteral ic = __ allocate_metadata_address((Metadata *)NULL);
++ __ load_const_from_method_toc(as_Register(Matcher::inline_cache_reg_encode()), ic, reg_scratch);
++
++ if (ReoptimizeCallSequences) {
++ __ b64_patchable((address)-1, relocInfo::none);
++ } else {
++ AddressLiteral a((address)-1);
++ __ load_const_from_method_toc(reg_scratch, a, reg_scratch);
++ __ mtctr(reg_scratch);
++ __ bctr();
++ }
++
++ // FIXME: Assert that the stub can be identified and patched.
++
++ // Java_to_interp_stub_size should be good.
++ assert((__ offset() - stub_start_offset) <= CompiledStaticCall::to_interp_stub_size(),
++ "should be good size");
++ assert(!is_NativeCallTrampolineStub_at(__ addr_at(stub_start_offset)),
++ "must not confuse java_to_interp with trampoline stubs");
++
++ // End the stub.
++ __ end_a_stub();
++#else
++ ShouldNotReachHere();
++#endif
++}
++#undef __
++
++// Size of the java_to_interp stub; this doesn't need to be accurate, but it
++// must be greater than or equal to the real size of the stub.
++// Used for optimization in Compile::Shorten_branches.
++int CompiledStaticCall::to_interp_stub_size() {
++ return 12 * BytesPerInstWord;
++}
++
++// Relocation entries for call stub, compiled java to interpreter.
++// Used for optimization in Compile::Shorten_branches.
++int CompiledStaticCall::reloc_to_interp_stub() {
++ return 5;
++}
++
++void CompiledStaticCall::set_to_interpreted(methodHandle callee, address entry) {
++ address stub = find_stub();
++ guarantee(stub != NULL, "stub not found");
++
++ if (TraceICs) {
++ ResourceMark rm;
++ tty->print_cr("CompiledStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s",
++ instruction_address(),
++ callee->name_and_sig_as_C_string());
++ }
++
++ // Creation also verifies the object.
++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + IC_pos_in_java_to_interp_stub);
++ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address());
++
++ assert(method_holder->data() == 0 || method_holder->data() == (intptr_t)callee(),
++ "a) MT-unsafe modification of inline cache");
++ assert(jump->jump_destination() == (address)-1 || jump->jump_destination() == entry,
++ "b) MT-unsafe modification of inline cache");
++
++ // Update stub.
++ method_holder->set_data((intptr_t)callee());
++ jump->set_jump_destination(entry);
++
++ // Update jump to call.
++ set_destination_mt_safe(stub);
++}
++
++void CompiledStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) {
++ assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call");
++ // Reset stub.
++ address stub = static_stub->addr();
++ assert(stub != NULL, "stub not found");
++ // Creation also verifies the object.
++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + IC_pos_in_java_to_interp_stub);
++ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address());
++ method_holder->set_data(0);
++ jump->set_jump_destination((address)-1);
++}
++
++//-----------------------------------------------------------------------------
++// Non-product mode code
++#ifndef PRODUCT
++
++void CompiledStaticCall::verify() {
++ // Verify call.
++ NativeCall::verify();
++ if (os::is_MP()) {
++ verify_alignment();
++ }
++
++ // Verify stub.
++ address stub = find_stub();
++ assert(stub != NULL, "no stub found for static call");
++ // Creation also verifies the object.
++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + IC_pos_in_java_to_interp_stub);
++ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address());
++
++ // Verify state.
++ assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check");
++}
++
++#endif // !PRODUCT
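
[Editor's note] The two "MT-unsafe modification" asserts and the write order in set_to_interpreted() encode a publication protocol: the stub's Method* and branch target are filled in before the live call instruction is redirected at the stub, so a racing thread never executes a half-initialized stub. A simplified sketch of that ordering with C++11 atomics (illustrative types only; the real code patches instructions and flushes the icache instead):

    #include <atomic>
    #include <cstdint>

    struct InterpStub {
      std::atomic<intptr_t> method;  // stands in for the NativeMovConstReg data
      std::atomic<intptr_t> target;  // stands in for the NativeJump destination
    };

    struct CallSite {
      std::atomic<InterpStub*> destination;  // stands in for the call's target
    };

    void set_to_interpreted(CallSite& call, InterpStub& stub,
                            intptr_t callee, intptr_t entry) {
      stub.method.store(callee, std::memory_order_relaxed);
      stub.target.store(entry, std::memory_order_relaxed);
      // Publish last: only now may other threads branch into the stub.
      call.destination.store(&stub, std::memory_order_release);
    }

    int main() {
      InterpStub stub{0, 0};
      CallSite call{nullptr};
      set_to_interpreted(call, stub, 0x1234, 0x5678);
      return call.destination.load() == &stub ? 0 : 1;
    }
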
+--- ./hotspot/src/cpu/ppc/vm/copy_ppc.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/copy_ppc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,171 @@
++/*
++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_PPC_VM_COPY_PPC_HPP
++#define CPU_PPC_VM_COPY_PPC_HPP
++
++#ifndef PPC64
++#error "copy currently only implemented for PPC64"
++#endif
++
++// Inline functions for memory copy and fill.
++
++static void pd_conjoint_words(HeapWord* from, HeapWord* to, size_t count) {
++ (void)memmove(to, from, count * HeapWordSize);
++}
++
++static void pd_disjoint_words(HeapWord* from, HeapWord* to, size_t count) {
++ switch (count) {
++ case 8: to[7] = from[7];
++ case 7: to[6] = from[6];
++ case 6: to[5] = from[5];
++ case 5: to[4] = from[4];
++ case 4: to[3] = from[3];
++ case 3: to[2] = from[2];
++ case 2: to[1] = from[1];
++ case 1: to[0] = from[0];
++ case 0: break;
++ default: (void)memcpy(to, from, count * HeapWordSize);
++ break;
++ }
++}
++
++static void pd_disjoint_words_atomic(HeapWord* from, HeapWord* to, size_t count) {
++ switch (count) {
++ case 8: to[7] = from[7];
++ case 7: to[6] = from[6];
++ case 6: to[5] = from[5];
++ case 5: to[4] = from[4];
++ case 4: to[3] = from[3];
++ case 3: to[2] = from[2];
++ case 2: to[1] = from[1];
++ case 1: to[0] = from[0];
++ case 0: break;
++ default: while (count-- > 0) {
++ *to++ = *from++;
++ }
++ break;
++ }
++}
++
++static void pd_aligned_conjoint_words(HeapWord* from, HeapWord* to, size_t count) {
++ (void)memmove(to, from, count * HeapWordSize);
++}
++
++static void pd_aligned_disjoint_words(HeapWord* from, HeapWord* to, size_t count) {
++ pd_disjoint_words(from, to, count);
++}
++
++static void pd_conjoint_bytes(void* from, void* to, size_t count) {
++ (void)memmove(to, from, count);
++}
++
++static void pd_conjoint_bytes_atomic(void* from, void* to, size_t count) {
++ (void)memmove(to, from, count);
++}
++
++// Template for atomic, element-wise copy.
++template <class T>
++static void copy_conjoint_atomic(T* from, T* to, size_t count) {
++ if (from > to) {
++ while (count-- > 0) {
++ // Copy forwards
++ *to++ = *from++;
++ }
++ } else {
++ from += count - 1;
++ to += count - 1;
++ while (count-- > 0) {
++ // Copy backwards
++ *to-- = *from--;
++ }
++ }
++}
++
++static void pd_conjoint_jshorts_atomic(jshort* from, jshort* to, size_t count) {
++ // TODO: contribute optimized version.
++ copy_conjoint_atomic(from, to, count);
++}
++
++static void pd_conjoint_jints_atomic(jint* from, jint* to, size_t count) {
++ // TODO: contribute optimized version.
++ copy_conjoint_atomic(from, to, count);
++}
++
++static void pd_conjoint_jlongs_atomic(jlong* from, jlong* to, size_t count) {
++ copy_conjoint_atomic(from, to, count);
++}
++
++static void pd_conjoint_oops_atomic(oop* from, oop* to, size_t count) {
++ copy_conjoint_atomic(from, to, count);
++}
++
++static void pd_arrayof_conjoint_bytes(HeapWord* from, HeapWord* to, size_t count) {
++ pd_conjoint_bytes_atomic(from, to, count);
++}
++
++static void pd_arrayof_conjoint_jshorts(HeapWord* from, HeapWord* to, size_t count) {
++ // TODO: contribute optimized version.
++ pd_conjoint_jshorts_atomic((jshort*)from, (jshort*)to, count);
++}
++
++static void pd_arrayof_conjoint_jints(HeapWord* from, HeapWord* to, size_t count) {
++ // TODO: contribute optimized version.
++ pd_conjoint_jints_atomic((jint*)from, (jint*)to, count);
++}
++
++static void pd_arrayof_conjoint_jlongs(HeapWord* from, HeapWord* to, size_t count) {
++ pd_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count);
++}
++
++static void pd_arrayof_conjoint_oops(HeapWord* from, HeapWord* to, size_t count) {
++ pd_conjoint_oops_atomic((oop*)from, (oop*)to, count);
++}
++
++static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) {
++ julong* to = (julong*)tohw;
++ julong v = ((julong)value << 32) | value;
++ while (count-- > 0) {
++ *to++ = v;
++ }
++}
++
++static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) {
++ pd_fill_to_words(tohw, count, value);
++}
++
++static void pd_fill_to_bytes(void* to, size_t count, jubyte value) {
++ (void)memset(to, value, count);
++}
++
++static void pd_zero_to_words(HeapWord* tohw, size_t count) {
++ pd_fill_to_words(tohw, count, 0);
++}
++
++static void pd_zero_to_bytes(void* to, size_t count) {
++ (void)memset(to, 0, count);
++}
++
++#endif // CPU_PPC_VM_COPY_PPC_HPP
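
[Editor's note] copy_conjoint_atomic above is the classic overlap-aware element copy: walk forwards when the source lies above the destination, backwards otherwise, so that no element is read after it has been overwritten. A standalone equivalent with a small usage check (my own name; it assumes plain T assignments are atomic enough for the caller, as the HotSpot version does):

    #include <cassert>
    #include <cstddef>

    // Pick the copy direction from the relative position of the ranges so
    // that overlapping copies stay correct.
    template <class T>
    void conjoint_copy(const T* from, T* to, std::size_t count) {
      if (from > to) {
        for (std::size_t i = 0; i < count; i++) to[i] = from[i];  // forwards
      } else {
        for (std::size_t i = count; i-- > 0; )  to[i] = from[i];  // backwards
      }
    }

    int main() {
      int a[6] = { 1, 2, 3, 4, 5, 6 };
      conjoint_copy(a, a + 1, 5);  // overlapping shift right by one element
      assert(a[1] == 1 && a[5] == 5);
      return 0;
    }
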
+--- ./hotspot/src/cpu/ppc/vm/cppInterpreterGenerator_ppc.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/cppInterpreterGenerator_ppc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,43 @@
++/*
++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_PPC_VM_CPPINTERPRETERGENERATOR_PPC_HPP
++#define CPU_PPC_VM_CPPINTERPRETERGENERATOR_PPC_HPP
++
++ address generate_normal_entry(void);
++ address generate_native_entry(void);
++
++ void lock_method(void);
++ void unlock_method(void);
++
++ void generate_counter_incr(Label& overflow);
++ void generate_counter_overflow(Label& do_continue);
++
++ void generate_more_monitors();
++ void generate_deopt_handling(Register result_index);
++
++ void generate_compute_interpreter_state(Label& exception_return);
++
++#endif // CPU_PPC_VM_CPPINTERPRETERGENERATOR_PPC_HPP
+--- ./hotspot/src/cpu/ppc/vm/cppInterpreter_ppc.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/cppInterpreter_ppc.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,3074 @@
++
++/*
++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2014 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "asm/assembler.hpp"
++#include "asm/macroAssembler.inline.hpp"
++#include "interpreter/bytecodeHistogram.hpp"
++#include "interpreter/cppInterpreter.hpp"
++#include "interpreter/interpreter.hpp"
++#include "interpreter/interpreterGenerator.hpp"
++#include "interpreter/interpreterRuntime.hpp"
++#include "oops/arrayOop.hpp"
++#include "oops/methodData.hpp"
++#include "oops/method.hpp"
++#include "oops/oop.inline.hpp"
++#include "prims/jvmtiExport.hpp"
++#include "prims/jvmtiThreadState.hpp"
++#include "runtime/arguments.hpp"
++#include "runtime/deoptimization.hpp"
++#include "runtime/frame.inline.hpp"
++#include "runtime/interfaceSupport.hpp"
++#include "runtime/sharedRuntime.hpp"
++#include "runtime/stubRoutines.hpp"
++#include "runtime/synchronizer.hpp"
++#include "runtime/timer.hpp"
++#include "runtime/vframeArray.hpp"
++#include "utilities/debug.hpp"
++#ifdef SHARK
++#include "shark/shark_globals.hpp"
++#endif
++
++#ifdef CC_INTERP
++
++#define __ _masm->
++
++// Contains is used for identifying interpreter frames during a stack-walk.
++// A frame with a PC in InterpretMethod must be identified as a normal C frame.
++bool CppInterpreter::contains(address pc) {
++ return _code->contains(pc);
++}
++
++#ifdef PRODUCT
++#define BLOCK_COMMENT(str) // nothing
++#else
++#define BLOCK_COMMENT(str) __ block_comment(str)
++#endif
++
++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
++
++static address interpreter_frame_manager = NULL;
++static address frame_manager_specialized_return = NULL;
++static address native_entry = NULL;
++
++static address interpreter_return_address = NULL;
++
++static address unctrap_frame_manager_entry = NULL;
++
++static address deopt_frame_manager_return_atos = NULL;
++static address deopt_frame_manager_return_btos = NULL;
++static address deopt_frame_manager_return_itos = NULL;
++static address deopt_frame_manager_return_ltos = NULL;
++static address deopt_frame_manager_return_ftos = NULL;
++static address deopt_frame_manager_return_dtos = NULL;
++static address deopt_frame_manager_return_vtos = NULL;
++
++// A result handler converts/unboxes a native call result into
++// a java interpreter/compiler result. The current frame is an
++// interpreter frame.
++address CppInterpreterGenerator::generate_result_handler_for(BasicType type) {
++ return AbstractInterpreterGenerator::generate_result_handler_for(type);
++}
++
++// tosca based result to c++ interpreter stack based result.
++address CppInterpreterGenerator::generate_tosca_to_stack_converter(BasicType type) {
++ //
++ // A result is in the native abi result register from a native
++ // method call. We need to return this result to the interpreter by
++ // pushing the result on the interpreter's stack.
++ //
++ // Registers alive:
++ // R3_ARG1(R3_RET)/F1_ARG1(F1_RET) - result to move
++ // R4_ARG2 - address of tos
++ // LR
++ //
++ // Registers updated:
++ // R3_RET(R3_ARG1) - address of new tos (== R17_tos for T_VOID)
++ //
++
++ int number_of_used_slots = 1;
++
++ const Register tos = R4_ARG2;
++ Label done;
++ Label is_false;
++
++ address entry = __ pc();
++
++ switch (type) {
++ case T_BOOLEAN:
++ __ cmpwi(CCR0, R3_RET, 0);
++ __ beq(CCR0, is_false);
++ __ li(R3_RET, 1);
++ __ stw(R3_RET, 0, tos);
++ __ b(done);
++ __ bind(is_false);
++ __ li(R3_RET, 0);
++ __ stw(R3_RET, 0, tos);
++ break;
++ case T_BYTE:
++ case T_CHAR:
++ case T_SHORT:
++ case T_INT:
++ __ stw(R3_RET, 0, tos);
++ break;
++ case T_LONG:
++ number_of_used_slots = 2;
++ // mark unused slot for debugging
++ // long goes to topmost slot
++ __ std(R3_RET, -BytesPerWord, tos);
++ __ li(R3_RET, 0);
++ __ std(R3_RET, 0, tos);
++ break;
++ case T_OBJECT:
++ __ verify_oop(R3_RET);
++ __ std(R3_RET, 0, tos);
++ break;
++ case T_FLOAT:
++ __ stfs(F1_RET, 0, tos);
++ break;
++ case T_DOUBLE:
++ number_of_used_slots = 2;
++ // mark unused slot for debugging
++ __ li(R3_RET, 0);
++ __ std(R3_RET, 0, tos);
++ // double goes to topmost slot
++ __ stfd(F1_RET, -BytesPerWord, tos);
++ break;
++ case T_VOID:
++ number_of_used_slots = 0;
++ break;
++ default:
++ ShouldNotReachHere();
++ }
++
++ __ BIND(done);
++
++ // new expression stack top
++ __ addi(R3_RET, tos, -BytesPerWord * number_of_used_slots);
++
++ __ blr();
++
++ return entry;
++}
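
[Editor's note] To make the slot accounting in the converter above concrete: the operand stack grows toward lower addresses, category-1 values take one slot, and longs/doubles take two, with the value in the topmost slot and a debug filler in the other. A host-side sketch of that bookkeeping (hypothetical, plain C++; assumes a 64-bit host, matching PPC64):

    #include <cstdint>
    #include <cstdio>

    int main() {
      intptr_t stack[8] = {0};
      intptr_t* tos = stack + 7;        // next free slot; pushes move downward
      // push an int result: one slot
      *tos = 42;
      tos -= 1;
      // push a long result: two slots, value in the topmost (lowest-address) slot
      *tos = 0;                         // debug filler, as in the T_LONG case above
      *(tos - 1) = 123456789012345LL;   // the actual value
      tos -= 2;
      std::printf("slots used: %d\n", (int)((stack + 7) - tos));  // 3
      return 0;
    }
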
++
++address CppInterpreterGenerator::generate_stack_to_stack_converter(BasicType type) {
++ //
++ // Copy the result from the callee's stack to the caller's stack,
++ // caller and callee both being interpreted.
++ //
++ // Registers alive
++ // R3_ARG1 - address of callee's tos + BytesPerWord
++ // R4_ARG2 - address of caller's tos [i.e. free location]
++ // LR
++ //
++ // stack grows upwards, memory grows downwards.
++ //
++ // [ free ] <-- callee's tos
++ // [ optional result ] <-- R3_ARG1
++ // [ optional dummy ]
++ // ...
++ // [ free ] <-- caller's tos, R4_ARG2
++ // ...
++ // Registers updated
++ // R3_RET(R3_ARG1) - address of caller's new tos
++ //
++ // stack grows upwards, memory grows downwards.
++ //
++ // [ free ] <-- current tos, R3_RET
++ // [ optional result ]
++ // [ optional dummy ]
++ // ...
++ //
++
++ const Register from = R3_ARG1;
++ const Register ret = R3_ARG1;
++ const Register tos = R4_ARG2;
++ const Register tmp1 = R21_tmp1;
++ const Register tmp2 = R22_tmp2;
++
++ address entry = __ pc();
++
++ switch (type) {
++ case T_BOOLEAN:
++ case T_BYTE:
++ case T_CHAR:
++ case T_SHORT:
++ case T_INT:
++ case T_FLOAT:
++ __ lwz(tmp1, 0, from);
++ __ stw(tmp1, 0, tos);
++ // New expression stack top.
++ __ addi(ret, tos, - BytesPerWord);
++ break;
++ case T_LONG:
++ case T_DOUBLE:
++ // Move both entries for debug purposes even though only one is live.
++ __ ld(tmp1, BytesPerWord, from);
++ __ ld(tmp2, 0, from);
++ __ std(tmp1, 0, tos);
++ __ std(tmp2, -BytesPerWord, tos);
++ // New expression stack top.
++ __ addi(ret, tos, - 2 * BytesPerWord); // two slots
++ break;
++ case T_OBJECT:
++ __ ld(tmp1, 0, from);
++ __ verify_oop(tmp1);
++ __ std(tmp1, 0, tos);
++ // New expression stack top.
++ __ addi(ret, tos, - BytesPerWord);
++ break;
++ case T_VOID:
++ // New expression stack top.
++ __ mr(ret, tos);
++ break;
++ default:
++ ShouldNotReachHere();
++ }
++
++ __ blr();
++
++ return entry;
++}
++
++address CppInterpreterGenerator::generate_stack_to_native_abi_converter(BasicType type) {
++ //
++ // Load a result from the callee's stack into the caller's expecting
++ // return register, callee being interpreted, caller being call stub
++ // or jit code.
++ //
++ // Registers alive
++ // R3_ARG1 - callee expression tos + BytesPerWord
++ // LR
++ //
++ // stack grows upwards, memory grows downwards.
++ //
++ // [ free ] <-- callee's tos
++ // [ optional result ] <-- R3_ARG1
++ // [ optional dummy ]
++ // ...
++ //
++ // Registers updated
++ // R3_RET(R3_ARG1)/F1_RET - result
++ //
++
++ const Register from = R3_ARG1;
++ const Register ret = R3_ARG1;
++ const FloatRegister fret = F1_ARG1;
++
++ address entry = __ pc();
++
++ // Implemented uniformly for both kinds of endianness. The interpreter
++ // implements boolean, byte, char, and short as jint (4 bytes).
++ switch (type) {
++ case T_BOOLEAN:
++ case T_CHAR:
++ // zero extension
++ __ lwz(ret, 0, from);
++ break;
++ case T_BYTE:
++ case T_SHORT:
++ case T_INT:
++ // sign extension
++ __ lwa(ret, 0, from);
++ break;
++ case T_LONG:
++ __ ld(ret, 0, from);
++ break;
++ case T_OBJECT:
++ __ ld(ret, 0, from);
++ __ verify_oop(ret);
++ break;
++ case T_FLOAT:
++ __ lfs(fret, 0, from);
++ break;
++ case T_DOUBLE:
++ __ lfd(fret, 0, from);
++ break;
++ case T_VOID:
++ break;
++ default:
++ ShouldNotReachHere();
++ }
++
++ __ blr();
++
++ return entry;
++}
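
[Editor's note] The only subtlety in this converter is the lwz/lwa split: boolean and char are unsigned, so their 32-bit stack slot is zero-extended into the return register, while byte, short, and int are sign-extended. The same distinction in host C++ (a sketch assuming 32-bit slots widened to 64-bit registers):

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t slot = 0xFFFF8000;  // e.g. the short -32768 in a 32-bit stack slot
      // lwz: zero extension -- right for boolean and char (unsigned)
      uint64_t zero_ext = (uint64_t)slot;
      // lwa: sign extension -- right for byte, short, and int (signed)
      int64_t sign_ext = (int64_t)(int32_t)slot;
      std::printf("%016llx %016llx\n",
                  (unsigned long long)zero_ext, (unsigned long long)sign_ext);
      // prints 00000000ffff8000 ffffffffffff8000
      return 0;
    }
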
++
++address CppInterpreter::return_entry(TosState state, int length, Bytecodes::Code code) {
++ assert(interpreter_return_address != NULL, "Not initialized");
++ return interpreter_return_address;
++}
++
++address CppInterpreter::deopt_entry(TosState state, int length) {
++ address ret = NULL;
++ if (length != 0) {
++ switch (state) {
++ case atos: ret = deopt_frame_manager_return_atos; break;
++ case btos: ret = deopt_frame_manager_return_itos; break;
++ case ctos:
++ case stos:
++ case itos: ret = deopt_frame_manager_return_itos; break;
++ case ltos: ret = deopt_frame_manager_return_ltos; break;
++ case ftos: ret = deopt_frame_manager_return_ftos; break;
++ case dtos: ret = deopt_frame_manager_return_dtos; break;
++ case vtos: ret = deopt_frame_manager_return_vtos; break;
++ default: ShouldNotReachHere();
++ }
++ } else {
++ ret = unctrap_frame_manager_entry; // re-execute the bytecode (e.g. uncommon trap, popframe)
++ }
++ assert(ret != NULL, "Not initialized");
++ return ret;
++}
++
++//
++// Helpers for commoning out cases in the various type of method entries.
++//
++
++//
++// Registers alive
++// R16_thread - JavaThread*
++// R1_SP - old stack pointer
++// R19_method - callee's Method
++// R17_tos - address of caller's tos (prepushed)
++// R15_prev_state - address of caller's BytecodeInterpreter or 0
++// return_pc in R21_tmp15 (only when called within generate_native_entry)
++//
++// Registers updated
++// R14_state - address of callee's interpreter state
++// R1_SP - new stack pointer
++// CCR4_is_synced - current method is synchronized
++//
++void CppInterpreterGenerator::generate_compute_interpreter_state(Label& stack_overflow_return) {
++ //
++ // Stack layout at this point:
++ //
++ // F1 [TOP_IJAVA_FRAME_ABI] <-- R1_SP
++ // alignment (optional)
++ // [F1's outgoing Java arguments] <-- R17_tos
++ // ...
++ // F2 [PARENT_IJAVA_FRAME_ABI]
++ // ...
++
++ //=============================================================================
++ // Allocate space for locals other than the parameters, the
++ // interpreter state, monitors, and the expression stack.
++
++ const Register local_count = R21_tmp1;
++ const Register parameter_count = R22_tmp2;
++ const Register max_stack = R23_tmp3;
++ // Must not be overwritten within this method!
++ // const Register return_pc = R29_tmp9;
++
++ const ConditionRegister is_synced = CCR4_is_synced;
++ const ConditionRegister is_native = CCR6;
++ const ConditionRegister is_static = CCR7;
++
++ assert(is_synced != is_native, "condition code registers must be distinct");
++ assert(is_synced != is_static, "condition code registers must be distinct");
++ assert(is_native != is_static, "condition code registers must be distinct");
++
++ {
++
++ // Local registers
++ const Register top_frame_size = R24_tmp4;
++ const Register access_flags = R25_tmp5;
++ const Register state_offset = R26_tmp6;
++ Register mem_stack_limit = R27_tmp7;
++ const Register page_size = R28_tmp8;
++
++ BLOCK_COMMENT("compute_interpreter_state {");
++
++ // access_flags = method->access_flags();
++ // TODO: PPC port: assert(4 == sizeof(AccessFlags), "unexpected field size");
++ __ lwa(access_flags, method_(access_flags));
++
++ // parameter_count = method->constMethod->size_of_parameters();
++ // TODO: PPC port: assert(2 == ConstMethod::sz_size_of_parameters(), "unexpected field size");
++ __ ld(max_stack, in_bytes(Method::const_offset()), R19_method); // Max_stack holds constMethod for a while.
++ __ lhz(parameter_count, in_bytes(ConstMethod::size_of_parameters_offset()), max_stack);
++
++ // local_count = method->constMethod()->max_locals();
++ // TODO: PPC port: assert(2 == ConstMethod::sz_max_locals(), "unexpected field size");
++ __ lhz(local_count, in_bytes(ConstMethod::size_of_locals_offset()), max_stack);
++
++ // max_stack = method->constMethod()->max_stack();
++ // TODO: PPC port: assert(2 == ConstMethod::sz_max_stack(), "unexpected field size");
++ __ lhz(max_stack, in_bytes(ConstMethod::max_stack_offset()), max_stack);
++
++ if (EnableInvokeDynamic) {
++ // Take into account 'extra_stack_entries' needed by method handles (see method.hpp).
++ __ addi(max_stack, max_stack, Method::extra_stack_entries());
++ }
++
++ // mem_stack_limit = thread->stack_limit();
++ __ ld(mem_stack_limit, thread_(stack_overflow_limit));
++
++ // Point locals at the first argument. Method's locals are the
++ // parameters on top of caller's expression stack.
++
++ // tos points past last Java argument
++ __ sldi(R18_locals, parameter_count, Interpreter::logStackElementSize);
++ __ add(R18_locals, R17_tos, R18_locals);
++
++ // R18_locals - i*BytesPerWord points to i-th Java local (i starts at 0)
++
++ // Set is_native, is_synced, is_static - will be used later.
++ __ testbitdi(is_native, R0, access_flags, JVM_ACC_NATIVE_BIT);
++ __ testbitdi(is_synced, R0, access_flags, JVM_ACC_SYNCHRONIZED_BIT);
++ assert(is_synced->is_nonvolatile(), "is_synced must be non-volatile");
++ __ testbitdi(is_static, R0, access_flags, JVM_ACC_STATIC_BIT);
++
++ // PARENT_IJAVA_FRAME_ABI
++ //
++ // frame_size =
++ // round_to((local_count - parameter_count)*BytesPerWord +
++ // 2*BytesPerWord +
++ // alignment +
++ // frame::interpreter_frame_cinterpreterstate_size_in_bytes()
++ // sizeof(PARENT_IJAVA_FRAME_ABI)
++ // method->is_synchronized() ? sizeof(BasicObjectLock) : 0 +
++ // max_stack*BytesPerWord,
++ // 16)
++ //
++ // Note that this calculation is exactly mirrored by
++ // AbstractInterpreter::layout_activation_impl() [ and
++ // AbstractInterpreter::size_activation() ]. Which is used by
++ // deoptimization so that it can allocate a properly sized
++ // frame. This only happens for interpreted frames, so the extra
++ // notes about max_stack below are not important. The other
++ // thing to note is that for interpreter frames other than the
++ // current activation the size of the stack is the size of the live
++ // portion of the stack at the particular bcp and NOT the maximum
++ // stack that the method might use.
++ //
++ // If we're calling a native method, we replace max_stack (which is
++ // zero) with space for the worst-case signature handler varargs
++ // vector, which is:
++ //
++ // max_stack = max(Argument::n_register_parameters, parameter_count+2);
++ //
++ // We add two slots to the parameter_count, one for the jni
++ // environment and one for a possible native mirror. We allocate
++ // space for at least the number of ABI registers, even though
++ // InterpreterRuntime::slow_signature_handler won't write more than
++ // parameter_count+2 words when it creates the varargs vector at the
++ // top of the stack. The generated slow signature handler will just
++ // load trash into registers beyond the necessary number. We're
++ // still going to cut the stack back by the ABI register parameter
++ // count so as to get SP+16 pointing at the ABI outgoing parameter
++ // area, so we need to allocate at least that much even though we're
++ // going to throw it away.
++ //
++
++ // Adjust max_stack for native methods:
++ Label skip_native_calculate_max_stack;
++ __ bfalse(is_native, skip_native_calculate_max_stack);
++ // if (is_native) {
++ // max_stack = max(Argument::n_register_parameters, parameter_count+2);
++ __ addi(max_stack, parameter_count, 2*Interpreter::stackElementWords);
++ __ cmpwi(CCR0, max_stack, Argument::n_register_parameters);
++ __ bge(CCR0, skip_native_calculate_max_stack);
++ __ li(max_stack, Argument::n_register_parameters);
++ // }
++ __ bind(skip_native_calculate_max_stack);
++ // max_stack is now in bytes
++ __ slwi(max_stack, max_stack, Interpreter::logStackElementSize);
++
++ // Calculate number of non-parameter locals (in slots):
++ Label not_java;
++ __ btrue(is_native, not_java);
++ // if (!is_native) {
++ // local_count = non-parameter local count
++ __ sub(local_count, local_count, parameter_count);
++ // } else {
++ // // nothing to do: method->max_locals() == 0 for native methods
++ // }
++ __ bind(not_java);
++
++
++ // Calculate top_frame_size and parent_frame_resize.
++ {
++ const Register parent_frame_resize = R12_scratch2;
++
++ BLOCK_COMMENT("Compute top_frame_size.");
++ // top_frame_size = TOP_IJAVA_FRAME_ABI
++ // + size of interpreter state
++ __ li(top_frame_size, frame::top_ijava_frame_abi_size
++ + frame::interpreter_frame_cinterpreterstate_size_in_bytes());
++ // + max_stack
++ __ add(top_frame_size, top_frame_size, max_stack);
++ // + stack slots for a BasicObjectLock for synchronized methods
++ {
++ Label not_synced;
++ __ bfalse(is_synced, not_synced);
++ __ addi(top_frame_size, top_frame_size, frame::interpreter_frame_monitor_size_in_bytes());
++ __ bind(not_synced);
++ }
++ // align
++ __ round_to(top_frame_size, frame::alignment_in_bytes);
++
++
++ BLOCK_COMMENT("Compute parent_frame_resize.");
++ // parent_frame_resize = R1_SP - R17_tos
++ __ sub(parent_frame_resize, R1_SP, R17_tos);
++ //__ li(parent_frame_resize, 0);
++ // + PARENT_IJAVA_FRAME_ABI
++ // + extra two slots for the no-parameter/no-locals
++ // method result
++ __ addi(parent_frame_resize, parent_frame_resize,
++ frame::parent_ijava_frame_abi_size
++ + 2*Interpreter::stackElementSize);
++ // + (locals_count - params_count)
++ __ sldi(R0, local_count, Interpreter::logStackElementSize);
++ __ add(parent_frame_resize, parent_frame_resize, R0);
++ // align
++ __ round_to(parent_frame_resize, frame::alignment_in_bytes);
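++      // Roughly, in C terms, the computation above is (a sketch):
++      //
++      //   parent_frame_resize =
++      //     round_to((R1_SP - R17_tos)                    // caller's live stack portion
++      //              + parent_ijava_frame_abi_size
++      //              + 2 * Interpreter::stackElementSize  // result slots
++      //              + local_count * Interpreter::stackElementSize,
++      //              frame::alignment_in_bytes);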
++
++ //
++ // Stack layout at this point:
++ //
++ // The new frame F0 hasn't yet been pushed, F1 is still the top frame.
++ //
++ // F0 [TOP_IJAVA_FRAME_ABI]
++ // alignment (optional)
++ // [F0's full operand stack]
++ // [F0's monitors] (optional)
++ // [F0's BytecodeInterpreter object]
++ // F1 [PARENT_IJAVA_FRAME_ABI]
++ // alignment (optional)
++ // [F0's Java result]
++ // [F0's non-arg Java locals]
++ // [F1's outgoing Java arguments] <-- R17_tos
++ // ...
++ // F2 [PARENT_IJAVA_FRAME_ABI]
++ // ...
++
++
++ // Calculate new R14_state
++ // and
++ // test that the new memory stack pointer is above the limit,
++ // throw a StackOverflowError otherwise.
++ __ sub(R11_scratch1/*F1's SP*/, R1_SP, parent_frame_resize);
++ __ addi(R14_state, R11_scratch1/*F1's SP*/,
++ -frame::interpreter_frame_cinterpreterstate_size_in_bytes());
++ __ sub(R11_scratch1/*F0's SP*/,
++ R11_scratch1/*F1's SP*/, top_frame_size);
++
++ BLOCK_COMMENT("Test for stack overflow:");
++ __ cmpld(CCR0/*is_stack_overflow*/, R11_scratch1, mem_stack_limit);
++ __ blt(CCR0/*is_stack_overflow*/, stack_overflow_return);
++
++
++ //=============================================================================
++ // Frame_size doesn't overflow the stack. Allocate new frame and
++ // initialize interpreter state.
++
++ // Register state
++ //
++ // R15 - local_count
++ // R16 - parameter_count
++ // R17 - max_stack
++ //
++ // R18 - frame_size
++ // R19 - access_flags
++ // CCR4_is_synced - is_synced
++ //
++      // R14_state - pointer to the uninitialized new BytecodeInterpreter.
++
++ // _last_Java_pc just needs to be close enough that we can identify
++ // the frame as an interpreted frame. It does not need to be the
++      // exact return address from either the call to
++      // BytecodeInterpreter::InterpretMethod or the call to a JNI native method.
++      // So we can initialize it here with a value of a bundle in this
++      // code fragment. We only do this initialization for Java frames
++      // where InterpretMethod needs a way to get a good pc value to
++      // store in the thread state. For interpreter frames used to call
++      // JNI native code we just zero the value in the state and move an
++ // ip as needed in the native entry code.
++ //
++ // const Register last_Java_pc_addr = GR24_SCRATCH; // QQQ 27
++ // const Register last_Java_pc = GR26_SCRATCH;
++
++ // Must reference stack before setting new SP since Windows
++ // will not be able to deliver the exception on a bad SP.
++ // Windows also insists that we bang each page one at a time in order
++ // for the OS to map in the reserved pages. If we bang only
++ // the final page, Windows stops delivering exceptions to our
++ // VectoredExceptionHandler and terminates our program.
++ // Linux only requires a single bang but it's rare to have
++ // to bang more than 1 page so the code is enabled for both OS's.
++
++ // BANG THE STACK
++ //
++ // Nothing to do for PPC, because updating the SP will automatically
++ // bang the page.
++
++      // Up to here we have calculated the delta for the new C-frame and
++      // checked for a stack overflow. Now we can safely update SP and
++      // resize the C-frame.
++
++ // R14_state has already been calculated.
++ __ push_interpreter_frame(top_frame_size, parent_frame_resize,
++ R25_tmp5, R26_tmp6, R27_tmp7, R28_tmp8);
++
++ }
++
++ //
++ // Stack layout at this point:
++ //
++    // F0 has now been pushed!
++ //
++ // F0 [TOP_IJAVA_FRAME_ABI] <-- R1_SP
++ // alignment (optional) (now it's here, if required)
++ // [F0's full operand stack]
++ // [F0's monitors] (optional)
++ // [F0's BytecodeInterpreter object]
++ // F1 [PARENT_IJAVA_FRAME_ABI]
++ // alignment (optional) (now it's here, if required)
++ // [F0's Java result]
++ // [F0's non-arg Java locals]
++ // [F1's outgoing Java arguments]
++ // ...
++ // F2 [PARENT_IJAVA_FRAME_ABI]
++ // ...
++ //
++ // R14_state points to F0's BytecodeInterpreter object.
++ //
++
++ }
++
++ //=============================================================================
++  // The new BytecodeInterpreter object is safe; let's initialize it:
++  BLOCK_COMMENT("New BytecodeInterpreter object is safe.");
++
++ {
++ // Locals
++ const Register bytecode_addr = R24_tmp4;
++ const Register constants = R25_tmp5;
++ const Register tos = R26_tmp6;
++ const Register stack_base = R27_tmp7;
++ const Register local_addr = R28_tmp8;
++ {
++ Label L;
++ __ btrue(is_native, L);
++ // if (!is_native) {
++ // bytecode_addr = constMethod->codes();
++ __ ld(bytecode_addr, method_(const));
++ __ addi(bytecode_addr, bytecode_addr, in_bytes(ConstMethod::codes_offset()));
++ // }
++ __ bind(L);
++ }
++
++ __ ld(constants, in_bytes(Method::const_offset()), R19_method);
++ __ ld(constants, in_bytes(ConstMethod::constants_offset()), constants);
++
++ // state->_prev_link = prev_state;
++ __ std(R15_prev_state, state_(_prev_link));
++
++ // For assertions only.
++ // TODO: not needed anyway because it coincides with `_monitor_base'. remove!
++ // state->_self_link = state;
++ DEBUG_ONLY(__ std(R14_state, state_(_self_link));)
++
++ // state->_thread = thread;
++ __ std(R16_thread, state_(_thread));
++
++ // state->_method = method;
++ __ std(R19_method, state_(_method));
++
++ // state->_locals = locals;
++ __ std(R18_locals, state_(_locals));
++
++ // state->_oop_temp = NULL;
++ __ li(R0, 0);
++ __ std(R0, state_(_oop_temp));
++
++ // state->_last_Java_fp = *R1_SP // Use *R1_SP as fp
++ __ ld(R0, _abi(callers_sp), R1_SP);
++ __ std(R0, state_(_last_Java_fp));
++
++ BLOCK_COMMENT("load Stack base:");
++ {
++ // Stack_base.
++ // if (!method->synchronized()) {
++ // stack_base = state;
++ // } else {
++ // stack_base = (uintptr_t)state - sizeof(BasicObjectLock);
++ // }
++ Label L;
++ __ mr(stack_base, R14_state);
++ __ bfalse(is_synced, L);
++ __ addi(stack_base, stack_base, -frame::interpreter_frame_monitor_size_in_bytes());
++ __ bind(L);
++ }
++
++ // state->_mdx = NULL;
++ __ li(R0, 0);
++ __ std(R0, state_(_mdx));
++
++ {
++ // if (method->is_native()) state->_bcp = NULL;
++ // else state->_bcp = bytecode_addr;
++ Label label1, label2;
++ __ bfalse(is_native, label1);
++ __ std(R0, state_(_bcp));
++ __ b(label2);
++ __ bind(label1);
++ __ std(bytecode_addr, state_(_bcp));
++ __ bind(label2);
++ }
++
++
++ // state->_result._to_call._callee = NULL;
++ __ std(R0, state_(_result._to_call._callee));
++
++ // state->_monitor_base = state;
++ __ std(R14_state, state_(_monitor_base));
++
++ // state->_msg = BytecodeInterpreter::method_entry;
++ __ li(R0, BytecodeInterpreter::method_entry);
++ __ stw(R0, state_(_msg));
++
++ // state->_last_Java_sp = R1_SP;
++ __ std(R1_SP, state_(_last_Java_sp));
++
++ // state->_stack_base = stack_base;
++ __ std(stack_base, state_(_stack_base));
++
++ // tos = stack_base - 1 slot (prepushed);
++ // state->_stack.Tos(tos);
++ __ addi(tos, stack_base, - Interpreter::stackElementSize);
++ __ std(tos, state_(_stack));
++
++
++ {
++ BLOCK_COMMENT("get last_Java_pc:");
++      // if (!is_native) state->_last_Java_pc = <pc of this code>;
++ // else state->_last_Java_pc = NULL; (just for neatness)
++ Label label1, label2;
++ __ btrue(is_native, label1);
++ __ get_PC_trash_LR(R0);
++ __ std(R0, state_(_last_Java_pc));
++ __ b(label2);
++ __ bind(label1);
++ __ li(R0, 0);
++ __ std(R0, state_(_last_Java_pc));
++ __ bind(label2);
++ }
++
++
++ // stack_limit = tos - max_stack;
++ __ sub(R0, tos, max_stack);
++ // state->_stack_limit = stack_limit;
++ __ std(R0, state_(_stack_limit));
++
++
++ // cache = method->constants()->cache();
++ __ ld(R0, ConstantPool::cache_offset_in_bytes(), constants);
++ // state->_constants = method->constants()->cache();
++ __ std(R0, state_(_constants));
++
++
++
++ //=============================================================================
++ // synchronized method, allocate and initialize method object lock.
++ // if (!method->is_synchronized()) goto fill_locals_with_0x0s;
++ Label fill_locals_with_0x0s;
++ __ bfalse(is_synced, fill_locals_with_0x0s);
++
++ // pool_holder = method->constants()->pool_holder();
++ const int mirror_offset = in_bytes(Klass::java_mirror_offset());
++ {
++ Label label1, label2;
++      // lockee = NULL; for Java methods, the correct value is inserted in BytecodeInterpretMethod.hpp
++      __ li(R0, 0);
++ __ bfalse(is_native, label2);
++
++ __ bfalse(is_static, label1);
++ // if (method->is_static()) lockee =
++ // pool_holder->klass_part()->java_mirror();
++ __ ld(R11_scratch1/*pool_holder*/, ConstantPool::pool_holder_offset_in_bytes(), constants);
++ __ ld(R0/*lockee*/, mirror_offset, R11_scratch1/*pool_holder*/);
++ __ b(label2);
++
++ __ bind(label1);
++ // else lockee = *(oop*)locals;
++ __ ld(R0/*lockee*/, 0, R18_locals);
++ __ bind(label2);
++
++ // monitor->set_obj(lockee);
++ __ std(R0/*lockee*/, BasicObjectLock::obj_offset_in_bytes(), stack_base);
++ }
++
++ // See if we need to zero the locals
++ __ BIND(fill_locals_with_0x0s);
++
++
++ //=============================================================================
++ // fill locals with 0x0s
++ Label locals_zeroed;
++ __ btrue(is_native, locals_zeroed);
++
++ if (true /* zerolocals */ || ClearInterpreterLocals) {
++ // local_count is already num_locals_slots - num_param_slots
++ __ sldi(R0, parameter_count, Interpreter::logStackElementSize);
++ __ sub(local_addr, R18_locals, R0);
++ __ cmpdi(CCR0, local_count, 0);
++ __ ble(CCR0, locals_zeroed);
++
++ __ mtctr(local_count);
++ //__ ld_const_addr(R0, (address) 0xcafe0000babe);
++ __ li(R0, 0);
++
++ Label zero_slot;
++ __ bind(zero_slot);
++
++ // first local is at local_addr
++ __ std(R0, 0, local_addr);
++ __ addi(local_addr, local_addr, -BytesPerWord);
++ __ bdnz(zero_slot);
++ }
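++    // The zeroing loop above, roughly in C (assuming 8-byte slots and
++    // locals growing towards lower addresses):
++    //
++    //   intptr_t* p = locals - parameter_count;  // first non-parameter local
++    //   for (int i = 0; i < local_count; i++) *p-- = 0;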
++
++ __ BIND(locals_zeroed);
++
++ }
++ BLOCK_COMMENT("} compute_interpreter_state");
++}
++
++// Generate code to initiate compilation on invocation counter overflow.
++void CppInterpreterGenerator::generate_counter_overflow(Label& continue_entry) {
++ // Registers alive
++ // R14_state
++ // R16_thread
++ //
++ // Registers updated
++ // R14_state
++ // R3_ARG1 (=R3_RET)
++ // R4_ARG2
++
++ // After entering the vm we remove the activation and retry the
++ // entry point in case the compilation is complete.
++
++ // InterpreterRuntime::frequency_counter_overflow takes one argument
++ // that indicates if the counter overflow occurs at a backwards
++ // branch (NULL bcp). We pass zero. The call returns the address
++ // of the verified entry point for the method or NULL if the
++ // compilation did not complete (either went background or bailed
++ // out).
++ __ li(R4_ARG2, 0);
++
++ // Pass false to call_VM so it doesn't check for pending exceptions,
++ // since at this point in the method invocation the exception
++ // handler would try to exit the monitor of synchronized methods
++ // which haven't been entered yet.
++ //
++ // Returns verified_entry_point or NULL, we don't care which.
++ //
++ // Do not use the variant `frequency_counter_overflow' that returns
++ // a structure, because this will change the argument list by a
++ // hidden parameter (gcc 4.1).
++
++ __ call_VM(noreg,
++ CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow),
++ R4_ARG2,
++ false);
++ // Returns verified_entry_point or NULL, we don't care which as we ignore it
++ // and run interpreted.
++
++ // Reload method, it may have moved.
++ __ ld(R19_method, state_(_method));
++
++  // Now we jump to the label "continue_after_compile".
++ __ b(continue_entry);
++}
++
++// Increment invocation count and check for overflow.
++//
++// R19_method must contain Method* of method to profile.
++void CppInterpreterGenerator::generate_counter_incr(Label& overflow) {
++ Label done;
++ const Register Rcounters = R12_scratch2;
++ const Register iv_be_count = R11_scratch1;
++ const Register invocation_limit = R12_scratch2;
++ const Register invocation_limit_addr = invocation_limit;
++
++  // Load and, if necessary, allocate the MethodCounters object.
++ __ get_method_counters(R19_method, Rcounters, done);
++
++ // Update standard invocation counters.
++ __ increment_invocation_counter(Rcounters, iv_be_count, R0);
++
++ // Compare against limit.
++ BLOCK_COMMENT("Compare counter against limit:");
++ assert(4 == sizeof(InvocationCounter::InterpreterInvocationLimit),
++ "must be 4 bytes");
++ __ load_const(invocation_limit_addr, (address)&InvocationCounter::InterpreterInvocationLimit);
++ __ lwa(invocation_limit, 0, invocation_limit_addr);
++ __ cmpw(CCR0, iv_be_count, invocation_limit);
++ __ bge(CCR0, overflow);
++ __ bind(done);
++}
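++// Roughly, in C terms, generate_counter_incr above does (a sketch):
++//
++//   MethodCounters* mcs = method->method_counters(); // allocated on demand;
++//                                                    // branch to done if NULL
++//   int count = /* bumped invocation + backedge counters */;
++//   if (count >= InvocationCounter::InterpreterInvocationLimit) goto overflow;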
++
++//
++// Call a JNI method.
++//
++// Interpreter stub for calling a native method. (C++ interpreter)
++// This sets up a somewhat different-looking stack for calling the native method
++// than the typical interpreter frame setup.
++//
++address CppInterpreterGenerator::generate_native_entry(void) {
++ if (native_entry != NULL) return native_entry;
++ address entry = __ pc();
++
++ // Read
++ // R16_thread
++ // R15_prev_state - address of caller's BytecodeInterpreter, if this snippet
++ // gets called by the frame manager.
++ // R19_method - callee's Method
++ // R17_tos - address of caller's tos
++ // R1_SP - caller's stack pointer
++ // R21_sender_SP - initial caller sp
++ //
++ // Update
++ // R14_state - address of caller's BytecodeInterpreter
++ // R3_RET - integer result, if any.
++ // F1_RET - float result, if any.
++ //
++ //
++ // Stack layout at this point:
++ //
++ // 0 [TOP_IJAVA_FRAME_ABI] <-- R1_SP
++ // alignment (optional)
++ // [outgoing Java arguments] <-- R17_tos
++ // ...
++ // PARENT [PARENT_IJAVA_FRAME_ABI]
++ // ...
++ //
++
++ const bool inc_counter = UseCompiler || CountCompiledCalls;
++
++ const Register signature_handler_fd = R21_tmp1;
++ const Register pending_exception = R22_tmp2;
++ const Register result_handler_addr = R23_tmp3;
++ const Register native_method_fd = R24_tmp4;
++ const Register access_flags = R25_tmp5;
++ const Register active_handles = R26_tmp6;
++ const Register sync_state = R27_tmp7;
++ const Register sync_state_addr = sync_state; // Address is dead after use.
++ const Register suspend_flags = R24_tmp4;
++
++ const Register return_pc = R28_tmp8; // Register will be locked for some time.
++
++ const ConditionRegister is_synced = CCR4_is_synced; // Live-on-exit from compute_interpreter_state.
++
++
++ // R1_SP still points to caller's SP at this point.
++
++ // Save initial_caller_sp to caller's abi. The caller frame must be
++ // resized before returning to get rid of the c2i arguments (if
++ // any).
++ // Override the saved SP with the senderSP so we can pop c2i
++  // arguments (if any) off when we return.
++ __ std(R21_sender_SP, _top_ijava_frame_abi(initial_caller_sp), R1_SP);
++
++ // Save LR to caller's frame. We don't use _abi(lr) here, because it is not safe.
++ __ mflr(return_pc);
++ __ std(return_pc, _top_ijava_frame_abi(frame_manager_lr), R1_SP);
++
++ assert(return_pc->is_nonvolatile(), "return_pc must be a non-volatile register");
++
++ __ verify_method_ptr(R19_method);
++
++ //=============================================================================
++
++ // If this snippet gets called by the frame manager (at label
++ // `call_special'), then R15_prev_state is valid. If this snippet
++ // is not called by the frame manager, but e.g. by the call stub or
++ // by compiled code, then R15_prev_state is invalid.
++ {
++ // Set R15_prev_state to 0 if we don't return to the frame
++ // manager; we will return to the call_stub or to compiled code
++ // instead. If R15_prev_state is 0 there will be only one
++ // interpreter frame (we will set this up later) in this C frame!
++ // So we must take care about retrieving prev_state_(_prev_link)
++ // and restoring R1_SP when popping that interpreter.
++ Label prev_state_is_valid;
++
++ __ load_const(R11_scratch1/*frame_manager_returnpc_addr*/, (address)&frame_manager_specialized_return);
++ __ ld(R12_scratch2/*frame_manager_returnpc*/, 0, R11_scratch1/*frame_manager_returnpc_addr*/);
++ __ cmpd(CCR0, return_pc, R12_scratch2/*frame_manager_returnpc*/);
++ __ beq(CCR0, prev_state_is_valid);
++
++ __ li(R15_prev_state, 0);
++
++ __ BIND(prev_state_is_valid);
++ }
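++  // In effect (a sketch): R15_prev_state is only trusted when we were
++  // entered from the frame manager's specialized return point:
++  //
++  //   if (return_pc != frame_manager_specialized_return) prev_state = NULL;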
++
++ //=============================================================================
++ // Allocate new frame and initialize interpreter state.
++
++ Label exception_return;
++ Label exception_return_sync_check;
++ Label stack_overflow_return;
++
++ // Generate new interpreter state and jump to stack_overflow_return in case of
++ // a stack overflow.
++ generate_compute_interpreter_state(stack_overflow_return);
++
++ //=============================================================================
++ // Increment invocation counter. On overflow, entry to JNI method
++ // will be compiled.
++ Label invocation_counter_overflow;
++ if (inc_counter) {
++ generate_counter_incr(invocation_counter_overflow);
++ }
++
++ Label continue_after_compile;
++ __ BIND(continue_after_compile);
++
++ // access_flags = method->access_flags();
++ // Load access flags.
++ assert(access_flags->is_nonvolatile(),
++ "access_flags must be in a non-volatile register");
++ // Type check.
++ // TODO: PPC port: assert(4 == sizeof(AccessFlags), "unexpected field size");
++ __ lwz(access_flags, method_(access_flags));
++
++ // We don't want to reload R19_method and access_flags after calls
++ // to some helper functions.
++ assert(R19_method->is_nonvolatile(), "R19_method must be a non-volatile register");
++
++ // Check for synchronized methods. Must happen AFTER invocation counter
++ // check, so method is not locked if counter overflows.
++
++ {
++ Label method_is_not_synced;
++ // Is_synced is still alive.
++ assert(is_synced->is_nonvolatile(), "is_synced must be non-volatile");
++ __ bfalse(is_synced, method_is_not_synced);
++
++ lock_method();
++ // Reload method, it may have moved.
++ __ ld(R19_method, state_(_method));
++
++ __ BIND(method_is_not_synced);
++ }
++
++ // jvmti/jvmpi support
++ __ notify_method_entry();
++
++ // Reload method, it may have moved.
++ __ ld(R19_method, state_(_method));
++
++ //=============================================================================
++ // Get and call the signature handler
++
++ __ ld(signature_handler_fd, method_(signature_handler));
++ Label call_signature_handler;
++
++ __ cmpdi(CCR0, signature_handler_fd, 0);
++ __ bne(CCR0, call_signature_handler);
++
++ // Method has never been called. Either generate a specialized
++ // handler or point to the slow one.
++ //
++ // Pass parameter 'false' to avoid exception check in call_VM.
++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), R19_method, false);
++
++ // Check for an exception while looking up the target method. If we
++ // incurred one, bail.
++ __ ld(pending_exception, thread_(pending_exception));
++ __ cmpdi(CCR0, pending_exception, 0);
++ __ bne(CCR0, exception_return_sync_check); // has pending exception
++
++ // reload method
++ __ ld(R19_method, state_(_method));
++
++  // Reload the signature handler; it may have been created/assigned in the meantime.
++ __ ld(signature_handler_fd, method_(signature_handler));
++
++ __ BIND(call_signature_handler);
++
++ // Before we call the signature handler we push a new frame to
++ // protect the interpreter frame volatile registers when we return
++ // from jni but before we can get back to Java.
++
++ // First set the frame anchor while the SP/FP registers are
++ // convenient and the slow signature handler can use this same frame
++ // anchor.
++
++ // We have a TOP_IJAVA_FRAME here, which belongs to us.
++ __ set_top_ijava_frame_at_SP_as_last_Java_frame(R1_SP, R12_scratch2/*tmp*/);
++
++ // Now the interpreter frame (and its call chain) have been
++ // invalidated and flushed. We are now protected against eager
++  // being enabled in native code. Even if it goes eager, the
++  // registers will be reloaded as clean and we will invalidate after
++  // the call, so no spurious flush should be possible.
++
++ // Call signature handler and pass locals address.
++ //
++ // Our signature handlers copy required arguments to the C stack
++ // (outgoing C args), R3_ARG1 to R10_ARG8, and F1_ARG1 to
++ // F13_ARG13.
++ __ mr(R3_ARG1, R18_locals);
++#if !defined(ABI_ELFv2)
++ __ ld(signature_handler_fd, 0, signature_handler_fd);
++#endif
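++  // Note: without ABI_ELFv2 (i.e. under the ELFv1 ABI), signature_handler_fd
++  // holds a function descriptor, so the load above dereferences it to get the
++  // actual code entry point before calling through it.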
++ __ call_stub(signature_handler_fd);
++ // reload method
++ __ ld(R19_method, state_(_method));
++
++ // Remove the register parameter varargs slots we allocated in
++ // compute_interpreter_state. SP+16 ends up pointing to the ABI
++ // outgoing argument area.
++ //
++ // Not needed on PPC64.
++ //__ add(SP, SP, Argument::n_register_parameters*BytesPerWord);
++
++ assert(result_handler_addr->is_nonvolatile(), "result_handler_addr must be in a non-volatile register");
++ // Save across call to native method.
++ __ mr(result_handler_addr, R3_RET);
++
++ // Set up fixed parameters and call the native method.
++ // If the method is static, get mirror into R4_ARG2.
++
++ {
++ Label method_is_not_static;
++    // access_flags is non-volatile and still valid; no need to reload it.
++
++    // Test whether the method is static.
++ __ testbitdi(CCR0, R0, access_flags, JVM_ACC_STATIC_BIT);
++ __ bfalse(CCR0, method_is_not_static);
++
++ // constants = method->constants();
++ __ ld(R11_scratch1, in_bytes(Method::const_offset()), R19_method);
++ __ ld(R11_scratch1/*constants*/, in_bytes(ConstMethod::constants_offset()), R11_scratch1);
++ // pool_holder = method->constants()->pool_holder();
++ __ ld(R11_scratch1/*pool_holder*/, ConstantPool::pool_holder_offset_in_bytes(),
++ R11_scratch1/*constants*/);
++
++ const int mirror_offset = in_bytes(Klass::java_mirror_offset());
++
++ // mirror = pool_holder->klass_part()->java_mirror();
++ __ ld(R0/*mirror*/, mirror_offset, R11_scratch1/*pool_holder*/);
++    // state->_oop_temp = mirror;
++ __ std(R0/*mirror*/, state_(_oop_temp));
++ // R4_ARG2 = &state->_oop_temp;
++ __ addir(R4_ARG2, state_(_oop_temp));
++
++ __ BIND(method_is_not_static);
++ }
++
++ // At this point, arguments have been copied off the stack into
++ // their JNI positions. Oops are boxed in-place on the stack, with
++ // handles copied to arguments. The result handler address is in a
++ // register.
++
++ // pass JNIEnv address as first parameter
++ __ addir(R3_ARG1, thread_(jni_environment));
++
++ // Load the native_method entry before we change the thread state.
++ __ ld(native_method_fd, method_(native_function));
++
++ //=============================================================================
++ // Transition from _thread_in_Java to _thread_in_native. As soon as
++ // we make this change the safepoint code needs to be certain that
++ // the last Java frame we established is good. The pc in that frame
++  // just needs to be near here, not an actual return address.
++
++ // We use release_store_fence to update values like the thread state, where
++ // we don't want the current thread to continue until all our prior memory
++ // accesses (including the new thread state) are visible to other threads.
++ __ li(R0, _thread_in_native);
++ __ release();
++
++ // TODO: PPC port: assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
++ __ stw(R0, thread_(thread_state));
++
++ if (UseMembar) {
++ __ fence();
++ }
++
++ //=============================================================================
++ // Call the native method. Argument registers must not have been
++ // overwritten since "__ call_stub(signature_handler);" (except for
++ // ARG1 and ARG2 for static methods)
++ __ call_c(native_method_fd);
++
++ __ std(R3_RET, state_(_native_lresult));
++ __ stfd(F1_RET, state_(_native_fresult));
++
++ // The frame_manager_lr field, which we use for setting the last
++ // java frame, gets overwritten by the signature handler. Restore
++ // it now.
++ __ get_PC_trash_LR(R11_scratch1);
++ __ std(R11_scratch1, _top_ijava_frame_abi(frame_manager_lr), R1_SP);
++
++ // Because of GC R19_method may no longer be valid.
++
++ // Block, if necessary, before resuming in _thread_in_Java state.
++ // In order for GC to work, don't clear the last_Java_sp until after
++ // blocking.
++
++
++
++ //=============================================================================
++ // Switch thread to "native transition" state before reading the
++ // synchronization state. This additional state is necessary
++ // because reading and testing the synchronization state is not
++ // atomic w.r.t. GC, as this scenario demonstrates: Java thread A,
++ // in _thread_in_native state, loads _not_synchronized and is
++ // preempted. VM thread changes sync state to synchronizing and
++ // suspends threads for GC. Thread A is resumed to finish this
++ // native method, but doesn't block here since it didn't see any
++ // synchronization in progress, and escapes.
++
++ // We use release_store_fence to update values like the thread state, where
++ // we don't want the current thread to continue until all our prior memory
++ // accesses (including the new thread state) are visible to other threads.
++ __ li(R0/*thread_state*/, _thread_in_native_trans);
++ __ release();
++ __ stw(R0/*thread_state*/, thread_(thread_state));
++  if (UseMembar) {
++    __ fence();
++  } else {
++    // Write the serialization page so that the VM thread can do a pseudo remote
++    // membar. We use the current thread pointer to calculate a thread-specific
++    // offset to write to within the page. This minimizes bus traffic due to
++    // cache line collision.
++    __ serialize_memory(R16_thread, R11_scratch1, R12_scratch2);
++  }
++
++  // Now before we return to Java we must look for a current safepoint
++  // (a new safepoint cannot start since we entered native_trans).
++  // We must check here because a current safepoint could be modifying
++  // the caller's registers right at this moment.
++
++ // Acquire isn't strictly necessary here because of the fence, but
++ // sync_state is declared to be volatile, so we do it anyway.
++ __ load_const(sync_state_addr, SafepointSynchronize::address_of_state());
++
++ // TODO: PPC port: assert(4 == SafepointSynchronize::sz_state(), "unexpected field size");
++ __ lwz(sync_state, 0, sync_state_addr);
++
++ // TODO: PPC port: assert(4 == Thread::sz_suspend_flags(), "unexpected field size");
++ __ lwz(suspend_flags, thread_(suspend_flags));
++
++ __ acquire();
++
++ Label sync_check_done;
++ Label do_safepoint;
++ // No synchronization in progress nor yet synchronized
++ __ cmpwi(CCR0, sync_state, SafepointSynchronize::_not_synchronized);
++ // not suspended
++ __ cmpwi(CCR1, suspend_flags, 0);
++
++ __ bne(CCR0, do_safepoint);
++ __ beq(CCR1, sync_check_done);
++ __ bind(do_safepoint);
++  // Block. We do the call directly and leave the current
++  // last_Java_frame setup undisturbed. We must save any possible
++  // native result across the call. No oop is present.
++
++ __ mr(R3_ARG1, R16_thread);
++#if defined(ABI_ELFv2)
++ __ call_c(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans),
++ relocInfo::none);
++#else
++ __ call_c(CAST_FROM_FN_PTR(FunctionDescriptor*, JavaThread::check_special_condition_for_native_trans),
++ relocInfo::none);
++#endif
++ __ bind(sync_check_done);
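++  // The two compares above combine to (a sketch):
++  //
++  //   if (sync_state != SafepointSynchronize::_not_synchronized
++  //       || thread->suspend_flags() != 0)
++  //     JavaThread::check_special_condition_for_native_trans(thread);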
++
++ //=============================================================================
++  // <<<<<< Back in Interpreter Frame >>>>>>
++
++ // We are in thread_in_native_trans here and back in the normal
++ // interpreter frame. We don't have to do anything special about
++ // safepoints and we can switch to Java mode anytime we are ready.
++
++ // Note: frame::interpreter_frame_result has a dependency on how the
++ // method result is saved across the call to post_method_exit. For
++ // native methods it assumes that the non-FPU/non-void result is
++ // saved in _native_lresult and a FPU result in _native_fresult. If
++ // this changes then the interpreter_frame_result implementation
++ // will need to be updated too.
++
++ // On PPC64, we have stored the result directly after the native call.
++
++ //=============================================================================
++ // back in Java
++
++ // We use release_store_fence to update values like the thread state, where
++ // we don't want the current thread to continue until all our prior memory
++ // accesses (including the new thread state) are visible to other threads.
++ __ li(R0/*thread_state*/, _thread_in_Java);
++ __ release();
++ __ stw(R0/*thread_state*/, thread_(thread_state));
++ if (UseMembar) {
++ __ fence();
++ }
++
++ __ reset_last_Java_frame();
++
++  // Reload R19_method; the call killed it. We can't look at
++  // state->_method until we're back in Java state, because in Java
++  // state GC can't happen until we get to a safepoint.
++  //
++  // We've set thread_state to _thread_in_Java already, so restoring
++  // R19_method from R14_state works; R19_method is invalid because
++  // GC may have happened.
++  __ ld(R19_method, state_(_method)); // Reload method, it may have moved.
++
++ // jvmdi/jvmpi support. Whether we've got an exception pending or
++ // not, and whether unlocking throws an exception or not, we notify
++ // on native method exit. If we do have an exception, we'll end up
++ // in the caller's context to handle it, so if we don't do the
++ // notify here, we'll drop it on the floor.
++
++ __ notify_method_exit(true/*native method*/,
++ ilgl /*illegal state (not used for native methods)*/,
++ InterpreterMacroAssembler::NotifyJVMTI,
++ false /*check_exceptions*/);
++
++ //=============================================================================
++ // Handle exceptions
++
++ // See if we must unlock.
++ //
++ {
++ Label method_is_not_synced;
++ // is_synced is still alive
++ assert(is_synced->is_nonvolatile(), "is_synced must be non-volatile");
++ __ bfalse(is_synced, method_is_not_synced);
++
++ unlock_method();
++
++ __ bind(method_is_not_synced);
++ }
++
++ // Reset active handles after returning from native.
++ // thread->active_handles()->clear();
++ __ ld(active_handles, thread_(active_handles));
++ // JNIHandleBlock::_top is an int.
++ // TODO: PPC port: assert(4 == JNIHandleBlock::top_size_in_bytes(), "unexpected field size");
++ __ li(R0, 0);
++ __ stw(R0, JNIHandleBlock::top_offset_in_bytes(), active_handles);
++
++ Label no_pending_exception_from_native_method;
++ __ ld(R0/*pending_exception*/, thread_(pending_exception));
++ __ cmpdi(CCR0, R0/*pending_exception*/, 0);
++ __ beq(CCR0, no_pending_exception_from_native_method);
++
++
++ //-----------------------------------------------------------------------------
++ // An exception is pending. We call into the runtime only if the
++ // caller was not interpreted. If it was interpreted the
++ // interpreter will do the correct thing. If it isn't interpreted
++ // (call stub/compiled code) we will change our return and continue.
++ __ BIND(exception_return);
++
++ Label return_to_initial_caller_with_pending_exception;
++ __ cmpdi(CCR0, R15_prev_state, 0);
++ __ beq(CCR0, return_to_initial_caller_with_pending_exception);
++
++ // We are returning to an interpreter activation, just pop the state,
++ // pop our frame, leave the exception pending, and return.
++ __ pop_interpreter_state(/*prev_state_may_be_0=*/false);
++ __ pop_interpreter_frame(R11_scratch1, R12_scratch2, R21_tmp1 /* set to return pc */, R22_tmp2);
++ __ mtlr(R21_tmp1);
++ __ blr();
++
++ __ BIND(exception_return_sync_check);
++
++ assert(is_synced->is_nonvolatile(), "is_synced must be non-volatile");
++ __ bfalse(is_synced, exception_return);
++ unlock_method();
++ __ b(exception_return);
++
++
++ __ BIND(return_to_initial_caller_with_pending_exception);
++ // We are returning to a c2i-adapter / call-stub, get the address of the
++ // exception handler, pop the frame and return to the handler.
++
++ // First, pop to caller's frame.
++ __ pop_interpreter_frame(R11_scratch1, R12_scratch2, R21_tmp1 /* set to return pc */, R22_tmp2);
++
++ __ push_frame_reg_args(0, R11_scratch1);
++ // Get the address of the exception handler.
++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address),
++ R16_thread,
++ R21_tmp1 /* return pc */);
++ __ pop_frame();
++
++  // Load the PC of the exception handler into LR.
++ __ mtlr(R3_RET);
++
++ // Load exception into R3_ARG1 and clear pending exception in thread.
++ __ ld(R3_ARG1/*exception*/, thread_(pending_exception));
++ __ li(R4_ARG2, 0);
++ __ std(R4_ARG2, thread_(pending_exception));
++
++ // Load the original return pc into R4_ARG2.
++ __ mr(R4_ARG2/*issuing_pc*/, R21_tmp1);
++
++ // Resize frame to get rid of a potential extension.
++ __ resize_frame_to_initial_caller(R11_scratch1, R12_scratch2);
++
++ // Return to exception handler.
++ __ blr();
++
++
++ //-----------------------------------------------------------------------------
++ // No exception pending.
++ __ BIND(no_pending_exception_from_native_method);
++
++ // Move native method result back into proper registers and return.
++ // Invoke result handler (may unbox/promote).
++ __ ld(R3_RET, state_(_native_lresult));
++ __ lfd(F1_RET, state_(_native_fresult));
++ __ call_stub(result_handler_addr);
++
++ // We have created a new BytecodeInterpreter object, now we must destroy it.
++ //
++ // Restore previous R14_state and caller's SP. R15_prev_state may
++ // be 0 here, because our caller may be the call_stub or compiled
++ // code.
++ __ pop_interpreter_state(/*prev_state_may_be_0=*/true);
++ __ pop_interpreter_frame(R11_scratch1, R12_scratch2, R21_tmp1 /* set to return pc */, R22_tmp2);
++ // Resize frame to get rid of a potential extension.
++ __ resize_frame_to_initial_caller(R11_scratch1, R12_scratch2);
++
++ // Must use the return pc which was loaded from the caller's frame
++ // as the VM uses return-pc-patching for deoptimization.
++ __ mtlr(R21_tmp1);
++ __ blr();
++
++
++
++ //=============================================================================
++ // We encountered an exception while computing the interpreter
++ // state, so R14_state isn't valid. Act as if we just returned from
++ // the callee method with a pending exception.
++ __ BIND(stack_overflow_return);
++
++ //
++ // Register state:
++ // R14_state invalid; trashed by compute_interpreter_state
++ // R15_prev_state valid, but may be 0
++ //
++ // R1_SP valid, points to caller's SP; wasn't yet updated by
++ // compute_interpreter_state
++ //
++
++ // Create exception oop and make it pending.
++
++ // Throw the exception via RuntimeStub "throw_StackOverflowError_entry".
++ //
++  // Previously, we called C code directly. As a consequence, a
++  // possible GC tried to process the argument oops of the top frame
++  // (see RegisterMap::clear, which sets the corresponding flag to
++  // true). This led to crashes because:
++  // 1. The top register map did not contain locations for the argument registers
++  // 2. The arguments are dead anyway and could already be overwritten in the worst case
++  // Solution: Call via a special runtime stub that pushes its own
++  // frame. This runtime stub has the flag "CodeBlob::caller_must_gc_arguments()"
++  // set to "false", which prevents the dead arguments from getting GC'd.
++ //
++ // 2 cases exist:
++ // 1. We were called by the c2i adapter / call stub
++ // 2. We were called by the frame manager
++ //
++ // Both cases are handled by this code:
++ // 1. - initial_caller_sp was saved in both cases on entry, so it's safe to load it back even if it was not changed.
++ // - control flow will be:
++ // throw_stackoverflow_stub->VM->throw_stackoverflow_stub->forward_excep->excp_blob of caller method
++ // 2. - control flow will be:
++ // throw_stackoverflow_stub->VM->throw_stackoverflow_stub->forward_excep->rethrow_excp_entry of frame manager->resume_method
++ // Since we restored the caller SP above, the rethrow_excp_entry can restore the original interpreter state
++ // registers using the stack and resume the calling method with a pending excp.
++
++  // Pop any c2i extension from the stack; restore LR just to be sure.
++ __ ld(R0, _top_ijava_frame_abi(frame_manager_lr), R1_SP);
++ __ mtlr(R0);
++ // Resize frame to get rid of a potential extension.
++ __ resize_frame_to_initial_caller(R11_scratch1, R12_scratch2);
++
++ assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "generated in wrong order");
++ // Load target address of the runtime stub.
++ __ load_const(R12_scratch2, (StubRoutines::throw_StackOverflowError_entry()));
++ __ mtctr(R12_scratch2);
++ __ bctr();
++
++
++ //=============================================================================
++ // Counter overflow.
++
++ if (inc_counter) {
++ // Handle invocation counter overflow
++ __ bind(invocation_counter_overflow);
++
++ generate_counter_overflow(continue_after_compile);
++ }
++
++ native_entry = entry;
++ return entry;
++}
++
++bool AbstractInterpreter::can_be_compiled(methodHandle m) {
++ // No special entry points that preclude compilation.
++ return true;
++}
++
++// Unlock the current method.
++//
++void CppInterpreterGenerator::unlock_method(void) {
++ // Find preallocated monitor and unlock method. Method monitor is
++ // the first one.
++
++ // Registers alive
++ // R14_state
++ //
++ // Registers updated
++ // volatiles
++ //
++ const Register monitor = R4_ARG2;
++
++ // Pass address of initial monitor we allocated.
++ //
++ // First monitor.
++ __ addi(monitor, R14_state, -frame::interpreter_frame_monitor_size_in_bytes());
++
++ // Unlock method
++ __ unlock_object(monitor);
++}
++
++// Lock the current method.
++//
++void CppInterpreterGenerator::lock_method(void) {
++ // Find preallocated monitor and lock method. Method monitor is the
++ // first one.
++
++ //
++ // Registers alive
++ // R14_state
++ //
++ // Registers updated
++ // volatiles
++ //
++
++ const Register monitor = R4_ARG2;
++ const Register object = R5_ARG3;
++
++ // Pass address of initial monitor we allocated.
++ __ addi(monitor, R14_state, -frame::interpreter_frame_monitor_size_in_bytes());
++
++ // Pass object address.
++ __ ld(object, BasicObjectLock::obj_offset_in_bytes(), monitor);
++
++ // Lock method.
++ __ lock_object(monitor, object);
++}
++
++// Generate code for handling resuming a deopted method.
++void CppInterpreterGenerator::generate_deopt_handling(Register result_index) {
++
++ //=============================================================================
++ // Returning from a compiled method into a deopted method. The
++ // bytecode at the bcp has completed. The result of the bytecode is
++ // in the native abi (the tosca for the template based
++ // interpreter). Any stack space that was used by the bytecode that
++ // has completed has been removed (e.g. parameters for an invoke) so
++ // all that we have to do is place any pending result on the
++ // expression stack and resume execution on the next bytecode.
++
++ Label return_from_deopt_common;
++
++ // R3_RET and F1_RET are live here! Load the array index of the
++ // required result stub address and continue at return_from_deopt_common.
++
++ // Deopt needs to jump to here to enter the interpreter (return a result).
++ deopt_frame_manager_return_atos = __ pc();
++ __ li(result_index, AbstractInterpreter::BasicType_as_index(T_OBJECT));
++ __ b(return_from_deopt_common);
++
++ deopt_frame_manager_return_btos = __ pc();
++ __ li(result_index, AbstractInterpreter::BasicType_as_index(T_BOOLEAN));
++ __ b(return_from_deopt_common);
++
++ deopt_frame_manager_return_itos = __ pc();
++ __ li(result_index, AbstractInterpreter::BasicType_as_index(T_INT));
++ __ b(return_from_deopt_common);
++
++ deopt_frame_manager_return_ltos = __ pc();
++ __ li(result_index, AbstractInterpreter::BasicType_as_index(T_LONG));
++ __ b(return_from_deopt_common);
++
++ deopt_frame_manager_return_ftos = __ pc();
++ __ li(result_index, AbstractInterpreter::BasicType_as_index(T_FLOAT));
++ __ b(return_from_deopt_common);
++
++ deopt_frame_manager_return_dtos = __ pc();
++ __ li(result_index, AbstractInterpreter::BasicType_as_index(T_DOUBLE));
++ __ b(return_from_deopt_common);
++
++ deopt_frame_manager_return_vtos = __ pc();
++ __ li(result_index, AbstractInterpreter::BasicType_as_index(T_VOID));
++ // Last one, fall-through to return_from_deopt_common.
++
++  // Deopt return common. An index is present that lets us move any
++  // possible result being returned to the interpreter's stack.
++ //
++ __ BIND(return_from_deopt_common);
++
++}
++
++// Generate the code to handle a more_monitors message from the c++ interpreter.
++void CppInterpreterGenerator::generate_more_monitors() {
++
++ //
++ // Registers alive
++ // R16_thread - JavaThread*
++ // R15_prev_state - previous BytecodeInterpreter or 0
++ // R14_state - BytecodeInterpreter* address of receiver's interpreter state
++ // R1_SP - old stack pointer
++ //
++ // Registers updated
++ // R1_SP - new stack pointer
++ //
++
++ // Very-local scratch registers.
++ const Register old_tos = R21_tmp1;
++ const Register new_tos = R22_tmp2;
++ const Register stack_base = R23_tmp3;
++ const Register stack_limit = R24_tmp4;
++ const Register slot = R25_tmp5;
++ const Register n_slots = R25_tmp5;
++
++ // Interpreter state fields.
++ const Register msg = R24_tmp4;
++
++ // Load up relevant interpreter state.
++
++ __ ld(stack_base, state_(_stack_base)); // Old stack_base
++ __ ld(old_tos, state_(_stack)); // Old tos
++ __ ld(stack_limit, state_(_stack_limit)); // Old stack_limit
++
++ // extracted monitor_size
++ int monitor_size = frame::interpreter_frame_monitor_size_in_bytes();
++ assert(Assembler::is_aligned((unsigned int)monitor_size,
++ (unsigned int)frame::alignment_in_bytes),
++ "size of a monitor must respect alignment of SP");
++
++ // Save and restore top LR
++ __ ld(R12_scratch2, _top_ijava_frame_abi(frame_manager_lr), R1_SP);
++ __ resize_frame(-monitor_size, R11_scratch1);// Allocate space for new monitor
++ __ std(R12_scratch2, _top_ijava_frame_abi(frame_manager_lr), R1_SP);
++  // Initial_caller_sp is used as unextended_sp for non-initial callers.
++ __ std(R1_SP, _top_ijava_frame_abi(initial_caller_sp), R1_SP);
++ __ addi(stack_base, stack_base, -monitor_size); // New stack_base
++ __ addi(new_tos, old_tos, -monitor_size); // New tos
++ __ addi(stack_limit, stack_limit, -monitor_size); // New stack_limit
++
++ __ std(R1_SP, state_(_last_Java_sp)); // Update frame_bottom
++
++ __ std(stack_base, state_(_stack_base)); // Update stack_base
++ __ std(new_tos, state_(_stack)); // Update tos
++ __ std(stack_limit, state_(_stack_limit)); // Update stack_limit
++
++ __ li(msg, BytecodeInterpreter::got_monitors); // Tell interpreter we allocated the lock
++ __ stw(msg, state_(_msg));
++
++  // Shuffle the expression stack down. Recall that stack_base points
++  // just above the new expression stack bottom. Old_tos and new_tos
++  // are used to scan through the old and new expression stacks.
++
++ Label copy_slot, copy_slot_finished;
++ __ sub(n_slots, stack_base, new_tos);
++ __ srdi_(n_slots, n_slots, LogBytesPerWord); // compute number of slots to copy
++  assert(LogBytesPerWord == 3, "conflicts with assembler instructions");
++ __ beq(CCR0, copy_slot_finished); // nothing to copy
++
++ __ mtctr(n_slots);
++
++ // loop
++ __ bind(copy_slot);
++ __ ldu(slot, BytesPerWord, old_tos); // slot = *++old_tos;
++ __ stdu(slot, BytesPerWord, new_tos); // *++new_tos = slot;
++ __ bdnz(copy_slot);
++
++ __ bind(copy_slot_finished);
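++  // The copy loop above, roughly in C (both pointers scan upwards,
++  // pre-incremented, matching the ldu/stdu update forms):
++  //
++  //   while (n_slots--) *++new_tos = *++old_tos;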
++
++ // Restart interpreter
++ __ li(R0, 0);
++ __ std(R0, BasicObjectLock::obj_offset_in_bytes(), stack_base); // Mark lock as unused
++}
++
++address CppInterpreterGenerator::generate_normal_entry(void) {
++ if (interpreter_frame_manager != NULL) return interpreter_frame_manager;
++
++ address entry = __ pc();
++
++ address return_from_native_pc = (address) NULL;
++
++ // Initial entry to frame manager (from call_stub or c2i_adapter)
++
++ //
++ // Registers alive
++ // R16_thread - JavaThread*
++ // R19_method - callee's Method (method to be invoked)
++ // R17_tos - address of sender tos (prepushed)
++ // R1_SP - SP prepared by call stub such that caller's outgoing args are near top
++ // LR - return address to caller (call_stub or c2i_adapter)
++ // R21_sender_SP - initial caller sp
++ //
++ // Registers updated
++ // R15_prev_state - 0
++ //
++ // Stack layout at this point:
++ //
++ // 0 [TOP_IJAVA_FRAME_ABI] <-- R1_SP
++ // alignment (optional)
++ // [outgoing Java arguments] <-- R17_tos
++ // ...
++ // PARENT [PARENT_IJAVA_FRAME_ABI]
++ // ...
++ //
++
++ // Save initial_caller_sp to caller's abi.
++ // The caller frame must be resized before returning to get rid of
++ // the c2i part on top of the calling compiled frame (if any).
++ // R21_tmp1 must match sender_sp in gen_c2i_adapter.
++ // Now override the saved SP with the senderSP so we can pop c2i
++ // arguments (if any) off when we return.
++ __ std(R21_sender_SP, _top_ijava_frame_abi(initial_caller_sp), R1_SP);
++
++ // Save LR to caller's frame. We don't use _abi(lr) here,
++ // because it is not safe.
++ __ mflr(R0);
++ __ std(R0, _top_ijava_frame_abi(frame_manager_lr), R1_SP);
++
++ // If we come here, it is the first invocation of the frame manager.
++ // So there is no previous interpreter state.
++ __ li(R15_prev_state, 0);
++
++
++ // Fall through to where "recursive" invocations go.
++
++ //=============================================================================
++ // Dispatch an instance of the interpreter. Recursive activations
++ // come here.
++
++ Label re_dispatch;
++ __ BIND(re_dispatch);
++
++ //
++ // Registers alive
++ // R16_thread - JavaThread*
++ // R19_method - callee's Method
++ // R17_tos - address of caller's tos (prepushed)
++ // R15_prev_state - address of caller's BytecodeInterpreter or 0
++ // R1_SP - caller's SP trimmed such that caller's outgoing args are near top.
++ //
++ // Stack layout at this point:
++ //
++ // 0 [TOP_IJAVA_FRAME_ABI]
++ // alignment (optional)
++ // [outgoing Java arguments]
++ // ...
++ // PARENT [PARENT_IJAVA_FRAME_ABI]
++ // ...
++
++ // fall through to interpreted execution
++
++ //=============================================================================
++ // Allocate a new Java frame and initialize the new interpreter state.
++
++ Label stack_overflow_return;
++
++ // Create a suitable new Java frame plus a new BytecodeInterpreter instance
++ // in the current (frame manager's) C frame.
++ generate_compute_interpreter_state(stack_overflow_return);
++
++ // fall through
++
++ //=============================================================================
++ // Interpreter dispatch.
++
++ Label call_interpreter;
++ __ BIND(call_interpreter);
++
++ //
++ // Registers alive
++ // R16_thread - JavaThread*
++ // R15_prev_state - previous BytecodeInterpreter or 0
++ // R14_state - address of receiver's BytecodeInterpreter
++ // R1_SP - receiver's stack pointer
++ //
++
++ // Thread fields.
++ const Register pending_exception = R21_tmp1;
++
++ // Interpreter state fields.
++ const Register msg = R24_tmp4;
++
++ // Method fields.
++ const Register parameter_count = R25_tmp5;
++ const Register result_index = R26_tmp6;
++
++ const Register dummy = R28_tmp8;
++
++ // Address of various interpreter stubs.
++ // R29_tmp9 is reserved.
++ const Register stub_addr = R27_tmp7;
++
++ // Uncommon trap needs to jump to here to enter the interpreter
++ // (re-execute current bytecode).
++ unctrap_frame_manager_entry = __ pc();
++
++ // If we are profiling, store our fp (BSP) in the thread so we can
++ // find it during a tick.
++ if (Arguments::has_profile()) {
++ // On PPC64 we store the pointer to the current BytecodeInterpreter,
++ // instead of the bsp of ia64. This should suffice to be able to
++ // find all interesting information.
++ __ std(R14_state, thread_(last_interpreter_fp));
++ }
++
++ // R16_thread, R14_state and R15_prev_state are nonvolatile
++ // registers. There is no need to save these. If we needed to save
++ // some state in the current Java frame, this could be a place to do
++ // so.
++
++ // Call Java bytecode dispatcher passing "BytecodeInterpreter* istate".
++ __ call_VM_leaf(CAST_FROM_FN_PTR(address,
++ JvmtiExport::can_post_interpreter_events()
++ ? BytecodeInterpreter::runWithChecks
++ : BytecodeInterpreter::run),
++ R14_state);
++
++ interpreter_return_address = __ last_calls_return_pc();
++
++ // R16_thread, R14_state and R15_prev_state have their values preserved.
++
++ // If we are profiling, clear the fp in the thread to tell
++ // the profiler that we are no longer in the interpreter.
++ if (Arguments::has_profile()) {
++ __ li(R11_scratch1, 0);
++ __ std(R11_scratch1, thread_(last_interpreter_fp));
++ }
++
++ // Load message from bytecode dispatcher.
++ // TODO: PPC port: guarantee(4 == BytecodeInterpreter::sz_msg(), "unexpected field size");
++ __ lwz(msg, state_(_msg));
++
++
++ Label more_monitors;
++ Label return_from_native;
++ Label return_from_native_common;
++ Label return_from_native_no_exception;
++ Label return_from_interpreted_method;
++ Label return_from_recursive_activation;
++ Label unwind_recursive_activation;
++ Label resume_interpreter;
++ Label return_to_initial_caller;
++ Label unwind_initial_activation;
++ Label unwind_initial_activation_pending_exception;
++ Label call_method;
++ Label call_special;
++ Label retry_method;
++ Label retry_method_osr;
++ Label popping_frame;
++ Label throwing_exception;
++
++ // Branch according to the received message
++
++ __ cmpwi(CCR1, msg, BytecodeInterpreter::call_method);
++ __ cmpwi(CCR2, msg, BytecodeInterpreter::return_from_method);
++
++ __ beq(CCR1, call_method);
++ __ beq(CCR2, return_from_interpreted_method);
++
++ __ cmpwi(CCR3, msg, BytecodeInterpreter::more_monitors);
++ __ cmpwi(CCR4, msg, BytecodeInterpreter::throwing_exception);
++
++ __ beq(CCR3, more_monitors);
++ __ beq(CCR4, throwing_exception);
++
++ __ cmpwi(CCR5, msg, BytecodeInterpreter::popping_frame);
++ __ cmpwi(CCR6, msg, BytecodeInterpreter::do_osr);
++
++ __ beq(CCR5, popping_frame);
++ __ beq(CCR6, retry_method_osr);
++
++ __ stop("bad message from interpreter");
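++  // The compare cascade above is effectively (a sketch):
++  //
++  //   switch (istate->msg()) {
++  //     case call_method:        goto call_method;
++  //     case return_from_method: goto return_from_interpreted_method;
++  //     case more_monitors:      goto more_monitors;
++  //     case throwing_exception: goto throwing_exception;
++  //     case popping_frame:      goto popping_frame;
++  //     case do_osr:             goto retry_method_osr;
++  //     default:                 stop("bad message from interpreter");
++  //   }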
++
++
++ //=============================================================================
++  // Add a monitor just below the existing one(s). State->_stack_base
++  // points to the lowest existing one, so we insert the new one just
++  // below it and shuffle the expression stack down. Per the stack
++  // layout picture above, we must update _stack_base, _stack, _stack_limit
++  // and _last_Java_sp in the interpreter state.
++
++ __ BIND(more_monitors);
++
++ generate_more_monitors();
++ __ b(call_interpreter);
++
++ generate_deopt_handling(result_index);
++
++ // Restoring the R14_state is already done by the deopt_blob.
++
++ // Current tos includes no parameter slots.
++ __ ld(R17_tos, state_(_stack));
++ __ li(msg, BytecodeInterpreter::deopt_resume);
++ __ b(return_from_native_common);
++
++ // We are sent here when we are unwinding from a native method or
++ // adapter with an exception pending. We need to notify the interpreter
++ // that there is an exception to process.
++  // We also arrive here if the frame manager called an (interpreted) target
++  // which returned with a StackOverflow exception.
++  // The control flow in this case is:
++ // frame_manager->throw_excp_stub->forward_excp->rethrow_excp_entry
++
++ AbstractInterpreter::_rethrow_exception_entry = __ pc();
++
++ // Restore R14_state.
++ __ ld(R14_state, 0, R1_SP);
++ __ addi(R14_state, R14_state,
++ -frame::interpreter_frame_cinterpreterstate_size_in_bytes());
++
++ // Store exception oop into thread object.
++ __ std(R3_RET, thread_(pending_exception));
++ __ li(msg, BytecodeInterpreter::method_resume /*rethrow_exception*/);
++ //
++  // NOTE: the interpreter frame as set up by deopt does NOT include
++  // any parameter slots (a good thing, since we have no callee here
++  // and couldn't remove them), so we don't have to do any calculations
++  // here to figure it out.
++ //
++ __ ld(R17_tos, state_(_stack));
++ __ b(return_from_native_common);
++
++
++ //=============================================================================
++ // Returning from a native method. Result is in the native abi
++ // location so we must move it to the java expression stack.
++
++ __ BIND(return_from_native);
++ guarantee(return_from_native_pc == (address) NULL, "precondition");
++ return_from_native_pc = __ pc();
++
++ // Restore R14_state.
++ __ ld(R14_state, 0, R1_SP);
++ __ addi(R14_state, R14_state, -frame::interpreter_frame_cinterpreterstate_size_in_bytes());
++
++ //
++ // Registers alive
++ // R16_thread
++ // R14_state - address of caller's BytecodeInterpreter.
++ // R3_RET - integer result, if any.
++ // F1_RET - float result, if any.
++ //
++ // Registers updated
++ // R19_method - callee's Method
++ // R17_tos - caller's tos, with outgoing args popped
++ // result_index - index of result handler.
++ // msg - message for resuming interpreter.
++ //
++
++ // Very-local scratch registers.
++
++ const ConditionRegister have_pending_exception = CCR0;
++
++ // Load callee Method, gc may have moved it.
++ __ ld(R19_method, state_(_result._to_call._callee));
++
++  // Load address of caller's tos; includes parameter slots.
++ __ ld(R17_tos, state_(_stack));
++
++ // Pop callee's parameters.
++
++ __ ld(parameter_count, in_bytes(Method::const_offset()), R19_method);
++ __ lhz(parameter_count, in_bytes(ConstMethod::size_of_parameters_offset()), parameter_count);
++ __ sldi(parameter_count, parameter_count, Interpreter::logStackElementSize);
++ __ add(R17_tos, R17_tos, parameter_count);
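++  // For illustration (hypothetical): a static callee taking (int, long)
++  // has size_of_parameters == 3 slots, so the caller's tos moves up by
++  // 3 * 8 = 24 bytes (assuming 8-byte stack elements), dropping the
++  // outgoing arguments.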
++
++ // Result stub address array index
++ // TODO: PPC port: assert(4 == sizeof(AccessFlags), "unexpected field size");
++ __ lwa(result_index, method_(result_index));
++
++ __ li(msg, BytecodeInterpreter::method_resume);
++
++ //
++ // Registers alive
++ // R16_thread
++ // R14_state - address of caller's BytecodeInterpreter.
++ // R17_tos - address of caller's tos with outgoing args already popped
++ // R3_RET - integer return value, if any.
++ // F1_RET - float return value, if any.
++ // result_index - index of result handler.
++ // msg - message for resuming interpreter.
++ //
++ // Registers updated
++ // R3_RET - new address of caller's tos, including result, if any
++ //
++
++ __ BIND(return_from_native_common);
++
++ // Check for pending exception
++ __ ld(pending_exception, thread_(pending_exception));
++ __ cmpdi(CCR0, pending_exception, 0);
++ __ beq(CCR0, return_from_native_no_exception);
++
++ // If there's a pending exception, we really have no result, so
++ // R3_RET is dead. Resume_interpreter assumes the new tos is in
++ // R3_RET.
++ __ mr(R3_RET, R17_tos);
++ // `resume_interpreter' expects R15_prev_state to be alive.
++ __ ld(R15_prev_state, state_(_prev_link));
++ __ b(resume_interpreter);
++
++ __ BIND(return_from_native_no_exception);
++
++ // No pending exception, copy method result from native ABI register
++ // to tos.
++
++ // Address of stub descriptor address array.
++ __ load_const(stub_addr, CppInterpreter::tosca_result_to_stack());
++
++ // Pass address of tos to stub.
++ __ mr(R4_ARG2, R17_tos);
++
++ // Address of stub descriptor address.
++ __ sldi(result_index, result_index, LogBytesPerWord);
++ __ add(stub_addr, stub_addr, result_index);
++
++ // Stub descriptor address.
++ __ ld(stub_addr, 0, stub_addr);
++
++ // TODO: don't do this via a call, do it in place!
++ //
++ // call stub via descriptor
++ // in R3_ARG1/F1_ARG1: result value (R3_RET or F1_RET)
++ __ call_stub(stub_addr);
++
++ // new tos = result of call in R3_RET
++
++ // `resume_interpreter' expects R15_prev_state to be alive.
++ __ ld(R15_prev_state, state_(_prev_link));
++ __ b(resume_interpreter);
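++  // In effect (a sketch): the stub is picked from a table indexed by the
++  // result's BasicType and moves the ABI result onto the expression stack:
++  //
++  //   new_tos = (tosca_result_to_stack()[result_index])(R3_RET/F1_RET, tos);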
++
++ //=============================================================================
++ // We encountered an exception while computing the interpreter
++ // state, so R14_state isn't valid. Act as if we just returned from
++ // the callee method with a pending exception.
++ __ BIND(stack_overflow_return);
++
++ //
++ // Registers alive
++ // R16_thread - JavaThread*
++ // R1_SP - old stack pointer
++ // R19_method - callee's Method
++ // R17_tos - address of caller's tos (prepushed)
++ // R15_prev_state - address of caller's BytecodeInterpreter or 0
++ // R18_locals - address of callee's locals array
++ //
++ // Registers updated
++ // R3_RET - address of resuming tos, if recursive unwind
++
++ Label Lskip_unextend_SP;
++
++ {
++ const ConditionRegister is_initial_call = CCR0;
++ const Register tos_save = R21_tmp1;
++ const Register tmp = R22_tmp2;
++
++ assert(tos_save->is_nonvolatile(), "need a nonvolatile");
++
++ // Is the exception thrown in the initial Java frame of this frame
++ // manager frame?
++ __ cmpdi(is_initial_call, R15_prev_state, 0);
++ __ bne(is_initial_call, Lskip_unextend_SP);
++
++    // Pop any c2i extension from the stack. This is necessary in the
++    // non-recursive case (that is, we were called by the c2i adapter,
++    // meaning we have no prev state). In this case we entered the frame
++    // manager through a special entry which pushes the original
++    // unextended SP to the stack. Here we load it back.
++ __ ld(R0, _top_ijava_frame_abi(frame_manager_lr), R1_SP);
++ __ mtlr(R0);
++ // Resize frame to get rid of a potential extension.
++ __ resize_frame_to_initial_caller(R11_scratch1, R12_scratch2);
++
++ // Fall through
++
++ __ bind(Lskip_unextend_SP);
++
++ // Throw the exception via RuntimeStub "throw_StackOverflowError_entry".
++ //
++    // Previously, we called C code directly. As a consequence, a
++    // possible GC tried to process the argument oops of the top frame
++    // (see RegisterMap::clear, which sets the corresponding flag to
++    // true). This led to crashes because:
++    // 1. The top register map did not contain locations for the argument registers.
++    // 2. The arguments are dead anyway and could already have been overwritten in the worst case.
++    // Solution: Call via a special runtime stub that pushes its own frame. This runtime stub has the flag
++    // "CodeBlob::caller_must_gc_arguments()" set to "false", which prevents the dead arguments from being GC'd.
++ //
++ // 2 cases exist:
++ // 1. We were called by the c2i adapter / call stub
++ // 2. We were called by the frame manager
++ //
++ // Both cases are handled by this code:
++ // 1. - initial_caller_sp was saved on stack => Load it back and we're ok
++ // - control flow will be:
++ // throw_stackoverflow_stub->VM->throw_stackoverflow_stub->forward_excep->excp_blob of calling method
++ // 2. - control flow will be:
++ // throw_stackoverflow_stub->VM->throw_stackoverflow_stub->forward_excep->
++ // ->rethrow_excp_entry of frame manager->resume_method
++ // Since we restored the caller SP above, the rethrow_excp_entry can restore the original interpreter state
++ // registers using the stack and resume the calling method with a pending excp.
++
++ assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "generated in wrong order");
++ __ load_const(R3_ARG1, (StubRoutines::throw_StackOverflowError_entry()));
++ __ mtctr(R3_ARG1);
++ __ bctr();
++ }
++ //=============================================================================
++ // We have popped a frame from an interpreted call. We are assured
++ // of returning to an interpreted call by the popframe abi. We have
++  // no return value; all we have to do is pop the current frame and
++  // then make sure that the top of stack (of the caller) gets set to
++  // where it was when we entered the callee (i.e. the args are still
++  // in place). Or we are returning to the interpreter. In the first
++  // case we must extract the result (if any) from the java expression
++ // stack and store it in the location the native abi would expect
++ // for a call returning this type. In the second case we must simply
++ // do a stack to stack move as we unwind.
++
++ __ BIND(popping_frame);
++
++ // Registers alive
++ // R14_state
++ // R15_prev_state
++ // R17_tos
++ //
++ // Registers updated
++ // R19_method
++ // R3_RET
++ // msg
++ {
++ Label L;
++
++ // Reload callee method, gc may have moved it.
++ __ ld(R19_method, state_(_method));
++
++ // We may be returning to a deoptimized frame in which case the
++ // usual assumption of a recursive return is not true.
++
++ // not equal = is recursive call
++ __ cmpdi(CCR0, R15_prev_state, 0);
++
++ __ bne(CCR0, L);
++
++ // Pop_frame capability.
++    // The pop_frame api says that the underlying frame is a Java frame; in this case
++    // (prev_state==null) it must be a compiled frame:
++ //
++ // Stack at this point: I, C2I + C, ...
++ //
++ // The outgoing arguments of the call have just been copied (popframe_preserve_args).
++ // By the pop_frame api, we must end up in an interpreted frame. So the compiled frame
++ // will be deoptimized. Deoptimization will restore the outgoing arguments from
++ // popframe_preserve_args, adjust the tos such that it includes the popframe_preserve_args,
++ // and adjust the bci such that the call will be executed again.
++ // We have no results, just pop the interpreter frame, resize the compiled frame to get rid
++ // of the c2i extension and return to the deopt_handler.
++ __ b(unwind_initial_activation);
++
++ // is recursive call
++ __ bind(L);
++
++ // Resume_interpreter expects the original tos in R3_RET.
++ __ ld(R3_RET, prev_state_(_stack));
++
++ // We're done.
++ __ li(msg, BytecodeInterpreter::popping_frame);
++
++ __ b(unwind_recursive_activation);
++ }
++
++
++ //=============================================================================
++
++ // We have finished an interpreted call. We are either returning to
++ // native (call_stub/c2) or we are returning to the interpreter.
++ // When returning to native, we must extract the result (if any)
++ // from the java expression stack and store it in the location the
++ // native abi expects. When returning to the interpreter we must
++ // simply do a stack to stack move as we unwind.
++
++ __ BIND(return_from_interpreted_method);
++
++ //
++ // Registers alive
++ // R16_thread - JavaThread*
++ // R15_prev_state - address of caller's BytecodeInterpreter or 0
++ // R14_state - address of callee's interpreter state
++ // R1_SP - callee's stack pointer
++ //
++ // Registers updated
++ // R19_method - callee's method
++ // R3_RET - address of result (new caller's tos),
++ //
++ // if returning to interpreted
++ // msg - message for interpreter,
++ // if returning to interpreted
++ //
++
++ // Check if this is the initial invocation of the frame manager.
++ // If so, R15_prev_state will be null.
++ __ cmpdi(CCR0, R15_prev_state, 0);
++
++ // Reload callee method, gc may have moved it.
++ __ ld(R19_method, state_(_method));
++
++ // Load the method's result type.
++ __ lwz(result_index, method_(result_index));
++
++ // Go to return_to_initial_caller if R15_prev_state is null.
++ __ beq(CCR0, return_to_initial_caller);
++
++ // Copy callee's result to caller's expression stack via inline stack-to-stack
++ // converters.
++ {
++ Register new_tos = R3_RET;
++ Register from_temp = R4_ARG2;
++ Register from = R5_ARG3;
++ Register tos = R6_ARG4;
++ Register tmp1 = R7_ARG5;
++ Register tmp2 = R8_ARG6;
++
++ ConditionRegister result_type_is_void = CCR1;
++ ConditionRegister result_type_is_long = CCR2;
++ ConditionRegister result_type_is_double = CCR3;
++
++ Label stack_to_stack_void;
++ Label stack_to_stack_double_slot; // T_LONG, T_DOUBLE
++ Label stack_to_stack_single_slot; // T_BOOLEAN, T_BYTE, T_CHAR, T_SHORT, T_INT, T_FLOAT, T_OBJECT
++ Label stack_to_stack_done;
++
++ // Pass callee's address of tos + BytesPerWord
++ __ ld(from_temp, state_(_stack));
++
++ // result type: void
++ __ cmpwi(result_type_is_void, result_index, AbstractInterpreter::BasicType_as_index(T_VOID));
++
++ // Pass caller's tos == callee's locals address
++ __ ld(tos, state_(_locals));
++
++ // result type: long
++ __ cmpwi(result_type_is_long, result_index, AbstractInterpreter::BasicType_as_index(T_LONG));
++
++ __ addi(from, from_temp, Interpreter::stackElementSize);
++
++ // !! don't branch above this line !!
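++    // (The loads and compares above are interleaved, presumably to hide
++    // load latency behind the cmpwi results; every result type needs
++    // `from' and `tos', so no path may branch around those loads.)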
++
++ // handle void
++ __ beq(result_type_is_void, stack_to_stack_void);
++
++ // result type: double
++ __ cmpwi(result_type_is_double, result_index, AbstractInterpreter::BasicType_as_index(T_DOUBLE));
++
++ // handle long or double
++ __ beq(result_type_is_long, stack_to_stack_double_slot);
++ __ beq(result_type_is_double, stack_to_stack_double_slot);
++
++ // fall through to single slot types (incl. object)
++
++ {
++ __ BIND(stack_to_stack_single_slot);
++ // T_BOOLEAN, T_BYTE, T_CHAR, T_SHORT, T_INT, T_FLOAT, T_OBJECT
++
++ __ ld(tmp1, 0, from);
++ __ std(tmp1, 0, tos);
++ // New expression stack top
++ __ addi(new_tos, tos, - BytesPerWord);
++
++ __ b(stack_to_stack_done);
++ }
++
++ {
++ __ BIND(stack_to_stack_double_slot);
++ // T_LONG, T_DOUBLE
++
++ // Move both entries for debug purposes even though only one is live
++ __ ld(tmp1, BytesPerWord, from);
++ __ ld(tmp2, 0, from);
++ __ std(tmp1, 0, tos);
++ __ std(tmp2, -BytesPerWord, tos);
++
++ // new expression stack top
++ __ addi(new_tos, tos, - 2 * BytesPerWord); // two slots
++ __ b(stack_to_stack_done);
++ }
++
++ {
++ __ BIND(stack_to_stack_void);
++ // T_VOID
++
++ // new expression stack top
++ __ mr(new_tos, tos);
++ // fall through to stack_to_stack_done
++ }
++
++ __ BIND(stack_to_stack_done);
++ }
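++
++  // Illustration: the expression stack grows toward lower addresses, so a
++  // single-slot result lands at the caller's current tos with new_tos one
++  // slot below, while a two-slot result occupies tos and tos - BytesPerWord
++  // with new_tos two slots below.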
++
++ // new tos = R3_RET
++
++ // Get the message for the interpreter
++ __ li(msg, BytecodeInterpreter::method_resume);
++
++ // And fall thru
++
++
++ //=============================================================================
++ // Restore caller's interpreter state and pass pointer to caller's
++ // new tos to caller.
++
++ __ BIND(unwind_recursive_activation);
++
++ //
++ // Registers alive
++ // R15_prev_state - address of caller's BytecodeInterpreter
++ // R3_RET - address of caller's tos
++ // msg - message for caller's BytecodeInterpreter
++ // R1_SP - callee's stack pointer
++ //
++ // Registers updated
++ // R14_state - address of caller's BytecodeInterpreter
++ // R15_prev_state - address of its parent or 0
++ //
++
++ // Pop callee's interpreter and set R14_state to caller's interpreter.
++ __ pop_interpreter_state(/*prev_state_may_be_0=*/false);
++
++ // And fall thru
++
++
++ //=============================================================================
++ // Resume the (calling) interpreter after a call.
++
++ __ BIND(resume_interpreter);
++
++ //
++ // Registers alive
++ // R14_state - address of resuming BytecodeInterpreter
++ // R15_prev_state - address of its parent or 0
++ // R3_RET - address of resuming tos
++ // msg - message for resuming interpreter
++ // R1_SP - callee's stack pointer
++ //
++ // Registers updated
++ // R1_SP - caller's stack pointer
++ //
++
++ // Restore C stack pointer of caller (resuming interpreter),
++ // R14_state already points to the resuming BytecodeInterpreter.
++ __ pop_interpreter_frame_to_state(R14_state, R21_tmp1, R11_scratch1, R12_scratch2);
++
++ // Store new address of tos (holding return value) in interpreter state.
++ __ std(R3_RET, state_(_stack));
++
++ // Store message for interpreter.
++ __ stw(msg, state_(_msg));
++
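++  // The resuming interpreter inspects _msg on re-entry to decide how to
++  // continue; depending on which path branched here it is method_resume
++  // or popping_frame.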
++ __ b(call_interpreter);
++
++ //=============================================================================
++ // Interpreter returning to native code (call_stub/c1/c2) from
++ // initial activation. Convert stack result and unwind activation.
++
++ __ BIND(return_to_initial_caller);
++
++ //
++ // Registers alive
++ // R19_method - callee's Method
++ // R14_state - address of callee's interpreter state
++ // R16_thread - JavaThread
++ // R1_SP - callee's stack pointer
++ //
++ // Registers updated
++ // R3_RET/F1_RET - result in expected output register
++ //
++
++ // If we have an exception pending we have no result and we
++ // must figure out where to really return to.
++ //
++ __ ld(pending_exception, thread_(pending_exception));
++ __ cmpdi(CCR0, pending_exception, 0);
++ __ bne(CCR0, unwind_initial_activation_pending_exception);
++
++ __ lwa(result_index, method_(result_index));
++
++ // Address of stub descriptor address array.
++ __ load_const(stub_addr, CppInterpreter::stack_result_to_native());
++
++ // Pass address of callee's tos + BytesPerWord.
++ // Will then point directly to result.
++ __ ld(R3_ARG1, state_(_stack));
++ __ addi(R3_ARG1, R3_ARG1, Interpreter::stackElementSize);
++
++ // Address of stub descriptor address
++ __ sldi(result_index, result_index, LogBytesPerWord);
++ __ add(stub_addr, stub_addr, result_index);
++
++ // Stub descriptor address
++ __ ld(stub_addr, 0, stub_addr);
++
++ // TODO: don't do this via a call, do it in place!
++ //
++ // call stub via descriptor
++ __ call_stub(stub_addr);
++
++ __ BIND(unwind_initial_activation);
++
++ // Unwind from initial activation. No exception is pending.
++
++ //
++ // Stack layout at this point:
++ //
++ // 0 [TOP_IJAVA_FRAME_ABI] <-- R1_SP
++ // ...
++ // CALLER [PARENT_IJAVA_FRAME_ABI]
++ // ...
++ // CALLER [unextended ABI]
++ // ...
++ //
++ // The CALLER frame has a C2I adapter or is an entry-frame.
++ //
++
++ // An interpreter frame exists, we may pop the TOP_IJAVA_FRAME and
++ // turn the caller's PARENT_IJAVA_FRAME back into a TOP_IJAVA_FRAME.
++ // But, we simply restore the return pc from the caller's frame and
++ // use the caller's initial_caller_sp as the new SP which pops the
++ // interpreter frame and "resizes" the caller's frame to its "unextended"
++ // size.
++
++ // get rid of top frame
++ __ pop_frame();
++
++ // Load return PC from parent frame.
++ __ ld(R21_tmp1, _parent_ijava_frame_abi(lr), R1_SP);
++
++ // Resize frame to get rid of a potential extension.
++ __ resize_frame_to_initial_caller(R11_scratch1, R12_scratch2);
++
++ // update LR
++ __ mtlr(R21_tmp1);
++
++ // return
++ __ blr();
++
++ //=============================================================================
++ // Unwind from initial activation. An exception is pending
++
++ __ BIND(unwind_initial_activation_pending_exception);
++
++ //
++ // Stack layout at this point:
++ //
++ // 0 [TOP_IJAVA_FRAME_ABI] <-- R1_SP
++ // ...
++ // CALLER [PARENT_IJAVA_FRAME_ABI]
++ // ...
++ // CALLER [unextended ABI]
++ // ...
++ //
++ // The CALLER frame has a C2I adapter or is an entry-frame.
++ //
++
++ // An interpreter frame exists, we may pop the TOP_IJAVA_FRAME and
++ // turn the caller's PARENT_IJAVA_FRAME back into a TOP_IJAVA_FRAME.
++  // But we just pop the current TOP_IJAVA_FRAME and fall through.
++
++ __ pop_frame();
++ __ ld(R3_ARG1, _top_ijava_frame_abi(lr), R1_SP);
++
++ //
++ // Stack layout at this point:
++ //
++ // CALLER [PARENT_IJAVA_FRAME_ABI] <-- R1_SP
++ // ...
++ // CALLER [unextended ABI]
++ // ...
++ //
++ // The CALLER frame has a C2I adapter or is an entry-frame.
++ //
++ // Registers alive
++ // R16_thread
++ // R3_ARG1 - return address to caller
++ //
++ // Registers updated
++ // R3_ARG1 - address of pending exception
++ // R4_ARG2 - issuing pc = return address to caller
++ // LR - address of exception handler stub
++ //
++
++ // Resize frame to get rid of a potential extension.
++ __ resize_frame_to_initial_caller(R11_scratch1, R12_scratch2);
++
++  __ mr(R14, R3_ARG1);   // Save the issuing pc in nonvolatile R14 across the VM call.
++ __ mr(R4_ARG2, R3_ARG1); // ARG2 := ARG1
++
++ // Find the address of the "catch_exception" stub.
++ __ push_frame_reg_args(0, R11_scratch1);
++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address),
++ R16_thread,
++ R4_ARG2);
++ __ pop_frame();
++
++ // Load continuation address into LR.
++ __ mtlr(R3_RET);
++
++ // Load address of pending exception and clear it in thread object.
++ __ ld(R3_ARG1/*R3_RET*/, thread_(pending_exception));
++ __ li(R4_ARG2, 0);
++ __ std(R4_ARG2, thread_(pending_exception));
++
++ // re-load issuing pc
++ __ mr(R4_ARG2, R14);
++
++  // Branch to the exception handler whose address was loaded into LR above.
++ __ blr();
++
++ //=============================================================================
++ // Call a new method. Compute new args and trim the expression stack
++ // to only what we are currently using and then recurse.
++
++ __ BIND(call_method);
++
++ //
++ // Registers alive
++ // R16_thread
++ // R14_state - address of caller's BytecodeInterpreter
++ // R1_SP - caller's stack pointer
++ //
++ // Registers updated
++ // R15_prev_state - address of caller's BytecodeInterpreter
++ // R17_tos - address of caller's tos
++ // R19_method - callee's Method
++ // R1_SP - trimmed back
++ //
++
++ // Very-local scratch registers.
++
++ const Register offset = R21_tmp1;
++ const Register tmp = R22_tmp2;
++ const Register self_entry = R23_tmp3;
++ const Register stub_entry = R24_tmp4;
++
++ const ConditionRegister cr = CCR0;
++
++ // Load the address of the frame manager.
++ __ load_const(self_entry, &interpreter_frame_manager);
++ __ ld(self_entry, 0, self_entry);
++
++ // Load BytecodeInterpreter._result._to_call._callee (callee's Method).
++ __ ld(R19_method, state_(_result._to_call._callee));
++ // Load BytecodeInterpreter._stack (outgoing tos).
++ __ ld(R17_tos, state_(_stack));
++
++ // Save address of caller's BytecodeInterpreter.
++ __ mr(R15_prev_state, R14_state);
++
++ // Load the callee's entry point.
++ // Load BytecodeInterpreter._result._to_call._callee_entry_point.
++ __ ld(stub_entry, state_(_result._to_call._callee_entry_point));
++
++ // Check whether stub_entry is equal to self_entry.
++ __ cmpd(cr, self_entry, stub_entry);
++ // if (self_entry == stub_entry)
++ // do a re-dispatch
++ __ beq(cr, re_dispatch);
++ // else
++ // call the specialized entry (adapter for jni or compiled code)
++ __ BIND(call_special);
++
++ //
++ // Call the entry generated by `InterpreterGenerator::generate_native_entry'.
++ //
++ // Registers alive
++ // R16_thread
++ // R15_prev_state - address of caller's BytecodeInterpreter
++ // R19_method - callee's Method
++ // R17_tos - address of caller's tos
++ // R1_SP - caller's stack pointer
++ //
++
++ // Mark return from specialized entry for generate_native_entry.
++ guarantee(return_from_native_pc != (address) NULL, "precondition");
++ frame_manager_specialized_return = return_from_native_pc;
++
++ // Set sender_SP in case we call interpreter native wrapper which
++ // will expect it. Compiled code should not care.
++ __ mr(R21_sender_SP, R1_SP);
++
++ // Do a tail call here, and let the link register point to
++ // frame_manager_specialized_return which is return_from_native_pc.
++ __ load_const(tmp, frame_manager_specialized_return);
++ __ call_stub_and_return_to(stub_entry, tmp /* return_pc=tmp */);
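++
++  // Note that the return pc handed to the stub is return_from_native_pc, so
++  // the callee's return re-enters the frame manager at its return-from-native
++  // entry; control never falls through to here.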
++
++
++ //=============================================================================
++ //
++ // InterpretMethod triggered OSR compilation of some Java method M
++ // and now asks to run the compiled code. We call this code the
++ // `callee'.
++ //
++  // This is our current idea of how OSR should look on PPC64:
++ //
++ // While interpreting a Java method M the stack is:
++ //
++ // (InterpretMethod (M), IJAVA_FRAME (M), ANY_FRAME, ...).
++ //
++  // After M has been OSR-compiled, `InterpretMethod' returns to the
++ // frame manager, sending the message `retry_method_osr'. The stack
++ // is:
++ //
++ // (IJAVA_FRAME (M), ANY_FRAME, ...).
++ //
++ // The compiler will have generated an `nmethod' suitable for
++ // continuing execution of M at the bytecode index at which OSR took
++ // place. So now the frame manager calls the OSR entry. The OSR
++ // entry sets up a JIT_FRAME for M and continues execution of M with
++ // initial state determined by the IJAVA_FRAME.
++ //
++ // (JIT_FRAME (M), IJAVA_FRAME (M), ANY_FRAME, ...).
++ //
++
++ __ BIND(retry_method_osr);
++ {
++ //
++ // Registers alive
++ // R16_thread
++ // R15_prev_state - address of caller's BytecodeInterpreter
++ // R14_state - address of callee's BytecodeInterpreter
++ // R1_SP - callee's SP before call to InterpretMethod
++ //
++ // Registers updated
++ // R17 - pointer to callee's locals array
++ // (declared via `interpreter_arg_ptr_reg' in the AD file)
++ // R19_method - callee's Method
++ // R1_SP - callee's SP (will become SP of OSR adapter frame)
++ //
++
++ // Provide a debugger breakpoint in the frame manager if breakpoints
++ // in osr'd methods are requested.
++#ifdef COMPILER2
++ NOT_PRODUCT( if (OptoBreakpointOSR) { __ illtrap(); } )
++#endif
++
++ // Load callee's pointer to locals array from callee's state.
++ // __ ld(R17, state_(_locals));
++
++ // Load osr entry.
++ __ ld(R12_scratch2, state_(_result._osr._osr_entry));
++
++ // Load address of temporary osr buffer to arg1.
++ __ ld(R3_ARG1, state_(_result._osr._osr_buf));
++ __ mtctr(R12_scratch2);
++
++ // Load method, gc may move it during execution of osr'd method.
++ __ ld(R22_tmp2, state_(_method));
++ // Load message 'call_method'.
++ __ li(R23_tmp3, BytecodeInterpreter::call_method);
++
++ {
++      // Pop the IJAVA frame of the method that we are going to run OSR'd.
++ Label no_state, skip_no_state;
++ __ pop_interpreter_state(/*prev_state_may_be_0=*/true);
++      __ cmpdi(CCR0, R14_state, 0);
++ __ beq(CCR0, no_state);
++ // return to interpreter
++ __ pop_interpreter_frame_to_state(R14_state, R11_scratch1, R12_scratch2, R21_tmp1);
++
++ // Init _result._to_call._callee and tell gc that it contains a valid oop
++ // by setting _msg to 'call_method'.
++ __ std(R22_tmp2, state_(_result._to_call._callee));
++ // TODO: PPC port: assert(4 == BytecodeInterpreter::sz_msg(), "unexpected field size");
++ __ stw(R23_tmp3, state_(_msg));
++
++ __ load_const(R21_tmp1, frame_manager_specialized_return);
++ __ b(skip_no_state);
++ __ bind(no_state);
++
++ // Return to initial caller.
++
++ // Get rid of top frame.
++ __ pop_frame();
++
++ // Load return PC from parent frame.
++ __ ld(R21_tmp1, _parent_ijava_frame_abi(lr), R1_SP);
++
++ // Resize frame to get rid of a potential extension.
++ __ resize_frame_to_initial_caller(R11_scratch1, R12_scratch2);
++
++ __ bind(skip_no_state);
++
++ // Update LR with return pc.
++ __ mtlr(R21_tmp1);
++ }
++ // Jump to the osr entry point.
++ __ bctr();
++
++ }
++
++ //=============================================================================
++ // Interpreted method "returned" with an exception, pass it on.
++ // Pass no result, unwind activation and continue/return to
++ // interpreter/call_stub/c2.
++
++ __ BIND(throwing_exception);
++
++ // Check if this is the initial invocation of the frame manager. If
++ // so, previous interpreter state in R15_prev_state will be null.
++
++ // New tos of caller is callee's first parameter address, that is
++ // callee's incoming arguments are popped.
++ __ ld(R3_RET, state_(_locals));
++
++ // Check whether this is an initial call.
++ __ cmpdi(CCR0, R15_prev_state, 0);
++ // Yes, called from the call stub or from generated code via a c2i frame.
++ __ beq(CCR0, unwind_initial_activation_pending_exception);
++
++ // Send resume message, interpreter will see the exception first.
++
++ __ li(msg, BytecodeInterpreter::method_resume);
++ __ b(unwind_recursive_activation);
++
++
++ //=============================================================================
++ // Push the last instruction out to the code buffer.
++
++ {
++ __ unimplemented("end of InterpreterGenerator::generate_normal_entry", 128);
++ }
++
++ interpreter_frame_manager = entry;
++ return interpreter_frame_manager;
++}
++
++// Generate code for various sorts of method entries
++//
++address AbstractInterpreterGenerator::generate_method_entry(AbstractInterpreter::MethodKind kind) {
++ address entry_point = NULL;
++
++ switch (kind) {
++ case Interpreter::zerolocals : break;
++ case Interpreter::zerolocals_synchronized : break;
++ case Interpreter::native : // Fall thru
++ case Interpreter::native_synchronized : entry_point = ((CppInterpreterGenerator*)this)->generate_native_entry(); break;
++ case Interpreter::empty : break;
++ case Interpreter::accessor : entry_point = ((InterpreterGenerator*)this)->generate_accessor_entry(); break;
++ case Interpreter::abstract : entry_point = ((InterpreterGenerator*)this)->generate_abstract_entry(); break;
++  // These are special interpreter intrinsics which we don't support yet.
++ case Interpreter::java_lang_math_sin : break;
++ case Interpreter::java_lang_math_cos : break;
++ case Interpreter::java_lang_math_tan : break;
++ case Interpreter::java_lang_math_abs : break;
++ case Interpreter::java_lang_math_log : break;
++ case Interpreter::java_lang_math_log10 : break;
++ case Interpreter::java_lang_math_sqrt : break;
++ case Interpreter::java_lang_math_pow : break;
++ case Interpreter::java_lang_math_exp : break;
++ case Interpreter::java_lang_ref_reference_get: entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break;
++ default : ShouldNotReachHere(); break;
++ }
++
++ if (entry_point) {
++ return entry_point;
++ }
++ return ((InterpreterGenerator*)this)->generate_normal_entry();
++}
++
++InterpreterGenerator::InterpreterGenerator(StubQueue* code)
++ : CppInterpreterGenerator(code) {
++ generate_all(); // down here so it can be "virtual"
++}
++
++// How much stack a topmost interpreter method activation needs in words.
++int AbstractInterpreter::size_top_interpreter_activation(Method* method) {
++ // Computation is in bytes not words to match layout_activation_impl
++ // below, but the return is in words.
++
++ //
++ // 0 [TOP_IJAVA_FRAME_ABI] \
++ // alignment (optional) \ |
++ // [operand stack / Java parameters] > stack | |
++ // [monitors] (optional) > monitors | |
++ // [PARENT_IJAVA_FRAME_ABI] \ | |
++ // [BytecodeInterpreter object] > interpreter \ | | |
++ // alignment (optional) | round | parent | round | top
++ // [Java result] (2 slots) > result | | | |
++ // [Java non-arg locals] \ locals | | | |
++ // [arg locals] / / / / /
++ //
++
++ int locals = method->max_locals() * BytesPerWord;
++ int interpreter = frame::interpreter_frame_cinterpreterstate_size_in_bytes();
++ int result = 2 * BytesPerWord;
++
++ int parent = round_to(interpreter + result + locals, 16) + frame::parent_ijava_frame_abi_size;
++
++ int stack = method->max_stack() * BytesPerWord;
++ int monitors = method->is_synchronized() ? frame::interpreter_frame_monitor_size_in_bytes() : 0;
++ int top = round_to(parent + monitors + stack, 16) + frame::top_ijava_frame_abi_size;
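++
++  // Illustrative sizing, assuming 8-byte words: an unsynchronized method with
++  // max_locals = 2 and max_stack = 3 contributes locals = 16, result = 16 and
++  // stack = 24 bytes; the two round_to(..., 16) calls pad the parent and top
++  // regions to 16-byte alignment before the ABI areas are added.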
++
++ return (top / BytesPerWord);
++}
++
++void BytecodeInterpreter::layout_interpreterState(interpreterState to_fill,
++ frame* caller,
++ frame* current,
++ Method* method,
++ intptr_t* locals,
++ intptr_t* stack,
++ intptr_t* stack_base,
++ intptr_t* monitor_base,
++ intptr_t* frame_sp,
++ bool is_top_frame) {
++ // What about any vtable?
++ //
++ to_fill->_thread = JavaThread::current();
++ // This gets filled in later but make it something recognizable for now.
++ to_fill->_bcp = method->code_base();
++ to_fill->_locals = locals;
++ to_fill->_constants = method->constants()->cache();
++ to_fill->_method = method;
++ to_fill->_mdx = NULL;
++ to_fill->_stack = stack;
++
++ if (is_top_frame && JavaThread::current()->popframe_forcing_deopt_reexecution()) {
++ to_fill->_msg = deopt_resume2;
++ } else {
++ to_fill->_msg = method_resume;
++ }
++ to_fill->_result._to_call._bcp_advance = 0;
++ to_fill->_result._to_call._callee_entry_point = NULL; // doesn't matter to anyone
++ to_fill->_result._to_call._callee = NULL; // doesn't matter to anyone
++ to_fill->_prev_link = NULL;
++
++ if (caller->is_interpreted_frame()) {
++ interpreterState prev = caller->get_interpreterState();
++
++ // Support MH calls. Make sure the interpreter will return the right address:
++    // 1. Caller did an ordinary interpreted->compiled call: Set a prev_state
++ // which makes the CPP interpreter return to frame manager "return_from_interpreted_method"
++ // entry after finishing execution.
++    // 2. Caller did a MH call: If the caller has a MethodHandleInvoke in its
++ // state (invariant: must be the caller of the bottom vframe) we used the
++ // "call_special" entry to do the call, meaning the arguments have not been
++ // popped from the stack. Therefore, don't enter a prev state in this case
++ // in order to return to "return_from_native" frame manager entry which takes
++ // care of popping arguments. Also, don't overwrite the MH.invoke Method in
++ // the prev_state in order to be able to figure out the number of arguments to
++ // pop.
++ // The parameter method can represent MethodHandle.invokeExact(...).
++ // The MethodHandleCompiler generates these synthetic Methods,
++ // including bytecodes, if an invokedynamic call gets inlined. In
++ // this case we want to return like from any other interpreted
++ // Java call, so we set _prev_link.
++ to_fill->_prev_link = prev;
++
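++    // invokeinterface and invokedynamic occupy 5 bytes in the bytecode
++    // stream (opcode plus 4 operand bytes); the other invoke bytecodes
++    // carry only a 2-byte constant pool index, hence a bcp advance of 3.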
++ if (*prev->_bcp == Bytecodes::_invokeinterface || *prev->_bcp == Bytecodes::_invokedynamic) {
++ prev->_result._to_call._bcp_advance = 5;
++ } else {
++ prev->_result._to_call._bcp_advance = 3;
++ }
++ }
++ to_fill->_oop_temp = NULL;
++ to_fill->_stack_base = stack_base;
++ // Need +1 here because stack_base points to the word just above the
++ // first expr stack entry and stack_limit is supposed to point to
++ // the word just below the last expr stack entry. See
++ // generate_compute_interpreter_state.
++ to_fill->_stack_limit = stack_base - (method->max_stack() + 1);
++ to_fill->_monitor_base = (BasicObjectLock*) monitor_base;
++
++ to_fill->_frame_bottom = frame_sp;
++
++ // PPC64 specific
++ to_fill->_last_Java_pc = NULL;
++ to_fill->_last_Java_fp = NULL;
++ to_fill->_last_Java_sp = frame_sp;
++#ifdef ASSERT
++ to_fill->_self_link = to_fill;
++ to_fill->_native_fresult = 123456.789;
++ to_fill->_native_lresult = CONST64(0xdeafcafedeadc0de);
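++  // Recognizable poison values: a result slot the native call never
++  // wrote stands out immediately when the state is inspected.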
++#endif
++}
++
++void BytecodeInterpreter::pd_layout_interpreterState(interpreterState istate,
++ address last_Java_pc,
++ intptr_t* last_Java_fp) {
++ istate->_last_Java_pc = last_Java_pc;
++ istate->_last_Java_fp = last_Java_fp;
++}
++
++// Computes monitor_size and top_frame_size in bytes.
++static void frame_size_helper(int max_stack,
++ int monitors,
++ int& monitor_size,
++ int& top_frame_size) {
++ monitor_size = frame::interpreter_frame_monitor_size_in_bytes() * monitors;
++ top_frame_size = round_to(frame::interpreter_frame_cinterpreterstate_size_in_bytes()
++ + monitor_size
++ + max_stack * Interpreter::stackElementSize
++ + 2 * Interpreter::stackElementSize,
++ frame::alignment_in_bytes)
++ + frame::top_ijava_frame_abi_size;
++}
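++
++// Illustrative numbers, assuming 8-byte stack elements: with max_stack = 4
++// and one monitor, top_frame_size is the 16-byte-rounded sum of the
++// interpreter state, one monitor, 32 bytes of expression stack and the
++// 2-slot result area, plus the top ABI area.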
++
++// Returns number of stackElementWords needed for the interpreter frame with the
++// given sections.
++int AbstractInterpreter::size_activation(int max_stack,
++ int temps,
++ int extra_args,
++ int monitors,
++ int callee_params,
++ int callee_locals,
++ bool is_top_frame) {
++ int monitor_size = 0;
++ int top_frame_size = 0;
++ frame_size_helper(max_stack, monitors, monitor_size, top_frame_size);
++
++ int frame_size;
++ if (is_top_frame) {
++ frame_size = top_frame_size;
++ } else {
++ frame_size = round_to(frame::interpreter_frame_cinterpreterstate_size_in_bytes()
++ + monitor_size
++ + (temps - callee_params + callee_locals) * Interpreter::stackElementSize
++ + 2 * Interpreter::stackElementSize,
++ frame::alignment_in_bytes)
++ + frame::parent_ijava_frame_abi_size;
++ assert(extra_args == 0, "non-zero for top_frame only");
++ }
++
++ return frame_size / Interpreter::stackElementSize;
++}
++
++void AbstractInterpreter::layout_activation(Method* method,
++ int temps, // Number of slots on java expression stack in use.
++ int popframe_args,
++ int monitors, // Number of active monitors.
++ int caller_actual_parameters,
++ int callee_params,// Number of slots for callee parameters.
++ int callee_locals,// Number of slots for locals.
++ frame* caller,
++ frame* interpreter_frame,
++ bool is_top_frame,
++ bool is_bottom_frame) {
++
++ // NOTE this code must exactly mimic what
++ // InterpreterGenerator::generate_compute_interpreter_state() does
++  // as far as allocating an interpreter frame. However, there is one
++  // exception. With the C++ based interpreter only the topmost frame
++  // has a full-sized expression stack. The 16 byte slop factor is
++  // both the abi scratch area and a place to hold a result from a
++  // callee on its way to the caller's stack.
++
++ int monitor_size = 0;
++ int top_frame_size = 0;
++ frame_size_helper(method->max_stack(), monitors, monitor_size, top_frame_size);
++
++ intptr_t sp = (intptr_t)interpreter_frame->sp();
++ intptr_t fp = *(intptr_t *)sp;
++ assert(fp == (intptr_t)caller->sp(), "fp must match");
++ interpreterState cur_state =
++ (interpreterState)(fp - frame::interpreter_frame_cinterpreterstate_size_in_bytes());
++
++ // Now fill in the interpreterState object.
++
++ intptr_t* locals;
++ if (caller->is_interpreted_frame()) {
++ // Locals must agree with the caller because it will be used to set the
++ // caller's tos when we return.
++ interpreterState prev = caller->get_interpreterState();
++ // Calculate start of "locals" for MH calls. For MH calls, the
++ // current method() (= MH target) and prev->callee() (=
++    // MH.invoke*()) are different and, in particular, have different
++    // signatures. To pop the arguments of the caller, we must use
++    // the prev->callee()->size_of_arguments() because that's what
++    // the caller actually pushed. Currently, for synthetic MH
++    // calls (deoptimized from inlined MH calls), detected by
++    // is_method_handle_invoke(), we use the callee's arguments
++    // because here the caller's and callee's signatures match.
++ if (true /*!caller->is_at_mh_callsite()*/) {
++ locals = prev->stack() + method->size_of_parameters();
++ } else {
++ // Normal MH call.
++ locals = prev->stack() + prev->callee()->size_of_parameters();
++ }
++ } else {
++ bool is_deopted;
++ locals = (intptr_t*) (fp + ((method->max_locals() - 1) * BytesPerWord) +
++ frame::parent_ijava_frame_abi_size);
++ }
++
++ intptr_t* monitor_base = (intptr_t*) cur_state;
++ intptr_t* stack_base = (intptr_t*) ((intptr_t) monitor_base - monitor_size);
++
++ // Provide pop_frame capability on PPC64, add popframe_args.
++ // +1 because stack is always prepushed.
++ intptr_t* stack = (intptr_t*) ((intptr_t) stack_base - (temps + popframe_args + 1) * BytesPerWord);
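++
++  // Example: temps = 2 and popframe_args = 0 yields stack_base - 3 slots;
++  // the extra slot is the prepush, i.e. the stack pointer always points at
++  // the next free slot below the last occupied entry.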
++
++ BytecodeInterpreter::layout_interpreterState(cur_state,
++ caller,
++ interpreter_frame,
++ method,
++ locals,
++ stack,
++ stack_base,
++ monitor_base,
++ (intptr_t*)(((intptr_t)fp) - top_frame_size),
++ is_top_frame);
++
++ BytecodeInterpreter::pd_layout_interpreterState(cur_state, interpreter_return_address,
++ interpreter_frame->fp());
++}
++
++#endif // CC_INTERP
+--- ./hotspot/src/cpu/ppc/vm/cppInterpreter_ppc.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/cppInterpreter_ppc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,39 @@
++/*
++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_PPC_VM_CPPINTERPRETER_PPC_HPP
++#define CPU_PPC_VM_CPPINTERPRETER_PPC_HPP
++
++ protected:
++
++ // Size of interpreter code. Increase if too small. Interpreter will
++  // fail with a guarantee ("not enough space for interpreter generation")
++  // if too small.
++ // Run with +PrintInterpreter to get the VM to print out the size.
++ // Max size with JVMTI
++
++ const static int InterpreterCodeSize = 12*K;
++
++#endif // CPU_PPC_VM_CPPINTERPRETER_PPC_HPP
+--- ./hotspot/src/cpu/ppc/vm/debug_ppc.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/debug_ppc.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,35 @@
++/*
++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "code/codeCache.hpp"
++#include "code/nmethod.hpp"
++#include "runtime/frame.hpp"
++#include "runtime/init.hpp"
++#include "runtime/os.hpp"
++#include "utilities/debug.hpp"
++#include "utilities/top.hpp"
++
++void pd_ps(frame f) {}
+--- ./hotspot/src/cpu/ppc/vm/depChecker_ppc.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/depChecker_ppc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,31 @@
++/*
++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_PPC_VM_DEPCHECKER_PPC_HPP
++#define CPU_PPC_VM_DEPCHECKER_PPC_HPP
++
++// Nothing to do on ppc64
++
++#endif // CPU_PPC_VM_DEPCHECKER_PPC_HPP
+--- ./hotspot/src/cpu/ppc/vm/disassembler_ppc.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/disassembler_ppc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,37 @@
++/*
++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_PPC_VM_DISASSEMBLER_PPC_HPP
++#define CPU_PPC_VM_DISASSEMBLER_PPC_HPP
++
++ static int pd_instruction_alignment() {
++ return sizeof(int);
++ }
++
++ static const char* pd_cpu_opts() {
++ return "ppc64";
++ }
++
++#endif // CPU_PPC_VM_DISASSEMBLER_PPC_HPP
+--- ./hotspot/src/cpu/ppc/vm/frame_ppc.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/frame_ppc.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,320 @@
++/*
++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2014 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "interpreter/interpreter.hpp"
++#include "memory/resourceArea.hpp"
++#include "oops/markOop.hpp"
++#include "oops/method.hpp"
++#include "oops/oop.inline.hpp"
++#include "runtime/frame.inline.hpp"
++#include "runtime/handles.inline.hpp"
++#include "runtime/javaCalls.hpp"
++#include "runtime/monitorChunk.hpp"
++#include "runtime/signature.hpp"
++#include "runtime/stubCodeGenerator.hpp"
++#include "runtime/stubRoutines.hpp"
++#include "vmreg_ppc.inline.hpp"
++#ifdef COMPILER1
++#include "c1/c1_Runtime1.hpp"
++#include "runtime/vframeArray.hpp"
++#endif
++
++#ifdef ASSERT
++void RegisterMap::check_location_valid() {
++}
++#endif // ASSERT
++
++bool frame::safe_for_sender(JavaThread *thread) {
++ bool safe = false;
++ address cursp = (address)sp();
++ address curfp = (address)fp();
++ if ((cursp != NULL && curfp != NULL &&
++ (cursp <= thread->stack_base() && cursp >= thread->stack_base() - thread->stack_size())) &&
++ (curfp <= thread->stack_base() && curfp >= thread->stack_base() - thread->stack_size())) {
++ safe = true;
++ }
++ return safe;
++}
++
++bool frame::is_interpreted_frame() const {
++ return Interpreter::contains(pc());
++}
++
++frame frame::sender_for_entry_frame(RegisterMap *map) const {
++ assert(map != NULL, "map must be set");
++ // Java frame called from C; skip all C frames and return top C
++ // frame of that chunk as the sender.
++ JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor();
++ assert(!entry_frame_is_first(), "next Java fp must be non zero");
++ assert(jfa->last_Java_sp() > _sp, "must be above this frame on stack");
++ map->clear();
++ assert(map->include_argument_oops(), "should be set by clear");
++
++ if (jfa->last_Java_pc() != NULL) {
++ frame fr(jfa->last_Java_sp(), jfa->last_Java_pc());
++ return fr;
++ }
++  // Last_Java_pc is not set if we come here from compiled code. The
++ // constructor retrieves the PC from the stack.
++ frame fr(jfa->last_Java_sp());
++ return fr;
++}
++
++frame frame::sender_for_interpreter_frame(RegisterMap *map) const {
++ // Pass callers initial_caller_sp as unextended_sp.
++ return frame(sender_sp(), sender_pc(),
++ CC_INTERP_ONLY((intptr_t*)((parent_ijava_frame_abi *)callers_abi())->initial_caller_sp)
++ NOT_CC_INTERP((intptr_t*)get_ijava_state()->sender_sp)
++ );
++}
++
++frame frame::sender_for_compiled_frame(RegisterMap *map) const {
++ assert(map != NULL, "map must be set");
++
++ // Frame owned by compiler.
++ address pc = *compiled_sender_pc_addr(_cb);
++ frame caller(compiled_sender_sp(_cb), pc);
++
++ // Now adjust the map.
++
++ // Get the rest.
++ if (map->update_map()) {
++ // Tell GC to use argument oopmaps for some runtime stubs that need it.
++ map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread()));
++ if (_cb->oop_maps() != NULL) {
++ OopMapSet::update_register_map(this, map);
++ }
++ }
++
++ return caller;
++}
++
++intptr_t* frame::compiled_sender_sp(CodeBlob* cb) const {
++ return sender_sp();
++}
++
++address* frame::compiled_sender_pc_addr(CodeBlob* cb) const {
++ return sender_pc_addr();
++}
++
++frame frame::sender(RegisterMap* map) const {
++ // Default is we do have to follow them. The sender_for_xxx will
++ // update it accordingly.
++ map->set_include_argument_oops(false);
++
++ if (is_entry_frame()) return sender_for_entry_frame(map);
++ if (is_interpreted_frame()) return sender_for_interpreter_frame(map);
++ assert(_cb == CodeCache::find_blob(pc()),"Must be the same");
++
++ if (_cb != NULL) {
++ return sender_for_compiled_frame(map);
++ }
++ // Must be native-compiled frame, i.e. the marshaling code for native
++ // methods that exists in the core system.
++ return frame(sender_sp(), sender_pc());
++}
++
++void frame::patch_pc(Thread* thread, address pc) {
++ if (TracePcPatching) {
++ tty->print_cr("patch_pc at address " PTR_FORMAT " [" PTR_FORMAT " -> " PTR_FORMAT "]",
++ &((address*) _sp)[-1], ((address*) _sp)[-1], pc);
++ }
++ own_abi()->lr = (uint64_t)pc;
++ _cb = CodeCache::find_blob(pc);
++ if (_cb != NULL && _cb->is_nmethod() && ((nmethod*)_cb)->is_deopt_pc(_pc)) {
++ address orig = (((nmethod*)_cb)->get_original_pc(this));
++ assert(orig == _pc, "expected original to be stored before patching");
++ _deopt_state = is_deoptimized;
++ // Leave _pc as is.
++ } else {
++ _deopt_state = not_deoptimized;
++ _pc = pc;
++ }
++}
++
++void frame::pd_gc_epilog() {
++ if (is_interpreted_frame()) {
++ // Set constant pool cache entry for interpreter.
++ Method* m = interpreter_frame_method();
++
++ *interpreter_frame_cpoolcache_addr() = m->constants()->cache();
++ }
++}
++
++bool frame::is_interpreted_frame_valid(JavaThread* thread) const {
++ // Is there anything to do?
++ assert(is_interpreted_frame(), "Not an interpreted frame");
++ return true;
++}
++
++BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) {
++ assert(is_interpreted_frame(), "interpreted frame expected");
++ Method* method = interpreter_frame_method();
++ BasicType type = method->result_type();
++
++ if (method->is_native()) {
++ // Prior to calling into the runtime to notify the method exit the possible
++ // result value is saved into the interpreter frame.
++#ifdef CC_INTERP
++ interpreterState istate = get_interpreterState();
++ address lresult = (address)istate + in_bytes(BytecodeInterpreter::native_lresult_offset());
++ address fresult = (address)istate + in_bytes(BytecodeInterpreter::native_fresult_offset());
++#else
++ address lresult = (address)&(get_ijava_state()->lresult);
++ address fresult = (address)&(get_ijava_state()->fresult);
++#endif
++
++ switch (method->result_type()) {
++ case T_OBJECT:
++ case T_ARRAY: {
++ oop* obj_p = *(oop**)lresult;
++ oop obj = (obj_p == NULL) ? (oop)NULL : *obj_p;
++ assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check");
++ *oop_result = obj;
++ break;
++ }
++ // We use std/stfd to store the values.
++ case T_BOOLEAN : value_result->z = (jboolean) *(unsigned long*)lresult; break;
++ case T_INT : value_result->i = (jint) *(long*)lresult; break;
++ case T_CHAR : value_result->c = (jchar) *(unsigned long*)lresult; break;
++ case T_SHORT : value_result->s = (jshort) *(long*)lresult; break;
++      case T_BYTE    : value_result->b = (jbyte)    *(long*)lresult;          break;
++ case T_LONG : value_result->j = (jlong) *(long*)lresult; break;
++ case T_FLOAT : value_result->f = (jfloat) *(double*)fresult; break;
++ case T_DOUBLE : value_result->d = (jdouble) *(double*)fresult; break;
++ case T_VOID : /* Nothing to do */ break;
++ default : ShouldNotReachHere();
++ }
++ } else {
++ intptr_t* tos_addr = interpreter_frame_tos_address();
++ switch (method->result_type()) {
++ case T_OBJECT:
++ case T_ARRAY: {
++ oop obj = *(oop*)tos_addr;
++ assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check");
++        *oop_result = obj;
++        break; // do not fall through to the primitive cases
++      }
++ case T_BOOLEAN : value_result->z = (jboolean) *(jint*)tos_addr; break;
++ case T_BYTE : value_result->b = (jbyte) *(jint*)tos_addr; break;
++ case T_CHAR : value_result->c = (jchar) *(jint*)tos_addr; break;
++ case T_SHORT : value_result->s = (jshort) *(jint*)tos_addr; break;
++ case T_INT : value_result->i = *(jint*)tos_addr; break;
++ case T_LONG : value_result->j = *(jlong*)tos_addr; break;
++ case T_FLOAT : value_result->f = *(jfloat*)tos_addr; break;
++ case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break;
++ case T_VOID : /* Nothing to do */ break;
++ default : ShouldNotReachHere();
++ }
++ }
++ return type;
++}
++
++#ifndef PRODUCT
++
++void frame::describe_pd(FrameValues& values, int frame_no) {
++ if (is_interpreted_frame()) {
++#ifdef CC_INTERP
++ interpreterState istate = get_interpreterState();
++ values.describe(frame_no, (intptr_t*)istate, "istate");
++ values.describe(frame_no, (intptr_t*)&(istate->_thread), " thread");
++ values.describe(frame_no, (intptr_t*)&(istate->_bcp), " bcp");
++ values.describe(frame_no, (intptr_t*)&(istate->_locals), " locals");
++ values.describe(frame_no, (intptr_t*)&(istate->_constants), " constants");
++ values.describe(frame_no, (intptr_t*)&(istate->_method), err_msg(" method = %s", istate->_method->name_and_sig_as_C_string()));
++ values.describe(frame_no, (intptr_t*)&(istate->_mdx), " mdx");
++ values.describe(frame_no, (intptr_t*)&(istate->_stack), " stack");
++ values.describe(frame_no, (intptr_t*)&(istate->_msg), err_msg(" msg = %s", BytecodeInterpreter::C_msg(istate->_msg)));
++ values.describe(frame_no, (intptr_t*)&(istate->_result), " result");
++ values.describe(frame_no, (intptr_t*)&(istate->_prev_link), " prev_link");
++ values.describe(frame_no, (intptr_t*)&(istate->_oop_temp), " oop_temp");
++ values.describe(frame_no, (intptr_t*)&(istate->_stack_base), " stack_base");
++ values.describe(frame_no, (intptr_t*)&(istate->_stack_limit), " stack_limit");
++ values.describe(frame_no, (intptr_t*)&(istate->_monitor_base), " monitor_base");
++ values.describe(frame_no, (intptr_t*)&(istate->_frame_bottom), " frame_bottom");
++ values.describe(frame_no, (intptr_t*)&(istate->_last_Java_pc), " last_Java_pc");
++ values.describe(frame_no, (intptr_t*)&(istate->_last_Java_fp), " last_Java_fp");
++ values.describe(frame_no, (intptr_t*)&(istate->_last_Java_sp), " last_Java_sp");
++ values.describe(frame_no, (intptr_t*)&(istate->_self_link), " self_link");
++ values.describe(frame_no, (intptr_t*)&(istate->_native_fresult), " native_fresult");
++ values.describe(frame_no, (intptr_t*)&(istate->_native_lresult), " native_lresult");
++#else
++#define DESCRIBE_ADDRESS(name) \
++ values.describe(frame_no, (intptr_t*)&(get_ijava_state()->name), #name);
++
++ DESCRIBE_ADDRESS(method);
++ DESCRIBE_ADDRESS(locals);
++ DESCRIBE_ADDRESS(monitors);
++ DESCRIBE_ADDRESS(cpoolCache);
++ DESCRIBE_ADDRESS(bcp);
++ DESCRIBE_ADDRESS(esp);
++ DESCRIBE_ADDRESS(mdx);
++ DESCRIBE_ADDRESS(top_frame_sp);
++ DESCRIBE_ADDRESS(sender_sp);
++ DESCRIBE_ADDRESS(oop_tmp);
++ DESCRIBE_ADDRESS(lresult);
++ DESCRIBE_ADDRESS(fresult);
++#endif
++ }
++}
++#endif
++
++void frame::adjust_unextended_sp() {
++ // If we are returning to a compiled MethodHandle call site, the
++ // saved_fp will in fact be a saved value of the unextended SP. The
++ // simplest way to tell whether we are returning to such a call site
++ // is as follows:
++
++ if (is_compiled_frame() && false /*is_at_mh_callsite()*/) { // TODO PPC port
++ // If the sender PC is a deoptimization point, get the original
++ // PC. For MethodHandle call site the unextended_sp is stored in
++ // saved_fp.
++ _unextended_sp = _fp - _cb->frame_size();
++
++#ifdef ASSERT
++ nmethod *sender_nm = _cb->as_nmethod_or_null();
++ assert(sender_nm && *_sp == *_unextended_sp, "backlink changed");
++
++ intptr_t* sp = _unextended_sp; // check if stack can be walked from here
++    for (int x = 0; x < 5; ++x) { // walk up to five backlinks
++      intptr_t* prev_sp = *(intptr_t**)sp;
++      if (prev_sp == 0) break; // end of stack
++      assert(prev_sp > sp, "broken stack");
++ sp = prev_sp;
++ }
++
++ if (sender_nm->is_deopt_mh_entry(_pc)) { // checks for deoptimization
++ address original_pc = sender_nm->get_original_pc(this);
++ assert(sender_nm->insts_contains(original_pc), "original PC must be in nmethod");
++ assert(sender_nm->is_method_handle_return(original_pc), "must be");
++ }
++#endif
++ }
++}
++
++intptr_t *frame::initial_deoptimization_info() {
++ // unused... but returns fp() to minimize changes introduced by 7087445
++ return fp();
++}
+--- ./hotspot/src/cpu/ppc/vm/frame_ppc.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/frame_ppc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,534 @@
++/*
++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2014 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_PPC_VM_FRAME_PPC_HPP
++#define CPU_PPC_VM_FRAME_PPC_HPP
++
++#include "runtime/synchronizer.hpp"
++#include "utilities/top.hpp"
++
++ // C frame layout on PPC-64.
++ //
++ // In this figure the stack grows upwards, while memory grows
++ // downwards. See "64-bit PowerPC ELF ABI Supplement Version 1.7",
++ // IBM Corp. (2003-10-29)
++ // (http://math-atlas.sourceforge.net/devel/assembly/PPC-elf64abi-1.7.pdf).
++ //
++ // Square brackets denote stack regions possibly larger
++ // than a single 64 bit slot.
++ //
++ // STACK:
++ // 0 [C_FRAME] <-- SP after prolog (mod 16 = 0)
++ // [C_FRAME] <-- SP before prolog
++ // ...
++ // [C_FRAME]
++ //
++ // C_FRAME:
++ // 0 [ABI_REG_ARGS]
++ // 112 CARG_9: outgoing arg 9 (arg_1 ... arg_8 via gpr_3 ... gpr_{10})
++ // ...
++ // 40+M*8 CARG_M: outgoing arg M (M is the maximum of outgoing args taken over all call sites in the procedure)
++ // local 1
++ // ...
++ // local N
++ // spill slot for vector reg (16 bytes aligned)
++ // ...
++ // spill slot for vector reg
++ // alignment (4 or 12 bytes)
++ // V SR_VRSAVE
++ // V+4 spill slot for GR
++ // ... ...
++ // spill slot for GR
++ // spill slot for FR
++ // ...
++ // spill slot for FR
++ //
++ // ABI_48:
++ // 0 caller's SP
++ // 8 space for condition register (CR) for next call
++ // 16 space for link register (LR) for next call
++ // 24 reserved
++ // 32 reserved
++ // 40 space for TOC (=R2) register for next call
++ //
++ // ABI_REG_ARGS:
++ // 0 [ABI_48]
++ // 48 CARG_1: spill slot for outgoing arg 1. used by next callee.
++ // ... ...
++ // 104 CARG_8: spill slot for outgoing arg 8. used by next callee.
++ //
++
++ public:
++
++ // C frame layout
++
++ enum {
++ // stack alignment
++ alignment_in_bytes = 16,
++ // log_2(16*8 bits) = 7.
++    // log_2(16 bytes * 8 bits) = log_2(128 bits) = 7.
++ };
++
++ // ABI_MINFRAME:
++ struct abi_minframe {
++ uint64_t callers_sp;
++ uint64_t cr; //_16
++ uint64_t lr;
++#if !defined(ABI_ELFv2)
++ uint64_t reserved1; //_16
++ uint64_t reserved2;
++#endif
++ uint64_t toc; //_16
++ // nothing to add here!
++ // aligned to frame::alignment_in_bytes (16)
++ };
++
++ enum {
++ abi_minframe_size = sizeof(abi_minframe)
++ };
++
++ struct abi_reg_args : abi_minframe {
++ uint64_t carg_1;
++ uint64_t carg_2; //_16
++ uint64_t carg_3;
++ uint64_t carg_4; //_16
++ uint64_t carg_5;
++ uint64_t carg_6; //_16
++ uint64_t carg_7;
++ uint64_t carg_8; //_16
++ // aligned to frame::alignment_in_bytes (16)
++ };
++
++ enum {
++ abi_reg_args_size = sizeof(abi_reg_args)
++ };
++
++ #define _abi(_component) \
++ (offset_of(frame::abi_reg_args, _component))
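++ // Example: _abi(lr) == offset_of(frame::abi_reg_args, lr) == 16,
++ // matching the LR save slot in the ABI_48 figure above.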
++
++ struct abi_reg_args_spill : abi_reg_args {
++ // additional spill slots
++ uint64_t spill_ret;
++ uint64_t spill_fret; //_16
++ // aligned to frame::alignment_in_bytes (16)
++ };
++
++ enum {
++ abi_reg_args_spill_size = sizeof(abi_reg_args_spill)
++ };
++
++ #define _abi_reg_args_spill(_component) \
++ (offset_of(frame::abi_reg_args_spill, _component))
++
++ // non-volatile GPRs:
++
++ struct spill_nonvolatiles {
++ uint64_t r14;
++ uint64_t r15; //_16
++ uint64_t r16;
++ uint64_t r17; //_16
++ uint64_t r18;
++ uint64_t r19; //_16
++ uint64_t r20;
++ uint64_t r21; //_16
++ uint64_t r22;
++ uint64_t r23; //_16
++ uint64_t r24;
++ uint64_t r25; //_16
++ uint64_t r26;
++ uint64_t r27; //_16
++ uint64_t r28;
++ uint64_t r29; //_16
++ uint64_t r30;
++ uint64_t r31; //_16
++
++ double f14;
++ double f15;
++ double f16;
++ double f17;
++ double f18;
++ double f19;
++ double f20;
++ double f21;
++ double f22;
++ double f23;
++ double f24;
++ double f25;
++ double f26;
++ double f27;
++ double f28;
++ double f29;
++ double f30;
++ double f31;
++
++ // aligned to frame::alignment_in_bytes (16)
++ };
++
++ enum {
++ spill_nonvolatiles_size = sizeof(spill_nonvolatiles)
++ };
++
++ #define _spill_nonvolatiles_neg(_component) \
++ (int)(-frame::spill_nonvolatiles_size + offset_of(frame::spill_nonvolatiles, _component))
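++ // Example (assuming the natural packing shown above): the area holds
++ // 18 GPRs plus 18 FPRs == 288 bytes, so _spill_nonvolatiles_neg(r14) == -288.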
++
++
++
++#ifndef CC_INTERP
++ // Frame layout for the Java template interpreter on PPC64.
++ //
++ // Diffs to the CC_INTERP are marked with 'X'.
++ //
++ // TOP_IJAVA_FRAME:
++ //
++ // 0 [TOP_IJAVA_FRAME_ABI]
++ // alignment (optional)
++ // [operand stack]
++ // [monitors] (optional)
++ // X[IJAVA_STATE]
++ // note: the frame's own locals are located in the caller's frame.
++ //
++ // PARENT_IJAVA_FRAME:
++ //
++ // 0 [PARENT_IJAVA_FRAME_ABI]
++ // alignment (optional)
++ // [callee's Java result]
++ // [callee's locals w/o arguments]
++ // [outgoing arguments]
++ // [used part of operand stack w/o arguments]
++ // [monitors] (optional)
++ // X[IJAVA_STATE]
++ //
++
++ struct parent_ijava_frame_abi : abi_minframe {
++ };
++
++ enum {
++ parent_ijava_frame_abi_size = sizeof(parent_ijava_frame_abi)
++ };
++
++#define _parent_ijava_frame_abi(_component) \
++ (offset_of(frame::parent_ijava_frame_abi, _component))
++
++ struct top_ijava_frame_abi : abi_reg_args {
++ };
++
++ enum {
++ top_ijava_frame_abi_size = sizeof(top_ijava_frame_abi)
++ };
++
++#define _top_ijava_frame_abi(_component) \
++ (offset_of(frame::top_ijava_frame_abi, _component))
++
++ struct ijava_state {
++#ifdef ASSERT
++ uint64_t ijava_reserved; // Used for assertion.
++ uint64_t ijava_reserved2; // Inserted for alignment.
++#endif
++ uint64_t method;
++ uint64_t locals;
++ uint64_t monitors;
++ uint64_t cpoolCache;
++ uint64_t bcp;
++ uint64_t esp;
++ uint64_t mdx;
++ uint64_t top_frame_sp; // Maybe define parent_frame_abi and move there.
++ uint64_t sender_sp;
++ // Slots only needed for native calls. Maybe better to move elsewhere.
++ uint64_t oop_tmp;
++ uint64_t lresult;
++ uint64_t fresult;
++ // Aligned to frame::alignment_in_bytes (16).
++ };
++
++ enum {
++ ijava_state_size = sizeof(ijava_state)
++ };
++
++#define _ijava_state_neg(_component) \
++ (int) (-frame::ijava_state_size + offset_of(frame::ijava_state, _component))
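++// Example (product build, without the ASSERT slots): ijava_state has
++// twelve 64-bit fields, so _ijava_state_neg(method) == -(12*8) == -96.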
++
++#else // CC_INTERP:
++
++ // Frame layout for the Java C++ interpreter on PPC64.
++ //
++ // This frame layout provides a C-like frame for every Java frame.
++ //
++ // In these figures the stack grows upwards, while memory grows
++ // downwards. Square brackets denote regions possibly larger than
++ // single 64 bit slots.
++ //
++ // STACK (no JNI, no compiled code, no library calls,
++ // interpreter-loop is active):
++ // 0 [InterpretMethod]
++ // [TOP_IJAVA_FRAME]
++ // [PARENT_IJAVA_FRAME]
++ // ...
++ // [PARENT_IJAVA_FRAME]
++ // [ENTRY_FRAME]
++ // [C_FRAME]
++ // ...
++ // [C_FRAME]
++ //
++ // TOP_IJAVA_FRAME:
++ // 0 [TOP_IJAVA_FRAME_ABI]
++ // alignment (optional)
++ // [operand stack]
++ // [monitors] (optional)
++ // [cInterpreter object]
++ // result, locals, and arguments are in parent frame!
++ //
++ // PARENT_IJAVA_FRAME:
++ // 0 [PARENT_IJAVA_FRAME_ABI]
++ // alignment (optional)
++ // [callee's Java result]
++ // [callee's locals w/o arguments]
++ // [outgoing arguments]
++ // [used part of operand stack w/o arguments]
++ // [monitors] (optional)
++ // [cInterpreter object]
++ //
++ // ENTRY_FRAME:
++ // 0 [PARENT_IJAVA_FRAME_ABI]
++ // alignment (optional)
++ // [callee's Java result]
++ // [callee's locals w/o arguments]
++ // [outgoing arguments]
++ // [ENTRY_FRAME_LOCALS]
++ //
++ // PARENT_IJAVA_FRAME_ABI:
++ // 0 [ABI_MINFRAME]
++ // top_frame_sp
++ // initial_caller_sp
++ //
++ // TOP_IJAVA_FRAME_ABI:
++ // 0 [PARENT_IJAVA_FRAME_ABI]
++ // carg_3_unused
++ // carg_4_unused
++ // carg_5_unused
++ // carg_6_unused
++ // carg_7_unused
++ // frame_manager_lr
++ //
++
++ // PARENT_IJAVA_FRAME_ABI
++
++ struct parent_ijava_frame_abi : abi_minframe {
++ // SOE registers.
++ // C2i adapters spill their top-frame stack-pointer here.
++ uint64_t top_frame_sp; // carg_1
++ // Sp of calling compiled frame before it was resized by the c2i
++ // adapter or sp of call stub. Does not contain a valid value for
++ // non-initial frames.
++ uint64_t initial_caller_sp; // carg_2
++ // aligned to frame::alignment_in_bytes (16)
++ };
++
++ enum {
++ parent_ijava_frame_abi_size = sizeof(parent_ijava_frame_abi)
++ };
++
++ #define _parent_ijava_frame_abi(_component) \
++ (offset_of(frame::parent_ijava_frame_abi, _component))
++
++ // TOP_IJAVA_FRAME_ABI
++
++ struct top_ijava_frame_abi : parent_ijava_frame_abi {
++ uint64_t carg_3_unused; // carg_3
++ uint64_t carg_4_unused; //_16 carg_4
++ uint64_t carg_5_unused; // carg_5
++ uint64_t carg_6_unused; //_16 carg_6
++ uint64_t carg_7_unused; // carg_7
++ // Use arg8 for storing frame_manager_lr. The size of
++ // top_ijava_frame_abi must match abi_reg_args.
++ uint64_t frame_manager_lr; //_16 carg_8
++ // nothing to add here!
++ // aligned to frame::alignment_in_bytes (16)
++ };
++
++ enum {
++ top_ijava_frame_abi_size = sizeof(top_ijava_frame_abi)
++ };
++
++ #define _top_ijava_frame_abi(_component) \
++ (offset_of(frame::top_ijava_frame_abi, _component))
++
++#endif // CC_INTERP
++
++ // ENTRY_FRAME
++
++ struct entry_frame_locals {
++ uint64_t call_wrapper_address;
++ uint64_t result_address; //_16
++ uint64_t result_type;
++ uint64_t arguments_tos_address; //_16
++ // aligned to frame::alignment_in_bytes (16)
++ uint64_t r[spill_nonvolatiles_size/sizeof(uint64_t)];
++ };
++
++ enum {
++ entry_frame_locals_size = sizeof(entry_frame_locals)
++ };
++
++ #define _entry_frame_locals_neg(_component) \
++ (int)(-frame::entry_frame_locals_size + offset_of(frame::entry_frame_locals, _component))
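++ // Example: entry_frame_locals_size == 4*8 + spill_nonvolatiles_size (288)
++ // == 320, so _entry_frame_locals_neg(call_wrapper_address) == -320.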
++
++
++ // Frame layout for JIT generated methods
++ //
++ // In these figures the stack grows upwards, while memory grows
++ // downwards. Square brackets denote regions possibly larger than single
++ // 64 bit slots.
++ //
++ // STACK (interpreted Java calls JIT generated Java):
++ // [JIT_FRAME] <-- SP (mod 16 = 0)
++ // [TOP_IJAVA_FRAME]
++ // ...
++ //
++ // JIT_FRAME (is a C frame according to PPC-64 ABI):
++ // [out_preserve]
++ // [out_args]
++ // [spills]
++ // [pad_1]
++ // [monitor] (optional)
++ // ...
++ // [monitor] (optional)
++ // [pad_2]
++ // [in_preserve] added / removed by prolog / epilog
++ //
++
++ // JIT_ABI (TOP and PARENT)
++
++ struct jit_abi {
++ uint64_t callers_sp;
++ uint64_t cr;
++ uint64_t lr;
++ uint64_t toc;
++ // Nothing to add here!
++ // NOT ALIGNED to frame::alignment_in_bytes (16).
++ };
++
++ struct jit_out_preserve : jit_abi {
++ // Nothing to add here!
++ };
++
++ struct jit_in_preserve {
++ // Nothing to add here!
++ };
++
++ enum {
++ jit_out_preserve_size = sizeof(jit_out_preserve),
++ jit_in_preserve_size = sizeof(jit_in_preserve)
++ };
++
++ struct jit_monitor {
++ uint64_t monitor[1];
++ };
++
++ enum {
++ jit_monitor_size = sizeof(jit_monitor),
++ };
++
++ private:
++
++ // STACK:
++ // ...
++ // [THIS_FRAME] <-- this._sp (stack pointer for this frame)
++ // [CALLER_FRAME] <-- this.fp() (_sp of caller's frame)
++ // ...
++ //
++
++ // frame pointer for this frame
++ intptr_t* _fp;
++
++ // The frame's stack pointer before it has been extended by a c2i adapter;
++ // needed by deoptimization
++ intptr_t* _unextended_sp;
++ void adjust_unextended_sp();
++
++ public:
++
++ // Accessors for fields
++ intptr_t* fp() const { return _fp; }
++
++ // Accessors for ABIs
++ inline abi_minframe* own_abi() const { return (abi_minframe*) _sp; }
++ inline abi_minframe* callers_abi() const { return (abi_minframe*) _fp; }
++
++ private:
++
++ // Find codeblob and set deopt_state.
++ inline void find_codeblob_and_set_pc_and_deopt_state(address pc);
++
++ public:
++
++ // Constructors
++ inline frame(intptr_t* sp);
++ frame(intptr_t* sp, address pc);
++ inline frame(intptr_t* sp, address pc, intptr_t* unextended_sp);
++
++ private:
++
++ intptr_t* compiled_sender_sp(CodeBlob* cb) const;
++ address* compiled_sender_pc_addr(CodeBlob* cb) const;
++ address* sender_pc_addr(void) const;
++
++ public:
++
++#ifdef CC_INTERP
++ // Additional interface for interpreter frames:
++ inline interpreterState get_interpreterState() const;
++#else
++ inline ijava_state* get_ijava_state() const;
++ // Some convenient register frame setters/getters for deoptimization.
++ inline intptr_t* interpreter_frame_esp() const;
++ inline void interpreter_frame_set_cpcache(ConstantPoolCache* cp);
++ inline void interpreter_frame_set_esp(intptr_t* esp);
++ inline void interpreter_frame_set_top_frame_sp(intptr_t* top_frame_sp);
++ inline void interpreter_frame_set_sender_sp(intptr_t* sender_sp);
++#endif // CC_INTERP
++
++ // Size of a monitor in bytes.
++ static int interpreter_frame_monitor_size_in_bytes();
++
++ // The size of a cInterpreter object.
++ static inline int interpreter_frame_cinterpreterstate_size_in_bytes();
++
++ private:
++
++ ConstantPoolCache** interpreter_frame_cpoolcache_addr() const;
++
++ public:
++
++ // Additional interface for entry frames:
++ inline entry_frame_locals* get_entry_frame_locals() const {
++ return (entry_frame_locals*) (((address) fp()) - entry_frame_locals_size);
++ }
++
++ enum {
++ // On PPC64 the saved LR is the return address itself, hence an offset of 0.
++ pc_return_offset = 0
++ };
++
++#endif // CPU_PPC_VM_FRAME_PPC_HPP
+--- ./hotspot/src/cpu/ppc/vm/frame_ppc.inline.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/frame_ppc.inline.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,304 @@
++/*
++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2014 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_PPC_VM_FRAME_PPC_INLINE_HPP
++#define CPU_PPC_VM_FRAME_PPC_INLINE_HPP
++
++#include "code/codeCache.hpp"
++
++// Inline functions for ppc64 frames:
++
++// Find codeblob and set deopt_state.
++inline void frame::find_codeblob_and_set_pc_and_deopt_state(address pc) {
++ assert(pc != NULL, "precondition: must have PC");
++
++ _cb = CodeCache::find_blob(pc);
++ _pc = pc; // Must be set for get_deopt_original_pc()
++
++ _fp = (intptr_t*)own_abi()->callers_sp;
++ // Uses _fp - frame_size; this must run after _cb and _pc are initialized
++ // but before get_deopt_original_pc is called.
++ adjust_unextended_sp();
++
++ address original_pc = nmethod::get_deopt_original_pc(this);
++ if (original_pc != NULL) {
++ _pc = original_pc;
++ _deopt_state = is_deoptimized;
++ } else {
++ _deopt_state = not_deoptimized;
++ }
++
++ assert(((uint64_t)_sp & 0xf) == 0, "SP must be 16-byte aligned");
++}
++
++// Constructors
++
++// Initialize all fields, _unextended_sp will be adjusted in find_codeblob_and_set_pc_and_deopt_state.
++inline frame::frame() : _sp(NULL), _unextended_sp(NULL), _fp(NULL), _cb(NULL), _pc(NULL), _deopt_state(unknown) {}
++
++inline frame::frame(intptr_t* sp) : _sp(sp), _unextended_sp(sp) {
++ find_codeblob_and_set_pc_and_deopt_state((address)own_abi()->lr); // also sets _fp and adjusts _unextended_sp
++}
++
++inline frame::frame(intptr_t* sp, address pc) : _sp(sp), _unextended_sp(sp) {
++ find_codeblob_and_set_pc_and_deopt_state(pc); // also sets _fp and adjusts _unextended_sp
++}
++
++inline frame::frame(intptr_t* sp, address pc, intptr_t* unextended_sp) : _sp(sp), _unextended_sp(unextended_sp) {
++ find_codeblob_and_set_pc_and_deopt_state(pc); // also sets _fp and adjusts _unextended_sp
++}
++
++// Accessors
++
++// Return unique id for this frame. The id must have a value that lets us
++// distinguish identity and the younger/older relationship. NULL
++// represents an invalid (incomparable) frame.
++inline intptr_t* frame::id(void) const {
++ // Use _fp. _sp or _unextended_sp wouldn't be correct due to resizing.
++ return _fp;
++}
++
++// Return true if this frame is older (less recent activation) than
++// the frame represented by id.
++inline bool frame::is_older(intptr_t* id) const {
++ assert(this->id() != NULL && id != NULL, "NULL frame id");
++ // Stack grows towards smaller addresses on ppc64.
++ return this->id() > id;
++}
++
++inline int frame::frame_size(RegisterMap* map) const {
++ // Stack grows towards smaller addresses on PPC64: sender is at a higher address.
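++ // The intptr_t* difference below yields a size in words, not bytes.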
++ return sender_sp() - sp();
++}
++
++// Return the frame's stack pointer before it has been extended by a
++// c2i adapter. This is needed by deoptimization for ignoring c2i adapter
++// frames.
++inline intptr_t* frame::unextended_sp() const {
++ return _unextended_sp;
++}
++
++// All frames have this field.
++inline address frame::sender_pc() const {
++ return (address)callers_abi()->lr;
++}
++inline address* frame::sender_pc_addr() const {
++ return (address*)&(callers_abi()->lr);
++}
++
++// All frames have this field.
++inline intptr_t* frame::sender_sp() const {
++ return (intptr_t*)callers_abi();
++}
++
++// All frames have this field.
++inline intptr_t* frame::link() const {
++ return (intptr_t*)callers_abi()->callers_sp;
++}
++
++inline intptr_t* frame::real_fp() const {
++ return fp();
++}
++
++#ifdef CC_INTERP
++
++inline interpreterState frame::get_interpreterState() const {
++ return (interpreterState)(((address)callers_abi())
++ - frame::interpreter_frame_cinterpreterstate_size_in_bytes());
++}
++
++inline intptr_t** frame::interpreter_frame_locals_addr() const {
++ interpreterState istate = get_interpreterState();
++ return (intptr_t**)&istate->_locals;
++}
++
++inline intptr_t* frame::interpreter_frame_bcx_addr() const {
++ interpreterState istate = get_interpreterState();
++ return (intptr_t*)&istate->_bcp;
++}
++
++inline intptr_t* frame::interpreter_frame_mdx_addr() const {
++ interpreterState istate = get_interpreterState();
++ return (intptr_t*)&istate->_mdx;
++}
++
++inline intptr_t* frame::interpreter_frame_expression_stack() const {
++ return (intptr_t*)interpreter_frame_monitor_end() - 1;
++}
++
++inline jint frame::interpreter_frame_expression_stack_direction() {
++ return -1;
++}
++
++// top of expression stack
++inline intptr_t* frame::interpreter_frame_tos_address() const {
++ interpreterState istate = get_interpreterState();
++ return istate->_stack + 1;
++}
++
++inline intptr_t* frame::interpreter_frame_tos_at(jint offset) const {
++ return &interpreter_frame_tos_address()[offset];
++}
++
++// monitor elements
++
++// In keeping with the Intel side: end is lower in memory than begin,
++// and the beginning element is the oldest element.
++// Also, begin is one past the last monitor.
++
++inline BasicObjectLock* frame::interpreter_frame_monitor_begin() const {
++ return get_interpreterState()->monitor_base();
++}
++
++inline BasicObjectLock* frame::interpreter_frame_monitor_end() const {
++ return (BasicObjectLock*)get_interpreterState()->stack_base();
++}
++
++inline int frame::interpreter_frame_cinterpreterstate_size_in_bytes() {
++ // Size of an interpreter object. Not aligned with frame size.
++ return round_to(sizeof(BytecodeInterpreter), 8);
++}
++
++inline Method** frame::interpreter_frame_method_addr() const {
++ interpreterState istate = get_interpreterState();
++ return &istate->_method;
++}
++
++// Constant pool cache
++
++inline ConstantPoolCache** frame::interpreter_frame_cpoolcache_addr() const {
++ interpreterState istate = get_interpreterState();
++ return &istate->_constants; // should really use accessor
++}
++
++inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const {
++ interpreterState istate = get_interpreterState();
++ return &istate->_constants;
++}
++
++#else // !CC_INTERP
++
++// Template Interpreter frame value accessors.
++
++inline frame::ijava_state* frame::get_ijava_state() const {
++ return (ijava_state*) ((uintptr_t)fp() - ijava_state_size);
++}
++
++inline intptr_t** frame::interpreter_frame_locals_addr() const {
++ return (intptr_t**) &(get_ijava_state()->locals);
++}
++inline intptr_t* frame::interpreter_frame_bcx_addr() const {
++ return (intptr_t*) &(get_ijava_state()->bcp);
++}
++inline intptr_t* frame::interpreter_frame_mdx_addr() const {
++ return (intptr_t*) &(get_ijava_state()->mdx);
++}
++// Pointer beyond the "oldest/deepest" BasicObjectLock on stack.
++inline BasicObjectLock* frame::interpreter_frame_monitor_end() const {
++ return (BasicObjectLock *) get_ijava_state()->monitors;
++}
++
++inline BasicObjectLock* frame::interpreter_frame_monitor_begin() const {
++ return (BasicObjectLock *) get_ijava_state();
++}
++
++// SAPJVM ASc 2012-11-21. Return the address of the stack slot in which the currently interpreted method is found.
++inline Method** frame::interpreter_frame_method_addr() const {
++ return (Method**) &(get_ijava_state()->method);
++}
++inline ConstantPoolCache** frame::interpreter_frame_cpoolcache_addr() const {
++ return (ConstantPoolCache**) &(get_ijava_state()->cpoolCache);
++}
++inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const {
++ return (ConstantPoolCache**) &(get_ijava_state()->cpoolCache);
++}
++
++inline oop* frame::interpreter_frame_temp_oop_addr() const {
++ return (oop *) &(get_ijava_state()->oop_tmp);
++}
++inline intptr_t* frame::interpreter_frame_esp() const {
++ return (intptr_t*) get_ijava_state()->esp;
++}
++
++// Convenient setters
++inline void frame::interpreter_frame_set_monitor_end(BasicObjectLock* end) { get_ijava_state()->monitors = (intptr_t) end;}
++inline void frame::interpreter_frame_set_cpcache(ConstantPoolCache* cp) { *frame::interpreter_frame_cpoolcache_addr() = cp; }
++inline void frame::interpreter_frame_set_esp(intptr_t* esp) { get_ijava_state()->esp = (intptr_t) esp; }
++inline void frame::interpreter_frame_set_top_frame_sp(intptr_t* top_frame_sp) { get_ijava_state()->top_frame_sp = (intptr_t) top_frame_sp; }
++inline void frame::interpreter_frame_set_sender_sp(intptr_t* sender_sp) { get_ijava_state()->sender_sp = (intptr_t) sender_sp; }
++
++inline intptr_t* frame::interpreter_frame_expression_stack() const {
++ return (intptr_t*)interpreter_frame_monitor_end() - 1;
++}
++
++inline jint frame::interpreter_frame_expression_stack_direction() {
++ return -1;
++}
++
++// top of expression stack
++inline intptr_t* frame::interpreter_frame_tos_address() const {
++ return ((intptr_t*) get_ijava_state()->esp) + Interpreter::stackElementWords;
++}
++
++inline intptr_t* frame::interpreter_frame_tos_at(jint offset) const {
++ return &interpreter_frame_tos_address()[offset];
++}
++
++#endif // CC_INTERP
++
++inline int frame::interpreter_frame_monitor_size() {
++ // Number of stack slots for a monitor.
++ return round_to(BasicObjectLock::size(), // number of stack slots
++ WordsPerLong); // number of stack slots for a Java long
++}
++
++inline int frame::interpreter_frame_monitor_size_in_bytes() {
++ return frame::interpreter_frame_monitor_size() * wordSize;
++}
++
++// entry frames
++
++inline intptr_t* frame::entry_frame_argument_at(int offset) const {
++ // Since an entry frame always calls the interpreter first, the
++ // parameters are on the stack and relative to a known register in the
++ // entry frame.
++ intptr_t* tos = (intptr_t*)get_entry_frame_locals()->arguments_tos_address;
++ return &tos[offset + 1]; // prepushed tos
++}
++
++inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const {
++ return (JavaCallWrapper**)&get_entry_frame_locals()->call_wrapper_address;
++}
++
++inline oop frame::saved_oop_result(RegisterMap* map) const {
++ return *((oop*)map->location(R3->as_VMReg()));
++}
++
++inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) {
++ *((oop*)map->location(R3->as_VMReg())) = obj;
++}
++
++#endif // CPU_PPC_VM_FRAME_PPC_INLINE_HPP
+--- ./hotspot/src/cpu/ppc/vm/globalDefinitions_ppc.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/globalDefinitions_ppc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,45 @@
++/*
++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_PPC_VM_GLOBALDEFINITIONS_PPC_HPP
++#define CPU_PPC_VM_GLOBALDEFINITIONS_PPC_HPP
++
++// Size of PPC Instructions
++const int BytesPerInstWord = 4;
++
++const int StackAlignmentInBytes = 16;
++
++// Indicates whether the C calling conventions require that
++// 32-bit integer argument values are properly extended to 64 bits.
++// If set, SharedRuntime::c_calling_convention() must adapt
++// signatures accordingly.
++const bool CCallingConventionRequiresIntsAsLongs = true;
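++// For example, a C callee expects a jint argument sign-extended to fill
++// its full 64-bit register or stack slot.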
++
++// The PPC CPUs are NOT multiple-copy-atomic.
++#define CPU_NOT_MULTIPLE_COPY_ATOMIC
++
++#endif // CPU_PPC_VM_GLOBALDEFINITIONS_PPC_HPP
+--- ./hotspot/src/cpu/ppc/vm/globals_ppc.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/globals_ppc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,126 @@
++/*
++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_PPC_VM_GLOBALS_PPC_HPP
++#define CPU_PPC_VM_GLOBALS_PPC_HPP
++
++#include "utilities/globalDefinitions.hpp"
++#include "utilities/macros.hpp"
++
++// Sets the default values for platform dependent flags used by the runtime system.
++// (see globals.hpp)
++
++define_pd_global(bool, ConvertSleepToYield, true);
++define_pd_global(bool, ShareVtableStubs, false); // Improves performance markedly for mtrt and compress.
++define_pd_global(bool, NeedsDeoptSuspend, false); // Only register window machines need this.
++
++
++define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks.
++define_pd_global(bool, TrapBasedNullChecks, true);
++define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast.
++
++// Use large code-entry alignment.
++define_pd_global(intx, CodeEntryAlignment, 128);
++define_pd_global(intx, OptoLoopAlignment, 16);
++define_pd_global(intx, InlineFrequencyCount, 100);
++define_pd_global(intx, InlineSmallCode, 1500);
++
++define_pd_global(intx, PreInflateSpin, 10);
++
++// Flags for template interpreter.
++define_pd_global(bool, RewriteBytecodes, true);
++define_pd_global(bool, RewriteFrequentPairs, true);
++
++define_pd_global(bool, UseMembar, false);
++
++// GC Ergo Flags
++define_pd_global(uintx, CMSYoungGenPerWorker, 16*M); // Default max size of CMS young gen, per GC worker thread.
++
++define_pd_global(uintx, TypeProfileLevel, 0);
++
++// Platform dependent flag handling: flags only defined on this platform.
++#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \
++ \
++ /* Load poll address from thread. This is used to implement per-thread */ \
++ /* safepoints on platforms != IA64. */ \
++ product(bool, LoadPollAddressFromThread, false, \
++ "Load polling page address from thread object (required for " \
++ "per-thread safepoints on platforms != IA64)") \
++ \
++ product(uintx, PowerArchitecturePPC64, 0, \
++ "CPU Version: x for PowerX. Currently recognizes Power5 to " \
++ "Power7. Default is 0. CPUs newer than Power7 will be " \
++ "recognized as Power7.") \
++ \
++ /* Reoptimize code-sequences of calls at runtime, e.g. replace an */ \
++ /* indirect call by a direct call. */ \
++ product(bool, ReoptimizeCallSequences, true, \
++ "Reoptimize code-sequences of calls at runtime.") \
++ \
++ product(bool, UseLoadInstructionsForStackBangingPPC64, false, \
++ "Use load instructions for stack banging.") \
++ \
++ /* special instructions */ \
++ \
++ product(bool, UseCountLeadingZerosInstructionsPPC64, true, \
++ "Use count leading zeros instructions.") \
++ \
++ product(bool, UseExtendedLoadAndReserveInstructionsPPC64, false, \
++ "Use extended versions of load-and-reserve instructions.") \
++ \
++ product(bool, UseRotateAndMaskInstructionsPPC64, true, \
++ "Use rotate and mask instructions.") \
++ \
++ product(bool, UseStaticBranchPredictionInCompareAndSwapPPC64, true, \
++ "Use static branch prediction hints in CAS operations.") \
++ product(bool, UseStaticBranchPredictionForUncommonPathsPPC64, false, \
++ "Use static branch prediction hints for uncommon paths.") \
++ \
++ product(bool, UsePower6SchedulerPPC64, false, \
++ "Use Power6 Scheduler.") \
++ \
++ product(bool, InsertEndGroupPPC64, false, \
++ "Insert EndGroup instructions to optimize for Power6.") \
++ \
++ /* Trap based checks. */ \
++ /* Trap based checks use the ppc trap instructions to check certain */ \
++ /* conditions. This instruction raises a SIGTRAP caught by the */ \
++ /* exception handler of the VM. */ \
++ product(bool, UseSIGTRAP, true, \
++ "Allow trap instructions that make use of SIGTRAP. Use this to " \
++ "switch off all optimizations requiring SIGTRAP.") \
++ product(bool, TrapBasedICMissChecks, true, \
++ "Raise and handle SIGTRAP if inline cache miss detected.") \
++ product(bool, TrapBasedNotEntrantChecks, true, \
++ "Raise and handle SIGTRAP if calling not entrant or zombie" \
++ " method.") \
++ product(bool, TraceTraps, false, "Trace all traps the signal handler " \
++ "handles.") \
++ \
++ product(bool, ZapMemory, false, "Write 0x0101... to empty memory." \
++ " Use this to ease debugging.") \
++
++
++#endif // CPU_PPC_VM_GLOBALS_PPC_HPP
+--- ./hotspot/src/cpu/ppc/vm/icBuffer_ppc.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/icBuffer_ppc.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,71 @@
++/*
++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "asm/assembler.hpp"
++#include "assembler_ppc.inline.hpp"
++#include "code/icBuffer.hpp"
++#include "gc_interface/collectedHeap.inline.hpp"
++#include "interpreter/bytecodes.hpp"
++#include "memory/resourceArea.hpp"
++#include "nativeInst_ppc.hpp"
++#include "oops/oop.inline.hpp"
++#include "oops/oop.inline2.hpp"
++
++#define __ masm.
++
++int InlineCacheBuffer::ic_stub_code_size() {
++ return MacroAssembler::load_const_size + MacroAssembler::b64_patchable_size;
++}
++
++void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) {
++ ResourceMark rm;
++ CodeBuffer code(code_begin, ic_stub_code_size());
++ MacroAssembler masm(&code);
++ // Note: even though the code contains embedded metadata, we do not need
++ // reloc info because
++ // (1) the metadata is old (i.e., doesn't matter for scavenges), and
++ // (2) these ICStubs are removed *before* a GC happens, so the roots disappear.
++
++ // Load the oop ...
++ __ load_const(R19_method, (address) cached_value, R0);
++ // ... and jump to entry point.
++ __ b64_patchable((address) entry_point, relocInfo::none);
++
++ __ flush();
++}
++
++address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) {
++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object
++ NativeJump* jump = nativeJump_at(move->next_instruction_address());
++ return jump->jump_destination();
++}
++
++void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) {
++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object
++ void* o = (void*)move->data();
++ return o;
++}
++
+--- ./hotspot/src/cpu/ppc/vm/icache_ppc.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/icache_ppc.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,82 @@
++/*
++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "assembler_ppc.inline.hpp"
++#include "runtime/icache.hpp"
++
++// Use inline assembler to implement icache flush.
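++//
++// The function follows the architected sequence for making modified code
++// visible to instruction fetch: flush the affected data cache lines (dcbst),
++// order the stores (sync), invalidate the matching instruction cache lines
++// (icbi), and discard any prefetched instructions (isync).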
++int ICache::ppc64_flush_icache(address start, int lines, int magic) {
++ address end = start + (unsigned int)lines*ICache::line_size;
++ assert(start <= end, "flush_icache parms");
++
++ // store modified cache lines from data cache
++ for (address a = start; a < end; a += ICache::line_size) {
++ __asm__ __volatile__(
++ "dcbst 0, %0 \n"
++ :
++ : "r" (a)
++ : "memory");
++ }
++
++ // sync instruction
++ __asm__ __volatile__(
++ "sync \n"
++ :
++ :
++ : "memory");
++
++ // invalidate respective cache lines in instruction cache
++ for (address a = start; a < end; a += ICache::line_size) {
++ __asm__ __volatile__(
++ "icbi 0, %0 \n"
++ :
++ : "r" (a)
++ : "memory");
++ }
++
++ // discard fetched instructions
++ __asm__ __volatile__(
++ "isync \n"
++ :
++ :
++ : "memory");
++
++ return magic;
++}
++
++void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) {
++ StubCodeMark mark(this, "ICache", "flush_icache_stub");
++
++ *flush_icache_stub = (ICache::flush_icache_stub_t)ICache::ppc64_flush_icache;
++
++ // First call: flush the stub code itself.
++ ICache::invalidate_range((address)(*flush_icache_stub), 0);
++}
+--- ./hotspot/src/cpu/ppc/vm/icache_ppc.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/icache_ppc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,55 @@
++/*
++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_PPC_VM_ICACHE_PPC_HPP
++#define CPU_PPC_VM_ICACHE_PPC_HPP
++
++// Interface for updating the instruction cache. Whenever the VM modifies
++// code, part of the processor instruction cache potentially has to be flushed.
++
++class ICache : public AbstractICache {
++ friend class ICacheStubGenerator;
++ static int ppc64_flush_icache(address start, int lines, int magic);
++
++ public:
++ enum {
++ // The actual cache line size is 64 bytes, but we keep 32 here to be
++ // on the safe side on ALL PPC64 implementations.
++ log2_line_size = 5,
++ line_size = 1 << log2_line_size
++ };
++
++ static void ppc64_flush_icache_bytes(address start, int bytes) {
++ // Align start address to an icache line boundary and transform
++ // nbytes to an icache line count.
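++ // Worked example: with line_size == 32, start == 0x1005 and bytes == 40,
++ // line_offset == 5, so we flush 2 lines starting at 0x1000 (covering
++ // 0x1005 .. 0x102C).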
++ const uint line_offset = mask_address_bits(start, line_size - 1);
++ ppc64_flush_icache(start - line_offset, (bytes + line_offset + line_size - 1) >> log2_line_size, 0);
++ }
++};
++
++#endif // CPU_PPC_VM_ICACHE_PPC_HPP
+--- ./hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,2216 @@
++/*
++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2014 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++
++#include "precompiled.hpp"
++#include "asm/assembler.hpp"
++#include "asm/macroAssembler.inline.hpp"
++#include "interp_masm_ppc_64.hpp"
++#include "interpreter/interpreterRuntime.hpp"
++#include "prims/jvmtiThreadState.hpp"
++
++#ifdef PRODUCT
++#define BLOCK_COMMENT(str) // nothing
++#else
++#define BLOCK_COMMENT(str) block_comment(str)
++#endif
++
++void InterpreterMacroAssembler::null_check_throw(Register a, int offset, Register temp_reg) {
++#ifdef CC_INTERP
++ address exception_entry = StubRoutines::throw_NullPointerException_at_call_entry();
++#else
++ address exception_entry = Interpreter::throw_NullPointerException_entry();
++#endif
++ MacroAssembler::null_check_throw(a, offset, temp_reg, exception_entry);
++}
++
++void InterpreterMacroAssembler::branch_to_entry(address entry, Register Rscratch) {
++ assert(entry, "Entry must have been generated by now");
++ if (is_within_range_of_b(entry, pc())) {
++ b(entry);
++ } else {
++ load_const_optimized(Rscratch, entry, R0);
++ mtctr(Rscratch);
++ bctr();
++ }
++}
++
++#ifndef CC_INTERP
++
++void InterpreterMacroAssembler::dispatch_next(TosState state, int bcp_incr) {
++ Register bytecode = R12_scratch2;
++ if (bcp_incr != 0) {
++ lbzu(bytecode, bcp_incr, R14_bcp);
++ } else {
++ lbz(bytecode, 0, R14_bcp);
++ }
++
++ dispatch_Lbyte_code(state, bytecode, Interpreter::dispatch_table(state));
++}
++
++void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) {
++ // Load current bytecode.
++ Register bytecode = R12_scratch2;
++ lbz(bytecode, 0, R14_bcp);
++ dispatch_Lbyte_code(state, bytecode, table);
++}
++
++// Dispatch code executed in the prolog of a bytecode which does not do its
++// own dispatch. The dispatch address is computed and placed in R24_dispatch_addr.
++void InterpreterMacroAssembler::dispatch_prolog(TosState state, int bcp_incr) {
++ Register bytecode = R12_scratch2;
++ lbz(bytecode, bcp_incr, R14_bcp);
++
++ load_dispatch_table(R24_dispatch_addr, Interpreter::dispatch_table(state));
++
++ sldi(bytecode, bytecode, LogBytesPerWord);
++ ldx(R24_dispatch_addr, R24_dispatch_addr, bytecode);
++}
++
++// Dispatch code executed in the epilog of a bytecode which does not do its
++// own dispatch. The dispatch address in R24_dispatch_addr is used for the
++// dispatch.
++void InterpreterMacroAssembler::dispatch_epilog(TosState state, int bcp_incr) {
++ mtctr(R24_dispatch_addr);
++ addi(R14_bcp, R14_bcp, bcp_incr);
++ bctr();
++}
++
++void InterpreterMacroAssembler::check_and_handle_popframe(Register scratch_reg) {
++ assert(scratch_reg != R0, "can't use R0 as scratch_reg here");
++ if (JvmtiExport::can_pop_frame()) {
++ Label L;
++
++ // Check the "pending popframe condition" flag in the current thread.
++ lwz(scratch_reg, in_bytes(JavaThread::popframe_condition_offset()), R16_thread);
++
++ // Initiate popframe handling only if it is not already being
++ // processed. If the flag has the popframe_processing bit set, it
++ // means that this code is called *during* popframe handling - we
++ // don't want to reenter.
++ andi_(R0, scratch_reg, JavaThread::popframe_pending_bit);
++ beq(CCR0, L);
++
++ andi_(R0, scratch_reg, JavaThread::popframe_processing_bit);
++ bne(CCR0, L);
++
++ // Call the Interpreter::remove_activation_preserving_args_entry()
++ // func to get the address of the same-named entrypoint in the
++ // generated interpreter code.
++ call_c(CAST_FROM_FN_PTR(FunctionDescriptor*,
++ Interpreter::remove_activation_preserving_args_entry),
++ relocInfo::none);
++
++ // Jump to Interpreter::_remove_activation_preserving_args_entry.
++ mtctr(R3_RET);
++ bctr();
++
++ align(32, 12);
++ bind(L);
++ }
++}
++
++void InterpreterMacroAssembler::check_and_handle_earlyret(Register scratch_reg) {
++ const Register Rthr_state_addr = scratch_reg;
++ if (JvmtiExport::can_force_early_return()) {
++ Label Lno_early_ret;
++ ld(Rthr_state_addr, in_bytes(JavaThread::jvmti_thread_state_offset()), R16_thread);
++ cmpdi(CCR0, Rthr_state_addr, 0);
++ beq(CCR0, Lno_early_ret);
++
++ lwz(R0, in_bytes(JvmtiThreadState::earlyret_state_offset()), Rthr_state_addr);
++ cmpwi(CCR0, R0, JvmtiThreadState::earlyret_pending);
++ bne(CCR0, Lno_early_ret);
++
++ // Jump to Interpreter::_earlyret_entry.
++ lwz(R3_ARG1, in_bytes(JvmtiThreadState::earlyret_tos_offset()), Rthr_state_addr);
++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry));
++ mtlr(R3_RET);
++ blr();
++
++ align(32, 12);
++ bind(Lno_early_ret);
++ }
++}
++
++void InterpreterMacroAssembler::load_earlyret_value(TosState state, Register Rscratch1) {
++ const Register RjvmtiState = Rscratch1;
++ const Register Rscratch2 = R0;
++
++ ld(RjvmtiState, in_bytes(JavaThread::jvmti_thread_state_offset()), R16_thread);
++ li(Rscratch2, 0);
++
++ switch (state) {
++ case atos: ld(R17_tos, in_bytes(JvmtiThreadState::earlyret_oop_offset()), RjvmtiState);
++ std(Rscratch2, in_bytes(JvmtiThreadState::earlyret_oop_offset()), RjvmtiState);
++ break;
++ case ltos: ld(R17_tos, in_bytes(JvmtiThreadState::earlyret_value_offset()), RjvmtiState);
++ break;
++ case btos: // fall through
++ case ctos: // fall through
++ case stos: // fall through
++ case itos: lwz(R17_tos, in_bytes(JvmtiThreadState::earlyret_value_offset()), RjvmtiState);
++ break;
++ case ftos: lfs(F15_ftos, in_bytes(JvmtiThreadState::earlyret_value_offset()), RjvmtiState);
++ break;
++ case dtos: lfd(F15_ftos, in_bytes(JvmtiThreadState::earlyret_value_offset()), RjvmtiState);
++ break;
++ case vtos: break;
++ default : ShouldNotReachHere();
++ }
++
++ // Clean up tos value in the jvmti thread state.
++ std(Rscratch2, in_bytes(JvmtiThreadState::earlyret_value_offset()), RjvmtiState);
++ // Set tos state field to illegal value.
++ li(Rscratch2, ilgl);
++ stw(Rscratch2, in_bytes(JvmtiThreadState::earlyret_tos_offset()), RjvmtiState);
++}
++
++// Common code to dispatch and dispatch_only.
++// Dispatch value in Lbyte_code and increment Lbcp.
++
++void InterpreterMacroAssembler::load_dispatch_table(Register dst, address* table) {
++ address table_base = (address)Interpreter::dispatch_table((TosState)0);
++ intptr_t table_offs = (intptr_t)table - (intptr_t)table_base;
++ if (is_simm16(table_offs)) {
++ addi(dst, R25_templateTableBase, (int)table_offs);
++ } else {
++ load_const_optimized(dst, table, R0);
++ }
++}
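++// The is_simm16 fast path above relies on the per-TosState dispatch tables
++// being laid out contiguously, starting with the table for TosState 0 whose
++// address is kept in R25_templateTableBase.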
++
++void InterpreterMacroAssembler::dispatch_Lbyte_code(TosState state, Register bytecode, address* table, bool verify) {
++ if (verify) {
++ unimplemented("dispatch_Lbyte_code: verify"); // See Sparc Implementation to implement this
++ }
++
++#ifdef FAST_DISPATCH
++ unimplemented("dispatch_Lbyte_code FAST_DISPATCH");
++#else
++ assert_different_registers(bytecode, R11_scratch1);
++
++ // Calc dispatch table address.
++ load_dispatch_table(R11_scratch1, table);
++
++ sldi(R12_scratch2, bytecode, LogBytesPerWord);
++ ldx(R11_scratch1, R11_scratch1, R12_scratch2);
++
++ // Jump off!
++ mtctr(R11_scratch1);
++ bctr();
++#endif
++}
++
++void InterpreterMacroAssembler::load_receiver(Register Rparam_count, Register Rrecv_dst) {
++ sldi(Rrecv_dst, Rparam_count, Interpreter::logStackElementSize);
++ ldx(Rrecv_dst, Rrecv_dst, R15_esp);
++}
++
++// helpers for expression stack
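++//
++// Convention (visible in push_i/pop_i below): R15_esp points at the first
++// free slot just below the top-of-stack element; pushes store at 0(R15_esp)
++// and then decrement R15_esp, pops load-with-update at stackElementSize(R15_esp).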
++
++void InterpreterMacroAssembler::pop_i(Register r) {
++ lwzu(r, Interpreter::stackElementSize, R15_esp);
++}
++
++void InterpreterMacroAssembler::pop_ptr(Register r) {
++ ldu(r, Interpreter::stackElementSize, R15_esp);
++}
++
++void InterpreterMacroAssembler::pop_l(Register r) {
++ ld(r, Interpreter::stackElementSize, R15_esp);
++ addi(R15_esp, R15_esp, 2 * Interpreter::stackElementSize);
++}
++
++void InterpreterMacroAssembler::pop_f(FloatRegister f) {
++ lfsu(f, Interpreter::stackElementSize, R15_esp);
++}
++
++void InterpreterMacroAssembler::pop_d(FloatRegister f) {
++ lfd(f, Interpreter::stackElementSize, R15_esp);
++ addi(R15_esp, R15_esp, 2 * Interpreter::stackElementSize);
++}
++
++void InterpreterMacroAssembler::push_i(Register r) {
++ stw(r, 0, R15_esp);
++ addi(R15_esp, R15_esp, - Interpreter::stackElementSize );
++}
++
++void InterpreterMacroAssembler::push_ptr(Register r) {
++ std(r, 0, R15_esp);
++ addi(R15_esp, R15_esp, - Interpreter::stackElementSize );
++}
++
++void InterpreterMacroAssembler::push_l(Register r) {
++ std(r, - Interpreter::stackElementSize, R15_esp);
++ addi(R15_esp, R15_esp, - 2 * Interpreter::stackElementSize );
++}
++
++void InterpreterMacroAssembler::push_f(FloatRegister f) {
++ stfs(f, 0, R15_esp);
++ addi(R15_esp, R15_esp, - Interpreter::stackElementSize );
++}
++
++void InterpreterMacroAssembler::push_d(FloatRegister f) {
++ stfd(f, - Interpreter::stackElementSize, R15_esp);
++ addi(R15_esp, R15_esp, - 2 * Interpreter::stackElementSize );
++}
++
++void InterpreterMacroAssembler::push_2ptrs(Register first, Register second) {
++ std(first, 0, R15_esp);
++ std(second, -Interpreter::stackElementSize, R15_esp);
++ addi(R15_esp, R15_esp, - 2 * Interpreter::stackElementSize );
++}
++
++void InterpreterMacroAssembler::push_l_pop_d(Register l, FloatRegister d) {
++ std(l, 0, R15_esp);
++ lfd(d, 0, R15_esp);
++}
++
++void InterpreterMacroAssembler::push_d_pop_l(FloatRegister d, Register l) {
++ stfd(d, 0, R15_esp);
++ ld(l, 0, R15_esp);
++}
++
++void InterpreterMacroAssembler::push(TosState state) {
++ switch (state) {
++ case atos: push_ptr(); break;
++ case btos:
++ case ctos:
++ case stos:
++ case itos: push_i(); break;
++ case ltos: push_l(); break;
++ case ftos: push_f(); break;
++ case dtos: push_d(); break;
++ case vtos: /* nothing to do */ break;
++ default : ShouldNotReachHere();
++ }
++}
++
++void InterpreterMacroAssembler::pop(TosState state) {
++ switch (state) {
++ case atos: pop_ptr(); break;
++ case btos:
++ case ctos:
++ case stos:
++ case itos: pop_i(); break;
++ case ltos: pop_l(); break;
++ case ftos: pop_f(); break;
++ case dtos: pop_d(); break;
++ case vtos: /* nothing to do */ break;
++ default : ShouldNotReachHere();
++ }
++ verify_oop(R17_tos, state);
++}
++
++void InterpreterMacroAssembler::empty_expression_stack() {
++ addi(R15_esp, R26_monitor, - Interpreter::stackElementSize);
++}
++
++void InterpreterMacroAssembler::get_2_byte_integer_at_bcp(int bcp_offset,
++ Register Rdst,
++ signedOrNot is_signed) {
++ // Read Java big endian format.
++ if (is_signed == Signed) {
++ lha(Rdst, bcp_offset, R14_bcp);
++ } else {
++ lhz(Rdst, bcp_offset, R14_bcp);
++ }
++#if 0
++ assert(Rtmp != Rdst, "need separate temp register");
++ Register Rfirst = Rtmp;
++ lbz(Rfirst, bcp_offset, R14_bcp); // first byte
++ lbz(Rdst, bcp_offset+1, R14_bcp); // second byte
++
++ // Rdst = ((Rfirst<<8) & 0xFF00) | (Rdst &~ 0xFF00)
++ rldimi(/*RA=*/Rdst, /*RS=*/Rfirst, /*sh=*/8, /*mb=*/48);
++ if (is_signed == Signed) {
++ extsh(Rdst, Rdst);
++ }
++#endif
++}
++
++void InterpreterMacroAssembler::get_4_byte_integer_at_bcp(int bcp_offset,
++ Register Rdst,
++ signedOrNot is_signed) {
++ // Read Java big endian format.
++ if (bcp_offset & 3) { // Offset unaligned?
++ load_const_optimized(Rdst, bcp_offset);
++ if (is_signed == Signed) {
++ lwax(Rdst, R14_bcp, Rdst);
++ } else {
++ lwzx(Rdst, R14_bcp, Rdst);
++ }
++ } else {
++ if (is_signed == Signed) {
++ lwa(Rdst, bcp_offset, R14_bcp);
++ } else {
++ lwz(Rdst, bcp_offset, R14_bcp);
++ }
++ }
++}
++
++// Load the constant pool cache index from the bytecode stream.
++//
++// Kills / writes:
++// - Rdst, Rscratch
++void InterpreterMacroAssembler::get_cache_index_at_bcp(Register Rdst, int bcp_offset, size_t index_size) {
++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
++ if (index_size == sizeof(u2)) {
++ get_2_byte_integer_at_bcp(bcp_offset, Rdst, Unsigned);
++ } else if (index_size == sizeof(u4)) {
++ assert(EnableInvokeDynamic, "giant index used only for JSR 292");
++ get_4_byte_integer_at_bcp(bcp_offset, Rdst, Signed);
++ assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line");
++ nand(Rdst, Rdst, Rdst); // nand(x, x) == ~x; convert to plain index
++ } else if (index_size == sizeof(u1)) {
++ lbz(Rdst, bcp_offset, R14_bcp);
++ } else {
++ ShouldNotReachHere();
++ }
++ // Rdst now contains cp cache index.
++}
++
++void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, int bcp_offset, size_t index_size) {
++ get_cache_index_at_bcp(cache, bcp_offset, index_size);
++ sldi(cache, cache, exact_log2(in_words(ConstantPoolCacheEntry::size()) * BytesPerWord));
++ add(cache, R27_constPoolCache, cache);
++}
++
++// Load object from cpool->resolved_references(index).
++void InterpreterMacroAssembler::load_resolved_reference_at_index(Register result, Register index) {
++ assert_different_registers(result, index);
++ get_constant_pool(result);
++
++ // Convert from field index to resolved_references() index and from
++ // word index to byte offset. Since this is a java object, it can be compressed.
++ Register tmp = index; // reuse
++ sldi(tmp, index, LogBytesPerHeapOop);
++ // Load pointer for resolved_references[] objArray.
++ ld(result, ConstantPool::resolved_references_offset_in_bytes(), result);
++ // JNIHandles::resolve(result)
++ ld(result, 0, result);
++#ifdef ASSERT
++ Label index_ok;
++ lwa(R0, arrayOopDesc::length_offset_in_bytes(), result);
++ sldi(R0, R0, LogBytesPerHeapOop);
++ cmpd(CCR0, tmp, R0);
++ blt(CCR0, index_ok);
++ stop("resolved reference index out of bounds", 0x09256);
++ bind(index_ok);
++#endif
++ // Add in the index.
++ add(result, tmp, result);
++ load_heap_oop(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT), result);
++}
++
++// Generate a subtype check: branch to ok_is_subtype if sub_klass is
++// a subtype of super_klass. Blows registers Rsub_klass, tmp1, tmp2.
++void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass, Register Rsuper_klass, Register Rtmp1,
++ Register Rtmp2, Register Rtmp3, Label &ok_is_subtype) {
++ // Profile the not-null value's klass.
++ profile_typecheck(Rsub_klass, Rtmp1, Rtmp2);
++ check_klass_subtype(Rsub_klass, Rsuper_klass, Rtmp1, Rtmp2, ok_is_subtype);
++ profile_typecheck_failed(Rtmp1, Rtmp2);
++}
++
++void InterpreterMacroAssembler::generate_stack_overflow_check_with_compare_and_throw(Register Rmem_frame_size, Register Rscratch1) {
++ Label done;
++ sub(Rmem_frame_size, R1_SP, Rmem_frame_size);
++ ld(Rscratch1, thread_(stack_overflow_limit));
++ cmpld(CCR0/*is_stack_overflow*/, Rmem_frame_size, Rscratch1);
++ bgt(CCR0/*is_stack_overflow*/, done);
++
++ // Load target address of the runtime stub.
++ assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "generated in wrong order");
++ load_const_optimized(Rscratch1, (StubRoutines::throw_StackOverflowError_entry()), R0);
++ mtctr(Rscratch1);
++ // Restore caller_sp.
++#ifdef ASSERT
++ ld(Rscratch1, 0, R1_SP);
++ ld(R0, 0, R21_sender_SP);
++ cmpd(CCR0, R0, Rscratch1);
++ asm_assert_eq("backlink", 0x547);
++#endif // ASSERT
++ mr(R1_SP, R21_sender_SP);
++ bctr();
++
++ align(32, 12);
++ bind(done);
++}
++
++// Separate these two to allow for a delay slot in the middle.
++// These are used to do a test and full jump to exception-throwing code.
++
++// Check that index is in range for array, then shift index by index_shift,
++// and put arrayOop + shifted_index into res.
++// Note: res is still shy of address by array offset into object.
++
++void InterpreterMacroAssembler::index_check_without_pop(Register Rarray, Register Rindex, int index_shift, Register Rtmp, Register Rres) {
++ // Check that index is in range for array, then shift index by index_shift,
++ // and put arrayOop + shifted_index into res.
++ // Note: res is still shy of address by array offset into object.
++ // Kills:
++ // - Rindex
++ // Writes:
++ // - Rres: Address that corresponds to the array index if check was successful.
++ verify_oop(Rarray);
++ const Register Rlength = R0;
++ const Register RsxtIndex = Rtmp;
++ Label LisNull, LnotOOR;
++
++ // Array nullcheck
++ if (!ImplicitNullChecks) {
++ cmpdi(CCR0, Rarray, 0);
++ beq(CCR0, LisNull);
++ } else {
++ null_check_throw(Rarray, arrayOopDesc::length_offset_in_bytes(), /*temp*/RsxtIndex);
++ }
++
++ // Rindex might contain garbage in its upper bits (remember that we don't
++ // sign extend during integer arithmetic operations). So kill them and put
++ // the value into the same register in which ArrayIndexOutOfBounds expects the index.
++ rldicl(RsxtIndex, Rindex, 0, 32); // zero extend 32 bit -> 64 bit
++
++ // Index check
++ lwz(Rlength, arrayOopDesc::length_offset_in_bytes(), Rarray);
++ cmplw(CCR0, Rindex, Rlength);
++ sldi(RsxtIndex, RsxtIndex, index_shift);
++ blt(CCR0, LnotOOR);
++ load_dispatch_table(Rtmp, (address*)Interpreter::_throw_ArrayIndexOutOfBoundsException_entry);
++ mtctr(Rtmp);
++ bctr();
++
++ if (!ImplicitNullChecks) {
++ bind(LisNull);
++ load_dispatch_table(Rtmp, (address*)Interpreter::_throw_NullPointerException_entry);
++ mtctr(Rtmp);
++ bctr();
++ }
++
++ align(32, 16);
++ bind(LnotOOR);
++
++ // Calc address
++ add(Rres, RsxtIndex, Rarray);
++}
++
++void InterpreterMacroAssembler::index_check(Register array, Register index, int index_shift, Register tmp, Register res) {
++ // pop array
++ pop_ptr(array);
++
++ // check array
++ index_check_without_pop(array, index, index_shift, tmp, res);
++}
++
++void InterpreterMacroAssembler::get_const(Register Rdst) {
++ ld(Rdst, in_bytes(Method::const_offset()), R19_method);
++}
++
++void InterpreterMacroAssembler::get_constant_pool(Register Rdst) {
++ get_const(Rdst);
++ ld(Rdst, in_bytes(ConstMethod::constants_offset()), Rdst);
++}
++
++void InterpreterMacroAssembler::get_constant_pool_cache(Register Rdst) {
++ get_constant_pool(Rdst);
++ ld(Rdst, ConstantPool::cache_offset_in_bytes(), Rdst);
++}
++
++void InterpreterMacroAssembler::get_cpool_and_tags(Register Rcpool, Register Rtags) {
++ get_constant_pool(Rcpool);
++ ld(Rtags, ConstantPool::tags_offset_in_bytes(), Rcpool);
++}
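++
++// The four accessors above walk the metadata chain (illustrative):
++//
++//   R19_method->_constMethod->_constants->{_cache, _tags}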
++
++// Unlock if synchronized method.
++//
++// Unlock the receiver if this is a synchronized method.
++// Unlock any Java monitors from synchronized blocks.
++//
++// If there are locked Java monitors
++// If throw_monitor_exception
++// throws IllegalMonitorStateException
++// Else if install_monitor_exception
++// installs IllegalMonitorStateException
++// Else
++// no error processing
++void InterpreterMacroAssembler::unlock_if_synchronized_method(TosState state,
++ bool throw_monitor_exception,
++ bool install_monitor_exception) {
++ Label Lunlocked, Lno_unlock;
++ {
++ Register Rdo_not_unlock_flag = R11_scratch1;
++ Register Raccess_flags = R12_scratch2;
++
++ // Check if synchronized method or unlocking prevented by
++ // JavaThread::do_not_unlock_if_synchronized flag.
++ lbz(Rdo_not_unlock_flag, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()), R16_thread);
++ lwz(Raccess_flags, in_bytes(Method::access_flags_offset()), R19_method);
++ li(R0, 0);
++ stb(R0, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()), R16_thread); // reset flag
++
++ push(state);
++
++ // Skip if we don't have to unlock.
++ rldicl_(R0, Raccess_flags, 64-JVM_ACC_SYNCHRONIZED_BIT, 63); // Extract bit and compare to 0.
++ beq(CCR0, Lunlocked);
++
++ cmpwi(CCR0, Rdo_not_unlock_flag, 0);
++ bne(CCR0, Lno_unlock);
++ }
++
++ // Unlock
++ {
++ Register Rmonitor_base = R11_scratch1;
++
++ Label Lunlock;
++ // If it's still locked, everything is ok, unlock it.
++ ld(Rmonitor_base, 0, R1_SP);
++ addi(Rmonitor_base, Rmonitor_base, - (frame::ijava_state_size + frame::interpreter_frame_monitor_size_in_bytes())); // Monitor base
++
++ ld(R0, BasicObjectLock::obj_offset_in_bytes(), Rmonitor_base);
++ cmpdi(CCR0, R0, 0);
++ bne(CCR0, Lunlock);
++
++ // If it's already unlocked, throw exception.
++ if (throw_monitor_exception) {
++ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_illegal_monitor_state_exception));
++ should_not_reach_here();
++ } else {
++ if (install_monitor_exception) {
++ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::new_illegal_monitor_state_exception));
++ b(Lunlocked);
++ }
++ }
++
++ bind(Lunlock);
++ unlock_object(Rmonitor_base);
++ }
++
++  // Check that all other monitors are unlocked. Throw IllegalMonitorStateException if not.
++ bind(Lunlocked);
++ {
++ Label Lexception, Lrestart;
++ Register Rcurrent_obj_addr = R11_scratch1;
++ const int delta = frame::interpreter_frame_monitor_size_in_bytes();
++ assert((delta & LongAlignmentMask) == 0, "sizeof BasicObjectLock must be even number of doublewords");
++
++ bind(Lrestart);
++ // Set up search loop: Calc num of iterations.
++ {
++ Register Riterations = R12_scratch2;
++ Register Rmonitor_base = Rcurrent_obj_addr;
++ ld(Rmonitor_base, 0, R1_SP);
++ addi(Rmonitor_base, Rmonitor_base, - frame::ijava_state_size); // Monitor base
++
++ subf_(Riterations, R26_monitor, Rmonitor_base);
++ ble(CCR0, Lno_unlock);
++
++ addi(Rcurrent_obj_addr, Rmonitor_base, BasicObjectLock::obj_offset_in_bytes() - frame::interpreter_frame_monitor_size_in_bytes());
++ // Check if any monitor is on stack, bail out if not
++ srdi(Riterations, Riterations, exact_log2(delta));
++ mtctr(Riterations);
++ }
++
++ // The search loop: Look for locked monitors.
++ {
++ const Register Rcurrent_obj = R0;
++ Label Lloop;
++
++ ld(Rcurrent_obj, 0, Rcurrent_obj_addr);
++ addi(Rcurrent_obj_addr, Rcurrent_obj_addr, -delta);
++ bind(Lloop);
++
++ // Check if current entry is used.
++ cmpdi(CCR0, Rcurrent_obj, 0);
++ bne(CCR0, Lexception);
++ // Preload next iteration's compare value.
++ ld(Rcurrent_obj, 0, Rcurrent_obj_addr);
++ addi(Rcurrent_obj_addr, Rcurrent_obj_addr, -delta);
++ bdnz(Lloop);
++ }
++ // Fell through: Everything's unlocked => finish.
++ b(Lno_unlock);
++
++ // An object is still locked => need to throw exception.
++ bind(Lexception);
++ if (throw_monitor_exception) {
++ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_illegal_monitor_state_exception));
++ should_not_reach_here();
++ } else {
++ // Stack unrolling. Unlock object and if requested, install illegal_monitor_exception.
++ // Unlock does not block, so don't have to worry about the frame.
++ Register Rmonitor_addr = R11_scratch1;
++ addi(Rmonitor_addr, Rcurrent_obj_addr, -BasicObjectLock::obj_offset_in_bytes() + delta);
++ unlock_object(Rmonitor_addr);
++ if (install_monitor_exception) {
++ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::new_illegal_monitor_state_exception));
++ }
++ b(Lrestart);
++ }
++ }
++
++ align(32, 12);
++ bind(Lno_unlock);
++ pop(state);
++}
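++
++// The unlock and scan logic above, as pseudo code (illustrative):
++//
++//   if (synchronized && !do_not_unlock) unlock the method's monitor (or throw/install);
++//   for (BasicObjectLock* m = monitor_base - 1; m >= R26_monitor; m--) {
++//     if (m->obj() != NULL) {
++//       if (throw_monitor_exception) throw IllegalMonitorStateException;
++//       unlock_object(m);  // optionally install the exception, then rescan
++//     }
++//   }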
++
++// Support function for remove_activation & Co.
++void InterpreterMacroAssembler::merge_frames(Register Rsender_sp, Register return_pc, Register Rscratch1, Register Rscratch2) {
++ // Pop interpreter frame.
++ ld(Rscratch1, 0, R1_SP); // *SP
++ ld(Rsender_sp, _ijava_state_neg(sender_sp), Rscratch1); // top_frame_sp
++ ld(Rscratch2, 0, Rscratch1); // **SP
++#ifdef ASSERT
++ {
++ Label Lok;
++ ld(R0, _ijava_state_neg(ijava_reserved), Rscratch1);
++ cmpdi(CCR0, R0, 0x5afe);
++ beq(CCR0, Lok);
++ stop("frame corrupted (remove activation)", 0x5afe);
++ bind(Lok);
++ }
++#endif
++  if (return_pc != noreg) {
++ ld(return_pc, _abi(lr), Rscratch1); // LR
++ }
++
++ // Merge top frames.
++ subf(Rscratch1, R1_SP, Rsender_sp); // top_frame_sp - SP
++ stdux(Rscratch2, R1_SP, Rscratch1); // atomically set *(SP = top_frame_sp) = **SP
++}
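++
++// Net effect of merge_frames (illustrative):
++//
++//   return_pc = saved LR from the caller's ABI area;
++//   SP        = interpreter state's sender_sp;
++//   *SP       = **old SP (back chain), stored atomically via stdux.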
++
++// Remove activation.
++//
++// Unlock the receiver if this is a synchronized method.
++// Unlock any Java monitors from synchronized blocks.
++// Remove the activation from the stack.
++//
++// If there are locked Java monitors
++// If throw_monitor_exception
++// throws IllegalMonitorStateException
++// Else if install_monitor_exception
++// installs IllegalMonitorStateException
++// Else
++// no error processing
++void InterpreterMacroAssembler::remove_activation(TosState state,
++ bool throw_monitor_exception,
++ bool install_monitor_exception) {
++ unlock_if_synchronized_method(state, throw_monitor_exception, install_monitor_exception);
++
++ // Save result (push state before jvmti call and pop it afterwards) and notify jvmti.
++ notify_method_exit(false, state, NotifyJVMTI, true);
++
++ verify_oop(R17_tos, state);
++ verify_thread();
++
++ merge_frames(/*top_frame_sp*/ R21_sender_SP, /*return_pc*/ R0, R11_scratch1, R12_scratch2);
++ mtlr(R0);
++}
++
++#endif // !CC_INTERP
++
++// Lock object
++//
++// Registers alive
++// monitor - Address of the BasicObjectLock to be used for locking,
++// which must be initialized with the object to lock.
++// object - Address of the object to be locked.
++//
++void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
++ if (UseHeavyMonitors) {
++ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
++ monitor, /*check_for_exceptions=*/true CC_INTERP_ONLY(&& false));
++ } else {
++ // template code:
++ //
++ // markOop displaced_header = obj->mark().set_unlocked();
++ // monitor->lock()->set_displaced_header(displaced_header);
++ // if (Atomic::cmpxchg_ptr(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) {
++ // // We stored the monitor address into the object's mark word.
++    //   } else if (THREAD->is_lock_owned((address)displaced_header)) {
++ // // Simple recursive case.
++ // monitor->lock()->set_displaced_header(NULL);
++ // } else {
++ // // Slow path.
++ // InterpreterRuntime::monitorenter(THREAD, monitor);
++ // }
++
++ const Register displaced_header = R7_ARG5;
++ const Register object_mark_addr = R8_ARG6;
++ const Register current_header = R9_ARG7;
++ const Register tmp = R10_ARG8;
++
++ Label done;
++ Label cas_failed, slow_case;
++
++ assert_different_registers(displaced_header, object_mark_addr, current_header, tmp);
++
++ // markOop displaced_header = obj->mark().set_unlocked();
++
++ // Load markOop from object into displaced_header.
++ ld(displaced_header, oopDesc::mark_offset_in_bytes(), object);
++
++ if (UseBiasedLocking) {
++ biased_locking_enter(CCR0, object, displaced_header, tmp, current_header, done, &slow_case);
++ }
++
++ // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
++ ori(displaced_header, displaced_header, markOopDesc::unlocked_value);
++
++ // monitor->lock()->set_displaced_header(displaced_header);
++
++ // Initialize the box (Must happen before we update the object mark!).
++ std(displaced_header, BasicObjectLock::lock_offset_in_bytes() +
++ BasicLock::displaced_header_offset_in_bytes(), monitor);
++
++ // if (Atomic::cmpxchg_ptr(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) {
++
++ // Store stack address of the BasicObjectLock (this is monitor) into object.
++ addi(object_mark_addr, object, oopDesc::mark_offset_in_bytes());
++
++ // Must fence, otherwise, preceding store(s) may float below cmpxchg.
++ // CmpxchgX sets CCR0 to cmpX(current, displaced).
++ fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ?
++ cmpxchgd(/*flag=*/CCR0,
++ /*current_value=*/current_header,
++ /*compare_value=*/displaced_header, /*exchange_value=*/monitor,
++ /*where=*/object_mark_addr,
++ MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
++ MacroAssembler::cmpxchgx_hint_acquire_lock(),
++ noreg,
++ &cas_failed);
++
++ // If the compare-and-exchange succeeded, then we found an unlocked
++ // object and we have now locked it.
++ b(done);
++ bind(cas_failed);
++
++ // } else if (THREAD->is_lock_owned((address)displaced_header))
++ // // Simple recursive case.
++ // monitor->lock()->set_displaced_header(NULL);
++
++ // We did not see an unlocked object so try the fast recursive case.
++
++ // Check if owner is self by comparing the value in the markOop of object
++ // (current_header) with the stack pointer.
++ sub(current_header, current_header, R1_SP);
++
++ assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
++ load_const_optimized(tmp,
++ (address) (~(os::vm_page_size()-1) |
++ markOopDesc::lock_mask_in_place));
++
++ and_(R0/*==0?*/, current_header, tmp);
++ // If condition is true we are done and hence we can store 0 in the displaced
++ // header indicating it is a recursive lock.
++ bne(CCR0, slow_case);
++ release();
++ std(R0/*==0!*/, BasicObjectLock::lock_offset_in_bytes() +
++ BasicLock::displaced_header_offset_in_bytes(), monitor);
++ b(done);
++
++ // } else {
++ // // Slow path.
++ // InterpreterRuntime::monitorenter(THREAD, monitor);
++
++ // None of the above fast optimizations worked so we have to get into the
++ // slow case of monitor enter.
++ bind(slow_case);
++ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
++ monitor, /*check_for_exceptions=*/true CC_INTERP_ONLY(&& false));
++ // }
++ align(32, 12);
++ bind(done);
++ }
++}
++
++// Unlocks an object. Used in monitorexit bytecode and remove_activation.
++//
++// Registers alive
++// monitor - Address of the BasicObjectLock to be used for locking,
++// which must be initialized with the object to lock.
++//
++// Throw IllegalMonitorStateException if the object is not locked by the current thread.
++void InterpreterMacroAssembler::unlock_object(Register monitor, bool check_for_exceptions) {
++ if (UseHeavyMonitors) {
++ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit),
++ monitor, check_for_exceptions CC_INTERP_ONLY(&& false));
++ } else {
++
++ // template code:
++ //
++ // if ((displaced_header = monitor->displaced_header()) == NULL) {
++ // // Recursive unlock. Mark the monitor unlocked by setting the object field to NULL.
++ // monitor->set_obj(NULL);
++ // } else if (Atomic::cmpxchg_ptr(displaced_header, obj->mark_addr(), monitor) == monitor) {
++ // // We swapped the unlocked mark in displaced_header into the object's mark word.
++ // monitor->set_obj(NULL);
++ // } else {
++ // // Slow path.
++ // InterpreterRuntime::monitorexit(THREAD, monitor);
++ // }
++
++ const Register object = R7_ARG5;
++ const Register displaced_header = R8_ARG6;
++ const Register object_mark_addr = R9_ARG7;
++ const Register current_header = R10_ARG8;
++
++ Label free_slot;
++ Label slow_case;
++
++ assert_different_registers(object, displaced_header, object_mark_addr, current_header);
++
++ if (UseBiasedLocking) {
++ // The object address from the monitor is in object.
++ ld(object, BasicObjectLock::obj_offset_in_bytes(), monitor);
++ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
++ biased_locking_exit(CCR0, object, displaced_header, free_slot);
++ }
++
++ // Test first if we are in the fast recursive case.
++ ld(displaced_header, BasicObjectLock::lock_offset_in_bytes() +
++ BasicLock::displaced_header_offset_in_bytes(), monitor);
++
++ // If the displaced header is zero, we have a recursive unlock.
++ cmpdi(CCR0, displaced_header, 0);
++ beq(CCR0, free_slot); // recursive unlock
++
++ // } else if (Atomic::cmpxchg_ptr(displaced_header, obj->mark_addr(), monitor) == monitor) {
++ // // We swapped the unlocked mark in displaced_header into the object's mark word.
++ // monitor->set_obj(NULL);
++
++ // If we still have a lightweight lock, unlock the object and be done.
++
++ // The object address from the monitor is in object.
++ if (!UseBiasedLocking) { ld(object, BasicObjectLock::obj_offset_in_bytes(), monitor); }
++ addi(object_mark_addr, object, oopDesc::mark_offset_in_bytes());
++
++ // We have the displaced header in displaced_header. If the lock is still
++ // lightweight, it will contain the monitor address and we'll store the
++ // displaced header back into the object's mark word.
++ // CmpxchgX sets CCR0 to cmpX(current, monitor).
++ cmpxchgd(/*flag=*/CCR0,
++ /*current_value=*/current_header,
++ /*compare_value=*/monitor, /*exchange_value=*/displaced_header,
++ /*where=*/object_mark_addr,
++ MacroAssembler::MemBarRel,
++ MacroAssembler::cmpxchgx_hint_release_lock(),
++ noreg,
++ &slow_case);
++ b(free_slot);
++
++ // } else {
++ // // Slow path.
++ // InterpreterRuntime::monitorexit(THREAD, monitor);
++
++ // The lock has been converted into a heavy lock and hence
++ // we need to get into the slow case.
++ bind(slow_case);
++ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit),
++ monitor, check_for_exceptions CC_INTERP_ONLY(&& false));
++ // }
++
++ Label done;
++ b(done); // Monitor register may be overwritten! Runtime has already freed the slot.
++
++ // Exchange worked, do monitor->set_obj(NULL);
++ align(32, 12);
++ bind(free_slot);
++ li(R0, 0);
++ std(R0, BasicObjectLock::obj_offset_in_bytes(), monitor);
++ bind(done);
++ }
++}
++
++#ifndef CC_INTERP
++
++// Load compiled (i2c) or interpreter entry when calling from interpreted and
++// do the call. Centralized so that all interpreter calls will do the same actions.
++// If jvmti single stepping is on for a thread we must not call compiled code.
++//
++// Input:
++// - Rtarget_method: method to call
++// - Rret_addr: return address
++// - 2 scratch regs
++//
++void InterpreterMacroAssembler::call_from_interpreter(Register Rtarget_method, Register Rret_addr, Register Rscratch1, Register Rscratch2) {
++ assert_different_registers(Rscratch1, Rscratch2, Rtarget_method, Rret_addr);
++ // Assume we want to go compiled if available.
++ const Register Rtarget_addr = Rscratch1;
++ const Register Rinterp_only = Rscratch2;
++
++ ld(Rtarget_addr, in_bytes(Method::from_interpreted_offset()), Rtarget_method);
++
++ if (JvmtiExport::can_post_interpreter_events()) {
++ lwz(Rinterp_only, in_bytes(JavaThread::interp_only_mode_offset()), R16_thread);
++
++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running
++ // compiled code in threads for which the event is enabled. Check here for
++ // interp_only_mode if these events CAN be enabled.
++ Label done;
++ verify_thread();
++ cmpwi(CCR0, Rinterp_only, 0);
++ beq(CCR0, done);
++ ld(Rtarget_addr, in_bytes(Method::interpreter_entry_offset()), Rtarget_method);
++ align(32, 12);
++ bind(done);
++ }
++
++#ifdef ASSERT
++ {
++ Label Lok;
++ cmpdi(CCR0, Rtarget_addr, 0);
++ bne(CCR0, Lok);
++ stop("null entry point");
++ bind(Lok);
++ }
++#endif // ASSERT
++
++ mr(R21_sender_SP, R1_SP);
++
++ // Calc a precise SP for the call. The SP value we calculated in
++ // generate_fixed_frame() is based on the max_stack() value, so we would waste stack space
++ // if esp is not max. Also, the i2c adapter extends the stack space without restoring
++ // our pre-calced value, so repeating calls via i2c would result in stack overflow.
++ // Since esp already points to an empty slot, we just have to sub 1 additional slot
++ // to meet the abi scratch requirements.
++ // The max_stack pointer will get restored by means of the GR_Lmax_stack local in
++ // the return entry of the interpreter.
++ addi(Rscratch2, R15_esp, Interpreter::stackElementSize - frame::abi_reg_args_size);
++ clrrdi(Rscratch2, Rscratch2, exact_log2(frame::alignment_in_bytes)); // round towards smaller address
++ resize_frame_absolute(Rscratch2, Rscratch2, R0);
++
++ mr_if_needed(R19_method, Rtarget_method);
++ mtctr(Rtarget_addr);
++ mtlr(Rret_addr);
++
++ save_interpreter_state(Rscratch2);
++#ifdef ASSERT
++ ld(Rscratch1, _ijava_state_neg(top_frame_sp), Rscratch2); // Rscratch2 contains fp
++ cmpd(CCR0, R21_sender_SP, Rscratch1);
++ asm_assert_eq("top_frame_sp incorrect", 0x951);
++#endif
++
++ bctr();
++}
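++
++// Entry selection above, as pseudo code (illustrative):
++//
++//   address target = method->from_interpreted_entry();  // i2c stub or interpreter
++//   if (JvmtiExport::can_post_interpreter_events() && thread->interp_only_mode() != 0)
++//     target = method->interpreter_entry();             // never enter compiled code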
++
++// Set the method data pointer for the current bcp.
++void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() {
++ assert(ProfileInterpreter, "must be profiling interpreter");
++ Label get_continue;
++ ld(R28_mdx, in_bytes(Method::method_data_offset()), R19_method);
++ test_method_data_pointer(get_continue);
++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), R19_method, R14_bcp);
++
++ addi(R28_mdx, R28_mdx, in_bytes(MethodData::data_offset()));
++ add(R28_mdx, R28_mdx, R3_RET);
++ bind(get_continue);
++}
++
++// Test ImethodDataPtr. If it is null, continue at the specified label.
++void InterpreterMacroAssembler::test_method_data_pointer(Label& zero_continue) {
++ assert(ProfileInterpreter, "must be profiling interpreter");
++ cmpdi(CCR0, R28_mdx, 0);
++ beq(CCR0, zero_continue);
++}
++
++void InterpreterMacroAssembler::verify_method_data_pointer() {
++ assert(ProfileInterpreter, "must be profiling interpreter");
++#ifdef ASSERT
++ Label verify_continue;
++ test_method_data_pointer(verify_continue);
++
++ // If the mdp is valid, it will point to a DataLayout header which is
++ // consistent with the bcp. The converse is highly probable also.
++ lhz(R11_scratch1, in_bytes(DataLayout::bci_offset()), R28_mdx);
++ ld(R12_scratch2, in_bytes(Method::const_offset()), R19_method);
++ addi(R11_scratch1, R11_scratch1, in_bytes(ConstMethod::codes_offset()));
++  add(R11_scratch1, R12_scratch2, R11_scratch1);
++ cmpd(CCR0, R11_scratch1, R14_bcp);
++ beq(CCR0, verify_continue);
++
++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp ), R19_method, R14_bcp, R28_mdx);
++
++ bind(verify_continue);
++#endif
++}
++
++void InterpreterMacroAssembler::test_invocation_counter_for_mdp(Register invocation_count,
++ Register Rscratch,
++ Label &profile_continue) {
++ assert(ProfileInterpreter, "must be profiling interpreter");
++ // Control will flow to "profile_continue" if the counter is less than the
++ // limit or if we call profile_method().
++ Label done;
++
++ // If no method data exists, and the counter is high enough, make one.
++ int ipl_offs = load_const_optimized(Rscratch, &InvocationCounter::InterpreterProfileLimit, R0, true);
++ lwz(Rscratch, ipl_offs, Rscratch);
++
++ cmpdi(CCR0, R28_mdx, 0);
++ // Test to see if we should create a method data oop.
++ cmpd(CCR1, Rscratch /* InterpreterProfileLimit */, invocation_count);
++ bne(CCR0, done);
++ bge(CCR1, profile_continue);
++
++ // Build it now.
++ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method));
++ set_method_data_pointer_for_bcp();
++ b(profile_continue);
++
++ align(32, 12);
++ bind(done);
++}
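++
++// Control flow above (illustrative):
++//
++//   if (mdx != NULL)                                       ; // fall through to done
++//   else if (invocation_count <= InterpreterProfileLimit)  goto profile_continue;
++//   else { InterpreterRuntime::profile_method(); set_method_data_pointer_for_bcp();
++//          goto profile_continue; }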
++
++void InterpreterMacroAssembler::test_backedge_count_for_osr(Register backedge_count, Register branch_bcp, Register Rtmp) {
++ assert_different_registers(backedge_count, Rtmp, branch_bcp);
++ assert(UseOnStackReplacement,"Must UseOnStackReplacement to test_backedge_count_for_osr");
++
++ Label did_not_overflow;
++ Label overflow_with_error;
++
++ int ibbl_offs = load_const_optimized(Rtmp, &InvocationCounter::InterpreterBackwardBranchLimit, R0, true);
++ lwz(Rtmp, ibbl_offs, Rtmp);
++ cmpw(CCR0, backedge_count, Rtmp);
++
++ blt(CCR0, did_not_overflow);
++
++ // When ProfileInterpreter is on, the backedge_count comes from the
++  // methodDataOop, whose value does not get reset on the call to
++ // frequency_counter_overflow(). To avoid excessive calls to the overflow
++ // routine while the method is being compiled, add a second test to make sure
++ // the overflow function is called only once every overflow_frequency.
++ if (ProfileInterpreter) {
++ const int overflow_frequency = 1024;
++ li(Rtmp, overflow_frequency-1);
++ andr(Rtmp, Rtmp, backedge_count);
++ cmpwi(CCR0, Rtmp, 0);
++ bne(CCR0, did_not_overflow);
++ }
++
++ // Overflow in loop, pass branch bytecode.
++ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), branch_bcp, true);
++
++ // Was an OSR adapter generated?
++ // O0 = osr nmethod
++ cmpdi(CCR0, R3_RET, 0);
++ beq(CCR0, overflow_with_error);
++
++ // Has the nmethod been invalidated already?
++ lwz(Rtmp, nmethod::entry_bci_offset(), R3_RET);
++ cmpwi(CCR0, Rtmp, InvalidOSREntryBci);
++ beq(CCR0, overflow_with_error);
++
++ // Migrate the interpreter frame off of the stack.
++ // We can use all registers because we will not return to interpreter from this point.
++
++ // Save nmethod.
++ const Register osr_nmethod = R31;
++ mr(osr_nmethod, R3_RET);
++ set_top_ijava_frame_at_SP_as_last_Java_frame(R1_SP, R11_scratch1);
++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin), R16_thread);
++ reset_last_Java_frame();
++ // OSR buffer is in ARG1
++
++ // Remove the interpreter frame.
++ merge_frames(/*top_frame_sp*/ R21_sender_SP, /*return_pc*/ R0, R11_scratch1, R12_scratch2);
++
++ // Jump to the osr code.
++ ld(R11_scratch1, nmethod::osr_entry_point_offset(), osr_nmethod);
++ mtlr(R0);
++ mtctr(R11_scratch1);
++ bctr();
++
++ align(32, 12);
++ bind(overflow_with_error);
++ bind(did_not_overflow);
++}
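++
++// OSR decision above, as pseudo code (illustrative):
++//
++//   if (backedge_count >= InterpreterBackwardBranchLimit &&
++//       (!ProfileInterpreter || (backedge_count & 1023) == 0)) {
++//     nmethod* osr = InterpreterRuntime::frequency_counter_overflow(branch_bcp);
++//     if (osr != NULL && osr->entry_bci() != InvalidOSREntryBci) {
++//       SharedRuntime::OSR_migration_begin(thread);  // OSR buffer returned in ARG1
++//       merge_frames(...);                           // pop the interpreter frame
++//       goto osr->osr_entry_point();
++//     }
++//   }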
++
++// Store a value at some constant offset from the method data pointer.
++void InterpreterMacroAssembler::set_mdp_data_at(int constant, Register value) {
++ assert(ProfileInterpreter, "must be profiling interpreter");
++
++ std(value, constant, R28_mdx);
++}
++
++// Increment the value at some constant offset from the method data pointer.
++void InterpreterMacroAssembler::increment_mdp_data_at(int constant,
++ Register counter_addr,
++ Register Rbumped_count,
++ bool decrement) {
++ // Locate the counter at a fixed offset from the mdp:
++ addi(counter_addr, R28_mdx, constant);
++ increment_mdp_data_at(counter_addr, Rbumped_count, decrement);
++}
++
++// Increment the value at some non-fixed (reg + constant) offset from
++// the method data pointer.
++void InterpreterMacroAssembler::increment_mdp_data_at(Register reg,
++ int constant,
++ Register scratch,
++ Register Rbumped_count,
++ bool decrement) {
++ // Add the constant to reg to get the offset.
++ add(scratch, R28_mdx, reg);
++ // Then calculate the counter address.
++ addi(scratch, scratch, constant);
++ increment_mdp_data_at(scratch, Rbumped_count, decrement);
++}
++
++void InterpreterMacroAssembler::increment_mdp_data_at(Register counter_addr,
++ Register Rbumped_count,
++ bool decrement) {
++ assert(ProfileInterpreter, "must be profiling interpreter");
++
++ // Load the counter.
++ ld(Rbumped_count, 0, counter_addr);
++
++ if (decrement) {
++    // Decrement the counter.
++    addi(Rbumped_count, Rbumped_count, - DataLayout::counter_increment);
++    // Store the decremented counter.
++    std(Rbumped_count, 0, counter_addr);
++    // Note: add/sub overflow checks are not ported, since the 64 bit
++    // calculation should never overflow.
++  } else {
++    // Increment the counter.
++ addi(Rbumped_count, Rbumped_count, DataLayout::counter_increment);
++ // Store the incremented counter.
++ std(Rbumped_count, 0, counter_addr);
++ }
++}
++
++// Set a flag value at the current method data pointer position.
++void InterpreterMacroAssembler::set_mdp_flag_at(int flag_constant,
++ Register scratch) {
++ assert(ProfileInterpreter, "must be profiling interpreter");
++ // Load the data header.
++ lbz(scratch, in_bytes(DataLayout::flags_offset()), R28_mdx);
++ // Set the flag.
++ ori(scratch, scratch, flag_constant);
++ // Store the modified header.
++ stb(scratch, in_bytes(DataLayout::flags_offset()), R28_mdx);
++}
++
++// Test the location at some offset from the method data pointer.
++// If it is not equal to value, branch to the not_equal_continue Label.
++void InterpreterMacroAssembler::test_mdp_data_at(int offset,
++ Register value,
++ Label& not_equal_continue,
++ Register test_out) {
++ assert(ProfileInterpreter, "must be profiling interpreter");
++
++ ld(test_out, offset, R28_mdx);
++ cmpd(CCR0, value, test_out);
++ bne(CCR0, not_equal_continue);
++}
++
++// Update the method data pointer by the displacement located at some fixed
++// offset from the method data pointer.
++void InterpreterMacroAssembler::update_mdp_by_offset(int offset_of_disp,
++ Register scratch) {
++ assert(ProfileInterpreter, "must be profiling interpreter");
++
++ ld(scratch, offset_of_disp, R28_mdx);
++ add(R28_mdx, scratch, R28_mdx);
++}
++
++// Update the method data pointer by the displacement located at the
++// offset (reg + offset_of_disp).
++void InterpreterMacroAssembler::update_mdp_by_offset(Register reg,
++ int offset_of_disp,
++ Register scratch) {
++ assert(ProfileInterpreter, "must be profiling interpreter");
++
++ add(scratch, reg, R28_mdx);
++ ld(scratch, offset_of_disp, scratch);
++ add(R28_mdx, scratch, R28_mdx);
++}
++
++// Update the method data pointer by a simple constant displacement.
++void InterpreterMacroAssembler::update_mdp_by_constant(int constant) {
++ assert(ProfileInterpreter, "must be profiling interpreter");
++ addi(R28_mdx, R28_mdx, constant);
++}
++
++// Update the method data pointer for a _ret bytecode whose target
++// was not among our cached targets.
++void InterpreterMacroAssembler::update_mdp_for_ret(TosState state,
++ Register return_bci) {
++ assert(ProfileInterpreter, "must be profiling interpreter");
++
++ push(state);
++ assert(return_bci->is_nonvolatile(), "need to protect return_bci");
++ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), return_bci);
++ pop(state);
++}
++
++// Increments the backedge counter.
++// Returns backedge counter + invocation counter in Rdst.
++void InterpreterMacroAssembler::increment_backedge_counter(const Register Rcounters, const Register Rdst,
++ const Register Rtmp1, Register Rscratch) {
++ assert(UseCompiler, "incrementing must be useful");
++ assert_different_registers(Rdst, Rtmp1);
++ const Register invocation_counter = Rtmp1;
++ const Register counter = Rdst;
++ // TODO ppc port assert(4 == InvocationCounter::sz_counter(), "unexpected field size.");
++
++ // Load backedge counter.
++ lwz(counter, in_bytes(MethodCounters::backedge_counter_offset()) +
++ in_bytes(InvocationCounter::counter_offset()), Rcounters);
++ // Load invocation counter.
++ lwz(invocation_counter, in_bytes(MethodCounters::invocation_counter_offset()) +
++ in_bytes(InvocationCounter::counter_offset()), Rcounters);
++
++ // Add the delta to the backedge counter.
++ addi(counter, counter, InvocationCounter::count_increment);
++
++ // Mask the invocation counter.
++ li(Rscratch, InvocationCounter::count_mask_value);
++ andr(invocation_counter, invocation_counter, Rscratch);
++
++ // Store new counter value.
++ stw(counter, in_bytes(MethodCounters::backedge_counter_offset()) +
++ in_bytes(InvocationCounter::counter_offset()), Rcounters);
++ // Return invocation counter + backedge counter.
++ add(counter, counter, invocation_counter);
++}
++
++// Count a taken branch in the bytecodes.
++void InterpreterMacroAssembler::profile_taken_branch(Register scratch, Register bumped_count) {
++ if (ProfileInterpreter) {
++ Label profile_continue;
++
++ // If no method data exists, go to profile_continue.
++ test_method_data_pointer(profile_continue);
++
++ // We are taking a branch. Increment the taken count.
++ increment_mdp_data_at(in_bytes(JumpData::taken_offset()), scratch, bumped_count);
++
++ // The method data pointer needs to be updated to reflect the new target.
++ update_mdp_by_offset(in_bytes(JumpData::displacement_offset()), scratch);
++ bind (profile_continue);
++ }
++}
++
++// Count a not-taken branch in the bytecodes.
++void InterpreterMacroAssembler::profile_not_taken_branch(Register scratch1, Register scratch2) {
++ if (ProfileInterpreter) {
++ Label profile_continue;
++
++ // If no method data exists, go to profile_continue.
++ test_method_data_pointer(profile_continue);
++
++    // We are not taking this branch. Increment the not-taken count.
++ increment_mdp_data_at(in_bytes(BranchData::not_taken_offset()), scratch1, scratch2);
++
++ // The method data pointer needs to be updated to correspond to the
++ // next bytecode.
++ update_mdp_by_constant(in_bytes(BranchData::branch_data_size()));
++ bind (profile_continue);
++ }
++}
++
++// Count a non-virtual call in the bytecodes.
++void InterpreterMacroAssembler::profile_call(Register scratch1, Register scratch2) {
++ if (ProfileInterpreter) {
++ Label profile_continue;
++
++ // If no method data exists, go to profile_continue.
++ test_method_data_pointer(profile_continue);
++
++ // We are making a call. Increment the count.
++ increment_mdp_data_at(in_bytes(CounterData::count_offset()), scratch1, scratch2);
++
++ // The method data pointer needs to be updated to reflect the new target.
++ update_mdp_by_constant(in_bytes(CounterData::counter_data_size()));
++ bind (profile_continue);
++ }
++}
++
++// Count a final call in the bytecodes.
++void InterpreterMacroAssembler::profile_final_call(Register scratch1, Register scratch2) {
++ if (ProfileInterpreter) {
++ Label profile_continue;
++
++ // If no method data exists, go to profile_continue.
++ test_method_data_pointer(profile_continue);
++
++ // We are making a call. Increment the count.
++ increment_mdp_data_at(in_bytes(CounterData::count_offset()), scratch1, scratch2);
++
++ // The method data pointer needs to be updated to reflect the new target.
++ update_mdp_by_constant(in_bytes(VirtualCallData::virtual_call_data_size()));
++ bind (profile_continue);
++ }
++}
++
++// Count a virtual call in the bytecodes.
++void InterpreterMacroAssembler::profile_virtual_call(Register Rreceiver,
++ Register Rscratch1,
++ Register Rscratch2,
++ bool receiver_can_be_null) {
++ if (!ProfileInterpreter) { return; }
++ Label profile_continue;
++
++ // If no method data exists, go to profile_continue.
++ test_method_data_pointer(profile_continue);
++
++ Label skip_receiver_profile;
++ if (receiver_can_be_null) {
++ Label not_null;
++ cmpdi(CCR0, Rreceiver, 0);
++ bne(CCR0, not_null);
++ // We are making a call. Increment the count for null receiver.
++ increment_mdp_data_at(in_bytes(CounterData::count_offset()), Rscratch1, Rscratch2);
++ b(skip_receiver_profile);
++ bind(not_null);
++ }
++
++ // Record the receiver type.
++ record_klass_in_profile(Rreceiver, Rscratch1, Rscratch2, true);
++ bind(skip_receiver_profile);
++
++ // The method data pointer needs to be updated to reflect the new target.
++ update_mdp_by_constant(in_bytes(VirtualCallData::virtual_call_data_size()));
++ bind (profile_continue);
++}
++
++void InterpreterMacroAssembler::profile_typecheck(Register Rklass, Register Rscratch1, Register Rscratch2) {
++ if (ProfileInterpreter) {
++ Label profile_continue;
++
++ // If no method data exists, go to profile_continue.
++ test_method_data_pointer(profile_continue);
++
++ int mdp_delta = in_bytes(BitData::bit_data_size());
++ if (TypeProfileCasts) {
++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size());
++
++ // Record the object type.
++ record_klass_in_profile(Rklass, Rscratch1, Rscratch2, false);
++ }
++
++ // The method data pointer needs to be updated.
++ update_mdp_by_constant(mdp_delta);
++
++ bind (profile_continue);
++ }
++}
++
++void InterpreterMacroAssembler::profile_typecheck_failed(Register Rscratch1, Register Rscratch2) {
++ if (ProfileInterpreter && TypeProfileCasts) {
++ Label profile_continue;
++
++ // If no method data exists, go to profile_continue.
++ test_method_data_pointer(profile_continue);
++
++ int count_offset = in_bytes(CounterData::count_offset());
++ // Back up the address, since we have already bumped the mdp.
++ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size());
++
++ // *Decrement* the counter. We expect to see zero or small negatives.
++ increment_mdp_data_at(count_offset, Rscratch1, Rscratch2, true);
++
++ bind (profile_continue);
++ }
++}
++
++// Count a ret in the bytecodes.
++void InterpreterMacroAssembler::profile_ret(TosState state, Register return_bci, Register scratch1, Register scratch2) {
++ if (ProfileInterpreter) {
++ Label profile_continue;
++ uint row;
++
++ // If no method data exists, go to profile_continue.
++ test_method_data_pointer(profile_continue);
++
++ // Update the total ret count.
++    increment_mdp_data_at(in_bytes(CounterData::count_offset()), scratch1, scratch2);
++
++ for (row = 0; row < RetData::row_limit(); row++) {
++ Label next_test;
++
++ // See if return_bci is equal to bci[n]:
++ test_mdp_data_at(in_bytes(RetData::bci_offset(row)), return_bci, next_test, scratch1);
++
++ // return_bci is equal to bci[n]. Increment the count.
++ increment_mdp_data_at(in_bytes(RetData::bci_count_offset(row)), scratch1, scratch2);
++
++ // The method data pointer needs to be updated to reflect the new target.
++ update_mdp_by_offset(in_bytes(RetData::bci_displacement_offset(row)), scratch1);
++ b(profile_continue);
++ bind(next_test);
++ }
++
++ update_mdp_for_ret(state, return_bci);
++
++ bind (profile_continue);
++ }
++}
++
++// Count the default case of a switch construct.
++void InterpreterMacroAssembler::profile_switch_default(Register scratch1, Register scratch2) {
++ if (ProfileInterpreter) {
++ Label profile_continue;
++
++ // If no method data exists, go to profile_continue.
++ test_method_data_pointer(profile_continue);
++
++ // Update the default case count
++ increment_mdp_data_at(in_bytes(MultiBranchData::default_count_offset()),
++ scratch1, scratch2);
++
++ // The method data pointer needs to be updated.
++ update_mdp_by_offset(in_bytes(MultiBranchData::default_displacement_offset()),
++ scratch1);
++
++ bind (profile_continue);
++ }
++}
++
++// Count the index'th case of a switch construct.
++void InterpreterMacroAssembler::profile_switch_case(Register index,
++ Register scratch1,
++ Register scratch2,
++ Register scratch3) {
++ if (ProfileInterpreter) {
++ assert_different_registers(index, scratch1, scratch2, scratch3);
++ Label profile_continue;
++
++ // If no method data exists, go to profile_continue.
++ test_method_data_pointer(profile_continue);
++
++ // Build the base (index * per_case_size_in_bytes()) + case_array_offset_in_bytes().
++ li(scratch3, in_bytes(MultiBranchData::case_array_offset()));
++
++ assert (in_bytes(MultiBranchData::per_case_size()) == 16, "so that shladd works");
++ sldi(scratch1, index, exact_log2(in_bytes(MultiBranchData::per_case_size())));
++ add(scratch1, scratch1, scratch3);
++
++ // Update the case count.
++ increment_mdp_data_at(scratch1, in_bytes(MultiBranchData::relative_count_offset()), scratch2, scratch3);
++
++ // The method data pointer needs to be updated.
++ update_mdp_by_offset(scratch1, in_bytes(MultiBranchData::relative_displacement_offset()), scratch2);
++
++ bind (profile_continue);
++ }
++}
++
++void InterpreterMacroAssembler::profile_null_seen(Register Rscratch1, Register Rscratch2) {
++ if (ProfileInterpreter) {
++ assert_different_registers(Rscratch1, Rscratch2);
++ Label profile_continue;
++
++ // If no method data exists, go to profile_continue.
++ test_method_data_pointer(profile_continue);
++
++ set_mdp_flag_at(BitData::null_seen_byte_constant(), Rscratch1);
++
++ // The method data pointer needs to be updated.
++ int mdp_delta = in_bytes(BitData::bit_data_size());
++ if (TypeProfileCasts) {
++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size());
++ }
++ update_mdp_by_constant(mdp_delta);
++
++ bind (profile_continue);
++ }
++}
++
++void InterpreterMacroAssembler::record_klass_in_profile(Register Rreceiver,
++ Register Rscratch1, Register Rscratch2,
++ bool is_virtual_call) {
++ assert(ProfileInterpreter, "must be profiling");
++ assert_different_registers(Rreceiver, Rscratch1, Rscratch2);
++
++ Label done;
++ record_klass_in_profile_helper(Rreceiver, Rscratch1, Rscratch2, 0, done, is_virtual_call);
++ bind (done);
++}
++
++void InterpreterMacroAssembler::record_klass_in_profile_helper(
++ Register receiver, Register scratch1, Register scratch2,
++ int start_row, Label& done, bool is_virtual_call) {
++ if (TypeProfileWidth == 0) {
++ if (is_virtual_call) {
++ increment_mdp_data_at(in_bytes(CounterData::count_offset()), scratch1, scratch2);
++ }
++ return;
++ }
++
++ int last_row = VirtualCallData::row_limit() - 1;
++ assert(start_row <= last_row, "must be work left to do");
++ // Test this row for both the receiver and for null.
++ // Take any of three different outcomes:
++ // 1. found receiver => increment count and goto done
++ // 2. found null => keep looking for case 1, maybe allocate this cell
++ // 3. found something else => keep looking for cases 1 and 2
++ // Case 3 is handled by a recursive call.
++ for (int row = start_row; row <= last_row; row++) {
++ Label next_test;
++ bool test_for_null_also = (row == start_row);
++
++ // See if the receiver is receiver[n].
++ int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row));
++ test_mdp_data_at(recvr_offset, receiver, next_test, scratch1);
++ // delayed()->tst(scratch);
++
++ // The receiver is receiver[n]. Increment count[n].
++ int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row));
++ increment_mdp_data_at(count_offset, scratch1, scratch2);
++ b(done);
++ bind(next_test);
++
++ if (test_for_null_also) {
++ Label found_null;
++ // Failed the equality check on receiver[n]... Test for null.
++ if (start_row == last_row) {
++ // The only thing left to do is handle the null case.
++ if (is_virtual_call) {
++ // Scratch1 contains test_out from test_mdp_data_at.
++ cmpdi(CCR0, scratch1, 0);
++ beq(CCR0, found_null);
++ // Receiver did not match any saved receiver and there is no empty row for it.
++ // Increment total counter to indicate polymorphic case.
++ increment_mdp_data_at(in_bytes(CounterData::count_offset()), scratch1, scratch2);
++ b(done);
++ bind(found_null);
++ } else {
++ cmpdi(CCR0, scratch1, 0);
++ bne(CCR0, done);
++ }
++ break;
++ }
++ // Since null is rare, make it be the branch-taken case.
++ cmpdi(CCR0, scratch1, 0);
++ beq(CCR0, found_null);
++
++ // Put all the "Case 3" tests here.
++ record_klass_in_profile_helper(receiver, scratch1, scratch2, start_row + 1, done, is_virtual_call);
++
++ // Found a null. Keep searching for a matching receiver,
++ // but remember that this is an empty (unused) slot.
++ bind(found_null);
++ }
++ }
++
++  // In the fall-through case, we found no matching receiver, but we
++  // observed that receiver[start_row] is NULL.
++
++ // Fill in the receiver field and increment the count.
++ int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row));
++ set_mdp_data_at(recvr_offset, receiver);
++ int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row));
++ li(scratch1, DataLayout::counter_increment);
++ set_mdp_data_at(count_offset, scratch1);
++ if (start_row > 0) {
++ b(done);
++ }
++}
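++
++// Net behavior of the unrolled rows above (illustrative):
++//
++//   for (row = 0; row < row_limit; row++)
++//     if (receiver == receiver[row]) { count[row] += increment; goto done; }
++//   if (some receiver[row] == NULL)  { receiver[row] = receiver; count[row] = increment; }
++//   else if (is_virtual_call)        total_count += increment;  // polymorphic case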
++
++// Add an InterpMonitorElem to the stack (see frame_sparc.hpp).
++void InterpreterMacroAssembler::add_monitor_to_stack(bool stack_is_empty, Register Rtemp1, Register Rtemp2) {
++
++ // Very-local scratch registers.
++ const Register esp = Rtemp1;
++ const Register slot = Rtemp2;
++
++ // Extracted monitor_size.
++ int monitor_size = frame::interpreter_frame_monitor_size_in_bytes();
++ assert(Assembler::is_aligned((unsigned int)monitor_size,
++ (unsigned int)frame::alignment_in_bytes),
++ "size of a monitor must respect alignment of SP");
++
++ resize_frame(-monitor_size, /*temp*/esp); // Allocate space for new monitor
++ std(R1_SP, _ijava_state_neg(top_frame_sp), esp); // esp contains fp
++
++ // Shuffle expression stack down. Recall that stack_base points
++ // just above the new expression stack bottom. Old_tos and new_tos
++  // are used to scan through the old and new expression stacks.
++ if (!stack_is_empty) {
++ Label copy_slot, copy_slot_finished;
++ const Register n_slots = slot;
++
++ addi(esp, R15_esp, Interpreter::stackElementSize); // Point to first element (pre-pushed stack).
++ subf(n_slots, esp, R26_monitor);
++ srdi_(n_slots, n_slots, LogBytesPerWord); // Compute number of slots to copy.
++ assert(LogBytesPerWord == 3, "conflicts assembler instructions");
++ beq(CCR0, copy_slot_finished); // Nothing to copy.
++
++ mtctr(n_slots);
++
++ // loop
++ bind(copy_slot);
++ ld(slot, 0, esp); // Move expression stack down.
++ std(slot, -monitor_size, esp); // distance = monitor_size
++ addi(esp, esp, BytesPerWord);
++ bdnz(copy_slot);
++
++ bind(copy_slot_finished);
++ }
++
++ addi(R15_esp, R15_esp, -monitor_size);
++ addi(R26_monitor, R26_monitor, -monitor_size);
++
++ // Restart interpreter
++}
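++
++// Net effect of add_monitor_to_stack (illustrative):
++//
++//   SP          -= monitor_size;                     // resize_frame
++//   move the expression stack down by monitor_size;  // copy_slot loop
++//   R15_esp     -= monitor_size;
++//   R26_monitor -= monitor_size;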
++
++// ============================================================================
++// Java locals access
++
++// Load a local variable at index in Rindex into register Rdst_value.
++// Also puts address of local into Rdst_address as a service.
++// Kills:
++// - Rdst_value
++// - Rdst_address
++void InterpreterMacroAssembler::load_local_int(Register Rdst_value, Register Rdst_address, Register Rindex) {
++ sldi(Rdst_address, Rindex, Interpreter::logStackElementSize);
++ subf(Rdst_address, Rdst_address, R18_locals);
++ lwz(Rdst_value, 0, Rdst_address);
++}
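++
++// Locals addressing used by the load_local_* / store_local_* family
++// (illustrative): locals grow towards smaller addresses, so slot i lives at
++//
++//   addr = R18_locals - (i << Interpreter::logStackElementSize)
++//
++// and two-slot values (long/double) are accessed at addr - 8.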
++
++// Load a local variable at index in Rindex into register Rdst_value.
++// Also puts address of local into Rdst_address as a service.
++// Kills:
++// - Rdst_value
++// - Rdst_address
++void InterpreterMacroAssembler::load_local_long(Register Rdst_value, Register Rdst_address, Register Rindex) {
++ sldi(Rdst_address, Rindex, Interpreter::logStackElementSize);
++ subf(Rdst_address, Rdst_address, R18_locals);
++ ld(Rdst_value, -8, Rdst_address);
++}
++
++// Load a local variable at index in Rindex into register Rdst_value.
++// Also puts address of local into Rdst_address as a service.
++// Input:
++// - Rindex: slot nr of local variable
++// Kills:
++// - Rdst_value
++// - Rdst_address
++void InterpreterMacroAssembler::load_local_ptr(Register Rdst_value, Register Rdst_address, Register Rindex) {
++ sldi(Rdst_address, Rindex, Interpreter::logStackElementSize);
++ subf(Rdst_address, Rdst_address, R18_locals);
++ ld(Rdst_value, 0, Rdst_address);
++}
++
++// Load a local variable at index in Rindex into register Rdst_value.
++// Also puts address of local into Rdst_address as a service.
++// Kills:
++// - Rdst_value
++// - Rdst_address
++void InterpreterMacroAssembler::load_local_float(FloatRegister Rdst_value, Register Rdst_address, Register Rindex) {
++ sldi(Rdst_address, Rindex, Interpreter::logStackElementSize);
++ subf(Rdst_address, Rdst_address, R18_locals);
++ lfs(Rdst_value, 0, Rdst_address);
++}
++
++// Load a local variable at index in Rindex into register Rdst_value.
++// Also puts address of local into Rdst_address as a service.
++// Kills:
++// - Rdst_value
++// - Rdst_address
++void InterpreterMacroAssembler::load_local_double(FloatRegister Rdst_value, Register Rdst_address, Register Rindex) {
++ sldi(Rdst_address, Rindex, Interpreter::logStackElementSize);
++ subf(Rdst_address, Rdst_address, R18_locals);
++ lfd(Rdst_value, -8, Rdst_address);
++}
++
++// Store an int value at local variable slot Rindex.
++// Kills:
++// - Rindex
++void InterpreterMacroAssembler::store_local_int(Register Rvalue, Register Rindex) {
++ sldi(Rindex, Rindex, Interpreter::logStackElementSize);
++ subf(Rindex, Rindex, R18_locals);
++ stw(Rvalue, 0, Rindex);
++}
++
++// Store a long value at local variable slot Rindex.
++// Kills:
++// - Rindex
++void InterpreterMacroAssembler::store_local_long(Register Rvalue, Register Rindex) {
++ sldi(Rindex, Rindex, Interpreter::logStackElementSize);
++ subf(Rindex, Rindex, R18_locals);
++ std(Rvalue, -8, Rindex);
++}
++
++// Store an oop value at local variable slot Rindex.
++// Kills:
++// - Rindex
++void InterpreterMacroAssembler::store_local_ptr(Register Rvalue, Register Rindex) {
++ sldi(Rindex, Rindex, Interpreter::logStackElementSize);
++ subf(Rindex, Rindex, R18_locals);
++ std(Rvalue, 0, Rindex);
++}
++
++// Store an int value at local variable slot Rindex.
++// Kills:
++// - Rindex
++void InterpreterMacroAssembler::store_local_float(FloatRegister Rvalue, Register Rindex) {
++ sldi(Rindex, Rindex, Interpreter::logStackElementSize);
++ subf(Rindex, Rindex, R18_locals);
++ stfs(Rvalue, 0, Rindex);
++}
++
++// Store an int value at local variable slot Rindex.
++// Kills:
++// - Rindex
++void InterpreterMacroAssembler::store_local_double(FloatRegister Rvalue, Register Rindex) {
++ sldi(Rindex, Rindex, Interpreter::logStackElementSize);
++ subf(Rindex, Rindex, R18_locals);
++ stfd(Rvalue, -8, Rindex);
++}
++
++// Read pending exception from thread and, if one is pending, jump to the
++// interpreter's throw exception entry. Fall through otherwise.
++void InterpreterMacroAssembler::check_and_forward_exception(Register Rscratch1, Register Rscratch2) {
++ assert_different_registers(Rscratch1, Rscratch2, R3);
++ Register Rexception = Rscratch1;
++ Register Rtmp = Rscratch2;
++ Label Ldone;
++ // Get pending exception oop.
++ ld(Rexception, thread_(pending_exception));
++ cmpdi(CCR0, Rexception, 0);
++ beq(CCR0, Ldone);
++ li(Rtmp, 0);
++ mr_if_needed(R3, Rexception);
++ std(Rtmp, thread_(pending_exception)); // Clear exception in thread
++ if (Interpreter::rethrow_exception_entry() != NULL) {
++ // Already got entry address.
++ load_dispatch_table(Rtmp, (address*)Interpreter::rethrow_exception_entry());
++ } else {
++ // Dynamically load entry address.
++ int simm16_rest = load_const_optimized(Rtmp, &Interpreter::_rethrow_exception_entry, R0, true);
++ ld(Rtmp, simm16_rest, Rtmp);
++ }
++ mtctr(Rtmp);
++ save_interpreter_state(Rtmp);
++ bctr();
++
++ align(32, 12);
++ bind(Ldone);
++}
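++
++// Pseudo code for check_and_forward_exception (illustrative):
++//
++//   if ((exc = thread->pending_exception()) != NULL) {
++//     thread->set_pending_exception(NULL);
++//     R3 = exc;
++//     save interpreter state;
++//     goto Interpreter::rethrow_exception_entry();
++//   }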
++
++void InterpreterMacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) {
++ save_interpreter_state(R11_scratch1);
++
++ MacroAssembler::call_VM(oop_result, entry_point, false);
++
++ restore_interpreter_state(R11_scratch1, /*bcp_and_mdx_only*/ true);
++
++ check_and_handle_popframe(R11_scratch1);
++ check_and_handle_earlyret(R11_scratch1);
++ // Now check exceptions manually.
++ if (check_exceptions) {
++ check_and_forward_exception(R11_scratch1, R12_scratch2);
++ }
++}
++
++void InterpreterMacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) {
++ // ARG1 is reserved for the thread.
++ mr_if_needed(R4_ARG2, arg_1);
++ call_VM(oop_result, entry_point, check_exceptions);
++}
++
++void InterpreterMacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
++ // ARG1 is reserved for the thread.
++ mr_if_needed(R4_ARG2, arg_1);
++ assert(arg_2 != R4_ARG2, "smashed argument");
++ mr_if_needed(R5_ARG3, arg_2);
++ call_VM(oop_result, entry_point, check_exceptions);
++}
++
++void InterpreterMacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
++ // ARG1 is reserved for the thread.
++ mr_if_needed(R4_ARG2, arg_1);
++ assert(arg_2 != R4_ARG2, "smashed argument");
++ mr_if_needed(R5_ARG3, arg_2);
++ assert(arg_3 != R4_ARG2 && arg_3 != R5_ARG3, "smashed argument");
++ mr_if_needed(R6_ARG4, arg_3);
++ call_VM(oop_result, entry_point, check_exceptions);
++}
++
++void InterpreterMacroAssembler::save_interpreter_state(Register scratch) {
++ ld(scratch, 0, R1_SP);
++ std(R15_esp, _ijava_state_neg(esp), scratch);
++ std(R14_bcp, _ijava_state_neg(bcp), scratch);
++ std(R26_monitor, _ijava_state_neg(monitors), scratch);
++ if (ProfileInterpreter) { std(R28_mdx, _ijava_state_neg(mdx), scratch); }
++ // Other entries should be unchanged.
++}
++
++void InterpreterMacroAssembler::restore_interpreter_state(Register scratch, bool bcp_and_mdx_only) {
++ ld(scratch, 0, R1_SP);
++ ld(R14_bcp, _ijava_state_neg(bcp), scratch); // Changed by VM code (exception).
++ if (ProfileInterpreter) { ld(R28_mdx, _ijava_state_neg(mdx), scratch); } // Changed by VM code.
++ if (!bcp_and_mdx_only) {
++ // Following ones are Metadata.
++ ld(R19_method, _ijava_state_neg(method), scratch);
++ ld(R27_constPoolCache, _ijava_state_neg(cpoolCache), scratch);
++ // Following ones are stack addresses and don't require reload.
++ ld(R15_esp, _ijava_state_neg(esp), scratch);
++ ld(R18_locals, _ijava_state_neg(locals), scratch);
++ ld(R26_monitor, _ijava_state_neg(monitors), scratch);
++ }
++#ifdef ASSERT
++ {
++ Label Lok;
++ subf(R0, R1_SP, scratch);
++ cmpdi(CCR0, R0, frame::abi_reg_args_size + frame::ijava_state_size);
++ bge(CCR0, Lok);
++ stop("frame too small (restore istate)", 0x5432);
++ bind(Lok);
++ }
++ {
++ Label Lok;
++ ld(R0, _ijava_state_neg(ijava_reserved), scratch);
++ cmpdi(CCR0, R0, 0x5afe);
++ beq(CCR0, Lok);
++ stop("frame corrupted (restore istate)", 0x5afe);
++ bind(Lok);
++ }
++#endif
++}
++
++#endif // !CC_INTERP
++
++void InterpreterMacroAssembler::get_method_counters(Register method,
++ Register Rcounters,
++ Label& skip) {
++ BLOCK_COMMENT("Load and ev. allocate counter object {");
++ Label has_counters;
++ ld(Rcounters, in_bytes(Method::method_counters_offset()), method);
++ cmpdi(CCR0, Rcounters, 0);
++ bne(CCR0, has_counters);
++ call_VM(noreg, CAST_FROM_FN_PTR(address,
++ InterpreterRuntime::build_method_counters), method, false);
++ ld(Rcounters, in_bytes(Method::method_counters_offset()), method);
++ cmpdi(CCR0, Rcounters, 0);
++ beq(CCR0, skip); // No MethodCounters, OutOfMemory.
++ BLOCK_COMMENT("} Load and ev. allocate counter object");
++
++ bind(has_counters);
++}
++
++void InterpreterMacroAssembler::increment_invocation_counter(Register Rcounters, Register iv_be_count, Register Rtmp_r0) {
++ assert(UseCompiler, "incrementing must be useful");
++ Register invocation_count = iv_be_count;
++ Register backedge_count = Rtmp_r0;
++ int delta = InvocationCounter::count_increment;
++
++ // Load each counter in a register.
++ // ld(inv_counter, Rtmp);
++ // ld(be_counter, Rtmp2);
++ int inv_counter_offset = in_bytes(MethodCounters::invocation_counter_offset() +
++ InvocationCounter::counter_offset());
++ int be_counter_offset = in_bytes(MethodCounters::backedge_counter_offset() +
++ InvocationCounter::counter_offset());
++
++ BLOCK_COMMENT("Increment profiling counters {");
++
++ // Load the backedge counter.
++ lwz(backedge_count, be_counter_offset, Rcounters); // is unsigned int
++ // Mask the backedge counter.
++ Register tmp = invocation_count;
++ li(tmp, InvocationCounter::count_mask_value);
++ andr(backedge_count, tmp, backedge_count); // Cannot use andi, need sign extension of count_mask_value.
++
++ // Load the invocation counter.
++ lwz(invocation_count, inv_counter_offset, Rcounters); // is unsigned int
++ // Add the delta to the invocation counter and store the result.
++ addi(invocation_count, invocation_count, delta);
++ // Store value.
++ stw(invocation_count, inv_counter_offset, Rcounters);
++
++ // Add invocation counter + backedge counter.
++ add(iv_be_count, backedge_count, invocation_count);
++
++ // Note that this macro must leave the backedge_count + invocation_count in
++ // register iv_be_count!
++ BLOCK_COMMENT("} Increment profiling counters");
++}
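++
++// Summary of increment_invocation_counter (illustrative):
++//
++//   invocation_counter += count_increment;  // stored back to the MethodCounters
++//   iv_be_count = invocation_counter + (backedge_counter & count_mask_value);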
++
++void InterpreterMacroAssembler::verify_oop(Register reg, TosState state) {
++ if (state == atos) { MacroAssembler::verify_oop(reg); }
++}
++
++#ifndef CC_INTERP
++// Local helper function for the verify_oop_or_return_address macro.
++static bool verify_return_address(Method* m, int bci) {
++#ifndef PRODUCT
++ address pc = (address)(m->constMethod()) + in_bytes(ConstMethod::codes_offset()) + bci;
++ // Assume it is a valid return address if it is inside m and is preceded by a jsr.
++ if (!m->contains(pc)) return false;
++ address jsr_pc;
++ jsr_pc = pc - Bytecodes::length_for(Bytecodes::_jsr);
++ if (*jsr_pc == Bytecodes::_jsr && jsr_pc >= m->code_base()) return true;
++ jsr_pc = pc - Bytecodes::length_for(Bytecodes::_jsr_w);
++ if (*jsr_pc == Bytecodes::_jsr_w && jsr_pc >= m->code_base()) return true;
++#endif // PRODUCT
++ return false;
++}
++
++void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) {
++ if (VerifyFPU) {
++ unimplemented("verfiyFPU");
++ }
++}
++
++void InterpreterMacroAssembler::verify_oop_or_return_address(Register reg, Register Rtmp) {
++ if (!VerifyOops) return;
++
++ // The VM documentation for the astore[_wide] bytecode allows
++ // the TOS to be not only an oop but also a return address.
++ Label test;
++ Label skip;
++ // See if it is an address (in the current method):
++
++ const int log2_bytecode_size_limit = 16;
++ srdi_(Rtmp, reg, log2_bytecode_size_limit);
++ bne(CCR0, test);
++
++ address fd = CAST_FROM_FN_PTR(address, verify_return_address);
++ unsigned int nbytes_save = 10*8; // 10 volatile gprs
++
++ save_LR_CR(Rtmp);
++ push_frame_reg_args(nbytes_save, Rtmp);
++ save_volatile_gprs(R1_SP, 112); // except R0
++
++ load_const_optimized(Rtmp, fd, R0);
++ mr_if_needed(R4_ARG2, reg);
++ mr(R3_ARG1, R19_method);
++ call_c(Rtmp); // call C
++
++ restore_volatile_gprs(R1_SP, 112); // except R0
++ pop_frame();
++ restore_LR_CR(Rtmp);
++ b(skip);
++
++ // Perform a more elaborate out-of-line call.
++ // Not an address; verify it:
++ bind(test);
++ verify_oop(reg);
++ bind(skip);
++}
++#endif // !CC_INTERP
++
++// Inline assembly for:
++//
++// if (thread is in interp_only_mode) {
++// InterpreterRuntime::post_method_entry();
++// }
++// if (*jvmpi::event_flags_array_at_addr(JVMPI_EVENT_METHOD_ENTRY ) ||
++// *jvmpi::event_flags_array_at_addr(JVMPI_EVENT_METHOD_ENTRY2) ) {
++// SharedRuntime::jvmpi_method_entry(method, receiver);
++// }
++void InterpreterMacroAssembler::notify_method_entry() {
++ // JVMTI
++ // Whenever JVMTI puts a thread in interp_only_mode, method
++ // entry/exit events are sent for that thread to track stack
++ // depth. If it is possible to enter interp_only_mode we add
++ // the code to check if the event should be sent.
++ if (JvmtiExport::can_post_interpreter_events()) {
++ Label jvmti_post_done;
++
++ lwz(R0, in_bytes(JavaThread::interp_only_mode_offset()), R16_thread);
++ cmpwi(CCR0, R0, 0);
++ beq(CCR0, jvmti_post_done);
++ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_entry),
++ /*check_exceptions=*/true CC_INTERP_ONLY(&& false));
++
++ bind(jvmti_post_done);
++ }
++}
++
++// Inline assembly for:
++//
++// if (thread is in interp_only_mode) {
++// // save result
++// InterpreterRuntime::post_method_exit();
++// // restore result
++// }
++// if (*jvmpi::event_flags_array_at_addr(JVMPI_EVENT_METHOD_EXIT)) {
++// // save result
++// SharedRuntime::jvmpi_method_exit();
++// // restore result
++// }
++//
++// Native methods have their result stored in d_tmp and l_tmp.
++// Java methods have their result stored in the expression stack.
++void InterpreterMacroAssembler::notify_method_exit(bool is_native_method, TosState state,
++ NotifyMethodExitMode mode, bool check_exceptions) {
++ // JVMTI
++ // Whenever JVMTI puts a thread in interp_only_mode, method
++ // entry/exit events are sent for that thread to track stack
++ // depth. If it is possible to enter interp_only_mode we add
++ // the code to check if the event should be sent.
++ if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) {
++ Label jvmti_post_done;
++
++ lwz(R0, in_bytes(JavaThread::interp_only_mode_offset()), R16_thread);
++ cmpwi(CCR0, R0, 0);
++ beq(CCR0, jvmti_post_done);
++ CC_INTERP_ONLY(assert(is_native_method && !check_exceptions, "must not push state"));
++ if (!is_native_method) push(state); // Expose tos to GC.
++ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit),
++ /*check_exceptions=*/check_exceptions);
++ if (!is_native_method) pop(state);
++
++ align(32, 12);
++ bind(jvmti_post_done);
++ }
++
++ // Dtrace support not implemented.
++}
++
++#ifdef CC_INTERP
++// Convert the current TOP_IJAVA_FRAME into a PARENT_IJAVA_FRAME
++// (using parent_frame_resize) and push a new interpreter
++// TOP_IJAVA_FRAME (using frame_size).
++void InterpreterMacroAssembler::push_interpreter_frame(Register top_frame_size, Register parent_frame_resize,
++ Register tmp1, Register tmp2, Register tmp3,
++ Register tmp4, Register pc) {
++ assert_different_registers(top_frame_size, parent_frame_resize, tmp1, tmp2, tmp3, tmp4);
++ ld(tmp1, _top_ijava_frame_abi(frame_manager_lr), R1_SP);
++ mr(tmp2/*top_frame_sp*/, R1_SP);
++ // Move initial_caller_sp.
++ ld(tmp4, _top_ijava_frame_abi(initial_caller_sp), R1_SP);
++ neg(parent_frame_resize, parent_frame_resize);
++ resize_frame(parent_frame_resize/*-parent_frame_resize*/, tmp3);
++
++ // Set LR in new parent frame.
++ std(tmp1, _abi(lr), R1_SP);
++ // Set top_frame_sp info for new parent frame.
++ std(tmp2, _parent_ijava_frame_abi(top_frame_sp), R1_SP);
++ std(tmp4, _parent_ijava_frame_abi(initial_caller_sp), R1_SP);
++
++ // Push new TOP_IJAVA_FRAME.
++ push_frame(top_frame_size, tmp2);
++
++ get_PC_trash_LR(tmp3);
++ std(tmp3, _top_ijava_frame_abi(frame_manager_lr), R1_SP);
++ // Used for non-initial callers by unextended_sp().
++ std(R1_SP, _top_ijava_frame_abi(initial_caller_sp), R1_SP);
++}
++
++// Pop the topmost TOP_IJAVA_FRAME and convert the previous
++// PARENT_IJAVA_FRAME back into a TOP_IJAVA_FRAME.
++void InterpreterMacroAssembler::pop_interpreter_frame(Register tmp1, Register tmp2, Register tmp3, Register tmp4) {
++ assert_different_registers(tmp1, tmp2, tmp3, tmp4);
++
++ ld(tmp1/*caller's sp*/, _abi(callers_sp), R1_SP);
++ ld(tmp3, _abi(lr), tmp1);
++
++ ld(tmp4, _parent_ijava_frame_abi(initial_caller_sp), tmp1);
++
++ ld(tmp2/*caller's caller's sp*/, _abi(callers_sp), tmp1);
++ // Merge top frame.
++ std(tmp2, _abi(callers_sp), R1_SP);
++
++ ld(tmp2, _parent_ijava_frame_abi(top_frame_sp), tmp1);
++
++ // Update C stack pointer to caller's top_abi.
++ resize_frame_absolute(tmp2/*addr*/, tmp1/*tmp*/, tmp2/*tmp*/);
++
++ // Update LR in top_frame.
++ std(tmp3, _top_ijava_frame_abi(frame_manager_lr), R1_SP);
++
++ std(tmp4, _top_ijava_frame_abi(initial_caller_sp), R1_SP);
++
++ // Store the top-frame stack-pointer for c2i adapters.
++ std(R1_SP, _top_ijava_frame_abi(top_frame_sp), R1_SP);
++}
++
++// Turn state's interpreter frame into the current TOP_IJAVA_FRAME.
++void InterpreterMacroAssembler::pop_interpreter_frame_to_state(Register state, Register tmp1, Register tmp2, Register tmp3) {
++ assert_different_registers(R14_state, R15_prev_state, tmp1, tmp2, tmp3);
++
++ if (state == R14_state) {
++ ld(tmp1/*state's fp*/, state_(_last_Java_fp));
++ ld(tmp2/*state's sp*/, state_(_last_Java_sp));
++ } else if (state == R15_prev_state) {
++ ld(tmp1/*state's fp*/, prev_state_(_last_Java_fp));
++ ld(tmp2/*state's sp*/, prev_state_(_last_Java_sp));
++ } else {
++ ShouldNotReachHere();
++ }
++
++ // Merge top frames.
++ std(tmp1, _abi(callers_sp), R1_SP);
++
++ // Tmp2 is new SP.
++ // Tmp1 is parent's SP.
++ resize_frame_absolute(tmp2/*addr*/, tmp1/*tmp*/, tmp2/*tmp*/);
++
++ // Update LR in top_frame.
++ // Must be interpreter frame.
++ get_PC_trash_LR(tmp3);
++ std(tmp3, _top_ijava_frame_abi(frame_manager_lr), R1_SP);
++ // Used for non-initial callers by unextended_sp().
++ std(R1_SP, _top_ijava_frame_abi(initial_caller_sp), R1_SP);
++}
++
++// Set SP to the initial caller's sp, but fix the back chain first.
++void InterpreterMacroAssembler::resize_frame_to_initial_caller(Register tmp1, Register tmp2) {
++ ld(tmp1, _parent_ijava_frame_abi(initial_caller_sp), R1_SP);
++ ld(tmp2, _parent_ijava_frame_abi(callers_sp), R1_SP);
++ std(tmp2, _parent_ijava_frame_abi(callers_sp), tmp1); // Fix back chain ...
++ mr(R1_SP, tmp1); // ... and resize to initial caller.
++}
++
++// Pop the current interpreter state (without popping the corresponding
++// frame) and restore R14_state and R15_prev_state accordingly.
++// Use prev_state_may_be_0 to indicate whether prev_state may be 0
++// in order to generate an extra check before retrieving prev_state_(_prev_link).
++void InterpreterMacroAssembler::pop_interpreter_state(bool prev_state_may_be_0)
++{
++ // Move prev_state to state and restore prev_state from state_(_prev_link).
++ Label prev_state_is_0;
++ mr(R14_state, R15_prev_state);
++
++ // Don't retrieve /*state==*/prev_state_(_prev_link)
++ // if /*state==*/prev_state is 0.
++ if (prev_state_may_be_0) {
++ cmpdi(CCR0, R15_prev_state, 0);
++ beq(CCR0, prev_state_is_0);
++ }
++
++ ld(R15_prev_state, /*state==*/prev_state_(_prev_link));
++ bind(prev_state_is_0);
++}
++
++void InterpreterMacroAssembler::restore_prev_state() {
++ // _prev_link is private, but cInterpreter is a friend.
++ ld(R15_prev_state, state_(_prev_link));
++}
++#endif // CC_INTERP
+--- ./hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,301 @@
++/*
++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2014 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_PPC_VM_INTERP_MASM_PPC_64_HPP
++#define CPU_PPC_VM_INTERP_MASM_PPC_64_HPP
++
++#include "asm/macroAssembler.hpp"
++#include "interpreter/invocationCounter.hpp"
++
++// This file specializes the assembler with interpreter-specific macros.
++
++
++class InterpreterMacroAssembler: public MacroAssembler {
++
++ public:
++ InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code) {}
++
++ void null_check_throw(Register a, int offset, Register temp_reg);
++
++ void branch_to_entry(address entry, Register Rscratch);
++
++ // Handy address generation macros.
++#define thread_(field_name) in_bytes(JavaThread::field_name ## _offset()), R16_thread
++#define method_(field_name) in_bytes(Method::field_name ## _offset()), R19_method
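++// For example, lwz(R0, thread_(interp_only_mode)) expands to
++// lwz(R0, in_bytes(JavaThread::interp_only_mode_offset()), R16_thread).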
++
++#ifdef CC_INTERP
++#define state_(field_name) in_bytes(byte_offset_of(BytecodeInterpreter, field_name)), R14_state
++#define prev_state_(field_name) in_bytes(byte_offset_of(BytecodeInterpreter, field_name)), R15_prev_state
++ void pop (TosState state) {}; // Not needed.
++ void push(TosState state) {}; // Not needed.
++#endif
++
++#ifndef CC_INTERP
++ virtual void check_and_handle_popframe(Register java_thread);
++ virtual void check_and_handle_earlyret(Register java_thread);
++
++ // Base routine for all dispatches.
++ void dispatch_base(TosState state, address* table);
++
++ void load_earlyret_value(TosState state, Register Rscratch1);
++
++ static const Address l_tmp;
++ static const Address d_tmp;
++
++ // dispatch routines
++ void dispatch_next(TosState state, int step = 0);
++ void dispatch_via (TosState state, address* table);
++ void load_dispatch_table(Register dst, address* table);
++ void dispatch_Lbyte_code(TosState state, Register bytecode, address* table, bool verify = false);
++
++ // Called by shared interpreter generator.
++ void dispatch_prolog(TosState state, int step = 0);
++ void dispatch_epilog(TosState state, int step = 0);
++
++ // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls.
++ void super_call_VM_leaf(Register thread_cache, address entry_point, Register arg_1);
++ void super_call_VM(Register thread_cache, Register oop_result, Register last_java_sp,
++ address entry_point, Register arg_1, Register arg_2, bool check_exception = true);
++
++ // Generate a subtype check: branch to ok_is_subtype if sub_klass is
++ // a subtype of super_klass. Blows registers tmp1, tmp2 and tmp3.
++ void gen_subtype_check(Register sub_klass, Register super_klass,
++ Register tmp1, Register tmp2, Register tmp3, Label &ok_is_subtype);
++
++ // Load object from cpool->resolved_references(index).
++ void load_resolved_reference_at_index(Register result, Register index);
++
++ void generate_stack_overflow_check_with_compare_and_throw(Register Rmem_frame_size, Register Rscratch1);
++ void load_receiver(Register Rparam_count, Register Rrecv_dst);
++
++ // helpers for expression stack
++ void pop_i( Register r = R17_tos);
++ void pop_ptr( Register r = R17_tos);
++ void pop_l( Register r = R17_tos);
++ void pop_f(FloatRegister f = F15_ftos);
++ void pop_d(FloatRegister f = F15_ftos);
++
++ void push_i( Register r = R17_tos);
++ void push_ptr( Register r = R17_tos);
++ void push_l( Register r = R17_tos);
++ void push_f(FloatRegister f = F15_ftos);
++ void push_d(FloatRegister f = F15_ftos);
++
++ void push_2ptrs(Register first, Register second);
++
++ void push_l_pop_d(Register l = R17_tos, FloatRegister d = F15_ftos);
++ void push_d_pop_l(FloatRegister d = F15_ftos, Register l = R17_tos);
++
++ void pop (TosState state); // transition vtos -> state
++ void push(TosState state); // transition state -> vtos
++ void empty_expression_stack(); // Resets both Lesp and SP.
++
++ public:
++ // Load values from bytecode stream:
++
++ enum signedOrNot { Signed, Unsigned };
++ enum setCCOrNot { set_CC, dont_set_CC };
++
++ void get_2_byte_integer_at_bcp(int bcp_offset,
++ Register Rdst,
++ signedOrNot is_signed);
++
++ void get_4_byte_integer_at_bcp(int bcp_offset,
++ Register Rdst,
++ signedOrNot is_signed = Unsigned);
++
++ void get_cache_index_at_bcp(Register Rdst, int bcp_offset, size_t index_size);
++
++ void get_cache_and_index_at_bcp(Register cache, int bcp_offset, size_t index_size = sizeof(u2));
++
++
++ // common code
++
++ void field_offset_at(int n, Register tmp, Register dest, Register base);
++ int field_offset_at(Register object, address bcp, int offset);
++ void fast_iaaccess(int n, address bcp);
++ void fast_iagetfield(address bcp);
++ void fast_iaputfield(address bcp, bool do_store_check);
++
++ void index_check(Register array, Register index, int index_shift, Register tmp, Register res);
++ void index_check_without_pop(Register array, Register index, int index_shift, Register tmp, Register res);
++
++ void get_const(Register Rdst);
++ void get_constant_pool(Register Rdst);
++ void get_constant_pool_cache(Register Rdst);
++ void get_cpool_and_tags(Register Rcpool, Register Rtags);
++ void is_a(Label& L);
++
++ // Java Call Helpers
++ void call_from_interpreter(Register Rtarget_method, Register Rret_addr, Register Rscratch1, Register Rscratch2);
++
++ // --------------------------------------------------
++
++ void unlock_if_synchronized_method(TosState state, bool throw_monitor_exception = true,
++ bool install_monitor_exception = true);
++
++ // Removes the current activation (incl. unlocking of monitors).
++ // Additionally this code is used for earlyReturn in which case we
++ // want to skip throwing an exception and installing an exception.
++ void remove_activation(TosState state,
++ bool throw_monitor_exception = true,
++ bool install_monitor_exception = true);
++ void merge_frames(Register Rtop_frame_sp, Register return_pc, Register Rscratch1, Register Rscratch2); // merge top frames
++
++ void add_monitor_to_stack(bool stack_is_empty, Register Rtemp1, Register Rtemp2);
++
++ // Local variable access helpers
++ void load_local_int(Register Rdst_value, Register Rdst_address, Register Rindex);
++ void load_local_long(Register Rdst_value, Register Rdst_address, Register Rindex);
++ void load_local_ptr(Register Rdst_value, Register Rdst_address, Register Rindex);
++ void load_local_float(FloatRegister Rdst_value, Register Rdst_address, Register Rindex);
++ void load_local_double(FloatRegister Rdst_value, Register Rdst_address, Register Rindex);
++ void store_local_int(Register Rvalue, Register Rindex);
++ void store_local_long(Register Rvalue, Register Rindex);
++ void store_local_ptr(Register Rvalue, Register Rindex);
++ void store_local_float(FloatRegister Rvalue, Register Rindex);
++ void store_local_double(FloatRegister Rvalue, Register Rindex);
++
++ // Call VM for std frames
++ // Special call VM versions that check for exceptions and forward exception
++ // via short cut (not via expensive forward exception stub).
++ void check_and_forward_exception(Register Rscratch1, Register Rscratch2);
++ void call_VM(Register oop_result, address entry_point, bool check_exceptions = true);
++ void call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions = true);
++ void call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true);
++ void call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true);
++ // Should not be used:
++ void call_VM(Register oop_result, Register last_java_sp, address entry_point, bool check_exceptions = true) {ShouldNotReachHere();}
++ void call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions = true) {ShouldNotReachHere();}
++ void call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true) {ShouldNotReachHere();}
++ void call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true) {ShouldNotReachHere();}
++
++ Address first_local_in_stack();
++
++ enum LoadOrStore { load, store };
++ void static_iload_or_store(int which_local, LoadOrStore direction, Register Rtmp);
++ void static_aload_or_store(int which_local, LoadOrStore direction, Register Rtmp);
++ void static_dload_or_store(int which_local, LoadOrStore direction);
++
++ void save_interpreter_state(Register scratch);
++ void restore_interpreter_state(Register scratch, bool bcp_and_mdx_only = false);
++
++ void increment_backedge_counter(const Register Rcounters, Register Rtmp, Register Rtmp2, Register Rscratch);
++ void test_backedge_count_for_osr(Register backedge_count, Register branch_bcp, Register Rtmp);
++
++ void record_static_call_in_profile(Register Rentry, Register Rtmp);
++ void record_receiver_call_in_profile(Register Rklass, Register Rentry, Register Rtmp);
++#endif // !CC_INTERP
++
++ void get_method_counters(Register method, Register Rcounters, Label& skip);
++ void increment_invocation_counter(Register iv_be_count, Register Rtmp1, Register Rtmp2_r0);
++
++ // Object locking
++ void lock_object (Register lock_reg, Register obj_reg);
++ void unlock_object(Register lock_reg, bool check_for_exceptions = true);
++
++#ifndef CC_INTERP
++
++ // Interpreter profiling operations
++ void set_method_data_pointer_for_bcp();
++ void test_method_data_pointer(Label& zero_continue);
++ void verify_method_data_pointer();
++ void test_invocation_counter_for_mdp(Register invocation_count, Register Rscratch, Label &profile_continue);
++
++ void set_mdp_data_at(int constant, Register value);
++
++ void increment_mdp_data_at(int constant, Register counter_addr, Register Rbumped_count, bool decrement = false);
++
++ void increment_mdp_data_at(Register counter_addr, Register Rbumped_count, bool decrement = false);
++ void increment_mdp_data_at(Register reg, int constant, Register scratch, Register Rbumped_count, bool decrement = false);
++
++ void set_mdp_flag_at(int flag_constant, Register scratch);
++ void test_mdp_data_at(int offset, Register value, Label& not_equal_continue, Register test_out);
++
++ void update_mdp_by_offset(int offset_of_disp, Register scratch);
++ void update_mdp_by_offset(Register reg, int offset_of_disp,
++ Register scratch);
++ void update_mdp_by_constant(int constant);
++ void update_mdp_for_ret(TosState state, Register return_bci);
++
++ void profile_taken_branch(Register scratch, Register bumped_count);
++ void profile_not_taken_branch(Register scratch1, Register scratch2);
++ void profile_call(Register scratch1, Register scratch2);
++ void profile_final_call(Register scratch1, Register scratch2);
++ void profile_virtual_call(Register Rreceiver, Register Rscratch1, Register Rscratch2, bool receiver_can_be_null);
++ void profile_typecheck(Register Rklass, Register Rscratch1, Register Rscratch2);
++ void profile_typecheck_failed(Register Rscratch1, Register Rscratch2);
++ void profile_ret(TosState state, Register return_bci, Register scratch1, Register scratch2);
++ void profile_switch_default(Register scratch1, Register scratch2);
++ void profile_switch_case(Register index, Register scratch1, Register scratch2, Register scratch3);
++ void profile_null_seen(Register Rscratch1, Register Rscratch2);
++ void record_klass_in_profile(Register receiver, Register scratch1, Register scratch2, bool is_virtual_call);
++ void record_klass_in_profile_helper(Register receiver, Register scratch1, Register scratch2, int start_row, Label& done, bool is_virtual_call);
++
++#endif // !CC_INTERP
++
++ // Debugging
++ void verify_oop(Register reg, TosState state = atos); // only if +VerifyOops && state == atos
++#ifndef CC_INTERP
++ void verify_oop_or_return_address(Register reg, Register rtmp); // for astore
++ void verify_FPU(int stack_depth, TosState state = ftos);
++#endif // !CC_INTERP
++
++ typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode;
++
++ // Support for jvmdi/jvmpi.
++ void notify_method_entry();
++ void notify_method_exit(bool is_native_method, TosState state,
++ NotifyMethodExitMode mode, bool check_exceptions);
++
++#ifdef CC_INTERP
++ // Convert the current TOP_IJAVA_FRAME into a PARENT_IJAVA_FRAME
++ // (using parent_frame_resize) and push a new interpreter
++ // TOP_IJAVA_FRAME (using frame_size).
++ void push_interpreter_frame(Register top_frame_size, Register parent_frame_resize,
++ Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register pc=noreg);
++
++ // Pop the topmost TOP_IJAVA_FRAME and convert the previous
++ // PARENT_IJAVA_FRAME back into a TOP_IJAVA_FRAME.
++ void pop_interpreter_frame(Register tmp1, Register tmp2, Register tmp3, Register tmp4);
++
++ // Turn state's interpreter frame into the current TOP_IJAVA_FRAME.
++ void pop_interpreter_frame_to_state(Register state, Register tmp1, Register tmp2, Register tmp3);
++
++ // Set SP to the initial caller's sp, but fix the back chain first.
++ void resize_frame_to_initial_caller(Register tmp1, Register tmp2);
++
++ // Pop the current interpreter state (without popping the
++ // corresponding frame) and restore R14_state and R15_prev_state
++ // accordingly. Use prev_state_may_be_0 to indicate whether
++ // prev_state may be 0 in order to generate an extra check before
++ // retrieving prev_state_(_prev_link).
++ void pop_interpreter_state(bool prev_state_may_be_0);
++
++ void restore_prev_state();
++#endif
++};
++
++#endif // CPU_PPC_VM_INTERP_MASM_PPC_64_HPP
+--- ./hotspot/src/cpu/ppc/vm/interpreterGenerator_ppc.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/interpreterGenerator_ppc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,37 @@
++/*
++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_PPC_VM_INTERPRETERGENERATOR_PPC_HPP
++#define CPU_PPC_VM_INTERPRETERGENERATOR_PPC_HPP
++
++ friend class AbstractInterpreterGenerator;
++
++ private:
++
++ address generate_abstract_entry(void);
++ address generate_accessor_entry(void);
++ address generate_Reference_get_entry(void);
++
++#endif // CPU_PPC_VM_INTERPRETERGENERATOR_PPC_HPP
+--- ./hotspot/src/cpu/ppc/vm/interpreterRT_ppc.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/interpreterRT_ppc.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,155 @@
++/*
++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "asm/assembler.inline.hpp"
++#include "interpreter/interpreter.hpp"
++#include "interpreter/interpreterRuntime.hpp"
++#include "memory/allocation.inline.hpp"
++#include "memory/universe.inline.hpp"
++#include "oops/method.hpp"
++#include "oops/oop.inline.hpp"
++#include "runtime/handles.inline.hpp"
++#include "runtime/icache.hpp"
++#include "runtime/interfaceSupport.hpp"
++#include "runtime/signature.hpp"
++
++#define __ _masm->
++
++// Access macros for Java and C arguments.
++// The first Java argument is at index -1.
++#define locals_j_arg_at(index) (Interpreter::local_offset_in_bytes(index)), R18_locals
++// The first C argument is at index 0.
++#define sp_c_arg_at(index) ((index)*wordSize + _abi(carg_1)), R1_SP
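++// For example, std(r, sp_c_arg_at(2)) expands to
++// std(r, 2*wordSize + _abi(carg_1), R1_SP), i.e. the third C argument slot.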
++
++// Implementation of SignatureHandlerGenerator
++
++void InterpreterRuntime::SignatureHandlerGenerator::pass_int() {
++ Argument jni_arg(jni_offset());
++ Register r = jni_arg.is_register() ? jni_arg.as_register() : R0;
++
++ __ lwa(r, locals_j_arg_at(offset())); // sign extension of integer
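++ // In debug builds DEBUG_ONLY(true ||) makes the condition always true, so the
++ // value is also spilled to the C argument area (presumably so the in-memory
++ // copy of the argument list is always complete for verification).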
++ if (DEBUG_ONLY(true ||) !jni_arg.is_register()) {
++ __ std(r, sp_c_arg_at(jni_arg.number()));
++ }
++}
++
++void InterpreterRuntime::SignatureHandlerGenerator::pass_long() {
++ Argument jni_arg(jni_offset());
++ Register r = jni_arg.is_register() ? jni_arg.as_register() : R0;
++
++ __ ld(r, locals_j_arg_at(offset()+1)); // long resides in upper slot
++ if (DEBUG_ONLY(true ||) !jni_arg.is_register()) {
++ __ std(r, sp_c_arg_at(jni_arg.number()));
++ }
++}
++
++void InterpreterRuntime::SignatureHandlerGenerator::pass_float() {
++ FloatRegister fp_reg = (_num_used_fp_arg_regs < 13/*max_fp_register_arguments*/)
++ ? as_FloatRegister((_num_used_fp_arg_regs++) + F1_ARG1->encoding())
++ : F0;
++
++ __ lfs(fp_reg, locals_j_arg_at(offset()));
++ if (DEBUG_ONLY(true ||) jni_offset() > 8) {
++ __ stfs(fp_reg, sp_c_arg_at(jni_offset()));
++ }
++}
++
++void InterpreterRuntime::SignatureHandlerGenerator::pass_double() {
++ FloatRegister fp_reg = (_num_used_fp_arg_regs < 13/*max_fp_register_arguments*/)
++ ? as_FloatRegister((_num_used_fp_arg_regs++) + F1_ARG1->encoding())
++ : F0;
++
++ __ lfd(fp_reg, locals_j_arg_at(offset()+1));
++ if (DEBUG_ONLY(true ||) jni_offset() > 8) {
++ __ stfd(fp_reg, sp_c_arg_at(jni_offset()));
++ }
++}
++
++void InterpreterRuntime::SignatureHandlerGenerator::pass_object() {
++ Argument jni_arg(jni_offset());
++ Register r = jni_arg.is_register() ? jni_arg.as_register() : R11_scratch1;
++
++ // The handle for a receiver will never be null.
++ bool do_NULL_check = offset() != 0 || is_static();
++
++ Label do_null;
++ if (do_NULL_check) {
++ __ ld(R0, locals_j_arg_at(offset()));
++ __ cmpdi(CCR0, R0, 0);
++ __ li(r, 0);
++ __ beq(CCR0, do_null);
++ }
++ __ addir(r, locals_j_arg_at(offset()));
++ __ bind(do_null);
++ if (DEBUG_ONLY(true ||) !jni_arg.is_register()) {
++ __ std(r, sp_c_arg_at(jni_arg.number()));
++ }
++}
++
++void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) {
++#if !defined(ABI_ELFv2)
++ // Emit fd for current codebuffer. Needs patching!
++ __ emit_fd();
++#endif
++
++ // Generate code to handle arguments.
++ iterate(fingerprint);
++
++ // Return the result handler.
++ __ load_const(R3_RET, AbstractInterpreter::result_handler(method()->result_type()));
++ __ blr();
++
++ __ flush();
++}
++
++#undef __
++
++// Implementation of SignatureHandlerLibrary
++
++void SignatureHandlerLibrary::pd_set_handler(address handler) {
++#if !defined(ABI_ELFv2)
++ // patch fd here.
++ FunctionDescriptor* fd = (FunctionDescriptor*) handler;
++
++ fd->set_entry(handler + (int)sizeof(FunctionDescriptor));
++ assert(fd->toc() == (address)0xcafe, "need to adjust TOC here");
++#endif
++}
++
++
++// Access function to get the signature.
++IRT_ENTRY(address, InterpreterRuntime::get_signature(JavaThread* thread, Method* method))
++ methodHandle m(thread, method);
++ assert(m->is_native(), "sanity check");
++ Symbol *s = m->signature();
++ return (address) s->base();
++IRT_END
++
++IRT_ENTRY(address, InterpreterRuntime::get_result_handler(JavaThread* thread, Method* method))
++ methodHandle m(thread, method);
++ assert(m->is_native(), "sanity check");
++ return AbstractInterpreter::result_handler(m->result_type());
++IRT_END
+--- ./hotspot/src/cpu/ppc/vm/interpreterRT_ppc.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/interpreterRT_ppc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,62 @@
++/*
++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_PPC_VM_INTERPRETERRT_PPC_HPP
++#define CPU_PPC_VM_INTERPRETERRT_PPC_HPP
++
++#include "memory/allocation.hpp"
++
++// native method calls
++
++class SignatureHandlerGenerator: public NativeSignatureIterator {
++ private:
++ MacroAssembler* _masm;
++ // number of already used floating-point argument registers
++ int _num_used_fp_arg_regs;
++
++ void pass_int();
++ void pass_long();
++ void pass_double();
++ void pass_float();
++ void pass_object();
++
++ public:
++ // Creation
++ SignatureHandlerGenerator(methodHandle method, CodeBuffer* buffer) : NativeSignatureIterator(method) {
++ _masm = new MacroAssembler(buffer);
++ _num_used_fp_arg_regs = 0;
++ }
++
++ // Code generation
++ void generate(uint64_t fingerprint);
++};
++
++// Support for generate_slow_signature_handler.
++static address get_result_handler(JavaThread* thread, Method* method);
++
++// A function to get the signature.
++static address get_signature(JavaThread* thread, Method* method);
++
++#endif // CPU_PPC_VM_INTERPRETERRT_PPC_HPP
+--- ./hotspot/src/cpu/ppc/vm/interpreter_ppc.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/interpreter_ppc.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,787 @@
++/*
++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2014 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "asm/assembler.hpp"
++#include "asm/macroAssembler.inline.hpp"
++#include "interpreter/bytecodeHistogram.hpp"
++#include "interpreter/interpreter.hpp"
++#include "interpreter/interpreterGenerator.hpp"
++#include "interpreter/interpreterRuntime.hpp"
++#include "interpreter/templateTable.hpp"
++#include "oops/arrayOop.hpp"
++#include "oops/methodData.hpp"
++#include "oops/method.hpp"
++#include "oops/oop.inline.hpp"
++#include "prims/jvmtiExport.hpp"
++#include "prims/jvmtiThreadState.hpp"
++#include "prims/methodHandles.hpp"
++#include "runtime/arguments.hpp"
++#include "runtime/deoptimization.hpp"
++#include "runtime/frame.inline.hpp"
++#include "runtime/sharedRuntime.hpp"
++#include "runtime/stubRoutines.hpp"
++#include "runtime/synchronizer.hpp"
++#include "runtime/timer.hpp"
++#include "runtime/vframeArray.hpp"
++#include "utilities/debug.hpp"
++#ifdef COMPILER1
++#include "c1/c1_Runtime1.hpp"
++#endif
++
++#define __ _masm->
++
++#ifdef PRODUCT
++#define BLOCK_COMMENT(str) // nothing
++#else
++#define BLOCK_COMMENT(str) __ block_comment(str)
++#endif
++
++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
++
++int AbstractInterpreter::BasicType_as_index(BasicType type) {
++ int i = 0;
++ switch (type) {
++ case T_BOOLEAN: i = 0; break;
++ case T_CHAR : i = 1; break;
++ case T_BYTE : i = 2; break;
++ case T_SHORT : i = 3; break;
++ case T_INT : i = 4; break;
++ case T_LONG : i = 5; break;
++ case T_VOID : i = 6; break;
++ case T_FLOAT : i = 7; break;
++ case T_DOUBLE : i = 8; break;
++ case T_OBJECT : i = 9; break;
++ case T_ARRAY : i = 9; break;
++ default : ShouldNotReachHere();
++ }
++ assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, "index out of bounds");
++ return i;
++}
++
++address AbstractInterpreterGenerator::generate_slow_signature_handler() {
++ // Slow_signature handler that respects the PPC C calling conventions.
++ //
++ // We get called by the native entry code with our output register
++ // area == 8. First we call InterpreterRuntime::get_result_handler
++ // to copy the pointer to the signature string temporarily to the
++ // first C-argument and to return the result_handler in
++ // R3_RET. Since native_entry will copy the jni-pointer to the
++ // first C-argument slot later on, it is OK to occupy this slot
++ // temporarily. Then we copy the argument list on the Java
++ // expression stack into native varargs format on the native stack
++ // and load arguments into argument registers. Integer arguments in
++ // the varargs vector will be sign-extended to 8 bytes.
++ //
++ // On entry:
++ // R3_ARG1 - intptr_t* Address of java argument list in memory.
++ // R15_prev_state - BytecodeInterpreter* Address of interpreter state for
++ // this method
++ // R19_method
++ //
++ // On exit (just before return instruction):
++ // R3_RET - contains the address of the result_handler.
++ // R4_ARG2 - is not updated for static methods and contains "this" otherwise.
++ // R5_ARG3-R10_ARG8 - When the (i-2)th Java argument is not of type float or double,
++ // ARGi contains this argument. Otherwise, ARGi is not updated.
++ // F1_ARG1-F13_ARG13 - contain the first 13 arguments of type float or double.
++
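++ // Illustrative example (not from the description above): for an instance
++ // method with signature (IFJ)V, "this" is passed in R4_ARG2, the int in
++ // R5_ARG3, the float in F1_ARG1 (R6_ARG4 is left untouched), and the long
++ // in R7_ARG5.
++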
++ const int LogSizeOfTwoInstructions = 3;
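++ // Two 4-byte instructions per dispatch case: 1 << 3 == 8 bytes. Used below to
++ // scale argcnt/fpcnt into the move_int_arg/move_float_arg jump tables.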
++
++ // FIXME: use Argument:: GL: Argument names different numbers!
++ const int max_fp_register_arguments = 13;
++ const int max_int_register_arguments = 6; // first 2 are reserved
++
++ const Register arg_java = R21_tmp1;
++ const Register arg_c = R22_tmp2;
++ const Register signature = R23_tmp3; // is string
++ const Register sig_byte = R24_tmp4;
++ const Register fpcnt = R25_tmp5;
++ const Register argcnt = R26_tmp6;
++ const Register intSlot = R27_tmp7;
++ const Register target_sp = R28_tmp8;
++ const FloatRegister floatSlot = F0;
++
++ address entry = __ function_entry();
++
++ __ save_LR_CR(R0);
++ __ save_nonvolatile_gprs(R1_SP, _spill_nonvolatiles_neg(r14));
++ // We use target_sp for storing arguments in the C frame.
++ __ mr(target_sp, R1_SP);
++ __ push_frame_reg_args_nonvolatiles(0, R11_scratch1);
++
++ __ mr(arg_java, R3_ARG1);
++
++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::get_signature), R16_thread, R19_method);
++
++ // Signature is in R3_RET. Signature is callee saved.
++ __ mr(signature, R3_RET);
++
++ // Get the result handler.
++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::get_result_handler), R16_thread, R19_method);
++
++ {
++ Label L;
++ // test if static
++ // _access_flags._flags must be at offset 0.
++ // TODO PPC port: requires change in shared code.
++ //assert(in_bytes(AccessFlags::flags_offset()) == 0,
++ // "MethodDesc._access_flags == MethodDesc._access_flags._flags");
++ // _access_flags must be a 32 bit value.
++ assert(sizeof(AccessFlags) == 4, "wrong size");
++ __ lwa(R11_scratch1/*access_flags*/, method_(access_flags));
++ // testbit with condition register.
++ __ testbitdi(CCR0, R0, R11_scratch1/*access_flags*/, JVM_ACC_STATIC_BIT);
++ __ btrue(CCR0, L);
++ // For non-static functions, pass "this" in R4_ARG2 and copy it
++ // to 2nd C-arg slot.
++ // We need to box the Java object here, so we use arg_java
++ // (address of current Java stack slot) as argument and don't
++ // dereference it as in case of ints, floats, etc.
++ __ mr(R4_ARG2, arg_java);
++ __ addi(arg_java, arg_java, -BytesPerWord);
++ __ std(R4_ARG2, _abi(carg_2), target_sp);
++ __ bind(L);
++ }
++
++ // Will be incremented directly after loop_start. argcnt=0
++ // corresponds to 3rd C argument.
++ __ li(argcnt, -1);
++ // arg_c points to 3rd C argument
++ __ addi(arg_c, target_sp, _abi(carg_3));
++ // no floating-point args parsed so far
++ __ li(fpcnt, 0);
++
++ Label move_intSlot_to_ARG, move_floatSlot_to_FARG;
++ Label loop_start, loop_end;
++ Label do_int, do_long, do_float, do_double, do_dontreachhere, do_object, do_array, do_boxed;
++
++ // signature points to '(' at entry
++#ifdef ASSERT
++ __ lbz(sig_byte, 0, signature);
++ __ cmplwi(CCR0, sig_byte, '(');
++ __ bne(CCR0, do_dontreachhere);
++#endif
++
++ __ bind(loop_start);
++
++ __ addi(argcnt, argcnt, 1);
++ __ lbzu(sig_byte, 1, signature);
++
++ __ cmplwi(CCR0, sig_byte, ')'); // end of signature
++ __ beq(CCR0, loop_end);
++
++ __ cmplwi(CCR0, sig_byte, 'B'); // byte
++ __ beq(CCR0, do_int);
++
++ __ cmplwi(CCR0, sig_byte, 'C'); // char
++ __ beq(CCR0, do_int);
++
++ __ cmplwi(CCR0, sig_byte, 'D'); // double
++ __ beq(CCR0, do_double);
++
++ __ cmplwi(CCR0, sig_byte, 'F'); // float
++ __ beq(CCR0, do_float);
++
++ __ cmplwi(CCR0, sig_byte, 'I'); // int
++ __ beq(CCR0, do_int);
++
++ __ cmplwi(CCR0, sig_byte, 'J'); // long
++ __ beq(CCR0, do_long);
++
++ __ cmplwi(CCR0, sig_byte, 'S'); // short
++ __ beq(CCR0, do_int);
++
++ __ cmplwi(CCR0, sig_byte, 'Z'); // boolean
++ __ beq(CCR0, do_int);
++
++ __ cmplwi(CCR0, sig_byte, 'L'); // object
++ __ beq(CCR0, do_object);
++
++ __ cmplwi(CCR0, sig_byte, '['); // array
++ __ beq(CCR0, do_array);
++
++ // __ cmplwi(CCR0, sig_byte, 'V'); // void cannot appear since we do not parse the return type
++ // __ beq(CCR0, do_void);
++
++ __ bind(do_dontreachhere);
++
++ __ unimplemented("ShouldNotReachHere in slow_signature_handler", 120);
++
++ __ bind(do_array);
++
++ {
++ Label start_skip, end_skip;
++
++ __ bind(start_skip);
++ __ lbzu(sig_byte, 1, signature);
++ __ cmplwi(CCR0, sig_byte, '[');
++ __ beq(CCR0, start_skip); // skip further brackets
++ __ cmplwi(CCR0, sig_byte, '9');
++ __ bgt(CCR0, end_skip); // no optional size
++ __ cmplwi(CCR0, sig_byte, '0');
++ __ bge(CCR0, start_skip); // skip optional size
++ __ bind(end_skip);
++
++ __ cmplwi(CCR0, sig_byte, 'L');
++ __ beq(CCR0, do_object); // for arrays of objects, the name of the object must be skipped
++ __ b(do_boxed); // otherwise, go directly to do_boxed
++ }
++
++ __ bind(do_object);
++ {
++ Label L;
++ __ bind(L);
++ __ lbzu(sig_byte, 1, signature);
++ __ cmplwi(CCR0, sig_byte, ';');
++ __ bne(CCR0, L);
++ }
++ // Need to box the Java object here, so we use arg_java (address of
++ // current Java stack slot) as argument and don't dereference it as
++ // in case of ints, floats, etc.
++ Label do_null;
++ __ bind(do_boxed);
++ __ ld(R0, 0, arg_java);
++ __ cmpdi(CCR0, R0, 0);
++ __ li(intSlot, 0);
++ __ beq(CCR0, do_null);
++ __ mr(intSlot, arg_java);
++ __ bind(do_null);
++ __ std(intSlot, 0, arg_c);
++ __ addi(arg_java, arg_java, -BytesPerWord);
++ __ addi(arg_c, arg_c, BytesPerWord);
++ __ cmplwi(CCR0, argcnt, max_int_register_arguments);
++ __ blt(CCR0, move_intSlot_to_ARG);
++ __ b(loop_start);
++
++ __ bind(do_int);
++ __ lwa(intSlot, 0, arg_java);
++ __ std(intSlot, 0, arg_c);
++ __ addi(arg_java, arg_java, -BytesPerWord);
++ __ addi(arg_c, arg_c, BytesPerWord);
++ __ cmplwi(CCR0, argcnt, max_int_register_arguments);
++ __ blt(CCR0, move_intSlot_to_ARG);
++ __ b(loop_start);
++
++ __ bind(do_long);
++ __ ld(intSlot, -BytesPerWord, arg_java);
++ __ std(intSlot, 0, arg_c);
++ __ addi(arg_java, arg_java, - 2 * BytesPerWord);
++ __ addi(arg_c, arg_c, BytesPerWord);
++ __ cmplwi(CCR0, argcnt, max_int_register_arguments);
++ __ blt(CCR0, move_intSlot_to_ARG);
++ __ b(loop_start);
++
++ __ bind(do_float);
++ __ lfs(floatSlot, 0, arg_java);
++#if defined(LINUX)
++ __ stfs(floatSlot, 4, arg_c);
++#elif defined(AIX)
++ __ stfs(floatSlot, 0, arg_c);
++#else
++#error "unknown OS"
++#endif
++ __ addi(arg_java, arg_java, -BytesPerWord);
++ __ addi(arg_c, arg_c, BytesPerWord);
++ __ cmplwi(CCR0, fpcnt, max_fp_register_arguments);
++ __ blt(CCR0, move_floatSlot_to_FARG);
++ __ b(loop_start);
++
++ __ bind(do_double);
++ __ lfd(floatSlot, - BytesPerWord, arg_java);
++ __ stfd(floatSlot, 0, arg_c);
++ __ addi(arg_java, arg_java, - 2 * BytesPerWord);
++ __ addi(arg_c, arg_c, BytesPerWord);
++ __ cmplwi(CCR0, fpcnt, max_fp_register_arguments);
++ __ blt(CCR0, move_floatSlot_to_FARG);
++ __ b(loop_start);
++
++ __ bind(loop_end);
++
++ __ pop_frame();
++ __ restore_nonvolatile_gprs(R1_SP, _spill_nonvolatiles_neg(r14));
++ __ restore_LR_CR(R0);
++
++ __ blr();
++
++ Label move_int_arg, move_float_arg;
++ __ bind(move_int_arg); // each case must consist of 2 instructions (otherwise adapt LogSizeOfTwoInstructions)
++ __ mr(R5_ARG3, intSlot); __ b(loop_start);
++ __ mr(R6_ARG4, intSlot); __ b(loop_start);
++ __ mr(R7_ARG5, intSlot); __ b(loop_start);
++ __ mr(R8_ARG6, intSlot); __ b(loop_start);
++ __ mr(R9_ARG7, intSlot); __ b(loop_start);
++ __ mr(R10_ARG8, intSlot); __ b(loop_start);
++
++ __ bind(move_float_arg); // each case must consist of 2 instructions (otherwise adapt LogSizeOfTwoInstructions)
++ __ fmr(F1_ARG1, floatSlot); __ b(loop_start);
++ __ fmr(F2_ARG2, floatSlot); __ b(loop_start);
++ __ fmr(F3_ARG3, floatSlot); __ b(loop_start);
++ __ fmr(F4_ARG4, floatSlot); __ b(loop_start);
++ __ fmr(F5_ARG5, floatSlot); __ b(loop_start);
++ __ fmr(F6_ARG6, floatSlot); __ b(loop_start);
++ __ fmr(F7_ARG7, floatSlot); __ b(loop_start);
++ __ fmr(F8_ARG8, floatSlot); __ b(loop_start);
++ __ fmr(F9_ARG9, floatSlot); __ b(loop_start);
++ __ fmr(F10_ARG10, floatSlot); __ b(loop_start);
++ __ fmr(F11_ARG11, floatSlot); __ b(loop_start);
++ __ fmr(F12_ARG12, floatSlot); __ b(loop_start);
++ __ fmr(F13_ARG13, floatSlot); __ b(loop_start);
++
++ __ bind(move_intSlot_to_ARG);
++ __ sldi(R0, argcnt, LogSizeOfTwoInstructions);
++ __ load_const(R11_scratch1, move_int_arg); // Label must be bound here.
++ __ add(R11_scratch1, R0, R11_scratch1);
++ __ mtctr(R11_scratch1/*branch_target*/);
++ __ bctr();
++ __ bind(move_floatSlot_to_FARG);
++ __ sldi(R0, fpcnt, LogSizeOfTwoInstructions);
++ __ addi(fpcnt, fpcnt, 1);
++ __ load_const(R11_scratch1, move_float_arg); // Label must be bound here.
++ __ add(R11_scratch1, R0, R11_scratch1);
++ __ mtctr(R11_scratch1/*branch_target*/);
++ __ bctr();
++
++ return entry;
++}
++
++address AbstractInterpreterGenerator::generate_result_handler_for(BasicType type) {
++ //
++ // Registers alive
++ // R3_RET
++ // LR
++ //
++ // Registers updated
++ // R3_RET
++ //
++
++ Label done;
++ address entry = __ pc();
++
++ switch (type) {
++ case T_BOOLEAN:
++ // convert !=0 to 1
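++ // R0 = -ret; (ret | -ret) has its most significant bit set iff ret != 0,
++ // so shifting the 32-bit word right by 31 yields exactly 0 or 1.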
++ __ neg(R0, R3_RET);
++ __ orr(R0, R3_RET, R0);
++ __ srwi(R3_RET, R0, 31);
++ break;
++ case T_BYTE:
++ // sign extend 8 bits
++ __ extsb(R3_RET, R3_RET);
++ break;
++ case T_CHAR:
++ // zero extend 16 bits
++ __ clrldi(R3_RET, R3_RET, 48);
++ break;
++ case T_SHORT:
++ // sign extend 16 bits
++ __ extsh(R3_RET, R3_RET);
++ break;
++ case T_INT:
++ // sign extend 32 bits
++ __ extsw(R3_RET, R3_RET);
++ break;
++ case T_LONG:
++ break;
++ case T_OBJECT:
++ // unbox result if not null
++ __ cmpdi(CCR0, R3_RET, 0);
++ __ beq(CCR0, done);
++ __ ld(R3_RET, 0, R3_RET);
++ __ verify_oop(R3_RET);
++ break;
++ case T_FLOAT:
++ break;
++ case T_DOUBLE:
++ break;
++ case T_VOID:
++ break;
++ default: ShouldNotReachHere();
++ }
++
++ __ BIND(done);
++ __ blr();
++
++ return entry;
++}
++
++// Abstract method entry.
++//
++address InterpreterGenerator::generate_abstract_entry(void) {
++ address entry = __ pc();
++
++ //
++ // Registers alive
++ // R16_thread - JavaThread*
++ // R19_method - callee's method (method to be invoked)
++ // R1_SP - SP prepared such that caller's outgoing args are near top
++ // LR - return address to caller
++ //
++ // Stack layout at this point:
++ //
++ // 0 [TOP_IJAVA_FRAME_ABI] <-- R1_SP
++ // alignment (optional)
++ // [outgoing Java arguments]
++ // ...
++ // PARENT [PARENT_IJAVA_FRAME_ABI]
++ // ...
++ //
++
++ // Can't use call_VM here because we have not set up a new
++ // interpreter state. Make the call to the vm and make it look like
++ // our caller set up the JavaFrameAnchor.
++ __ set_top_ijava_frame_at_SP_as_last_Java_frame(R1_SP, R12_scratch2/*tmp*/);
++
++ // Push a new C frame and save LR.
++ __ save_LR_CR(R0);
++ __ push_frame_reg_args(0, R11_scratch1);
++
++ // This is not a leaf but we have a JavaFrameAnchor now and we will
++ // check (create) exceptions afterward so this is ok.
++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError));
++
++ // Pop the C frame and restore LR.
++ __ pop_frame();
++ __ restore_LR_CR(R0);
++
++ // Reset JavaFrameAnchor from call_VM_leaf above.
++ __ reset_last_Java_frame();
++
++#ifdef CC_INTERP
++ // Return to frame manager, it will handle the pending exception.
++ __ blr();
++#else
++ // We don't know our caller, so jump to the general forward exception stub,
++ // which will also pop our full frame off. Satisfy the interface of
++ // SharedRuntime::generate_forward_exception()
++ __ load_const_optimized(R11_scratch1, StubRoutines::forward_exception_entry(), R0);
++ __ mtctr(R11_scratch1);
++ __ bctr();
++#endif
++
++ return entry;
++}
++
++// Call an accessor method (assuming it is resolved); otherwise drop into the
++// vanilla (slow path) entry.
++address InterpreterGenerator::generate_accessor_entry(void) {
++ if (!UseFastAccessorMethods && (!FLAG_IS_ERGO(UseFastAccessorMethods))) {
++ return NULL;
++ }
++
++ Label Lslow_path, Lacquire;
++
++ const Register
++ Rclass_or_obj = R3_ARG1,
++ Rconst_method = R4_ARG2,
++ Rcodes = Rconst_method,
++ Rcpool_cache = R5_ARG3,
++ Rscratch = R11_scratch1,
++ Rjvmti_mode = Rscratch,
++ Roffset = R12_scratch2,
++ Rflags = R6_ARG4,
++ Rbtable = R7_ARG5;
++
++ static address branch_table[number_of_states];
++
++ address entry = __ pc();
++
++ // Check for safepoint:
++ // Ditch this, real men don't need safepoint checks.
++
++ // Also check for JVMTI mode
++ // Check for null obj, take slow path if so.
++ __ ld(Rclass_or_obj, Interpreter::stackElementSize, CC_INTERP_ONLY(R17_tos) NOT_CC_INTERP(R15_esp));
++ __ lwz(Rjvmti_mode, thread_(interp_only_mode));
++ __ cmpdi(CCR1, Rclass_or_obj, 0);
++ __ cmpwi(CCR0, Rjvmti_mode, 0);
++ __ crorc(/*CCR0 eq*/2, /*CCR1 eq*/4+2, /*CCR0 eq*/2);
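++ // crorc folds both tests into one CR bit: CCR0.eq := CCR1.eq | !CCR0.eq,
++ // i.e. set iff this == null or interp_only_mode != 0.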
++ __ beq(CCR0, Lslow_path); // this==null or jvmti_mode!=0
++
++ // Do 2 things in parallel:
++ // 1. Load the index out of the first instruction word, which looks like this:
++ // <0x2a><0xb4>.
++ // 2. Load constant pool cache base.
++ __ ld(Rconst_method, in_bytes(Method::const_offset()), R19_method);
++ __ ld(Rcpool_cache, in_bytes(ConstMethod::constants_offset()), Rconst_method);
++
++ __ lhz(Rcodes, in_bytes(ConstMethod::codes_offset()) + 2, Rconst_method); // Lower half of 32 bit field.
++ __ ld(Rcpool_cache, ConstantPool::cache_offset_in_bytes(), Rcpool_cache);
++
++ // Get the const pool entry by means of <index>.
++ const int codes_shift = exact_log2(in_words(ConstantPoolCacheEntry::size()) * BytesPerWord);
++ __ slwi(Rscratch, Rcodes, codes_shift); // (codes&0xFFFF)<<codes_shift
++
++#ifdef ASSERT
++ for (int i = 0; i < number_of_states; ++i) {
++   //tty->print_cr("accessor_entry: branch_table[%d] = 0x%llx (opcode 0x%llx)", i, branch_table[i], *((unsigned int*)branch_table[i]));
++ }
++#endif
++
++ __ bind(Lslow_path);
++ __ branch_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals), Rscratch);
++ __ flush();
++
++ return entry;
++}
++
++// Interpreter intrinsic for WeakReference.get().
++// 1. Don't push a full-blown frame and go on dispatching, but fetch the value
++//    into R8 and return quickly.
++// 2. If G1 is active we *must* execute this intrinsic for correctness:
++//    it contains a GC barrier which puts the reference into the SATB buffer
++//    to indicate that someone holds a strong reference to the object the
++//    weak ref points to!
++address InterpreterGenerator::generate_Reference_get_entry(void) {
++ // Code: _aload_0, _getfield, _areturn
++ // parameter size = 1
++ //
++ // The code that gets generated by this routine is split into 2 parts:
++ // 1. the "intrinsified" code for G1 (or any SATB based GC),
++ // 2. the slow path - which is an expansion of the regular method entry.
++ //
++ // Notes:
++ // * In the G1 code we do not check whether we need to block for
++ // a safepoint. If G1 is enabled then we must execute the specialized
++ // code for Reference.get (except when the Reference object is null)
++ // so that we can log the value in the referent field with an SATB
++ // update buffer.
++ // If the code for the getfield template is modified so that the
++ // G1 pre-barrier code is executed when the current method is
++ // Reference.get() then going through the normal method entry
++ // will be fine.
++ // * The G1 code can, however, check the receiver object (the instance
++ // of java.lang.Reference) and jump to the slow path if null. If the
++ // Reference object is null then we obviously cannot fetch the referent
++ // and so we don't need to call the G1 pre-barrier. Thus we can use the
++ // regular method entry code to generate the NPE.
++ //
++// This code is based on generate_accessor_entry.
++
++ address entry = __ pc();
++
++ const int referent_offset = java_lang_ref_Reference::referent_offset;
++ guarantee(referent_offset > 0, "referent offset not initialized");
++
++ if (UseG1GC) {
++ Label slow_path;
++
++ // Debugging not possible, so can't use __ skip_if_jvmti_mode(slow_path, GR31_SCRATCH);
++
++ // In the G1 code we don't check if we need to reach a safepoint. We
++ // continue and the thread will safepoint at the next bytecode dispatch.
++
++ // If the receiver is null then it is OK to jump to the slow path.
++ __ ld(R3_RET, Interpreter::stackElementSize, CC_INTERP_ONLY(R17_tos) NOT_CC_INTERP(R15_esp)); // get receiver
++
++ // Check if receiver == NULL and go the slow path.
++ __ cmpdi(CCR0, R3_RET, 0);
++ __ beq(CCR0, slow_path);
++
++ // Load the value of the referent field.
++ __ load_heap_oop(R3_RET, referent_offset, R3_RET);
++
++ // Generate the G1 pre-barrier code to log the value of
++ // the referent field in an SATB buffer. Note with
++ // these parameters the pre-barrier does not generate
++ // the load of the previous value.
++
++ // Restore caller sp for c2i case.
++#ifdef ASSERT
++ __ ld(R9_ARG7, 0, R1_SP);
++ __ ld(R10_ARG8, 0, R21_sender_SP);
++ __ cmpd(CCR0, R9_ARG7, R10_ARG8);
++ __ asm_assert_eq("backlink", 0x544);
++#endif // ASSERT
++ __ mr(R1_SP, R21_sender_SP); // Cut the stack back to where the caller started.
++
++ __ g1_write_barrier_pre(noreg, // obj
++ noreg, // offset
++ R3_RET, // pre_val
++ R11_scratch1, // tmp
++ R12_scratch2, // tmp
++ true); // needs_frame
++
++ __ blr();
++
++ // Generate regular method entry.
++ __ bind(slow_path);
++ __ branch_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals), R11_scratch1);
++ __ flush();
++
++ return entry;
++ } else {
++ return generate_accessor_entry();
++ }
++}
++
++void Deoptimization::unwind_callee_save_values(frame* f, vframeArray* vframe_array) {
++ // This code is sort of the equivalent of C2IAdapter::setup_stack_frame back in
++ // the days we had adapter frames. When we deoptimize a situation where a
++ // compiled caller calls a compiled callee, the caller will have registers it
++ // expects to survive the call to the callee. If we deoptimize the callee the only
++ // way we can restore these registers is to have the oldest interpreter
++ // frame that we create restore these values. That is what this routine
++ // will accomplish.
++
++ // At the moment we have modified c2 to not have any callee save registers
++ // so this problem does not exist and this routine is just a place holder.
++
++ assert(f->is_interpreted_frame(), "must be interpreted");
++}
+--- ./hotspot/src/cpu/ppc/vm/interpreter_ppc.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/interpreter_ppc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,50 @@
++/*
++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2014 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_PPC_VM_INTERPRETER_PPC_HPP
++#define CPU_PPC_VM_INTERPRETER_PPC_HPP
++
++ public:
++
++ // Stack index relative to tos (which points at value).
++ static int expr_index_at(int i) {
++ return stackElementWords * i;
++ }
++
++ // Already negated by c++ interpreter.
++ static int local_index_at(int i) {
++ assert(i <= 0, "local direction already negated");
++ return stackElementWords * i;
++ }
++
++#ifndef CC_INTERP
++ // The offset in bytes to access an expression stack slot
++ // relative to the esp pointer.
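++ // For example, expr_offset_in_bytes(0) == wordSize: the top-of-stack
++ // element lives one word above the esp pointer.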
++ static int expr_offset_in_bytes(int slot) {
++ return stackElementSize * slot + wordSize;
++ }
++#endif
++
++#endif // CPU_PPC_VM_INTERPRETER_PPC_HPP
+--- ./hotspot/src/cpu/ppc/vm/javaFrameAnchor_ppc.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/javaFrameAnchor_ppc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,78 @@
++/*
++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2014 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_PPC_VM_JAVAFRAMEANCHOR_PPC_HPP
++#define CPU_PPC_VM_JAVAFRAMEANCHOR_PPC_HPP
++
++public:
++ // Each arch must define reset, save, restore
++ // These are used by objects that only care about:
++ // 1 - initializing a new state (thread creation, javaCalls)
++ // 2 - saving a current state (javaCalls)
++ // 3 - restoring an old state (javaCalls)
++
++ inline void clear(void) {
++ // clearing _last_Java_sp must be first
++ _last_Java_sp = NULL;
++ // fence?
++ OrderAccess::release();
++ _last_Java_pc = NULL;
++ }
++
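++ // Publish the pc before the sp: a non-NULL _last_Java_sp marks the anchor
++ // as set, so it must become visible last (hence the release barrier between).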
++ inline void set(intptr_t* sp, address pc) {
++ _last_Java_pc = pc;
++ OrderAccess::release();
++ _last_Java_sp = sp;
++ }
++
++ void copy(JavaFrameAnchor* src) {
++ // In order to make sure the transition state is valid for "this".
++ // We must clear _last_Java_sp before copying the rest of the new data.
++ //
++ // Hack Alert: Temporary bugfix for 4717480/4721647
++ // To act like previous version (pd_cache_state) don't NULL _last_Java_sp
++ // unless the value is changing.
++ if (_last_Java_sp != src->_last_Java_sp) {
++ _last_Java_sp = NULL;
++ OrderAccess::release();
++ }
++ _last_Java_pc = src->_last_Java_pc;
++ // Must be last so profiler will always see valid frame if has_last_frame() is true.
++ OrderAccess::release();
++ _last_Java_sp = src->_last_Java_sp;
++ }
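++
++  // Reader-side sketch (assumed counterpart, not spelled out here): a
++  // profiler walking the stack concurrently is expected to load in the
++  // opposite order,
++  //   sp = _last_Java_sp; OrderAccess::acquire(); pc = _last_Java_pc;
++  // so it observes either sp == NULL or a consistently published
++  // (sp, pc) pair.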
++
++ // Always walkable.
++ bool walkable(void) { return true; }
++  // Never anything to do since we are always walkable and can locate return addresses.
++ void make_walkable(JavaThread* thread) { }
++
++ intptr_t* last_Java_sp(void) const { return _last_Java_sp; }
++
++ address last_Java_pc(void) { return _last_Java_pc; }
++
++ void set_last_Java_sp(intptr_t* sp) { OrderAccess::release(); _last_Java_sp = sp; }
++
++#endif // CPU_PPC_VM_JAVAFRAMEANCHOR_PPC_HPP
+--- ./hotspot/src/cpu/ppc/vm/jniFastGetField_ppc.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/jniFastGetField_ppc.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,75 @@
++/*
++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "assembler_ppc.inline.hpp"
++#include "memory/resourceArea.hpp"
++#include "prims/jniFastGetField.hpp"
++#include "prims/jvm_misc.hpp"
++#include "runtime/safepoint.hpp"
++
++
++address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) {
++ // We don't have fast jni accessors.
++ return (address) -1;
++}
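++
++// Returning (address) -1 is the convention the shared runtime takes to
++// mean "no fast accessor was generated", so the generic JNI field
++// access path is used instead (inferred from the comment above).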
++
++address JNI_FastGetField::generate_fast_get_boolean_field() {
++ return generate_fast_get_int_field0(T_BOOLEAN);
++}
++
++address JNI_FastGetField::generate_fast_get_byte_field() {
++ return generate_fast_get_int_field0(T_BYTE);
++}
++
++address JNI_FastGetField::generate_fast_get_char_field() {
++ return generate_fast_get_int_field0(T_CHAR);
++}
++
++address JNI_FastGetField::generate_fast_get_short_field() {
++ return generate_fast_get_int_field0(T_SHORT);
++}
++
++address JNI_FastGetField::generate_fast_get_int_field() {
++ return generate_fast_get_int_field0(T_INT);
++}
++
++address JNI_FastGetField::generate_fast_get_long_field() {
++ // We don't have fast jni accessors.
++ return (address) -1;
++}
++
++address JNI_FastGetField::generate_fast_get_float_field0(BasicType type) {
++ // We don't have fast jni accessors.
++ return (address) -1;
++}
++
++address JNI_FastGetField::generate_fast_get_float_field() {
++ return generate_fast_get_float_field0(T_FLOAT);
++}
++
++address JNI_FastGetField::generate_fast_get_double_field() {
++ return generate_fast_get_float_field0(T_DOUBLE);
++}
+--- ./hotspot/src/cpu/ppc/vm/jniTypes_ppc.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/jniTypes_ppc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,110 @@
++/*
++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_PPC_VM_JNITYPES_PPC_HPP
++#define CPU_PPC_VM_JNITYPES_PPC_HPP
++
++#include "memory/allocation.hpp"
++#include "oops/oop.hpp"
++#include "prims/jni.h"
++
++// This file holds platform-dependent routines used to write primitive
++// jni types to the array of arguments passed into JavaCalls::call.
++
++class JNITypes : AllStatic {
++ // These functions write a java primitive type (in native format) to
++ // a java stack slot array to be passed as an argument to
++  // JavaCalls::call. I.e., they are functionally 'push' operations
++ // if they have a 'pos' formal parameter. Note that jlong's and
++ // jdouble's are written _in reverse_ of the order in which they
++ // appear in the interpreter stack. This is because call stubs (see
++ // stubGenerator_sparc.cpp) reverse the argument list constructed by
++ // JavaCallArguments (see javaCalls.hpp).
++
++ private:
++
++#ifndef PPC64
++#error "ppc32 support currently not implemented!!!"
++#endif // PPC64
++
++ public:
++ // Ints are stored in native format in one JavaCallArgument slot at *to.
++ static inline void put_int(jint from, intptr_t *to) { *(jint *)(to + 0 ) = from; }
++ static inline void put_int(jint from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = from; }
++ static inline void put_int(jint *from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = *from; }
++
++ // Longs are stored in native format in one JavaCallArgument slot at
++ // *(to+1).
++ static inline void put_long(jlong from, intptr_t *to) {
++ *(jlong*) (to + 1) = from;
++ }
++
++ static inline void put_long(jlong from, intptr_t *to, int& pos) {
++ *(jlong*) (to + 1 + pos) = from;
++ pos += 2;
++ }
++
++ static inline void put_long(jlong *from, intptr_t *to, int& pos) {
++ *(jlong*) (to + 1 + pos) = *from;
++ pos += 2;
++ }
++
++ // Oops are stored in native format in one JavaCallArgument slot at *to.
++ static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; }
++ static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; }
++ static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; }
++
++ // Floats are stored in native format in one JavaCallArgument slot at *to.
++ static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; }
++ static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; }
++ static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; }
++
++ // Doubles are stored in native word format in one JavaCallArgument
++ // slot at *(to+1).
++ static inline void put_double(jdouble from, intptr_t *to) {
++ *(jdouble*) (to + 1) = from;
++ }
++
++ static inline void put_double(jdouble from, intptr_t *to, int& pos) {
++ *(jdouble*) (to + 1 + pos) = from;
++ pos += 2;
++ }
++
++ static inline void put_double(jdouble *from, intptr_t *to, int& pos) {
++ *(jdouble*) (to + 1 + pos) = *from;
++ pos += 2;
++ }
++
++ // The get_xxx routines, on the other hand, actually _do_ fetch
++ // java primitive types from the interpreter stack.
++  // No need to worry about alignment on PPC64.
++ static inline jint get_int (intptr_t *from) { return *(jint *) from; }
++ static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + 1); }
++ static inline oop get_obj (intptr_t *from) { return *(oop *) from; }
++ static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; }
++ static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + 1); }
++};
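++
++// Illustrative usage sketch (hypothetical argument array 'args'):
++//   intptr_t args[4];
++//   int pos = 0;
++//   JNITypes::put_int (42,    args, pos);  // slot 0, pos: 0 -> 1
++//   JNITypes::put_long(4711L, args, pos);  // written at args + 2, pos: 1 -> 3
++// Matching reads: get_int(args) and get_long(args + 1).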
++
++#endif // CPU_PPC_VM_JNITYPES_PPC_HPP
+--- ./hotspot/src/cpu/ppc/vm/jni_ppc.h Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/jni_ppc.h Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,55 @@
++/*
++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation. Oracle designates this
++ * particular file as subject to the "Classpath" exception as provided
++ * by Oracle in the LICENSE file that accompanied this code.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ */
++
++#ifndef CPU_PPC_VM_JNI_PPC_H
++#define CPU_PPC_VM_JNI_PPC_H
++
++// Note: please do not change these without also changing jni_md.h in the JDK
++// repository
++#ifndef __has_attribute
++ #define __has_attribute(x) 0
++#endif
++#if (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4) && (__GNUC_MINOR__ > 2))) || __has_attribute(visibility)
++ #define JNIEXPORT __attribute__((visibility("default")))
++ #define JNIIMPORT __attribute__((visibility("default")))
++#else
++ #define JNIEXPORT
++ #define JNIIMPORT
++#endif
++
++#define JNICALL
++
++typedef int jint;
++
++#if defined(_LP64)
++ typedef long jlong;
++#else
++ typedef long long jlong;
++#endif
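++
++// Either branch yields a 64-bit jlong; an illustrative compile-time
++// check of that assumption:
++//   typedef char jlong_must_be_64_bits[sizeof(jlong) == 8 ? 1 : -1];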
++
++typedef signed char jbyte;
++
++#endif // CPU_PPC_VM_JNI_PPC_H
+--- ./hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,3169 @@
++/*
++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2014 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "asm/macroAssembler.inline.hpp"
++#include "compiler/disassembler.hpp"
++#include "gc_interface/collectedHeap.inline.hpp"
++#include "interpreter/interpreter.hpp"
++#include "memory/cardTableModRefBS.hpp"
++#include "memory/resourceArea.hpp"
++#include "prims/methodHandles.hpp"
++#include "runtime/biasedLocking.hpp"
++#include "runtime/interfaceSupport.hpp"
++#include "runtime/objectMonitor.hpp"
++#include "runtime/os.hpp"
++#include "runtime/sharedRuntime.hpp"
++#include "runtime/stubRoutines.hpp"
++#include "utilities/macros.hpp"
++#if INCLUDE_ALL_GCS
++#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
++#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
++#include "gc_implementation/g1/heapRegion.hpp"
++#endif // INCLUDE_ALL_GCS
++
++#ifdef PRODUCT
++#define BLOCK_COMMENT(str) // nothing
++#else
++#define BLOCK_COMMENT(str) block_comment(str)
++#endif
++
++#ifdef ASSERT
++// On RISC, there's no benefit to verifying instruction boundaries.
++bool AbstractAssembler::pd_check_instruction_mark() { return false; }
++#endif
++
++void MacroAssembler::ld_largeoffset_unchecked(Register d, int si31, Register a, int emit_filler_nop) {
++ assert(Assembler::is_simm(si31, 31) && si31 >= 0, "si31 out of range");
++ if (Assembler::is_simm(si31, 16)) {
++ ld(d, si31, a);
++ if (emit_filler_nop) nop();
++ } else {
++ const int hi = MacroAssembler::largeoffset_si16_si16_hi(si31);
++ const int lo = MacroAssembler::largeoffset_si16_si16_lo(si31);
++ addis(d, a, hi);
++ ld(d, lo, d);
++ }
++}
++
++void MacroAssembler::ld_largeoffset(Register d, int si31, Register a, int emit_filler_nop) {
++ assert_different_registers(d, a);
++ ld_largeoffset_unchecked(d, si31, a, emit_filler_nop);
++}
++
++void MacroAssembler::load_sized_value(Register dst, RegisterOrConstant offs, Register base,
++ size_t size_in_bytes, bool is_signed) {
++ switch (size_in_bytes) {
++ case 8: ld(dst, offs, base); break;
++ case 4: is_signed ? lwa(dst, offs, base) : lwz(dst, offs, base); break;
++ case 2: is_signed ? lha(dst, offs, base) : lhz(dst, offs, base); break;
++ case 1: lbz(dst, offs, base); if (is_signed) extsb(dst, dst); break; // lba doesn't exist :(
++ default: ShouldNotReachHere();
++ }
++}
++
++void MacroAssembler::store_sized_value(Register dst, RegisterOrConstant offs, Register base,
++ size_t size_in_bytes) {
++ switch (size_in_bytes) {
++ case 8: std(dst, offs, base); break;
++ case 4: stw(dst, offs, base); break;
++ case 2: sth(dst, offs, base); break;
++ case 1: stb(dst, offs, base); break;
++ default: ShouldNotReachHere();
++ }
++}
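++
++// Illustrative use of the two helpers above (hypothetical registers and
++// offset): round-trip a signed 16-bit field at offset 12 off R3:
++//   load_sized_value (R4, 12, R3, 2, /*is_signed=*/true); // emits lha
++//   store_sized_value(R4, 12, R3, 2);                     // emits sth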
++
++void MacroAssembler::align(int modulus, int max, int rem) {
++ int padding = (rem + modulus - (offset() % modulus)) % modulus;
++ if (padding > max) return;
++ for (int c = (padding >> 2); c > 0; --c) { nop(); }
++}
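++
++// Worked example (hypothetical arguments): at offset() == 20,
++// align(8, 4, 0) computes padding == (0 + 8 - 20 % 8) % 8 == 4 and
++// emits a single nop; had padding exceeded max, nothing would be
++// emitted.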
++
++// Issue instructions that calculate the given address from the global TOC.
++void MacroAssembler::calculate_address_from_global_toc(Register dst, address addr, bool hi16, bool lo16,
++ bool add_relocation, bool emit_dummy_addr) {
++ int offset = -1;
++ if (emit_dummy_addr) {
++ offset = -128; // dummy address
++ } else if (addr != (address)(intptr_t)-1) {
++ offset = MacroAssembler::offset_to_global_toc(addr);
++ }
++
++ if (hi16) {
++ addis(dst, R29, MacroAssembler::largeoffset_si16_si16_hi(offset));
++ }
++ if (lo16) {
++ if (add_relocation) {
++ // Relocate at the addi to avoid confusion with a load from the method's TOC.
++ relocate(internal_word_Relocation::spec(addr));
++ }
++ addi(dst, dst, MacroAssembler::largeoffset_si16_si16_lo(offset));
++ }
++}
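++
++// The emitted pair is effectively (illustrative, @ha/@l in linker
++// notation):
++//   addis dst, R29, offset@ha
++//   addi  dst, dst, offset@l
++// splitting a 31-bit TOC-relative offset into two 16-bit halves.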
++
++int MacroAssembler::patch_calculate_address_from_global_toc_at(address a, address bound, address addr) {
++ const int offset = MacroAssembler::offset_to_global_toc(addr);
++
++ const address inst2_addr = a;
++ const int inst2 = *(int *)inst2_addr;
++
++ // The relocation points to the second instruction, the addi,
++ // and the addi reads and writes the same register dst.
++ const int dst = inv_rt_field(inst2);
++ assert(is_addi(inst2) && inv_ra_field(inst2) == dst, "must be addi reading and writing dst");
++
++ // Now, find the preceding addis which writes to dst.
++ int inst1 = 0;
++ address inst1_addr = inst2_addr - BytesPerInstWord;
++ while (inst1_addr >= bound) {
++ inst1 = *(int *) inst1_addr;
++ if (is_addis(inst1) && inv_rt_field(inst1) == dst) {
++ // Stop, found the addis which writes dst.
++ break;
++ }
++ inst1_addr -= BytesPerInstWord;
++ }
++
++ assert(is_addis(inst1) && inv_ra_field(inst1) == 29 /* R29 */, "source must be global TOC");
++ set_imm((int *)inst1_addr, MacroAssembler::largeoffset_si16_si16_hi(offset));
++ set_imm((int *)inst2_addr, MacroAssembler::largeoffset_si16_si16_lo(offset));
++ return (int)((intptr_t)addr - (intptr_t)inst1_addr);
++}
++
++address MacroAssembler::get_address_of_calculate_address_from_global_toc_at(address a, address bound) {
++ const address inst2_addr = a;
++ const int inst2 = *(int *)inst2_addr;
++
++ // The relocation points to the second instruction, the addi,
++ // and the addi reads and writes the same register dst.
++ const int dst = inv_rt_field(inst2);
++ assert(is_addi(inst2) && inv_ra_field(inst2) == dst, "must be addi reading and writing dst");
++
++ // Now, find the preceding addis which writes to dst.
++ int inst1 = 0;
++ address inst1_addr = inst2_addr - BytesPerInstWord;
++ while (inst1_addr >= bound) {
++ inst1 = *(int *) inst1_addr;
++ if (is_addis(inst1) && inv_rt_field(inst1) == dst) {
++ // stop, found the addis which writes dst
++ break;
++ }
++ inst1_addr -= BytesPerInstWord;
++ }
++
++ assert(is_addis(inst1) && inv_ra_field(inst1) == 29 /* R29 */, "source must be global TOC");
++
++ int offset = (get_imm(inst1_addr, 0) << 16) + get_imm(inst2_addr, 0);
++ // -1 is a special case
++ if (offset == -1) {
++ return (address)(intptr_t)-1;
++ } else {
++ return global_toc() + offset;
++ }
++}
++
++#ifdef _LP64
++// Patch compressed oops or klass constants.
++// Assembler sequence is
++// 1) compressed oops:
++// lis rx = const.hi
++// ori rx = rx | const.lo
++// 2) compressed klass:
++// lis rx = const.hi
++// clrldi rx = rx & 0xFFFFffff // clearMS32b, optional
++// ori rx = rx | const.lo
++// The clrldi, when present, is simply skipped over.
++int MacroAssembler::patch_set_narrow_oop(address a, address bound, narrowOop data) {
++ assert(UseCompressedOops, "Should only patch compressed oops");
++
++ const address inst2_addr = a;
++ const int inst2 = *(int *)inst2_addr;
++
++ // The relocation points to the second instruction, the ori,
++ // and the ori reads and writes the same register dst.
++ const int dst = inv_rta_field(inst2);
++ assert(is_ori(inst2) && inv_rs_field(inst2) == dst, "must be ori reading and writing dst");
++ // Now, find the preceding addis which writes to dst.
++ int inst1 = 0;
++ address inst1_addr = inst2_addr - BytesPerInstWord;
++ bool inst1_found = false;
++ while (inst1_addr >= bound) {
++ inst1 = *(int *)inst1_addr;
++ if (is_lis(inst1) && inv_rs_field(inst1) == dst) { inst1_found = true; break; }
++ inst1_addr -= BytesPerInstWord;
++ }
++ assert(inst1_found, "inst is not lis");
++
++ int xc = (data >> 16) & 0xffff;
++ int xd = (data >> 0) & 0xffff;
++
++ set_imm((int *)inst1_addr, (short)(xc)); // see enc_load_con_narrow_hi/_lo
++ set_imm((int *)inst2_addr, (xd)); // unsigned int
++ return (int)((intptr_t)inst2_addr - (intptr_t)inst1_addr);
++}
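++
++// Illustrative example: for data == 0x12345678 the code above patches
++// xc == 0x1234 into the lis and xd == 0x5678 into the ori, leaving
++// dst == (0x1234 << 16) | 0x5678 after both instructions execute.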
++
++// Get compressed oop or klass constant.
++narrowOop MacroAssembler::get_narrow_oop(address a, address bound) {
++ assert(UseCompressedOops, "Should only patch compressed oops");
++
++ const address inst2_addr = a;
++ const int inst2 = *(int *)inst2_addr;
++
++ // The relocation points to the second instruction, the ori,
++ // and the ori reads and writes the same register dst.
++ const int dst = inv_rta_field(inst2);
++ assert(is_ori(inst2) && inv_rs_field(inst2) == dst, "must be ori reading and writing dst");
++ // Now, find the preceding lis which writes to dst.
++ int inst1 = 0;
++ address inst1_addr = inst2_addr - BytesPerInstWord;
++ bool inst1_found = false;
++
++ while (inst1_addr >= bound) {
++ inst1 = *(int *) inst1_addr;
++ if (is_lis(inst1) && inv_rs_field(inst1) == dst) { inst1_found = true; break;}
++ inst1_addr -= BytesPerInstWord;
++ }
++ assert(inst1_found, "inst is not lis");
++
++ uint xl = ((unsigned int) (get_imm(inst2_addr, 0) & 0xffff));
++ uint xh = (((get_imm(inst1_addr, 0)) & 0xffff) << 16);
++
++ return (int) (xl | xh);
++}
++#endif // _LP64
++
++void MacroAssembler::load_const_from_method_toc(Register dst, AddressLiteral& a, Register toc) {
++ int toc_offset = 0;
++ // Use RelocationHolder::none for the constant pool entry, otherwise
++ // we will end up with a failing NativeCall::verify(x) where x is
++ // the address of the constant pool entry.
++ // FIXME: We should insert relocation information for oops at the constant
++ // pool entries instead of inserting it at the loads; patching of a constant
++ // pool entry should be less expensive.
++ address oop_address = address_constant((address)a.value(), RelocationHolder::none);
++ // Relocate at the pc of the load.
++ relocate(a.rspec());
++ toc_offset = (int)(oop_address - code()->consts()->start());
++ ld_largeoffset_unchecked(dst, toc_offset, toc, true);
++}
++
++bool MacroAssembler::is_load_const_from_method_toc_at(address a) {
++ const address inst1_addr = a;
++ const int inst1 = *(int *)inst1_addr;
++
++ // The relocation points to the ld or the addis.
++ return (is_ld(inst1)) ||
++ (is_addis(inst1) && inv_ra_field(inst1) != 0);
++}
++
++int MacroAssembler::get_offset_of_load_const_from_method_toc_at(address a) {
++ assert(is_load_const_from_method_toc_at(a), "must be load_const_from_method_toc");
++
++ const address inst1_addr = a;
++ const int inst1 = *(int *)inst1_addr;
++
++ if (is_ld(inst1)) {
++ return inv_d1_field(inst1);
++ } else if (is_addis(inst1)) {
++ const int dst = inv_rt_field(inst1);
++
++ // Now, find the succeeding ld which reads and writes to dst.
++ address inst2_addr = inst1_addr + BytesPerInstWord;
++ int inst2 = 0;
++ while (true) {
++ inst2 = *(int *) inst2_addr;
++ if (is_ld(inst2) && inv_ra_field(inst2) == dst && inv_rt_field(inst2) == dst) {
++ // Stop, found the ld which reads and writes dst.
++ break;
++ }
++ inst2_addr += BytesPerInstWord;
++ }
++ return (inv_d1_field(inst1) << 16) + inv_d1_field(inst2);
++ }
++ ShouldNotReachHere();
++ return 0;
++}
++
++// Get the constant from a `load_const' sequence.
++long MacroAssembler::get_const(address a) {
++ assert(is_load_const_at(a), "not a load of a constant");
++ const int *p = (const int*) a;
++ unsigned long x = (((unsigned long) (get_imm(a,0) & 0xffff)) << 48);
++ if (is_ori(*(p+1))) {
++ x |= (((unsigned long) (get_imm(a,1) & 0xffff)) << 32);
++ x |= (((unsigned long) (get_imm(a,3) & 0xffff)) << 16);
++ x |= (((unsigned long) (get_imm(a,4) & 0xffff)));
++ } else if (is_lis(*(p+1))) {
++ x |= (((unsigned long) (get_imm(a,2) & 0xffff)) << 32);
++ x |= (((unsigned long) (get_imm(a,1) & 0xffff)) << 16);
++ x |= (((unsigned long) (get_imm(a,3) & 0xffff)));
++ } else {
++ ShouldNotReachHere();
++ return (long) 0;
++ }
++ return (long) x;
++}
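++
++// Inferred shape of the two sequences decoded above (an assumption
++// based on the immediate positions, not spelled out in this file):
++//   one-register variant:  lis, ori, sldi, oris, ori
++//                          (immediates in words 0,1,3,4 -> bits 48,32,16,0)
++//   two-register variant:  lis, lis, ori, ori, rldimi
++//                          (immediates in words 0,2,1,3 -> bits 48,32,16,0)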
++
++// Patch the 64 bit constant of a `load_const' sequence. This is a low
++// level procedure. It neither flushes the instruction cache nor is it
++// mt safe.
++void MacroAssembler::patch_const(address a, long x) {
++ assert(is_load_const_at(a), "not a load of a constant");
++ int *p = (int*) a;
++ if (is_ori(*(p+1))) {
++ set_imm(0 + p, (x >> 48) & 0xffff);
++ set_imm(1 + p, (x >> 32) & 0xffff);
++ set_imm(3 + p, (x >> 16) & 0xffff);
++ set_imm(4 + p, x & 0xffff);
++ } else if (is_lis(*(p+1))) {
++ set_imm(0 + p, (x >> 48) & 0xffff);
++ set_imm(2 + p, (x >> 32) & 0xffff);
++ set_imm(1 + p, (x >> 16) & 0xffff);
++ set_imm(3 + p, x & 0xffff);
++ } else {
++ ShouldNotReachHere();
++ }
++}
++
++AddressLiteral MacroAssembler::allocate_metadata_address(Metadata* obj) {
++ assert(oop_recorder() != NULL, "this assembler needs a Recorder");
++ int index = oop_recorder()->allocate_metadata_index(obj);
++ RelocationHolder rspec = metadata_Relocation::spec(index);
++ return AddressLiteral((address)obj, rspec);
++}
++
++AddressLiteral MacroAssembler::constant_metadata_address(Metadata* obj) {
++ assert(oop_recorder() != NULL, "this assembler needs a Recorder");
++ int index = oop_recorder()->find_index(obj);
++ RelocationHolder rspec = metadata_Relocation::spec(index);
++ return AddressLiteral((address)obj, rspec);
++}
++
++AddressLiteral MacroAssembler::allocate_oop_address(jobject obj) {
++ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
++ int oop_index = oop_recorder()->allocate_oop_index(obj);
++ return AddressLiteral(address(obj), oop_Relocation::spec(oop_index));
++}
++
++AddressLiteral MacroAssembler::constant_oop_address(jobject obj) {
++ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
++ int oop_index = oop_recorder()->find_index(obj);
++ return AddressLiteral(address(obj), oop_Relocation::spec(oop_index));
++}
++
++RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
++ Register tmp, int offset) {
++ intptr_t value = *delayed_value_addr;
++ if (value != 0) {
++ return RegisterOrConstant(value + offset);
++ }
++
++ // Load indirectly to solve generation ordering problem.
++ // static address, no relocation
++ int simm16_offset = load_const_optimized(tmp, delayed_value_addr, noreg, true);
++ ld(tmp, simm16_offset, tmp); // must be aligned ((xa & 3) == 0)
++
++ if (offset != 0) {
++ addi(tmp, tmp, offset);
++ }
++
++ return RegisterOrConstant(tmp);
++}
++
++#ifndef PRODUCT
++void MacroAssembler::pd_print_patched_instruction(address branch) {
++ Unimplemented(); // TODO: PPC port
++}
++#endif // ndef PRODUCT
++
++// Conditional far branch for destinations encodable in 24+2 bits.
++void MacroAssembler::bc_far(int boint, int biint, Label& dest, int optimize) {
++
++ // If requested by flag optimize, relocate the bc_far as a
++ // runtime_call and prepare for optimizing it when the code gets
++ // relocated.
++ if (optimize == bc_far_optimize_on_relocate) {
++ relocate(relocInfo::runtime_call_type);
++ }
++
++ // variant 2:
++ //
++ // b!cxx SKIP
++ // bxx DEST
++ // SKIP:
++ //
++
++ const int opposite_boint = add_bhint_to_boint(opposite_bhint(inv_boint_bhint(boint)),
++ opposite_bcond(inv_boint_bcond(boint)));
++
++ // We emit two branches.
++ // First, a conditional branch which jumps around the far branch.
++ const address not_taken_pc = pc() + 2 * BytesPerInstWord;
++ const address bc_pc = pc();
++ bc(opposite_boint, biint, not_taken_pc);
++
++ const int bc_instr = *(int*)bc_pc;
++ assert(not_taken_pc == (address)inv_bd_field(bc_instr, (intptr_t)bc_pc), "postcondition");
++ assert(opposite_boint == inv_bo_field(bc_instr), "postcondition");
++ assert(boint == add_bhint_to_boint(opposite_bhint(inv_boint_bhint(inv_bo_field(bc_instr))),
++ opposite_bcond(inv_boint_bcond(inv_bo_field(bc_instr)))),
++ "postcondition");
++ assert(biint == inv_bi_field(bc_instr), "postcondition");
++
++ // Second, an unconditional far branch which jumps to dest.
++ // Note: target(dest) remembers the current pc (see CodeSection::target)
++ // and returns the current pc if the label is not bound yet; when
++ // the label gets bound, the unconditional far branch will be patched.
++ const address target_pc = target(dest);
++ const address b_pc = pc();
++ b(target_pc);
++
++ assert(not_taken_pc == pc(), "postcondition");
++ assert(dest.is_bound() || target_pc == b_pc, "postcondition");
++}
++
++bool MacroAssembler::is_bc_far_at(address instruction_addr) {
++ return is_bc_far_variant1_at(instruction_addr) ||
++ is_bc_far_variant2_at(instruction_addr) ||
++ is_bc_far_variant3_at(instruction_addr);
++}
++
++address MacroAssembler::get_dest_of_bc_far_at(address instruction_addr) {
++ if (is_bc_far_variant1_at(instruction_addr)) {
++ const address instruction_1_addr = instruction_addr;
++ const int instruction_1 = *(int*)instruction_1_addr;
++ return (address)inv_bd_field(instruction_1, (intptr_t)instruction_1_addr);
++ } else if (is_bc_far_variant2_at(instruction_addr)) {
++ const address instruction_2_addr = instruction_addr + 4;
++ return bxx_destination(instruction_2_addr);
++ } else if (is_bc_far_variant3_at(instruction_addr)) {
++ return instruction_addr + 8;
++ }
++ // variant 4 ???
++ ShouldNotReachHere();
++ return NULL;
++}
++void MacroAssembler::set_dest_of_bc_far_at(address instruction_addr, address dest) {
++
++ if (is_bc_far_variant3_at(instruction_addr)) {
++ // variant 3, far cond branch to the next instruction, already patched to nops:
++ //
++ // nop
++ // endgroup
++ // SKIP/DEST:
++ //
++ return;
++ }
++
++ // first, extract boint and biint from the current branch
++ int boint = 0;
++ int biint = 0;
++
++ ResourceMark rm;
++ const int code_size = 2 * BytesPerInstWord;
++ CodeBuffer buf(instruction_addr, code_size);
++ MacroAssembler masm(&buf);
++ if (is_bc_far_variant2_at(instruction_addr) && dest == instruction_addr + 8) {
++ // Far branch to next instruction: Optimize it by patching nops (produce variant 3).
++ masm.nop();
++ masm.endgroup();
++ } else {
++ if (is_bc_far_variant1_at(instruction_addr)) {
++ // variant 1, the 1st instruction contains the destination address:
++ //
++ // bcxx DEST
++ // endgroup
++ //
++ const int instruction_1 = *(int*)(instruction_addr);
++ boint = inv_bo_field(instruction_1);
++ biint = inv_bi_field(instruction_1);
++ } else if (is_bc_far_variant2_at(instruction_addr)) {
++ // variant 2, the 2nd instruction contains the destination address:
++ //
++ // b!cxx SKIP
++ // bxx DEST
++ // SKIP:
++ //
++ const int instruction_1 = *(int*)(instruction_addr);
++ boint = add_bhint_to_boint(opposite_bhint(inv_boint_bhint(inv_bo_field(instruction_1))),
++ opposite_bcond(inv_boint_bcond(inv_bo_field(instruction_1))));
++ biint = inv_bi_field(instruction_1);
++ } else {
++ // variant 4???
++ ShouldNotReachHere();
++ }
++
++ // second, set the new branch destination and optimize the code
++ if (dest != instruction_addr + 4 && // the bc_far is still unbound!
++ masm.is_within_range_of_bcxx(dest, instruction_addr)) {
++ // variant 1:
++ //
++ // bcxx DEST
++ // endgroup
++ //
++ masm.bc(boint, biint, dest);
++ masm.endgroup();
++ } else {
++ // variant 2:
++ //
++ // b!cxx SKIP
++ // bxx DEST
++ // SKIP:
++ //
++ const int opposite_boint = add_bhint_to_boint(opposite_bhint(inv_boint_bhint(boint)),
++ opposite_bcond(inv_boint_bcond(boint)));
++ const address not_taken_pc = masm.pc() + 2 * BytesPerInstWord;
++ masm.bc(opposite_boint, biint, not_taken_pc);
++ masm.b(dest);
++ }
++ }
++ ICache::ppc64_flush_icache_bytes(instruction_addr, code_size);
++}
++
++// Emit a NOT mt-safe patchable 64 bit absolute call/jump.
++void MacroAssembler::bxx64_patchable(address dest, relocInfo::relocType rt, bool link) {
++ // get current pc
++ uint64_t start_pc = (uint64_t) pc();
++
++ const address pc_of_bl = (address) (start_pc + (6*BytesPerInstWord)); // bl is last
++ const address pc_of_b = (address) (start_pc + (0*BytesPerInstWord)); // b is first
++
++ // relocate here
++ if (rt != relocInfo::none) {
++ relocate(rt);
++ }
++
++ if ( ReoptimizeCallSequences &&
++ (( link && is_within_range_of_b(dest, pc_of_bl)) ||
++ (!link && is_within_range_of_b(dest, pc_of_b)))) {
++ // variant 2:
++ // Emit an optimized, pc-relative call/jump.
++
++ if (link) {
++ // some padding
++ nop();
++ nop();
++ nop();
++ nop();
++ nop();
++ nop();
++
++ // do the call
++ assert(pc() == pc_of_bl, "just checking");
++ bl(dest, relocInfo::none);
++ } else {
++ // do the jump
++ assert(pc() == pc_of_b, "just checking");
++ b(dest, relocInfo::none);
++
++ // some padding
++ nop();
++ nop();
++ nop();
++ nop();
++ nop();
++ nop();
++ }
++
++ // Assert that we can identify the emitted call/jump.
++ assert(is_bxx64_patchable_variant2_at((address)start_pc, link),
++ "can't identify emitted call");
++ } else {
++ // variant 1:
++#if defined(ABI_ELFv2)
++ nop();
++ calculate_address_from_global_toc(R12, dest, true, true, false);
++ mtctr(R12);
++ nop();
++ nop();
++#else
++ mr(R0, R11); // spill R11 -> R0.
++
++ // Load the destination address into CTR,
++ // calculate destination relative to global toc.
++ calculate_address_from_global_toc(R11, dest, true, true, false);
++
++ mtctr(R11);
++ mr(R11, R0); // spill R11 <- R0.
++ nop();
++#endif
++
++ // do the call/jump
++ if (link) {
++ bctrl();
++  } else {
++ bctr();
++ }
++ // Assert that we can identify the emitted call/jump.
++ assert(is_bxx64_patchable_variant1b_at((address)start_pc, link),
++ "can't identify emitted call");
++ }
++
++ // Assert that we can identify the emitted call/jump.
++ assert(is_bxx64_patchable_at((address)start_pc, link),
++ "can't identify emitted call");
++ assert(get_dest_of_bxx64_patchable_at((address)start_pc, link) == dest,
++ "wrong encoding of dest address");
++}
++
++// Identify a bxx64_patchable instruction.
++bool MacroAssembler::is_bxx64_patchable_at(address instruction_addr, bool link) {
++ return is_bxx64_patchable_variant1b_at(instruction_addr, link)
++ //|| is_bxx64_patchable_variant1_at(instruction_addr, link)
++ || is_bxx64_patchable_variant2_at(instruction_addr, link);
++}
++
++// Does the bxx64_patchable code sequence use a pc-relative encoding
++// of the call destination?
++bool MacroAssembler::is_bxx64_patchable_pcrelative_at(address instruction_addr, bool link) {
++ // variant 2 is pc-relative
++ return is_bxx64_patchable_variant2_at(instruction_addr, link);
++}
++
++// Identify variant 1.
++bool MacroAssembler::is_bxx64_patchable_variant1_at(address instruction_addr, bool link) {
++ unsigned int* instr = (unsigned int*) instruction_addr;
++ return (link ? is_bctrl(instr[6]) : is_bctr(instr[6])) // bctr[l]
++ && is_mtctr(instr[5]) // mtctr
++ && is_load_const_at(instruction_addr);
++}
++
++// Identify variant 1b: load destination relative to global toc.
++bool MacroAssembler::is_bxx64_patchable_variant1b_at(address instruction_addr, bool link) {
++ unsigned int* instr = (unsigned int*) instruction_addr;
++ return (link ? is_bctrl(instr[6]) : is_bctr(instr[6])) // bctr[l]
++ && is_mtctr(instr[3]) // mtctr
++ && is_calculate_address_from_global_toc_at(instruction_addr + 2*BytesPerInstWord, instruction_addr);
++}
++
++// Identify variant 2.
++bool MacroAssembler::is_bxx64_patchable_variant2_at(address instruction_addr, bool link) {
++ unsigned int* instr = (unsigned int*) instruction_addr;
++ if (link) {
++ return is_bl (instr[6]) // bl dest is last
++ && is_nop(instr[0]) // nop
++ && is_nop(instr[1]) // nop
++ && is_nop(instr[2]) // nop
++ && is_nop(instr[3]) // nop
++ && is_nop(instr[4]) // nop
++ && is_nop(instr[5]); // nop
++ } else {
++ return is_b (instr[0]) // b dest is first
++ && is_nop(instr[1]) // nop
++ && is_nop(instr[2]) // nop
++ && is_nop(instr[3]) // nop
++ && is_nop(instr[4]) // nop
++ && is_nop(instr[5]) // nop
++ && is_nop(instr[6]); // nop
++ }
++}
++
++// Set dest address of a bxx64_patchable instruction.
++void MacroAssembler::set_dest_of_bxx64_patchable_at(address instruction_addr, address dest, bool link) {
++ ResourceMark rm;
++ int code_size = MacroAssembler::bxx64_patchable_size;
++ CodeBuffer buf(instruction_addr, code_size);
++ MacroAssembler masm(&buf);
++ masm.bxx64_patchable(dest, relocInfo::none, link);
++ ICache::ppc64_flush_icache_bytes(instruction_addr, code_size);
++}
++
++// Get dest address of a bxx64_patchable instruction.
++address MacroAssembler::get_dest_of_bxx64_patchable_at(address instruction_addr, bool link) {
++ if (is_bxx64_patchable_variant1_at(instruction_addr, link)) {
++ return (address) (unsigned long) get_const(instruction_addr);
++ } else if (is_bxx64_patchable_variant2_at(instruction_addr, link)) {
++ unsigned int* instr = (unsigned int*) instruction_addr;
++ if (link) {
++ const int instr_idx = 6; // bl is last
++ int branchoffset = branch_destination(instr[instr_idx], 0);
++ return instruction_addr + branchoffset + instr_idx*BytesPerInstWord;
++ } else {
++ const int instr_idx = 0; // b is first
++ int branchoffset = branch_destination(instr[instr_idx], 0);
++ return instruction_addr + branchoffset + instr_idx*BytesPerInstWord;
++ }
++ // Load dest relative to global toc.
++ } else if (is_bxx64_patchable_variant1b_at(instruction_addr, link)) {
++ return get_address_of_calculate_address_from_global_toc_at(instruction_addr + 2*BytesPerInstWord,
++ instruction_addr);
++ } else {
++ ShouldNotReachHere();
++ return NULL;
++ }
++}
++
++// Uses ordering which corresponds to ABI:
++// _savegpr0_14: std r14,-144(r1)
++// _savegpr0_15: std r15,-136(r1)
++// _savegpr0_16: std r16,-128(r1)
++void MacroAssembler::save_nonvolatile_gprs(Register dst, int offset) {
++ std(R14, offset, dst); offset += 8;
++ std(R15, offset, dst); offset += 8;
++ std(R16, offset, dst); offset += 8;
++ std(R17, offset, dst); offset += 8;
++ std(R18, offset, dst); offset += 8;
++ std(R19, offset, dst); offset += 8;
++ std(R20, offset, dst); offset += 8;
++ std(R21, offset, dst); offset += 8;
++ std(R22, offset, dst); offset += 8;
++ std(R23, offset, dst); offset += 8;
++ std(R24, offset, dst); offset += 8;
++ std(R25, offset, dst); offset += 8;
++ std(R26, offset, dst); offset += 8;
++ std(R27, offset, dst); offset += 8;
++ std(R28, offset, dst); offset += 8;
++ std(R29, offset, dst); offset += 8;
++ std(R30, offset, dst); offset += 8;
++ std(R31, offset, dst); offset += 8;
++
++ stfd(F14, offset, dst); offset += 8;
++ stfd(F15, offset, dst); offset += 8;
++ stfd(F16, offset, dst); offset += 8;
++ stfd(F17, offset, dst); offset += 8;
++ stfd(F18, offset, dst); offset += 8;
++ stfd(F19, offset, dst); offset += 8;
++ stfd(F20, offset, dst); offset += 8;
++ stfd(F21, offset, dst); offset += 8;
++ stfd(F22, offset, dst); offset += 8;
++ stfd(F23, offset, dst); offset += 8;
++ stfd(F24, offset, dst); offset += 8;
++ stfd(F25, offset, dst); offset += 8;
++ stfd(F26, offset, dst); offset += 8;
++ stfd(F27, offset, dst); offset += 8;
++ stfd(F28, offset, dst); offset += 8;
++ stfd(F29, offset, dst); offset += 8;
++ stfd(F30, offset, dst); offset += 8;
++ stfd(F31, offset, dst);
++}
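++
++// Sizing note (simple arithmetic): the routine above spills 18 GPRs
++// plus 18 FPRs at 8 bytes each, i.e. 288 bytes starting at 'offset';
++// the caller must reserve at least that much space below 'dst'.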
++
++// Uses ordering which corresponds to ABI:
++// _restgpr0_14: ld r14,-144(r1)
++// _restgpr0_15: ld r15,-136(r1)
++// _restgpr0_16: ld r16,-128(r1)
++void MacroAssembler::restore_nonvolatile_gprs(Register src, int offset) {
++ ld(R14, offset, src); offset += 8;
++ ld(R15, offset, src); offset += 8;
++ ld(R16, offset, src); offset += 8;
++ ld(R17, offset, src); offset += 8;
++ ld(R18, offset, src); offset += 8;
++ ld(R19, offset, src); offset += 8;
++ ld(R20, offset, src); offset += 8;
++ ld(R21, offset, src); offset += 8;
++ ld(R22, offset, src); offset += 8;
++ ld(R23, offset, src); offset += 8;
++ ld(R24, offset, src); offset += 8;
++ ld(R25, offset, src); offset += 8;
++ ld(R26, offset, src); offset += 8;
++ ld(R27, offset, src); offset += 8;
++ ld(R28, offset, src); offset += 8;
++ ld(R29, offset, src); offset += 8;
++ ld(R30, offset, src); offset += 8;
++ ld(R31, offset, src); offset += 8;
++
++ // FP registers
++ lfd(F14, offset, src); offset += 8;
++ lfd(F15, offset, src); offset += 8;
++ lfd(F16, offset, src); offset += 8;
++ lfd(F17, offset, src); offset += 8;
++ lfd(F18, offset, src); offset += 8;
++ lfd(F19, offset, src); offset += 8;
++ lfd(F20, offset, src); offset += 8;
++ lfd(F21, offset, src); offset += 8;
++ lfd(F22, offset, src); offset += 8;
++ lfd(F23, offset, src); offset += 8;
++ lfd(F24, offset, src); offset += 8;
++ lfd(F25, offset, src); offset += 8;
++ lfd(F26, offset, src); offset += 8;
++ lfd(F27, offset, src); offset += 8;
++ lfd(F28, offset, src); offset += 8;
++ lfd(F29, offset, src); offset += 8;
++ lfd(F30, offset, src); offset += 8;
++ lfd(F31, offset, src);
++}
++
++// For verify_oops.
++void MacroAssembler::save_volatile_gprs(Register dst, int offset) {
++ std(R3, offset, dst); offset += 8;
++ std(R4, offset, dst); offset += 8;
++ std(R5, offset, dst); offset += 8;
++ std(R6, offset, dst); offset += 8;
++ std(R7, offset, dst); offset += 8;
++ std(R8, offset, dst); offset += 8;
++ std(R9, offset, dst); offset += 8;
++ std(R10, offset, dst); offset += 8;
++ std(R11, offset, dst); offset += 8;
++ std(R12, offset, dst);
++}
++
++// For verify_oops.
++void MacroAssembler::restore_volatile_gprs(Register src, int offset) {
++ ld(R3, offset, src); offset += 8;
++ ld(R4, offset, src); offset += 8;
++ ld(R5, offset, src); offset += 8;
++ ld(R6, offset, src); offset += 8;
++ ld(R7, offset, src); offset += 8;
++ ld(R8, offset, src); offset += 8;
++ ld(R9, offset, src); offset += 8;
++ ld(R10, offset, src); offset += 8;
++ ld(R11, offset, src); offset += 8;
++ ld(R12, offset, src);
++}
++
++void MacroAssembler::save_LR_CR(Register tmp) {
++ mfcr(tmp);
++ std(tmp, _abi(cr), R1_SP);
++ mflr(tmp);
++ std(tmp, _abi(lr), R1_SP);
++ // Tmp must contain lr on exit! (see return_addr and prolog in ppc64.ad)
++}
++
++void MacroAssembler::restore_LR_CR(Register tmp) {
++ assert(tmp != R1_SP, "must be distinct");
++ ld(tmp, _abi(lr), R1_SP);
++ mtlr(tmp);
++ ld(tmp, _abi(cr), R1_SP);
++ mtcr(tmp);
++}
++
++address MacroAssembler::get_PC_trash_LR(Register result) {
++ Label L;
++ bl(L);
++ bind(L);
++ address lr_pc = pc();
++ mflr(result);
++ return lr_pc;
++}
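++
++// The bl above targets the immediately following instruction, so it
++// only deposits that instruction's pc in LR; mflr then copies it into
++// 'result' -- the usual PPC idiom for reading the current pc.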
++
++void MacroAssembler::resize_frame(Register offset, Register tmp) {
++#ifdef ASSERT
++ assert_different_registers(offset, tmp, R1_SP);
++ andi_(tmp, offset, frame::alignment_in_bytes-1);
++ asm_assert_eq("resize_frame: unaligned", 0x204);
++#endif
++
++ // tmp <- *(SP)
++ ld(tmp, _abi(callers_sp), R1_SP);
++ // addr <- SP + offset;
++ // *(addr) <- tmp;
++ // SP <- addr
++ stdux(tmp, R1_SP, offset);
++}
++
++void MacroAssembler::resize_frame(int offset, Register tmp) {
++ assert(is_simm(offset, 16), "too big an offset");
++ assert_different_registers(tmp, R1_SP);
++ assert((offset & (frame::alignment_in_bytes-1))==0, "resize_frame: unaligned");
++ // tmp <- *(SP)
++ ld(tmp, _abi(callers_sp), R1_SP);
++ // addr <- SP + offset;
++ // *(addr) <- tmp;
++ // SP <- addr
++ stdu(tmp, offset, R1_SP);
++}
++
++void MacroAssembler::resize_frame_absolute(Register addr, Register tmp1, Register tmp2) {
++ // (addr == tmp1) || (addr == tmp2) is allowed here!
++ assert(tmp1 != tmp2, "must be distinct");
++
++ // compute offset w.r.t. current stack pointer
++  // tmp1 <- addr - SP (!)
++ subf(tmp1, R1_SP, addr);
++
++ // atomically update SP keeping back link.
++ resize_frame(tmp1/* offset */, tmp2/* tmp */);
++}
++
++void MacroAssembler::push_frame(Register bytes, Register tmp) {
++#ifdef ASSERT
++ assert(bytes != R0, "r0 not allowed here");
++ andi_(R0, bytes, frame::alignment_in_bytes-1);
++ asm_assert_eq("push_frame(Reg, Reg): unaligned", 0x203);
++#endif
++ neg(tmp, bytes);
++ stdux(R1_SP, R1_SP, tmp);
++}
++
++// Push a frame of size `bytes'.
++void MacroAssembler::push_frame(unsigned int bytes, Register tmp) {
++ long offset = align_addr(bytes, frame::alignment_in_bytes);
++ if (is_simm(-offset, 16)) {
++ stdu(R1_SP, -offset, R1_SP);
++ } else {
++ load_const(tmp, -offset);
++ stdux(R1_SP, R1_SP, tmp);
++ }
++}
++
++// Push a frame of size `bytes' plus abi_reg_args on top.
++void MacroAssembler::push_frame_reg_args(unsigned int bytes, Register tmp) {
++ push_frame(bytes + frame::abi_reg_args_size, tmp);
++}
++
++// Set up a new C frame with a spill area for non-volatile GPRs and
++// additional space for local variables.
++void MacroAssembler::push_frame_reg_args_nonvolatiles(unsigned int bytes,
++ Register tmp) {
++ push_frame(bytes + frame::abi_reg_args_size + frame::spill_nonvolatiles_size, tmp);
++}
++
++// Pop current C frame.
++void MacroAssembler::pop_frame() {
++ ld(R1_SP, _abi(callers_sp), R1_SP);
++}
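++
++// Illustrative pairing (hypothetical; size 0 means just the ABI
++// register-argument area):
++//   push_frame_reg_args(0, R11_scratch1); // new frame, back link kept
++//   // ... call_c(...) ...
++//   pop_frame();                          // restore SP via back link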
++
++#if defined(ABI_ELFv2)
++address MacroAssembler::branch_to(Register r_function_entry, bool and_link) {
++ // TODO(asmundak): make sure the caller uses R12 as function descriptor
++  // most of the time.
++ if (R12 != r_function_entry) {
++ mr(R12, r_function_entry);
++ }
++ mtctr(R12);
++ // Do a call or a branch.
++ if (and_link) {
++ bctrl();
++ } else {
++ bctr();
++ }
++ _last_calls_return_pc = pc();
++
++ return _last_calls_return_pc;
++}
++
++// Call a C function via a function descriptor and use full C
++// calling conventions. Updates and returns _last_calls_return_pc.
++address MacroAssembler::call_c(Register r_function_entry) {
++ return branch_to(r_function_entry, /*and_link=*/true);
++}
++
++// For tail calls: only branch, don't link, so callee returns to caller of this function.
++address MacroAssembler::call_c_and_return_to_caller(Register r_function_entry) {
++ return branch_to(r_function_entry, /*and_link=*/false);
++}
++
++address MacroAssembler::call_c(address function_entry, relocInfo::relocType rt) {
++ load_const(R12, function_entry, R0);
++ return branch_to(R12, /*and_link=*/true);
++}
++
++#else
++// Generic version of a call to C function via a function descriptor
++// with variable support for C calling conventions (TOC, ENV, etc.).
++// Updates and returns _last_calls_return_pc.
++address MacroAssembler::branch_to(Register function_descriptor, bool and_link, bool save_toc_before_call,
++ bool restore_toc_after_call, bool load_toc_of_callee, bool load_env_of_callee) {
++ // we emit standard ptrgl glue code here
++ assert((function_descriptor != R0), "function_descriptor cannot be R0");
++
++ // retrieve necessary entries from the function descriptor
++ ld(R0, in_bytes(FunctionDescriptor::entry_offset()), function_descriptor);
++ mtctr(R0);
++
++ if (load_toc_of_callee) {
++ ld(R2_TOC, in_bytes(FunctionDescriptor::toc_offset()), function_descriptor);
++ }
++ if (load_env_of_callee) {
++ ld(R11, in_bytes(FunctionDescriptor::env_offset()), function_descriptor);
++ } else if (load_toc_of_callee) {
++ li(R11, 0);
++ }
++
++ // do a call or a branch
++ if (and_link) {
++ bctrl();
++ } else {
++ bctr();
++ }
++ _last_calls_return_pc = pc();
++
++ return _last_calls_return_pc;
++}
++
++// Call a C function via a function descriptor and use full C calling
++// conventions.
++// We don't use the TOC in generated code, so there is no need to save
++// and restore its value.
++address MacroAssembler::call_c(Register fd) {
++ return branch_to(fd, /*and_link=*/true,
++ /*save toc=*/false,
++ /*restore toc=*/false,
++ /*load toc=*/true,
++ /*load env=*/true);
++}
++
++address MacroAssembler::call_c_and_return_to_caller(Register fd) {
++ return branch_to(fd, /*and_link=*/false,
++ /*save toc=*/false,
++ /*restore toc=*/false,
++ /*load toc=*/true,
++ /*load env=*/true);
++}
++
++address MacroAssembler::call_c(const FunctionDescriptor* fd, relocInfo::relocType rt) {
++ if (rt != relocInfo::none) {
++ // this call needs to be relocatable
++ if (!ReoptimizeCallSequences
++ || (rt != relocInfo::runtime_call_type && rt != relocInfo::none)
++ || fd == NULL // support code-size estimation
++ || !fd->is_friend_function()
++ || fd->entry() == NULL) {
++ // it's not a friend function as defined by class FunctionDescriptor,
++ // so do a full call-c here.
++ load_const(R11, (address)fd, R0);
++
++ bool has_env = (fd != NULL && fd->env() != NULL);
++ return branch_to(R11, /*and_link=*/true,
++ /*save toc=*/false,
++ /*restore toc=*/false,
++ /*load toc=*/true,
++ /*load env=*/has_env);
++ } else {
++ // It's a friend function. Load the entry point and don't care about
++ // toc and env. Use an optimizable call instruction, but ensure the
++ // same code-size as in the case of a non-friend function.
++ nop();
++ nop();
++ nop();
++ bl64_patchable(fd->entry(), rt);
++ _last_calls_return_pc = pc();
++ return _last_calls_return_pc;
++ }
++ } else {
++ // This call does not need to be relocatable, do more aggressive
++ // optimizations.
++ if (!ReoptimizeCallSequences
++ || !fd->is_friend_function()) {
++ // It's not a friend function as defined by class FunctionDescriptor,
++ // so do a full call-c here.
++ load_const(R11, (address)fd, R0);
++ return branch_to(R11, /*and_link=*/true,
++ /*save toc=*/false,
++ /*restore toc=*/false,
++ /*load toc=*/true,
++ /*load env=*/true);
++ } else {
++ // it's a friend function, load the entry point and don't care about
++ // toc and env.
++ address dest = fd->entry();
++ if (is_within_range_of_b(dest, pc())) {
++ bl(dest);
++ } else {
++ bl64_patchable(dest, rt);
++ }
++ _last_calls_return_pc = pc();
++ return _last_calls_return_pc;
++ }
++ }
++}
++
++// Call a C function. All constants needed reside in TOC.
++//
++// Read the address to call from the TOC.
++// Read env from TOC, if fd specifies an env.
++// Read new TOC from TOC.
++address MacroAssembler::call_c_using_toc(const FunctionDescriptor* fd,
++ relocInfo::relocType rt, Register toc) {
++ if (!ReoptimizeCallSequences
++ || (rt != relocInfo::runtime_call_type && rt != relocInfo::none)
++ || !fd->is_friend_function()) {
++ // It's not a friend function as defined by class FunctionDescriptor,
++ // so do a full call-c here.
++ assert(fd->entry() != NULL, "function must be linked");
++
++ AddressLiteral fd_entry(fd->entry());
++ load_const_from_method_toc(R11, fd_entry, toc);
++ mtctr(R11);
++ if (fd->env() == NULL) {
++ li(R11, 0);
++ nop();
++ } else {
++ AddressLiteral fd_env(fd->env());
++ load_const_from_method_toc(R11, fd_env, toc);
++ }
++ AddressLiteral fd_toc(fd->toc());
++ load_toc_from_toc(R2_TOC, fd_toc, toc);
++ // R2_TOC is killed.
++ bctrl();
++ _last_calls_return_pc = pc();
++ } else {
++ // It's a friend function, load the entry point and don't care about
++ // toc and env. Use an optimizable call instruction, but ensure the
++ // same code-size as in the case of a non-friend function.
++ nop();
++ bl64_patchable(fd->entry(), rt);
++ _last_calls_return_pc = pc();
++ }
++ return _last_calls_return_pc;
++}
++#endif // ABI_ELFv2
++
++void MacroAssembler::call_VM_base(Register oop_result,
++ Register last_java_sp,
++ address entry_point,
++ bool check_exceptions) {
++ BLOCK_COMMENT("call_VM {");
++ // Determine last_java_sp register.
++ if (!last_java_sp->is_valid()) {
++ last_java_sp = R1_SP;
++ }
++ set_top_ijava_frame_at_SP_as_last_Java_frame(last_java_sp, R11_scratch1);
++
++ // ARG1 must hold thread address.
++ mr(R3_ARG1, R16_thread);
++#if defined(ABI_ELFv2)
++ address return_pc = call_c(entry_point, relocInfo::none);
++#else
++ address return_pc = call_c((FunctionDescriptor*)entry_point, relocInfo::none);
++#endif
++
++ reset_last_Java_frame();
++
++ // Check for pending exceptions.
++ if (check_exceptions) {
++    // Exception checks are not supported on this path; check_exceptions must be false.
++ ShouldNotReachHere();
++ }
++
++ // Get oop result if there is one and reset the value in the thread.
++ if (oop_result->is_valid()) {
++ get_vm_result(oop_result);
++ }
++
++ _last_calls_return_pc = return_pc;
++ BLOCK_COMMENT("} call_VM");
++}
++
++void MacroAssembler::call_VM_leaf_base(address entry_point) {
++ BLOCK_COMMENT("call_VM_leaf {");
++#if defined(ABI_ELFv2)
++ call_c(entry_point, relocInfo::none);
++#else
++ call_c(CAST_FROM_FN_PTR(FunctionDescriptor*, entry_point), relocInfo::none);
++#endif
++ BLOCK_COMMENT("} call_VM_leaf");
++}
++
++void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) {
++ call_VM_base(oop_result, noreg, entry_point, check_exceptions);
++}
++
++void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1,
++ bool check_exceptions) {
++ // R3_ARG1 is reserved for the thread.
++ mr_if_needed(R4_ARG2, arg_1);
++ call_VM(oop_result, entry_point, check_exceptions);
++}
++
++void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2,
++ bool check_exceptions) {
++ // R3_ARG1 is reserved for the thread
++ mr_if_needed(R4_ARG2, arg_1);
++ assert(arg_2 != R4_ARG2, "smashed argument");
++ mr_if_needed(R5_ARG3, arg_2);
++ call_VM(oop_result, entry_point, check_exceptions);
++}
++
++void MacroAssembler::call_VM_leaf(address entry_point) {
++ call_VM_leaf_base(entry_point);
++}
++
++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) {
++ mr_if_needed(R3_ARG1, arg_1);
++ call_VM_leaf(entry_point);
++}
++
++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) {
++ mr_if_needed(R3_ARG1, arg_1);
++ assert(arg_2 != R3_ARG1, "smashed argument");
++ mr_if_needed(R4_ARG2, arg_2);
++ call_VM_leaf(entry_point);
++}
++
++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
++ mr_if_needed(R3_ARG1, arg_1);
++ assert(arg_2 != R3_ARG1, "smashed argument");
++ mr_if_needed(R4_ARG2, arg_2);
++ assert(arg_3 != R3_ARG1 && arg_3 != R4_ARG2, "smashed argument");
++ mr_if_needed(R5_ARG3, arg_3);
++ call_VM_leaf(entry_point);
++}
++
++// Check whether instruction is a read access to the polling page
++// which was emitted by load_from_polling_page(..).
++bool MacroAssembler::is_load_from_polling_page(int instruction, void* ucontext,
++ address* polling_address_ptr) {
++ if (!is_ld(instruction))
++ return false; // It's not a ld. Fail.
++
++ int rt = inv_rt_field(instruction);
++ int ra = inv_ra_field(instruction);
++ int ds = inv_ds_field(instruction);
++ if (!(ds == 0 && ra != 0 && rt == 0)) {
++ return false; // It's not a ld(r0, X, ra). Fail.
++ }
++
++ if (!ucontext) {
++ // Set polling address.
++ if (polling_address_ptr != NULL) {
++ *polling_address_ptr = NULL;
++ }
++ return true; // No ucontext given. Can't check value of ra. Assume true.
++ }
++
++#ifdef LINUX
++ // Ucontext given. Check that register ra contains the address of
++  // the safepoint polling page.
++ ucontext_t* uc = (ucontext_t*) ucontext;
++ // Set polling address.
++ address addr = (address)uc->uc_mcontext.regs->gpr[ra] + (ssize_t)ds;
++ if (polling_address_ptr != NULL) {
++ *polling_address_ptr = addr;
++ }
++ return os::is_poll_address(addr);
++#else
++ // Not on Linux, ucontext must be NULL.
++ ShouldNotReachHere();
++ return false;
++#endif
++}
++
++bool MacroAssembler::is_memory_serialization(int instruction, JavaThread* thread, void* ucontext) {
++#ifdef LINUX
++ ucontext_t* uc = (ucontext_t*) ucontext;
++
++ if (is_stwx(instruction) || is_stwux(instruction)) {
++ int ra = inv_ra_field(instruction);
++ int rb = inv_rb_field(instruction);
++
++ // look up content of ra and rb in ucontext
++ address ra_val=(address)uc->uc_mcontext.regs->gpr[ra];
++ long rb_val=(long)uc->uc_mcontext.regs->gpr[rb];
++ return os::is_memory_serialize_page(thread, ra_val+rb_val);
++ } else if (is_stw(instruction) || is_stwu(instruction)) {
++ int ra = inv_ra_field(instruction);
++ int d1 = inv_d1_field(instruction);
++
++ // look up content of ra in ucontext
++ address ra_val=(address)uc->uc_mcontext.regs->gpr[ra];
++ return os::is_memory_serialize_page(thread, ra_val+d1);
++ } else {
++ return false;
++ }
++#else
++ // workaround not needed on !LINUX :-)
++ ShouldNotCallThis();
++ return false;
++#endif
++}
++
++void MacroAssembler::bang_stack_with_offset(int offset) {
++ // When increasing the stack, the old stack pointer will be written
++  // to the new top of stack according to the PPC64 ABI.
++ // Therefore, stack banging is not necessary when increasing
++ // the stack by <= os::vm_page_size() bytes.
++ // When increasing the stack by a larger amount, this method is
++ // called repeatedly to bang the intermediate pages.
++
++ // Stack grows down, caller passes positive offset.
++ assert(offset > 0, "must bang with positive offset");
++
++ long stdoffset = -offset;
++
++ if (is_simm(stdoffset, 16)) {
++ // Signed 16 bit offset, a simple std is ok.
++ if (UseLoadInstructionsForStackBangingPPC64) {
++ ld(R0, (int)(signed short)stdoffset, R1_SP);
++ } else {
++ std(R0,(int)(signed short)stdoffset, R1_SP);
++ }
++ } else if (is_simm(stdoffset, 31)) {
++ const int hi = MacroAssembler::largeoffset_si16_si16_hi(stdoffset);
++ const int lo = MacroAssembler::largeoffset_si16_si16_lo(stdoffset);
++
++ Register tmp = R11;
++ addis(tmp, R1_SP, hi);
++ if (UseLoadInstructionsForStackBangingPPC64) {
++ ld(R0, lo, tmp);
++ } else {
++ std(R0, lo, tmp);
++ }
++ } else {
++ ShouldNotReachHere();
++ }
++}
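++
++// Illustrative call pattern (assuming 4 KiB pages): a frame push of
++// three pages would invoke bang_stack_with_offset(page_size),
++// then (2*page_size) and (3*page_size), touching each newly
++// committed page exactly once.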
++
++// If instruction is a stack bang of the form
++// std R0, x(Ry), (see bang_stack_with_offset())
++// stdu R1_SP, x(R1_SP), (see push_frame(), resize_frame())
++// or stdux R1_SP, Rx, R1_SP (see push_frame(), resize_frame())
++// return the banged address. Otherwise, return 0.
++address MacroAssembler::get_stack_bang_address(int instruction, void *ucontext) {
++#ifdef LINUX
++ ucontext_t* uc = (ucontext_t*) ucontext;
++ int rs = inv_rs_field(instruction);
++ int ra = inv_ra_field(instruction);
++ if ( (is_ld(instruction) && rs == 0 && UseLoadInstructionsForStackBangingPPC64)
++ || (is_std(instruction) && rs == 0 && !UseLoadInstructionsForStackBangingPPC64)
++ || (is_stdu(instruction) && rs == 1)) {
++ int ds = inv_ds_field(instruction);
++ // return banged address
++ return ds+(address)uc->uc_mcontext.regs->gpr[ra];
++ } else if (is_stdux(instruction) && rs == 1) {
++ int rb = inv_rb_field(instruction);
++ address sp = (address)uc->uc_mcontext.regs->gpr[1];
++ long rb_val = (long)uc->uc_mcontext.regs->gpr[rb];
++ return ra != 1 || rb_val >= 0 ? NULL // not a stack bang
++ : sp + rb_val; // banged address
++ }
++ return NULL; // not a stack bang
++#else
++ // workaround not needed on !LINUX :-)
++ ShouldNotCallThis();
++ return NULL;
++#endif
++}
++
++// CmpxchgX sets condition register to cmpX(current, compare).
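++// C-like sketch of the emitted semantics (illustration, not emitted code):
++//   dest_current_value = *addr_base;
++//   if (dest_current_value == compare_value) *addr_base = exchange_value;
++//   flag = cmp(dest_current_value, compare_value); // eq <=> swapped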
++void MacroAssembler::cmpxchgw(ConditionRegister flag, Register dest_current_value,
++ Register compare_value, Register exchange_value,
++ Register addr_base, int semantics, bool cmpxchgx_hint,
++ Register int_flag_success, bool contention_hint) {
++ Label retry;
++ Label failed;
++ Label done;
++
++ // Save one branch if result is returned via register and
++ // result register is different from the other ones.
++ bool use_result_reg = (int_flag_success != noreg);
++ bool preset_result_reg = (int_flag_success != dest_current_value && int_flag_success != compare_value &&
++ int_flag_success != exchange_value && int_flag_success != addr_base);
++
++ // release/fence semantics
++ if (semantics & MemBarRel) {
++ release();
++ }
++
++ if (use_result_reg && preset_result_reg) {
++ li(int_flag_success, 0); // preset (assume cas failed)
++ }
++
++ // Add simple guard in order to reduce risk of starving under high contention (recommended by IBM).
++ if (contention_hint) { // Don't try to reserve if cmp fails.
++ lwz(dest_current_value, 0, addr_base);
++ cmpw(flag, dest_current_value, compare_value);
++ bne(flag, failed);
++ }
++
++ // atomic emulation loop
++ bind(retry);
++
++ lwarx(dest_current_value, addr_base, cmpxchgx_hint);
++ cmpw(flag, dest_current_value, compare_value);
++ if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
++ bne_predict_not_taken(flag, failed);
++ } else {
++ bne( flag, failed);
++ }
++ // branch to done => (flag == ne), (dest_current_value != compare_value)
++ // fall through => (flag == eq), (dest_current_value == compare_value)
++
++ stwcx_(exchange_value, addr_base);
++ if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
++ bne_predict_not_taken(CCR0, retry); // StXcx_ sets CCR0.
++ } else {
++ bne( CCR0, retry); // StXcx_ sets CCR0.
++ }
++ // fall through => (flag == eq), (dest_current_value == compare_value), (swapped)
++
++ // Result in register (must do this at the end because int_flag_success can be the
++ // same register as one above).
++ if (use_result_reg) {
++ li(int_flag_success, 1);
++ }
++
++ if (semantics & MemBarFenceAfter) {
++ fence();
++ } else if (semantics & MemBarAcq) {
++ isync();
++ }
++
++ if (use_result_reg && !preset_result_reg) {
++ b(done);
++ }
++
++ bind(failed);
++ if (use_result_reg && !preset_result_reg) {
++ li(int_flag_success, 0);
++ }
++
++ bind(done);
++ // (flag == ne) => (dest_current_value != compare_value), (!swapped)
++ // (flag == eq) => (dest_current_value == compare_value), ( swapped)
++}
++
++// Performs atomic compare exchange:
++// if (compare_value == *addr_base)
++// *addr_base = exchange_value
++// int_flag_success = 1;
++// else
++// int_flag_success = 0;
++//
++// ConditionRegister flag = cmp(compare_value, *addr_base)
++// Register dest_current_value = *addr_base
++// Register compare_value Used to compare with value in memory
++// Register exchange_value Written to memory if compare_value == *addr_base
++// Register addr_base The memory location to compareXChange
++// Register int_flag_success Set to 1 if exchange_value was written to *addr_base
++//
++// To avoid the costly compare exchange, the value is tested beforehand.
++// Several special cases exist to avoid emitting unnecessary code.
++//
++void MacroAssembler::cmpxchgd(ConditionRegister flag,
++ Register dest_current_value, Register compare_value, Register exchange_value,
++ Register addr_base, int semantics, bool cmpxchgx_hint,
++ Register int_flag_success, Label* failed_ext, bool contention_hint) {
++ Label retry;
++ Label failed_int;
++ Label& failed = (failed_ext != NULL) ? *failed_ext : failed_int;
++ Label done;
++
++ // Save one branch if result is returned via register and result register is different from the other ones.
++ bool use_result_reg = (int_flag_success!=noreg);
++ bool preset_result_reg = (int_flag_success!=dest_current_value && int_flag_success!=compare_value &&
++ int_flag_success!=exchange_value && int_flag_success!=addr_base);
++ assert(int_flag_success == noreg || failed_ext == NULL, "cannot have both");
++
++ // release/fence semantics
++ if (semantics & MemBarRel) {
++ release();
++ }
++
++ if (use_result_reg && preset_result_reg) {
++ li(int_flag_success, 0); // preset (assume cas failed)
++ }
++
++ // Add simple guard in order to reduce risk of starving under high contention (recommended by IBM).
++ if (contention_hint) { // Don't try to reserve if cmp fails.
++ ld(dest_current_value, 0, addr_base);
++ cmpd(flag, dest_current_value, compare_value);
++ bne(flag, failed);
++ }
++
++ // atomic emulation loop
++ bind(retry);
++
++ ldarx(dest_current_value, addr_base, cmpxchgx_hint);
++ cmpd(flag, dest_current_value, compare_value);
++ if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
++ bne_predict_not_taken(flag, failed);
++ } else {
++ bne( flag, failed);
++ }
++
++ stdcx_(exchange_value, addr_base);
++ if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
++ bne_predict_not_taken(CCR0, retry); // stXcx_ sets CCR0
++ } else {
++ bne( CCR0, retry); // stXcx_ sets CCR0
++ }
++
++ // result in register (must do this at the end because int_flag_success can be the same register as one above)
++ if (use_result_reg) {
++ li(int_flag_success, 1);
++ }
++
++ // POWER6 doesn't need isync in CAS.
++ // Always emit isync to be on the safe side.
++ if (semantics & MemBarFenceAfter) {
++ fence();
++ } else if (semantics & MemBarAcq) {
++ isync();
++ }
++
++ if (use_result_reg && !preset_result_reg) {
++ b(done);
++ }
++
++ bind(failed_int);
++ if (use_result_reg && !preset_result_reg) {
++ li(int_flag_success, 0);
++ }
++
++ bind(done);
++ // (flag == ne) => (dest_current_value != compare_value), (!swapped)
++ // (flag == eq) => (dest_current_value == compare_value), ( swapped)
++}
++
++// Look up the method for a megamorphic invokeinterface call.
++// The target method is determined by <intf_klass, itable_index>.
++// The receiver klass is in recv_klass.
++// On success, the result will be in method_result, and execution falls through.
++// On failure, execution transfers to the given label.
++void MacroAssembler::lookup_interface_method(Register recv_klass,
++ Register intf_klass,
++ RegisterOrConstant itable_index,
++ Register method_result,
++ Register scan_temp,
++ Register sethi_temp,
++ Label& L_no_such_interface) {
++ assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
++ assert(itable_index.is_constant() || itable_index.as_register() == method_result,
++ "caller must use same register for non-constant itable index as for method");
++
++ // Compute start of first itableOffsetEntry (which is at the end of the vtable).
++ int vtable_base = InstanceKlass::vtable_start_offset() * wordSize;
++ int itentry_off = itableMethodEntry::method_offset_in_bytes();
++ int logMEsize = exact_log2(itableMethodEntry::size() * wordSize);
++ int scan_step = itableOffsetEntry::size() * wordSize;
++ int log_vte_size= exact_log2(vtableEntry::size() * wordSize);
++
++ lwz(scan_temp, InstanceKlass::vtable_length_offset() * wordSize, recv_klass);
++ // %%% We should store the aligned, prescaled offset in the klassoop.
++ // Then the next several instructions would fold away.
++
++ sldi(scan_temp, scan_temp, log_vte_size);
++ addi(scan_temp, scan_temp, vtable_base);
++ add(scan_temp, recv_klass, scan_temp);
++
++ // Adjust recv_klass by scaled itable_index, so we can free itable_index.
++ if (itable_index.is_register()) {
++ Register itable_offset = itable_index.as_register();
++ sldi(itable_offset, itable_offset, logMEsize);
++ if (itentry_off) addi(itable_offset, itable_offset, itentry_off);
++ add(recv_klass, itable_offset, recv_klass);
++ } else {
++ long itable_offset = (long)itable_index.as_constant();
++ load_const_optimized(sethi_temp, (itable_offset<<logMEsize)+itentry_off); // static address, no relocation
++ add(recv_klass, sethi_temp, recv_klass);
++ }
++
++ // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
++ // if (scan->interface() == intf) {
++ // result = (klass + scan->offset() + itable_index);
++ // }
++ // }
++ Label search, found_method;
++
++ for (int peel = 1; peel >= 0; peel--) {
++ // %%%% Could load both offset and interface in one ldx, if they were
++ // in the opposite order. This would save a load.
++ ld(method_result, itableOffsetEntry::interface_offset_in_bytes(), scan_temp);
++
++ // Check that this entry is non-null. A null entry means that
++ // the receiver class doesn't implement the interface, and wasn't the
++ // same as when the caller was compiled.
++ cmpd(CCR0, method_result, intf_klass);
++
++ if (peel) {
++ beq(CCR0, found_method);
++ } else {
++ bne(CCR0, search);
++ // (invert the test to fall through to found_method...)
++ }
++
++ if (!peel) break;
++
++ bind(search);
++
++ cmpdi(CCR0, method_result, 0);
++ beq(CCR0, L_no_such_interface);
++ addi(scan_temp, scan_temp, scan_step);
++ }
++
++ bind(found_method);
++
++ // Got a hit.
++ int ito_offset = itableOffsetEntry::offset_offset_in_bytes();
++ lwz(scan_temp, ito_offset, scan_temp);
++ ldx(method_result, scan_temp, recv_klass);
++}
++
++// virtual method calling
++void MacroAssembler::lookup_virtual_method(Register recv_klass,
++ RegisterOrConstant vtable_index,
++ Register method_result) {
++
++ assert_different_registers(recv_klass, method_result, vtable_index.register_or_noreg());
++
++ const int base = InstanceKlass::vtable_start_offset() * wordSize;
++ assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
++
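++ // Effective load performed below (sketch):
++ //   R19_method = *(recv_klass + vtable_index * wordSize
++ //                  + InstanceKlass::vtable_start_offset() * wordSize
++ //                  + vtableEntry::method_offset_in_bytes())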
++ if (vtable_index.is_register()) {
++ sldi(vtable_index.as_register(), vtable_index.as_register(), LogBytesPerWord);
++ add(recv_klass, vtable_index.as_register(), recv_klass);
++ } else {
++ addi(recv_klass, recv_klass, vtable_index.as_constant() << LogBytesPerWord);
++ }
++ ld(R19_method, base + vtableEntry::method_offset_in_bytes(), recv_klass);
++}
++
++/////////////////////////////////////////// subtype checking ////////////////////////////////////////////
++
++void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
++ Register super_klass,
++ Register temp1_reg,
++ Register temp2_reg,
++ Label& L_success,
++ Label& L_failure) {
++
++ const Register check_cache_offset = temp1_reg;
++ const Register cached_super = temp2_reg;
++
++ assert_different_registers(sub_klass, super_klass, check_cache_offset, cached_super);
++
++ int sco_offset = in_bytes(Klass::super_check_offset_offset());
++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
++
++ // If the pointers are equal, we are done (e.g., String[] elements).
++ // This self-check enables sharing of secondary supertype arrays among
++ // non-primary types such as array-of-interface. Otherwise, each such
++ // type would need its own customized SSA.
++ // We move this check to the front of the fast path because many
++ // type checks are in fact trivially successful in this manner,
++ // so we get a nicely predicted branch right at the start of the check.
++ cmpd(CCR0, sub_klass, super_klass);
++ beq(CCR0, L_success);
++
++ // Check the supertype display:
++ lwz(check_cache_offset, sco_offset, super_klass);
++ // The loaded value is the offset from KlassOopDesc.
++
++ ldx(cached_super, check_cache_offset, sub_klass);
++ cmpd(CCR0, cached_super, super_klass);
++ beq(CCR0, L_success);
++
++ // This check has worked decisively for primary supers.
++ // Secondary supers are sought in the super_cache ('super_cache_addr').
++ // (Secondary supers are interfaces and very deeply nested subtypes.)
++ // This works in the same check above because of a tricky aliasing
++ // between the super_cache and the primary super display elements.
++ // (The 'super_check_addr' can address either, as the case requires.)
++ // Note that the cache is updated below if it does not help us find
++ // what we need immediately.
++ // So if it was a primary super, we can just fail immediately.
++ // Otherwise, it's the slow path for us (no success at this point).
++
++ cmpwi(CCR0, check_cache_offset, sc_offset);
++ bne(CCR0, L_failure);
++ // bind(slow_path); // fallthru
++}
++
++void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
++ Register super_klass,
++ Register temp1_reg,
++ Register temp2_reg,
++ Label* L_success,
++ Register result_reg) {
++ const Register array_ptr = temp1_reg; // current value from cache array
++ const Register temp = temp2_reg;
++
++ assert_different_registers(sub_klass, super_klass, array_ptr, temp);
++
++ int source_offset = in_bytes(Klass::secondary_supers_offset());
++ int target_offset = in_bytes(Klass::secondary_super_cache_offset());
++
++ int length_offset = Array<Klass*>::length_offset_in_bytes();
++ int base_offset = Array<Klass*>::base_offset_in_bytes();
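++ // Layout assumed by the scan below (sketch): Array<Klass*> stores an int
++ // length followed by the Klass* elements, so the loop loads one word per
++ // iteration and advances array_ptr by BytesPerWord.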
++
++ Label hit, loop, failure, fallthru;
++
++ ld(array_ptr, source_offset, sub_klass);
++
++ //assert(4 == arrayOopDesc::length_length_in_bytes(), "precondition violated.");
++ lwz(temp, length_offset, array_ptr);
++ cmpwi(CCR0, temp, 0);
++ beq(CCR0, result_reg!=noreg ? failure : fallthru); // length 0
++
++ mtctr(temp); // load ctr
++
++ bind(loop);
++ // Oops in table are NO MORE compressed.
++ ld(temp, base_offset, array_ptr);
++ cmpd(CCR0, temp, super_klass);
++ beq(CCR0, hit);
++ addi(array_ptr, array_ptr, BytesPerWord);
++ bdnz(loop);
++
++ bind(failure);
++ if (result_reg!=noreg) li(result_reg, 1); // load non-zero result (indicates a miss)
++ b(fallthru);
++
++ bind(hit);
++ std(super_klass, target_offset, sub_klass); // save result to cache
++ if (result_reg != noreg) li(result_reg, 0); // load zero result (indicates a hit)
++ if (L_success != NULL) b(*L_success);
++
++ bind(fallthru);
++}
++
++// Try fast path, then go to slow one if not successful
++void MacroAssembler::check_klass_subtype(Register sub_klass,
++ Register super_klass,
++ Register temp1_reg,
++ Register temp2_reg,
++ Label& L_success) {
++ Label L_failure;
++ check_klass_subtype_fast_path(sub_klass, super_klass, temp1_reg, temp2_reg, L_success, L_failure);
++ check_klass_subtype_slow_path(sub_klass, super_klass, temp1_reg, temp2_reg, &L_success);
++ bind(L_failure); // Fallthru if not successful.
++}
++
++void MacroAssembler::check_method_handle_type(Register mtype_reg, Register mh_reg,
++ Register temp_reg,
++ Label& wrong_method_type) {
++ assert_different_registers(mtype_reg, mh_reg, temp_reg);
++ // Compare method type against that of the receiver.
++ load_heap_oop_not_null(temp_reg, delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg), mh_reg);
++ cmpd(CCR0, temp_reg, mtype_reg);
++ bne(CCR0, wrong_method_type);
++}
++
++RegisterOrConstant MacroAssembler::argument_offset(RegisterOrConstant arg_slot,
++ Register temp_reg,
++ int extra_slot_offset) {
++ // cf. TemplateTable::prepare_invoke(), if (load_receiver).
++ int stackElementSize = Interpreter::stackElementSize;
++ int offset = extra_slot_offset * stackElementSize;
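++ // Illustrative example: with stackElementSize == 8 (one word per slot on
++ // PPC64), arg_slot == 3 and extra_slot_offset == 1, the constant path
++ // returns (1 + 3) * 8 = 32 bytes.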
++ if (arg_slot.is_constant()) {
++ offset += arg_slot.as_constant() * stackElementSize;
++ return offset;
++ } else {
++ assert(temp_reg != noreg, "must specify");
++ sldi(temp_reg, arg_slot.as_register(), exact_log2(stackElementSize));
++ if (offset != 0)
++ addi(temp_reg, temp_reg, offset);
++ return temp_reg;
++ }
++}
++
++void MacroAssembler::biased_locking_enter(ConditionRegister cr_reg, Register obj_reg,
++ Register mark_reg, Register temp_reg,
++ Register temp2_reg, Label& done, Label* slow_case) {
++ assert(UseBiasedLocking, "why call this otherwise?");
++
++#ifdef ASSERT
++ assert_different_registers(obj_reg, mark_reg, temp_reg, temp2_reg);
++#endif
++
++ Label cas_label;
++
++ // Branch to done if fast path fails and no slow_case provided.
++ Label *slow_case_int = (slow_case != NULL) ? slow_case : &done;
++
++ // Biased locking
++ // See whether the lock is currently biased toward our thread and
++ // whether the epoch is still valid
++ // Note that the runtime guarantees sufficient alignment of JavaThread
++ // pointers to allow age to be placed into low bits
++ assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits,
++ "biased locking makes assumptions about bit layout");
++
++ if (PrintBiasedLockingStatistics) {
++ load_const(temp_reg, (address) BiasedLocking::total_entry_count_addr(), temp2_reg);
++ lwz(temp2_reg, 0, temp_reg);
++ addi(temp2_reg, temp2_reg, 1);
++ stw(temp2_reg, 0, temp_reg);
++ }
++
++ andi(temp_reg, mark_reg, markOopDesc::biased_lock_mask_in_place);
++ cmpwi(cr_reg, temp_reg, markOopDesc::biased_lock_pattern);
++ bne(cr_reg, cas_label);
++
++ load_klass(temp_reg, obj_reg);
++
++ load_const_optimized(temp2_reg, ~((int) markOopDesc::age_mask_in_place));
++ ld(temp_reg, in_bytes(Klass::prototype_header_offset()), temp_reg);
++ orr(temp_reg, R16_thread, temp_reg);
++ xorr(temp_reg, mark_reg, temp_reg);
++ andr(temp_reg, temp_reg, temp2_reg);
++ cmpdi(cr_reg, temp_reg, 0);
++ if (PrintBiasedLockingStatistics) {
++ Label l;
++ bne(cr_reg, l);
++ load_const(mark_reg, (address) BiasedLocking::biased_lock_entry_count_addr());
++ lwz(temp2_reg, 0, mark_reg);
++ addi(temp2_reg, temp2_reg, 1);
++ stw(temp2_reg, 0, mark_reg);
++ // restore mark_reg
++ ld(mark_reg, oopDesc::mark_offset_in_bytes(), obj_reg);
++ bind(l);
++ }
++ beq(cr_reg, done);
++
++ Label try_revoke_bias;
++ Label try_rebias;
++
++ // At this point we know that the header has the bias pattern and
++ // that we are not the bias owner in the current epoch. We need to
++ // figure out more details about the state of the header in order to
++ // know what operations can be legally performed on the object's
++ // header.
++
++ // If the low three bits in the xor result aren't clear, that means
++ // the prototype header is no longer biased and we have to revoke
++ // the bias on this object.
++ andi(temp2_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
++ cmpwi(cr_reg, temp2_reg, 0);
++ bne(cr_reg, try_revoke_bias);
++
++ // Biasing is still enabled for this data type. See whether the
++ // epoch of the current bias is still valid, meaning that the epoch
++ // bits of the mark word are equal to the epoch bits of the
++ // prototype header. (Note that the prototype header's epoch bits
++ // only change at a safepoint.) If not, attempt to rebias the object
++ // toward the current thread. Note that we must be absolutely sure
++ // that the current epoch is invalid in order to do this because
++ // otherwise the manipulations it performs on the mark word are
++ // illegal.
++
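++ // Illustration (sketch, assuming the usual mark word layout with
++ // epoch_shift == 7 and epoch_bits == 2): the rotate below moves mark
++ // bits [8:7] down to bits [1:0] and clears the rest, so the recorded
++ // condition is ne exactly when the epoch bits differ.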
++ int shift_amount = 64 - markOopDesc::epoch_shift;
++ // rotate epoch bits to right (little) end and set other bits to 0
++ // [ big part | epoch | little part ] -> [ 0..0 | epoch ]
++ rldicl_(temp2_reg, temp_reg, shift_amount, 64 - markOopDesc::epoch_bits);
++ // branch if epoch bits are != 0, i.e. they differ, because the epoch has been incremented
++ bne(CCR0, try_rebias);
++
++ // The epoch of the current bias is still valid but we know nothing
++ // about the owner; it might be set or it might be clear. Try to
++ // acquire the bias of the object using an atomic operation. If this
++ // fails we will go in to the runtime to revoke the object's bias.
++ // Note that we first construct the presumed unbiased header so we
++ // don't accidentally blow away another thread's valid bias.
++ andi(mark_reg, mark_reg, (markOopDesc::biased_lock_mask_in_place |
++ markOopDesc::age_mask_in_place |
++ markOopDesc::epoch_mask_in_place));
++ orr(temp_reg, R16_thread, mark_reg);
++
++ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
++
++ // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg).
++ fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ?
++ cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg,
++ /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg,
++ /*where=*/obj_reg,
++ MacroAssembler::MemBarAcq,
++ MacroAssembler::cmpxchgx_hint_acquire_lock(),
++ noreg, slow_case_int); // bail out if failed
++
++ // If the biasing toward our thread failed, this means that
++ // another thread succeeded in biasing it toward itself and we
++ // need to revoke that bias. The revocation will occur in the
++ // interpreter runtime in the slow case.
++ if (PrintBiasedLockingStatistics) {
++ load_const(temp_reg, (address) BiasedLocking::anonymously_biased_lock_entry_count_addr(), temp2_reg);
++ lwz(temp2_reg, 0, temp_reg);
++ addi(temp2_reg, temp2_reg, 1);
++ stw(temp2_reg, 0, temp_reg);
++ }
++ b(done);
++
++ bind(try_rebias);
++ // At this point we know the epoch has expired, meaning that the
++ // current "bias owner", if any, is actually invalid. Under these
++ // circumstances _only_, we are allowed to use the current header's
++ // value as the comparison value when doing the cas to acquire the
++ // bias in the current epoch. In other words, we allow transfer of
++ // the bias from one thread to another directly in this situation.
++ andi(temp_reg, mark_reg, markOopDesc::age_mask_in_place);
++ orr(temp_reg, R16_thread, temp_reg);
++ load_klass(temp2_reg, obj_reg);
++ ld(temp2_reg, in_bytes(Klass::prototype_header_offset()), temp2_reg);
++ orr(temp_reg, temp_reg, temp2_reg);
++
++ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
++
++ // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg).
++ fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ?
++ cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg,
++ /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg,
++ /*where=*/obj_reg,
++ MacroAssembler::MemBarAcq,
++ MacroAssembler::cmpxchgx_hint_acquire_lock(),
++ noreg, slow_case_int); // bail out if failed
++
++ // If the biasing toward our thread failed, this means that
++ // another thread succeeded in biasing it toward itself and we
++ // need to revoke that bias. The revocation will occur in the
++ // interpreter runtime in the slow case.
++ if (PrintBiasedLockingStatistics) {
++ load_const(temp_reg, (address) BiasedLocking::rebiased_lock_entry_count_addr(), temp2_reg);
++ lwz(temp2_reg, 0, temp_reg);
++ addi(temp2_reg, temp2_reg, 1);
++ stw(temp2_reg, 0, temp_reg);
++ }
++ b(done);
++
++ bind(try_revoke_bias);
++ // The prototype mark in the klass doesn't have the bias bit set any
++ // more, indicating that objects of this data type are not supposed
++ // to be biased any more. We are going to try to reset the mark of
++ // this object to the prototype value and fall through to the
++ // CAS-based locking scheme. Note that if our CAS fails, it means
++ // that another thread raced us for the privilege of revoking the
++ // bias of this particular object, so it's okay to continue in the
++ // normal locking code.
++ load_klass(temp_reg, obj_reg);
++ ld(temp_reg, in_bytes(Klass::prototype_header_offset()), temp_reg);
++ andi(temp2_reg, mark_reg, markOopDesc::age_mask_in_place);
++ orr(temp_reg, temp_reg, temp2_reg);
++
++ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
++
++ // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg).
++ fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ?
++ cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg,
++ /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg,
++ /*where=*/obj_reg,
++ MacroAssembler::MemBarAcq,
++ MacroAssembler::cmpxchgx_hint_acquire_lock());
++
++ // reload markOop in mark_reg before continuing with lightweight locking
++ ld(mark_reg, oopDesc::mark_offset_in_bytes(), obj_reg);
++
++ // Fall through to the normal CAS-based lock, because no matter what
++ // the result of the above CAS, some thread must have succeeded in
++ // removing the bias bit from the object's header.
++ if (PrintBiasedLockingStatistics) {
++ Label l;
++ bne(cr_reg, l);
++ load_const(temp_reg, (address) BiasedLocking::revoked_lock_entry_count_addr(), temp2_reg);
++ lwz(temp2_reg, 0, temp_reg);
++ addi(temp2_reg, temp2_reg, 1);
++ stw(temp2_reg, 0, temp_reg);
++ bind(l);
++ }
++
++ bind(cas_label);
++}
++
++void MacroAssembler::biased_locking_exit (ConditionRegister cr_reg, Register mark_addr, Register temp_reg, Label& done) {
++ // Check for biased locking unlock case, which is a no-op
++ // Note: we do not have to check the thread ID for two reasons.
++ // First, the interpreter checks for IllegalMonitorStateException at
++ // a higher level. Second, if the bias was revoked while we held the
++ // lock, the object could not be rebiased toward another thread, so
++ // the bias bit would be clear.
++
++ ld(temp_reg, 0, mark_addr);
++ andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
++
++ cmpwi(cr_reg, temp_reg, markOopDesc::biased_lock_pattern);
++ beq(cr_reg, done);
++}
++
++// "The box" is the space on the stack where we copy the object mark.
++void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box,
++ Register temp, Register displaced_header, Register current_header) {
++ assert_different_registers(oop, box, temp, displaced_header, current_header);
++ assert(flag != CCR0, "bad condition register");
++ Label cont;
++ Label object_has_monitor;
++ Label cas_failed;
++
++ // Load markOop from object into displaced_header.
++ ld(displaced_header, oopDesc::mark_offset_in_bytes(), oop);
++
++
++ // Always do locking in runtime.
++ if (EmitSync & 0x01) {
++ cmpdi(flag, oop, 0); // Oop can't be 0 here => always false.
++ return;
++ }
++
++ if (UseBiasedLocking) {
++ biased_locking_enter(flag, oop, displaced_header, temp, current_header, cont);
++ }
++
++ // Handle existing monitor.
++ if ((EmitSync & 0x02) == 0) {
++ // The object has an existing monitor iff (mark & monitor_value) != 0.
++ andi_(temp, displaced_header, markOopDesc::monitor_value);
++ bne(CCR0, object_has_monitor);
++ }
++
++ // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
++ ori(displaced_header, displaced_header, markOopDesc::unlocked_value);
++
++ // Load Compare Value application register.
++
++ // Initialize the box. (Must happen before we update the object mark!)
++ std(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box);
++
++ // Must fence, otherwise, preceding store(s) may float below cmpxchg.
++ // Compare object markOop with mark and if equal exchange scratch1 with object markOop.
++ // CmpxchgX sets cr_reg to cmpX(current, displaced).
++ membar(Assembler::StoreStore);
++ cmpxchgd(/*flag=*/flag,
++ /*current_value=*/current_header,
++ /*compare_value=*/displaced_header,
++ /*exchange_value=*/box,
++ /*where=*/oop,
++ MacroAssembler::MemBarAcq,
++ MacroAssembler::cmpxchgx_hint_acquire_lock(),
++ noreg,
++ &cas_failed);
++ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
++
++ // If the compare-and-exchange succeeded, then we found an unlocked
++ // object and we have now locked it.
++ b(cont);
++
++ bind(cas_failed);
++ // We did not see an unlocked object so try the fast recursive case.
++
++ // Check if the owner is self by comparing the value in the markOop of object
++ // (current_header) with the stack pointer.
++ sub(current_header, current_header, R1_SP);
++ load_const_optimized(temp, (address) (~(os::vm_page_size()-1) |
++ markOopDesc::lock_mask_in_place));
++
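++ // Sketch of the test: current_header now holds markOop - SP. If the owner
++ // is a frame on our own stack, this difference is within one page and the
++ // low lock bits are clear, so the single and_ with the combined mask
++ // yields 0 exactly in the recursive case.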
++ and_(R0/*==0?*/, current_header, temp);
++ // If condition is true we are cont and hence we can store 0 as the
++ // displaced header in the box, which indicates that it is a recursive lock.
++ mcrf(flag,CCR0);
++ std(R0/*==0, perhaps*/, BasicLock::displaced_header_offset_in_bytes(), box);
++
++ // Handle existing monitor.
++ if ((EmitSync & 0x02) == 0) {
++ b(cont);
++
++ bind(object_has_monitor);
++ // The object's monitor m is unlocked iff m->owner == NULL,
++ // otherwise m->owner may contain a thread or a stack address.
++ //
++ // Try to CAS m->owner from NULL to current thread.
++ addi(temp, displaced_header, ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value);
++ li(displaced_header, 0);
++ // CmpxchgX sets flag to cmpX(current, displaced).
++ cmpxchgd(/*flag=*/flag,
++ /*current_value=*/current_header,
++ /*compare_value=*/displaced_header,
++ /*exchange_value=*/R16_thread,
++ /*where=*/temp,
++ MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
++ MacroAssembler::cmpxchgx_hint_acquire_lock());
++
++ // Store a non-null value into the box.
++ std(box, BasicLock::displaced_header_offset_in_bytes(), box);
++
++# ifdef ASSERT
++ bne(flag, cont);
++ // We have acquired the monitor, check some invariants.
++ addi(/*monitor=*/temp, temp, -ObjectMonitor::owner_offset_in_bytes());
++ // Invariant 1: _recursions should be 0.
++ //assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
++ asm_assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), temp,
++ "monitor->_recursions should be 0", -1);
++ // Invariant 2: OwnerIsThread shouldn't be 0.
++ //assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
++ //asm_assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), temp,
++ // "monitor->OwnerIsThread shouldn't be 0", -1);
++# endif
++ }
++
++ bind(cont);
++ // flag == EQ indicates success
++ // flag == NE indicates failure
++}
++
++void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box,
++ Register temp, Register displaced_header, Register current_header) {
++ assert_different_registers(oop, box, temp, displaced_header, current_header);
++ assert(flag != CCR0, "bad condition register");
++ Label cont;
++ Label object_has_monitor;
++
++ // Always do locking in runtime.
++ if (EmitSync & 0x01) {
++ cmpdi(flag, oop, 0); // Oop can't be 0 here => always false.
++ return;
++ }
++
++ if (UseBiasedLocking) {
++ biased_locking_exit(flag, oop, current_header, cont);
++ }
++
++ // Find the lock address and load the displaced header from the stack.
++ ld(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box);
++
++ // If the displaced header is 0, we have a recursive unlock.
++ cmpdi(flag, displaced_header, 0);
++ beq(flag, cont);
++
++ // Handle existing monitor.
++ if ((EmitSync & 0x02) == 0) {
++ // The object has an existing monitor iff (mark & monitor_value) != 0.
++ ld(current_header, oopDesc::mark_offset_in_bytes(), oop);
++ andi(temp, current_header, markOopDesc::monitor_value);
++ cmpdi(flag, temp, 0);
++ bne(flag, object_has_monitor);
++ }
++
++
++ // Check if it is still a lightweight lock; this is true if we see
++ // the stack address of the basicLock in the markOop of the object.
++ // Cmpxchg sets flag to cmpd(current_header, box).
++ cmpxchgd(/*flag=*/flag,
++ /*current_value=*/current_header,
++ /*compare_value=*/box,
++ /*exchange_value=*/displaced_header,
++ /*where=*/oop,
++ MacroAssembler::MemBarRel,
++ MacroAssembler::cmpxchgx_hint_release_lock(),
++ noreg,
++ &cont);
++
++ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
++
++ // Handle existing monitor.
++ if ((EmitSync & 0x02) == 0) {
++ b(cont);
++
++ bind(object_has_monitor);
++ addi(current_header, current_header, -markOopDesc::monitor_value); // monitor
++ ld(temp, ObjectMonitor::owner_offset_in_bytes(), current_header);
++ ld(displaced_header, ObjectMonitor::recursions_offset_in_bytes(), current_header);
++ xorr(temp, R16_thread, temp); // Will be 0 if we are the owner.
++ orr(temp, temp, displaced_header); // Will be 0 if there are 0 recursions.
++ cmpdi(flag, temp, 0);
++ bne(flag, cont);
++
++ ld(temp, ObjectMonitor::EntryList_offset_in_bytes(), current_header);
++ ld(displaced_header, ObjectMonitor::cxq_offset_in_bytes(), current_header);
++ orr(temp, temp, displaced_header); // Will be 0 if both are 0.
++ cmpdi(flag, temp, 0);
++ bne(flag, cont);
++ release();
++ std(temp, ObjectMonitor::owner_offset_in_bytes(), current_header);
++ }
++
++ bind(cont);
++ // flag == EQ indicates success
++ // flag == NE indicates failure
++}
++
++// Write serialization page so VM thread can do a pseudo remote membar.
++// We use the current thread pointer to calculate a thread specific
++// offset to write to within the page. This minimizes bus traffic
++// due to cache line collision.
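++// Address computed below (sketch, assuming a power-of-two page size):
++//   offset = (thread >> serialize_page_shift_count) & (page_size - sizeof(int));
++//   stw R0 -> memory_serialize_page + offset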
++void MacroAssembler::serialize_memory(Register thread, Register tmp1, Register tmp2) {
++ srdi(tmp2, thread, os::get_serialize_page_shift_count());
++
++ int mask = os::vm_page_size() - sizeof(int);
++ if (Assembler::is_simm(mask, 16)) {
++ andi(tmp2, tmp2, mask);
++ } else {
++ lis(tmp1, (int)((signed short) (mask >> 16)));
++ ori(tmp1, tmp1, mask & 0x0000ffff);
++ andr(tmp2, tmp2, tmp1);
++ }
++
++ load_const(tmp1, (long) os::get_memory_serialize_page());
++ release();
++ stwx(R0, tmp1, tmp2);
++}
++
++
++// GC barrier helper macros
++
++// Write the card table byte if needed.
++void MacroAssembler::card_write_barrier_post(Register Rstore_addr, Register Rnew_val, Register Rtmp) {
++ CardTableModRefBS* bs = (CardTableModRefBS*) Universe::heap()->barrier_set();
++ assert(bs->kind() == BarrierSet::CardTableModRef ||
++ bs->kind() == BarrierSet::CardTableExtension, "wrong barrier");
++#ifdef ASSERT
++ cmpdi(CCR0, Rnew_val, 0);
++ asm_assert_ne("null oop not allowed", 0x321);
++#endif
++ card_table_write(bs->byte_map_base, Rtmp, Rstore_addr);
++}
++
++// Write the card table byte.
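++// Sketch: the byte written lives at byte_map_base + (obj >> card_shift);
++// card_shift is 9 in HotSpot (512-byte cards), noted here for illustration.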
++void MacroAssembler::card_table_write(jbyte* byte_map_base, Register Rtmp, Register Robj) {
++ assert_different_registers(Robj, Rtmp, R0);
++ load_const_optimized(Rtmp, (address)byte_map_base, R0);
++ srdi(Robj, Robj, CardTableModRefBS::card_shift);
++ li(R0, 0); // dirty
++ if (UseConcMarkSweepGC) membar(Assembler::StoreStore);
++ stbx(R0, Rtmp, Robj);
++}
++
++#if INCLUDE_ALL_GCS
++// General G1 pre-barrier generator.
++// Goal: record the previous value if it is not null.
++void MacroAssembler::g1_write_barrier_pre(Register Robj, RegisterOrConstant offset, Register Rpre_val,
++ Register Rtmp1, Register Rtmp2, bool needs_frame) {
++ Label runtime, filtered;
++
++ // Is marking active?
++ if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
++ lwz(Rtmp1, in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_active()), R16_thread);
++ } else {
++ guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
++ lbz(Rtmp1, in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_active()), R16_thread);
++ }
++ cmpdi(CCR0, Rtmp1, 0);
++ beq(CCR0, filtered);
++
++ // Do we need to load the previous value?
++ if (Robj != noreg) {
++ // Load the previous value...
++ if (UseCompressedOops) {
++ lwz(Rpre_val, offset, Robj);
++ } else {
++ ld(Rpre_val, offset, Robj);
++ }
++ // Previous value has been loaded into Rpre_val.
++ }
++ assert(Rpre_val != noreg, "must have a real register");
++
++ // Is the previous value null?
++ cmpdi(CCR0, Rpre_val, 0);
++ beq(CCR0, filtered);
++
++ if (Robj != noreg && UseCompressedOops) {
++ decode_heap_oop_not_null(Rpre_val);
++ }
++
++ // OK, it's not filtered, so we'll need to call enqueue. In the normal
++ // case, pre_val will be a scratch G-reg, but there are some cases in
++ // which it's an O-reg. In the first case, do a normal call. In the
++ // latter, do a save here and call the frameless version.
++
++ // Can we store original value in the thread's buffer?
++ // Is index == 0?
++ // (The index field is typed as size_t.)
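++ // Queue sketch: the SATB buffer fills top-down, so index == 0 means full
++ // and we must call the runtime; otherwise index is decremented by wordSize
++ // and the previous value is stored at buffer + index, as done below.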
++ const Register Rbuffer = Rtmp1, Rindex = Rtmp2;
++
++ ld(Rindex, in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_index()), R16_thread);
++ cmpdi(CCR0, Rindex, 0);
++ beq(CCR0, runtime); // If index == 0, goto runtime.
++ ld(Rbuffer, in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_buf()), R16_thread);
++
++ addi(Rindex, Rindex, -wordSize); // Decrement index.
++ std(Rindex, in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_index()), R16_thread);
++
++ // Record the previous value.
++ stdx(Rpre_val, Rbuffer, Rindex);
++ b(filtered);
++
++ bind(runtime);
++
++ // The VM call needs a frame to access (write) the O registers.
++ if (needs_frame) {
++ save_LR_CR(Rtmp1);
++ push_frame_reg_args(0, Rtmp2);
++ }
++
++ if (Rpre_val->is_volatile() && Robj == noreg) mr(R31, Rpre_val); // Save pre_val across C call if it was preloaded.
++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), Rpre_val, R16_thread);
++ if (Rpre_val->is_volatile() && Robj == noreg) mr(Rpre_val, R31); // restore
++
++ if (needs_frame) {
++ pop_frame();
++ restore_LR_CR(Rtmp1);
++ }
++
++ bind(filtered);
++}
++
++// General G1 post-barrier generator
++// Store cross-region card.
++void MacroAssembler::g1_write_barrier_post(Register Rstore_addr, Register Rnew_val, Register Rtmp1, Register Rtmp2, Register Rtmp3, Label *filtered_ext) {
++ Label runtime, filtered_int;
++ Label& filtered = (filtered_ext != NULL) ? *filtered_ext : filtered_int;
++ assert_different_registers(Rstore_addr, Rnew_val, Rtmp1, Rtmp2);
++
++ G1SATBCardTableModRefBS* bs = (G1SATBCardTableModRefBS*) Universe::heap()->barrier_set();
++ assert(bs->kind() == BarrierSet::G1SATBCT ||
++ bs->kind() == BarrierSet::G1SATBCTLogging, "wrong barrier");
++
++ // Does store cross heap regions?
++ if (G1RSBarrierRegionFilter) {
++ xorr(Rtmp1, Rstore_addr, Rnew_val);
++ srdi_(Rtmp1, Rtmp1, HeapRegion::LogOfHRGrainBytes);
++ beq(CCR0, filtered);
++ }
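++ // Filter sketch: the xorr exposes the bits in which store address and new
++ // value differ; shifting right by LogOfHRGrainBytes leaves 0 exactly when
++ // both lie in the same heap region, so same-region stores skip the barrier.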
++
++ // Crosses regions, storing NULL?
++#ifdef ASSERT
++ cmpdi(CCR0, Rnew_val, 0);
++ asm_assert_ne("null oop not allowed (G1)", 0x322); // Checked by caller on PPC64, so following branch is obsolete:
++ //beq(CCR0, filtered);
++#endif
++
++ // Storing region crossing non-NULL, is card already dirty?
++ assert(sizeof(*bs->byte_map_base) == sizeof(jbyte), "adjust this code");
++ const Register Rcard_addr = Rtmp1;
++ Register Rbase = Rtmp2;
++ load_const_optimized(Rbase, (address)bs->byte_map_base, /*temp*/ Rtmp3);
++
++ srdi(Rcard_addr, Rstore_addr, CardTableModRefBS::card_shift);
++
++ // Get the address of the card.
++ lbzx(/*card value*/ Rtmp3, Rbase, Rcard_addr);
++ cmpwi(CCR0, Rtmp3, (int)G1SATBCardTableModRefBS::g1_young_card_val());
++ beq(CCR0, filtered);
++
++ membar(Assembler::StoreLoad);
++ lbzx(/*card value*/ Rtmp3, Rbase, Rcard_addr); // Reload after membar.
++ cmpwi(CCR0, Rtmp3 /* card value */, CardTableModRefBS::dirty_card_val());
++ beq(CCR0, filtered);
++
++ // Storing a region crossing, non-NULL oop, card is clean.
++ // Dirty card and log.
++ li(Rtmp3, CardTableModRefBS::dirty_card_val());
++ //release(); // G1: oops are allowed to get visible after dirty marking.
++ stbx(Rtmp3, Rbase, Rcard_addr);
++
++ add(Rcard_addr, Rbase, Rcard_addr); // This is the address which needs to get enqueued.
++ Rbase = noreg; // end of lifetime
++
++ const Register Rqueue_index = Rtmp2,
++ Rqueue_buf = Rtmp3;
++ ld(Rqueue_index, in_bytes(JavaThread::dirty_card_queue_offset() + PtrQueue::byte_offset_of_index()), R16_thread);
++ cmpdi(CCR0, Rqueue_index, 0);
++ beq(CCR0, runtime); // index == 0 then jump to runtime
++ ld(Rqueue_buf, in_bytes(JavaThread::dirty_card_queue_offset() + PtrQueue::byte_offset_of_buf()), R16_thread);
++
++ addi(Rqueue_index, Rqueue_index, -wordSize); // decrement index
++ std(Rqueue_index, in_bytes(JavaThread::dirty_card_queue_offset() + PtrQueue::byte_offset_of_index()), R16_thread);
++
++ stdx(Rcard_addr, Rqueue_buf, Rqueue_index); // store card
++ b(filtered);
++
++ bind(runtime);
++
++ // Save the live input values.
++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), Rcard_addr, R16_thread);
++
++ bind(filtered_int);
++}
++#endif // INCLUDE_ALL_GCS
++
++// Values for last_Java_pc, and last_Java_sp must comply to the rules
++// in frame_ppc64.hpp.
++void MacroAssembler::set_last_Java_frame(Register last_Java_sp, Register last_Java_pc) {
++ // Always set last_Java_pc and flags first because once last_Java_sp
++ // is visible, has_last_Java_frame is true and users will look at the
++ // rest of the fields. (Note: flags should always be zero before we
++ // get here so doesn't need to be set.)
++
++ // Verify that last_Java_pc was zeroed on return to Java
++ asm_assert_mem8_is_zero(in_bytes(JavaThread::last_Java_pc_offset()), R16_thread,
++ "last_Java_pc not zeroed before leaving Java", 0x200);
++
++ // When returning from calling out from Java mode the frame anchor's
++ // last_Java_pc will always be set to NULL. It is set here so that
++ // if we are doing a call to native (not VM) that we capture the
++ // known pc and don't have to rely on the native call having a
++ // standard frame linkage where we can find the pc.
++ if (last_Java_pc != noreg)
++ std(last_Java_pc, in_bytes(JavaThread::last_Java_pc_offset()), R16_thread);
++
++ // Set last_Java_sp last.
++ std(last_Java_sp, in_bytes(JavaThread::last_Java_sp_offset()), R16_thread);
++}
++
++void MacroAssembler::reset_last_Java_frame(void) {
++ asm_assert_mem8_isnot_zero(in_bytes(JavaThread::last_Java_sp_offset()),
++ R16_thread, "SP was not set, still zero", 0x202);
++
++ BLOCK_COMMENT("reset_last_Java_frame {");
++ li(R0, 0);
++
++ // _last_Java_sp = 0
++ std(R0, in_bytes(JavaThread::last_Java_sp_offset()), R16_thread);
++
++ // _last_Java_pc = 0
++ std(R0, in_bytes(JavaThread::last_Java_pc_offset()), R16_thread);
++ BLOCK_COMMENT("} reset_last_Java_frame");
++}
++
++void MacroAssembler::set_top_ijava_frame_at_SP_as_last_Java_frame(Register sp, Register tmp1) {
++ assert_different_registers(sp, tmp1);
++
++ // sp points to a TOP_IJAVA_FRAME, retrieve frame's PC via
++ // TOP_IJAVA_FRAME_ABI.
++ // FIXME: assert that we really have a TOP_IJAVA_FRAME here!
++#ifdef CC_INTERP
++ ld(tmp1/*pc*/, _top_ijava_frame_abi(frame_manager_lr), sp);
++#else
++ address entry = pc();
++ load_const_optimized(tmp1, entry);
++#endif
++
++ set_last_Java_frame(/*sp=*/sp, /*pc=*/tmp1);
++}
++
++void MacroAssembler::get_vm_result(Register oop_result) {
++ // Read:
++ // R16_thread
++ // R16_thread->in_bytes(JavaThread::vm_result_offset())
++ //
++ // Updated:
++ // oop_result
++ // R16_thread->in_bytes(JavaThread::vm_result_offset())
++
++ ld(oop_result, in_bytes(JavaThread::vm_result_offset()), R16_thread);
++ li(R0, 0);
++ std(R0, in_bytes(JavaThread::vm_result_offset()), R16_thread);
++
++ verify_oop(oop_result);
++}
++
++void MacroAssembler::get_vm_result_2(Register metadata_result) {
++ // Read:
++ // R16_thread
++ // R16_thread->in_bytes(JavaThread::vm_result_2_offset())
++ //
++ // Updated:
++ // metadata_result
++ // R16_thread->in_bytes(JavaThread::vm_result_2_offset())
++
++ ld(metadata_result, in_bytes(JavaThread::vm_result_2_offset()), R16_thread);
++ li(R0, 0);
++ std(R0, in_bytes(JavaThread::vm_result_2_offset()), R16_thread);
++}
++
++
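++// Encoding implemented below (sketch):
++//   narrow_klass = (klass - narrow_klass_base) >> narrow_klass_shift
++// with the subtraction and the shift each skipped when trivial.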
++void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
++ Register current = (src != noreg) ? src : dst; // Klass is in dst if no src provided.
++ if (Universe::narrow_klass_base() != 0) {
++ // Use dst as temp if it is free.
++ load_const(R0, Universe::narrow_klass_base(), (dst != current && dst != R0) ? dst : noreg);
++ sub(dst, current, R0);
++ current = dst;
++ }
++ if (Universe::narrow_klass_shift() != 0) {
++ srdi(dst, current, Universe::narrow_klass_shift());
++ current = dst;
++ }
++ mr_if_needed(dst, current); // Move may be required.
++}
++
++void MacroAssembler::store_klass(Register dst_oop, Register klass, Register ck) {
++ if (UseCompressedClassPointers) {
++ encode_klass_not_null(ck, klass);
++ stw(ck, oopDesc::klass_offset_in_bytes(), dst_oop);
++ } else {
++ std(klass, oopDesc::klass_offset_in_bytes(), dst_oop);
++ }
++}
++
++void MacroAssembler::store_klass_gap(Register dst_oop, Register val) {
++ if (UseCompressedClassPointers) {
++ if (val == noreg) {
++ val = R0;
++ li(val, 0);
++ }
++ stw(val, oopDesc::klass_gap_offset_in_bytes(), dst_oop); // klass gap if compressed
++ }
++}
++
++int MacroAssembler::instr_size_for_decode_klass_not_null() {
++ if (!UseCompressedClassPointers) return 0;
++ int num_instrs = 1; // shift or move
++ if (Universe::narrow_klass_base() != 0) num_instrs = 7; // shift + load const + add
++ return num_instrs * BytesPerInstWord;
++}
++
++void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
++ if (src == noreg) src = dst;
++ Register shifted_src = src;
++ if (Universe::narrow_klass_shift() != 0 ||
++ Universe::narrow_klass_base() == 0 && src != dst) { // Move required.
++ shifted_src = dst;
++ sldi(shifted_src, src, Universe::narrow_klass_shift());
++ }
++ if (Universe::narrow_klass_base() != 0) {
++ load_const(R0, Universe::narrow_klass_base());
++ add(dst, shifted_src, R0);
++ }
++}
++
++void MacroAssembler::load_klass(Register dst, Register src) {
++ if (UseCompressedClassPointers) {
++ lwz(dst, oopDesc::klass_offset_in_bytes(), src);
++ // Attention: no null check here!
++ decode_klass_not_null(dst, dst);
++ } else {
++ ld(dst, oopDesc::klass_offset_in_bytes(), src);
++ }
++}
++
++void MacroAssembler::load_klass_with_trap_null_check(Register dst, Register src) {
++ if (!os::zero_page_read_protected()) {
++ if (TrapBasedNullChecks) {
++ trap_null_check(src);
++ }
++ }
++ load_klass(dst, src);
++}
++
++void MacroAssembler::reinit_heapbase(Register d, Register tmp) {
++ if (Universe::heap() != NULL) {
++ if (Universe::narrow_oop_base() == NULL) {
++ Assembler::xorr(R30, R30, R30);
++ } else {
++ load_const(R30, Universe::narrow_ptrs_base(), tmp);
++ }
++ } else {
++ load_const(R30, Universe::narrow_ptrs_base_addr(), tmp);
++ ld(R30, 0, R30);
++ }
++}
++
++// Clear Array
++// Kills both input registers. tmp == R0 is allowed.
++void MacroAssembler::clear_memory_doubleword(Register base_ptr, Register cnt_dwords, Register tmp) {
++ // Procedure for large arrays (uses data cache block zero instruction).
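++ // Sketch: clear leading dwords with std up to the next cache-line
++ // boundary, dcbz whole lines (cl_size bytes, e.g. 128 on POWER), then
++ // clear the remaining dwords with std.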
++ Label startloop, fast, fastloop, small_rest, restloop, done;
++ const int cl_size = VM_Version::get_cache_line_size(),
++ cl_dwords = cl_size>>3,
++ cl_dw_addr_bits = exact_log2(cl_dwords),
++ dcbz_min = 1; // Min count of dcbz executions, needs to be >0.
++
++//2:
++ cmpdi(CCR1, cnt_dwords, ((dcbz_min+1)<<cl_dw_addr_bits)-1); // Big enough? (ensure >=dcbz_min lines included).
++ blt(CCR1, small_rest); // Too small.
++ rldicl_(tmp, base_ptr, 64-3, 64-cl_dw_addr_bits); // Extract dword offset within first cache line.
++ beq(CCR0, fast); // Already 128byte aligned.
++
++ subfic(tmp, tmp, cl_dwords);
++ mtctr(tmp); // Set ctr to hit 128byte boundary (0<ctr<cl_dwords).
++ subf(cnt_dwords, tmp, cnt_dwords); // rest.
++ li(tmp, 0);
++//10:
++ bind(startloop); // Clear at the beginning to reach 128byte boundary.
++ std(tmp, 0, base_ptr); // Clear 8byte aligned block.
++ addi(base_ptr, base_ptr, 8);
++ bdnz(startloop);
++//13:
++ bind(fast); // Clear 128byte blocks.
++ srdi(tmp, cnt_dwords, cl_dw_addr_bits); // Loop count for 128byte loop (>0).
++ andi(cnt_dwords, cnt_dwords, cl_dwords-1); // Rest in dwords.
++ mtctr(tmp); // Load counter.
++//16:
++ bind(fastloop);
++ dcbz(base_ptr); // Clear 128byte aligned block.
++ addi(base_ptr, base_ptr, cl_size);
++ bdnz(fastloop);
++ if (InsertEndGroupPPC64) { endgroup(); } else { nop(); }
++//20:
++ bind(small_rest);
++ cmpdi(CCR0, cnt_dwords, 0); // size 0?
++ beq(CCR0, done); // rest == 0
++ li(tmp, 0);
++ mtctr(cnt_dwords); // Load counter.
++//24:
++ bind(restloop); // Clear rest.
++ std(tmp, 0, base_ptr); // Clear 8byte aligned block.
++ addi(base_ptr, base_ptr, 8);
++ bdnz(restloop);
++//27:
++ bind(done);
++}
++
++/////////////////////////////////////////// String intrinsics ////////////////////////////////////////////
++
++// Search for a single jchar in an jchar[].
++//
++// Assumes that result differs from all other registers.
++//
++// Haystack, needle are the addresses of jchar-arrays.
++// NeedleChar is needle[0] if it is known at compile time.
++// Haycnt is the length of the haystack. We assume haycnt >=1.
++//
++// Preserves haystack, haycnt, kills all other registers.
++//
++// If needle == R0, we search for the constant needleChar.
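++// Loop sketch: the haystack is scanned two jchars per iteration (2x
++// unrolled); a single trailing compare handles an odd haycnt.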
++void MacroAssembler::string_indexof_1(Register result, Register haystack, Register haycnt,
++ Register needle, jchar needleChar,
++ Register tmp1, Register tmp2) {
++
++ assert_different_registers(result, haystack, haycnt, needle, tmp1, tmp2);
++
++ Label L_InnerLoop, L_FinalCheck, L_Found1, L_Found2, L_Found3, L_NotFound, L_End;
++ Register needle0 = needle, // Contains needle[0].
++ addr = tmp1,
++ ch1 = tmp2,
++ ch2 = R0;
++
++//2 (variable) or 3 (const):
++ if (needle != R0) lhz(needle0, 0, needle); // Preload needle character, needle has len==1.
++ dcbtct(haystack, 0x00); // Indicate R/O access to haystack.
++
++ srwi_(tmp2, haycnt, 1); // Shift right by exact_log2(UNROLL_FACTOR).
++ mr(addr, haystack);
++ beq(CCR0, L_FinalCheck);
++ mtctr(tmp2); // Move to count register.
++//8:
++ bind(L_InnerLoop); // Main work horse (2x unrolled search loop).
++ lhz(ch1, 0, addr); // Load characters from haystack.
++ lhz(ch2, 2, addr);
++ (needle != R0) ? cmpw(CCR0, ch1, needle0) : cmplwi(CCR0, ch1, needleChar);
++ (needle != R0) ? cmpw(CCR1, ch2, needle0) : cmplwi(CCR1, ch2, needleChar);
++ beq(CCR0, L_Found1); // Did we find the needle?
++ beq(CCR1, L_Found2);
++ addi(addr, addr, 4);
++ bdnz(L_InnerLoop);
++//16:
++ bind(L_FinalCheck);
++ andi_(R0, haycnt, 1);
++ beq(CCR0, L_NotFound);
++ lhz(ch1, 0, addr); // One position left at which we have to compare.
++ (needle != R0) ? cmpw(CCR1, ch1, needle0) : cmplwi(CCR1, ch1, needleChar);
++ beq(CCR1, L_Found3);
++//21:
++ bind(L_NotFound);
++ li(result, -1); // Not found.
++ b(L_End);
++
++ bind(L_Found2);
++ addi(addr, addr, 2);
++//24:
++ bind(L_Found1);
++ bind(L_Found3); // Return index ...
++ subf(addr, haystack, addr); // relative to haystack,
++ srdi(result, addr, 1); // in characters.
++ bind(L_End);
++}
++
++
++// Implementation of IndexOf for jchar arrays.
++//
++// The length of haystack and needle are not constant, i.e. passed in a register.
++//
++// Preserves registers haystack, needle.
++// Kills registers haycnt, needlecnt.
++// Assumes that result differs from all other registers.
++// Haystack, needle are the addresses of jchar-arrays.
++// Haycnt, needlecnt are the lengths of them, respectively.
++//
++// Needlecntval must be zero or 15-bit unsigned immediate and > 1.
++void MacroAssembler::string_indexof(Register result, Register haystack, Register haycnt,
++ Register needle, ciTypeArray* needle_values, Register needlecnt, int needlecntval,
++ Register tmp1, Register tmp2, Register tmp3, Register tmp4) {
++
++ // Ensure 0<needlecnt<=haycnt in ideal graph as prerequisite!
++ Label L_TooShort, L_Found, L_NotFound, L_End;
++ Register last_addr = haycnt, // Kill haycnt at the beginning.
++ addr = tmp1,
++ n_start = tmp2,
++ ch1 = tmp3,
++ ch2 = R0;
++
++ // **************************************************************************************************
++ // Prepare for main loop: optimized for needle count >=2, bail out otherwise.
++ // **************************************************************************************************
++
++//1 (variable) or 3 (const):
++ dcbtct(needle, 0x00); // Indicate R/O access to str1.
++ dcbtct(haystack, 0x00); // Indicate R/O access to str2.
++
++ // Compute last haystack addr to use if no match gets found.
++ if (needlecntval == 0) { // variable needlecnt
++//3:
++ subf(ch1, needlecnt, haycnt); // Last character index to compare is haycnt-needlecnt.
++ addi(addr, haystack, -2); // Accesses use pre-increment.
++ cmpwi(CCR6, needlecnt, 2);
++ blt(CCR6, L_TooShort); // Variable needlecnt: handle short needle separately.
++ slwi(ch1, ch1, 1); // Scale to number of bytes.
++ lwz(n_start, 0, needle); // Load first 2 characters of needle.
++ add(last_addr, haystack, ch1); // Point to last address to compare (haystack+2*(haycnt-needlecnt)).
++ addi(needlecnt, needlecnt, -2); // Rest of needle.
++ } else { // constant needlecnt
++ guarantee(needlecntval != 1, "IndexOf with single-character needle must be handled separately");
++ assert((needlecntval & 0x7fff) == needlecntval, "wrong immediate");
++//5:
++ addi(ch1, haycnt, -needlecntval); // Last character index to compare is haycnt-needlecnt.
++ lwz(n_start, 0, needle); // Load first 2 characters of needle.
++ addi(addr, haystack, -2); // Accesses use pre-increment.
++ slwi(ch1, ch1, 1); // Scale to number of bytes.
++ add(last_addr, haystack, ch1); // Point to last address to compare (haystack+2*(haycnt-needlecnt)).
++ li(needlecnt, needlecntval-2); // Rest of needle.
++ }
++
++ // Main Loop (now we have at least 3 characters).
++//11:
++ Label L_OuterLoop, L_InnerLoop, L_FinalCheck, L_Comp1, L_Comp2, L_Comp3;
++ bind(L_OuterLoop); // Search for 1st 2 characters.
++ Register addr_diff = tmp4;
++ subf(addr_diff, addr, last_addr); // Difference between already checked address and last address to check.
++ addi(addr, addr, 2); // This is the new address we want to use for comparing.
++ srdi_(ch2, addr_diff, 2);
++ beq(CCR0, L_FinalCheck); // 2 characters left?
++ mtctr(ch2); // addr_diff/4
++//16:
++ bind(L_InnerLoop); // Main work horse (2x unrolled search loop)
++ lwz(ch1, 0, addr); // Load 2 characters of haystack (ignore alignment).
++ lwz(ch2, 2, addr);
++ cmpw(CCR0, ch1, n_start); // Compare 2 characters (1 would be sufficient but try to reduce branches to CompLoop).
++ cmpw(CCR1, ch2, n_start);
++ beq(CCR0, L_Comp1); // Did we find the needle start?
++ beq(CCR1, L_Comp2);
++ addi(addr, addr, 4);
++ bdnz(L_InnerLoop);
++//24:
++ bind(L_FinalCheck);
++ rldicl_(addr_diff, addr_diff, 64-1, 63); // Remaining characters not covered by InnerLoop: (addr_diff>>1)&1.
++ beq(CCR0, L_NotFound);
++ lwz(ch1, 0, addr); // One position left at which we have to compare.
++ cmpw(CCR1, ch1, n_start);
++ beq(CCR1, L_Comp3);
++//29:
++ bind(L_NotFound);
++ li(result, -1); // not found
++ b(L_End);
++
++
++ // **************************************************************************************************
++ // Special Case: unfortunately, the variable needle case can be called with needlecnt<2
++ // **************************************************************************************************
++//31:
++ if ((needlecntval>>1) !=1 ) { // Const needlecnt is 2 or 3? Reduce code size.
++ int nopcnt = 5;
++ if (needlecntval !=0 ) ++nopcnt; // Balance alignment (other case: see below).
++ if (needlecntval == 0) { // We have to handle these cases separately.
++ Label L_OneCharLoop;
++ bind(L_TooShort);
++ mtctr(haycnt);
++ lhz(n_start, 0, needle); // First character of needle
++ bind(L_OneCharLoop);
++ lhzu(ch1, 2, addr);
++ cmpw(CCR1, ch1, n_start);
++ beq(CCR1, L_Found); // Did we find the one character needle?
++ bdnz(L_OneCharLoop);
++ li(result, -1); // Not found.
++ b(L_End);
++ } // 8 instructions, so no impact on alignment.
++ for (int x = 0; x < nopcnt; ++x) nop();
++ }
++
++ // **************************************************************************************************
++ // Regular Case Part II: compare rest of needle (first 2 characters have been compared already)
++ // **************************************************************************************************
++
++ // Compare the rest
++//36 if needlecntval==0, else 37:
++ bind(L_Comp2);
++ addi(addr, addr, 2); // First comparison has failed, 2nd one hit.
++ bind(L_Comp1); // Addr points to possible needle start.
++ bind(L_Comp3); // Could have created a copy and use a different return address but saving code size here.
++ if (needlecntval != 2) { // Const needlecnt==2?
++ if (needlecntval != 3) {
++ if (needlecntval == 0) beq(CCR6, L_Found); // Variable needlecnt==2?
++ Register ind_reg = tmp4;
++ li(ind_reg, 2*2); // First 2 characters are already compared, use index 2.
++ mtctr(needlecnt); // Decremented by 2, still > 0.
++//40:
++ Label L_CompLoop;
++ bind(L_CompLoop);
++ lhzx(ch2, needle, ind_reg);
++ lhzx(ch1, addr, ind_reg);
++ cmpw(CCR1, ch1, ch2);
++ bne(CCR1, L_OuterLoop);
++ addi(ind_reg, ind_reg, 2);
++ bdnz(L_CompLoop);
++ } else { // No loop required if there's only one needle character left.
++ lhz(ch2, 2*2, needle);
++ lhz(ch1, 2*2, addr);
++ cmpw(CCR1, ch1, ch2);
++ bne(CCR1, L_OuterLoop);
++ }
++ }
++ // Return index ...
++//46:
++ bind(L_Found);
++ subf(addr, haystack, addr); // relative to haystack, ...
++ srdi(result, addr, 1); // in characters.
++//48:
++ bind(L_End);
++}
++
++// Implementation of Compare for jchar arrays.
++//
++// Kills the registers str1, str2, cnt1, cnt2.
++// Kills cr0, ctr.
++// Assumes that result differs from the input registers.
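++// Result convention (sketch): if one string is a prefix of the other the
++// length difference cnt1 - cnt2 is returned, otherwise the difference of
++// the first mismatching pair of jchars.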
++void MacroAssembler::string_compare(Register str1_reg, Register str2_reg, Register cnt1_reg, Register cnt2_reg,
++ Register result_reg, Register tmp_reg) {
++ assert_different_registers(result_reg, str1_reg, str2_reg, cnt1_reg, cnt2_reg, tmp_reg);
++
++ Label Ldone, Lslow_case, Lslow_loop, Lfast_loop;
++ Register cnt_diff = R0,
++ limit_reg = cnt1_reg,
++ chr1_reg = result_reg,
++ chr2_reg = cnt2_reg,
++ addr_diff = str2_reg;
++
++ // Offset 0 should be 32 byte aligned.
++//-4:
++ dcbtct(str1_reg, 0x00); // Indicate R/O access to str1.
++ dcbtct(str2_reg, 0x00); // Indicate R/O access to str2.
++//-2:
++ // Compute min(cnt1, cnt2) and check if 0 (bail out if we don't need to compare characters).
++ subf(result_reg, cnt2_reg, cnt1_reg); // difference between cnt1/2
++ subf_(addr_diff, str1_reg, str2_reg); // alias?
++ beq(CCR0, Ldone); // return cnt difference if both ones are identical
++ srawi(limit_reg, result_reg, 31); // generate signmask (cnt1/2 must be non-negative so cnt_diff can't overflow)
++ mr(cnt_diff, result_reg);
++ andr(limit_reg, result_reg, limit_reg); // difference or zero (negative): cnt1<cnt2 ? cnt1-cnt2 : 0
++ add_(limit_reg, cnt2_reg, limit_reg); // min(cnt1, cnt2)==0?
++ beq(CCR0, Ldone); // return cnt difference if one has 0 length
++
++ lhz(chr1_reg, 0, str1_reg); // optional: early out if first characters mismatch
++ lhzx(chr2_reg, str1_reg, addr_diff); // optional: early out if first characters mismatch
++ addi(tmp_reg, limit_reg, -1); // min(cnt1, cnt2)-1
++ subf_(result_reg, chr2_reg, chr1_reg); // optional: early out if first characters mismatch
++ bne(CCR0, Ldone); // optional: early out if first characters mismatch
++
++ // Set loop counter by scaling down tmp_reg
++ srawi_(chr2_reg, tmp_reg, exact_log2(4)); // (min(cnt1, cnt2)-1)/4
++ ble(CCR0, Lslow_case); // need >4 characters for fast loop
++ andi(limit_reg, tmp_reg, 4-1); // remaining characters
++
++ // Adapt str1_reg str2_reg for the first loop iteration
++ mtctr(chr2_reg); // (min(cnt1, cnt2)-1)/4
++ addi(limit_reg, limit_reg, 4+1); // compare last 5-8 characters in slow_case if mismatch found in fast_loop
++//16:
++ // Compare the rest of the characters
++ bind(Lfast_loop);
++ ld(chr1_reg, 0, str1_reg);
++ ldx(chr2_reg, str1_reg, addr_diff);
++ cmpd(CCR0, chr2_reg, chr1_reg);
++ bne(CCR0, Lslow_case); // return chr1_reg
++ addi(str1_reg, str1_reg, 4*2);
++ bdnz(Lfast_loop);
++ addi(limit_reg, limit_reg, -4); // no mismatch found in fast_loop, only 1-4 characters missing
++//23:
++ bind(Lslow_case);
++ mtctr(limit_reg);
++//24:
++ bind(Lslow_loop);
++ lhz(chr1_reg, 0, str1_reg);
++ lhzx(chr2_reg, str1_reg, addr_diff);
++ subf_(result_reg, chr2_reg, chr1_reg);
++ bne(CCR0, Ldone); // return chr1_reg
++ addi(str1_reg, str1_reg, 1*2);
++ bdnz(Lslow_loop);
++//30:
++ // If strings are equal up to min length, return the length difference.
++ mr(result_reg, cnt_diff);
++ nop(); // alignment
++//32:
++ // Otherwise, return the difference between the first mismatched chars.
++ bind(Ldone);
++}
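++
++// The code above is equivalent to this C sketch (illustrative only, not
++// emitted code): compare up to min(cnt1, cnt2) characters, return the
++// difference of the first mismatching pair, else the count difference.
++//
++//   int string_compare(const jchar* s1, int cnt1, const jchar* s2, int cnt2) {
++//     int limit = cnt1 < cnt2 ? cnt1 : cnt2;
++//     for (int i = 0; i < limit; i++) {
++//       if (s1[i] != s2[i]) return s1[i] - s2[i];
++//     }
++//     return cnt1 - cnt2;
++//   }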
++
++
++// Compare char[] arrays.
++//
++// str1_reg USE only
++// str2_reg USE only
++// cnt_reg USE_DEF, due to tmp reg shortage
++// result_reg DEF only, might compromise USE only registers
++void MacroAssembler::char_arrays_equals(Register str1_reg, Register str2_reg, Register cnt_reg, Register result_reg,
++ Register tmp1_reg, Register tmp2_reg, Register tmp3_reg, Register tmp4_reg,
++ Register tmp5_reg) {
++
++ // Str1 may be the same register as str2 which can occur e.g. after scalar replacement.
++ assert_different_registers(result_reg, str1_reg, cnt_reg, tmp1_reg, tmp2_reg, tmp3_reg, tmp4_reg, tmp5_reg);
++ assert_different_registers(result_reg, str2_reg, cnt_reg, tmp1_reg, tmp2_reg, tmp3_reg, tmp4_reg, tmp5_reg);
++
++ // Offset 0 should be 32 byte aligned.
++ Label Linit_cbc, Lcbc, Lloop, Ldone_true, Ldone_false;
++ Register index_reg = tmp5_reg;
++ Register cbc_iter = tmp4_reg;
++
++//-1:
++ dcbtct(str1_reg, 0x00); // Indicate R/O access to str1.
++ dcbtct(str2_reg, 0x00); // Indicate R/O access to str2.
++//1:
++ andi(cbc_iter, cnt_reg, 4-1); // Remaining iterations after 4 java characters per iteration loop.
++ li(index_reg, 0); // init
++ li(result_reg, 0); // assume false
++ srwi_(tmp2_reg, cnt_reg, exact_log2(4)); // Div: 4 java characters per iteration (main loop).
++
++ cmpwi(CCR1, cbc_iter, 0); // CCR1 = (cbc_iter==0)
++ beq(CCR0, Linit_cbc); // too short
++ mtctr(tmp2_reg);
++//8:
++ bind(Lloop);
++ ldx(tmp1_reg, str1_reg, index_reg);
++ ldx(tmp2_reg, str2_reg, index_reg);
++ cmpd(CCR0, tmp1_reg, tmp2_reg);
++ bne(CCR0, Ldone_false); // Unequal char pair found -> done.
++ addi(index_reg, index_reg, 4*sizeof(jchar));
++ bdnz(Lloop);
++//14:
++ bind(Linit_cbc);
++ beq(CCR1, Ldone_true);
++ mtctr(cbc_iter);
++//16:
++ bind(Lcbc);
++ lhzx(tmp1_reg, str1_reg, index_reg);
++ lhzx(tmp2_reg, str2_reg, index_reg);
++ cmpw(CCR0, tmp1_reg, tmp2_reg);
++ bne(CCR0, Ldone_false); // Unequal char pair found -> done.
++ addi(index_reg, index_reg, 1*sizeof(jchar));
++ bdnz(Lcbc);
++ nop();
++ bind(Ldone_true);
++ li(result_reg, 1);
++//24:
++ bind(Ldone_false);
++}
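++
++// Equivalent C sketch (illustrative only, not emitted code). The main loop
++// above compares 4 jchars per 8-byte load, the cbc loop the remaining 0-3:
++//
++//   bool char_arrays_equals(const jchar* s1, const jchar* s2, int cnt) {
++//     for (int i = 0; i < cnt; i++) {
++//       if (s1[i] != s2[i]) return false;
++//     }
++//     return true;
++//   }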
++
++
++void MacroAssembler::char_arrays_equalsImm(Register str1_reg, Register str2_reg, int cntval, Register result_reg,
++ Register tmp1_reg, Register tmp2_reg) {
++ // Str1 may be the same register as str2 which can occur e.g. after scalar replacement.
++ assert_different_registers(result_reg, str1_reg, tmp1_reg, tmp2_reg);
++ assert_different_registers(result_reg, str2_reg, tmp1_reg, tmp2_reg);
++ assert(sizeof(jchar) == 2, "must be");
++ assert(cntval >= 0 && ((cntval & 0x7fff) == cntval), "wrong immediate");
++
++ Label Ldone_false;
++
++ if (cntval < 16) { // short case
++ if (cntval != 0) li(result_reg, 0); // assume false
++
++ const int num_bytes = cntval*sizeof(jchar);
++ int index = 0;
++ for (int next_index; (next_index = index + 8) <= num_bytes; index = next_index) {
++ ld(tmp1_reg, index, str1_reg);
++ ld(tmp2_reg, index, str2_reg);
++ cmpd(CCR0, tmp1_reg, tmp2_reg);
++ bne(CCR0, Ldone_false);
++ }
++ if (cntval & 2) {
++ lwz(tmp1_reg, index, str1_reg);
++ lwz(tmp2_reg, index, str2_reg);
++ cmpw(CCR0, tmp1_reg, tmp2_reg);
++ bne(CCR0, Ldone_false);
++ index += 4;
++ }
++ if (cntval & 1) {
++ lhz(tmp1_reg, index, str1_reg);
++ lhz(tmp2_reg, index, str2_reg);
++ cmpw(CCR0, tmp1_reg, tmp2_reg);
++ bne(CCR0, Ldone_false);
++ }
++ // fallthrough: true
++ } else {
++ Label Lloop;
++ Register index_reg = tmp1_reg;
++ const int loopcnt = cntval/4;
++ assert(loopcnt > 0, "must be");
++ // Offset 0 should be 32 byte aligned.
++ //2:
++ dcbtct(str1_reg, 0x00); // Indicate R/O access to str1.
++ dcbtct(str2_reg, 0x00); // Indicate R/O access to str2.
++ li(tmp2_reg, loopcnt);
++ li(index_reg, 0); // init
++ li(result_reg, 0); // assume false
++ mtctr(tmp2_reg);
++ //8:
++ bind(Lloop);
++ ldx(R0, str1_reg, index_reg);
++ ldx(tmp2_reg, str2_reg, index_reg);
++ cmpd(CCR0, R0, tmp2_reg);
++ bne(CCR0, Ldone_false); // Unequal char pair found -> done.
++ addi(index_reg, index_reg, 4*sizeof(jchar));
++ bdnz(Lloop);
++ //14:
++ if (cntval & 2) {
++ lwzx(R0, str1_reg, index_reg);
++ lwzx(tmp2_reg, str2_reg, index_reg);
++ cmpw(CCR0, R0, tmp2_reg);
++ bne(CCR0, Ldone_false);
++ if (cntval & 1) addi(index_reg, index_reg, 2*sizeof(jchar));
++ }
++ if (cntval & 1) {
++ lhzx(R0, str1_reg, index_reg);
++ lhzx(tmp2_reg, str2_reg, index_reg);
++ cmpw(CCR0, R0, tmp2_reg);
++ bne(CCR0, Ldone_false);
++ }
++ // fallthrough: true
++ }
++ li(result_reg, 1);
++ bind(Ldone_false);
++}
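++
++// In the short case above the comparison is fully unrolled in descending
++// chunk sizes. Worked example: cntval == 7 covers 14 bytes as one 8-byte
++// ld pair at offset 0, one 4-byte lwz pair at offset 8 (cntval & 2), and
++// one 2-byte lhz pair at offset 12 (cntval & 1). The long case runs the
++// 8-byte loop cntval/4 times and handles the tail the same way.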
++
++
++void MacroAssembler::asm_assert(bool check_equal, const char *msg, int id) {
++#ifdef ASSERT
++ Label ok;
++ if (check_equal) {
++ beq(CCR0, ok);
++ } else {
++ bne(CCR0, ok);
++ }
++ stop(msg, id);
++ bind(ok);
++#endif
++}
++
++void MacroAssembler::asm_assert_mems_zero(bool check_equal, int size, int mem_offset,
++ Register mem_base, const char* msg, int id) {
++#ifdef ASSERT
++ switch (size) {
++ case 4:
++ lwz(R0, mem_offset, mem_base);
++ cmpwi(CCR0, R0, 0);
++ break;
++ case 8:
++ ld(R0, mem_offset, mem_base);
++ cmpdi(CCR0, R0, 0);
++ break;
++ default:
++ ShouldNotReachHere();
++ }
++ asm_assert(check_equal, msg, id);
++#endif // ASSERT
++}
++
++void MacroAssembler::verify_thread() {
++ if (VerifyThread) {
++ unimplemented("'VerifyThread' currently not implemented on PPC");
++ }
++}
++
++// READ: oop. KILL: R0. Volatile floats perhaps.
++void MacroAssembler::verify_oop(Register oop, const char* msg) {
++ if (!VerifyOops) {
++ return;
++ }
++ // Will be preserved.
++ Register tmp = R11;
++ assert(oop != tmp, "precondition");
++ unsigned int nbytes_save = 10*8; // 10 volatile gprs
++ address/* FunctionDescriptor** */fd = StubRoutines::verify_oop_subroutine_entry_address();
++ // save tmp
++ mr(R0, tmp);
++ // kill tmp
++ save_LR_CR(tmp);
++ push_frame_reg_args(nbytes_save, tmp);
++ // restore tmp
++ mr(tmp, R0);
++ save_volatile_gprs(R1_SP, 112); // except R0
++ // load FunctionDescriptor** / entry_address *
++ load_const(tmp, fd);
++ // load FunctionDescriptor* / entry_address
++ ld(tmp, 0, tmp);
++ mr(R4_ARG2, oop);
++ load_const(R3_ARG1, (address)msg);
++ // call destination for its side effect
++ call_c(tmp);
++ restore_volatile_gprs(R1_SP, 112); // except R0
++ pop_frame();
++ // save tmp
++ mr(R0, tmp);
++ // kill tmp
++ restore_LR_CR(tmp);
++ // restore tmp
++ mr(tmp, R0);
++}
++
++const char* stop_types[] = {
++ "stop",
++ "untested",
++ "unimplemented",
++ "shouldnotreachhere"
++};
++
++static void stop_on_request(int tp, const char* msg) {
++ tty->print("PPC assembly code requires stop: (%s) %s\n", (void *)stop_types[tp%/*stop_end*/4], msg);
++ guarantee(false, err_msg("PPC assembly code requires stop: %s", msg));
++}
++
++// Call a C-function that prints output.
++void MacroAssembler::stop(int type, const char* msg, int id) {
++#ifndef PRODUCT
++ block_comment(err_msg("stop: %s %s {", stop_types[type%stop_end], msg));
++#else
++ block_comment("stop {");
++#endif
++
++ // setup arguments
++ load_const_optimized(R3_ARG1, type);
++ load_const_optimized(R4_ARG2, (void *)msg, /*tmp=*/R0);
++ call_VM_leaf(CAST_FROM_FN_PTR(address, stop_on_request), R3_ARG1, R4_ARG2);
++ illtrap();
++ emit_int32(id);
++ block_comment("} stop;");
++}
++
++#ifndef PRODUCT
++// Write pattern 0x0101010101010101 in memory region [low-before, high+after].
++// Val, addr are temp registers.
++// If low == addr, addr is killed.
++// High is preserved.
++void MacroAssembler::zap_from_to(Register low, int before, Register high, int after, Register val, Register addr) {
++ if (!ZapMemory) return;
++
++ assert_different_registers(low, val);
++
++ BLOCK_COMMENT("zap memory region {");
++ load_const_optimized(val, 0x0101010101010101);
++ int size = before + after;
++ if (low == high && size < 5 && size > 0) {
++ int offset = -before*BytesPerWord;
++ for (int i = 0; i < size; ++i) {
++ std(val, offset, low);
++ offset += (1*BytesPerWord);
++ }
++ } else {
++ addi(addr, low, -before*BytesPerWord);
++ assert_different_registers(high, val);
++ if (after) addi(high, high, after * BytesPerWord);
++ Label loop;
++ bind(loop);
++ std(val, 0, addr);
++ addi(addr, addr, 8);
++ cmpd(CCR6, addr, high);
++ ble(CCR6, loop);
++ if (after) addi(high, high, -after * BytesPerWord); // Correct back to old value.
++ }
++ BLOCK_COMMENT("} zap memory region");
++}
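++
++// Example (sketch): zap_from_to(Rlow, 2, Rhigh, 1, Rval, Raddr) fills the
++// doublewords in [Rlow - 2*BytesPerWord, Rhigh + 1*BytesPerWord] with the
++// 0x0101010101010101 pattern; when low == high and the span is small, the
++// stores are emitted unrolled instead of as a loop.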
++
++#endif // !PRODUCT
++
++SkipIfEqualZero::SkipIfEqualZero(MacroAssembler* masm, Register temp, const bool* flag_addr) : _masm(masm), _label() {
++ int simm16_offset = masm->load_const_optimized(temp, (address)flag_addr, R0, true);
++ assert(sizeof(bool) == 1, "PowerPC ABI");
++ masm->lbz(temp, simm16_offset, temp);
++ masm->cmpwi(CCR0, temp, 0);
++ masm->beq(CCR0, _label);
++}
++
++SkipIfEqualZero::~SkipIfEqualZero() {
++ _masm->bind(_label);
++}
+--- ./hotspot/src/cpu/ppc/vm/macroAssembler_ppc.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/macroAssembler_ppc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,716 @@
++/*
++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2014 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_PPC_VM_MACROASSEMBLER_PPC_HPP
++#define CPU_PPC_VM_MACROASSEMBLER_PPC_HPP
++
++#include "asm/assembler.hpp"
++
++// MacroAssembler extends Assembler by a few frequently used macros.
++
++class ciTypeArray;
++
++class MacroAssembler: public Assembler {
++ public:
++ MacroAssembler(CodeBuffer* code) : Assembler(code) {}
++
++ //
++ // Optimized instruction emitters
++ //
++
++ inline static int largeoffset_si16_si16_hi(int si31) { return (si31 + (1<<15)) >> 16; }
++ inline static int largeoffset_si16_si16_lo(int si31) { return si31 - (((si31 + (1<<15)) >> 16) << 16); }
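++ // The split satisfies (hi << 16) + lo == si31 with both halves encodable
++ // as signed 16-bit immediates (e.g. for an addis/ld pair). Worked example:
++ // si31 = 0x1234A000 gives hi = 0x1235 and lo = -0x6000, since
++ // 0x12350000 - 0x6000 == 0x1234A000.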
++
++ // load d = *[a+si31]
++ // Emits several instructions if the offset is not encodable in one instruction.
++ void ld_largeoffset_unchecked(Register d, int si31, Register a, int emit_filler_nop);
++ void ld_largeoffset (Register d, int si31, Register a, int emit_filler_nop);
++ inline static bool is_ld_largeoffset(address a);
++ inline static int get_ld_largeoffset_offset(address a);
++
++ inline void round_to(Register r, int modulus);
++
++ // Load/store with type given by parameter.
++ void load_sized_value( Register dst, RegisterOrConstant offs, Register base, size_t size_in_bytes, bool is_signed);
++ void store_sized_value(Register dst, RegisterOrConstant offs, Register base, size_t size_in_bytes);
++
++ // Move register if destination register and target register are different
++ inline void mr_if_needed(Register rd, Register rs);
++ inline void fmr_if_needed(FloatRegister rd, FloatRegister rs);
++ // This is dedicated for emitting scheduled mach nodes. For better
++ // readability of the ad file I put it here.
++ // Endgroups are not needed if
++ // - the scheduler is off
++ // - the scheduler found that there is a natural group end, in that
++ // case it reduced the size of the instruction used in the test
++ // yielding 'needed'.
++ inline void endgroup_if_needed(bool needed);
++
++ // Memory barriers.
++ inline void membar(int bits);
++ inline void release();
++ inline void acquire();
++ inline void fence();
++
++ // nop padding
++ void align(int modulus, int max = 252, int rem = 0);
++
++ //
++ // Constants, loading constants, TOC support
++ //
++
++ // Address of the global TOC.
++ inline static address global_toc();
++ // Offset of given address to the global TOC.
++ inline static int offset_to_global_toc(const address addr);
++
++ // Address of TOC of the current method.
++ inline address method_toc();
++ // Offset of given address to TOC of the current method.
++ inline int offset_to_method_toc(const address addr);
++
++ // Global TOC.
++ void calculate_address_from_global_toc(Register dst, address addr,
++ bool hi16 = true, bool lo16 = true,
++ bool add_relocation = true, bool emit_dummy_addr = false);
++ inline void calculate_address_from_global_toc_hi16only(Register dst, address addr) {
++ calculate_address_from_global_toc(dst, addr, true, false);
++ };
++ inline void calculate_address_from_global_toc_lo16only(Register dst, address addr) {
++ calculate_address_from_global_toc(dst, addr, false, true);
++ };
++
++ inline static bool is_calculate_address_from_global_toc_at(address a, address bound);
++ static int patch_calculate_address_from_global_toc_at(address a, address addr, address bound);
++ static address get_address_of_calculate_address_from_global_toc_at(address a, address addr);
++
++#ifdef _LP64
++ // Patch narrow oop constant.
++ inline static bool is_set_narrow_oop(address a, address bound);
++ static int patch_set_narrow_oop(address a, address bound, narrowOop data);
++ static narrowOop get_narrow_oop(address a, address bound);
++#endif
++
++ inline static bool is_load_const_at(address a);
++
++ // Emits an oop const to the constant pool, loads the constant, and
++ // sets a relocation info with address current_pc.
++ void load_const_from_method_toc(Register dst, AddressLiteral& a, Register toc);
++ void load_toc_from_toc(Register dst, AddressLiteral& a, Register toc) {
++ assert(dst == R2_TOC, "base register must be TOC");
++ load_const_from_method_toc(dst, a, toc);
++ }
++
++ static bool is_load_const_from_method_toc_at(address a);
++ static int get_offset_of_load_const_from_method_toc_at(address a);
++
++ // Get the 64 bit constant from a `load_const' sequence.
++ static long get_const(address load_const);
++
++ // Patch the 64 bit constant of a `load_const' sequence. This is a
++ // low level procedure. It neither flushes the instruction cache nor
++ // is it atomic.
++ static void patch_const(address load_const, long x);
++
++ // Metadata in code that we have to keep track of.
++ AddressLiteral allocate_metadata_address(Metadata* obj); // allocate_index
++ AddressLiteral constant_metadata_address(Metadata* obj); // find_index
++ // Oops used directly in compiled code are stored in the constant pool,
++ // and loaded from there.
++ // Allocate new entry for oop in constant pool. Generate relocation.
++ AddressLiteral allocate_oop_address(jobject obj);
++ // Find oop obj in constant pool. Return relocation with its index.
++ AddressLiteral constant_oop_address(jobject obj);
++
++ // Find oop in constant pool and emit instructions to load it.
++ // Uses constant_oop_address.
++ inline void set_oop_constant(jobject obj, Register d);
++ // Same as load_address.
++ inline void set_oop (AddressLiteral obj_addr, Register d);
++
++ // Read runtime constant: Issue load if constant not yet established,
++ // else use real constant.
++ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr,
++ Register tmp,
++ int offset);
++
++ //
++ // branch, jump
++ //
++
++ inline void pd_patch_instruction(address branch, address target);
++ NOT_PRODUCT(static void pd_print_patched_instruction(address branch);)
++
++ // Conditional far branch for destinations encodable in 24+2 bits.
++ // Same interface as bc, e.g. no inverse boint-field.
++ enum {
++ bc_far_optimize_not = 0,
++ bc_far_optimize_on_relocate = 1
++ };
++ // optimize: flag for telling the conditional far branch to optimize
++ // itself when relocated.
++ void bc_far(int boint, int biint, Label& dest, int optimize);
++ // Relocation of conditional far branches.
++ static bool is_bc_far_at(address instruction_addr);
++ static address get_dest_of_bc_far_at(address instruction_addr);
++ static void set_dest_of_bc_far_at(address instruction_addr, address dest);
++ private:
++ static bool inline is_bc_far_variant1_at(address instruction_addr);
++ static bool inline is_bc_far_variant2_at(address instruction_addr);
++ static bool inline is_bc_far_variant3_at(address instruction_addr);
++ public:
++
++ // Convenience bc_far versions.
++ inline void blt_far(ConditionRegister crx, Label& L, int optimize);
++ inline void bgt_far(ConditionRegister crx, Label& L, int optimize);
++ inline void beq_far(ConditionRegister crx, Label& L, int optimize);
++ inline void bso_far(ConditionRegister crx, Label& L, int optimize);
++ inline void bge_far(ConditionRegister crx, Label& L, int optimize);
++ inline void ble_far(ConditionRegister crx, Label& L, int optimize);
++ inline void bne_far(ConditionRegister crx, Label& L, int optimize);
++ inline void bns_far(ConditionRegister crx, Label& L, int optimize);
++
++ // Emit, identify and patch a NOT mt-safe patchable 64 bit absolute call/jump.
++ private:
++ enum {
++ bxx64_patchable_instruction_count = (2/*load_codecache_const*/ + 3/*5load_const*/ + 1/*mtctr*/ + 1/*bctrl*/),
++ bxx64_patchable_size = bxx64_patchable_instruction_count * BytesPerInstWord,
++ bxx64_patchable_ret_addr_offset = bxx64_patchable_size
++ };
++ void bxx64_patchable(address target, relocInfo::relocType rt, bool link);
++ static bool is_bxx64_patchable_at( address instruction_addr, bool link);
++ // Does the instruction use a pc-relative encoding of the destination?
++ static bool is_bxx64_patchable_pcrelative_at( address instruction_addr, bool link);
++ static bool is_bxx64_patchable_variant1_at( address instruction_addr, bool link);
++ // Load destination relative to global toc.
++ static bool is_bxx64_patchable_variant1b_at( address instruction_addr, bool link);
++ static bool is_bxx64_patchable_variant2_at( address instruction_addr, bool link);
++ static void set_dest_of_bxx64_patchable_at( address instruction_addr, address target, bool link);
++ static address get_dest_of_bxx64_patchable_at(address instruction_addr, bool link);
++
++ public:
++ // call
++ enum {
++ bl64_patchable_instruction_count = bxx64_patchable_instruction_count,
++ bl64_patchable_size = bxx64_patchable_size,
++ bl64_patchable_ret_addr_offset = bxx64_patchable_ret_addr_offset
++ };
++ inline void bl64_patchable(address target, relocInfo::relocType rt) {
++ bxx64_patchable(target, rt, /*link=*/true);
++ }
++ inline static bool is_bl64_patchable_at(address instruction_addr) {
++ return is_bxx64_patchable_at(instruction_addr, /*link=*/true);
++ }
++ inline static bool is_bl64_patchable_pcrelative_at(address instruction_addr) {
++ return is_bxx64_patchable_pcrelative_at(instruction_addr, /*link=*/true);
++ }
++ inline static void set_dest_of_bl64_patchable_at(address instruction_addr, address target) {
++ set_dest_of_bxx64_patchable_at(instruction_addr, target, /*link=*/true);
++ }
++ inline static address get_dest_of_bl64_patchable_at(address instruction_addr) {
++ return get_dest_of_bxx64_patchable_at(instruction_addr, /*link=*/true);
++ }
++ // jump
++ enum {
++ b64_patchable_instruction_count = bxx64_patchable_instruction_count,
++ b64_patchable_size = bxx64_patchable_size,
++ };
++ inline void b64_patchable(address target, relocInfo::relocType rt) {
++ bxx64_patchable(target, rt, /*link=*/false);
++ }
++ inline static bool is_b64_patchable_at(address instruction_addr) {
++ return is_bxx64_patchable_at(instruction_addr, /*link=*/false);
++ }
++ inline static bool is_b64_patchable_pcrelative_at(address instruction_addr) {
++ return is_bxx64_patchable_pcrelative_at(instruction_addr, /*link=*/false);
++ }
++ inline static void set_dest_of_b64_patchable_at(address instruction_addr, address target) {
++ set_dest_of_bxx64_patchable_at(instruction_addr, target, /*link=*/false);
++ }
++ inline static address get_dest_of_b64_patchable_at(address instruction_addr) {
++ return get_dest_of_bxx64_patchable_at(instruction_addr, /*link=*/false);
++ }
++
++ //
++ // Support for frame handling
++ //
++
++ // some ABI-related functions
++ void save_nonvolatile_gprs( Register dst_base, int offset);
++ void restore_nonvolatile_gprs(Register src_base, int offset);
++ void save_volatile_gprs( Register dst_base, int offset);
++ void restore_volatile_gprs(Register src_base, int offset);
++ void save_LR_CR( Register tmp); // tmp contains LR on return.
++ void restore_LR_CR(Register tmp);
++
++ // Get current PC using bl-next-instruction trick.
++ address get_PC_trash_LR(Register result);
++
++ // Resize current frame either relative to the current SP or to an absolute address.
++ void resize_frame(Register offset, Register tmp);
++ void resize_frame(int offset, Register tmp);
++ void resize_frame_absolute(Register addr, Register tmp1, Register tmp2);
++
++ // Push a frame of size bytes.
++ void push_frame(Register bytes, Register tmp);
++
++ // Push a frame of size `bytes'. No abi space provided.
++ void push_frame(unsigned int bytes, Register tmp);
++
++ // Push a frame of size `bytes' plus abi_reg_args on top.
++ void push_frame_reg_args(unsigned int bytes, Register tmp);
++
++ // Set up a new C frame with a spill area for non-volatile GPRs and additional
++ // space for local variables
++ void push_frame_reg_args_nonvolatiles(unsigned int bytes, Register tmp);
++
++ // pop current C frame
++ void pop_frame();
++
++ //
++ // Calls
++ //
++
++ private:
++ address _last_calls_return_pc;
++
++#if defined(ABI_ELFv2)
++ // Generic version of a call to C function.
++ // Updates and returns _last_calls_return_pc.
++ address branch_to(Register function_entry, bool and_link);
++#else
++ // Generic version of a call to C function via a function descriptor
++ // with variable support for C calling conventions (TOC, ENV, etc.).
++ // updates and returns _last_calls_return_pc.
++ address branch_to(Register function_descriptor, bool and_link, bool save_toc_before_call,
++ bool restore_toc_after_call, bool load_toc_of_callee, bool load_env_of_callee);
++#endif
++
++ public:
++
++ // Get the pc where the last call will return to. Returns _last_calls_return_pc.
++ inline address last_calls_return_pc();
++
++#if defined(ABI_ELFv2)
++ // Call a C function via a function descriptor and use full C
++ // calling conventions. Updates and returns _last_calls_return_pc.
++ address call_c(Register function_entry);
++ // For tail calls: only branch, don't link, so callee returns to caller of this function.
++ address call_c_and_return_to_caller(Register function_entry);
++ address call_c(address function_entry, relocInfo::relocType rt);
++#else
++ // Call a C function via a function descriptor and use full C
++ // calling conventions. Updates and returns _last_calls_return_pc.
++ address call_c(Register function_descriptor);
++ // For tail calls: only branch, don't link, so callee returns to caller of this function.
++ address call_c_and_return_to_caller(Register function_descriptor);
++ address call_c(const FunctionDescriptor* function_descriptor, relocInfo::relocType rt);
++ address call_c_using_toc(const FunctionDescriptor* function_descriptor, relocInfo::relocType rt,
++ Register toc);
++#endif
++
++ protected:
++
++ // It is imperative that all calls into the VM are handled via the
++ // call_VM macros. They make sure that the stack linkage is setup
++ // correctly. call_VM's correspond to ENTRY/ENTRY_X entry points
++ // while call_VM_leaf's correspond to LEAF entry points.
++ //
++ // This is the base routine called by the different versions of
++ // call_VM. The interpreter may customize this version by overriding
++ // it for its purposes (e.g., to save/restore additional registers
++ // when doing a VM call).
++ //
++ // If no last_java_sp is specified (noreg) then SP will be used instead.
++ virtual void call_VM_base(
++ // where an oop-result ends up if any; use noreg otherwise
++ Register oop_result,
++ // to set up last_Java_frame in stubs; use noreg otherwise
++ Register last_java_sp,
++ // the entry point
++ address entry_point,
++ // flag which indicates if exception should be checked
++ bool check_exception = true
++ );
++
++ // Support for VM calls. This is the base routine called by the
++ // different versions of call_VM_leaf. The interpreter may customize
++ // this version by overriding it for its purposes (e.g., to
++ // save/restore additional registers when doing a VM call).
++ void call_VM_leaf_base(address entry_point);
++
++ public:
++ // Call into the VM.
++ // Passes the thread pointer (in R3_ARG1) as a prepended argument.
++ // Makes sure oop return values are visible to the GC.
++ void call_VM(Register oop_result, address entry_point, bool check_exceptions = true);
++ void call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions = true);
++ void call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true);
++ void call_VM_leaf(address entry_point);
++ void call_VM_leaf(address entry_point, Register arg_1);
++ void call_VM_leaf(address entry_point, Register arg_1, Register arg_2);
++ void call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3);
++
++ // Call a stub function via a function descriptor, but don't save
++ // TOC before call, don't setup TOC and ENV for call, and don't
++ // restore TOC after call. Updates and returns _last_calls_return_pc.
++ inline address call_stub(Register function_entry);
++ inline void call_stub_and_return_to(Register function_entry, Register return_pc);
++
++ //
++ // Java utilities
++ //
++
++ // Read from the polling page, its address is already in a register.
++ inline void load_from_polling_page(Register polling_page_address, int offset = 0);
++ // Check whether instruction is a read access to the polling page
++ // which was emitted by load_from_polling_page(..).
++ static bool is_load_from_polling_page(int instruction, void* ucontext/*may be NULL*/,
++ address* polling_address_ptr = NULL);
++
++ // Check whether instruction is a write access to the memory
++ // serialization page realized by one of the instructions stw, stwu,
++ // stwx, or stwux.
++ static bool is_memory_serialization(int instruction, JavaThread* thread, void* ucontext);
++
++ // Support for NULL-checks
++ //
++ // Generates code that causes a NULL OS exception if the content of reg is NULL.
++ // If the accessed location is M[reg + offset] and the offset is known, provide the
++ // offset. No explicit code generation is needed if the offset is within a certain
++ // range (0 <= offset <= page_size).
++
++ // Stack overflow checking
++ void bang_stack_with_offset(int offset);
++
++ // If instruction is a stack bang of the form std, stdu, or
++ // stdux, return the banged address. Otherwise, return 0.
++ static address get_stack_bang_address(int instruction, void* ucontext);
++
++ // Atomics
++ // CmpxchgX sets condition register to cmpX(current, compare).
++ // (flag == ne) => (dest_current_value != compare_value), (!swapped)
++ // (flag == eq) => (dest_current_value == compare_value), ( swapped)
++ static inline bool cmpxchgx_hint_acquire_lock() { return true; }
++ // The stxcx will probably not be succeeded by a releasing store.
++ static inline bool cmpxchgx_hint_release_lock() { return false; }
++ static inline bool cmpxchgx_hint_atomic_update() { return false; }
++
++ // Cmpxchg semantics
++ enum {
++ MemBarNone = 0,
++ MemBarRel = 1,
++ MemBarAcq = 2,
++ MemBarFenceAfter = 4 // use powers of 2
++ };
++ void cmpxchgw(ConditionRegister flag,
++ Register dest_current_value, Register compare_value, Register exchange_value, Register addr_base,
++ int semantics, bool cmpxchgx_hint = false,
++ Register int_flag_success = noreg, bool contention_hint = false);
++ void cmpxchgd(ConditionRegister flag,
++ Register dest_current_value, Register compare_value, Register exchange_value, Register addr_base,
++ int semantics, bool cmpxchgx_hint = false,
++ Register int_flag_success = noreg, Label* failed = NULL, bool contention_hint = false);
++
++ // interface method calling
++ void lookup_interface_method(Register recv_klass,
++ Register intf_klass,
++ RegisterOrConstant itable_index,
++ Register method_result,
++ Register temp_reg, Register temp2_reg,
++ Label& no_such_interface);
++
++ // virtual method calling
++ void lookup_virtual_method(Register recv_klass,
++ RegisterOrConstant vtable_index,
++ Register method_result);
++
++ // Test sub_klass against super_klass, with fast and slow paths.
++
++ // The fast path produces a tri-state answer: yes / no / maybe-slow.
++ // One of the three labels can be NULL, meaning take the fall-through.
++ // If super_check_offset is -1, the value is loaded up from super_klass.
++ // No registers are killed, except temp_reg and temp2_reg.
++ // If super_check_offset is not -1, temp2_reg is not used and can be noreg.
++ void check_klass_subtype_fast_path(Register sub_klass,
++ Register super_klass,
++ Register temp1_reg,
++ Register temp2_reg,
++ Label& L_success,
++ Label& L_failure);
++
++ // The rest of the type check; must be wired to a corresponding fast path.
++ // It does not repeat the fast path logic, so don't use it standalone.
++ // The temp_reg can be noreg, if no temps are available.
++ // It can also be sub_klass or super_klass, meaning it's OK to kill that one.
++ // Updates the sub's secondary super cache as necessary.
++ void check_klass_subtype_slow_path(Register sub_klass,
++ Register super_klass,
++ Register temp1_reg,
++ Register temp2_reg,
++ Label* L_success = NULL,
++ Register result_reg = noreg);
++
++ // Simplified, combined version, good for typical uses.
++ // Falls through on failure.
++ void check_klass_subtype(Register sub_klass,
++ Register super_klass,
++ Register temp1_reg,
++ Register temp2_reg,
++ Label& L_success);
++
++ // Method handle support (JSR 292).
++ void check_method_handle_type(Register mtype_reg, Register mh_reg, Register temp_reg, Label& wrong_method_type);
++
++ RegisterOrConstant argument_offset(RegisterOrConstant arg_slot, Register temp_reg, int extra_slot_offset = 0);
++
++ // Biased locking support
++ // Upon entry, obj_reg must contain the target object, and mark_reg
++ // must contain the target object's header.
++ // Destroys mark_reg if an attempt is made to bias an anonymously
++ // biased lock. In this case a failure will go either to the slow
++ // case or fall through with the notEqual condition code set with
++ // the expectation that the slow case in the runtime will be called.
++ // In the fall-through case where the CAS-based lock is done,
++ // mark_reg is not destroyed.
++ void biased_locking_enter(ConditionRegister cr_reg, Register obj_reg, Register mark_reg, Register temp_reg,
++ Register temp2_reg, Label& done, Label* slow_case = NULL);
++ // Upon entry, the base register of mark_addr must contain the oop.
++ // Destroys temp_reg.
++ // If allow_delay_slot_filling is set to true, the next instruction
++ // emitted after this one will go in an annulled delay slot if the
++ // biased locking exit case failed.
++ void biased_locking_exit(ConditionRegister cr_reg, Register mark_addr, Register temp_reg, Label& done);
++
++ void compiler_fast_lock_object( ConditionRegister flag, Register oop, Register box, Register tmp1, Register tmp2, Register tmp3);
++ void compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box, Register tmp1, Register tmp2, Register tmp3);
++
++ // Support for serializing memory accesses between threads
++ void serialize_memory(Register thread, Register tmp1, Register tmp2);
++
++ // GC barrier support.
++ void card_write_barrier_post(Register Rstore_addr, Register Rnew_val, Register Rtmp);
++ void card_table_write(jbyte* byte_map_base, Register Rtmp, Register Robj);
++
++#if INCLUDE_ALL_GCS
++ // General G1 pre-barrier generator.
++ void g1_write_barrier_pre(Register Robj, RegisterOrConstant offset, Register Rpre_val,
++ Register Rtmp1, Register Rtmp2, bool needs_frame = false);
++ // General G1 post-barrier generator
++ void g1_write_barrier_post(Register Rstore_addr, Register Rnew_val, Register Rtmp1,
++ Register Rtmp2, Register Rtmp3, Label *filtered_ext = NULL);
++#endif
++
++ // Support for managing the JavaThread pointer (i.e., the reference to
++ // thread-local information).
++
++ // Support for last Java frame (but use call_VM instead where possible):
++ // access R16_thread->last_Java_sp.
++ void set_last_Java_frame(Register last_java_sp, Register last_Java_pc);
++ void reset_last_Java_frame(void);
++ void set_top_ijava_frame_at_SP_as_last_Java_frame(Register sp, Register tmp1);
++
++ // Read vm result from thread: oop_result = R16_thread->result;
++ void get_vm_result (Register oop_result);
++ void get_vm_result_2(Register metadata_result);
++
++ static bool needs_explicit_null_check(intptr_t offset);
++
++ // Trap-instruction-based checks.
++ // Range checks can be distinguished from zero checks as they check 32 bit,
++ // zero checks all 64 bits (tw, td).
++ inline void trap_null_check(Register a, trap_to_bits cmp = traptoEqual);
++ static bool is_trap_null_check(int x) {
++ return is_tdi(x, traptoEqual, -1/*any reg*/, 0) ||
++ is_tdi(x, traptoGreaterThanUnsigned, -1/*any reg*/, 0);
++ }
++
++ inline void trap_zombie_not_entrant();
++ static bool is_trap_zombie_not_entrant(int x) { return is_tdi(x, traptoUnconditional, 0/*reg 0*/, 1); }
++
++ inline void trap_should_not_reach_here();
++ static bool is_trap_should_not_reach_here(int x) { return is_tdi(x, traptoUnconditional, 0/*reg 0*/, 2); }
++
++ inline void trap_ic_miss_check(Register a, Register b);
++ static bool is_trap_ic_miss_check(int x) {
++ return is_td(x, traptoGreaterThanUnsigned | traptoLessThanUnsigned, -1/*any reg*/, -1/*any reg*/);
++ }
++
++ // Implicit or explicit null check, jumps to static address exception_entry.
++ inline void null_check_throw(Register a, int offset, Register temp_reg, address exception_entry);
++
++ // Check accessed object for null. Use SIGTRAP-based null checks on AIX.
++ inline void load_with_trap_null_check(Register d, int si16, Register s1);
++
++ // Load heap oop and decompress. Loaded oop may not be null.
++ inline void load_heap_oop_not_null(Register d, RegisterOrConstant offs, Register s1 = noreg);
++ inline void store_heap_oop_not_null(Register d, RegisterOrConstant offs, Register s1,
++ /*specify if d must stay uncompressed*/ Register tmp = noreg);
++
++ // Null allowed.
++ inline void load_heap_oop(Register d, RegisterOrConstant offs, Register s1 = noreg);
++
++ // Encode/decode heap oop. Oop may not be null, else en/decoding goes wrong.
++ inline Register encode_heap_oop_not_null(Register d, Register src = noreg);
++ inline void decode_heap_oop_not_null(Register d);
++
++ // Null allowed.
++ inline void decode_heap_oop(Register d);
++
++ // Load/Store klass oop from klass field. Compress.
++ void load_klass(Register dst, Register src);
++ void load_klass_with_trap_null_check(Register dst, Register src);
++ void store_klass(Register dst_oop, Register klass, Register tmp = R0);
++ void store_klass_gap(Register dst_oop, Register val = noreg); // Will store 0 if val not specified.
++ static int instr_size_for_decode_klass_not_null();
++ void decode_klass_not_null(Register dst, Register src = noreg);
++ void encode_klass_not_null(Register dst, Register src = noreg);
++
++ // Load common heap base into register.
++ void reinit_heapbase(Register d, Register tmp = noreg);
++
++ // SIGTRAP-based range checks for arrays.
++ inline void trap_range_check_l(Register a, Register b);
++ inline void trap_range_check_l(Register a, int si16);
++ static bool is_trap_range_check_l(int x) {
++ return (is_tw (x, traptoLessThanUnsigned, -1/*any reg*/, -1/*any reg*/) ||
++ is_twi(x, traptoLessThanUnsigned, -1/*any reg*/) );
++ }
++ inline void trap_range_check_le(Register a, int si16);
++ static bool is_trap_range_check_le(int x) {
++ return is_twi(x, traptoEqual | traptoLessThanUnsigned, -1/*any reg*/);
++ }
++ inline void trap_range_check_g(Register a, int si16);
++ static bool is_trap_range_check_g(int x) {
++ return is_twi(x, traptoGreaterThanUnsigned, -1/*any reg*/);
++ }
++ inline void trap_range_check_ge(Register a, Register b);
++ inline void trap_range_check_ge(Register a, int si16);
++ static bool is_trap_range_check_ge(int x) {
++ return (is_tw (x, traptoEqual | traptoGreaterThanUnsigned, -1/*any reg*/, -1/*any reg*/) ||
++ is_twi(x, traptoEqual | traptoGreaterThanUnsigned, -1/*any reg*/) );
++ }
++ static bool is_trap_range_check(int x) {
++ return is_trap_range_check_l(x) || is_trap_range_check_le(x) ||
++ is_trap_range_check_g(x) || is_trap_range_check_ge(x);
++ }
++
++ void clear_memory_doubleword(Register base_ptr, Register cnt_dwords, Register tmp = R0);
++
++ // Needle of length 1.
++ void string_indexof_1(Register result, Register haystack, Register haycnt,
++ Register needle, jchar needleChar,
++ Register tmp1, Register tmp2);
++ // General indexof, eventually with constant needle length.
++ void string_indexof(Register result, Register haystack, Register haycnt,
++ Register needle, ciTypeArray* needle_values, Register needlecnt, int needlecntval,
++ Register tmp1, Register tmp2, Register tmp3, Register tmp4);
++ void string_compare(Register str1_reg, Register str2_reg, Register cnt1_reg, Register cnt2_reg,
++ Register result_reg, Register tmp_reg);
++ void char_arrays_equals(Register str1_reg, Register str2_reg, Register cnt_reg, Register result_reg,
++ Register tmp1_reg, Register tmp2_reg, Register tmp3_reg, Register tmp4_reg,
++ Register tmp5_reg);
++ void char_arrays_equalsImm(Register str1_reg, Register str2_reg, int cntval, Register result_reg,
++ Register tmp1_reg, Register tmp2_reg);
++
++ //
++ // Debugging
++ //
++
++ // assert on cr0
++ void asm_assert(bool check_equal, const char* msg, int id);
++ void asm_assert_eq(const char* msg, int id) { asm_assert(true, msg, id); }
++ void asm_assert_ne(const char* msg, int id) { asm_assert(false, msg, id); }
++
++ private:
++ void asm_assert_mems_zero(bool check_equal, int size, int mem_offset, Register mem_base,
++ const char* msg, int id);
++
++ public:
++
++ void asm_assert_mem8_is_zero(int mem_offset, Register mem_base, const char* msg, int id) {
++ asm_assert_mems_zero(true, 8, mem_offset, mem_base, msg, id);
++ }
++ void asm_assert_mem8_isnot_zero(int mem_offset, Register mem_base, const char* msg, int id) {
++ asm_assert_mems_zero(false, 8, mem_offset, mem_base, msg, id);
++ }
++
++ // Verify R16_thread contents.
++ void verify_thread();
++
++ // Emit code to verify that reg contains a valid oop if +VerifyOops is set.
++ void verify_oop(Register reg, const char* s = "broken oop");
++
++ // TODO: verify method and klass metadata (compare against vptr?)
++ void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {}
++ void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line) {}
++
++ // Convenience method returning function entry. For the ELFv1 case
++ // creates function descriptor at the current address and returs
++ // the pointer to it. For the ELFv2 case returns the current address.
++ inline address function_entry();
++
++#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
++#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)
++
++ private:
++
++ enum {
++ stop_stop = 0,
++ stop_untested = 1,
++ stop_unimplemented = 2,
++ stop_shouldnotreachhere = 3,
++ stop_end = 4
++ };
++ void stop(int type, const char* msg, int id);
++
++ public:
++ // Prints msg, dumps registers and stops execution.
++ void stop (const char* msg = "", int id = 0) { stop(stop_stop, msg, id); }
++ void untested (const char* msg = "", int id = 0) { stop(stop_untested, msg, id); }
++ void unimplemented(const char* msg = "", int id = 0) { stop(stop_unimplemented, msg, id); }
++ void should_not_reach_here() { stop(stop_shouldnotreachhere, "", -1); }
++
++ void zap_from_to(Register low, int before, Register high, int after, Register val, Register addr) PRODUCT_RETURN;
++};
++
++// class SkipIfEqualZero:
++//
++// Instantiating this class will result in assembly code being output that will
++// jump around any code emitted between the creation of the instance and its
++// automatic destruction at the end of a scope block, depending on the value of
++// the flag passed to the constructor, which will be checked at run-time.
++class SkipIfEqualZero : public StackObj {
++ private:
++ MacroAssembler* _masm;
++ Label _label;
++
++ public:
++ // 'Temp' is a temp register that this object can use (and trash).
++ explicit SkipIfEqualZero(MacroAssembler*, Register temp, const bool* flag_addr);
++ ~SkipIfEqualZero();
++};
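++
++// Typical use (sketch; 'masm', 'Rtmp' and 'flag' are placeholder names):
++//
++//   {
++//     SkipIfEqualZero skip(masm, Rtmp, &flag);
++//     // ... code emitted here is executed only if flag is true ...
++//   } // The destructor binds the skip-target label here.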
++
++#endif // CPU_PPC_VM_MACROASSEMBLER_PPC_HPP
+--- ./hotspot/src/cpu/ppc/vm/macroAssembler_ppc.inline.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/macroAssembler_ppc.inline.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,407 @@
++/*
++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2014 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_PPC_VM_MACROASSEMBLER_PPC_INLINE_HPP
++#define CPU_PPC_VM_MACROASSEMBLER_PPC_INLINE_HPP
++
++#include "asm/assembler.inline.hpp"
++#include "asm/macroAssembler.hpp"
++#include "asm/codeBuffer.hpp"
++#include "code/codeCache.hpp"
++
++inline bool MacroAssembler::is_ld_largeoffset(address a) {
++ const int inst1 = *(int *)a;
++ const int inst2 = *(int *)(a+4);
++ return (is_ld(inst1)) ||
++ (is_addis(inst1) && is_ld(inst2) && inv_ra_field(inst2) == inv_rt_field(inst1));
++}
++
++inline int MacroAssembler::get_ld_largeoffset_offset(address a) {
++ assert(MacroAssembler::is_ld_largeoffset(a), "must be ld with large offset");
++
++ const int inst1 = *(int *)a;
++ if (is_ld(inst1)) {
++ return inv_d1_field(inst1);
++ } else {
++ const int inst2 = *(int *)(a+4);
++ return (inv_d1_field(inst1) << 16) + inv_d1_field(inst2);
++ }
++}
++
++inline void MacroAssembler::round_to(Register r, int modulus) {
++ assert(is_power_of_2_long((jlong)modulus), "must be power of 2");
++ addi(r, r, modulus-1);
++ clrrdi(r, r, log2_long((jlong)modulus));
++}
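++
++// Worked example: round_to(r, 8) with r == 13 first computes 13 + 7 == 20,
++// then clears the low log2(8) == 3 bits, leaving r == 16.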
++
++// Move register if destination register and target register are different.
++inline void MacroAssembler::mr_if_needed(Register rd, Register rs) {
++ if (rs != rd) mr(rd, rs);
++}
++inline void MacroAssembler::fmr_if_needed(FloatRegister rd, FloatRegister rs) {
++ if (rs != rd) fmr(rd, rs);
++}
++inline void MacroAssembler::endgroup_if_needed(bool needed) {
++ if (needed) {
++ endgroup();
++ }
++}
++
++inline void MacroAssembler::membar(int bits) {
++ // TODO: use elemental_membar(bits) for Power 8 and disable optimization of acquire-release
++ // (Matcher::post_membar_release where we use PPC64_ONLY(xop == Op_MemBarRelease ||))
++ if (bits & StoreLoad) sync(); else lwsync();
++}
++inline void MacroAssembler::release() { membar(LoadStore | StoreStore); }
++inline void MacroAssembler::acquire() { membar(LoadLoad | LoadStore); }
++inline void MacroAssembler::fence() { membar(LoadLoad | LoadStore | StoreLoad | StoreStore); }
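++
++// Note: lwsync orders LoadLoad, LoadStore and StoreStore, but not StoreLoad;
++// only the full sync covers StoreLoad, hence the StoreLoad test in membar().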
++
++// Address of the global TOC.
++inline address MacroAssembler::global_toc() {
++ return CodeCache::low_bound();
++}
++
++// Offset of given address to the global TOC.
++inline int MacroAssembler::offset_to_global_toc(const address addr) {
++ intptr_t offset = (intptr_t)addr - (intptr_t)MacroAssembler::global_toc();
++ assert(Assembler::is_simm((long)offset, 31) && offset >= 0, "must be in range");
++ return (int)offset;
++}
++
++// Address of current method's TOC.
++inline address MacroAssembler::method_toc() {
++ return code()->consts()->start();
++}
++
++// Offset of given address to current method's TOC.
++inline int MacroAssembler::offset_to_method_toc(address addr) {
++ intptr_t offset = (intptr_t)addr - (intptr_t)method_toc();
++ assert(is_simm((long)offset, 31) && offset >= 0, "must be in range");
++ return (int)offset;
++}
++
++inline bool MacroAssembler::is_calculate_address_from_global_toc_at(address a, address bound) {
++ const address inst2_addr = a;
++ const int inst2 = *(int *) a;
++
++ // The relocation points to the second instruction, the addi.
++ if (!is_addi(inst2)) return false;
++
++ // The addi reads and writes the same register dst.
++ const int dst = inv_rt_field(inst2);
++ if (inv_ra_field(inst2) != dst) return false;
++
++ // Now, find the preceding addis which writes to dst.
++ int inst1 = 0;
++ address inst1_addr = inst2_addr - BytesPerInstWord;
++ while (inst1_addr >= bound) {
++ inst1 = *(int *) inst1_addr;
++ if (is_addis(inst1) && inv_rt_field(inst1) == dst) {
++ // stop, found the addis which writes dst
++ break;
++ }
++ inst1_addr -= BytesPerInstWord;
++ }
++
++ if (!(inst1 == 0 || inv_ra_field(inst1) == 29 /* R29 */)) return false;
++ return is_addis(inst1);
++}
++
++#ifdef _LP64
++// Detect narrow oop constants.
++inline bool MacroAssembler::is_set_narrow_oop(address a, address bound) {
++ const address inst2_addr = a;
++ const int inst2 = *(int *)a;
++ // The relocation points to the second instruction, the ori.
++ if (!is_ori(inst2)) return false;
++
++ // The ori reads and writes the same register dst.
++ const int dst = inv_rta_field(inst2);
++ if (inv_rs_field(inst2) != dst) return false;
++
++ // Now, find the preceding addis which writes to dst.
++ int inst1 = 0;
++ address inst1_addr = inst2_addr - BytesPerInstWord;
++ while (inst1_addr >= bound) {
++ inst1 = *(int *) inst1_addr;
++ if (is_lis(inst1) && inv_rs_field(inst1) == dst) return true;
++ inst1_addr -= BytesPerInstWord;
++ }
++ return false;
++}
++#endif
++
++
++inline bool MacroAssembler::is_load_const_at(address a) {
++ const int* p_inst = (int *) a;
++ bool b = is_lis(*p_inst++);
++ if (is_ori(*p_inst)) {
++ p_inst++;
++ b = b && is_rldicr(*p_inst++); // TODO: could be made more precise: `sldi'!
++ b = b && is_oris(*p_inst++);
++ b = b && is_ori(*p_inst);
++ } else if (is_lis(*p_inst)) {
++ p_inst++;
++ b = b && is_ori(*p_inst++);
++ b = b && is_ori(*p_inst);
++ // TODO: could enhance reliability by adding is_insrdi
++ } else return false;
++ return b;
++}
++
++inline void MacroAssembler::set_oop_constant(jobject obj, Register d) {
++ set_oop(constant_oop_address(obj), d);
++}
++
++inline void MacroAssembler::set_oop(AddressLiteral obj_addr, Register d) {
++ assert(obj_addr.rspec().type() == relocInfo::oop_type, "must be an oop reloc");
++ load_const(d, obj_addr);
++}
++
++inline void MacroAssembler::pd_patch_instruction(address branch, address target) {
++ jint& stub_inst = *(jint*) branch;
++ stub_inst = patched_branch(target - branch, stub_inst, 0);
++}
++
++// Relocation of conditional far branches.
++inline bool MacroAssembler::is_bc_far_variant1_at(address instruction_addr) {
++ // Variant 1, the 1st instruction contains the destination address:
++ //
++ // bcxx DEST
++ // endgroup
++ //
++ const int instruction_1 = *(int*)(instruction_addr);
++ const int instruction_2 = *(int*)(instruction_addr + 4);
++ return is_bcxx(instruction_1) &&
++ (inv_bd_field(instruction_1, (intptr_t)instruction_addr) != (intptr_t)(instruction_addr + 2*4)) &&
++ is_endgroup(instruction_2);
++}
++
++// Relocation of conditional far branches.
++inline bool MacroAssembler::is_bc_far_variant2_at(address instruction_addr) {
++ // Variant 2, the 2nd instruction contains the destination address:
++ //
++ // b!cxx SKIP
++ // bxx DEST
++ // SKIP:
++ //
++ const int instruction_1 = *(int*)(instruction_addr);
++ const int instruction_2 = *(int*)(instruction_addr + 4);
++ return is_bcxx(instruction_1) &&
++ (inv_bd_field(instruction_1, (intptr_t)instruction_addr) == (intptr_t)(instruction_addr + 2*4)) &&
++ is_bxx(instruction_2);
++}
++
++// Relocation for conditional branches
++inline bool MacroAssembler::is_bc_far_variant3_at(address instruction_addr) {
++ // Variant 3, far cond branch to the next instruction, already patched to nops:
++ //
++ // nop
++ // endgroup
++ // SKIP/DEST:
++ //
++ const int instruction_1 = *(int*)(instruction_addr);
++ const int instruction_2 = *(int*)(instruction_addr + 4);
++ return is_nop(instruction_1) &&
++ is_endgroup(instruction_2);
++}
++
++
++// Convenience bc_far versions
++inline void MacroAssembler::blt_far(ConditionRegister crx, Label& L, int optimize) { MacroAssembler::bc_far(bcondCRbiIs1, bi0(crx, less), L, optimize); }
++inline void MacroAssembler::bgt_far(ConditionRegister crx, Label& L, int optimize) { MacroAssembler::bc_far(bcondCRbiIs1, bi0(crx, greater), L, optimize); }
++inline void MacroAssembler::beq_far(ConditionRegister crx, Label& L, int optimize) { MacroAssembler::bc_far(bcondCRbiIs1, bi0(crx, equal), L, optimize); }
++inline void MacroAssembler::bso_far(ConditionRegister crx, Label& L, int optimize) { MacroAssembler::bc_far(bcondCRbiIs1, bi0(crx, summary_overflow), L, optimize); }
++inline void MacroAssembler::bge_far(ConditionRegister crx, Label& L, int optimize) { MacroAssembler::bc_far(bcondCRbiIs0, bi0(crx, less), L, optimize); }
++inline void MacroAssembler::ble_far(ConditionRegister crx, Label& L, int optimize) { MacroAssembler::bc_far(bcondCRbiIs0, bi0(crx, greater), L, optimize); }
++inline void MacroAssembler::bne_far(ConditionRegister crx, Label& L, int optimize) { MacroAssembler::bc_far(bcondCRbiIs0, bi0(crx, equal), L, optimize); }
++inline void MacroAssembler::bns_far(ConditionRegister crx, Label& L, int optimize) { MacroAssembler::bc_far(bcondCRbiIs0, bi0(crx, summary_overflow), L, optimize); }
++
++inline address MacroAssembler::call_stub(Register function_entry) {
++ mtctr(function_entry);
++ bctrl();
++ return pc();
++}
++
++inline void MacroAssembler::call_stub_and_return_to(Register function_entry, Register return_pc) {
++ assert_different_registers(function_entry, return_pc);
++ mtlr(return_pc);
++ mtctr(function_entry);
++ bctr();
++}
++
++// Get the pc where the last emitted call will return to.
++inline address MacroAssembler::last_calls_return_pc() {
++ return _last_calls_return_pc;
++}
++
++// Read from the polling page, its address is already in a register.
++inline void MacroAssembler::load_from_polling_page(Register polling_page_address, int offset) {
++ ld(R0, offset, polling_page_address);
++}
++
++// Trap-instruction-based checks.
++
++inline void MacroAssembler::trap_null_check(Register a, trap_to_bits cmp) {
++ assert(TrapBasedNullChecks, "sanity");
++ tdi(cmp, a/*reg a*/, 0);
++}
++inline void MacroAssembler::trap_zombie_not_entrant() {
++ tdi(traptoUnconditional, 0/*reg 0*/, 1);
++}
++inline void MacroAssembler::trap_should_not_reach_here() {
++ tdi_unchecked(traptoUnconditional, 0/*reg 0*/, 2);
++}
++
++inline void MacroAssembler::trap_ic_miss_check(Register a, Register b) {
++ td(traptoGreaterThanUnsigned | traptoLessThanUnsigned, a, b);
++}
++
++// Do an explicit null check if access to a+offset will not raise a SIGSEGV.
++// Either issue a trap instruction that raises SIGTRAP, or do a compare that
++// branches to exception_entry.
++// No support for compressed oops (base page of heap). Does not distinguish
++// loads and stores.
++inline void MacroAssembler::null_check_throw(Register a, int offset, Register temp_reg,
++ address exception_entry) {
++ if (!ImplicitNullChecks || needs_explicit_null_check(offset) || !os::zero_page_read_protected()) {
++ if (TrapBasedNullChecks) {
++ assert(UseSIGTRAP, "sanity");
++ trap_null_check(a);
++ } else {
++ Label ok;
++ cmpdi(CCR0, a, 0);
++ bne(CCR0, ok);
++ load_const_optimized(temp_reg, exception_entry);
++ mtctr(temp_reg);
++ bctr();
++ bind(ok);
++ }
++ }
++}
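++
++// Note: when ImplicitNullChecks is on, the offset is in range, and the zero
++// page is read protected, no code is emitted at all; the faulting access
++// itself raises SIGSEGV and the signal handler dispatches the null check.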
++
++inline void MacroAssembler::load_with_trap_null_check(Register d, int si16, Register s1) {
++ if (!os::zero_page_read_protected()) {
++ if (TrapBasedNullChecks) {
++ trap_null_check(s1);
++ }
++ }
++ ld(d, si16, s1);
++}
++
++inline void MacroAssembler::load_heap_oop_not_null(Register d, RegisterOrConstant offs, Register s1) {
++ if (UseCompressedOops) {
++ lwz(d, offs, s1);
++ // Attention: no null check here!
++ decode_heap_oop_not_null(d);
++ } else {
++ ld(d, offs, s1);
++ }
++}
++
++inline void MacroAssembler::store_heap_oop_not_null(Register d, RegisterOrConstant offs, Register s1, Register tmp) {
++ if (UseCompressedOops) {
++ Register compressedOop = encode_heap_oop_not_null((tmp != noreg) ? tmp : d, d);
++ stw(compressedOop, offs, s1);
++ } else {
++ std(d, offs, s1);
++ }
++}
++
++inline void MacroAssembler::load_heap_oop(Register d, RegisterOrConstant offs, Register s1) {
++ if (UseCompressedOops) {
++ lwz(d, offs, s1);
++ decode_heap_oop(d);
++ } else {
++ ld(d, offs, s1);
++ }
++}
++
++inline Register MacroAssembler::encode_heap_oop_not_null(Register d, Register src) {
++ Register current = (src!=noreg) ? src : d; // Compressed oop is in d if no src provided.
++ if (Universe::narrow_oop_base() != NULL) {
++ sub(d, current, R30);
++ current = d;
++ }
++ if (Universe::narrow_oop_shift() != 0) {
++ srdi(d, current, LogMinObjAlignmentInBytes);
++ current = d;
++ }
++ return current; // Encoded oop is in this register.
++}
++
++inline void MacroAssembler::decode_heap_oop_not_null(Register d) {
++ if (Universe::narrow_oop_shift() != 0) {
++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
++ sldi(d, d, LogMinObjAlignmentInBytes);
++ }
++ if (Universe::narrow_oop_base() != NULL) {
++ add(d, d, R30);
++ }
++}
++
++inline void MacroAssembler::decode_heap_oop(Register d) {
++ Label isNull;
++ if (Universe::narrow_oop_base() != NULL) {
++ cmpwi(CCR0, d, 0);
++ beq(CCR0, isNull);
++ }
++ if (Universe::narrow_oop_shift() != 0) {
++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
++ sldi(d, d, LogMinObjAlignmentInBytes);
++ }
++ if (Universe::narrow_oop_base() != NULL) {
++ add(d, d, R30);
++ }
++ bind(isNull);
++}
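++
++// Example (values assumed for illustration): with narrow_oop_base B in R30
++// and narrow_oop_shift == LogMinObjAlignmentInBytes == 3, encoding computes
++// n = (oop - B) >> 3 and decoding computes oop = (n << 3) + B; the base and
++// shift steps are skipped when the base is NULL or the shift is 0.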
++
++// SIGTRAP-based range checks for arrays.
++inline void MacroAssembler::trap_range_check_l(Register a, Register b) {
++ tw (traptoLessThanUnsigned, a/*reg a*/, b/*reg b*/);
++}
++inline void MacroAssembler::trap_range_check_l(Register a, int si16) {
++ twi(traptoLessThanUnsigned, a/*reg a*/, si16);
++}
++inline void MacroAssembler::trap_range_check_le(Register a, int si16) {
++ twi(traptoEqual | traptoLessThanUnsigned, a/*reg a*/, si16);
++}
++inline void MacroAssembler::trap_range_check_g(Register a, int si16) {
++ twi(traptoGreaterThanUnsigned, a/*reg a*/, si16);
++}
++inline void MacroAssembler::trap_range_check_ge(Register a, Register b) {
++ tw (traptoEqual | traptoGreaterThanUnsigned, a/*reg a*/, b/*reg b*/);
++}
++inline void MacroAssembler::trap_range_check_ge(Register a, int si16) {
++ twi(traptoEqual | traptoGreaterThanUnsigned, a/*reg a*/, si16);
++}
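++
++// Usage sketch (illustrative; register names hypothetical): a bounds check
++// 'if (index >= length) throw' can compile to a single trap word,
++//   trap_range_check_ge(R_index, R_length); // traps when index >= length
++// and the SIGTRAP raised on the failing case is mapped back to the
++// out-of-bounds exception path by the signal handler, so the hot path
++// carries no branch.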
++
++#if defined(ABI_ELFv2)
++inline address MacroAssembler::function_entry() { return pc(); }
++#else
++inline address MacroAssembler::function_entry() { return emit_fd(); }
++#endif
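++// (Added note: under the ELFv1 ABI a function's entry point is a function
++// descriptor, which emit_fd() lays down; the ELFv2 ABI dispenses with
++// descriptors and calls the code directly, so the entry is simply pc().)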
++
++#endif // CPU_PPC_VM_MACROASSEMBLER_PPC_INLINE_HPP
+--- ./hotspot/src/cpu/ppc/vm/metaspaceShared_ppc.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/metaspaceShared_ppc.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,61 @@
++/*
++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "asm/macroAssembler.inline.hpp"
++#include "asm/codeBuffer.hpp"
++#include "memory/metaspaceShared.hpp"
++
++// Generate the self-patching vtable method:
++//
++// This method will be called (as any other Klass virtual method) with
++// the Klass itself as the first argument. Example:
++//
++// oop obj;
++// int size = obj->klass()->klass_part()->oop_size(this);
++//
++// for which the virtual method call is Klass::oop_size();
++//
++// The dummy method is called with the Klass object as the first
++// operand, and an object as the second argument.
++//
++
++//=====================================================================
++
++// All of the dummy methods in the vtable are essentially identical,
++// differing only by an ordinal constant, and they bear no relationship
++// to the original method which the caller intended. Also, there need
++// to be 'vtbl_list_size' instances of the vtable in order to
++// differentiate between the 'vtbl_list_size' original Klass objects.
++
++void MetaspaceShared::generate_vtable_methods(void** vtbl_list,
++ void** vtable,
++ char** md_top,
++ char* md_end,
++ char** mc_top,
++ char* mc_end) {
++ Unimplemented();
++}
++
+--- ./hotspot/src/cpu/ppc/vm/methodHandles_ppc.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/methodHandles_ppc.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,558 @@
++/*
++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2014 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "asm/macroAssembler.inline.hpp"
++#include "interpreter/interpreter.hpp"
++#include "memory/allocation.inline.hpp"
++#include "prims/methodHandles.hpp"
++
++#define __ _masm->
++
++#ifdef CC_INTERP
++#define EXCEPTION_ENTRY StubRoutines::throw_NullPointerException_at_call_entry()
++#else
++#define EXCEPTION_ENTRY Interpreter::throw_NullPointerException_entry()
++#endif
++
++#ifdef PRODUCT
++#define BLOCK_COMMENT(str) // nothing
++#else
++#define BLOCK_COMMENT(str) __ block_comment(str)
++#endif
++
++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
++
++// Workaround for C++ overloading nastiness on '0' for RegisterOrConstant.
++inline static RegisterOrConstant constant(int value) {
++ return RegisterOrConstant(value);
++}
++
++void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg, Register temp_reg, Register temp2_reg) {
++ if (VerifyMethodHandles)
++ verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), temp_reg, temp2_reg,
++ "MH argument is a Class");
++ __ ld(klass_reg, java_lang_Class::klass_offset_in_bytes(), klass_reg);
++}
++
++#ifdef ASSERT
++static int check_nonzero(const char* xname, int x) {
++ assert(x != 0, err_msg("%s should be nonzero", xname));
++ return x;
++}
++#define NONZERO(x) check_nonzero(#x, x)
++#else //ASSERT
++#define NONZERO(x) (x)
++#endif //ASSERT
++
++#ifdef ASSERT
++void MethodHandles::verify_klass(MacroAssembler* _masm,
++ Register obj_reg, SystemDictionary::WKID klass_id,
++ Register temp_reg, Register temp2_reg,
++ const char* error_message) {
++ Klass** klass_addr = SystemDictionary::well_known_klass_addr(klass_id);
++ KlassHandle klass = SystemDictionary::well_known_klass(klass_id);
++ Label L_ok, L_bad;
++ BLOCK_COMMENT("verify_klass {");
++ __ verify_oop(obj_reg);
++ __ cmpdi(CCR0, obj_reg, 0);
++ __ beq(CCR0, L_bad);
++ __ load_klass(temp_reg, obj_reg);
++ __ load_const_optimized(temp2_reg, (address) klass_addr);
++ __ ld(temp2_reg, 0, temp2_reg);
++ __ cmpd(CCR0, temp_reg, temp2_reg);
++ __ beq(CCR0, L_ok);
++ __ ld(temp_reg, klass->super_check_offset(), temp_reg);
++ __ cmpd(CCR0, temp_reg, temp2_reg);
++ __ beq(CCR0, L_ok);
++ __ BIND(L_bad);
++ __ stop(error_message);
++ __ BIND(L_ok);
++ BLOCK_COMMENT("} verify_klass");
++}
++
++void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) {
++ Label L;
++ BLOCK_COMMENT("verify_ref_kind {");
++ __ load_sized_value(temp, NONZERO(java_lang_invoke_MemberName::flags_offset_in_bytes()), member_reg,
++ sizeof(u4), /*is_signed*/ false);
++ // assert(sizeof(u4) == sizeof(java.lang.invoke.MemberName.flags), "");
++ __ srwi( temp, temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT);
++ __ andi(temp, temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK);
++ __ cmpwi(CCR1, temp, ref_kind);
++ __ beq(CCR1, L);
++ { char* buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal);
++ jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind);
++ if (ref_kind == JVM_REF_invokeVirtual ||
++ ref_kind == JVM_REF_invokeSpecial)
++ // could do this for all ref_kinds, but would explode assembly code size
++ trace_method_handle(_masm, buf);
++ __ stop(buf);
++ }
++ BLOCK_COMMENT("} verify_ref_kind");
++ __ BIND(L);
++}
++
++#endif // ASSERT
++
++void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register target, Register temp,
++ bool for_compiler_entry) {
++ Label L_no_such_method;
++ assert(method == R19_method, "interpreter calling convention");
++ assert_different_registers(method, target, temp);
++
++ if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) {
++ Label run_compiled_code;
++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running
++ // compiled code in threads for which the event is enabled. Check here for
++ // interp_only_mode if these events CAN be enabled.
++ __ verify_thread();
++ __ lwz(temp, in_bytes(JavaThread::interp_only_mode_offset()), R16_thread);
++ __ cmplwi(CCR0, temp, 0);
++ __ beq(CCR0, run_compiled_code);
++ // The null method test is replicated below in the compiled case;
++ // it might be possible to share it across the verify_thread().
++ __ cmplwi(CCR0, R19_method, 0);
++ __ beq(CCR0, L_no_such_method);
++ __ ld(target, in_bytes(Method::interpreter_entry_offset()), R19_method);
++ __ mtctr(target);
++ __ bctr();
++ __ BIND(run_compiled_code);
++ }
++
++ // Compiled case, either static or fall-through from runtime conditional
++ __ cmplwi(CCR0, R19_method, 0);
++ __ beq(CCR0, L_no_such_method);
++
++ const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() :
++ Method::from_interpreted_offset();
++ __ ld(target, in_bytes(entry_offset), R19_method);
++ __ mtctr(target);
++ __ bctr();
++
++ __ bind(L_no_such_method);
++ assert(StubRoutines::throw_AbstractMethodError_entry() != NULL, "not yet generated!");
++ __ load_const_optimized(target, StubRoutines::throw_AbstractMethodError_entry());
++ __ mtctr(target);
++ __ bctr();
++}
++
++
++void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm,
++ Register recv, Register method_temp,
++ Register temp2, Register temp3,
++ bool for_compiler_entry) {
++ BLOCK_COMMENT("jump_to_lambda_form {");
++ // This is the initial entry point of a lazy method handle.
++ // After type checking, it picks up the invoker from the LambdaForm.
++ assert_different_registers(recv, method_temp, temp2); // temp3 is only passed on
++ assert(method_temp == R19_method, "required register for loading method");
++
++ // Load the invoker, as MH -> MH.form -> LF.vmentry
++ __ verify_oop(recv);
++ __ load_heap_oop_not_null(method_temp, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes()), recv);
++ __ verify_oop(method_temp);
++ __ load_heap_oop_not_null(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes()), method_temp);
++ __ verify_oop(method_temp);
++ // the following assumes that a Method* is normally compressed in the vmtarget field:
++ __ ld(method_temp, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes()), method_temp);
++
++ if (VerifyMethodHandles && !for_compiler_entry) {
++ // make sure recv is already on stack
++ __ ld(temp2, in_bytes(Method::const_offset()), method_temp);
++ __ load_sized_value(temp2, in_bytes(ConstMethod::size_of_parameters_offset()), temp2,
++ sizeof(u2), /*is_signed*/ false);
++ // assert(sizeof(u2) == sizeof(ConstMethod::_size_of_parameters), "");
++ Label L;
++ __ ld(temp2, __ argument_offset(temp2, temp2, 0), CC_INTERP_ONLY(R17_tos) NOT_CC_INTERP(R15_esp));
++ __ cmpd(CCR1, temp2, recv);
++ __ beq(CCR1, L);
++ __ stop("receiver not on stack");
++ __ BIND(L);
++ }
++
++ jump_from_method_handle(_masm, method_temp, temp2, temp3, for_compiler_entry);
++ BLOCK_COMMENT("} jump_to_lambda_form");
++}
++
++
++
++// Code generation
++address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm,
++ vmIntrinsics::ID iid) {
++ const bool not_for_compiler_entry = false; // this is the interpreter entry
++ assert(is_signature_polymorphic(iid), "expected invoke iid");
++ if (iid == vmIntrinsics::_invokeGeneric ||
++ iid == vmIntrinsics::_compiledLambdaForm) {
++ // Perhaps surprisingly, the symbolic references visible to Java are not directly used.
++ // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod.
++ // They all allow an appendix argument.
++ __ stop("Should not reach here"); // empty stubs make SG sick
++ return NULL;
++ }
++
++ Register argbase = CC_INTERP_ONLY(R17_tos) NOT_CC_INTERP(R15_esp); // parameter (preserved)
++ Register argslot = R3;
++ Register temp1 = R6;
++ Register param_size = R7;
++
++ // here's where control starts out:
++ __ align(CodeEntryAlignment);
++ address entry_point = __ pc();
++
++ if (VerifyMethodHandles) {
++ Label L;
++ BLOCK_COMMENT("verify_intrinsic_id {");
++ __ load_sized_value(temp1, Method::intrinsic_id_offset_in_bytes(), R19_method,
++ sizeof(u1), /*is_signed*/ false);
++ // assert(sizeof(u1) == sizeof(Method::_intrinsic_id), "");
++ __ cmpwi(CCR1, temp1, (int) iid);
++ __ beq(CCR1, L);
++ if (iid == vmIntrinsics::_linkToVirtual ||
++ iid == vmIntrinsics::_linkToSpecial) {
++ // could do this for all kinds, but would explode assembly code size
++ trace_method_handle(_masm, "bad Method*:intrinsic_id");
++ }
++ __ stop("bad Method*::intrinsic_id");
++ __ BIND(L);
++ BLOCK_COMMENT("} verify_intrinsic_id");
++ }
++
++ // First task: Find out how big the argument list is.
++ int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid);
++ assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic");
++ if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) {
++ __ ld(param_size, in_bytes(Method::const_offset()), R19_method);
++ __ load_sized_value(param_size, in_bytes(ConstMethod::size_of_parameters_offset()), param_size,
++ sizeof(u2), /*is_signed*/ false);
++ // assert(sizeof(u2) == sizeof(ConstMethod::_size_of_parameters), "");
++ } else {
++ DEBUG_ONLY(param_size = noreg);
++ }
++
++ Register tmp_mh = noreg;
++ if (!is_signature_polymorphic_static(iid)) {
++ __ ld(tmp_mh = temp1, __ argument_offset(param_size, param_size, 0), argbase);
++ DEBUG_ONLY(param_size = noreg);
++ }
++
++ if (TraceMethodHandles) {
++ if (tmp_mh != noreg)
++ __ mr(R23_method_handle, tmp_mh); // make stub happy
++ trace_method_handle_interpreter_entry(_masm, iid);
++ }
++
++ if (iid == vmIntrinsics::_invokeBasic) {
++ generate_method_handle_dispatch(_masm, iid, tmp_mh, noreg, not_for_compiler_entry);
++
++ } else {
++ // Adjust argument list by popping the trailing MemberName argument.
++ Register tmp_recv = noreg;
++ if (MethodHandles::ref_kind_has_receiver(ref_kind)) {
++ // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack.
++ __ ld(tmp_recv = temp1, __ argument_offset(param_size, param_size, 0), argbase);
++ DEBUG_ONLY(param_size = noreg);
++ }
++ Register R19_member = R19_method; // MemberName ptr; incoming method ptr is dead now
++ __ ld(R19_member, RegisterOrConstant((intptr_t)8), argbase);
++ __ add(argbase, Interpreter::stackElementSize, argbase);
++ generate_method_handle_dispatch(_masm, iid, tmp_recv, R19_member, not_for_compiler_entry);
++ }
++
++ return entry_point;
++}
++
++void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm,
++ vmIntrinsics::ID iid,
++ Register receiver_reg,
++ Register member_reg,
++ bool for_compiler_entry) {
++ assert(is_signature_polymorphic(iid), "expected invoke iid");
++ Register temp1 = (for_compiler_entry ? R25_tmp5 : R7);
++ Register temp2 = (for_compiler_entry ? R22_tmp2 : R8);
++ Register temp3 = (for_compiler_entry ? R23_tmp3 : R9);
++ Register temp4 = (for_compiler_entry ? R24_tmp4 : R10);
++ if (receiver_reg != noreg) assert_different_registers(temp1, temp2, temp3, temp4, receiver_reg);
++ if (member_reg != noreg) assert_different_registers(temp1, temp2, temp3, temp4, member_reg);
++
++ if (iid == vmIntrinsics::_invokeBasic) {
++ // indirect through MH.form.vmentry.vmtarget
++ jump_to_lambda_form(_masm, receiver_reg, R19_method, temp1, temp2, for_compiler_entry);
++ } else {
++ // The method is a member invoker used by direct method handles.
++ if (VerifyMethodHandles) {
++ // make sure the trailing argument really is a MemberName (caller responsibility)
++ verify_klass(_masm, member_reg, SystemDictionary::WK_KLASS_ENUM_NAME(MemberName_klass),
++ temp1, temp2,
++ "MemberName required for invokeVirtual etc.");
++ }
++
++ Register temp1_recv_klass = temp1;
++ if (iid != vmIntrinsics::_linkToStatic) {
++ __ verify_oop(receiver_reg);
++ if (iid == vmIntrinsics::_linkToSpecial) {
++ // Don't actually load the klass; just null-check the receiver.
++ __ null_check_throw(receiver_reg, -1, temp1, EXCEPTION_ENTRY);
++ } else {
++ // load receiver klass itself
++ __ null_check_throw(receiver_reg, oopDesc::klass_offset_in_bytes(), temp1, EXCEPTION_ENTRY);
++ __ load_klass(temp1_recv_klass, receiver_reg);
++ __ verify_klass_ptr(temp1_recv_klass);
++ }
++ BLOCK_COMMENT("check_receiver {");
++ // The receiver for the MemberName must be in receiver_reg.
++ // Check the receiver against the MemberName.clazz
++ if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) {
++ // Did not load it above...
++ __ load_klass(temp1_recv_klass, receiver_reg);
++ __ verify_klass_ptr(temp1_recv_klass);
++ }
++ if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) {
++ Label L_ok;
++ Register temp2_defc = temp2;
++ __ load_heap_oop_not_null(temp2_defc, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes()), member_reg);
++ load_klass_from_Class(_masm, temp2_defc, temp3, temp4);
++ __ verify_klass_ptr(temp2_defc);
++ __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, temp4, L_ok);
++ // If we get here, the type check failed!
++ __ stop("receiver class disagrees with MemberName.clazz");
++ __ BIND(L_ok);
++ }
++ BLOCK_COMMENT("} check_receiver");
++ }
++ if (iid == vmIntrinsics::_linkToSpecial ||
++ iid == vmIntrinsics::_linkToStatic) {
++ DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass
++ }
++
++ // Live registers at this point:
++ // member_reg - MemberName that was the trailing argument
++ // temp1_recv_klass - klass of stacked receiver, if needed
++ // O5_savedSP - interpreter linkage (if interpreted)
++ // O0..O5 - compiler arguments (if compiled)
++
++ Label L_incompatible_class_change_error;
++ switch (iid) {
++ case vmIntrinsics::_linkToSpecial:
++ if (VerifyMethodHandles) {
++ verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp2);
++ }
++ __ ld(R19_method, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes()), member_reg);
++ break;
++
++ case vmIntrinsics::_linkToStatic:
++ if (VerifyMethodHandles) {
++ verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp2);
++ }
++ __ ld(R19_method, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes()), member_reg);
++ break;
++
++ case vmIntrinsics::_linkToVirtual:
++ {
++ // same as TemplateTable::invokevirtual,
++ // minus the CP setup and profiling:
++
++ if (VerifyMethodHandles) {
++ verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp2);
++ }
++
++ // pick out the vtable index from the MemberName, and then we can discard it:
++ Register temp2_index = temp2;
++ __ ld(temp2_index, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes()), member_reg);
++
++ if (VerifyMethodHandles) {
++ Label L_index_ok;
++ __ cmpdi(CCR1, temp2_index, 0);
++ __ bge(CCR1, L_index_ok);
++ __ stop("no virtual index");
++ __ BIND(L_index_ok);
++ }
++
++ // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget
++ // at this point. And VerifyMethodHandles has already checked clazz, if needed.
++
++ // get target Method* & entry point
++ __ lookup_virtual_method(temp1_recv_klass, temp2_index, R19_method);
++ break;
++ }
++
++ case vmIntrinsics::_linkToInterface:
++ {
++ // same as TemplateTable::invokeinterface
++ // (minus the CP setup and profiling, with different argument motion)
++ if (VerifyMethodHandles) {
++ verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp2);
++ }
++
++ Register temp2_intf = temp2;
++ __ load_heap_oop_not_null(temp2_intf, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes()), member_reg);
++ load_klass_from_Class(_masm, temp2_intf, temp3, temp4);
++ __ verify_klass_ptr(temp2_intf);
++
++ Register vtable_index = R19_method;
++ __ ld(vtable_index, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes()), member_reg);
++ if (VerifyMethodHandles) {
++ Label L_index_ok;
++ __ cmpdi(CCR1, vtable_index, 0);
++ __ bge(CCR1, L_index_ok);
++ __ stop("invalid vtable index for MH.invokeInterface");
++ __ BIND(L_index_ok);
++ }
++
++ // given intf, index, and recv klass, dispatch to the implementation method
++ __ lookup_interface_method(temp1_recv_klass, temp2_intf,
++ // note: next two args must be the same:
++ vtable_index, R19_method,
++ temp3, temp4,
++ L_incompatible_class_change_error);
++ break;
++ }
++
++ default:
++ fatal(err_msg_res("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)));
++ break;
++ }
++
++ // Live at this point:
++ // R19_method
++ // O5_savedSP (if interpreted)
++
++ // After figuring out which concrete method to call, jump into it.
++ // Note that this works in the interpreter with no data motion.
++ // But the compiled version will require that the receiver register be shifted out.
++ __ verify_method_ptr(R19_method);
++ jump_from_method_handle(_masm, R19_method, temp1, temp2, for_compiler_entry);
++
++ if (iid == vmIntrinsics::_linkToInterface) {
++ __ BIND(L_incompatible_class_change_error);
++ __ load_const_optimized(temp1, StubRoutines::throw_IncompatibleClassChangeError_entry());
++ __ mtctr(temp1);
++ __ bctr();
++ }
++ }
++}
++
++#ifndef PRODUCT
++void trace_method_handle_stub(const char* adaptername,
++ oopDesc* mh,
++ intptr_t* entry_sp,
++ intptr_t* saved_regs) {
++
++ bool has_mh = (strstr(adaptername, "/static") == NULL &&
++ strstr(adaptername, "linkTo") == NULL); // static linkers don't have MH
++ const char* mh_reg_name = has_mh ? "R23_method_handle" : "G23";
++ tty->print_cr("MH %s %s="INTPTR_FORMAT " sp=" INTPTR_FORMAT,
++ adaptername, mh_reg_name, (intptr_t) mh, entry_sp);
++
++ if (Verbose) {
++ tty->print_cr("Registers:");
++ const int abi_offset = frame::abi_reg_args_size / 8;
++ for (int i = R3->encoding(); i <= R12->encoding(); i++) {
++ Register r = as_Register(i);
++ int count = i - R3->encoding();
++ // The registers are stored in reverse order on the stack (by save_volatile_gprs(R1_SP, abi_reg_args_size)).
++ tty->print("%3s=" PTR_FORMAT, r->name(), saved_regs[abi_offset + count]);
++ if ((count + 1) % 4 == 0) {
++ tty->cr();
++ } else {
++ tty->print(", ");
++ }
++ }
++ tty->cr();
++
++ {
++ // dumping last frame with frame::describe
++
++ JavaThread* p = JavaThread::active();
++
++ ResourceMark rm;
++ PRESERVE_EXCEPTION_MARK; // may not be needed but is safer and inexpensive here
++ FrameValues values;
++
++ // Note: We want to allow trace_method_handle from any call site.
++ // While trace_method_handle creates a frame, it may be entered
++ // without a PC on the stack top (e.g. not just after a call).
++ // Walking that frame could lead to failures due to that invalid PC.
++ // => carefully detect that frame when doing the stack walking
++
++ // Current C frame
++ frame cur_frame = os::current_frame();
++
++ // Robust search of trace_calling_frame (independent of inlining).
++ // Assumes saved_regs comes from a pusha in the trace_calling_frame.
++ assert(cur_frame.sp() < saved_regs, "registers not saved on stack?");
++ frame trace_calling_frame = os::get_sender_for_C_frame(&cur_frame);
++ while (trace_calling_frame.fp() < saved_regs) {
++ trace_calling_frame = os::get_sender_for_C_frame(&trace_calling_frame);
++ }
++
++ // Safely create a frame and call frame::describe.
++ intptr_t *dump_sp = trace_calling_frame.sender_sp();
++
++ frame dump_frame = frame(dump_sp);
++ dump_frame.describe(values, 1);
++
++ values.describe(-1, saved_regs, "raw top of stack");
++
++ tty->print_cr("Stack layout:");
++ values.print(p);
++ }
++
++ if (has_mh && mh->is_oop()) {
++ mh->print();
++ if (java_lang_invoke_MethodHandle::is_instance(mh)) {
++ if (java_lang_invoke_MethodHandle::form_offset_in_bytes() != 0)
++ java_lang_invoke_MethodHandle::form(mh)->print();
++ }
++ }
++ }
++}
++
++void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) {
++ if (!TraceMethodHandles) return;
++
++ BLOCK_COMMENT("trace_method_handle {");
++
++ int nbytes_save = 10 * 8; // 10 volatile gprs
++ __ save_LR_CR(R0);
++ __ mr(R0, R1_SP); // saved_sp
++ assert(Assembler::is_simm(-nbytes_save, 16), "Overwriting R0");
++ // Push_frame_reg_args only uses R0 if nbytes_save is wider than 16 bits.
++ __ push_frame_reg_args(nbytes_save, R0);
++ __ save_volatile_gprs(R1_SP, frame::abi_reg_args_size); // Except R0.
++
++ __ load_const(R3_ARG1, (address)adaptername);
++ __ mr(R4_ARG2, R23_method_handle);
++ __ mr(R5_ARG3, R0); // saved_sp
++ __ mr(R6_ARG4, R1_SP);
++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, trace_method_handle_stub));
++
++ __ restore_volatile_gprs(R1_SP, 112); // Except R0.
++ __ pop_frame();
++ __ restore_LR_CR(R0);
++
++ BLOCK_COMMENT("} trace_method_handle");
++}
++#endif // PRODUCT
+--- ./hotspot/src/cpu/ppc/vm/methodHandles_ppc.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/methodHandles_ppc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,62 @@
++/*
++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++// Platform-specific definitions for method handles.
++// These definitions are inlined into class MethodHandles.
++
++// Adapters
++//static unsigned int adapter_code_size() {
++// return 32*K DEBUG_ONLY(+ 16*K) + (TraceMethodHandles ? 16*K : 0) + (VerifyMethodHandles ? 32*K : 0);
++//}
++enum /* platform_dependent_constants */ {
++ adapter_code_size = NOT_LP64(16000 DEBUG_ONLY(+ 25000)) LP64_ONLY(32000 DEBUG_ONLY(+ 150000))
++};
++
++// Additional helper methods for MethodHandles code generation:
++public:
++ static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg, Register temp_reg, Register temp2_reg);
++
++ static void verify_klass(MacroAssembler* _masm,
++ Register obj_reg, SystemDictionary::WKID klass_id,
++ Register temp_reg, Register temp2_reg,
++ const char* error_message = "wrong klass") NOT_DEBUG_RETURN;
++
++ static void verify_method_handle(MacroAssembler* _masm, Register mh_reg,
++ Register temp_reg, Register temp2_reg) {
++ Unimplemented();
++ }
++
++ static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN;
++
++ // Similar to InterpreterMacroAssembler::jump_from_interpreted.
++ // Takes care of special dispatch from single stepping too.
++ static void jump_from_method_handle(MacroAssembler* _masm, Register method,
++ Register temp, Register temp2,
++ bool for_compiler_entry);
++
++ static void jump_to_lambda_form(MacroAssembler* _masm,
++ Register recv, Register method_temp,
++ Register temp2, Register temp3,
++ bool for_compiler_entry);
+--- ./hotspot/src/cpu/ppc/vm/nativeInst_ppc.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/nativeInst_ppc.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,391 @@
++/*
++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "asm/macroAssembler.inline.hpp"
++#include "memory/resourceArea.hpp"
++#include "nativeInst_ppc.hpp"
++#include "oops/oop.inline.hpp"
++#include "runtime/handles.hpp"
++#include "runtime/sharedRuntime.hpp"
++#include "runtime/stubRoutines.hpp"
++#include "utilities/ostream.hpp"
++#ifdef COMPILER1
++#include "c1/c1_Runtime1.hpp"
++#endif
++
++// We use an illtrap for marking a method as not_entrant or zombie iff !UseSIGTRAP
++// Work around a C++ compiler bug which changes 'this'
++bool NativeInstruction::is_sigill_zombie_not_entrant_at(address addr) {
++ assert(!UseSIGTRAP, "precondition");
++ if (*(int*)addr != 0 /*illtrap*/) return false;
++ CodeBlob* cb = CodeCache::find_blob_unsafe(addr);
++ if (cb == NULL || !cb->is_nmethod()) return false;
++ nmethod *nm = (nmethod *)cb;
++ // This method is not_entrant or zombie iff the illtrap instruction is
++ // located at the verified entry point.
++ return nm->verified_entry_point() == addr;
++}
++
++#ifdef ASSERT
++void NativeInstruction::verify() {
++ // Make sure code pattern is actually an instruction address.
++ address addr = addr_at(0);
++ if (addr == 0 || ((intptr_t)addr & 3) != 0) {
++ fatal("not an instruction address");
++ }
++}
++#endif // ASSERT
++
++// Extract call destination from a NativeCall. The call might use a trampoline stub.
++address NativeCall::destination() const {
++ address addr = (address)this;
++ address destination = Assembler::bxx_destination(addr);
++
++ // Do we use a trampoline stub for this call?
++ CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie.
++ assert(cb && cb->is_nmethod(), "sanity");
++ nmethod *nm = (nmethod *)cb;
++ if (nm->stub_contains(destination) && is_NativeCallTrampolineStub_at(destination)) {
++ // Yes we do, so get the destination from the trampoline stub.
++ const address trampoline_stub_addr = destination;
++ destination = NativeCallTrampolineStub_at(trampoline_stub_addr)->destination(nm);
++ }
++
++ return destination;
++}
++
++// Similar to replace_mt_safe, but just changes the destination. The
++// important thing is that free-running threads are able to execute this
++// call instruction at all times. Thus, the displacement field must be
++// instruction-word-aligned.
++//
++// Used in the runtime linkage of calls; see class CompiledIC.
++//
++// Add parameter assert_lock to switch off assertion
++// during code generation, where no patching lock is needed.
++void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) {
++ assert(!assert_lock ||
++ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()),
++ "concurrent code patching");
++
++ ResourceMark rm;
++ int code_size = 1 * BytesPerInstWord;
++ address addr_call = addr_at(0);
++ assert(MacroAssembler::is_bl(*(int*)addr_call), "unexpected code at call-site");
++
++ CodeBuffer cb(addr_call, code_size + 1);
++ MacroAssembler* a = new MacroAssembler(&cb);
++
++ // Patch the call.
++ if (ReoptimizeCallSequences &&
++ a->is_within_range_of_b(dest, addr_call)) {
++ a->bl(dest);
++ } else {
++ address trampoline_stub_addr = get_trampoline();
++
++ // We did not find a trampoline stub because the current codeblob
++ // does not provide this information. The branch will be patched
++ // later during a final fixup, when all necessary information is
++ // available.
++ if (trampoline_stub_addr == 0)
++ return;
++
++ // Patch the constant in the call's trampoline stub.
++ NativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest);
++
++ a->bl(trampoline_stub_addr);
++ }
++ ICache::ppc64_flush_icache_bytes(addr_call, code_size);
++}
++
++address NativeCall::get_trampoline() {
++ address call_addr = addr_at(0);
++
++ CodeBlob *code = CodeCache::find_blob(call_addr);
++ assert(code != NULL, "Could not find the containing code blob");
++
++ // There are no relocations available when the code gets relocated
++ // because of CodeBuffer expansion.
++ if (code->relocation_size() == 0)
++ return NULL;
++
++ address bl_destination = Assembler::bxx_destination(call_addr);
++ if (code->content_contains(bl_destination) &&
++ is_NativeCallTrampolineStub_at(bl_destination))
++ return bl_destination;
++
++ // If the codeBlob is not an nmethod, this is because we get here from the
++ // CodeBlob constructor, which is called within the nmethod constructor.
++ return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code);
++}
++
++#ifdef ASSERT
++void NativeCall::verify() {
++ address addr = addr_at(0);
++
++ if (!NativeCall::is_call_at(addr)) {
++ tty->print_cr("not a NativeCall at " PTR_FORMAT, addr);
++ // TODO: PPC port: Disassembler::decode(addr - 20, addr + 20, tty);
++ fatal(err_msg("not a NativeCall at " PTR_FORMAT, addr));
++ }
++}
++#endif // ASSERT
++
++#ifdef ASSERT
++void NativeFarCall::verify() {
++ address addr = addr_at(0);
++
++ NativeInstruction::verify();
++ if (!NativeFarCall::is_far_call_at(addr)) {
++ tty->print_cr("not a NativeFarCall at " PTR_FORMAT, addr);
++ // TODO: PPC port: Disassembler::decode(addr, 20, 20, tty);
++ fatal(err_msg("not a NativeFarCall at " PTR_FORMAT, addr));
++ }
++}
++#endif // ASSERT
++
++address NativeMovConstReg::next_instruction_address() const {
++#ifdef ASSERT
++ CodeBlob* nm = CodeCache::find_blob(instruction_address());
++ assert(!MacroAssembler::is_set_narrow_oop(addr_at(0), nm->content_begin()), "Should not patch narrow oop here");
++#endif
++
++ if (MacroAssembler::is_load_const_from_method_toc_at(addr_at(0))) {
++ return addr_at(load_const_from_method_toc_instruction_size);
++ } else {
++ return addr_at(load_const_instruction_size);
++ }
++}
++
++intptr_t NativeMovConstReg::data() const {
++ address addr = addr_at(0);
++
++ if (MacroAssembler::is_load_const_at(addr)) {
++ return MacroAssembler::get_const(addr);
++ }
++
++ CodeBlob* cb = CodeCache::find_blob_unsafe(addr);
++ if (MacroAssembler::is_set_narrow_oop(addr, cb->content_begin())) {
++ narrowOop no = (narrowOop)MacroAssembler::get_narrow_oop(addr, cb->content_begin());
++ return cast_from_oop<intptr_t>(oopDesc::decode_heap_oop(no));
++ } else {
++ assert(MacroAssembler::is_load_const_from_method_toc_at(addr), "must be load_const_from_pool");
++
++ address ctable = cb->content_begin();
++ int offset = MacroAssembler::get_offset_of_load_const_from_method_toc_at(addr);
++ return *(intptr_t *)(ctable + offset);
++ }
++}
++
++address NativeMovConstReg::set_data_plain(intptr_t data, CodeBlob *cb) {
++ address addr = instruction_address();
++ address next_address = NULL;
++ if (!cb) cb = CodeCache::find_blob(addr);
++
++ if (cb != NULL && MacroAssembler::is_load_const_from_method_toc_at(addr)) {
++ // A load from the method's TOC (ctable).
++ assert(cb->is_nmethod(), "must be nmethod");
++ const address ctable = cb->content_begin();
++ const int toc_offset = MacroAssembler::get_offset_of_load_const_from_method_toc_at(addr);
++ *(intptr_t *)(ctable + toc_offset) = data;
++ next_address = addr + BytesPerInstWord;
++ } else if (cb != NULL &&
++ MacroAssembler::is_calculate_address_from_global_toc_at(addr, cb->content_begin())) {
++ // A calculation relative to the global TOC.
++ if (MacroAssembler::get_address_of_calculate_address_from_global_toc_at(addr, cb->content_begin()) !=
++ (address)data) {
++ const int invalidated_range =
++ MacroAssembler::patch_calculate_address_from_global_toc_at(addr, cb->content_begin(),
++ (address)data);
++ const address start = invalidated_range < 0 ? addr + invalidated_range : addr;
++ // FIXME:
++ const int range = invalidated_range < 0 ? 4 - invalidated_range : 8;
++ ICache::ppc64_flush_icache_bytes(start, range);
++ }
++ next_address = addr + 1 * BytesPerInstWord;
++ } else if (MacroAssembler::is_load_const_at(addr)) {
++ // A normal 5 instruction load_const code sequence.
++ if (MacroAssembler::get_const(addr) != (long)data) {
++ // This is not mt safe, ok in methods like CodeBuffer::copy_code().
++ MacroAssembler::patch_const(addr, (long)data);
++ ICache::ppc64_flush_icache_bytes(addr, load_const_instruction_size);
++ }
++ next_address = addr + 5 * BytesPerInstWord;
++ } else if (MacroAssembler::is_bl(* (int*) addr)) {
++ // A single branch-and-link instruction.
++ ResourceMark rm;
++ const int code_size = 1 * BytesPerInstWord;
++ CodeBuffer cb(addr, code_size + 1);
++ MacroAssembler* a = new MacroAssembler(&cb);
++ a->bl((address) data);
++ ICache::ppc64_flush_icache_bytes(addr, code_size);
++ next_address = addr + code_size;
++ } else {
++ ShouldNotReachHere();
++ }
++
++ return next_address;
++}
++
++void NativeMovConstReg::set_data(intptr_t data) {
++ // Store the value into the instruction stream.
++ CodeBlob *cb = CodeCache::find_blob(instruction_address());
++ address next_address = set_data_plain(data, cb);
++
++ // Also store the value into an oop_Relocation cell, if any.
++ if (cb && cb->is_nmethod()) {
++ RelocIterator iter((nmethod *) cb, instruction_address(), next_address);
++ oop* oop_addr = NULL;
++ Metadata** metadata_addr = NULL;
++ while (iter.next()) {
++ if (iter.type() == relocInfo::oop_type) {
++ oop_Relocation *r = iter.oop_reloc();
++ if (oop_addr == NULL) {
++ oop_addr = r->oop_addr();
++ *oop_addr = cast_to_oop(data);
++ } else {
++ assert(oop_addr == r->oop_addr(), "must be only one set-oop here") ;
++ }
++ }
++ if (iter.type() == relocInfo::metadata_type) {
++ metadata_Relocation *r = iter.metadata_reloc();
++ if (metadata_addr == NULL) {
++ metadata_addr = r->metadata_addr();
++ *metadata_addr = (Metadata*)data;
++ } else {
++ assert(metadata_addr == r->metadata_addr(), "must be only one set-metadata here");
++ }
++ }
++ }
++ }
++}
++
++void NativeMovConstReg::set_narrow_oop(narrowOop data, CodeBlob *code /* = NULL */) {
++ address addr = addr_at(0);
++ CodeBlob* cb = (code) ? code : CodeCache::find_blob(instruction_address());
++ if (MacroAssembler::get_narrow_oop(addr, cb->content_begin()) == (long)data) return;
++ const int invalidated_range =
++ MacroAssembler::patch_set_narrow_oop(addr, cb->content_begin(), (long)data);
++ const address start = invalidated_range < 0 ? addr + invalidated_range : addr;
++ // FIXME:
++ const int range = invalidated_range < 0 ? 4 - invalidated_range : 8;
++ ICache::ppc64_flush_icache_bytes(start, range);
++}
++
++// Do not use an assertion here. Let clients decide whether they only
++// want this when assertions are enabled.
++#ifdef ASSERT
++void NativeMovConstReg::verify() {
++ address addr = addr_at(0);
++ if (! MacroAssembler::is_load_const_at(addr) &&
++ ! MacroAssembler::is_load_const_from_method_toc_at(addr)) {
++ CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // find_nmethod() asserts if nmethod is zombie.
++ if (! (cb != NULL && MacroAssembler::is_calculate_address_from_global_toc_at(addr, cb->content_begin())) &&
++ ! (cb != NULL && MacroAssembler::is_set_narrow_oop(addr, cb->content_begin())) &&
++ ! MacroAssembler::is_bl(*((int*) addr))) {
++ tty->print_cr("not a NativeMovConstReg at " PTR_FORMAT, addr);
++ // TODO: PPC port: Disassembler::decode(addr, 20, 20, tty);
++ fatal(err_msg("not a NativeMovConstReg at " PTR_FORMAT, addr));
++ }
++ }
++}
++#endif // ASSERT
++
++void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) {
++ ResourceMark rm;
++ int code_size = 1 * BytesPerInstWord;
++ CodeBuffer cb(verified_entry, code_size + 1);
++ MacroAssembler* a = new MacroAssembler(&cb);
++#ifdef COMPILER2
++ assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch");
++#endif
++ // Patch this nmethod atomically. Always use illtrap/trap in debug build.
++ if (DEBUG_ONLY(false &&) a->is_within_range_of_b(dest, a->pc())) {
++ a->b(dest);
++ } else {
++ // The signal handler will continue at dest=OptoRuntime::handle_wrong_method_stub().
++ if (TrapBasedNotEntrantChecks) {
++ // We use a special trap for marking a method as not_entrant or zombie.
++ a->trap_zombie_not_entrant();
++ } else {
++ // We use an illtrap for marking a method as not_entrant or zombie.
++ a->illtrap();
++ }
++ }
++ ICache::ppc64_flush_icache_bytes(verified_entry, code_size);
++}
++
++#ifdef ASSERT
++void NativeJump::verify() {
++ address addr = addr_at(0);
++
++ NativeInstruction::verify();
++ if (!NativeJump::is_jump_at(addr)) {
++ tty->print_cr("not a NativeJump at " PTR_FORMAT, addr);
++ // TODO: PPC port: Disassembler::decode(addr, 20, 20, tty);
++ fatal(err_msg("not a NativeJump at " PTR_FORMAT, addr));
++ }
++}
++#endif // ASSERT
++
++//-------------------------------------------------------------------
++
++// Call trampoline stubs.
++//
++// Layout and instructions of a call trampoline stub:
++// 0: load the TOC (part 1)
++// 4: load the TOC (part 2)
++// 8: load the call target from the constant pool (part 1)
++// [12: load the call target from the constant pool (part 2, optional)]
++// ..: branch via CTR
++//
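++// An assumed concrete shape of such a stub (illustrative, not verbatim
++// generator output; Rtoc stands for whichever register holds the TOC):
++//   addis R12, Rtoc, hi    // load the TOC (part 1)
++//   addi  R12, R12, lo     // load the TOC (part 2)
++//   ld    R12, off(R12)    // call target from the constant pool
++//   mtctr R12
++//   bctr
++// The recognizer is_NativeCallTrampolineStub_at() keys on the leading
++// addis into R12_scratch2.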
++
++address NativeCallTrampolineStub::encoded_destination_addr() const {
++ address instruction_addr = addr_at(2 * BytesPerInstWord);
++ assert(MacroAssembler::is_ld_largeoffset(instruction_addr),
++ "must be a ld with large offset (from the constant pool)");
++
++ return instruction_addr;
++}
++
++address NativeCallTrampolineStub::destination(nmethod *nm) const {
++ CodeBlob* cb = nm ? nm : CodeCache::find_blob_unsafe(addr_at(0));
++ address ctable = cb->content_begin();
++
++ return *(address*)(ctable + destination_toc_offset());
++}
++
++int NativeCallTrampolineStub::destination_toc_offset() const {
++ return MacroAssembler::get_ld_largeoffset_offset(encoded_destination_addr());
++}
++
++void NativeCallTrampolineStub::set_destination(address new_destination) {
++ CodeBlob* cb = CodeCache::find_blob(addr_at(0));
++ address ctable = cb->content_begin();
++
++ *(address*)(ctable + destination_toc_offset()) = new_destination;
++}
++
+--- ./hotspot/src/cpu/ppc/vm/nativeInst_ppc.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/nativeInst_ppc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,398 @@
++/*
++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_PPC_VM_NATIVEINST_PPC_HPP
++#define CPU_PPC_VM_NATIVEINST_PPC_HPP
++
++#include "asm/assembler.hpp"
++#include "asm/macroAssembler.hpp"
++#include "memory/allocation.hpp"
++#include "runtime/icache.hpp"
++#include "runtime/os.hpp"
++#include "utilities/top.hpp"
++
++// We have interfaces for the following instructions:
++//
++// - NativeInstruction
++// - NativeCall
++// - NativeFarCall
++// - NativeMovConstReg
++// - NativeJump
++// - NativeIllegalInstruction
++// - NativeConditionalFarBranch
++// - NativeCallTrampolineStub
++
++// The base class for different kinds of native instruction abstractions.
++// It provides the primitive operations to manipulate code relative to this.
++class NativeInstruction VALUE_OBJ_CLASS_SPEC {
++ friend class Relocation;
++
++ public:
++ bool is_sigtrap_ic_miss_check() {
++ assert(UseSIGTRAP, "precondition");
++ return MacroAssembler::is_trap_ic_miss_check(long_at(0));
++ }
++
++ bool is_sigtrap_null_check() {
++ assert(UseSIGTRAP && TrapBasedNullChecks, "precondition");
++ return MacroAssembler::is_trap_null_check(long_at(0));
++ }
++
++ // We use a special trap for marking a method as not_entrant or zombie
++ // iff UseSIGTRAP.
++ bool is_sigtrap_zombie_not_entrant() {
++ assert(UseSIGTRAP, "precondition");
++ return MacroAssembler::is_trap_zombie_not_entrant(long_at(0));
++ }
++
++ // We use an illtrap for marking a method as not_entrant or zombie
++ // iff !UseSIGTRAP.
++ bool is_sigill_zombie_not_entrant() {
++ assert(!UseSIGTRAP, "precondition");
++ // Work around a C++ compiler bug which changes 'this'.
++ return NativeInstruction::is_sigill_zombie_not_entrant_at(addr_at(0));
++ }
++ static bool is_sigill_zombie_not_entrant_at(address addr);
++
++#ifdef COMPILER2
++ // SIGTRAP-based implicit range checks
++ bool is_sigtrap_range_check() {
++ assert(UseSIGTRAP && TrapBasedRangeChecks, "precondition");
++ return MacroAssembler::is_trap_range_check(long_at(0));
++ }
++#endif
++
++ // 'should not reach here'.
++ bool is_sigtrap_should_not_reach_here() {
++ return MacroAssembler::is_trap_should_not_reach_here(long_at(0));
++ }
++
++ bool is_safepoint_poll() {
++ // Is the current instruction a POTENTIAL read access to the polling page?
++ // The current arguments of the instruction are not checked!
++ return MacroAssembler::is_load_from_polling_page(long_at(0), NULL);
++ }
++
++ bool is_memory_serialization(JavaThread *thread, void *ucontext) {
++ // Is the current instruction a write access of thread to the
++ // memory serialization page?
++ return MacroAssembler::is_memory_serialization(long_at(0), thread, ucontext);
++ }
++
++ address get_stack_bang_address(void *ucontext) {
++ // If long_at(0) is not a stack bang, return 0. Otherwise, return
++ // banged address.
++ return MacroAssembler::get_stack_bang_address(long_at(0), ucontext);
++ }
++
++ protected:
++ address addr_at(int offset) const { return address(this) + offset; }
++ int long_at(int offset) const { return *(int*)addr_at(offset); }
++
++ public:
++ void verify() NOT_DEBUG_RETURN;
++};
++
++inline NativeInstruction* nativeInstruction_at(address address) {
++ NativeInstruction* inst = (NativeInstruction*)address;
++ inst->verify();
++ return inst;
++}
++
++// The NativeCall is an abstraction for accessing/manipulating call
++// instructions. It is used to manipulate inline caches, primitive &
++// dll calls, etc.
++//
++// Sparc distinguishes `NativeCall' and `NativeFarCall'. On PPC64,
++// at present, we provide a single class `NativeCall' representing the
++// sequence `load_const, mtctr, bctrl' or the sequence 'ld_from_toc,
++// mtctr, bctrl'.
++class NativeCall: public NativeInstruction {
++ public:
++
++ enum ppc_specific_constants {
++ load_const_instruction_size = 28,
++ load_const_from_method_toc_instruction_size = 16,
++ instruction_size = 16 // Used in shared code for calls with reloc_info.
++ };
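++
++ // Size breakdown (inferred from the emitted sequences, for illustration):
++ // a pc-independent call is load_const (5 instructions, 20 bytes) plus
++ // mtctr and bctrl, i.e. 28 bytes; a TOC-based call is ld_from_toc
++ // (2 instructions, 8 bytes) plus mtctr and bctrl, i.e. 16 bytes.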
++
++ static bool is_call_at(address a) {
++ return Assembler::is_bl(*(int*)(a));
++ }
++
++ static bool is_call_before(address return_address) {
++ return NativeCall::is_call_at(return_address - 4);
++ }
++
++ address instruction_address() const {
++ return addr_at(0);
++ }
++
++ address next_instruction_address() const {
++ // We have only bl.
++ assert(MacroAssembler::is_bl(*(int*)instruction_address()), "Should be bl instruction!");
++ return addr_at(4);
++ }
++
++ address return_address() const {
++ return next_instruction_address();
++ }
++
++ address destination() const;
++
++ // The parameter assert_lock disables the assertion during code generation.
++ void set_destination_mt_safe(address dest, bool assert_lock = true);
++
++ address get_trampoline();
++
++ void verify_alignment() {} // do nothing on ppc
++ void verify() NOT_DEBUG_RETURN;
++};
++
++inline NativeCall* nativeCall_at(address instr) {
++ NativeCall* call = (NativeCall*)instr;
++ call->verify();
++ return call;
++}
++
++inline NativeCall* nativeCall_before(address return_address) {
++ NativeCall* call = NULL;
++ if (MacroAssembler::is_bl(*(int*)(return_address - 4)))
++ call = (NativeCall*)(return_address - 4);
++ call->verify();
++ return call;
++}
++
++// The NativeFarCall is an abstraction for accessing/manipulating native
++// call-anywhere instructions.
++// Used to call native methods which may be loaded anywhere in the address
++// space, possibly out of reach of a call instruction.
++class NativeFarCall: public NativeInstruction {
++ public:
++ // We use MacroAssembler::bl64_patchable() for implementing a
++ // call-anywhere instruction.
++
++ // Checks whether instr points at a NativeFarCall instruction.
++ static bool is_far_call_at(address instr) {
++ return MacroAssembler::is_bl64_patchable_at(instr);
++ }
++
++ // Does the NativeFarCall implementation use a pc-relative encoding
++ // of the call destination?
++ // Used when relocating code.
++ bool is_pcrelative() {
++ assert(MacroAssembler::is_bl64_patchable_at((address)this),
++ "unexpected call type");
++ return MacroAssembler::is_bl64_patchable_pcrelative_at((address)this);
++ }
++
++ // Returns the NativeFarCall's destination.
++ address destination() const {
++ assert(MacroAssembler::is_bl64_patchable_at((address)this),
++ "unexpected call type");
++ return MacroAssembler::get_dest_of_bl64_patchable_at((address)this);
++ }
++
++ // Sets the NativeFarCall's destination, not necessarily mt-safe.
++ // Used when relocating code.
++ void set_destination(address dest) {
++ // Set new destination (implementation of call may change here).
++ assert(MacroAssembler::is_bl64_patchable_at((address)this),
++ "unexpected call type");
++ MacroAssembler::set_dest_of_bl64_patchable_at((address)this, dest);
++ }
++
++ void verify() NOT_DEBUG_RETURN;
++};
++
++// Instantiates a NativeFarCall object starting at the given instruction
++// address and returns the NativeFarCall object.
++inline NativeFarCall* nativeFarCall_at(address instr) {
++ NativeFarCall* call = (NativeFarCall*)instr;
++ call->verify();
++ return call;
++}
++
++// An interface for accessing/manipulating native set_oop imm, reg instructions.
++// (used to manipulate inlined data references, etc.)
++class NativeMovConstReg: public NativeInstruction {
++ public:
++
++ enum ppc_specific_constants {
++ load_const_instruction_size = 20,
++ load_const_from_method_toc_instruction_size = 8,
++ instruction_size = 8 // Used in shared code for calls with reloc_info.
++ };
++
++ address instruction_address() const {
++ return addr_at(0);
++ }
++
++ address next_instruction_address() const;
++
++ // (The [set_]data accessor respects oop_type relocs also.)
++ intptr_t data() const;
++
++ // Patch the code stream.
++ address set_data_plain(intptr_t x, CodeBlob *code);
++ // Patch the code stream and oop pool.
++ void set_data(intptr_t x);
++
++ // Patch narrow oop constants. Use this also for narrow klass.
++ void set_narrow_oop(narrowOop data, CodeBlob *code = NULL);
++
++ void verify() NOT_DEBUG_RETURN;
++};
++
++inline NativeMovConstReg* nativeMovConstReg_at(address address) {
++ NativeMovConstReg* test = (NativeMovConstReg*)address;
++ test->verify();
++ return test;
++}
++
++// The NativeJump is an abstraction for accessing/manipulating native
++// jump-anywhere instructions.
++class NativeJump: public NativeInstruction {
++ public:
++ // We use MacroAssembler::b64_patchable() for implementing a
++ // jump-anywhere instruction.
++
++ enum ppc_specific_constants {
++ instruction_size = MacroAssembler::b64_patchable_size
++ };
++
++ // Checks whether instr points at a NativeJump instruction.
++ static bool is_jump_at(address instr) {
++ return MacroAssembler::is_b64_patchable_at(instr)
++ || ( MacroAssembler::is_load_const_from_method_toc_at(instr)
++ && Assembler::is_mtctr(*(int*)(instr + 2 * 4))
++ && Assembler::is_bctr(*(int*)(instr + 3 * 4)));
++ }
++
++ // Does the NativeJump implementation use a pc-relative encoding
++ // of the call destination?
++ // Used when relocating code or patching jumps.
++ bool is_pcrelative() {
++ return MacroAssembler::is_b64_patchable_pcrelative_at((address)this);
++ }
++
++ // Returns the NativeJump's destination.
++ address jump_destination() const {
++ if (MacroAssembler::is_b64_patchable_at((address)this)) {
++ return MacroAssembler::get_dest_of_b64_patchable_at((address)this);
++ } else if (MacroAssembler::is_load_const_from_method_toc_at((address)this)
++ && Assembler::is_mtctr(*(int*)((address)this + 2 * 4))
++ && Assembler::is_bctr(*(int*)((address)this + 3 * 4))) {
++ return (address)((NativeMovConstReg *)this)->data();
++ } else {
++ ShouldNotReachHere();
++ return NULL;
++ }
++ }
++
++ // Sets the NativeJump's destination, not necessarily mt-safe.
++ // Used when relocating code or patching jumps.
++ void set_jump_destination(address dest) {
++ // Set new destination (implementation of call may change here).
++ if (MacroAssembler::is_b64_patchable_at((address)this)) {
++ MacroAssembler::set_dest_of_b64_patchable_at((address)this, dest);
++ } else if (MacroAssembler::is_load_const_from_method_toc_at((address)this)
++ && Assembler::is_mtctr(*(int*)((address)this + 2 * 4))
++ && Assembler::is_bctr(*(int*)((address)this + 3 * 4))) {
++ ((NativeMovConstReg *)this)->set_data((intptr_t)dest);
++ } else {
++ ShouldNotReachHere();
++ }
++ }
++
++ // MT-safe insertion of native jump at verified method entry
++ static void patch_verified_entry(address entry, address verified_entry, address dest);
++
++ void verify() NOT_DEBUG_RETURN;
++
++ static void check_verified_entry_alignment(address entry, address verified_entry) {
++ // We just patch one instruction on ppc64, so the jump doesn't have to
++ // be aligned. Nothing to do here.
++ }
++};
++
++// Instantiates a NativeJump object starting at the given instruction
++// address and returns the NativeJump object.
++inline NativeJump* nativeJump_at(address instr) {
++ NativeJump* call = (NativeJump*)instr;
++ call->verify();
++ return call;
++}
++
++// NativeConditionalFarBranch is an abstraction for accessing/manipulating
++// conditional far branches.
++class NativeConditionalFarBranch : public NativeInstruction {
++ public:
++
++ static bool is_conditional_far_branch_at(address instr) {
++ return MacroAssembler::is_bc_far_at(instr);
++ }
++
++ address branch_destination() const {
++ return MacroAssembler::get_dest_of_bc_far_at((address)this);
++ }
++
++ void set_branch_destination(address dest) {
++ MacroAssembler::set_dest_of_bc_far_at((address)this, dest);
++ }
++};
++
++inline NativeConditionalFarBranch* NativeConditionalFarBranch_at(address address) {
++ assert(NativeConditionalFarBranch::is_conditional_far_branch_at(address),
++ "must be a conditional far branch");
++ return (NativeConditionalFarBranch*)address;
++}
++
++// Call trampoline stubs.
++class NativeCallTrampolineStub : public NativeInstruction {
++ private:
++
++ address encoded_destination_addr() const;
++
++ public:
++
++ address destination(nmethod *nm = NULL) const;
++ int destination_toc_offset() const;
++
++ void set_destination(address new_destination);
++};
++
++inline bool is_NativeCallTrampolineStub_at(address address) {
++ int first_instr = *(int*)address;
++ return Assembler::is_addis(first_instr) &&
++ (Register)(intptr_t)Assembler::inv_rt_field(first_instr) == R12_scratch2;
++}
++
++inline NativeCallTrampolineStub* NativeCallTrampolineStub_at(address address) {
++ assert(is_NativeCallTrampolineStub_at(address), "no call trampoline found");
++ return (NativeCallTrampolineStub*)address;
++}
++
++#endif // CPU_PPC_VM_NATIVEINST_PPC_HPP
+--- ./hotspot/src/cpu/ppc/vm/ppc.ad Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/ppc.ad Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,12117 @@
++//
++// Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved.
++// Copyright 2012, 2014 SAP AG. All rights reserved.
++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++//
++// This code is free software; you can redistribute it and/or modify it
++// under the terms of the GNU General Public License version 2 only, as
++// published by the Free Software Foundation.
++//
++// This code is distributed in the hope that it will be useful, but WITHOUT
++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++// version 2 for more details (a copy is included in the LICENSE file that
++// accompanied this code).
++//
++// You should have received a copy of the GNU General Public License version
++// 2 along with this work; if not, write to the Free Software Foundation,
++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++//
++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++// or visit www.oracle.com if you need additional information or have any
++// questions.
++//
++//
++
++//
++// PPC64 Architecture Description File
++//
++
++//----------REGISTER DEFINITION BLOCK------------------------------------------
++// This information is used by the matcher and the register allocator to
++// describe individual registers and classes of registers within the target
++// architecture.
++register %{
++//----------Architecture Description Register Definitions----------------------
++// General Registers
++// "reg_def" name (register save type, C convention save type,
++// ideal register type, encoding);
++//
++// Register Save Types:
++//
++// NS = No-Save: The register allocator assumes that these registers
++// can be used without saving upon entry to the method, &
++// that they do not need to be saved at call sites.
++//
++// SOC = Save-On-Call: The register allocator assumes that these registers
++// can be used without saving upon entry to the method,
++// but that they must be saved at call sites.
++// These are called "volatiles" on ppc.
++//
++// SOE = Save-On-Entry: The register allocator assumes that these registers
++// must be saved before using them upon entry to the
++// method, but they do not need to be saved at call
++// sites.
++// These are called "nonvolatiles" on ppc.
++//
++// AS = Always-Save: The register allocator assumes that these registers
++// must be saved before using them upon entry to the
++// method, & that they must be saved at call sites.
++//
++// Ideal Register Type is used to determine how to save & restore a
++// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
++// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
++//
++// The encoding number is the actual bit-pattern placed into the opcodes.
++//
++// PPC64 register definitions, based on the 64-bit PowerPC ELF ABI
++// Supplement Version 1.7 as of 2003-10-29.
++//
++// For each 64-bit register we must define two registers: the register
++// itself, e.g. R3, and a corresponding virtual (32-bit) 'half',
++// e.g. R3_H, which is needed by the allocator, but is not used
++// for stores, loads, etc.
++
++// ----------------------------
++// Integer/Long Registers
++// ----------------------------
++
++ // PPC64 has 32 64-bit integer registers.
++
++ // types: v = volatile, nv = non-volatile, s = system
++ reg_def R0 ( SOC, SOC, Op_RegI, 0, R0->as_VMReg() ); // v used in prologs
++ reg_def R0_H ( SOC, SOC, Op_RegI, 99, R0->as_VMReg()->next() );
++ reg_def R1 ( NS, NS, Op_RegI, 1, R1->as_VMReg() ); // s SP
++ reg_def R1_H ( NS, NS, Op_RegI, 99, R1->as_VMReg()->next() );
++ reg_def R2 ( SOC, SOC, Op_RegI, 2, R2->as_VMReg() ); // v TOC
++ reg_def R2_H ( SOC, SOC, Op_RegI, 99, R2->as_VMReg()->next() );
++ reg_def R3 ( SOC, SOC, Op_RegI, 3, R3->as_VMReg() ); // v iarg1 & iret
++ reg_def R3_H ( SOC, SOC, Op_RegI, 99, R3->as_VMReg()->next() );
++ reg_def R4 ( SOC, SOC, Op_RegI, 4, R4->as_VMReg() ); // v iarg2
++ reg_def R4_H ( SOC, SOC, Op_RegI, 99, R4->as_VMReg()->next() );
++ reg_def R5 ( SOC, SOC, Op_RegI, 5, R5->as_VMReg() ); // v iarg3
++ reg_def R5_H ( SOC, SOC, Op_RegI, 99, R5->as_VMReg()->next() );
++ reg_def R6 ( SOC, SOC, Op_RegI, 6, R6->as_VMReg() ); // v iarg4
++ reg_def R6_H ( SOC, SOC, Op_RegI, 99, R6->as_VMReg()->next() );
++ reg_def R7 ( SOC, SOC, Op_RegI, 7, R7->as_VMReg() ); // v iarg5
++ reg_def R7_H ( SOC, SOC, Op_RegI, 99, R7->as_VMReg()->next() );
++ reg_def R8 ( SOC, SOC, Op_RegI, 8, R8->as_VMReg() ); // v iarg6
++ reg_def R8_H ( SOC, SOC, Op_RegI, 99, R8->as_VMReg()->next() );
++ reg_def R9 ( SOC, SOC, Op_RegI, 9, R9->as_VMReg() ); // v iarg7
++ reg_def R9_H ( SOC, SOC, Op_RegI, 99, R9->as_VMReg()->next() );
++ reg_def R10 ( SOC, SOC, Op_RegI, 10, R10->as_VMReg() ); // v iarg8
++ reg_def R10_H( SOC, SOC, Op_RegI, 99, R10->as_VMReg()->next());
++ reg_def R11 ( SOC, SOC, Op_RegI, 11, R11->as_VMReg() ); // v ENV / scratch
++ reg_def R11_H( SOC, SOC, Op_RegI, 99, R11->as_VMReg()->next());
++ reg_def R12 ( SOC, SOC, Op_RegI, 12, R12->as_VMReg() ); // v scratch
++ reg_def R12_H( SOC, SOC, Op_RegI, 99, R12->as_VMReg()->next());
++ reg_def R13 ( NS, NS, Op_RegI, 13, R13->as_VMReg() ); // s system thread id
++ reg_def R13_H( NS, NS, Op_RegI, 99, R13->as_VMReg()->next());
++ reg_def R14 ( SOC, SOE, Op_RegI, 14, R14->as_VMReg() ); // nv
++ reg_def R14_H( SOC, SOE, Op_RegI, 99, R14->as_VMReg()->next());
++ reg_def R15 ( SOC, SOE, Op_RegI, 15, R15->as_VMReg() ); // nv
++ reg_def R15_H( SOC, SOE, Op_RegI, 99, R15->as_VMReg()->next());
++ reg_def R16 ( SOC, SOE, Op_RegI, 16, R16->as_VMReg() ); // nv
++ reg_def R16_H( SOC, SOE, Op_RegI, 99, R16->as_VMReg()->next());
++ reg_def R17 ( SOC, SOE, Op_RegI, 17, R17->as_VMReg() ); // nv
++ reg_def R17_H( SOC, SOE, Op_RegI, 99, R17->as_VMReg()->next());
++ reg_def R18 ( SOC, SOE, Op_RegI, 18, R18->as_VMReg() ); // nv
++ reg_def R18_H( SOC, SOE, Op_RegI, 99, R18->as_VMReg()->next());
++ reg_def R19 ( SOC, SOE, Op_RegI, 19, R19->as_VMReg() ); // nv
++ reg_def R19_H( SOC, SOE, Op_RegI, 99, R19->as_VMReg()->next());
++ reg_def R20 ( SOC, SOE, Op_RegI, 20, R20->as_VMReg() ); // nv
++ reg_def R20_H( SOC, SOE, Op_RegI, 99, R20->as_VMReg()->next());
++ reg_def R21 ( SOC, SOE, Op_RegI, 21, R21->as_VMReg() ); // nv
++ reg_def R21_H( SOC, SOE, Op_RegI, 99, R21->as_VMReg()->next());
++ reg_def R22 ( SOC, SOE, Op_RegI, 22, R22->as_VMReg() ); // nv
++ reg_def R22_H( SOC, SOE, Op_RegI, 99, R22->as_VMReg()->next());
++ reg_def R23 ( SOC, SOE, Op_RegI, 23, R23->as_VMReg() ); // nv
++ reg_def R23_H( SOC, SOE, Op_RegI, 99, R23->as_VMReg()->next());
++ reg_def R24 ( SOC, SOE, Op_RegI, 24, R24->as_VMReg() ); // nv
++ reg_def R24_H( SOC, SOE, Op_RegI, 99, R24->as_VMReg()->next());
++ reg_def R25 ( SOC, SOE, Op_RegI, 25, R25->as_VMReg() ); // nv
++ reg_def R25_H( SOC, SOE, Op_RegI, 99, R25->as_VMReg()->next());
++ reg_def R26 ( SOC, SOE, Op_RegI, 26, R26->as_VMReg() ); // nv
++ reg_def R26_H( SOC, SOE, Op_RegI, 99, R26->as_VMReg()->next());
++ reg_def R27 ( SOC, SOE, Op_RegI, 27, R27->as_VMReg() ); // nv
++ reg_def R27_H( SOC, SOE, Op_RegI, 99, R27->as_VMReg()->next());
++ reg_def R28 ( SOC, SOE, Op_RegI, 28, R28->as_VMReg() ); // nv
++ reg_def R28_H( SOC, SOE, Op_RegI, 99, R28->as_VMReg()->next());
++ reg_def R29 ( SOC, SOE, Op_RegI, 29, R29->as_VMReg() ); // nv
++ reg_def R29_H( SOC, SOE, Op_RegI, 99, R29->as_VMReg()->next());
++ reg_def R30 ( SOC, SOE, Op_RegI, 30, R30->as_VMReg() ); // nv
++ reg_def R30_H( SOC, SOE, Op_RegI, 99, R30->as_VMReg()->next());
++ reg_def R31 ( SOC, SOE, Op_RegI, 31, R31->as_VMReg() ); // nv
++ reg_def R31_H( SOC, SOE, Op_RegI, 99, R31->as_VMReg()->next());
++
++
++// ----------------------------
++// Float/Double Registers
++// ----------------------------
++
++ // Double Registers
++ // The rules of ADL require that double registers be defined in pairs.
++ // Each pair must be two 32-bit values, but not necessarily a pair of
++ // single float registers. In each pair, ADLC-assigned register numbers
++ // must be adjacent, with the lower number even. Finally, when the
++ // CPU stores such a register pair to memory, the word associated with
++ // the lower ADLC-assigned number must be stored to the lower address.
++
++ // PPC64 has 32 64-bit floating-point registers. Each can store a single
++ // or double precision floating-point value.
++
++ // types: v = volatile, nv = non-volatile, s = system
++ reg_def F0 ( SOC, SOC, Op_RegF, 0, F0->as_VMReg() ); // v scratch
++ reg_def F0_H ( SOC, SOC, Op_RegF, 99, F0->as_VMReg()->next() );
++ reg_def F1 ( SOC, SOC, Op_RegF, 1, F1->as_VMReg() ); // v farg1 & fret
++ reg_def F1_H ( SOC, SOC, Op_RegF, 99, F1->as_VMReg()->next() );
++ reg_def F2 ( SOC, SOC, Op_RegF, 2, F2->as_VMReg() ); // v farg2
++ reg_def F2_H ( SOC, SOC, Op_RegF, 99, F2->as_VMReg()->next() );
++ reg_def F3 ( SOC, SOC, Op_RegF, 3, F3->as_VMReg() ); // v farg3
++ reg_def F3_H ( SOC, SOC, Op_RegF, 99, F3->as_VMReg()->next() );
++ reg_def F4 ( SOC, SOC, Op_RegF, 4, F4->as_VMReg() ); // v farg4
++ reg_def F4_H ( SOC, SOC, Op_RegF, 99, F4->as_VMReg()->next() );
++ reg_def F5 ( SOC, SOC, Op_RegF, 5, F5->as_VMReg() ); // v farg5
++ reg_def F5_H ( SOC, SOC, Op_RegF, 99, F5->as_VMReg()->next() );
++ reg_def F6 ( SOC, SOC, Op_RegF, 6, F6->as_VMReg() ); // v farg6
++ reg_def F6_H ( SOC, SOC, Op_RegF, 99, F6->as_VMReg()->next() );
++ reg_def F7 ( SOC, SOC, Op_RegF, 7, F7->as_VMReg() ); // v farg7
++ reg_def F7_H ( SOC, SOC, Op_RegF, 99, F7->as_VMReg()->next() );
++ reg_def F8 ( SOC, SOC, Op_RegF, 8, F8->as_VMReg() ); // v farg8
++ reg_def F8_H ( SOC, SOC, Op_RegF, 99, F8->as_VMReg()->next() );
++ reg_def F9 ( SOC, SOC, Op_RegF, 9, F9->as_VMReg() ); // v farg9
++ reg_def F9_H ( SOC, SOC, Op_RegF, 99, F9->as_VMReg()->next() );
++ reg_def F10 ( SOC, SOC, Op_RegF, 10, F10->as_VMReg() ); // v farg10
++ reg_def F10_H( SOC, SOC, Op_RegF, 99, F10->as_VMReg()->next());
++ reg_def F11 ( SOC, SOC, Op_RegF, 11, F11->as_VMReg() ); // v farg11
++ reg_def F11_H( SOC, SOC, Op_RegF, 99, F11->as_VMReg()->next());
++ reg_def F12 ( SOC, SOC, Op_RegF, 12, F12->as_VMReg() ); // v farg12
++ reg_def F12_H( SOC, SOC, Op_RegF, 99, F12->as_VMReg()->next());
++ reg_def F13 ( SOC, SOC, Op_RegF, 13, F13->as_VMReg() ); // v farg13
++ reg_def F13_H( SOC, SOC, Op_RegF, 99, F13->as_VMReg()->next());
++ reg_def F14 ( SOC, SOE, Op_RegF, 14, F14->as_VMReg() ); // nv
++ reg_def F14_H( SOC, SOE, Op_RegF, 99, F14->as_VMReg()->next());
++ reg_def F15 ( SOC, SOE, Op_RegF, 15, F15->as_VMReg() ); // nv
++ reg_def F15_H( SOC, SOE, Op_RegF, 99, F15->as_VMReg()->next());
++ reg_def F16 ( SOC, SOE, Op_RegF, 16, F16->as_VMReg() ); // nv
++ reg_def F16_H( SOC, SOE, Op_RegF, 99, F16->as_VMReg()->next());
++ reg_def F17 ( SOC, SOE, Op_RegF, 17, F17->as_VMReg() ); // nv
++ reg_def F17_H( SOC, SOE, Op_RegF, 99, F17->as_VMReg()->next());
++ reg_def F18 ( SOC, SOE, Op_RegF, 18, F18->as_VMReg() ); // nv
++ reg_def F18_H( SOC, SOE, Op_RegF, 99, F18->as_VMReg()->next());
++ reg_def F19 ( SOC, SOE, Op_RegF, 19, F19->as_VMReg() ); // nv
++ reg_def F19_H( SOC, SOE, Op_RegF, 99, F19->as_VMReg()->next());
++ reg_def F20 ( SOC, SOE, Op_RegF, 20, F20->as_VMReg() ); // nv
++ reg_def F20_H( SOC, SOE, Op_RegF, 99, F20->as_VMReg()->next());
++ reg_def F21 ( SOC, SOE, Op_RegF, 21, F21->as_VMReg() ); // nv
++ reg_def F21_H( SOC, SOE, Op_RegF, 99, F21->as_VMReg()->next());
++ reg_def F22 ( SOC, SOE, Op_RegF, 22, F22->as_VMReg() ); // nv
++ reg_def F22_H( SOC, SOE, Op_RegF, 99, F22->as_VMReg()->next());
++ reg_def F23 ( SOC, SOE, Op_RegF, 23, F23->as_VMReg() ); // nv
++ reg_def F23_H( SOC, SOE, Op_RegF, 99, F23->as_VMReg()->next());
++ reg_def F24 ( SOC, SOE, Op_RegF, 24, F24->as_VMReg() ); // nv
++ reg_def F24_H( SOC, SOE, Op_RegF, 99, F24->as_VMReg()->next());
++ reg_def F25 ( SOC, SOE, Op_RegF, 25, F25->as_VMReg() ); // nv
++ reg_def F25_H( SOC, SOE, Op_RegF, 99, F25->as_VMReg()->next());
++ reg_def F26 ( SOC, SOE, Op_RegF, 26, F26->as_VMReg() ); // nv
++ reg_def F26_H( SOC, SOE, Op_RegF, 99, F26->as_VMReg()->next());
++ reg_def F27 ( SOC, SOE, Op_RegF, 27, F27->as_VMReg() ); // nv
++ reg_def F27_H( SOC, SOE, Op_RegF, 99, F27->as_VMReg()->next());
++ reg_def F28 ( SOC, SOE, Op_RegF, 28, F28->as_VMReg() ); // nv
++ reg_def F28_H( SOC, SOE, Op_RegF, 99, F28->as_VMReg()->next());
++ reg_def F29 ( SOC, SOE, Op_RegF, 29, F29->as_VMReg() ); // nv
++ reg_def F29_H( SOC, SOE, Op_RegF, 99, F29->as_VMReg()->next());
++ reg_def F30 ( SOC, SOE, Op_RegF, 30, F30->as_VMReg() ); // nv
++ reg_def F30_H( SOC, SOE, Op_RegF, 99, F30->as_VMReg()->next());
++ reg_def F31 ( SOC, SOE, Op_RegF, 31, F31->as_VMReg() ); // nv
++ reg_def F31_H( SOC, SOE, Op_RegF, 99, F31->as_VMReg()->next());
++
++// ----------------------------
++// Special Registers
++// ----------------------------
++
++// Condition Codes Flag Registers
++
++ // PPC64 has 8 condition code "registers" which are all contained
++ // in the CR register.
++
++ // types: v = volatile, nv = non-volatile, s = system
++ reg_def CCR0(SOC, SOC, Op_RegFlags, 0, CCR0->as_VMReg()); // v
++ reg_def CCR1(SOC, SOC, Op_RegFlags, 1, CCR1->as_VMReg()); // v
++ reg_def CCR2(SOC, SOC, Op_RegFlags, 2, CCR2->as_VMReg()); // nv
++ reg_def CCR3(SOC, SOC, Op_RegFlags, 3, CCR3->as_VMReg()); // nv
++ reg_def CCR4(SOC, SOC, Op_RegFlags, 4, CCR4->as_VMReg()); // nv
++ reg_def CCR5(SOC, SOC, Op_RegFlags, 5, CCR5->as_VMReg()); // v
++ reg_def CCR6(SOC, SOC, Op_RegFlags, 6, CCR6->as_VMReg()); // v
++ reg_def CCR7(SOC, SOC, Op_RegFlags, 7, CCR7->as_VMReg()); // v
++
++ // Special registers of PPC64
++
++ reg_def SR_XER( SOC, SOC, Op_RegP, 0, SR_XER->as_VMReg()); // v
++ reg_def SR_LR( SOC, SOC, Op_RegP, 1, SR_LR->as_VMReg()); // v
++ reg_def SR_CTR( SOC, SOC, Op_RegP, 2, SR_CTR->as_VMReg()); // v
++ reg_def SR_VRSAVE( SOC, SOC, Op_RegP, 3, SR_VRSAVE->as_VMReg()); // v
++ reg_def SR_SPEFSCR(SOC, SOC, Op_RegP, 4, SR_SPEFSCR->as_VMReg()); // v
++ reg_def SR_PPR( SOC, SOC, Op_RegP, 5, SR_PPR->as_VMReg()); // v
++
++
++// ----------------------------
++// Specify priority of register selection within phases of register
++// allocation. Highest priority is first. A useful heuristic is to
++// give registers a low priority when they are required by machine
++// instructions, like EAX and EDX on I486, and choose no-save registers
++// before save-on-call, & save-on-call before save-on-entry. Registers
++// which participate in fixed calling sequences should come last.
++// Registers which are used as pairs must fall on an even boundary.
++
++// It's worth about 1% on SPEC geomean to get this right.
++
++// Chunk0, chunk1, and chunk2 form the MachRegisterNumbers enumeration
++// in adGlobals_ppc64.hpp which defines the _num values, e.g.
++// R3_num. Therefore, R3_num may not be (and in reality is not)
++// the same as R3->encoding()! Furthermore, we cannot make any
++// assumptions on ordering, e.g. R3_num may be less than R2_num.
++// Additionally, the function
++// static enum RC rc_class(OptoReg::Name reg )
++// maps a given _num value to its chunk type (except for flags)
++// and its current implementation relies on chunk0 and chunk1 having a
++// size of 64 each.
++
++// If you change this allocation class, please have a look at the
++// default values for the parameters RoundRobinIntegerRegIntervalStart
++// and RoundRobinFloatRegIntervalStart
++
++alloc_class chunk0 (
++ // Chunk0 contains *all* 64 integer register halves.
++
++ // "non-volatile" registers
++ R14, R14_H,
++ R15, R15_H,
++ R17, R17_H,
++ R18, R18_H,
++ R19, R19_H,
++ R20, R20_H,
++ R21, R21_H,
++ R22, R22_H,
++ R23, R23_H,
++ R24, R24_H,
++ R25, R25_H,
++ R26, R26_H,
++ R27, R27_H,
++ R28, R28_H,
++ R29, R29_H,
++ R30, R30_H,
++ R31, R31_H,
++
++ // scratch/special registers
++ R11, R11_H,
++ R12, R12_H,
++
++ // argument registers
++ R10, R10_H,
++ R9, R9_H,
++ R8, R8_H,
++ R7, R7_H,
++ R6, R6_H,
++ R5, R5_H,
++ R4, R4_H,
++ R3, R3_H,
++
++ // special registers, not available for allocation
++ R16, R16_H, // R16_thread
++ R13, R13_H, // system thread id
++ R2, R2_H, // may be used for TOC
++ R1, R1_H, // SP
++ R0, R0_H // R0 (scratch)
++);
++
++// If you change this allocation class, please have a look at the
++// default values for the parameters RoundRobinIntegerRegIntervalStart
++// and RoundRobinFloatRegIntervalStart
++
++alloc_class chunk1 (
++ // Chunk1 contains *all* 64 floating-point register halves.
++
++ // scratch register
++ F0, F0_H,
++
++ // argument registers
++ F13, F13_H,
++ F12, F12_H,
++ F11, F11_H,
++ F10, F10_H,
++ F9, F9_H,
++ F8, F8_H,
++ F7, F7_H,
++ F6, F6_H,
++ F5, F5_H,
++ F4, F4_H,
++ F3, F3_H,
++ F2, F2_H,
++ F1, F1_H,
++
++ // non-volatile registers
++ F14, F14_H,
++ F15, F15_H,
++ F16, F16_H,
++ F17, F17_H,
++ F18, F18_H,
++ F19, F19_H,
++ F20, F20_H,
++ F21, F21_H,
++ F22, F22_H,
++ F23, F23_H,
++ F24, F24_H,
++ F25, F25_H,
++ F26, F26_H,
++ F27, F27_H,
++ F28, F28_H,
++ F29, F29_H,
++ F30, F30_H,
++ F31, F31_H
++);
++
++alloc_class chunk2 (
++ // Chunk2 contains *all* 8 condition code registers.
++
++ CCR0,
++ CCR1,
++ CCR2,
++ CCR3,
++ CCR4,
++ CCR5,
++ CCR6,
++ CCR7
++);
++
++alloc_class chunk3 (
++ // special registers
++ // These registers are not allocated, but used for nodes generated by postalloc expand.
++ SR_XER,
++ SR_LR,
++ SR_CTR,
++ SR_VRSAVE,
++ SR_SPEFSCR,
++ SR_PPR
++);
++
++//-------Architecture Description Register Classes-----------------------
++
++// Several register classes are automatically defined based upon
++// information in this architecture description.
++
++// 1) reg_class inline_cache_reg ( as defined in frame section )
++// 2) reg_class compiler_method_oop_reg ( as defined in frame section )
++// 3) reg_class interpreter_method_oop_reg ( as defined in frame section )
++// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
++//
++
++// ----------------------------
++// 32 Bit Register Classes
++// ----------------------------
++
++// We specify registers twice, once as read/write, and once read-only.
++// We use the read-only registers for source operands. With this, we
++// can include preset read only registers in this class, as a hard-coded
++// '0'-register. (We used to simulate this on ppc.)
++
++// 32 bit registers that can be read and written i.e. these registers
++// can be dest (or src) of normal instructions.
++reg_class bits32_reg_rw(
++/*R0*/ // R0
++/*R1*/ // SP
++ R2, // TOC
++ R3,
++ R4,
++ R5,
++ R6,
++ R7,
++ R8,
++ R9,
++ R10,
++ R11,
++ R12,
++/*R13*/ // system thread id
++ R14,
++ R15,
++/*R16*/ // R16_thread
++ R17,
++ R18,
++ R19,
++ R20,
++ R21,
++ R22,
++ R23,
++ R24,
++ R25,
++ R26,
++ R27,
++ R28,
++/*R29*/ // global TOC
++/*R30*/ // Narrow Oop Base
++ R31
++);
++
++// 32 bit registers that can only be read i.e. these registers can
++// only be src of all instructions.
++reg_class bits32_reg_ro(
++/*R0*/ // R0
++/*R1*/ // SP
++ R2, // TOC
++ R3,
++ R4,
++ R5,
++ R6,
++ R7,
++ R8,
++ R9,
++ R10,
++ R11,
++ R12,
++/*R13*/ // system thread id
++ R14,
++ R15,
++/*R16*/ // R16_thread
++ R17,
++ R18,
++ R19,
++ R20,
++ R21,
++ R22,
++ R23,
++ R24,
++ R25,
++ R26,
++ R27,
++ R28,
++/*R29*/
++/*R30*/ // Narrow Oop Base
++ R31
++);
++
++// Complement-required-in-pipeline operands for narrow oops.
++reg_class bits32_reg_ro_not_complement (
++/*R0*/ // R0
++ R1, // SP
++ R2, // TOC
++ R3,
++ R4,
++ R5,
++ R6,
++ R7,
++ R8,
++ R9,
++ R10,
++ R11,
++ R12,
++/*R13,*/ // system thread id
++ R14,
++ R15,
++ R16, // R16_thread
++ R17,
++ R18,
++ R19,
++ R20,
++ R21,
++ R22,
++/*R23,
++ R24,
++ R25,
++ R26,
++ R27,
++ R28,*/
++/*R29,*/ // TODO: let allocator handle TOC!!
++/*R30,*/
++ R31
++);
++
++// Complement-required-in-pipeline operands for narrow oops.
++// See 64-bit declaration.
++reg_class bits32_reg_ro_complement (
++ R23,
++ R24,
++ R25,
++ R26,
++ R27,
++ R28
++);
++
++reg_class rscratch1_bits32_reg(R11);
++reg_class rscratch2_bits32_reg(R12);
++reg_class rarg1_bits32_reg(R3);
++reg_class rarg2_bits32_reg(R4);
++reg_class rarg3_bits32_reg(R5);
++reg_class rarg4_bits32_reg(R6);
++
++// ----------------------------
++// 64 Bit Register Classes
++// ----------------------------
++// 64-bit build means 64-bit pointers means hi/lo pairs
++
++reg_class rscratch1_bits64_reg(R11_H, R11);
++reg_class rscratch2_bits64_reg(R12_H, R12);
++reg_class rarg1_bits64_reg(R3_H, R3);
++reg_class rarg2_bits64_reg(R4_H, R4);
++reg_class rarg3_bits64_reg(R5_H, R5);
++reg_class rarg4_bits64_reg(R6_H, R6);
++// Thread register, 'written' by tlsLoadP, see there.
++reg_class thread_bits64_reg(R16_H, R16);
++
++reg_class r19_bits64_reg(R19_H, R19);
++
++// 64 bit registers that can be read and written i.e. these registers
++// can be dest (or src) of normal instructions.
++reg_class bits64_reg_rw(
++/*R0_H, R0*/ // R0
++/*R1_H, R1*/ // SP
++ R2_H, R2, // TOC
++ R3_H, R3,
++ R4_H, R4,
++ R5_H, R5,
++ R6_H, R6,
++ R7_H, R7,
++ R8_H, R8,
++ R9_H, R9,
++ R10_H, R10,
++ R11_H, R11,
++ R12_H, R12,
++/*R13_H, R13*/ // system thread id
++ R14_H, R14,
++ R15_H, R15,
++/*R16_H, R16*/ // R16_thread
++ R17_H, R17,
++ R18_H, R18,
++ R19_H, R19,
++ R20_H, R20,
++ R21_H, R21,
++ R22_H, R22,
++ R23_H, R23,
++ R24_H, R24,
++ R25_H, R25,
++ R26_H, R26,
++ R27_H, R27,
++ R28_H, R28,
++/*R29_H, R29*/
++/*R30_H, R30*/
++ R31_H, R31
++);
++
++// 64 bit registers, excluding r2, r11 and r12.
++// Used to hold the TOC to avoid collisions with expanded LeafCall which uses
++// r2, r11 and r12 internally.
++reg_class bits64_reg_leaf_call(
++/*R0_H, R0*/ // R0
++/*R1_H, R1*/ // SP
++/*R2_H, R2*/ // TOC
++ R3_H, R3,
++ R4_H, R4,
++ R5_H, R5,
++ R6_H, R6,
++ R7_H, R7,
++ R8_H, R8,
++ R9_H, R9,
++ R10_H, R10,
++/*R11_H, R11*/
++/*R12_H, R12*/
++/*R13_H, R13*/ // system thread id
++ R14_H, R14,
++ R15_H, R15,
++/*R16_H, R16*/ // R16_thread
++ R17_H, R17,
++ R18_H, R18,
++ R19_H, R19,
++ R20_H, R20,
++ R21_H, R21,
++ R22_H, R22,
++ R23_H, R23,
++ R24_H, R24,
++ R25_H, R25,
++ R26_H, R26,
++ R27_H, R27,
++ R28_H, R28,
++/*R29_H, R29*/
++/*R30_H, R30*/
++ R31_H, R31
++);
++
++// Used to hold the TOC to avoid collisions with expanded DynamicCall
++// which uses r19 as inline cache internally and expanded LeafCall which uses
++// r2, r11 and r12 internally.
++reg_class bits64_constant_table_base(
++/*R0_H, R0*/ // R0
++/*R1_H, R1*/ // SP
++/*R2_H, R2*/ // TOC
++ R3_H, R3,
++ R4_H, R4,
++ R5_H, R5,
++ R6_H, R6,
++ R7_H, R7,
++ R8_H, R8,
++ R9_H, R9,
++ R10_H, R10,
++/*R11_H, R11*/
++/*R12_H, R12*/
++/*R13_H, R13*/ // system thread id
++ R14_H, R14,
++ R15_H, R15,
++/*R16_H, R16*/ // R16_thread
++ R17_H, R17,
++ R18_H, R18,
++/*R19_H, R19*/
++ R20_H, R20,
++ R21_H, R21,
++ R22_H, R22,
++ R23_H, R23,
++ R24_H, R24,
++ R25_H, R25,
++ R26_H, R26,
++ R27_H, R27,
++ R28_H, R28,
++/*R29_H, R29*/
++/*R30_H, R30*/
++ R31_H, R31
++);
++
++// 64 bit registers that can only be read i.e. these registers can
++// only be src of all instructions.
++reg_class bits64_reg_ro(
++/*R0_H, R0*/ // R0
++ R1_H, R1,
++ R2_H, R2, // TOC
++ R3_H, R3,
++ R4_H, R4,
++ R5_H, R5,
++ R6_H, R6,
++ R7_H, R7,
++ R8_H, R8,
++ R9_H, R9,
++ R10_H, R10,
++ R11_H, R11,
++ R12_H, R12,
++/*R13_H, R13*/ // system thread id
++ R14_H, R14,
++ R15_H, R15,
++ R16_H, R16, // R16_thread
++ R17_H, R17,
++ R18_H, R18,
++ R19_H, R19,
++ R20_H, R20,
++ R21_H, R21,
++ R22_H, R22,
++ R23_H, R23,
++ R24_H, R24,
++ R25_H, R25,
++ R26_H, R26,
++ R27_H, R27,
++ R28_H, R28,
++/*R29_H, R29*/ // TODO: let allocator handle TOC!!
++/*R30_H, R30,*/
++ R31_H, R31
++);
++
++// Complement-required-in-pipeline operands.
++reg_class bits64_reg_ro_not_complement (
++/*R0_H, R0*/ // R0
++ R1_H, R1, // SP
++ R2_H, R2, // TOC
++ R3_H, R3,
++ R4_H, R4,
++ R5_H, R5,
++ R6_H, R6,
++ R7_H, R7,
++ R8_H, R8,
++ R9_H, R9,
++ R10_H, R10,
++ R11_H, R11,
++ R12_H, R12,
++/*R13_H, R13*/ // system thread id
++ R14_H, R14,
++ R15_H, R15,
++ R16_H, R16, // R16_thread
++ R17_H, R17,
++ R18_H, R18,
++ R19_H, R19,
++ R20_H, R20,
++ R21_H, R21,
++ R22_H, R22,
++/*R23_H, R23,
++ R24_H, R24,
++ R25_H, R25,
++ R26_H, R26,
++ R27_H, R27,
++ R28_H, R28,*/
++/*R29_H, R29*/ // TODO: let allocator handle TOC!!
++/*R30_H, R30,*/
++ R31_H, R31
++);
++
++// Complement-required-in-pipeline operands.
++// This register mask is used for the trap instructions that implement
++// the null checks on AIX. The trap instruction first computes the
++// complement of the value it shall trap on. Because of this, the
++// instruction cannot be scheduled in the same cycle as another
++// instruction reading the normal value of the same register. So we
++// force the value to check into 'bits64_reg_ro_not_complement'
++// and then copy it to 'bits64_reg_ro_complement' for the trap.
++reg_class bits64_reg_ro_complement (
++ R23_H, R23,
++ R24_H, R24,
++ R25_H, R25,
++ R26_H, R26,
++ R27_H, R27,
++ R28_H, R28
++);
++
++
++// ----------------------------
++// Special Class for Condition Code Flags Register
++
++reg_class int_flags(
++/*CCR0*/ // scratch
++/*CCR1*/ // scratch
++/*CCR2*/ // nv!
++/*CCR3*/ // nv!
++/*CCR4*/ // nv!
++ CCR5,
++ CCR6,
++ CCR7
++);
++
++reg_class int_flags_CR0(CCR0);
++reg_class int_flags_CR1(CCR1);
++reg_class int_flags_CR6(CCR6);
++reg_class ctr_reg(SR_CTR);
++
++// ----------------------------
++// Float Register Classes
++// ----------------------------
++
++reg_class flt_reg(
++/*F0*/ // scratch
++ F1,
++ F2,
++ F3,
++ F4,
++ F5,
++ F6,
++ F7,
++ F8,
++ F9,
++ F10,
++ F11,
++ F12,
++ F13,
++ F14, // nv!
++ F15, // nv!
++ F16, // nv!
++ F17, // nv!
++ F18, // nv!
++ F19, // nv!
++ F20, // nv!
++ F21, // nv!
++ F22, // nv!
++ F23, // nv!
++ F24, // nv!
++ F25, // nv!
++ F26, // nv!
++ F27, // nv!
++ F28, // nv!
++ F29, // nv!
++ F30, // nv!
++ F31 // nv!
++);
++
++// Double precision float registers have virtual `high halves' that
++// are needed by the allocator.
++reg_class dbl_reg(
++/*F0, F0_H*/ // scratch
++ F1, F1_H,
++ F2, F2_H,
++ F3, F3_H,
++ F4, F4_H,
++ F5, F5_H,
++ F6, F6_H,
++ F7, F7_H,
++ F8, F8_H,
++ F9, F9_H,
++ F10, F10_H,
++ F11, F11_H,
++ F12, F12_H,
++ F13, F13_H,
++ F14, F14_H, // nv!
++ F15, F15_H, // nv!
++ F16, F16_H, // nv!
++ F17, F17_H, // nv!
++ F18, F18_H, // nv!
++ F19, F19_H, // nv!
++ F20, F20_H, // nv!
++ F21, F21_H, // nv!
++ F22, F22_H, // nv!
++ F23, F23_H, // nv!
++ F24, F24_H, // nv!
++ F25, F25_H, // nv!
++ F26, F26_H, // nv!
++ F27, F27_H, // nv!
++ F28, F28_H, // nv!
++ F29, F29_H, // nv!
++ F30, F30_H, // nv!
++ F31, F31_H // nv!
++);
++
++ %}
++
++//----------DEFINITION BLOCK---------------------------------------------------
++// Define name --> value mappings to inform the ADLC of an integer valued name
++// Current support includes integer values in the range [0, 0x7FFFFFFF]
++// Format:
++//        int_def  <name>  ( <int_value>, <expression> );
++// Generated Code in ad_<arch>.hpp
++//        #define  <name>  (<expression>)
++//        // value == <int_value>
++// Generated code in ad_<arch>.cpp adlc_verification()
++//        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
++//
++definitions %{
++ // The default cost (of an ALU instruction).
++ int_def DEFAULT_COST_LOW ( 30, 30);
++ int_def DEFAULT_COST ( 100, 100);
++ int_def HUGE_COST (1000000, 1000000);
++
++ // Memory refs
++ int_def MEMORY_REF_COST_LOW ( 200, DEFAULT_COST * 2);
++ int_def MEMORY_REF_COST ( 300, DEFAULT_COST * 3);
++
++ // Branches are even more expensive.
++ int_def BRANCH_COST ( 900, DEFAULT_COST * 9);
++ int_def CALL_COST ( 1300, DEFAULT_COST * 13);
++%}
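++
++// These symbolic cost names are referenced from the 'instruct' definitions
++// later in this file via ins_cost(...), where they guide instruction
++// selection.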
++
++
++//----------SOURCE BLOCK-------------------------------------------------------
++// This is a block of C++ code which provides values, functions, and
++// definitions necessary in the rest of the architecture description.
++source_hpp %{
++ // Header information of the source block.
++ // Method declarations/definitions which are used outside
++ // the ad-scope can conveniently be defined here.
++ //
++ // To keep related declarations/definitions/uses close together,
++ // we switch between source %{ }% and source_hpp %{ }% freely as needed.
++
++ // Returns true if Node n is followed by a MemBar node that
++ // will do an acquire. If so, this node must not do the acquire
++ // operation.
++ bool followed_by_acquire(const Node *n);
++%}
++
++source %{
++
++// Optimize load-acquire.
++//
++// Check if the acquire is unnecessary because a following operation
++// performs an acquire anyway.
++// Walk the pattern:
++//
++// n: Load.acq
++// |
++// MemBarAcquire
++// | |
++// Proj(ctrl) Proj(mem)
++// | |
++// MemBarRelease/Volatile
++//
++bool followed_by_acquire(const Node *load) {
++ assert(load->is_Load(), "So far implemented only for loads.");
++
++ // Find MemBarAcquire.
++ const Node *mba = NULL;
++ for (DUIterator_Fast imax, i = load->fast_outs(imax); i < imax; i++) {
++ const Node *out = load->fast_out(i);
++ if (out->Opcode() == Op_MemBarAcquire) {
++ if (out->in(0) == load) continue; // Skip control edge, membar should be found via precedence edge.
++ mba = out;
++ break;
++ }
++ }
++ if (!mba) return false;
++
++ // Find following MemBar node.
++ //
++ // The following node must be reachable by control AND memory
++ // edge to assure no other operations are in between the two nodes.
++ //
++ // So first get the Proj node, mem_proj, to use it to iterate forward.
++ Node *mem_proj = NULL;
++ for (DUIterator_Fast imax, i = mba->fast_outs(imax); i < imax; i++) {
++ mem_proj = mba->fast_out(i); // Throw out-of-bounds if proj not found
++ assert(mem_proj->is_Proj(), "only projections here");
++ ProjNode *proj = mem_proj->as_Proj();
++ if (proj->_con == TypeFunc::Memory &&
++ !Compile::current()->node_arena()->contains(mem_proj)) // Unmatched old-space only
++ break;
++ }
++ assert(mem_proj->as_Proj()->_con == TypeFunc::Memory, "Graph broken");
++
++ // Search MemBar behind Proj. If there are other memory operations
++ // behind the Proj we lost.
++ for (DUIterator_Fast jmax, j = mem_proj->fast_outs(jmax); j < jmax; j++) {
++ Node *x = mem_proj->fast_out(j);
++ // Proj might have an edge to a store or load node which precedes the membar.
++ if (x->is_Mem()) return false;
++
++ // On PPC64 release and volatile are implemented by an instruction
++ // that also has acquire semantics. I.e. there is no need for an
++ // acquire before these.
++ int xop = x->Opcode();
++ if (xop == Op_MemBarRelease || xop == Op_MemBarVolatile) {
++ // Make sure we're not missing Call/Phi/MergeMem by checking
++ // control edges. The control edge must directly lead back
++ // to the MemBarAcquire
++ Node *ctrl_proj = x->in(0);
++ if (ctrl_proj->is_Proj() && ctrl_proj->in(0) == mba) {
++ return true;
++ }
++ }
++ }
++
++ return false;
++}
++
++#define __ _masm.
++
++// Tertiary op of a LoadP or StoreP encoding.
++#define REGP_OP true
++
++// ****************************************************************************
++
++// REQUIRED FUNCTIONALITY
++
++// !!!!! Special hack to get all types of calls to specify the byte offset
++// from the start of the call to the point where the return address
++// will point.
++
++// PPC port: Removed use of lazy constant construct.
++
++int MachCallStaticJavaNode::ret_addr_offset() {
++ // It's only a single branch-and-link instruction.
++ return 4;
++}
++
++int MachCallDynamicJavaNode::ret_addr_offset() {
++ // Offset is 4 with postalloc expanded calls (bl is one instruction). We use
++ // postalloc expanded calls if we use inline caches and do not update method data.
++ if (UseInlineCaches)
++ return 4;
++
++ int vtable_index = this->_vtable_index;
++ if (vtable_index < 0) {
++ // Must be invalid_vtable_index, not nonvirtual_vtable_index.
++ assert(vtable_index == Method::invalid_vtable_index, "correct sentinel value");
++ return 12;
++ } else {
++ assert(!UseInlineCaches, "expect vtable calls only if not using ICs");
++ return 24;
++ }
++}
++
++int MachCallRuntimeNode::ret_addr_offset() {
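++  // The ELFv2 ABI has no function descriptors, so the sequence branching
++  // into the C runtime is shorter there (28 bytes) than under ELFv1 (40).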
++#if defined(ABI_ELFv2)
++ return 28;
++#else
++ return 40;
++#endif
++}
++
++//=============================================================================
++
++// condition code conversions
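++//
++// A PPC conditional branch encodes the CR bit to test in its BI field
++// (4 * CR-field + bit index, see cc_to_biint()) and the branch-on-set vs.
++// branch-on-clear choice in its BO field; bit 3 of 'cc' selects between
++// the two BO encodings, which differ by 8.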
++
++static int cc_to_boint(int cc) {
++ return Assembler::bcondCRbiIs0 | (cc & 8);
++}
++
++static int cc_to_inverse_boint(int cc) {
++ return Assembler::bcondCRbiIs0 | (8-(cc & 8));
++}
++
++static int cc_to_biint(int cc, int flags_reg) {
++ return (flags_reg << 2) | (cc & 3);
++}
++
++//=============================================================================
++
++// Compute padding required for nodes which need alignment. The padding
++// is the number of bytes (not instructions) which will be inserted before
++// the instruction. The padding must be a multiple of the size of a NOP
++// instruction.
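++// (The '& 31' below targets 32-byte, i.e. eight-instruction, alignment;
++// the per-node 'N*4' bias presumably positions the hot loop entry of each
++// string intrinsic on such a boundary.)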
++
++int string_indexOf_imm1_charNode::compute_padding(int current_offset) const {
++ return (3*4-current_offset)&31;
++}
++
++int string_indexOf_imm1Node::compute_padding(int current_offset) const {
++ return (2*4-current_offset)&31;
++}
++
++int string_indexOf_immNode::compute_padding(int current_offset) const {
++ return (3*4-current_offset)&31;
++}
++
++int string_indexOfNode::compute_padding(int current_offset) const {
++ return (1*4-current_offset)&31;
++}
++
++int string_compareNode::compute_padding(int current_offset) const {
++ return (4*4-current_offset)&31;
++}
++
++int string_equals_immNode::compute_padding(int current_offset) const {
++ if (opnd_array(3)->constant() < 16) return 0; // Don't insert nops for short version (loop completely unrolled).
++ return (2*4-current_offset)&31;
++}
++
++int string_equalsNode::compute_padding(int current_offset) const {
++ return (7*4-current_offset)&31;
++}
++
++int inlineCallClearArrayNode::compute_padding(int current_offset) const {
++ return (2*4-current_offset)&31;
++}
++
++//=============================================================================
++
++// Indicate if the safepoint node needs the polling page as an input.
++bool SafePointNode::needs_polling_address_input() {
++  // The address is loaded from the thread by a separate node.
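++  // (Compare the polling-page handling in MachEpilogNode::emit() below.)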
++ return true;
++}
++
++//=============================================================================
++
++// Emit an interrupt that is caught by the debugger (for debugging compiler).
++void emit_break(CodeBuffer &cbuf) {
++ MacroAssembler _masm(&cbuf);
++ __ illtrap();
++}
++
++#ifndef PRODUCT
++void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
++ st->print("BREAKPOINT");
++}
++#endif
++
++void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
++ emit_break(cbuf);
++}
++
++uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
++ return MachNode::size(ra_);
++}
++
++//=============================================================================
++
++void emit_nop(CodeBuffer &cbuf) {
++ MacroAssembler _masm(&cbuf);
++ __ nop();
++}
++
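++// Emit one raw 4-byte instruction word at the current end of the code
++// buffer (used by ld_st_helper() below to emit spill code directly).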
++static inline void emit_long(CodeBuffer &cbuf, int value) {
++ *((int*)(cbuf.insts_end())) = value;
++ cbuf.set_insts_end(cbuf.insts_end() + BytesPerInstWord);
++}
++
++//=============================================================================
++
++%} // interrupt source
++
++source_hpp %{ // Header information of the source block.
++
++//--------------------------------------------------------------
++//---< Used for optimization in Compile::Shorten_branches >---
++//--------------------------------------------------------------
++
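++// Six instruction slots cover the worst case emitted by
++// CallStubImpl::emit_trampoline_stub(): up to two instructions to compute
++// the TOC address, up to two for the large-offset load of the call target,
++// plus mtctr and bctr.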
++const uint trampoline_stub_size = 6 * BytesPerInstWord;
++
++class CallStubImpl {
++
++ public:
++
++ // Emit call stub, compiled java to interpreter.
++ static void emit_trampoline_stub(MacroAssembler &_masm, int destination_toc_offset, int insts_call_instruction_offset);
++
++ // Size of call trampoline stub.
++ // This doesn't need to be accurate to the byte, but it
++ // must be larger than or equal to the real size of the stub.
++ static uint size_call_trampoline() {
++ return trampoline_stub_size;
++ }
++
++ // number of relocations needed by a call trampoline stub
++ static uint reloc_call_trampoline() {
++ return 5;
++ }
++
++};
++
++%} // end source_hpp
++
++source %{
++
++// Emit a trampoline stub for a call to a target which is too far away.
++//
++// code sequences:
++//
++// call-site:
++//   branch-and-link to <destination> or <trampoline stub>
++//
++// Related trampoline stub for this call-site in the stub section:
++// load the call target from the constant pool
++// branch via CTR (LR/link still points to the call-site above)
++
++void CallStubImpl::emit_trampoline_stub(MacroAssembler &_masm, int destination_toc_offset, int insts_call_instruction_offset) {
++ // Start the stub.
++ address stub = __ start_a_stub(Compile::MAX_stubs_size/2);
++ if (stub == NULL) {
++ Compile::current()->env()->record_out_of_memory_failure();
++ return;
++ }
++
++ // For java_to_interp stubs we use R11_scratch1 as scratch register
++ // and in call trampoline stubs we use R12_scratch2. This way we
++ // can distinguish them (see is_NativeCallTrampolineStub_at()).
++ Register reg_scratch = R12_scratch2;
++
++ // Create a trampoline stub relocation which relates this trampoline stub
++ // with the call instruction at insts_call_instruction_offset in the
++ // instructions code-section.
++ __ relocate(trampoline_stub_Relocation::spec(__ code()->insts()->start() + insts_call_instruction_offset));
++ const int stub_start_offset = __ offset();
++
++ // Now, create the trampoline stub's code:
++ // - load the TOC
++ // - load the call target from the constant pool
++ // - call
++ __ calculate_address_from_global_toc(reg_scratch, __ method_toc());
++ __ ld_largeoffset_unchecked(reg_scratch, destination_toc_offset, reg_scratch, false);
++ __ mtctr(reg_scratch);
++ __ bctr();
++
++ const address stub_start_addr = __ addr_at(stub_start_offset);
++
++ // FIXME: Assert that the trampoline stub can be identified and patched.
++
++ // Assert that the encoded destination_toc_offset can be identified and that it is correct.
++ assert(destination_toc_offset == NativeCallTrampolineStub_at(stub_start_addr)->destination_toc_offset(),
++ "encoded offset into the constant pool must match");
++ // Trampoline_stub_size should be good.
++ assert((uint)(__ offset() - stub_start_offset) <= trampoline_stub_size, "should be good size");
++ assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline");
++
++ // End the stub.
++ __ end_a_stub();
++}
++
++//=============================================================================
++
++// Emit an inline branch-and-link call and a related trampoline stub.
++//
++// code sequences:
++//
++// call-site:
++//   branch-and-link to <destination> or <trampoline stub>
++//
++// Related trampoline stub for this call-site in the stub section:
++// load the call target from the constant pool
++// branch via CTR (LR/link still points to the call-site above)
++//
++
++typedef struct {
++ int insts_call_instruction_offset;
++ int ret_addr_offset;
++} EmitCallOffsets;
++
++// Emit a branch-and-link instruction that branches to a trampoline.
++// - Remember the offset of the branch-and-link instruction.
++// - Add a relocation at the branch-and-link instruction.
++// - Emit a branch-and-link.
++// - Remember the return pc offset.
++EmitCallOffsets emit_call_with_trampoline_stub(MacroAssembler &_masm, address entry_point, relocInfo::relocType rtype) {
++ EmitCallOffsets offsets = { -1, -1 };
++ const int start_offset = __ offset();
++ offsets.insts_call_instruction_offset = __ offset();
++
++ // No entry point given, use the current pc.
++ if (entry_point == NULL) entry_point = __ pc();
++
++ if (!Compile::current()->in_scratch_emit_size()) {
++ // Put the entry point as a constant into the constant pool.
++ const address entry_point_toc_addr = __ address_constant(entry_point, RelocationHolder::none);
++ const int entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr);
++
++ // Emit the trampoline stub which will be related to the branch-and-link below.
++ CallStubImpl::emit_trampoline_stub(_masm, entry_point_toc_offset, offsets.insts_call_instruction_offset);
++ __ relocate(rtype);
++ }
++
++ // Note: At this point we do not have the address of the trampoline
++ // stub, and the entry point might be too far away for bl, so __ pc()
++  // serves as a dummy and the bl will be patched later.
++ __ bl((address) __ pc());
++
++ offsets.ret_addr_offset = __ offset() - start_offset;
++
++ return offsets;
++}
++
++//=============================================================================
++
++// Factory for creating loadConL* nodes for large/small constant pool.
++
++static inline jlong replicate_immF(float con) {
++ // Replicate float con 2 times and pack into vector.
++ int val = *((int*)&con);
++ jlong lval = val;
++ lval = (lval << 32) | (lval & 0xFFFFFFFFl);
++ return lval;
++}
++
++//=============================================================================
++
++const RegMask& MachConstantBaseNode::_out_RegMask = BITS64_CONSTANT_TABLE_BASE_mask();
++int Compile::ConstantTable::calculate_table_base_offset() const {
++ return 0; // absolute addressing, no offset
++}
++
++bool MachConstantBaseNode::requires_postalloc_expand() const { return true; }
++void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
++ Compile *C = ra_->C;
++
++ iRegPdstOper *op_dst = new (C) iRegPdstOper();
++ MachNode *m1 = new (C) loadToc_hiNode();
++ MachNode *m2 = new (C) loadToc_loNode();
++
++ m1->add_req(NULL);
++ m2->add_req(NULL, m1);
++ m1->_opnds[0] = op_dst;
++ m2->_opnds[0] = op_dst;
++ m2->_opnds[1] = op_dst;
++ ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
++ ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
++ nodes->push(m1);
++ nodes->push(m2);
++}
++
++void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
++ // Is postalloc expanded.
++ ShouldNotReachHere();
++}
++
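++// The base node itself occupies no code space; the loadToc_hi/loadToc_lo
++// nodes pushed by postalloc_expand() above carry the actual encoding.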
++uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
++ return 0;
++}
++
++#ifndef PRODUCT
++void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
++ st->print("-- \t// MachConstantBaseNode (empty encoding)");
++}
++#endif
++
++//=============================================================================
++
++#ifndef PRODUCT
++void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
++ Compile* C = ra_->C;
++ const long framesize = C->frame_slots() << LogBytesPerInt;
++
++ st->print("PROLOG\n\t");
++ if (C->need_stack_bang(framesize)) {
++ st->print("stack_overflow_check\n\t");
++ }
++
++ if (!false /* TODO: PPC port C->is_frameless_method()*/) {
++ st->print("save return pc\n\t");
++ st->print("push frame %d\n\t", -framesize);
++ }
++}
++#endif
++
++// Macro used instead of the common __ to emulate the pipes of PPC.
++// Instead of e.g. __ ld(...) one has to write ___(ld) ld(...). This enables the
++// micro scheduler to cope with "hand written" assembler like in the prolog. Though
++// still no scheduling of this code is possible, the micro scheduler is aware of the
++// code and can update its internal data. The following mechanism is used to achieve this:
++// The micro scheduler calls size() of each compound node during scheduling. size() does a
++// dummy emit and only during this dummy emit C->hb_scheduling() is not NULL.
++#if 0 // TODO: PPC port
++#define ___(op) if (UsePower6SchedulerPPC64 && C->hb_scheduling()) \
++ C->hb_scheduling()->_pdScheduling->PdEmulatePipe(ppc64Opcode_##op); \
++ _masm.
++#define ___stop if (UsePower6SchedulerPPC64 && C->hb_scheduling()) \
++ C->hb_scheduling()->_pdScheduling->PdEmulatePipe(archOpcode_none)
++#define ___advance if (UsePower6SchedulerPPC64 && C->hb_scheduling()) \
++ C->hb_scheduling()->_pdScheduling->advance_offset
++#else
++#define ___(op) if (UsePower6SchedulerPPC64) \
++ Unimplemented(); \
++ _masm.
++#define ___stop if (UsePower6SchedulerPPC64) \
++ Unimplemented()
++#define ___advance if (UsePower6SchedulerPPC64) \
++ Unimplemented()
++#endif
++
++void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
++ Compile* C = ra_->C;
++ MacroAssembler _masm(&cbuf);
++
++ const long framesize = C->frame_size_in_bytes();
++ assert(framesize % (2 * wordSize) == 0, "must preserve 2*wordSize alignment");
++
++ const bool method_is_frameless = false /* TODO: PPC port C->is_frameless_method()*/;
++
++ const Register return_pc = R20; // Must match return_addr() in frame section.
++ const Register callers_sp = R21;
++ const Register push_frame_temp = R22;
++ const Register toc_temp = R23;
++ assert_different_registers(R11, return_pc, callers_sp, push_frame_temp, toc_temp);
++
++ if (method_is_frameless) {
++ // Add nop at beginning of all frameless methods to prevent any
++ // oop instructions from getting overwritten by make_not_entrant
++ // (patching attempt would fail).
++ ___(nop) nop();
++ } else {
++ // Get return pc.
++ ___(mflr) mflr(return_pc);
++ }
++
++ // Calls to C2R adapters often do not accept exceptional returns.
++  // We require that their callers bang for them. But be
++ // careful, because some VM calls (such as call site linkage) can
++ // use several kilobytes of stack. But the stack safety zone should
++ // account for that. See bugs 4446381, 4468289, 4497237.
++
++ int bangsize = C->bang_size_in_bytes();
++ assert(bangsize >= framesize || bangsize <= 0, "stack bang size incorrect");
++ if (C->need_stack_bang(bangsize) && UseStackBanging) {
++ // Unfortunately we cannot use the function provided in
++ // assembler.cpp as we have to emulate the pipes. So I had to
++ // insert the code of generate_stack_overflow_check(), see
++ // assembler.cpp for some illuminative comments.
++ const int page_size = os::vm_page_size();
++ int bang_end = StackShadowPages * page_size;
++
++ // This is how far the previous frame's stack banging extended.
++ const int bang_end_safe = bang_end;
++
++ if (bangsize > page_size) {
++ bang_end += bangsize;
++ }
++
++ int bang_offset = bang_end_safe;
++
++ while (bang_offset <= bang_end) {
++ // Need at least one stack bang at end of shadow zone.
++
++ // Again I had to copy code, this time from assembler_ppc64.cpp,
++ // bang_stack_with_offset - see there for comments.
++
++ // Stack grows down, caller passes positive offset.
++ assert(bang_offset > 0, "must bang with positive offset");
++
++ long stdoffset = -bang_offset;
++
++ if (Assembler::is_simm(stdoffset, 16)) {
++ // Signed 16 bit offset, a simple std is ok.
++ if (UseLoadInstructionsForStackBangingPPC64) {
++ ___(ld) ld(R0, (int)(signed short)stdoffset, R1_SP);
++ } else {
++ ___(std) std(R0, (int)(signed short)stdoffset, R1_SP);
++ }
++ } else if (Assembler::is_simm(stdoffset, 31)) {
++ // Use largeoffset calculations for addis & ld/std.
++ const int hi = MacroAssembler::largeoffset_si16_si16_hi(stdoffset);
++ const int lo = MacroAssembler::largeoffset_si16_si16_lo(stdoffset);
++
++ Register tmp = R11;
++ ___(addis) addis(tmp, R1_SP, hi);
++ if (UseLoadInstructionsForStackBangingPPC64) {
++ ___(ld) ld(R0, lo, tmp);
++ } else {
++ ___(std) std(R0, lo, tmp);
++ }
++ } else {
++ ShouldNotReachHere();
++ }
++
++ bang_offset += page_size;
++ }
++ // R11 trashed
++ } // C->need_stack_bang(framesize) && UseStackBanging
++
++ unsigned int bytes = (unsigned int)framesize;
++ long offset = Assembler::align_addr(bytes, frame::alignment_in_bytes);
++ ciMethod *currMethod = C->method();
++
++ // Optimized version for most common case.
++ if (UsePower6SchedulerPPC64 &&
++ !method_is_frameless && Assembler::is_simm((int)(-offset), 16) &&
++ !(false /* ConstantsALot TODO: PPC port*/)) {
++ ___(or) mr(callers_sp, R1_SP);
++ ___(std) std(return_pc, _abi(lr), R1_SP);
++ ___(stdu) stdu(R1_SP, -offset, R1_SP);
++ return;
++ }
++
++ if (!method_is_frameless) {
++ // Get callers sp.
++ ___(or) mr(callers_sp, R1_SP);
++
++ // Push method's frame, modifies SP.
++ assert(Assembler::is_uimm(framesize, 32U), "wrong type");
++ // The ABI is already accounted for in 'framesize' via the
++ // 'out_preserve' area.
++ Register tmp = push_frame_temp;
++ // Had to insert code of push_frame((unsigned int)framesize, push_frame_temp).
++ if (Assembler::is_simm(-offset, 16)) {
++ ___(stdu) stdu(R1_SP, -offset, R1_SP);
++ } else {
++ long x = -offset;
++ // Had to insert load_const(tmp, -offset).
++ ___(addis) lis( tmp, (int)((signed short)(((x >> 32) & 0xffff0000) >> 16)));
++ ___(ori) ori( tmp, tmp, ((x >> 32) & 0x0000ffff));
++ ___(rldicr) sldi(tmp, tmp, 32);
++ ___(oris) oris(tmp, tmp, (x & 0xffff0000) >> 16);
++ ___(ori) ori( tmp, tmp, (x & 0x0000ffff));
++
++ ___(stdux) stdux(R1_SP, R1_SP, tmp);
++ }
++ }
++#if 0 // TODO: PPC port
++ // For testing large constant pools, emit a lot of constants to constant pool.
++ // "Randomize" const_size.
++ if (ConstantsALot) {
++ const int num_consts = const_size();
++ for (int i = 0; i < num_consts; i++) {
++ __ long_constant(0xB0B5B00BBABE);
++ }
++ }
++#endif
++ if (!method_is_frameless) {
++ // Save return pc.
++ ___(std) std(return_pc, _abi(lr), callers_sp);
++ }
++}
++#undef ___
++#undef ___stop
++#undef ___advance
++
++uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
++  // Variable size. Determine dynamically.
++ return MachNode::size(ra_);
++}
++
++int MachPrologNode::reloc() const {
++ // Return number of relocatable values contained in this instruction.
++ return 1; // 1 reloc entry for load_const(toc).
++}
++
++//=============================================================================
++
++#ifndef PRODUCT
++void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
++ Compile* C = ra_->C;
++
++ st->print("EPILOG\n\t");
++ st->print("restore return pc\n\t");
++ st->print("pop frame\n\t");
++
++ if (do_polling() && C->is_method_compilation()) {
++ st->print("touch polling page\n\t");
++ }
++}
++#endif
++
++void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
++ Compile* C = ra_->C;
++ MacroAssembler _masm(&cbuf);
++
++ const long framesize = ((long)C->frame_slots()) << LogBytesPerInt;
++ assert(framesize >= 0, "negative frame-size?");
++
++ const bool method_needs_polling = do_polling() && C->is_method_compilation();
++ const bool method_is_frameless = false /* TODO: PPC port C->is_frameless_method()*/;
++ const Register return_pc = R11;
++ const Register polling_page = R12;
++
++ if (!method_is_frameless) {
++ // Restore return pc relative to callers' sp.
++ __ ld(return_pc, ((int)framesize) + _abi(lr), R1_SP);
++ }
++
++ if (method_needs_polling) {
++ if (LoadPollAddressFromThread) {
++ // TODO: PPC port __ ld(polling_page, in_bytes(JavaThread::poll_address_offset()), R16_thread);
++ Unimplemented();
++ } else {
++ __ load_const_optimized(polling_page, (long)(address) os::get_polling_page()); // TODO: PPC port: get_standard_polling_page()
++ }
++ }
++
++ if (!method_is_frameless) {
++ // Move return pc to LR.
++ __ mtlr(return_pc);
++ // Pop frame (fixed frame-size).
++ __ addi(R1_SP, R1_SP, (int)framesize);
++ }
++
++ if (method_needs_polling) {
++ // We need to mark the code position where the load from the safepoint
++ // polling page was emitted as relocInfo::poll_return_type here.
++ __ relocate(relocInfo::poll_return_type);
++ __ load_from_polling_page(polling_page);
++ }
++}
++
++uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
++ // Variable size. Determine dynamically.
++ return MachNode::size(ra_);
++}
++
++int MachEpilogNode::reloc() const {
++ // Return number of relocatable values contained in this instruction.
++ return 1; // 1 for load_from_polling_page.
++}
++
++const Pipeline * MachEpilogNode::pipeline() const {
++ return MachNode::pipeline_class();
++}
++
++// This method seems to be obsolete. It is declared in machnode.hpp
++// and defined in all *.ad files, but it is never called. Should we
++// get rid of it?
++int MachEpilogNode::safepoint_offset() const {
++ assert(do_polling(), "no return for this epilog node");
++ return 0;
++}
++
++#if 0 // TODO: PPC port
++void MachLoadPollAddrLateNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
++ MacroAssembler _masm(&cbuf);
++ if (LoadPollAddressFromThread) {
++ _masm.ld(R11, in_bytes(JavaThread::poll_address_offset()), R16_thread);
++ } else {
++ _masm.nop();
++ }
++}
++
++uint MachLoadPollAddrLateNode::size(PhaseRegAlloc* ra_) const {
++ if (LoadPollAddressFromThread) {
++ return 4;
++ } else {
++ return 4;
++ }
++}
++
++#ifndef PRODUCT
++void MachLoadPollAddrLateNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
++ st->print_cr(" LD R11, PollAddressOffset, R16_thread \t// LoadPollAddressFromThread");
++}
++#endif
++
++const RegMask &MachLoadPollAddrLateNode::out_RegMask() const {
++ return RSCRATCH1_BITS64_REG_mask();
++}
++#endif // PPC port
++
++// =============================================================================
++
++// Figure out which register class each belongs in: rc_int, rc_float or
++// rc_stack.
++enum RC { rc_bad, rc_int, rc_float, rc_stack };
++
++static enum RC rc_class(OptoReg::Name reg) {
++ // Return the register class for the given register. The given register
++ // reg is a _num value, which is an index into the MachRegisterNumbers
++ // enumeration in adGlobals_ppc64.hpp.
++
++ if (reg == OptoReg::Bad) return rc_bad;
++
++ // We have 64 integer register halves, starting at index 0.
++ if (reg < 64) return rc_int;
++
++ // We have 64 floating-point register halves, starting at index 64.
++ if (reg < 64+64) return rc_float;
++
++ // Between float regs & stack are the flags regs.
++ assert(OptoReg::is_stack(reg), "blow up if spilling flags");
++
++ return rc_stack;
++}
++
++static int ld_st_helper(CodeBuffer *cbuf, const char *op_str, uint opcode, int reg, int offset,
++ bool do_print, Compile* C, outputStream *st) {
++
++ assert(opcode == Assembler::LD_OPCODE ||
++ opcode == Assembler::STD_OPCODE ||
++ opcode == Assembler::LWZ_OPCODE ||
++ opcode == Assembler::STW_OPCODE ||
++ opcode == Assembler::LFD_OPCODE ||
++ opcode == Assembler::STFD_OPCODE ||
++ opcode == Assembler::LFS_OPCODE ||
++ opcode == Assembler::STFS_OPCODE,
++ "opcode not supported");
++
++ if (cbuf) {
++ int d =
++ (Assembler::LD_OPCODE == opcode || Assembler::STD_OPCODE == opcode) ?
++ Assembler::ds(offset+0 /* TODO: PPC port C->frame_slots_sp_bias_in_bytes()*/)
++ : Assembler::d1(offset+0 /* TODO: PPC port C->frame_slots_sp_bias_in_bytes()*/); // Makes no difference in opt build.
++ emit_long(*cbuf, opcode | Assembler::rt(Matcher::_regEncode[reg]) | d | Assembler::ra(R1_SP));
++ }
++#ifndef PRODUCT
++ else if (do_print) {
++ st->print("%-7s %s, [R1_SP + #%d+%d] \t// spill copy",
++ op_str,
++ Matcher::regName[reg],
++ offset, 0 /* TODO: PPC port C->frame_slots_sp_bias_in_bytes()*/);
++ }
++#endif
++ return 4; // size
++}
++
++uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
++ Compile* C = ra_->C;
++
++ // Get registers to move.
++ OptoReg::Name src_hi = ra_->get_reg_second(in(1));
++ OptoReg::Name src_lo = ra_->get_reg_first(in(1));
++ OptoReg::Name dst_hi = ra_->get_reg_second(this);
++ OptoReg::Name dst_lo = ra_->get_reg_first(this);
++
++ enum RC src_hi_rc = rc_class(src_hi);
++ enum RC src_lo_rc = rc_class(src_lo);
++ enum RC dst_hi_rc = rc_class(dst_hi);
++ enum RC dst_lo_rc = rc_class(dst_lo);
++
++ assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
++ if (src_hi != OptoReg::Bad)
++ assert((src_lo&1)==0 && src_lo+1==src_hi &&
++ (dst_lo&1)==0 && dst_lo+1==dst_hi,
++ "expected aligned-adjacent pairs");
++ // Generate spill code!
++ int size = 0;
++
++ if (src_lo == dst_lo && src_hi == dst_hi)
++ return size; // Self copy, no move.
++
++ // --------------------------------------
++ // Memory->Memory Spill. Use R0 to hold the value.
++ if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
++ int src_offset = ra_->reg2offset(src_lo);
++ int dst_offset = ra_->reg2offset(dst_lo);
++ if (src_hi != OptoReg::Bad) {
++ assert(src_hi_rc==rc_stack && dst_hi_rc==rc_stack,
++ "expected same type of move for high parts");
++ size += ld_st_helper(cbuf, "LD ", Assembler::LD_OPCODE, R0_num, src_offset, !do_size, C, st);
++ if (!cbuf && !do_size) st->print("\n\t");
++ size += ld_st_helper(cbuf, "STD ", Assembler::STD_OPCODE, R0_num, dst_offset, !do_size, C, st);
++ } else {
++ size += ld_st_helper(cbuf, "LWZ ", Assembler::LWZ_OPCODE, R0_num, src_offset, !do_size, C, st);
++ if (!cbuf && !do_size) st->print("\n\t");
++ size += ld_st_helper(cbuf, "STW ", Assembler::STW_OPCODE, R0_num, dst_offset, !do_size, C, st);
++ }
++ return size;
++ }
++
++ // --------------------------------------
++ // Check for float->int copy; requires a trip through memory.
++ if (src_lo_rc == rc_float && dst_lo_rc == rc_int) {
++ Unimplemented();
++ }
++
++ // --------------------------------------
++ // Check for integer reg-reg copy.
++ if (src_lo_rc == rc_int && dst_lo_rc == rc_int) {
++ Register Rsrc = as_Register(Matcher::_regEncode[src_lo]);
++ Register Rdst = as_Register(Matcher::_regEncode[dst_lo]);
++ size = (Rsrc != Rdst) ? 4 : 0;
++
++ if (cbuf) {
++ MacroAssembler _masm(cbuf);
++ if (size) {
++ __ mr(Rdst, Rsrc);
++ }
++ }
++#ifndef PRODUCT
++ else if (!do_size) {
++ if (size) {
++ st->print("%-7s %s, %s \t// spill copy", "MR", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
++ } else {
++ st->print("%-7s %s, %s \t// spill copy", "MR-NOP", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
++ }
++ }
++#endif
++ return size;
++ }
++
++ // Check for integer store.
++ if (src_lo_rc == rc_int && dst_lo_rc == rc_stack) {
++ int dst_offset = ra_->reg2offset(dst_lo);
++ if (src_hi != OptoReg::Bad) {
++ assert(src_hi_rc==rc_int && dst_hi_rc==rc_stack,
++ "expected same type of move for high parts");
++ size += ld_st_helper(cbuf, "STD ", Assembler::STD_OPCODE, src_lo, dst_offset, !do_size, C, st);
++ } else {
++ size += ld_st_helper(cbuf, "STW ", Assembler::STW_OPCODE, src_lo, dst_offset, !do_size, C, st);
++ }
++ return size;
++ }
++
++ // Check for integer load.
++ if (dst_lo_rc == rc_int && src_lo_rc == rc_stack) {
++ int src_offset = ra_->reg2offset(src_lo);
++ if (src_hi != OptoReg::Bad) {
++ assert(dst_hi_rc==rc_int && src_hi_rc==rc_stack,
++ "expected same type of move for high parts");
++ size += ld_st_helper(cbuf, "LD ", Assembler::LD_OPCODE, dst_lo, src_offset, !do_size, C, st);
++ } else {
++ size += ld_st_helper(cbuf, "LWZ ", Assembler::LWZ_OPCODE, dst_lo, src_offset, !do_size, C, st);
++ }
++ return size;
++ }
++
++ // Check for float reg-reg copy.
++ if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
++ if (cbuf) {
++ MacroAssembler _masm(cbuf);
++ FloatRegister Rsrc = as_FloatRegister(Matcher::_regEncode[src_lo]);
++ FloatRegister Rdst = as_FloatRegister(Matcher::_regEncode[dst_lo]);
++ __ fmr(Rdst, Rsrc);
++ }
++#ifndef PRODUCT
++ else if (!do_size) {
++ st->print("%-7s %s, %s \t// spill copy", "FMR", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
++ }
++#endif
++ return 4;
++ }
++
++ // Check for float store.
++ if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
++ int dst_offset = ra_->reg2offset(dst_lo);
++ if (src_hi != OptoReg::Bad) {
++ assert(src_hi_rc==rc_float && dst_hi_rc==rc_stack,
++ "expected same type of move for high parts");
++ size += ld_st_helper(cbuf, "STFD", Assembler::STFD_OPCODE, src_lo, dst_offset, !do_size, C, st);
++ } else {
++ size += ld_st_helper(cbuf, "STFS", Assembler::STFS_OPCODE, src_lo, dst_offset, !do_size, C, st);
++ }
++ return size;
++ }
++
++ // Check for float load.
++ if (dst_lo_rc == rc_float && src_lo_rc == rc_stack) {
++ int src_offset = ra_->reg2offset(src_lo);
++ if (src_hi != OptoReg::Bad) {
++ assert(dst_hi_rc==rc_float && src_hi_rc==rc_stack,
++ "expected same type of move for high parts");
++ size += ld_st_helper(cbuf, "LFD ", Assembler::LFD_OPCODE, dst_lo, src_offset, !do_size, C, st);
++ } else {
++ size += ld_st_helper(cbuf, "LFS ", Assembler::LFS_OPCODE, dst_lo, src_offset, !do_size, C, st);
++ }
++ return size;
++ }
++
++ // --------------------------------------------------------------------
++ // Check for hi bits still needing moving. Only happens for misaligned
++ // arguments to native calls.
++ if (src_hi == dst_hi)
++ return size; // Self copy; no move.
++
++ assert(src_hi_rc != rc_bad && dst_hi_rc != rc_bad, "src_hi & dst_hi cannot be Bad");
++ ShouldNotReachHere(); // Unimplemented
++ return 0;
++}
++
++#ifndef PRODUCT
++void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
++ if (!ra_)
++ st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
++ else
++ implementation(NULL, ra_, false, st);
++}
++#endif
++
++void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
++ implementation(&cbuf, ra_, false, NULL);
++}
++
++uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
++ return implementation(NULL, ra_, true, NULL);
++}
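++
++// Note: implementation() above serves three purposes, selected by its
++// arguments: given a CodeBuffer it emits the spill code, with do_size set
++// it only computes the size in bytes, and given an outputStream it prints
++// the disassembly for debug output.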
++
++#if 0 // TODO: PPC port
++ArchOpcode MachSpillCopyNode_archOpcode(MachSpillCopyNode *n, PhaseRegAlloc *ra_) {
++#ifndef PRODUCT
++ if (ra_->node_regs_max_index() == 0) return archOpcode_undefined;
++#endif
++ assert(ra_->node_regs_max_index() != 0, "");
++
++ // Get registers to move.
++ OptoReg::Name src_hi = ra_->get_reg_second(n->in(1));
++ OptoReg::Name src_lo = ra_->get_reg_first(n->in(1));
++ OptoReg::Name dst_hi = ra_->get_reg_second(n);
++ OptoReg::Name dst_lo = ra_->get_reg_first(n);
++
++ enum RC src_lo_rc = rc_class(src_lo);
++ enum RC dst_lo_rc = rc_class(dst_lo);
++
++ if (src_lo == dst_lo && src_hi == dst_hi)
++ return ppc64Opcode_none; // Self copy, no move.
++
++ // --------------------------------------
++ // Memory->Memory Spill. Use R0 to hold the value.
++ if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
++ return ppc64Opcode_compound;
++ }
++
++ // --------------------------------------
++ // Check for float->int copy; requires a trip through memory.
++ if (src_lo_rc == rc_float && dst_lo_rc == rc_int) {
++ Unimplemented();
++ }
++
++ // --------------------------------------
++ // Check for integer reg-reg copy.
++ if (src_lo_rc == rc_int && dst_lo_rc == rc_int) {
++ Register Rsrc = as_Register(Matcher::_regEncode[src_lo]);
++ Register Rdst = as_Register(Matcher::_regEncode[dst_lo]);
++ if (Rsrc == Rdst) {
++ return ppc64Opcode_none;
++ } else {
++ return ppc64Opcode_or;
++ }
++ }
++
++ // Check for integer store.
++ if (src_lo_rc == rc_int && dst_lo_rc == rc_stack) {
++ if (src_hi != OptoReg::Bad) {
++ return ppc64Opcode_std;
++ } else {
++ return ppc64Opcode_stw;
++ }
++ }
++
++ // Check for integer load.
++ if (dst_lo_rc == rc_int && src_lo_rc == rc_stack) {
++ if (src_hi != OptoReg::Bad) {
++ return ppc64Opcode_ld;
++ } else {
++ return ppc64Opcode_lwz;
++ }
++ }
++
++ // Check for float reg-reg copy.
++ if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
++ return ppc64Opcode_fmr;
++ }
++
++ // Check for float store.
++ if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
++ if (src_hi != OptoReg::Bad) {
++ return ppc64Opcode_stfd;
++ } else {
++ return ppc64Opcode_stfs;
++ }
++ }
++
++ // Check for float load.
++ if (dst_lo_rc == rc_float && src_lo_rc == rc_stack) {
++ if (src_hi != OptoReg::Bad) {
++ return ppc64Opcode_lfd;
++ } else {
++ return ppc64Opcode_lfs;
++ }
++ }
++
++ // --------------------------------------------------------------------
++ // Check for hi bits still needing moving. Only happens for misaligned
++ // arguments to native calls.
++ if (src_hi == dst_hi)
++ return ppc64Opcode_none; // Self copy; no move.
++
++ ShouldNotReachHere();
++ return ppc64Opcode_undefined;
++}
++#endif // PPC port
++
++#ifndef PRODUCT
++void MachNopNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
++ st->print("NOP \t// %d nops to pad for loops.", _count);
++}
++#endif
++
++void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *) const {
++ MacroAssembler _masm(&cbuf);
++ // _count contains the number of nops needed for padding.
++ for (int i = 0; i < _count; i++) {
++ __ nop();
++ }
++}
++
++uint MachNopNode::size(PhaseRegAlloc *ra_) const {
++ return _count * 4;
++}
++
++#ifndef PRODUCT
++void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
++ int reg = ra_->get_reg_first(this);
++ st->print("ADDI %s, SP, %d \t// box node", Matcher::regName[reg], offset);
++}
++#endif
++
++void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
++ MacroAssembler _masm(&cbuf);
++
++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
++ int reg = ra_->get_encode(this);
++
++ if (Assembler::is_simm(offset, 16)) {
++ __ addi(as_Register(reg), R1, offset);
++ } else {
++ ShouldNotReachHere();
++ }
++}
++
++uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
++ // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
++ return 4;
++}
++
++#ifndef PRODUCT
++void MachUEPNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
++ st->print_cr("---- MachUEPNode ----");
++ st->print_cr("...");
++}
++#endif
++
++void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
++ // This is the unverified entry point.
++ MacroAssembler _masm(&cbuf);
++
++ // Inline_cache contains a klass.
++ Register ic_klass = as_Register(Matcher::inline_cache_reg_encode());
++ Register receiver_klass = R0; // tmp
++
++ assert_different_registers(ic_klass, receiver_klass, R11_scratch1, R3_ARG1);
++ assert(R11_scratch1 == R11, "need prologue scratch register");
++
++ // Check for NULL argument if we don't have implicit null checks.
++ if (!ImplicitNullChecks || !os::zero_page_read_protected()) {
++ if (TrapBasedNullChecks) {
++ __ trap_null_check(R3_ARG1);
++ } else {
++ Label valid;
++ __ cmpdi(CCR0, R3_ARG1, 0);
++ __ bne_predict_taken(CCR0, valid);
++ // We have a null argument, branch to ic_miss_stub.
++ __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(),
++ relocInfo::runtime_call_type);
++ __ bind(valid);
++ }
++ }
++ // Assume argument is not NULL, load klass from receiver.
++ __ load_klass(receiver_klass, R3_ARG1);
++
++ if (TrapBasedICMissChecks) {
++ __ trap_ic_miss_check(receiver_klass, ic_klass);
++ } else {
++ Label valid;
++ __ cmpd(CCR0, receiver_klass, ic_klass);
++ __ beq_predict_taken(CCR0, valid);
++ // We have an unexpected klass, branch to ic_miss_stub.
++ __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(),
++ relocInfo::runtime_call_type);
++ __ bind(valid);
++ }
++
++ // Argument is valid and klass is as expected, continue.
++}
++
++#if 0 // TODO: PPC port
++// Optimize UEP code on z (save a load_const() call in main path).
++int MachUEPNode::ep_offset() {
++ return 0;
++}
++#endif
++
++uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
++ // Variable size. Determine dynamically.
++ return MachNode::size(ra_);
++}
++
++//=============================================================================
++
++%} // interrupt source block (resumed after source_hpp below)
++
++source_hpp %{ // Header information of the source block.
++
++class HandlerImpl {
++
++ public:
++
++ static int emit_exception_handler(CodeBuffer &cbuf);
++ static int emit_deopt_handler(CodeBuffer& cbuf);
++
++ static uint size_exception_handler() {
++ // The exception_handler is a b64_patchable.
++ return MacroAssembler::b64_patchable_size;
++ }
++
++ static uint size_deopt_handler() {
++ // The deopt_handler is a bl64_patchable.
++ return MacroAssembler::bl64_patchable_size;
++ }
++
++};
++
++%} // end source_hpp
++
++source %{
++
++int HandlerImpl::emit_exception_handler(CodeBuffer &cbuf) {
++ MacroAssembler _masm(&cbuf);
++
++ address base = __ start_a_stub(size_exception_handler());
++ if (base == NULL) return 0; // CodeBuffer::expand failed
++
++ int offset = __ offset();
++ __ b64_patchable((address)OptoRuntime::exception_blob()->content_begin(),
++ relocInfo::runtime_call_type);
++ assert(__ offset() - offset == (int)size_exception_handler(), "must be fixed size");
++ __ end_a_stub();
++
++ return offset;
++}
++
++// The deopt_handler is like the exception handler, but instead of jumping to
++// the exception blob it calls the deoptimization blob.
++int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
++ MacroAssembler _masm(&cbuf);
++
++ address base = __ start_a_stub(size_deopt_handler());
++ if (base == NULL) return 0; // CodeBuffer::expand failed
++
++ int offset = __ offset();
++ __ bl64_patchable((address)SharedRuntime::deopt_blob()->unpack(),
++ relocInfo::runtime_call_type);
++ assert(__ offset() - offset == (int) size_deopt_handler(), "must be fixed size");
++ __ end_a_stub();
++
++ return offset;
++}
++
++//=============================================================================
++
++// Use a frame slots bias for frameless methods if accessing the stack.
++static int frame_slots_bias(int reg_enc, PhaseRegAlloc* ra_) {
++ if (as_Register(reg_enc) == R1_SP) {
++ return 0; // TODO: PPC port ra_->C->frame_slots_sp_bias_in_bytes();
++ }
++ return 0;
++}
++
++const bool Matcher::match_rule_supported(int opcode) {
++ if (!has_match_rule(opcode))
++ return false;
++
++ switch (opcode) {
++ case Op_SqrtD:
++ return VM_Version::has_fsqrt();
++ case Op_CountLeadingZerosI:
++ case Op_CountLeadingZerosL:
++ case Op_CountTrailingZerosI:
++ case Op_CountTrailingZerosL:
++ if (!UseCountLeadingZerosInstructionsPPC64)
++ return false;
++ break;
++
++ case Op_PopCountI:
++ case Op_PopCountL:
++ return (UsePopCountInstruction && VM_Version::has_popcntw());
++
++ case Op_StrComp:
++ return SpecialStringCompareTo;
++ case Op_StrEquals:
++ return SpecialStringEquals;
++ case Op_StrIndexOf:
++ return SpecialStringIndexOf;
++ }
++
++ return true; // Per default match rules are supported.
++}
++
++int Matcher::regnum_to_fpu_offset(int regnum) {
++ // No user for this method?
++ Unimplemented();
++ return 999;
++}
++
++const bool Matcher::convL2FSupported(void) {
++ // fcfids can do the conversion (>= Power7).
++ // fcfid + frsp showed rounding problem when result should be 0x3f800001.
++ return VM_Version::has_fcfids(); // False means that conversion is done by runtime call.
++}
++
++// Vector width in bytes.
++const int Matcher::vector_width_in_bytes(BasicType bt) {
++ assert(MaxVectorSize == 8, "");
++ return 8;
++}
++
++// Vector ideal reg.
++const int Matcher::vector_ideal_reg(int size) {
++ assert(MaxVectorSize == 8 && size == 8, "");
++ return Op_RegL;
++}
++
++const int Matcher::vector_shift_count_ideal_reg(int size) {
++ fatal("vector shift is not supported");
++ return Node::NotAMachineReg;
++}
++
++// Limits on vector size (number of elements) loaded into vector.
++const int Matcher::max_vector_size(const BasicType bt) {
++ assert(is_java_primitive(bt), "only primitive type vectors");
++ return vector_width_in_bytes(bt)/type2aelembytes(bt);
++}
++
++const int Matcher::min_vector_size(const BasicType bt) {
++ return max_vector_size(bt); // Same as max.
++}
++
++// PPC doesn't support misaligned vectors store/load.
++const bool Matcher::misaligned_vectors_ok() {
++ return false;
++}
++
++// PPC AES support not yet implemented
++const bool Matcher::pass_original_key_for_aes() {
++ return false;
++}
++
++// RETURNS: whether this branch offset is short enough that a short
++// branch can be used.
++//
++// If the platform does not provide any short branch variants, then
++// this method should return `false' for offset 0.
++//
++// `Compile::Fill_buffer' decides on the basis of this information
++// whether to run the `Compile::Shorten_branches' pass at all.
++//
++// `Compile::Shorten_branches' in turn decides on the basis of this
++// information whether to replace particular branch sites by short
++// ones.
++bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
++ // Is the offset within the range of a ppc64 pc relative branch?
++ bool b;
++
++ const int safety_zone = 3 * BytesPerInstWord;
++ b = Assembler::is_simm((offset<0 ? offset-safety_zone : offset+safety_zone),
++ 29 - 16 + 1 + 2);
++ return b;
++}
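++
++// Note: 29 - 16 + 1 + 2 == 16. The conditional branch instruction (bc)
++// carries a 14-bit BD field that is implicitly shifted left by 2, i.e. a
++// signed 16-bit byte displacement (+/- 32 KB). With the 12-byte safety
++// zone, an offset of 32000 is still short while 40000 is not.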
++
++const bool Matcher::isSimpleConstant64(jlong value) {
++ // Probably always true, even if a temp register is required.
++ return true;
++}
++/* TODO: PPC port
++// Make a new machine dependent decode node (with its operands).
++MachTypeNode *Matcher::make_decode_node(Compile *C) {
++ assert(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0,
++ "This method is only implemented for unscaled cOops mode so far");
++ MachTypeNode *decode = new (C) decodeN_unscaledNode();
++ decode->set_opnd_array(0, new (C) iRegPdstOper());
++ decode->set_opnd_array(1, new (C) iRegNsrcOper());
++ return decode;
++}
++*/
++// Threshold size for cleararray.
++const int Matcher::init_array_short_size = 8 * BytesPerLong;
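++// With BytesPerLong == 8 this is a 64-byte threshold: clear-array
++// operations up to 8 longs are presumably expanded inline, larger ones
++// via a loop.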
++
++// false => size gets scaled to BytesPerLong, ok.
++const bool Matcher::init_array_count_is_in_bytes = false;
++
++// Use conditional move (CMOVL) on Power7.
++const int Matcher::long_cmove_cost() { return 0; } // A nonzero cost would only make long cmoves more expensive than int cmoves.
++
++// Suppress CMOVF. Conditional move available (sort of) on PPC64 only from P7 onwards. Not exploited yet.
++// fsel doesn't accept a condition register as input, so this would be slightly different.
++const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; }
++
++// Power6 requires postalloc expand (see block.cpp for description of postalloc expand).
++const bool Matcher::require_postalloc_expand = true;
++
++// Should the Matcher clone shifts on addressing modes, expecting them to
++// be subsumed into complex addressing expressions or compute them into
++// registers? True for Intel but false for most RISCs.
++const bool Matcher::clone_shift_expressions = false;
++
++// Do we need to mask the count passed to shift instructions or does
++// the cpu only look at the lower 5/6 bits anyway?
++// Off, as masks are generated in expand rules where required.
++// Constant shift counts are handled in Ideal phase.
++const bool Matcher::need_masked_shift_count = false;
++
++// This affects two different things:
++// - how Decode nodes are matched
++// - how ImplicitNullCheck opportunities are recognized
++// If true, the matcher will try to remove all Decodes and match them
++// (as operands) into nodes. NullChecks are not prepared to deal with
++// Decodes by final_graph_reshaping().
++// If false, final_graph_reshaping() forces the decode behind the Cmp
++// for a NullCheck. The matcher matches the Decode node into a register.
++// Implicit_null_check optimization moves the Decode along with the
++// memory operation back up before the NullCheck.
++bool Matcher::narrow_oop_use_complex_address() {
++ // TODO: PPC port if (MatchDecodeNodes) return true;
++ return false;
++}
++
++bool Matcher::narrow_klass_use_complex_address() {
++ NOT_LP64(ShouldNotCallThis());
++ assert(UseCompressedClassPointers, "only for compressed klass code");
++ // TODO: PPC port if (MatchDecodeNodes) return true;
++ return false;
++}
++
++// Is it better to copy float constants, or load them directly from memory?
++// Intel can load a float constant from a direct address, requiring no
++// extra registers. Most RISCs will have to materialize an address into a
++// register first, so they would do better to copy the constant from stack.
++const bool Matcher::rematerialize_float_constants = false;
++
++// If CPU can load and store mis-aligned doubles directly then no fixup is
++// needed. Else we split the double into 2 integer pieces and move it
++// piece-by-piece. Only happens when passing doubles into C code as the
++// Java calling convention forces doubles to be aligned.
++const bool Matcher::misaligned_doubles_ok = true;
++
++void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
++ Unimplemented();
++}
++
++// Advertise here if the CPU requires explicit rounding operations
++// to implement the UseStrictFP mode.
++const bool Matcher::strict_fp_requires_explicit_rounding = false;
++
++// Do floats take an entire double register or just half?
++//
++// A float occupies a ppc64 double register. For the allocator, a
++// ppc64 double register appears as a pair of float registers.
++bool Matcher::float_in_double() { return true; }
++
++// Do ints take an entire long register or just half?
++// The relevant question is how the int is callee-saved:
++// the whole long is written but de-opt'ing will have to extract
++// the relevant 32 bits.
++const bool Matcher::int_in_long = true;
++
++// Constants for c2c and c calling conventions.
++
++const MachRegisterNumbers iarg_reg[8] = {
++ R3_num, R4_num, R5_num, R6_num,
++ R7_num, R8_num, R9_num, R10_num
++};
++
++const MachRegisterNumbers farg_reg[13] = {
++ F1_num, F2_num, F3_num, F4_num,
++ F5_num, F6_num, F7_num, F8_num,
++ F9_num, F10_num, F11_num, F12_num,
++ F13_num
++};
++
++const int num_iarg_registers = sizeof(iarg_reg) / sizeof(iarg_reg[0]);
++
++const int num_farg_registers = sizeof(farg_reg) / sizeof(farg_reg[0]);
++
++// Return whether or not this register is ever used as an argument. This
++// function is used on startup to build the trampoline stubs in generateOptoStub.
++// Registers not mentioned will be killed by the VM call in the trampoline, and
++// arguments in those registers will not be available to the callee.
++bool Matcher::can_be_java_arg(int reg) {
++ // We return true for all registers contained in iarg_reg[] and
++ // farg_reg[] and their virtual halves.
++ // We must include the virtual halves in order to get STDs and LDs
++ // instead of STWs and LWs in the trampoline stubs.
++
++ if ( reg == R3_num || reg == R3_H_num
++ || reg == R4_num || reg == R4_H_num
++ || reg == R5_num || reg == R5_H_num
++ || reg == R6_num || reg == R6_H_num
++ || reg == R7_num || reg == R7_H_num
++ || reg == R8_num || reg == R8_H_num
++ || reg == R9_num || reg == R9_H_num
++ || reg == R10_num || reg == R10_H_num)
++ return true;
++
++ if ( reg == F1_num || reg == F1_H_num
++ || reg == F2_num || reg == F2_H_num
++ || reg == F3_num || reg == F3_H_num
++ || reg == F4_num || reg == F4_H_num
++ || reg == F5_num || reg == F5_H_num
++ || reg == F6_num || reg == F6_H_num
++ || reg == F7_num || reg == F7_H_num
++ || reg == F8_num || reg == F8_H_num
++ || reg == F9_num || reg == F9_H_num
++ || reg == F10_num || reg == F10_H_num
++ || reg == F11_num || reg == F11_H_num
++ || reg == F12_num || reg == F12_H_num
++ || reg == F13_num || reg == F13_H_num)
++ return true;
++
++ return false;
++}
++
++bool Matcher::is_spillable_arg(int reg) {
++ return can_be_java_arg(reg);
++}
++
++bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
++ return false;
++}
++
++// Register for DIVI projection of divmodI.
++RegMask Matcher::divI_proj_mask() {
++ ShouldNotReachHere();
++ return RegMask();
++}
++
++// Register for MODI projection of divmodI.
++RegMask Matcher::modI_proj_mask() {
++ ShouldNotReachHere();
++ return RegMask();
++}
++
++// Register for DIVL projection of divmodL.
++RegMask Matcher::divL_proj_mask() {
++ ShouldNotReachHere();
++ return RegMask();
++}
++
++// Register for MODL projection of divmodL.
++RegMask Matcher::modL_proj_mask() {
++ ShouldNotReachHere();
++ return RegMask();
++}
++
++const RegMask Matcher::method_handle_invoke_SP_save_mask() {
++ return RegMask();
++}
++
++%}
++
++//----------ENCODING BLOCK-----------------------------------------------------
++// This block specifies the encoding classes used by the compiler to output
++// byte streams. Encoding classes are parameterized macros used by
++// Machine Instruction Nodes in order to generate the bit encoding of the
++// instruction. Operands specify their base encoding interface with the
++// interface keyword. Four interfaces are currently supported:
++// REG_INTER, CONST_INTER, MEMORY_INTER, and COND_INTER. REG_INTER causes an
++// operand to generate a function which returns its register number when
++// queried. CONST_INTER causes an operand to generate a function which
++// returns the value of the constant when queried. MEMORY_INTER causes an
++// operand to generate four functions which return the Base Register, the
++// Index Register, the Scale Value, and the Offset Value of the operand when
++// queried. COND_INTER causes an operand to generate six functions which
++// return the encoding code (i.e., the encoding bits of the instruction)
++// associated with each basic boolean condition of a conditional instruction.
++//
++// Instructions specify two basic values for encoding. Again, a function
++// is available to check whether a constant displacement is an oop. Instructions
++// use the ins_encode keyword to specify their encoding classes (which must be
++// a sequence of enc_class names, with their parameters, specified in
++// the encoding block), and they use the
++// opcode keyword to specify, in order, their primary, secondary, and
++// tertiary opcode. Only the opcode sections which a particular instruction
++// needs for encoding need to be specified.
++encode %{
++ enc_class enc_unimplemented %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++ MacroAssembler _masm(&cbuf);
++ __ unimplemented("Unimplemented mach node encoding in AD file.", 13);
++ %}
++
++ enc_class enc_untested %{
++#ifdef ASSERT
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++ MacroAssembler _masm(&cbuf);
++ __ untested("Untested mach node encoding in AD file.");
++#else
++ // TODO: PPC port $archOpcode(ppc64Opcode_none);
++#endif
++ %}
++
++ enc_class enc_lbz(iRegIdst dst, memory mem) %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_lbz);
++ MacroAssembler _masm(&cbuf);
++ int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
++ __ lbz($dst$$Register, Idisp, $mem$$base$$Register);
++ %}
++
++ // Load acquire.
++ enc_class enc_lbz_ac(iRegIdst dst, memory mem) %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++ MacroAssembler _masm(&cbuf);
++ int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
++ __ lbz($dst$$Register, Idisp, $mem$$base$$Register);
++ __ twi_0($dst$$Register);
++ __ isync();
++ %}
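++
++  // Note on the acquire sequence above: twi_0 is a trap-word-immediate
++  // that never traps, but it makes subsequent execution depend on the
++  // loaded value; the following isync then keeps later loads from being
++  // performed early. This load/twi/isync idiom is the classic PowerPC
++  // load-acquire and is usually cheaper than a full lwsync barrier.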
++
++ enc_class enc_lhz(iRegIdst dst, memory mem) %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_lhz);
++
++ MacroAssembler _masm(&cbuf);
++ int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
++ __ lhz($dst$$Register, Idisp, $mem$$base$$Register);
++ %}
++
++ // Load acquire.
++ enc_class enc_lhz_ac(iRegIdst dst, memory mem) %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++
++ MacroAssembler _masm(&cbuf);
++ int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
++ __ lhz($dst$$Register, Idisp, $mem$$base$$Register);
++ __ twi_0($dst$$Register);
++ __ isync();
++ %}
++
++ enc_class enc_lwz(iRegIdst dst, memory mem) %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_lwz);
++
++ MacroAssembler _masm(&cbuf);
++ int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
++ __ lwz($dst$$Register, Idisp, $mem$$base$$Register);
++ %}
++
++ // Load acquire.
++ enc_class enc_lwz_ac(iRegIdst dst, memory mem) %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++
++ MacroAssembler _masm(&cbuf);
++ int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
++ __ lwz($dst$$Register, Idisp, $mem$$base$$Register);
++ __ twi_0($dst$$Register);
++ __ isync();
++ %}
++
++ enc_class enc_ld(iRegLdst dst, memoryAlg4 mem) %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_ld);
++ MacroAssembler _masm(&cbuf);
++ int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
++ // Operand 'ds' requires 4-alignment.
++ assert((Idisp & 0x3) == 0, "unaligned offset");
++ __ ld($dst$$Register, Idisp, $mem$$base$$Register);
++ %}
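++
++  // The 4-alignment assert above reflects the DS instruction format:
++  // ld/std encode only the upper 14 bits of the 16-bit displacement; the
++  // low two bits are not encoded and must therefore be zero.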
++
++ // Load acquire.
++ enc_class enc_ld_ac(iRegLdst dst, memoryAlg4 mem) %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++ MacroAssembler _masm(&cbuf);
++ int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
++ // Operand 'ds' requires 4-alignment.
++ assert((Idisp & 0x3) == 0, "unaligned offset");
++ __ ld($dst$$Register, Idisp, $mem$$base$$Register);
++ __ twi_0($dst$$Register);
++ __ isync();
++ %}
++
++ enc_class enc_lfd(RegF dst, memory mem) %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_lfd);
++ MacroAssembler _masm(&cbuf);
++ int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
++ __ lfd($dst$$FloatRegister, Idisp, $mem$$base$$Register);
++ %}
++
++ enc_class enc_load_long_constL(iRegLdst dst, immL src, iRegLdst toc) %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_ld);
++
++ MacroAssembler _masm(&cbuf);
++ int toc_offset = 0;
++
++ if (!ra_->C->in_scratch_emit_size()) {
++ address const_toc_addr;
++ // Create a non-oop constant, no relocation needed.
++ // If it is an IC, it has a virtual_call_Relocation.
++ const_toc_addr = __ long_constant((jlong)$src$$constant);
++
++ // Get the constant's TOC offset.
++ toc_offset = __ offset_to_method_toc(const_toc_addr);
++
++ // Keep the current instruction offset in mind.
++ ((loadConLNode*)this)->_cbuf_insts_offset = __ offset();
++ }
++
++ __ ld($dst$$Register, toc_offset, $toc$$Register);
++ %}
++
++ enc_class enc_load_long_constL_hi(iRegLdst dst, iRegLdst toc, immL src) %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_addis);
++
++ MacroAssembler _masm(&cbuf);
++
++ if (!ra_->C->in_scratch_emit_size()) {
++ address const_toc_addr;
++ // Create a non-oop constant, no relocation needed.
++ // If it is an IC, it has a virtual_call_Relocation.
++ const_toc_addr = __ long_constant((jlong)$src$$constant);
++
++ // Get the constant's TOC offset.
++ const int toc_offset = __ offset_to_method_toc(const_toc_addr);
++ // Store the toc offset of the constant.
++ ((loadConL_hiNode*)this)->_const_toc_offset = toc_offset;
++
++ // Also keep the current instruction offset in mind.
++ ((loadConL_hiNode*)this)->_cbuf_insts_offset = __ offset();
++ }
++
++ __ addis($dst$$Register, $toc$$Register, MacroAssembler::largeoffset_si16_si16_hi(_const_toc_offset));
++ %}
++
++%} // encode
++
++source %{
++
++typedef struct {
++ loadConL_hiNode *_large_hi;
++ loadConL_loNode *_large_lo;
++ loadConLNode *_small;
++ MachNode *_last;
++} loadConLNodesTuple;
++
++loadConLNodesTuple loadConLNodesTuple_create(Compile *C, PhaseRegAlloc *ra_, Node *toc, immLOper *immSrc,
++ OptoReg::Name reg_second, OptoReg::Name reg_first) {
++ loadConLNodesTuple nodes;
++
++ const bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
++ if (large_constant_pool) {
++ // Create new nodes.
++ loadConL_hiNode *m1 = new (C) loadConL_hiNode();
++ loadConL_loNode *m2 = new (C) loadConL_loNode();
++
++ // inputs for new nodes
++ m1->add_req(NULL, toc);
++ m2->add_req(NULL, m1);
++
++ // operands for new nodes
++ m1->_opnds[0] = new (C) iRegLdstOper(); // dst
++ m1->_opnds[1] = immSrc; // src
++ m1->_opnds[2] = new (C) iRegPdstOper(); // toc
++ m2->_opnds[0] = new (C) iRegLdstOper(); // dst
++ m2->_opnds[1] = immSrc; // src
++ m2->_opnds[2] = new (C) iRegLdstOper(); // base
++
++ // Initialize ins_attrib TOC fields.
++ m1->_const_toc_offset = -1;
++ m2->_const_toc_offset_hi_node = m1;
++
++ // Initialize ins_attrib instruction offset.
++ m1->_cbuf_insts_offset = -1;
++
++ // register allocation for new nodes
++ ra_->set_pair(m1->_idx, reg_second, reg_first);
++ ra_->set_pair(m2->_idx, reg_second, reg_first);
++
++ // Create result.
++ nodes._large_hi = m1;
++ nodes._large_lo = m2;
++ nodes._small = NULL;
++ nodes._last = nodes._large_lo;
++ assert(m2->bottom_type()->isa_long(), "must be long");
++ } else {
++ loadConLNode *m2 = new (C) loadConLNode();
++
++ // inputs for new nodes
++ m2->add_req(NULL, toc);
++
++ // operands for new nodes
++ m2->_opnds[0] = new (C) iRegLdstOper(); // dst
++ m2->_opnds[1] = immSrc; // src
++ m2->_opnds[2] = new (C) iRegPdstOper(); // toc
++
++ // Initialize ins_attrib instruction offset.
++ m2->_cbuf_insts_offset = -1;
++
++ // register allocation for new nodes
++ ra_->set_pair(m2->_idx, reg_second, reg_first);
++
++ // Create result.
++ nodes._large_hi = NULL;
++ nodes._large_lo = NULL;
++ nodes._small = m2;
++ nodes._last = nodes._small;
++ assert(m2->bottom_type()->isa_long(), "must be long");
++ }
++
++ return nodes;
++}
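++
++// The tuple reflects the two TOC addressing modes: with a large constant
++// pool the constant is reached via an addis/ld pair (the hi node adds the
++// upper half of the offset, the lo node loads with the lower half), while
++// a small pool gets by with a single ld and a 16-bit offset.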
++
++%} // source
++
++encode %{
++  // Postalloc expand emitter for loading a long constant from the method's TOC.
++  // An enc_class is needed because constanttablebase is not supported by
++  // postalloc expand.
++ enc_class postalloc_expand_load_long_constant(iRegLdst dst, immL src, iRegLdst toc) %{
++ // Create new nodes.
++ loadConLNodesTuple loadConLNodes =
++ loadConLNodesTuple_create(C, ra_, n_toc, op_src,
++ ra_->get_reg_second(this), ra_->get_reg_first(this));
++
++ // Push new nodes.
++ if (loadConLNodes._large_hi) nodes->push(loadConLNodes._large_hi);
++ if (loadConLNodes._last) nodes->push(loadConLNodes._last);
++
++ // some asserts
++ assert(nodes->length() >= 1, "must have created at least 1 node");
++ assert(loadConLNodes._last->bottom_type()->isa_long(), "must be long");
++ %}
++
++ enc_class enc_load_long_constP(iRegLdst dst, immP src, iRegLdst toc) %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_ld);
++
++ MacroAssembler _masm(&cbuf);
++ int toc_offset = 0;
++
++ if (!ra_->C->in_scratch_emit_size()) {
++ intptr_t val = $src$$constant;
++ relocInfo::relocType constant_reloc = $src->constant_reloc(); // src
++ address const_toc_addr;
++ if (constant_reloc == relocInfo::oop_type) {
++ // Create an oop constant and a corresponding relocation.
++ AddressLiteral a = __ allocate_oop_address((jobject)val);
++ const_toc_addr = __ address_constant((address)a.value(), RelocationHolder::none);
++ __ relocate(a.rspec());
++ } else if (constant_reloc == relocInfo::metadata_type) {
++ AddressLiteral a = __ allocate_metadata_address((Metadata *)val);
++ const_toc_addr = __ address_constant((address)a.value(), RelocationHolder::none);
++ __ relocate(a.rspec());
++ } else {
++ // Create a non-oop constant, no relocation needed.
++ const_toc_addr = __ long_constant((jlong)$src$$constant);
++ }
++
++ // Get the constant's TOC offset.
++ toc_offset = __ offset_to_method_toc(const_toc_addr);
++ }
++
++ __ ld($dst$$Register, toc_offset, $toc$$Register);
++ %}
++
++ enc_class enc_load_long_constP_hi(iRegLdst dst, immP src, iRegLdst toc) %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_addis);
++
++ MacroAssembler _masm(&cbuf);
++ if (!ra_->C->in_scratch_emit_size()) {
++ intptr_t val = $src$$constant;
++ relocInfo::relocType constant_reloc = $src->constant_reloc(); // src
++ address const_toc_addr;
++ if (constant_reloc == relocInfo::oop_type) {
++ // Create an oop constant and a corresponding relocation.
++ AddressLiteral a = __ allocate_oop_address((jobject)val);
++ const_toc_addr = __ address_constant((address)a.value(), RelocationHolder::none);
++ __ relocate(a.rspec());
++ } else if (constant_reloc == relocInfo::metadata_type) {
++ AddressLiteral a = __ allocate_metadata_address((Metadata *)val);
++ const_toc_addr = __ address_constant((address)a.value(), RelocationHolder::none);
++ __ relocate(a.rspec());
++ } else { // non-oop pointers, e.g. card mark base, heap top
++ // Create a non-oop constant, no relocation needed.
++ const_toc_addr = __ long_constant((jlong)$src$$constant);
++ }
++
++ // Get the constant's TOC offset.
++ const int toc_offset = __ offset_to_method_toc(const_toc_addr);
++ // Store the toc offset of the constant.
++ ((loadConP_hiNode*)this)->_const_toc_offset = toc_offset;
++ }
++
++ __ addis($dst$$Register, $toc$$Register, MacroAssembler::largeoffset_si16_si16_hi(_const_toc_offset));
++ %}
++
++  // Postalloc expand emitter for loading a ptr constant from the method's TOC.
++  // An enc_class is needed because constanttablebase is not supported by
++  // postalloc expand.
++ enc_class postalloc_expand_load_ptr_constant(iRegPdst dst, immP src, iRegLdst toc) %{
++ const bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
++ if (large_constant_pool) {
++ // Create new nodes.
++ loadConP_hiNode *m1 = new (C) loadConP_hiNode();
++ loadConP_loNode *m2 = new (C) loadConP_loNode();
++
++ // inputs for new nodes
++ m1->add_req(NULL, n_toc);
++ m2->add_req(NULL, m1);
++
++ // operands for new nodes
++ m1->_opnds[0] = new (C) iRegPdstOper(); // dst
++ m1->_opnds[1] = op_src; // src
++ m1->_opnds[2] = new (C) iRegPdstOper(); // toc
++ m2->_opnds[0] = new (C) iRegPdstOper(); // dst
++ m2->_opnds[1] = op_src; // src
++ m2->_opnds[2] = new (C) iRegLdstOper(); // base
++
++ // Initialize ins_attrib TOC fields.
++ m1->_const_toc_offset = -1;
++ m2->_const_toc_offset_hi_node = m1;
++
++ // Register allocation for new nodes.
++ ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
++ ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
++
++ nodes->push(m1);
++ nodes->push(m2);
++ assert(m2->bottom_type()->isa_ptr(), "must be ptr");
++ } else {
++ loadConPNode *m2 = new (C) loadConPNode();
++
++ // inputs for new nodes
++ m2->add_req(NULL, n_toc);
++
++ // operands for new nodes
++ m2->_opnds[0] = new (C) iRegPdstOper(); // dst
++ m2->_opnds[1] = op_src; // src
++ m2->_opnds[2] = new (C) iRegPdstOper(); // toc
++
++ // Register allocation for new nodes.
++ ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
++
++ nodes->push(m2);
++ assert(m2->bottom_type()->isa_ptr(), "must be ptr");
++ }
++ %}
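++
++  // The expansion above mirrors loadConLNodesTuple_create(), open-coded
++  // because the ptr variant uses different node and operand types.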
++
++  // An enc_class is needed because constanttablebase is not supported by
++  // postalloc expand.
++ enc_class postalloc_expand_load_float_constant(regF dst, immF src, iRegLdst toc) %{
++ bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
++
++ MachNode *m2;
++ if (large_constant_pool) {
++ m2 = new (C) loadConFCompNode();
++ } else {
++ m2 = new (C) loadConFNode();
++ }
++ // inputs for new nodes
++ m2->add_req(NULL, n_toc);
++
++ // operands for new nodes
++ m2->_opnds[0] = op_dst;
++ m2->_opnds[1] = op_src;
++ m2->_opnds[2] = new (C) iRegPdstOper(); // constanttablebase
++
++ // register allocation for new nodes
++ ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
++ nodes->push(m2);
++ %}
++
++  // An enc_class is needed because constanttablebase is not supported by
++  // postalloc expand.
++ enc_class postalloc_expand_load_double_constant(regD dst, immD src, iRegLdst toc) %{
++ bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
++
++ MachNode *m2;
++ if (large_constant_pool) {
++ m2 = new (C) loadConDCompNode();
++ } else {
++ m2 = new (C) loadConDNode();
++ }
++ // inputs for new nodes
++ m2->add_req(NULL, n_toc);
++
++ // operands for new nodes
++ m2->_opnds[0] = op_dst;
++ m2->_opnds[1] = op_src;
++ m2->_opnds[2] = new (C) iRegPdstOper(); // constanttablebase
++
++ // register allocation for new nodes
++ ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
++ nodes->push(m2);
++ %}
++
++ enc_class enc_stw(iRegIsrc src, memory mem) %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_stw);
++ MacroAssembler _masm(&cbuf);
++ int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
++ __ stw($src$$Register, Idisp, $mem$$base$$Register);
++ %}
++
++ enc_class enc_std(iRegIsrc src, memoryAlg4 mem) %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_std);
++ MacroAssembler _masm(&cbuf);
++ int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
++ // Operand 'ds' requires 4-alignment.
++ assert((Idisp & 0x3) == 0, "unaligned offset");
++ __ std($src$$Register, Idisp, $mem$$base$$Register);
++ %}
++
++ enc_class enc_stfs(RegF src, memory mem) %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_stfs);
++ MacroAssembler _masm(&cbuf);
++ int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
++ __ stfs($src$$FloatRegister, Idisp, $mem$$base$$Register);
++ %}
++
++ enc_class enc_stfd(RegF src, memory mem) %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_stfd);
++ MacroAssembler _masm(&cbuf);
++ int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
++ __ stfd($src$$FloatRegister, Idisp, $mem$$base$$Register);
++ %}
++
++ // Use release_store for card-marking to ensure that previous
++ // oop-stores are visible before the card-mark change.
++ enc_class enc_cms_card_mark(memory mem, iRegLdst releaseFieldAddr) %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++ // FIXME: Implement this as a cmove and use a fixed condition code
++ // register which is written on every transition to compiled code,
++ // e.g. in call-stub and when returning from runtime stubs.
++ //
++ // Proposed code sequence for the cmove implementation:
++ //
++ // Label skip_release;
++ // __ beq(CCRfixed, skip_release);
++ // __ release();
++ // __ bind(skip_release);
++ // __ stb(card mark);
++
++ MacroAssembler _masm(&cbuf);
++ Label skip_storestore;
++
++#if 0 // TODO: PPC port
++ // Check CMSCollectorCardTableModRefBSExt::_requires_release and do the
++ // StoreStore barrier conditionally.
++ __ lwz(R0, 0, $releaseFieldAddr$$Register);
++ __ cmpwi(CCR0, R0, 0);
++ __ beq_predict_taken(CCR0, skip_storestore);
++#endif
++ __ li(R0, 0);
++ __ membar(Assembler::StoreStore);
++#if 0 // TODO: PPC port
++ __ bind(skip_storestore);
++#endif
++
++ // Do the store.
++ if ($mem$$index == 0) {
++ __ stb(R0, $mem$$disp, $mem$$base$$Register);
++ } else {
++ assert(0 == $mem$$disp, "no displacement possible with indexed load/stores on ppc");
++ __ stbx(R0, $mem$$base$$Register, $mem$$index$$Register);
++ }
++ %}
++
++ enc_class postalloc_expand_encode_oop(iRegNdst dst, iRegPdst src, flagsReg crx) %{
++
++ if (VM_Version::has_isel()) {
++ // use isel instruction with Power 7
++ cmpP_reg_imm16Node *n_compare = new (C) cmpP_reg_imm16Node();
++ encodeP_subNode *n_sub_base = new (C) encodeP_subNode();
++ encodeP_shiftNode *n_shift = new (C) encodeP_shiftNode();
++ cond_set_0_oopNode *n_cond_set = new (C) cond_set_0_oopNode();
++
++ n_compare->add_req(n_region, n_src);
++ n_compare->_opnds[0] = op_crx;
++ n_compare->_opnds[1] = op_src;
++ n_compare->_opnds[2] = new (C) immL16Oper(0);
++
++ n_sub_base->add_req(n_region, n_src);
++ n_sub_base->_opnds[0] = op_dst;
++ n_sub_base->_opnds[1] = op_src;
++ n_sub_base->_bottom_type = _bottom_type;
++
++ n_shift->add_req(n_region, n_sub_base);
++ n_shift->_opnds[0] = op_dst;
++ n_shift->_opnds[1] = op_dst;
++ n_shift->_bottom_type = _bottom_type;
++
++ n_cond_set->add_req(n_region, n_compare, n_shift);
++ n_cond_set->_opnds[0] = op_dst;
++ n_cond_set->_opnds[1] = op_crx;
++ n_cond_set->_opnds[2] = op_dst;
++ n_cond_set->_bottom_type = _bottom_type;
++
++ ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
++ ra_->set_pair(n_sub_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
++ ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
++ ra_->set_pair(n_cond_set->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
++
++ nodes->push(n_compare);
++ nodes->push(n_sub_base);
++ nodes->push(n_shift);
++ nodes->push(n_cond_set);
++
++ } else {
++ // before Power 7
++ moveRegNode *n_move = new (C) moveRegNode();
++ cmpP_reg_imm16Node *n_compare = new (C) cmpP_reg_imm16Node();
++ encodeP_shiftNode *n_shift = new (C) encodeP_shiftNode();
++ cond_sub_baseNode *n_sub_base = new (C) cond_sub_baseNode();
++
++ n_move->add_req(n_region, n_src);
++ n_move->_opnds[0] = op_dst;
++ n_move->_opnds[1] = op_src;
++ ra_->set_oop(n_move, true); // Until here, 'n_move' still produces an oop.
++
++ n_compare->add_req(n_region, n_src);
++ n_compare->add_prec(n_move);
++
++ n_compare->_opnds[0] = op_crx;
++ n_compare->_opnds[1] = op_src;
++ n_compare->_opnds[2] = new (C) immL16Oper(0);
++
++ n_sub_base->add_req(n_region, n_compare, n_src);
++ n_sub_base->_opnds[0] = op_dst;
++ n_sub_base->_opnds[1] = op_crx;
++ n_sub_base->_opnds[2] = op_src;
++ n_sub_base->_bottom_type = _bottom_type;
++
++ n_shift->add_req(n_region, n_sub_base);
++ n_shift->_opnds[0] = op_dst;
++ n_shift->_opnds[1] = op_dst;
++ n_shift->_bottom_type = _bottom_type;
++
++ ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
++ ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
++ ra_->set_pair(n_sub_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
++ ra_->set_pair(n_move->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
++
++ nodes->push(n_move);
++ nodes->push(n_compare);
++ nodes->push(n_sub_base);
++ nodes->push(n_shift);
++ }
++
++ assert(!(ra_->is_oop(this)), "sanity"); // This is not supposed to be GC'ed.
++ %}
++
++ enc_class postalloc_expand_encode_oop_not_null(iRegNdst dst, iRegPdst src) %{
++
++ encodeP_subNode *n1 = new (C) encodeP_subNode();
++ n1->add_req(n_region, n_src);
++ n1->_opnds[0] = op_dst;
++ n1->_opnds[1] = op_src;
++ n1->_bottom_type = _bottom_type;
++
++ encodeP_shiftNode *n2 = new (C) encodeP_shiftNode();
++ n2->add_req(n_region, n1);
++ n2->_opnds[0] = op_dst;
++ n2->_opnds[1] = op_dst;
++ n2->_bottom_type = _bottom_type;
++ ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
++ ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
++
++ nodes->push(n1);
++ nodes->push(n2);
++ assert(!(ra_->is_oop(this)), "sanity"); // This is not supposed to be GC'ed.
++ %}
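++
++  // The not-null variant can omit the compare and conditional-select of
++  // postalloc_expand_encode_oop above: with src proven non-NULL, encoding
++  // is simply subtract-base followed by the shift.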
++
++ enc_class postalloc_expand_decode_oop(iRegPdst dst, iRegNsrc src, flagsReg crx) %{
++ decodeN_shiftNode *n_shift = new (C) decodeN_shiftNode();
++ cmpN_reg_imm0Node *n_compare = new (C) cmpN_reg_imm0Node();
++
++ n_compare->add_req(n_region, n_src);
++ n_compare->_opnds[0] = op_crx;
++ n_compare->_opnds[1] = op_src;
++ n_compare->_opnds[2] = new (C) immN_0Oper(TypeNarrowOop::NULL_PTR);
++
++ n_shift->add_req(n_region, n_src);
++ n_shift->_opnds[0] = op_dst;
++ n_shift->_opnds[1] = op_src;
++ n_shift->_bottom_type = _bottom_type;
++
++ if (VM_Version::has_isel()) {
++ // use isel instruction with Power 7
++
++ decodeN_addNode *n_add_base = new (C) decodeN_addNode();
++ n_add_base->add_req(n_region, n_shift);
++ n_add_base->_opnds[0] = op_dst;
++ n_add_base->_opnds[1] = op_dst;
++ n_add_base->_bottom_type = _bottom_type;
++
++ cond_set_0_ptrNode *n_cond_set = new (C) cond_set_0_ptrNode();
++ n_cond_set->add_req(n_region, n_compare, n_add_base);
++ n_cond_set->_opnds[0] = op_dst;
++ n_cond_set->_opnds[1] = op_crx;
++ n_cond_set->_opnds[2] = op_dst;
++ n_cond_set->_bottom_type = _bottom_type;
++
++ assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!");
++ ra_->set_oop(n_cond_set, true);
++
++ ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
++ ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
++ ra_->set_pair(n_add_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
++ ra_->set_pair(n_cond_set->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
++
++ nodes->push(n_compare);
++ nodes->push(n_shift);
++ nodes->push(n_add_base);
++ nodes->push(n_cond_set);
++
++ } else {
++ // before Power 7
++ cond_add_baseNode *n_add_base = new (C) cond_add_baseNode();
++
++ n_add_base->add_req(n_region, n_compare, n_shift);
++ n_add_base->_opnds[0] = op_dst;
++ n_add_base->_opnds[1] = op_crx;
++ n_add_base->_opnds[2] = op_dst;
++ n_add_base->_bottom_type = _bottom_type;
++
++ assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!");
++ ra_->set_oop(n_add_base, true);
++
++ ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
++ ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
++ ra_->set_pair(n_add_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
++
++ nodes->push(n_compare);
++ nodes->push(n_shift);
++ nodes->push(n_add_base);
++ }
++ %}
++
++ enc_class postalloc_expand_decode_oop_not_null(iRegPdst dst, iRegNsrc src) %{
++ decodeN_shiftNode *n1 = new (C) decodeN_shiftNode();
++ n1->add_req(n_region, n_src);
++ n1->_opnds[0] = op_dst;
++ n1->_opnds[1] = op_src;
++ n1->_bottom_type = _bottom_type;
++
++ decodeN_addNode *n2 = new (C) decodeN_addNode();
++ n2->add_req(n_region, n1);
++ n2->_opnds[0] = op_dst;
++ n2->_opnds[1] = op_dst;
++ n2->_bottom_type = _bottom_type;
++ ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
++ ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
++
++ assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!");
++ ra_->set_oop(n2, true);
++
++ nodes->push(n1);
++ nodes->push(n2);
++ %}
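++
++  // Decoding mirrors encoding in reverse: shift the narrow oop left, then
++  // add the heap base; the not-null variant again skips the NULL check.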
++
++ enc_class enc_cmove_reg(iRegIdst dst, flagsReg crx, iRegIsrc src, cmpOp cmp) %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
++
++ MacroAssembler _masm(&cbuf);
++ int cc = $cmp$$cmpcode;
++ int flags_reg = $crx$$reg;
++ Label done;
++ assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
++ // Branch if not (cmp crx).
++ __ bc(cc_to_inverse_boint(cc), cc_to_biint(cc, flags_reg), done);
++ __ mr($dst$$Register, $src$$Register);
++ // TODO PPC port __ endgroup_if_needed(_size == 12);
++ __ bind(done);
++ %}
++
++ enc_class enc_cmove_imm(iRegIdst dst, flagsReg crx, immI16 src, cmpOp cmp) %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
++
++ MacroAssembler _masm(&cbuf);
++ Label done;
++ assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
++ // Branch if not (cmp crx).
++ __ bc(cc_to_inverse_boint($cmp$$cmpcode), cc_to_biint($cmp$$cmpcode, $crx$$reg), done);
++ __ li($dst$$Register, $src$$constant);
++ // TODO PPC port __ endgroup_if_needed(_size == 12);
++ __ bind(done);
++ %}
++
++ // New atomics.
++ enc_class enc_GetAndAddI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src) %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++
++ MacroAssembler _masm(&cbuf);
++ Register Rtmp = R0;
++ Register Rres = $res$$Register;
++ Register Rsrc = $src$$Register;
++ Register Rptr = $mem_ptr$$Register;
++ bool RegCollision = (Rres == Rsrc) || (Rres == Rptr);
++ Register Rold = RegCollision ? Rtmp : Rres;
++
++ Label Lretry;
++ __ bind(Lretry);
++ __ lwarx(Rold, Rptr, MacroAssembler::cmpxchgx_hint_atomic_update());
++ __ add(Rtmp, Rsrc, Rold);
++ __ stwcx_(Rtmp, Rptr);
++ if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
++ __ bne_predict_not_taken(CCR0, Lretry);
++ } else {
++ __ bne( CCR0, Lretry);
++ }
++ if (RegCollision) __ subf(Rres, Rsrc, Rtmp);
++ __ fence();
++ %}
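++
++  // The pattern above is the standard load-reserve/store-conditional
++  // retry loop: lwarx reserves the word, stwcx. succeeds only if the
++  // reservation still holds, otherwise CR0.eq is clear and we retry.
++  // The trailing fence provides the memory barrier required by the Java
++  // atomics; Rtmp (R0) stands in for the result register whenever it
++  // would collide with the source or pointer register.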
++
++ enc_class enc_GetAndAddL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src) %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++
++ MacroAssembler _masm(&cbuf);
++ Register Rtmp = R0;
++ Register Rres = $res$$Register;
++ Register Rsrc = $src$$Register;
++ Register Rptr = $mem_ptr$$Register;
++ bool RegCollision = (Rres == Rsrc) || (Rres == Rptr);
++ Register Rold = RegCollision ? Rtmp : Rres;
++
++ Label Lretry;
++ __ bind(Lretry);
++ __ ldarx(Rold, Rptr, MacroAssembler::cmpxchgx_hint_atomic_update());
++ __ add(Rtmp, Rsrc, Rold);
++ __ stdcx_(Rtmp, Rptr);
++ if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
++ __ bne_predict_not_taken(CCR0, Lretry);
++ } else {
++ __ bne( CCR0, Lretry);
++ }
++ if (RegCollision) __ subf(Rres, Rsrc, Rtmp);
++ __ fence();
++ %}
++
++ enc_class enc_GetAndSetI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src) %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++
++ MacroAssembler _masm(&cbuf);
++ Register Rtmp = R0;
++ Register Rres = $res$$Register;
++ Register Rsrc = $src$$Register;
++ Register Rptr = $mem_ptr$$Register;
++ bool RegCollision = (Rres == Rsrc) || (Rres == Rptr);
++ Register Rold = RegCollision ? Rtmp : Rres;
++
++ Label Lretry;
++ __ bind(Lretry);
++ __ lwarx(Rold, Rptr, MacroAssembler::cmpxchgx_hint_atomic_update());
++ __ stwcx_(Rsrc, Rptr);
++ if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
++ __ bne_predict_not_taken(CCR0, Lretry);
++ } else {
++ __ bne( CCR0, Lretry);
++ }
++ if (RegCollision) __ mr(Rres, Rtmp);
++ __ fence();
++ %}
++
++ enc_class enc_GetAndSetL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src) %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++
++ MacroAssembler _masm(&cbuf);
++ Register Rtmp = R0;
++ Register Rres = $res$$Register;
++ Register Rsrc = $src$$Register;
++ Register Rptr = $mem_ptr$$Register;
++ bool RegCollision = (Rres == Rsrc) || (Rres == Rptr);
++ Register Rold = RegCollision ? Rtmp : Rres;
++
++ Label Lretry;
++ __ bind(Lretry);
++ __ ldarx(Rold, Rptr, MacroAssembler::cmpxchgx_hint_atomic_update());
++ __ stdcx_(Rsrc, Rptr);
++ if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
++ __ bne_predict_not_taken(CCR0, Lretry);
++ } else {
++ __ bne( CCR0, Lretry);
++ }
++ if (RegCollision) __ mr(Rres, Rtmp);
++ __ fence();
++ %}
++
++  // This enc_class is needed so that the scheduler gets a proper
++  // input mapping for latency computation.
++ enc_class enc_andc(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_andc);
++ MacroAssembler _masm(&cbuf);
++ __ andc($dst$$Register, $src1$$Register, $src2$$Register);
++ %}
++
++ enc_class enc_convI2B_regI__cmove(iRegIdst dst, iRegIsrc src, flagsReg crx, immI16 zero, immI16 notzero) %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++
++ MacroAssembler _masm(&cbuf);
++
++ Label done;
++ __ cmpwi($crx$$CondRegister, $src$$Register, 0);
++ __ li($dst$$Register, $zero$$constant);
++ __ beq($crx$$CondRegister, done);
++ __ li($dst$$Register, $notzero$$constant);
++ __ bind(done);
++ %}
++
++ enc_class enc_convP2B_regP__cmove(iRegIdst dst, iRegPsrc src, flagsReg crx, immI16 zero, immI16 notzero) %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++
++ MacroAssembler _masm(&cbuf);
++
++ Label done;
++ __ cmpdi($crx$$CondRegister, $src$$Register, 0);
++ __ li($dst$$Register, $zero$$constant);
++ __ beq($crx$$CondRegister, done);
++ __ li($dst$$Register, $notzero$$constant);
++ __ bind(done);
++ %}
++
++ enc_class enc_cmove_bso_stackSlotL(iRegLdst dst, flagsReg crx, stackSlotL mem ) %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
++
++ MacroAssembler _masm(&cbuf);
++ int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
++ Label done;
++ __ bso($crx$$CondRegister, done);
++ __ ld($dst$$Register, Idisp, $mem$$base$$Register);
++ // TODO PPC port __ endgroup_if_needed(_size == 12);
++ __ bind(done);
++ %}
++
++ enc_class enc_bc(flagsReg crx, cmpOp cmp, Label lbl) %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_bc);
++
++ MacroAssembler _masm(&cbuf);
++ Label d; // dummy
++ __ bind(d);
++ Label* p = ($lbl$$label);
++ // `p' is `NULL' when this encoding class is used only to
++ // determine the size of the encoded instruction.
++ Label& l = (NULL == p)? d : *(p);
++ int cc = $cmp$$cmpcode;
++ int flags_reg = $crx$$reg;
++ assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
++ int bhint = Assembler::bhintNoHint;
++
++ if (UseStaticBranchPredictionForUncommonPathsPPC64) {
++ if (_prob <= PROB_NEVER) {
++ bhint = Assembler::bhintIsNotTaken;
++ } else if (_prob >= PROB_ALWAYS) {
++ bhint = Assembler::bhintIsTaken;
++ }
++ }
++
++ __ bc(Assembler::add_bhint_to_boint(bhint, cc_to_boint(cc)),
++ cc_to_biint(cc, flags_reg),
++ l);
++ %}
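++
++  // The dummy label bound at the top gives bc a valid target of known
++  // encoding length while this enc_class is replayed only to determine
++  // its size (i.e. when $lbl is NULL).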
++
++ enc_class enc_bc_far(flagsReg crx, cmpOp cmp, Label lbl) %{
++ // The scheduler doesn't know about branch shortening, so we set the opcode
++ // to ppc64Opcode_bc in order to hide this detail from the scheduler.
++ // TODO: PPC port $archOpcode(ppc64Opcode_bc);
++
++ MacroAssembler _masm(&cbuf);
++ Label d; // dummy
++ __ bind(d);
++ Label* p = ($lbl$$label);
++ // `p' is `NULL' when this encoding class is used only to
++ // determine the size of the encoded instruction.
++ Label& l = (NULL == p)? d : *(p);
++ int cc = $cmp$$cmpcode;
++ int flags_reg = $crx$$reg;
++ int bhint = Assembler::bhintNoHint;
++
++ if (UseStaticBranchPredictionForUncommonPathsPPC64) {
++ if (_prob <= PROB_NEVER) {
++ bhint = Assembler::bhintIsNotTaken;
++ } else if (_prob >= PROB_ALWAYS) {
++ bhint = Assembler::bhintIsTaken;
++ }
++ }
++
++ // Tell the conditional far branch to optimize itself when being relocated.
++ __ bc_far(Assembler::add_bhint_to_boint(bhint, cc_to_boint(cc)),
++ cc_to_biint(cc, flags_reg),
++ l,
++ MacroAssembler::bc_far_optimize_on_relocate);
++ %}
++
++ // Branch used with Power6 scheduling (can be shortened without changing the node).
++ enc_class enc_bc_short_far(flagsReg crx, cmpOp cmp, Label lbl) %{
++ // The scheduler doesn't know about branch shortening, so we set the opcode
++ // to ppc64Opcode_bc in order to hide this detail from the scheduler.
++ // TODO: PPC port $archOpcode(ppc64Opcode_bc);
++
++ MacroAssembler _masm(&cbuf);
++ Label d; // dummy
++ __ bind(d);
++ Label* p = ($lbl$$label);
++ // `p' is `NULL' when this encoding class is used only to
++ // determine the size of the encoded instruction.
++ Label& l = (NULL == p)? d : *(p);
++ int cc = $cmp$$cmpcode;
++ int flags_reg = $crx$$reg;
++ int bhint = Assembler::bhintNoHint;
++
++ if (UseStaticBranchPredictionForUncommonPathsPPC64) {
++ if (_prob <= PROB_NEVER) {
++ bhint = Assembler::bhintIsNotTaken;
++ } else if (_prob >= PROB_ALWAYS) {
++ bhint = Assembler::bhintIsTaken;
++ }
++ }
++
++#if 0 // TODO: PPC port
++ if (_size == 8) {
++ // Tell the conditional far branch to optimize itself when being relocated.
++ __ bc_far(Assembler::add_bhint_to_boint(bhint, cc_to_boint(cc)),
++ cc_to_biint(cc, flags_reg),
++ l,
++ MacroAssembler::bc_far_optimize_on_relocate);
++ } else {
++ __ bc (Assembler::add_bhint_to_boint(bhint, cc_to_boint(cc)),
++ cc_to_biint(cc, flags_reg),
++ l);
++ }
++#endif
++ Unimplemented();
++ %}
++
++  // Postalloc expand emitter for loading a replicated float constant from
++  // the method's TOC.
++  // An enc_class is needed because constanttablebase is not supported by
++  // postalloc expand.
++ enc_class postalloc_expand_load_replF_constant(iRegLdst dst, immF src, iRegLdst toc) %{
++ // Create new nodes.
++
++ // Make an operand with the bit pattern to load as float.
++ immLOper *op_repl = new (C) immLOper((jlong)replicate_immF(op_src->constantF()));
++
++ loadConLNodesTuple loadConLNodes =
++ loadConLNodesTuple_create(C, ra_, n_toc, op_repl,
++ ra_->get_reg_second(this), ra_->get_reg_first(this));
++
++ // Push new nodes.
++ if (loadConLNodes._large_hi) nodes->push(loadConLNodes._large_hi);
++ if (loadConLNodes._last) nodes->push(loadConLNodes._last);
++
++ assert(nodes->length() >= 1, "must have created at least 1 node");
++ assert(loadConLNodes._last->bottom_type()->isa_long(), "must be long");
++ %}
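++
++  // Worked example (illustrative): replicate_immF duplicates the 32-bit
++  // float bit pattern into both halves of a 64-bit value, so 1.0f
++  // (0x3f800000) becomes the long constant 0x3f8000003f800000, which the
++  // loadConL nodes created above then materialize from the TOC.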
++
++  // This enc_class is needed so that the scheduler gets a proper
++  // input mapping for latency computation.
++ enc_class enc_poll(immI dst, iRegLdst poll) %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_ld);
++ // Fake operand dst needed for PPC scheduler.
++ assert($dst$$constant == 0x0, "dst must be 0x0");
++
++ MacroAssembler _masm(&cbuf);
++ // Mark the code position where the load from the safepoint
++ // polling page was emitted as relocInfo::poll_type.
++ __ relocate(relocInfo::poll_type);
++ __ load_from_polling_page($poll$$Register);
++ %}
++
++ // A Java static call or a runtime call.
++ //
++ // Branch-and-link relative to a trampoline.
++ // The trampoline loads the target address and does a long branch to there.
++  // In case we call Java, the trampoline branches to an interpreter_stub
++ // which loads the inline cache and the real call target from the constant pool.
++ //
++ // This basically looks like this:
++ //
++ // >>>> consts -+ -+
++ // | |- offset1
++ // [call target1] | <-+
++ // [IC cache] |- offset2
++ // [call target2] <--+
++ //
++ // <<<< consts
++ // >>>> insts
++ //
++ // bl offset16 -+ -+ ??? // How many bits available?
++ // | |
++ // <<<< insts | |
++ // >>>> stubs | |
++ // | |- trampoline_stub_Reloc
++ // trampoline stub: | <-+
++ // r2 = toc |
++ // r2 = [r2 + offset1] | // Load call target1 from const section
++ // mtctr r2 |
++ // bctr |- static_stub_Reloc
++ // comp_to_interp_stub: <---+
++ // r1 = toc
++ // ICreg = [r1 + IC_offset] // Load IC from const section
++ // r1 = [r1 + offset2] // Load call target2 from const section
++ // mtctr r1
++ // bctr
++ //
++ // <<<< stubs
++ //
++ // The call instruction in the code either
++  //  - Branches directly to a compiled method if the offset is encodable in the instruction.
++ // - Branches to the trampoline stub if the offset to the compiled method is not encodable.
++ // - Branches to the compiled_to_interp stub if the target is interpreted.
++ //
++ // Further there are three relocations from the loads to the constants in
++ // the constant section.
++ //
++  // The use of r1 and r2 in the stubs makes it possible to distinguish them.
++ enc_class enc_java_static_call(method meth) %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_bl);
++
++ MacroAssembler _masm(&cbuf);
++ address entry_point = (address)$meth$$method;
++
++ if (!_method) {
++ // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
++ emit_call_with_trampoline_stub(_masm, entry_point, relocInfo::runtime_call_type);
++ } else {
++      // Remember the offset, not the address.
++ const int start_offset = __ offset();
++ // The trampoline stub.
++ if (!Compile::current()->in_scratch_emit_size()) {
++        // No entry point given, use the current pc so that the
++        // branch trivially fits into its displacement field.
++ if (entry_point == 0) entry_point = __ pc();
++
++ // Put the entry point as a constant into the constant pool.
++ const address entry_point_toc_addr = __ address_constant(entry_point, RelocationHolder::none);
++ const int entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr);
++
++        // Emit the trampoline stub which will be associated with the branch-and-link below.
++ CallStubImpl::emit_trampoline_stub(_masm, entry_point_toc_offset, start_offset);
++ __ relocate(_optimized_virtual ?
++ relocInfo::opt_virtual_call_type : relocInfo::static_call_type);
++ }
++
++ // The real call.
++ // Note: At this point we do not have the address of the trampoline
++ // stub, and the entry point might be too far away for bl, so __ pc()
++      // serves as a dummy and the bl will be patched later.
++ cbuf.set_insts_mark();
++ __ bl(__ pc()); // Emits a relocation.
++
++ // The stub for call to interpreter.
++ CompiledStaticCall::emit_to_interp_stub(cbuf);
++ }
++ %}
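++
++  // Sketch (illustrative, mirroring the schema above): the insts section
++  // ends up with a single
++  //   bl <offset16>              // patched later to the final target
++  // while the stubs section holds the trampoline
++  //   r2 = toc; r2 = [r2 + offset1]; mtctr r2; bctr
++  // and, for calls into Java, the comp_to_interp stub that loads the
++  // inline cache and call target2 from the constant pool.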
++
++ // Emit a method handle call.
++ //
++  // Method handle calls from compiled to compiled go through a
++  // c2i -> i2c adapter, extending the frame for their arguments. The
++  // caller, however, returns directly to the compiled callee, which has
++  // to cope with the extended frame. We restore the original frame by
++  // loading the caller's sp and adding the calculated framesize.
++ enc_class enc_java_handle_call(method meth) %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++
++ MacroAssembler _masm(&cbuf);
++ address entry_point = (address)$meth$$method;
++
++    // Remember the offset, not the address.
++ const int start_offset = __ offset();
++ // The trampoline stub.
++ if (!ra_->C->in_scratch_emit_size()) {
++      // No entry point given, use the current pc so that the
++      // branch trivially fits into its displacement field.
++ if (entry_point == 0) entry_point = __ pc();
++
++ // Put the entry point as a constant into the constant pool.
++ const address entry_point_toc_addr = __ address_constant(entry_point, RelocationHolder::none);
++ const int entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr);
++
++      // Emit the trampoline stub which will be associated with the branch-and-link below.
++ CallStubImpl::emit_trampoline_stub(_masm, entry_point_toc_offset, start_offset);
++ assert(_optimized_virtual, "methodHandle call should be a virtual call");
++ __ relocate(relocInfo::opt_virtual_call_type);
++ }
++
++ // The real call.
++ // Note: At this point we do not have the address of the trampoline
++ // stub, and the entry point might be too far away for bl, so __ pc()
++    // serves as a dummy and the bl will be patched later.
++ cbuf.set_insts_mark();
++ __ bl(__ pc()); // Emits a relocation.
++
++ assert(_method, "execute next statement conditionally");
++ // The stub for call to interpreter.
++ CompiledStaticCall::emit_to_interp_stub(cbuf);
++
++ // Restore original sp.
++ __ ld(R11_scratch1, 0, R1_SP); // Load caller sp.
++ const long framesize = ra_->C->frame_slots() << LogBytesPerInt;
++ unsigned int bytes = (unsigned int)framesize;
++ long offset = Assembler::align_addr(bytes, frame::alignment_in_bytes);
++ if (Assembler::is_simm(-offset, 16)) {
++ __ addi(R1_SP, R11_scratch1, -offset);
++ } else {
++ __ load_const_optimized(R12_scratch2, -offset);
++ __ add(R1_SP, R11_scratch1, R12_scratch2);
++ }
++#ifdef ASSERT
++ __ ld(R12_scratch2, 0, R1_SP); // Load from unextended_sp.
++ __ cmpd(CCR0, R11_scratch1, R12_scratch2);
++ __ asm_assert_eq("backlink changed", 0x8000);
++#endif
++    // If this fails, the backlink must be stored before unextending the frame.
++
++ if (ra_->C->env()->failing()) {
++ return;
++ }
++ %}
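++
++  // Worked example (assuming 16-byte frame alignment): with 18 frame
++  // slots, framesize = 18 << LogBytesPerInt = 72 bytes, align_addr rounds
++  // this up to offset = 80, and since -80 is a simm16 the restore is a
++  // single 'addi R1_SP, R11_scratch1, -80'.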
++
++ // Second node of expanded dynamic call - the call.
++ enc_class enc_java_dynamic_call_sched(method meth) %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_bl);
++
++ MacroAssembler _masm(&cbuf);
++
++ if (!ra_->C->in_scratch_emit_size()) {
++ // Create a call trampoline stub for the given method.
++ const address entry_point = !($meth$$method) ? 0 : (address)$meth$$method;
++ const address entry_point_const = __ address_constant(entry_point, RelocationHolder::none);
++ const int entry_point_const_toc_offset = __ offset_to_method_toc(entry_point_const);
++ CallStubImpl::emit_trampoline_stub(_masm, entry_point_const_toc_offset, __ offset());
++
++ if (ra_->C->env()->failing())
++ return;
++
++ // Build relocation at call site with ic position as data.
++ assert((_load_ic_hi_node != NULL && _load_ic_node == NULL) ||
++ (_load_ic_hi_node == NULL && _load_ic_node != NULL),
++ "must have one, but can't have both");
++ assert((_load_ic_hi_node != NULL && _load_ic_hi_node->_cbuf_insts_offset != -1) ||
++ (_load_ic_node != NULL && _load_ic_node->_cbuf_insts_offset != -1),
++ "must contain instruction offset");
++ const int virtual_call_oop_addr_offset = _load_ic_hi_node != NULL
++ ? _load_ic_hi_node->_cbuf_insts_offset
++ : _load_ic_node->_cbuf_insts_offset;
++ const address virtual_call_oop_addr = __ addr_at(virtual_call_oop_addr_offset);
++ assert(MacroAssembler::is_load_const_from_method_toc_at(virtual_call_oop_addr),
++ "should be load from TOC");
++
++ __ relocate(virtual_call_Relocation::spec(virtual_call_oop_addr));
++ }
++
++ // At this point I do not have the address of the trampoline stub,
++ // and the entry point might be too far away for bl. Pc() serves
++    // as a dummy, and the bl will be patched later.
++ __ bl((address) __ pc());
++ %}
++
++ // postalloc expand emitter for virtual calls.
++ enc_class postalloc_expand_java_dynamic_call_sched(method meth, iRegLdst toc) %{
++
++ // Create the nodes for loading the IC from the TOC.
++ loadConLNodesTuple loadConLNodes_IC =
++ loadConLNodesTuple_create(C, ra_, n_toc, new (C) immLOper((jlong)Universe::non_oop_word()),
++ OptoReg::Name(R19_H_num), OptoReg::Name(R19_num));
++
++ // Create the call node.
++ CallDynamicJavaDirectSchedNode *call = new (C) CallDynamicJavaDirectSchedNode();
++ call->_method_handle_invoke = _method_handle_invoke;
++ call->_vtable_index = _vtable_index;
++ call->_method = _method;
++ call->_bci = _bci;
++ call->_optimized_virtual = _optimized_virtual;
++ call->_tf = _tf;
++ call->_entry_point = _entry_point;
++ call->_cnt = _cnt;
++ call->_argsize = _argsize;
++ call->_oop_map = _oop_map;
++ call->_jvms = _jvms;
++ call->_jvmadj = _jvmadj;
++ call->_in_rms = _in_rms;
++ call->_nesting = _nesting;
++
++ // New call needs all inputs of old call.
++ // Req...
++ for (uint i = 0; i < req(); ++i) {
++      // The expanded node does not need the toc any more.
++      // Add the inline cache constant here instead. This expresses that
++      // the register of the inline cache must be live at the call.
++      // Otherwise we would have to adapt JVMState by -1.
++ if (i == mach_constant_base_node_input()) {
++ call->add_req(loadConLNodes_IC._last);
++ } else {
++ call->add_req(in(i));
++ }
++ }
++ // ...as well as prec
++ for (uint i = req(); i < len(); ++i) {
++ call->add_prec(in(i));
++ }
++
++ // Remember nodes loading the inline cache into r19.
++ call->_load_ic_hi_node = loadConLNodes_IC._large_hi;
++ call->_load_ic_node = loadConLNodes_IC._small;
++
++ // Operands for new nodes.
++ call->_opnds[0] = _opnds[0];
++ call->_opnds[1] = _opnds[1];
++
++ // Only the inline cache is associated with a register.
++ assert(Matcher::inline_cache_reg() == OptoReg::Name(R19_num), "ic reg should be R19");
++
++ // Push new nodes.
++ if (loadConLNodes_IC._large_hi) nodes->push(loadConLNodes_IC._large_hi);
++ if (loadConLNodes_IC._last) nodes->push(loadConLNodes_IC._last);
++ nodes->push(call);
++ %}
++
++  // Compound version of call dynamic.
++  // The toc is only passed so that it can be used in the ins_encode statement.
++  // In the code we have to use $constanttablebase.
++ enc_class enc_java_dynamic_call(method meth, iRegLdst toc) %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++ MacroAssembler _masm(&cbuf);
++ int start_offset = __ offset();
++
++ Register Rtoc = (ra_) ? $constanttablebase : R2_TOC;
++#if 0
++ int vtable_index = this->_vtable_index;
++ if (_vtable_index < 0) {
++ // Must be invalid_vtable_index, not nonvirtual_vtable_index.
++ assert(_vtable_index == Method::invalid_vtable_index, "correct sentinel value");
++ Register ic_reg = as_Register(Matcher::inline_cache_reg_encode());
++
++ // Virtual call relocation will point to ic load.
++ address virtual_call_meta_addr = __ pc();
++ // Load a clear inline cache.
++ AddressLiteral empty_ic((address) Universe::non_oop_word());
++ __ load_const_from_method_toc(ic_reg, empty_ic, Rtoc);
++ // CALL to fixup routine. Fixup routine uses ScopeDesc info
++ // to determine who we intended to call.
++ __ relocate(virtual_call_Relocation::spec(virtual_call_meta_addr));
++ emit_call_with_trampoline_stub(_masm, (address)$meth$$method, relocInfo::none);
++ assert(((MachCallDynamicJavaNode*)this)->ret_addr_offset() == __ offset() - start_offset,
++ "Fix constant in ret_addr_offset()");
++ } else {
++ assert(!UseInlineCaches, "expect vtable calls only if not using ICs");
++      // Go through the vtable. Get the receiver klass; the receiver has
++      // already been checked for non-null. If we go through a C2I adapter,
++      // the interpreter expects the method in R19_method.
++
++ __ load_klass(R11_scratch1, R3);
++
++ int entry_offset = InstanceKlass::vtable_start_offset() + _vtable_index * vtableEntry::size();
++ int v_off = entry_offset * wordSize + vtableEntry::method_offset_in_bytes();
++ __ li(R19_method, v_off);
++ __ ldx(R19_method/*method oop*/, R19_method/*method offset*/, R11_scratch1/*class*/);
++ // NOTE: for vtable dispatches, the vtable entry will never be
++ // null. However it may very well end up in handle_wrong_method
++ // if the method is abstract for the particular class.
++ __ ld(R11_scratch1, in_bytes(Method::from_compiled_offset()), R19_method);
++ // Call target. Either compiled code or C2I adapter.
++ __ mtctr(R11_scratch1);
++ __ bctrl();
++ if (((MachCallDynamicJavaNode*)this)->ret_addr_offset() != __ offset() - start_offset) {
++ tty->print(" %d, %d\n", ((MachCallDynamicJavaNode*)this)->ret_addr_offset(),__ offset() - start_offset);
++ }
++ assert(((MachCallDynamicJavaNode*)this)->ret_addr_offset() == __ offset() - start_offset,
++ "Fix constant in ret_addr_offset()");
++ }
++#endif
++ Unimplemented(); // ret_addr_offset not yet fixed. Depends on compressed oops (load klass!).
++ %}
++
++  // A runtime call.
++ enc_class enc_java_to_runtime_call (method meth) %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++
++ MacroAssembler _masm(&cbuf);
++ const address start_pc = __ pc();
++
++#if defined(ABI_ELFv2)
++ address entry= !($meth$$method) ? NULL : (address)$meth$$method;
++ __ call_c(entry, relocInfo::runtime_call_type);
++#else
++ // The function we're going to call.
++ FunctionDescriptor fdtemp;
++ const FunctionDescriptor* fd = !($meth$$method) ? &fdtemp : (FunctionDescriptor*)$meth$$method;
++
++ Register Rtoc = R12_scratch2;
++ // Calculate the method's TOC.
++ __ calculate_address_from_global_toc(Rtoc, __ method_toc());
++    // Put entry, env, and toc into the constant pool; this needs up to
++    // 3 constant pool entries. call_c_using_toc will optimize the call.
++ __ call_c_using_toc(fd, relocInfo::runtime_call_type, Rtoc);
++#endif
++
++ // Check the ret_addr_offset.
++ assert(((MachCallRuntimeNode*)this)->ret_addr_offset() == __ last_calls_return_pc() - start_pc,
++ "Fix constant in ret_addr_offset()");
++ %}
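++
++  // Note: under the ELFv1 ABI (the non-ABI_ELFv2 path above) a C function
++  // pointer designates a function descriptor consisting of entry point,
++  // TOC value, and environment pointer; that is why call_c_using_toc may
++  // need up to three constant pool entries, one per descriptor field.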
++
++ // Move to ctr for leaf call.
++  // This enc_class is needed so that the scheduler gets a proper
++  // input mapping for latency computation.
++ enc_class enc_leaf_call_mtctr(iRegLsrc src) %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_mtctr);
++ MacroAssembler _masm(&cbuf);
++ __ mtctr($src$$Register);
++ %}
++
++ // Postalloc expand emitter for runtime leaf calls.
++ enc_class postalloc_expand_java_to_runtime_call(method meth, iRegLdst toc) %{
++ loadConLNodesTuple loadConLNodes_Entry;
++#if defined(ABI_ELFv2)
++ jlong entry_address = (jlong) this->entry_point();
++ assert(entry_address, "need address here");
++ loadConLNodes_Entry = loadConLNodesTuple_create(C, ra_, n_toc, new (C) immLOper(entry_address),
++ OptoReg::Name(R12_H_num), OptoReg::Name(R12_num));
++#else
++ // Get the struct that describes the function we are about to call.
++ FunctionDescriptor* fd = (FunctionDescriptor*) this->entry_point();
++ assert(fd, "need fd here");
++ jlong entry_address = (jlong) fd->entry();
++ // new nodes
++ loadConLNodesTuple loadConLNodes_Env;
++ loadConLNodesTuple loadConLNodes_Toc;
++
++ // Create nodes and operands for loading the entry point.
++ loadConLNodes_Entry = loadConLNodesTuple_create(C, ra_, n_toc, new (C) immLOper(entry_address),
++ OptoReg::Name(R12_H_num), OptoReg::Name(R12_num));
++
++
++ // Create nodes and operands for loading the env pointer.
++ if (fd->env() != NULL) {
++ loadConLNodes_Env = loadConLNodesTuple_create(C, ra_, n_toc, new (C) immLOper((jlong) fd->env()),
++ OptoReg::Name(R11_H_num), OptoReg::Name(R11_num));
++ } else {
++ loadConLNodes_Env._large_hi = NULL;
++ loadConLNodes_Env._large_lo = NULL;
++ loadConLNodes_Env._small = NULL;
++ loadConLNodes_Env._last = new (C) loadConL16Node();
++ loadConLNodes_Env._last->_opnds[0] = new (C) iRegLdstOper();
++ loadConLNodes_Env._last->_opnds[1] = new (C) immL16Oper(0);
++ ra_->set_pair(loadConLNodes_Env._last->_idx, OptoReg::Name(R11_H_num), OptoReg::Name(R11_num));
++ }
++
++    // Create nodes and operands for loading the TOC pointer.
++ loadConLNodes_Toc = loadConLNodesTuple_create(C, ra_, n_toc, new (C) immLOper((jlong) fd->toc()),
++ OptoReg::Name(R2_H_num), OptoReg::Name(R2_num));
++#endif // ABI_ELFv2
++ // mtctr node
++ MachNode *mtctr = new (C) CallLeafDirect_mtctrNode();
++
++ assert(loadConLNodes_Entry._last != NULL, "entry must exist");
++ mtctr->add_req(0, loadConLNodes_Entry._last);
++
++ mtctr->_opnds[0] = new (C) iRegLdstOper();
++ mtctr->_opnds[1] = new (C) iRegLdstOper();
++
++ // call node
++ MachCallLeafNode *call = new (C) CallLeafDirectNode();
++
++ call->_opnds[0] = _opnds[0];
++ call->_opnds[1] = new (C) methodOper((intptr_t) entry_address); // May get set later.
++
++ // Make the new call node look like the old one.
++ call->_name = _name;
++ call->_tf = _tf;
++ call->_entry_point = _entry_point;
++ call->_cnt = _cnt;
++ call->_argsize = _argsize;
++ call->_oop_map = _oop_map;
++ guarantee(!_jvms, "You must clone the jvms and adapt the offsets by fix_jvms().");
++ call->_jvms = NULL;
++ call->_jvmadj = _jvmadj;
++ call->_in_rms = _in_rms;
++ call->_nesting = _nesting;
++
++
++ // New call needs all inputs of old call.
++ // Req...
++ for (uint i = 0; i < req(); ++i) {
++ if (i != mach_constant_base_node_input()) {
++ call->add_req(in(i));
++ }
++ }
++
++    // These must be required edges, as the registers are live up to
++    // the call. Otherwise the constants are handled as kills.
++ call->add_req(mtctr);
++#if !defined(ABI_ELFv2)
++ call->add_req(loadConLNodes_Env._last);
++ call->add_req(loadConLNodes_Toc._last);
++#endif
++
++ // ...as well as prec
++ for (uint i = req(); i < len(); ++i) {
++ call->add_prec(in(i));
++ }
++
++ // registers
++ ra_->set1(mtctr->_idx, OptoReg::Name(SR_CTR_num));
++
++ // Insert the new nodes.
++ if (loadConLNodes_Entry._large_hi) nodes->push(loadConLNodes_Entry._large_hi);
++ if (loadConLNodes_Entry._last) nodes->push(loadConLNodes_Entry._last);
++#if !defined(ABI_ELFv2)
++ if (loadConLNodes_Env._large_hi) nodes->push(loadConLNodes_Env._large_hi);
++ if (loadConLNodes_Env._last) nodes->push(loadConLNodes_Env._last);
++ if (loadConLNodes_Toc._large_hi) nodes->push(loadConLNodes_Toc._large_hi);
++ if (loadConLNodes_Toc._last) nodes->push(loadConLNodes_Toc._last);
++#endif
++ nodes->push(mtctr);
++ nodes->push(call);
++ %}
++%}
++
++//----------FRAME--------------------------------------------------------------
++// Definition of frame structure and management information.
++
++frame %{
++ // What direction does stack grow in (assumed to be same for native & Java).
++ stack_direction(TOWARDS_LOW);
++
++ // These two registers define part of the calling convention between
++ // compiled code and the interpreter.
++
++ // Inline Cache Register or method for I2C.
++ inline_cache_reg(R19); // R19_method
++
++  // Method Oop Register when calling the interpreter.
++ interpreter_method_oop_reg(R19); // R19_method
++
++ // Optional: name the operand used by cisc-spilling to access
++ // [stack_pointer + offset].
++ cisc_spilling_operand_name(indOffset);
++
++ // Number of stack slots consumed by a Monitor enter.
++ sync_stack_slots((frame::jit_monitor_size / VMRegImpl::stack_slot_size));
++
++ // Compiled code's Frame Pointer.
++ frame_pointer(R1); // R1_SP
++
++ // Interpreter stores its frame pointer in a register which is
++ // stored to the stack by I2CAdaptors. I2CAdaptors convert from
++ // interpreted java to compiled java.
++ //
++ // R14_state holds pointer to caller's cInterpreter.
++ interpreter_frame_pointer(R14); // R14_state
++
++ stack_alignment(frame::alignment_in_bytes);
++
++ in_preserve_stack_slots((frame::jit_in_preserve_size / VMRegImpl::stack_slot_size));
++
++ // Number of outgoing stack slots killed above the
++ // out_preserve_stack_slots for calls to C. Supports the var-args
++ // backing area for register parms.
++ //
++ varargs_C_out_slots_killed(((frame::abi_reg_args_size - frame::jit_out_preserve_size) / VMRegImpl::stack_slot_size));
++
++ // The after-PROLOG location of the return address. Location of
++ // return address specifies a type (REG or STACK) and a number
++ // representing the register number (i.e. - use a register name) or
++ // stack slot.
++ //
++ // A: Link register is stored in stack slot ...
++ // M: ... but it's in the caller's frame according to PPC-64 ABI.
++ // J: Therefore, we make sure that the link register is also in R11_scratch1
++ // at the end of the prolog.
++ // B: We use R20, now.
++ //return_addr(REG R20);
++
++ // G: After reading the comments made by all the luminaries on their
++ // failure to tell the compiler where the return address really is,
++ // I hardly dare to try myself. However, I'm convinced it's in slot
++  // 4, which apparently works and saves us some spills.
++ return_addr(STACK 4);
++
++ // This is the body of the function
++ //
++ // void Matcher::calling_convention(OptoRegPair* sig, // array of ideal regs
++ // uint length, // length of array
++ // bool is_outgoing)
++ //
++ // The `sig' array is to be updated. sig[j] represents the location
++ // of the j-th argument, either a register or a stack slot.
++
++ // Comment taken from i486.ad:
++ // Body of function which returns an integer array locating
++ // arguments either in registers or in stack slots. Passed an array
++ // of ideal registers called "sig" and a "length" count. Stack-slot
++ // offsets are based on outgoing arguments, i.e. a CALLER setting up
++ // arguments for a CALLEE. Incoming stack arguments are
++ // automatically biased by the preserve_stack_slots field above.
++ calling_convention %{
++    // No difference between incoming and outgoing; just pass false.
++ SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
++ %}
++
++ // Comment taken from i486.ad:
++ // Body of function which returns an integer array locating
++ // arguments either in registers or in stack slots. Passed an array
++ // of ideal registers called "sig" and a "length" count. Stack-slot
++ // offsets are based on outgoing arguments, i.e. a CALLER setting up
++ // arguments for a CALLEE. Incoming stack arguments are
++ // automatically biased by the preserve_stack_slots field above.
++ c_calling_convention %{
++ // This is obviously always outgoing.
++ // C argument in register AND stack slot.
++ (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
++ %}
++
++ // Location of native (C/C++) and interpreter return values. This
++ // is specified to be the same as Java. In the 32-bit VM, long
++ // values are actually returned from native calls in O0:O1 and
++ // returned to the interpreter in I0:I1. The copying to and from
++ // the register pairs is done by the appropriate call and epilog
++ // opcodes. This simplifies the register allocator.
++ c_return_value %{
++ assert((ideal_reg >= Op_RegI && ideal_reg <= Op_RegL) ||
++ (ideal_reg == Op_RegN && Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0),
++ "only return normal values");
++ // enum names from opcodes.hpp: Op_Node Op_Set Op_RegN Op_RegI Op_RegP Op_RegF Op_RegD Op_RegL
++ static int typeToRegLo[Op_RegL+1] = { 0, 0, R3_num, R3_num, R3_num, F1_num, F1_num, R3_num };
++ static int typeToRegHi[Op_RegL+1] = { 0, 0, OptoReg::Bad, R3_H_num, R3_H_num, OptoReg::Bad, F1_H_num, R3_H_num };
++ return OptoRegPair(typeToRegHi[ideal_reg], typeToRegLo[ideal_reg]);
++ %}
++
++ // Location of compiled Java return values. Same as C
++ return_value %{
++ assert((ideal_reg >= Op_RegI && ideal_reg <= Op_RegL) ||
++ (ideal_reg == Op_RegN && Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0),
++ "only return normal values");
++ // enum names from opcodes.hpp: Op_Node Op_Set Op_RegN Op_RegI Op_RegP Op_RegF Op_RegD Op_RegL
++ static int typeToRegLo[Op_RegL+1] = { 0, 0, R3_num, R3_num, R3_num, F1_num, F1_num, R3_num };
++ static int typeToRegHi[Op_RegL+1] = { 0, 0, OptoReg::Bad, R3_H_num, R3_H_num, OptoReg::Bad, F1_H_num, R3_H_num };
++ return OptoRegPair(typeToRegHi[ideal_reg], typeToRegLo[ideal_reg]);
++ %}
++%}
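++
++// Reading the tables above: an int or long result is returned in R3
++// (modeled as the pair R3_H:R3), a float or double in F1, and a narrow
++// oop (Op_RegN) in R3 with no high half.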
++
++
++//----------ATTRIBUTES---------------------------------------------------------
++
++//----------Operand Attributes-------------------------------------------------
++op_attrib op_cost(1); // Required cost attribute.
++
++//----------Instruction Attributes---------------------------------------------
++
++// Cost attribute (required).
++ins_attrib ins_cost(DEFAULT_COST);
++
++// Is this instruction a non-matching short branch variant of some
++// long branch? Not required.
++ins_attrib ins_short_branch(0);
++
++ins_attrib ins_is_TrapBasedCheckNode(true);
++
++// Number of constants.
++// This instruction uses the given number of constants
++// (optional attribute).
++// This is needed to determine early whether the constant pool will
++// exceed 4000 entries. Before postalloc_expand the overall number of constants
++// is determined. It's also used to compute the constant pool size
++// in Output().
++ins_attrib ins_num_consts(0);
++
++// Required alignment attribute (must be a power of 2) specifies the
++// alignment that some part of the instruction (not necessarily the
++// start) requires. If > 1, a compute_padding() function must be
++// provided for the instruction.
++ins_attrib ins_alignment(1);
++
++// Enforce/prohibit rematerializations.
++// - If an instruction is attributed with 'ins_cannot_rematerialize(true)'
++// then rematerialization of that instruction is prohibited and the
++// instruction's value will be spilled if necessary.
++//   Causes MachNode::rematerialize() to return false.
++// - If an instruction is attributed with 'ins_should_rematerialize(true)'
++// then rematerialization should be enforced and a copy of the instruction
++// should be inserted if possible; rematerialization is not guaranteed.
++// Note: this may result in rematerializations in front of every use.
++//   Allows MachNode::rematerialize() to return true.
++// (optional attribute)
++ins_attrib ins_cannot_rematerialize(false);
++ins_attrib ins_should_rematerialize(false);
++
++// Instruction has variable size depending on alignment.
++ins_attrib ins_variable_size_depending_on_alignment(false);
++
++// Instruction is a nop.
++ins_attrib ins_is_nop(false);
++
++// Instruction is mapped to a MachIfFastLock node (instead of MachFastLock).
++ins_attrib ins_use_mach_if_fast_lock_node(false);
++
++// Field for the toc offset of a constant.
++//
++// This is needed if the toc offset is not encodable as an immediate in
++// the PPC load instruction. If so, the upper (hi) bits of the offset are
++// added to the toc, and from this a load with immediate is performed.
++// With postalloc expand, we get two nodes that require the same offset
++// but which don't know about each other. The offset is only known
++// when the constant is added to the constant pool during emitting.
++// It is generated in the 'hi'-node adding the upper bits, and saved
++// in this node. The 'lo'-node has a link to the 'hi'-node and reads
++// the offset from there when it gets encoded.
++ins_attrib ins_field_const_toc_offset(0);
++ins_attrib ins_field_const_toc_offset_hi_node(0);
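++
++// Sketch (illustrative): a TOC offset too large for the 16-bit
++// displacement of a load is split across the two nodes, roughly
++//   addis Rtmp, Rtoc, offset@ha   // 'hi' node adds the upper bits
++//   ld    Rdst, offset@l(Rtmp)    // 'lo' node loads with the low bits
++// The actual offset is only known when the constant is emitted into the
++// constant pool, hence the fields above.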
++
++// A field that can hold the instruction's offset in the code buffer.
++// Set in the nodes emitter.
++ins_attrib ins_field_cbuf_insts_offset(-1);
++
++// Fields for referencing a call's load-IC-node.
++// If the toc offset cannot be encoded as an immediate in a load, we
++// use two nodes.
++ins_attrib ins_field_load_ic_hi_node(0);
++ins_attrib ins_field_load_ic_node(0);
++
++//----------OPERANDS-----------------------------------------------------------
++// Operand definitions must precede instruction definitions for correct
++// parsing in the ADLC because operands constitute user defined types
++// which are used in instruction definitions.
++//
++// Formats are generated automatically for constants and base registers.
++
++//----------Simple Operands----------------------------------------------------
++// Immediate Operands
++
++// Integer Immediate: 32-bit
++operand immI() %{
++ match(ConI);
++ op_cost(40);
++ format %{ %}
++ interface(CONST_INTER);
++%}
++
++operand immI8() %{
++ predicate(Assembler::is_simm(n->get_int(), 8));
++ op_cost(0);
++ match(ConI);
++ format %{ %}
++ interface(CONST_INTER);
++%}
++
++// Integer Immediate: 16-bit
++operand immI16() %{
++ predicate(Assembler::is_simm(n->get_int(), 16));
++ op_cost(0);
++ match(ConI);
++ format %{ %}
++ interface(CONST_INTER);
++%}
++
++// Integer Immediate: 32-bit, where lowest 16 bits are 0x0000.
++operand immIhi16() %{
++ predicate(((n->get_int() & 0xffff0000) != 0) && ((n->get_int() & 0xffff) == 0));
++ match(ConI);
++ op_cost(0);
++ format %{ %}
++ interface(CONST_INTER);
++%}
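++
++// For example, 0x12340000 satisfies immIhi16 and can be materialized with
++// a single lis (load immediate shifted) instruction.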
++
++operand immInegpow2() %{
++ predicate(is_power_of_2_long((jlong) (julong) (juint) (-(n->get_int()))));
++ match(ConI);
++ op_cost(0);
++ format %{ %}
++ interface(CONST_INTER);
++%}
++
++operand immIpow2minus1() %{
++ predicate(is_power_of_2_long((((jlong) (n->get_int()))+1)));
++ match(ConI);
++ op_cost(0);
++ format %{ %}
++ interface(CONST_INTER);
++%}
++
++operand immIpowerOf2() %{
++ predicate(is_power_of_2_long((((jlong) (julong) (juint) (n->get_int())))));
++ match(ConI);
++ op_cost(0);
++ format %{ %}
++ interface(CONST_INTER);
++%}
++
++// Unsigned Integer Immediate: the values 0-31
++operand uimmI5() %{
++ predicate(Assembler::is_uimm(n->get_int(), 5));
++ match(ConI);
++ op_cost(0);
++ format %{ %}
++ interface(CONST_INTER);
++%}
++
++// Unsigned Integer Immediate: 6-bit
++operand uimmI6() %{
++ predicate(Assembler::is_uimm(n->get_int(), 6));
++ match(ConI);
++ op_cost(0);
++ format %{ %}
++ interface(CONST_INTER);
++%}
++
++// Unsigned Integer Immediate: 6-bit int, greater than or equal to 32
++operand uimmI6_ge32() %{
++ predicate(Assembler::is_uimm(n->get_int(), 6) && n->get_int() >= 32);
++ match(ConI);
++ op_cost(0);
++ format %{ %}
++ interface(CONST_INTER);
++%}
++
++// Unsigned Integer Immediate: 15-bit
++operand uimmI15() %{
++ predicate(Assembler::is_uimm(n->get_int(), 15));
++ match(ConI);
++ op_cost(0);
++ format %{ %}
++ interface(CONST_INTER);
++%}
++
++// Unsigned Integer Immediate: 16-bit
++operand uimmI16() %{
++ predicate(Assembler::is_uimm(n->get_int(), 16));
++ match(ConI);
++ op_cost(0);
++ format %{ %}
++ interface(CONST_INTER);
++%}
++
++// constant 'int 0'.
++operand immI_0() %{
++ predicate(n->get_int() == 0);
++ match(ConI);
++ op_cost(0);
++ format %{ %}
++ interface(CONST_INTER);
++%}
++
++// constant 'int 1'.
++operand immI_1() %{
++ predicate(n->get_int() == 1);
++ match(ConI);
++ op_cost(0);
++ format %{ %}
++ interface(CONST_INTER);
++%}
++
++// constant 'int -1'.
++operand immI_minus1() %{
++ predicate(n->get_int() == -1);
++ match(ConI);
++ op_cost(0);
++ format %{ %}
++ interface(CONST_INTER);
++%}
++
++// int value 16.
++operand immI_16() %{
++ predicate(n->get_int() == 16);
++ match(ConI);
++ op_cost(0);
++ format %{ %}
++ interface(CONST_INTER);
++%}
++
++// int value 24.
++operand immI_24() %{
++ predicate(n->get_int() == 24);
++ match(ConI);
++ op_cost(0);
++ format %{ %}
++ interface(CONST_INTER);
++%}
++
++// Compressed oops constants
++// Pointer Immediate
++operand immN() %{
++ match(ConN);
++
++ op_cost(10);
++ format %{ %}
++ interface(CONST_INTER);
++%}
++
++// NULL Pointer Immediate
++operand immN_0() %{
++ predicate(n->get_narrowcon() == 0);
++ match(ConN);
++
++ op_cost(0);
++ format %{ %}
++ interface(CONST_INTER);
++%}
++
++// Compressed klass constants
++operand immNKlass() %{
++ match(ConNKlass);
++
++ op_cost(0);
++ format %{ %}
++ interface(CONST_INTER);
++%}
++
++// This operand can be used to avoid matching of an instruct
++// with a chain rule.
++operand immNKlass_NM() %{
++ match(ConNKlass);
++ predicate(false);
++ op_cost(0);
++ format %{ %}
++ interface(CONST_INTER);
++%}
++
++// Pointer Immediate: 64-bit
++operand immP() %{
++ match(ConP);
++ op_cost(0);
++ format %{ %}
++ interface(CONST_INTER);
++%}
++
++// Operand to avoid match of loadConP.
++// This operand can be used to avoid matching of an instruct
++// with a chain rule.
++operand immP_NM() %{
++ match(ConP);
++ predicate(false);
++ op_cost(0);
++ format %{ %}
++ interface(CONST_INTER);
++%}
++
++// constant 'pointer 0'.
++operand immP_0() %{
++ predicate(n->get_ptr() == 0);
++ match(ConP);
++ op_cost(0);
++ format %{ %}
++ interface(CONST_INTER);
++%}
++
++// pointer 0x0 or 0x1
++operand immP_0or1() %{
++ predicate((n->get_ptr() == 0) || (n->get_ptr() == 1));
++ match(ConP);
++ op_cost(0);
++ format %{ %}
++ interface(CONST_INTER);
++%}
++
++operand immL() %{
++ match(ConL);
++ op_cost(40);
++ format %{ %}
++ interface(CONST_INTER);
++%}
++
++// Long Immediate: 16-bit
++operand immL16() %{
++ predicate(Assembler::is_simm(n->get_long(), 16));
++ match(ConL);
++ op_cost(0);
++ format %{ %}
++ interface(CONST_INTER);
++%}
++
++// Long Immediate: 16-bit, 4-aligned
++operand immL16Alg4() %{
++ predicate(Assembler::is_simm(n->get_long(), 16) && ((n->get_long() & 0x3) == 0));
++ match(ConL);
++ op_cost(0);
++ format %{ %}
++ interface(CONST_INTER);
++%}
++
++// Long Immediate: 32-bit, where lowest 16 bits are 0x0000.
++operand immL32hi16() %{
++ predicate(Assembler::is_simm(n->get_long(), 32) && ((n->get_long() & 0xffffL) == 0L));
++ match(ConL);
++ op_cost(0);
++ format %{ %}
++ interface(CONST_INTER);
++%}
++
++// Long Immediate: 32-bit
++operand immL32() %{
++ predicate(Assembler::is_simm(n->get_long(), 32));
++ match(ConL);
++ op_cost(0);
++ format %{ %}
++ interface(CONST_INTER);
++%}
++
++// Long Immediate: 64-bit, where the highest 16 bits are non-zero and the lower 48 bits are 0x0000.
++operand immLhighest16() %{
++ predicate((n->get_long() & 0xffff000000000000L) != 0L && (n->get_long() & 0x0000ffffffffffffL) == 0L);
++ match(ConL);
++ op_cost(0);
++ format %{ %}
++ interface(CONST_INTER);
++%}
++
++operand immLnegpow2() %{
++ predicate(is_power_of_2_long((jlong)-(n->get_long())));
++ match(ConL);
++ op_cost(0);
++ format %{ %}
++ interface(CONST_INTER);
++%}
++
++operand immLpow2minus1() %{
++ predicate(is_power_of_2_long((((jlong) (n->get_long()))+1)) &&
++ (n->get_long() != (jlong)0xffffffffffffffffL));
++ match(ConL);
++ op_cost(0);
++ format %{ %}
++ interface(CONST_INTER);
++%}
++
++// constant 'long 0'.
++operand immL_0() %{
++ predicate(n->get_long() == 0L);
++ match(ConL);
++ op_cost(0);
++ format %{ %}
++ interface(CONST_INTER);
++%}
++
++// constant 'long -1'.
++operand immL_minus1() %{
++ predicate(n->get_long() == -1L);
++ match(ConL);
++ op_cost(0);
++ format %{ %}
++ interface(CONST_INTER);
++%}
++
++// Long Immediate: low 32-bit mask
++operand immL_32bits() %{
++ predicate(n->get_long() == 0xFFFFFFFFL);
++ match(ConL);
++ op_cost(0);
++ format %{ %}
++ interface(CONST_INTER);
++%}
++
++// Unsigned Long Immediate: 16-bit
++operand uimmL16() %{
++ predicate(Assembler::is_uimm(n->get_long(), 16));
++ match(ConL);
++ op_cost(0);
++ format %{ %}
++ interface(CONST_INTER);
++%}
++
++// Float Immediate
++operand immF() %{
++ match(ConF);
++ op_cost(40);
++ format %{ %}
++ interface(CONST_INTER);
++%}
++
++// constant 'float +0.0'.
++operand immF_0() %{
++ predicate((n->getf() == 0) &&
++ (fpclassify(n->getf()) == FP_ZERO) && (signbit(n->getf()) == 0));
++ match(ConF);
++ op_cost(0);
++ format %{ %}
++ interface(CONST_INTER);
++%}
++
++// Double Immediate
++operand immD() %{
++ match(ConD);
++ op_cost(40);
++ format %{ %}
++ interface(CONST_INTER);
++%}
++
++// Integer Register Operands
++// Integer Destination Register
++// See definition of reg_class bits32_reg_rw.
++operand iRegIdst() %{
++ constraint(ALLOC_IN_RC(bits32_reg_rw));
++ match(RegI);
++ match(rscratch1RegI);
++ match(rscratch2RegI);
++ match(rarg1RegI);
++ match(rarg2RegI);
++ match(rarg3RegI);
++ match(rarg4RegI);
++ format %{ %}
++ interface(REG_INTER);
++%}
++
++// Integer Source Register
++// See definition of reg_class bits32_reg_ro.
++operand iRegIsrc() %{
++ constraint(ALLOC_IN_RC(bits32_reg_ro));
++ match(RegI);
++ match(rscratch1RegI);
++ match(rscratch2RegI);
++ match(rarg1RegI);
++ match(rarg2RegI);
++ match(rarg3RegI);
++ match(rarg4RegI);
++ format %{ %}
++ interface(REG_INTER);
++%}
++
++operand rscratch1RegI() %{
++ constraint(ALLOC_IN_RC(rscratch1_bits32_reg));
++ match(iRegIdst);
++ format %{ %}
++ interface(REG_INTER);
++%}
++
++operand rscratch2RegI() %{
++ constraint(ALLOC_IN_RC(rscratch2_bits32_reg));
++ match(iRegIdst);
++ format %{ %}
++ interface(REG_INTER);
++%}
++
++operand rarg1RegI() %{
++ constraint(ALLOC_IN_RC(rarg1_bits32_reg));
++ match(iRegIdst);
++ format %{ %}
++ interface(REG_INTER);
++%}
++
++operand rarg2RegI() %{
++ constraint(ALLOC_IN_RC(rarg2_bits32_reg));
++ match(iRegIdst);
++ format %{ %}
++ interface(REG_INTER);
++%}
++
++operand rarg3RegI() %{
++ constraint(ALLOC_IN_RC(rarg3_bits32_reg));
++ match(iRegIdst);
++ format %{ %}
++ interface(REG_INTER);
++%}
++
++operand rarg4RegI() %{
++ constraint(ALLOC_IN_RC(rarg4_bits32_reg));
++ match(iRegIdst);
++ format %{ %}
++ interface(REG_INTER);
++%}
++
++operand rarg1RegL() %{
++ constraint(ALLOC_IN_RC(rarg1_bits64_reg));
++ match(iRegLdst);
++ format %{ %}
++ interface(REG_INTER);
++%}
++
++operand rarg2RegL() %{
++ constraint(ALLOC_IN_RC(rarg2_bits64_reg));
++ match(iRegLdst);
++ format %{ %}
++ interface(REG_INTER);
++%}
++
++operand rarg3RegL() %{
++ constraint(ALLOC_IN_RC(rarg3_bits64_reg));
++ match(iRegLdst);
++ format %{ %}
++ interface(REG_INTER);
++%}
++
++operand rarg4RegL() %{
++ constraint(ALLOC_IN_RC(rarg4_bits64_reg));
++ match(iRegLdst);
++ format %{ %}
++ interface(REG_INTER);
++%}
++
++// Pointer Destination Register
++// See definition of reg_class bits64_reg_rw.
++operand iRegPdst() %{
++ constraint(ALLOC_IN_RC(bits64_reg_rw));
++ match(RegP);
++ match(rscratch1RegP);
++ match(rscratch2RegP);
++ match(rarg1RegP);
++ match(rarg2RegP);
++ match(rarg3RegP);
++ match(rarg4RegP);
++ format %{ %}
++ interface(REG_INTER);
++%}
++
++// Pointer Destination Register
++// Operand not using r11 and r12 (killed in epilog).
++operand iRegPdstNoScratch() %{
++ constraint(ALLOC_IN_RC(bits64_reg_leaf_call));
++ match(RegP);
++ match(rarg1RegP);
++ match(rarg2RegP);
++ match(rarg3RegP);
++ match(rarg4RegP);
++ format %{ %}
++ interface(REG_INTER);
++%}
++
++// Pointer Source Register
++// See definition of reg_class bits64_reg_ro.
++operand iRegPsrc() %{
++ constraint(ALLOC_IN_RC(bits64_reg_ro));
++ match(RegP);
++ match(iRegPdst);
++ match(rscratch1RegP);
++ match(rscratch2RegP);
++ match(rarg1RegP);
++ match(rarg2RegP);
++ match(rarg3RegP);
++ match(rarg4RegP);
++ match(threadRegP);
++ format %{ %}
++ interface(REG_INTER);
++%}
++
++// Thread operand.
++operand threadRegP() %{
++ constraint(ALLOC_IN_RC(thread_bits64_reg));
++ match(iRegPdst);
++ format %{ "R16" %}
++ interface(REG_INTER);
++%}
++
++operand rscratch1RegP() %{
++ constraint(ALLOC_IN_RC(rscratch1_bits64_reg));
++ match(iRegPdst);
++ format %{ "R11" %}
++ interface(REG_INTER);
++%}
++
++operand rscratch2RegP() %{
++ constraint(ALLOC_IN_RC(rscratch2_bits64_reg));
++ match(iRegPdst);
++ format %{ %}
++ interface(REG_INTER);
++%}
++
++operand rarg1RegP() %{
++ constraint(ALLOC_IN_RC(rarg1_bits64_reg));
++ match(iRegPdst);
++ format %{ %}
++ interface(REG_INTER);
++%}
++
++operand rarg2RegP() %{
++ constraint(ALLOC_IN_RC(rarg2_bits64_reg));
++ match(iRegPdst);
++ format %{ %}
++ interface(REG_INTER);
++%}
++
++operand rarg3RegP() %{
++ constraint(ALLOC_IN_RC(rarg3_bits64_reg));
++ match(iRegPdst);
++ format %{ %}
++ interface(REG_INTER);
++%}
++
++operand rarg4RegP() %{
++ constraint(ALLOC_IN_RC(rarg4_bits64_reg));
++ match(iRegPdst);
++ format %{ %}
++ interface(REG_INTER);
++%}
++
++operand iRegNsrc() %{
++ constraint(ALLOC_IN_RC(bits32_reg_ro));
++ match(RegN);
++ match(iRegNdst);
++
++ format %{ %}
++ interface(REG_INTER);
++%}
++
++operand iRegNdst() %{
++ constraint(ALLOC_IN_RC(bits32_reg_rw));
++ match(RegN);
++
++ format %{ %}
++ interface(REG_INTER);
++%}
++
++// Long Destination Register
++// See definition of reg_class bits64_reg_rw.
++operand iRegLdst() %{
++ constraint(ALLOC_IN_RC(bits64_reg_rw));
++ match(RegL);
++ match(rscratch1RegL);
++ match(rscratch2RegL);
++ format %{ %}
++ interface(REG_INTER);
++%}
++
++// Long Source Register
++// See definition of reg_class bits64_reg_ro.
++operand iRegLsrc() %{
++ constraint(ALLOC_IN_RC(bits64_reg_ro));
++ match(RegL);
++ match(iRegLdst);
++ match(rscratch1RegL);
++ match(rscratch2RegL);
++ format %{ %}
++ interface(REG_INTER);
++%}
++
++// Special operand for ConvL2I.
++operand iRegL2Isrc(iRegLsrc reg) %{
++ constraint(ALLOC_IN_RC(bits64_reg_ro));
++ match(ConvL2I reg);
++ format %{ "ConvL2I($reg)" %}
++ interface(REG_INTER)
++%}
++
++operand rscratch1RegL() %{
++ constraint(ALLOC_IN_RC(rscratch1_bits64_reg));
++ match(RegL);
++ format %{ %}
++ interface(REG_INTER);
++%}
++
++operand rscratch2RegL() %{
++ constraint(ALLOC_IN_RC(rscratch2_bits64_reg));
++ match(RegL);
++ format %{ %}
++ interface(REG_INTER);
++%}
++
++// Condition Code Flag Registers
++operand flagsReg() %{
++ constraint(ALLOC_IN_RC(int_flags));
++ match(RegFlags);
++ format %{ %}
++ interface(REG_INTER);
++%}
++
++// Condition Code Flag Register CR0
++operand flagsRegCR0() %{
++ constraint(ALLOC_IN_RC(int_flags_CR0));
++ match(RegFlags);
++ format %{ "CR0" %}
++ interface(REG_INTER);
++%}
++
++operand flagsRegCR1() %{
++ constraint(ALLOC_IN_RC(int_flags_CR1));
++ match(RegFlags);
++ format %{ "CR1" %}
++ interface(REG_INTER);
++%}
++
++operand flagsRegCR6() %{
++ constraint(ALLOC_IN_RC(int_flags_CR6));
++ match(RegFlags);
++ format %{ "CR6" %}
++ interface(REG_INTER);
++%}
++
++operand regCTR() %{
++ constraint(ALLOC_IN_RC(ctr_reg));
++ // RegFlags should work. Introducing a RegSpecial type would cause a
++ // lot of changes.
++ match(RegFlags);
++ format %{"SR_CTR" %}
++ interface(REG_INTER);
++%}
++
++operand regD() %{
++ constraint(ALLOC_IN_RC(dbl_reg));
++ match(RegD);
++ format %{ %}
++ interface(REG_INTER);
++%}
++
++operand regF() %{
++ constraint(ALLOC_IN_RC(flt_reg));
++ match(RegF);
++ format %{ %}
++ interface(REG_INTER);
++%}
++
++// Special Registers
++
++// Method Register
++operand inline_cache_regP(iRegPdst reg) %{
++ constraint(ALLOC_IN_RC(r19_bits64_reg)); // inline_cache_reg
++ match(reg);
++ format %{ %}
++ interface(REG_INTER);
++%}
++
++operand compiler_method_oop_regP(iRegPdst reg) %{
++ constraint(ALLOC_IN_RC(rscratch1_bits64_reg)); // compiler_method_oop_reg
++ match(reg);
++ format %{ %}
++ interface(REG_INTER);
++%}
++
++operand interpreter_method_oop_regP(iRegPdst reg) %{
++ constraint(ALLOC_IN_RC(r19_bits64_reg)); // interpreter_method_oop_reg
++ match(reg);
++ format %{ %}
++ interface(REG_INTER);
++%}
++
++// Operands to remove register moves in unscaled mode.
++// Match read/write registers with an EncodeP node if neither shift nor add are required.
++operand iRegP2N(iRegPsrc reg) %{
++  predicate(false /* TODO: PPC port MatchDecodeNodes */ && Universe::narrow_oop_shift() == 0);
++ constraint(ALLOC_IN_RC(bits64_reg_ro));
++ match(EncodeP reg);
++ format %{ "$reg" %}
++ interface(REG_INTER)
++%}
++
++operand iRegN2P(iRegNsrc reg) %{
++ predicate(false /* TODO: PPC port MatchDecodeNodes*/);
++ constraint(ALLOC_IN_RC(bits32_reg_ro));
++ match(DecodeN reg);
++ match(DecodeNKlass reg);
++ format %{ "$reg" %}
++ interface(REG_INTER)
++%}
++
++//----------Complex Operands---------------------------------------------------
++// Indirect Memory Reference
++operand indirect(iRegPsrc reg) %{
++ constraint(ALLOC_IN_RC(bits64_reg_ro));
++ match(reg);
++ op_cost(100);
++ format %{ "[$reg]" %}
++ interface(MEMORY_INTER) %{
++ base($reg);
++ index(0x0);
++ scale(0x0);
++ disp(0x0);
++ %}
++%}
++
++// Indirect with Offset
++operand indOffset16(iRegPsrc reg, immL16 offset) %{
++ constraint(ALLOC_IN_RC(bits64_reg_ro));
++ match(AddP reg offset);
++ op_cost(100);
++ format %{ "[$reg + $offset]" %}
++ interface(MEMORY_INTER) %{
++ base($reg);
++ index(0x0);
++ scale(0x0);
++ disp($offset);
++ %}
++%}
++
++// Indirect with 4-aligned Offset
++operand indOffset16Alg4(iRegPsrc reg, immL16Alg4 offset) %{
++ constraint(ALLOC_IN_RC(bits64_reg_ro));
++ match(AddP reg offset);
++ op_cost(100);
++ format %{ "[$reg + $offset]" %}
++ interface(MEMORY_INTER) %{
++ base($reg);
++ index(0x0);
++ scale(0x0);
++ disp($offset);
++ %}
++%}
++
++//----------Complex Operands for Compressed OOPs-------------------------------
++// Compressed OOPs with narrow_oop_shift == 0.
++
++// Indirect Memory Reference, compressed OOP
++operand indirectNarrow(iRegNsrc reg) %{
++ predicate(false /* TODO: PPC port MatchDecodeNodes*/);
++ constraint(ALLOC_IN_RC(bits64_reg_ro));
++ match(DecodeN reg);
++ match(DecodeNKlass reg);
++ op_cost(100);
++ format %{ "[$reg]" %}
++ interface(MEMORY_INTER) %{
++ base($reg);
++ index(0x0);
++ scale(0x0);
++ disp(0x0);
++ %}
++%}
++
++// Indirect with Offset, compressed OOP
++operand indOffset16Narrow(iRegNsrc reg, immL16 offset) %{
++ predicate(false /* TODO: PPC port MatchDecodeNodes*/);
++ constraint(ALLOC_IN_RC(bits64_reg_ro));
++ match(AddP (DecodeN reg) offset);
++ match(AddP (DecodeNKlass reg) offset);
++ op_cost(100);
++ format %{ "[$reg + $offset]" %}
++ interface(MEMORY_INTER) %{
++ base($reg);
++ index(0x0);
++ scale(0x0);
++ disp($offset);
++ %}
++%}
++
++// Indirect with 4-aligned Offset, compressed OOP
++operand indOffset16NarrowAlg4(iRegNsrc reg, immL16Alg4 offset) %{
++ predicate(false /* TODO: PPC port MatchDecodeNodes*/);
++ constraint(ALLOC_IN_RC(bits64_reg_ro));
++ match(AddP (DecodeN reg) offset);
++ match(AddP (DecodeNKlass reg) offset);
++ op_cost(100);
++ format %{ "[$reg + $offset]" %}
++ interface(MEMORY_INTER) %{
++ base($reg);
++ index(0x0);
++ scale(0x0);
++ disp($offset);
++ %}
++%}
++
++//----------Special Memory Operands--------------------------------------------
++// Stack Slot Operand
++//
++// This operand is used for loading and storing temporary values on
++// the stack where a match requires a value to flow through memory.
++operand stackSlotI(sRegI reg) %{
++ constraint(ALLOC_IN_RC(stack_slots));
++ op_cost(100);
++ //match(RegI);
++ format %{ "[sp+$reg]" %}
++ interface(MEMORY_INTER) %{
++ base(0x1); // R1_SP
++ index(0x0);
++ scale(0x0);
++ disp($reg); // Stack Offset
++ %}
++%}
++
++operand stackSlotL(sRegL reg) %{
++ constraint(ALLOC_IN_RC(stack_slots));
++ op_cost(100);
++ //match(RegL);
++ format %{ "[sp+$reg]" %}
++ interface(MEMORY_INTER) %{
++ base(0x1); // R1_SP
++ index(0x0);
++ scale(0x0);
++ disp($reg); // Stack Offset
++ %}
++%}
++
++operand stackSlotP(sRegP reg) %{
++ constraint(ALLOC_IN_RC(stack_slots));
++ op_cost(100);
++ //match(RegP);
++ format %{ "[sp+$reg]" %}
++ interface(MEMORY_INTER) %{
++ base(0x1); // R1_SP
++ index(0x0);
++ scale(0x0);
++ disp($reg); // Stack Offset
++ %}
++%}
++
++operand stackSlotF(sRegF reg) %{
++ constraint(ALLOC_IN_RC(stack_slots));
++ op_cost(100);
++ //match(RegF);
++ format %{ "[sp+$reg]" %}
++ interface(MEMORY_INTER) %{
++ base(0x1); // R1_SP
++ index(0x0);
++ scale(0x0);
++ disp($reg); // Stack Offset
++ %}
++%}
++
++operand stackSlotD(sRegD reg) %{
++ constraint(ALLOC_IN_RC(stack_slots));
++ op_cost(100);
++ //match(RegD);
++ format %{ "[sp+$reg]" %}
++ interface(MEMORY_INTER) %{
++ base(0x1); // R1_SP
++ index(0x0);
++ scale(0x0);
++ disp($reg); // Stack Offset
++ %}
++%}
++
++// Operands for expressing Control Flow
++// NOTE: Label is a predefined operand which should not be redefined in
++// the AD file. It is generically handled within the ADLC.
++
++//----------Conditional Branch Operands----------------------------------------
++// Comparison Op
++//
++// This is the operation of the comparison, and is limited to the
++// following set of codes: L (<), LE (<=), G (>), GE (>=), E (==), NE
++// (!=).
++//
++// Other attributes of the comparison, such as unsignedness, are specified
++// by the comparison instruction that sets a condition code flags register.
++// That result is represented by a flags operand whose subtype is appropriate
++// to the unsignedness (etc.) of the comparison.
++//
++// Later, the instruction which matches both the Comparison Op (a Bool) and
++// the flags (produced by the Cmp) specifies the coding of the comparison op
++// by matching a specific subtype of Bool operand below.
++
++// When used for floating point comparisons: unordered is treated the same as less.
++operand cmpOp() %{
++ match(Bool);
++ format %{ "" %}
++ interface(COND_INTER) %{
++ // BO only encodes bit 4 of bcondCRbiIsX, as bits 1-3 are always '100'.
++ // BO & BI
++ equal(0xA); // 10 10: bcondCRbiIs1 & Condition::equal
++ not_equal(0x2); // 00 10: bcondCRbiIs0 & Condition::equal
++ less(0x8); // 10 00: bcondCRbiIs1 & Condition::less
++ greater_equal(0x0); // 00 00: bcondCRbiIs0 & Condition::less
++ less_equal(0x1); // 00 01: bcondCRbiIs0 & Condition::greater
++ greater(0x9); // 10 01: bcondCRbiIs1 & Condition::greater
++ overflow(0xB); // 10 11: bcondCRbiIs1 & Condition::summary_overflow
++ no_overflow(0x3); // 00 11: bcondCRbiIs0 & Condition::summary_overflow
++ %}
++%}
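++
++// Worked example from the table above: 'equal' = 0xA combines
++// bcondCRbiIs1 (branch if the CR bit is set, contributing 0x8) with
++// Condition::equal (0x2), while 'greater_equal' = 0x0 tests the 'less'
++// bit (0x0) with bcondCRbiIs0 (branch if the bit is clear).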
++
++//----------OPERAND CLASSES----------------------------------------------------
++// Operand Classes are groups of operands that are used to simplify
++// instruction definitions by not requiring the AD writer to specify
++// separate instructions for every form of operand when the
++// instruction accepts multiple operand types with the same basic
++// encoding and format. The classic case of this is memory operands.
++// Indirect is not included since its use is limited to Compare & Swap.
++
++opclass memory(indirect, indOffset16 /*, indIndex, tlsReference*/, indirectNarrow, indOffset16Narrow);
++// Memory operand where offsets are 4-aligned. Required for ld, std.
++opclass memoryAlg4(indirect, indOffset16Alg4, indirectNarrow, indOffset16NarrowAlg4);
++opclass indirectMemory(indirect, indirectNarrow);
++
++// Special opclass for I and ConvL2I.
++opclass iRegIsrc_iRegL2Isrc(iRegIsrc, iRegL2Isrc);
++
++// Operand classes to match encode and decode. iRegN_P2N is only used
++// for storeN. I have never seen an encode node elsewhere.
++opclass iRegN_P2N(iRegNsrc, iRegP2N);
++opclass iRegP_N2P(iRegPsrc, iRegN2P);
++
++//----------PIPELINE-----------------------------------------------------------
++
++pipeline %{
++
++// See J.M.Tendler et al. "Power4 system microarchitecture", IBM
++// J. Res. & Dev., No. 1, Jan. 2002.
++
++//----------ATTRIBUTES---------------------------------------------------------
++attributes %{
++
++ // Power4 instructions are of fixed length.
++ fixed_size_instructions;
++
++ // TODO: if `bundle' means number of instructions fetched
++ // per cycle, this is 8. If `bundle' means Power4 `group', that is
++ // max instructions issued per cycle, this is 5.
++ max_instructions_per_bundle = 8;
++
++ // A Power4 instruction is 4 bytes long.
++ instruction_unit_size = 4;
++
++ // The Power4 processor fetches 64 bytes...
++ instruction_fetch_unit_size = 64;
++
++ // ...in one line
++  instruction_fetch_units = 1;
++
++ // Unused, list one so that array generated by adlc is not empty.
++ // Aix compiler chokes if _nop_count = 0.
++ nops(fxNop);
++%}
++
++//----------RESOURCES----------------------------------------------------------
++// Resources are the functional units available to the machine
++resources(
++ PPC_BR, // branch unit
++ PPC_CR, // condition unit
++ PPC_FX1, // integer arithmetic unit 1
++ PPC_FX2, // integer arithmetic unit 2
++ PPC_LDST1, // load/store unit 1
++ PPC_LDST2, // load/store unit 2
++ PPC_FP1, // float arithmetic unit 1
++ PPC_FP2, // float arithmetic unit 2
++ PPC_LDST = PPC_LDST1 | PPC_LDST2,
++ PPC_FX = PPC_FX1 | PPC_FX2,
++ PPC_FP = PPC_FP1 | PPC_FP2
++ );
++
++//----------PIPELINE DESCRIPTION-----------------------------------------------
++// Pipeline Description specifies the stages in the machine's pipeline
++pipe_desc(
++ // Power4 longest pipeline path
++ PPC_IF, // instruction fetch
++ PPC_IC,
++ //PPC_BP, // branch prediction
++ PPC_D0, // decode
++ PPC_D1, // decode
++ PPC_D2, // decode
++ PPC_D3, // decode
++ PPC_Xfer1,
++ PPC_GD, // group definition
++ PPC_MP, // map
++ PPC_ISS, // issue
++ PPC_RF, // resource fetch
++ PPC_EX1, // execute (all units)
++ PPC_EX2, // execute (FP, LDST)
++ PPC_EX3, // execute (FP, LDST)
++ PPC_EX4, // execute (FP)
++ PPC_EX5, // execute (FP)
++ PPC_EX6, // execute (FP)
++ PPC_WB, // write back
++ PPC_Xfer2,
++ PPC_CP
++ );
++
++//----------PIPELINE CLASSES---------------------------------------------------
++// Pipeline Classes describe the stages in which input and output are
++// referenced by the hardware pipeline.
++
++// Simple pipeline classes.
++
++// Default pipeline class.
++pipe_class pipe_class_default() %{
++ single_instruction;
++ fixed_latency(2);
++%}
++
++// Pipeline class for empty instructions.
++pipe_class pipe_class_empty() %{
++ single_instruction;
++ fixed_latency(0);
++%}
++
++// Pipeline class for compares.
++pipe_class pipe_class_compare() %{
++ single_instruction;
++ fixed_latency(16);
++%}
++
++// Pipeline class for traps.
++pipe_class pipe_class_trap() %{
++ single_instruction;
++ fixed_latency(100);
++%}
++
++// Pipeline class for memory operations.
++pipe_class pipe_class_memory() %{
++ single_instruction;
++ fixed_latency(16);
++%}
++
++// Pipeline class for call.
++pipe_class pipe_class_call() %{
++ single_instruction;
++ fixed_latency(100);
++%}
++
++// Define the class for the Nop node.
++define %{
++ MachNop = pipe_class_default;
++%}
++
++%}
++
++//----------INSTRUCTIONS-------------------------------------------------------
++
++// Naming of instructions:
++// opA_operB / opA_operB_operC:
++// Operation 'op' with one or two source operands 'oper'. Result
++// type is A, source operand types are B and C.
++// Iff A == B == C, B and C are left out.
++//
++// The instructions are ordered according to the following scheme:
++// - loads
++// - load constants
++// - prefetch
++// - store
++// - encode/decode
++// - membar
++// - conditional moves
++// - compare & swap
++// - arithmetic and logic operations
++// * int: Add, Sub, Mul, Div, Mod
++// * int: lShift, arShift, urShift, rot
++// * float: Add, Sub, Mul, Div
++// * and, or, xor ...
++// - register moves: float <-> int, reg <-> stack, repl
++//  - cast (high level type casts: XtoP, castPP, castII, not_null, etc.)
++//  - conv (low level type casts requiring bit changes: sign extend, etc.)
++// - compares, range & zero checks.
++// - branches
++// - complex operations, intrinsics, min, max, replicate
++// - lock
++// - Calls
++//
++// If there are similar instructions with different types they are sorted:
++// int before float
++// small before big
++// signed before unsigned
++// e.g., loadS before loadUS before loadI before loadF.
++
++
++//----------Load/Store Instructions--------------------------------------------
++
++//----------Load Instructions--------------------------------------------------
++
++// Converts byte to int.
++// As convB2I_reg, but without a match rule. The match rule of convB2I_reg
++// reuses the 'amount' operand, but adlc expects the operand specification
++// and the operands in the match rule to be equivalent.
++instruct convB2I_reg_2(iRegIdst dst, iRegIsrc src) %{
++ effect(DEF dst, USE src);
++ format %{ "EXTSB $dst, $src \t// byte->int" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_extsb);
++ __ extsb($dst$$Register, $src$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct loadUB_indirect(iRegIdst dst, indirectMemory mem) %{
++ // match-rule, false predicate
++ match(Set dst (LoadB mem));
++ predicate(false);
++
++ format %{ "LBZ $dst, $mem" %}
++ size(4);
++ ins_encode( enc_lbz(dst, mem) );
++ ins_pipe(pipe_class_memory);
++%}
++
++instruct loadUB_indirect_ac(iRegIdst dst, indirectMemory mem) %{
++ // match-rule, false predicate
++ match(Set dst (LoadB mem));
++ predicate(false);
++
++ format %{ "LBZ $dst, $mem\n\t"
++ "TWI $dst\n\t"
++ "ISYNC" %}
++ size(12);
++ ins_encode( enc_lbz_ac(dst, mem) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Load Byte (8bit signed). LoadB = LoadUB + ConvUB2B.
++instruct loadB_indirect_Ex(iRegIdst dst, indirectMemory mem) %{
++ match(Set dst (LoadB mem));
++ predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
++ ins_cost(MEMORY_REF_COST + DEFAULT_COST);
++ expand %{
++ iRegIdst tmp;
++ loadUB_indirect(tmp, mem);
++ convB2I_reg_2(dst, tmp);
++ %}
++%}
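++
++// PPC64 has a zero-extending byte load (LBZ) but no sign-extending one,
++// so the expand above emits a two-instruction sequence:
++//   LBZ   tmp, mem   // zero-extend byte to int
++//   EXTSB dst, tmp   // sign-extend the low byte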
++
++instruct loadB_indirect_ac_Ex(iRegIdst dst, indirectMemory mem) %{
++ match(Set dst (LoadB mem));
++ ins_cost(3*MEMORY_REF_COST + DEFAULT_COST);
++ expand %{
++ iRegIdst tmp;
++ loadUB_indirect_ac(tmp, mem);
++ convB2I_reg_2(dst, tmp);
++ %}
++%}
++
++instruct loadUB_indOffset16(iRegIdst dst, indOffset16 mem) %{
++ // match-rule, false predicate
++ match(Set dst (LoadB mem));
++ predicate(false);
++
++ format %{ "LBZ $dst, $mem" %}
++ size(4);
++ ins_encode( enc_lbz(dst, mem) );
++ ins_pipe(pipe_class_memory);
++%}
++
++instruct loadUB_indOffset16_ac(iRegIdst dst, indOffset16 mem) %{
++ // match-rule, false predicate
++ match(Set dst (LoadB mem));
++ predicate(false);
++
++ format %{ "LBZ $dst, $mem\n\t"
++ "TWI $dst\n\t"
++ "ISYNC" %}
++ size(12);
++ ins_encode( enc_lbz_ac(dst, mem) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Load Byte (8bit signed). LoadB = LoadUB + ConvUB2B.
++instruct loadB_indOffset16_Ex(iRegIdst dst, indOffset16 mem) %{
++ match(Set dst (LoadB mem));
++ predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
++ ins_cost(MEMORY_REF_COST + DEFAULT_COST);
++
++ expand %{
++ iRegIdst tmp;
++ loadUB_indOffset16(tmp, mem);
++ convB2I_reg_2(dst, tmp);
++ %}
++%}
++
++instruct loadB_indOffset16_ac_Ex(iRegIdst dst, indOffset16 mem) %{
++ match(Set dst (LoadB mem));
++ ins_cost(3*MEMORY_REF_COST + DEFAULT_COST);
++
++ expand %{
++ iRegIdst tmp;
++ loadUB_indOffset16_ac(tmp, mem);
++ convB2I_reg_2(dst, tmp);
++ %}
++%}
++
++// Load Unsigned Byte (8bit UNsigned) into an int reg.
++instruct loadUB(iRegIdst dst, memory mem) %{
++ predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
++ match(Set dst (LoadUB mem));
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "LBZ $dst, $mem \t// byte, zero-extend to int" %}
++ size(4);
++ ins_encode( enc_lbz(dst, mem) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Load Unsigned Byte (8bit UNsigned) acquire.
++instruct loadUB_ac(iRegIdst dst, memory mem) %{
++ match(Set dst (LoadUB mem));
++ ins_cost(3*MEMORY_REF_COST);
++
++ format %{ "LBZ $dst, $mem \t// byte, zero-extend to int, acquire\n\t"
++ "TWI $dst\n\t"
++ "ISYNC" %}
++ size(12);
++ ins_encode( enc_lbz_ac(dst, mem) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Load Unsigned Byte (8bit UNsigned) into a Long Register.
++instruct loadUB2L(iRegLdst dst, memory mem) %{
++ match(Set dst (ConvI2L (LoadUB mem)));
++ predicate(_kids[0]->_leaf->as_Load()->is_unordered() || followed_by_acquire(_kids[0]->_leaf));
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "LBZ $dst, $mem \t// byte, zero-extend to long" %}
++ size(4);
++ ins_encode( enc_lbz(dst, mem) );
++ ins_pipe(pipe_class_memory);
++%}
++
++instruct loadUB2L_ac(iRegLdst dst, memory mem) %{
++ match(Set dst (ConvI2L (LoadUB mem)));
++ ins_cost(3*MEMORY_REF_COST);
++
++ format %{ "LBZ $dst, $mem \t// byte, zero-extend to long, acquire\n\t"
++ "TWI $dst\n\t"
++ "ISYNC" %}
++ size(12);
++ ins_encode( enc_lbz_ac(dst, mem) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Load Short (16bit signed)
++instruct loadS(iRegIdst dst, memory mem) %{
++ match(Set dst (LoadS mem));
++ predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "LHA $dst, $mem" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_lha);
++ int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
++ __ lha($dst$$Register, Idisp, $mem$$base$$Register);
++ %}
++ ins_pipe(pipe_class_memory);
++%}
++
++// Load Short (16bit signed) acquire.
++instruct loadS_ac(iRegIdst dst, memory mem) %{
++ match(Set dst (LoadS mem));
++ ins_cost(3*MEMORY_REF_COST);
++
++ format %{ "LHA $dst, $mem\t acquire\n\t"
++ "TWI $dst\n\t"
++ "ISYNC" %}
++ size(12);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++ int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
++ __ lha($dst$$Register, Idisp, $mem$$base$$Register);
++ __ twi_0($dst$$Register);
++ __ isync();
++ %}
++ ins_pipe(pipe_class_memory);
++%}
++
++// Load Char (16bit unsigned)
++instruct loadUS(iRegIdst dst, memory mem) %{
++ match(Set dst (LoadUS mem));
++ predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "LHZ $dst, $mem" %}
++ size(4);
++ ins_encode( enc_lhz(dst, mem) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Load Char (16bit unsigned) acquire.
++instruct loadUS_ac(iRegIdst dst, memory mem) %{
++ match(Set dst (LoadUS mem));
++ ins_cost(3*MEMORY_REF_COST);
++
++ format %{ "LHZ $dst, $mem \t// acquire\n\t"
++ "TWI $dst\n\t"
++ "ISYNC" %}
++ size(12);
++ ins_encode( enc_lhz_ac(dst, mem) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Load Unsigned Short/Char (16bit UNsigned) into a Long Register.
++instruct loadUS2L(iRegLdst dst, memory mem) %{
++ match(Set dst (ConvI2L (LoadUS mem)));
++ predicate(_kids[0]->_leaf->as_Load()->is_unordered() || followed_by_acquire(_kids[0]->_leaf));
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "LHZ $dst, $mem \t// short, zero-extend to long" %}
++ size(4);
++ ins_encode( enc_lhz(dst, mem) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Load Unsigned Short/Char (16bit UNsigned) into a Long Register acquire.
++instruct loadUS2L_ac(iRegLdst dst, memory mem) %{
++ match(Set dst (ConvI2L (LoadUS mem)));
++ ins_cost(3*MEMORY_REF_COST);
++
++ format %{ "LHZ $dst, $mem \t// short, zero-extend to long, acquire\n\t"
++ "TWI $dst\n\t"
++ "ISYNC" %}
++ size(12);
++ ins_encode( enc_lhz_ac(dst, mem) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Load Integer.
++instruct loadI(iRegIdst dst, memory mem) %{
++ match(Set dst (LoadI mem));
++ predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "LWZ $dst, $mem" %}
++ size(4);
++ ins_encode( enc_lwz(dst, mem) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Load Integer acquire.
++instruct loadI_ac(iRegIdst dst, memory mem) %{
++ match(Set dst (LoadI mem));
++ ins_cost(3*MEMORY_REF_COST);
++
++ format %{ "LWZ $dst, $mem \t// load acquire\n\t"
++ "TWI $dst\n\t"
++ "ISYNC" %}
++ size(12);
++ ins_encode( enc_lwz_ac(dst, mem) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Match loading integer and casting it to unsigned int in
++// long register.
++// LoadI + ConvI2L + AndL 0xffffffff.
++instruct loadUI2L(iRegLdst dst, memory mem, immL_32bits mask) %{
++ match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
++ predicate(_kids[0]->_kids[0]->_leaf->as_Load()->is_unordered());
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "LWZ $dst, $mem \t// zero-extend to long" %}
++ size(4);
++ ins_encode( enc_lwz(dst, mem) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Match loading integer and casting it to long.
++instruct loadI2L(iRegLdst dst, memory mem) %{
++ match(Set dst (ConvI2L (LoadI mem)));
++ predicate(_kids[0]->_leaf->as_Load()->is_unordered());
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "LWA $dst, $mem \t// loadI2L" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_lwa);
++ int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
++ __ lwa($dst$$Register, Idisp, $mem$$base$$Register);
++ %}
++ ins_pipe(pipe_class_memory);
++%}
++
++// Match loading integer and casting it to long - acquire.
++instruct loadI2L_ac(iRegLdst dst, memory mem) %{
++ match(Set dst (ConvI2L (LoadI mem)));
++ ins_cost(3*MEMORY_REF_COST);
++
++ format %{ "LWA $dst, $mem \t// loadI2L acquire"
++ "TWI $dst\n\t"
++ "ISYNC" %}
++ size(12);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_lwa);
++ int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
++ __ lwa($dst$$Register, Idisp, $mem$$base$$Register);
++ __ twi_0($dst$$Register);
++ __ isync();
++ %}
++ ins_pipe(pipe_class_memory);
++%}
++
++// Load Long - aligned
++instruct loadL(iRegLdst dst, memoryAlg4 mem) %{
++ match(Set dst (LoadL mem));
++ predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "LD $dst, $mem \t// long" %}
++ size(4);
++ ins_encode( enc_ld(dst, mem) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Load Long - aligned acquire.
++instruct loadL_ac(iRegLdst dst, memoryAlg4 mem) %{
++ match(Set dst (LoadL mem));
++ ins_cost(3*MEMORY_REF_COST);
++
++ format %{ "LD $dst, $mem \t// long acquire\n\t"
++ "TWI $dst\n\t"
++ "ISYNC" %}
++ size(12);
++ ins_encode( enc_ld_ac(dst, mem) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Load Long - UNaligned
++instruct loadL_unaligned(iRegLdst dst, memoryAlg4 mem) %{
++ match(Set dst (LoadL_unaligned mem));
++ // predicate(...) // Unaligned_ac is not needed (and wouldn't make sense).
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "LD $dst, $mem \t// unaligned long" %}
++ size(4);
++ ins_encode( enc_ld(dst, mem) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Load nodes for superwords
++
++// Load Aligned Packed Byte
++instruct loadV8(iRegLdst dst, memoryAlg4 mem) %{
++ predicate(n->as_LoadVector()->memory_size() == 8);
++ match(Set dst (LoadVector mem));
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "LD $dst, $mem \t// load 8-byte Vector" %}
++ size(4);
++ ins_encode( enc_ld(dst, mem) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Load Range, range = array length (=jint)
++instruct loadRange(iRegIdst dst, memory mem) %{
++ match(Set dst (LoadRange mem));
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "LWZ $dst, $mem \t// range" %}
++ size(4);
++ ins_encode( enc_lwz(dst, mem) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Load Compressed Pointer
++instruct loadN(iRegNdst dst, memory mem) %{
++ match(Set dst (LoadN mem));
++ predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "LWZ $dst, $mem \t// load compressed ptr" %}
++ size(4);
++ ins_encode( enc_lwz(dst, mem) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Load Compressed Pointer acquire.
++instruct loadN_ac(iRegNdst dst, memory mem) %{
++ match(Set dst (LoadN mem));
++ ins_cost(3*MEMORY_REF_COST);
++
++ format %{ "LWZ $dst, $mem \t// load acquire compressed ptr\n\t"
++ "TWI $dst\n\t"
++ "ISYNC" %}
++ size(12);
++ ins_encode( enc_lwz_ac(dst, mem) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Load Compressed Pointer and decode it if narrow_oop_shift == 0.
++instruct loadN2P_unscaled(iRegPdst dst, memory mem) %{
++ match(Set dst (DecodeN (LoadN mem)));
++ predicate(_kids[0]->_leaf->as_Load()->is_unordered() && Universe::narrow_oop_shift() == 0);
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "LWZ $dst, $mem \t// DecodeN (unscaled)" %}
++ size(4);
++ ins_encode( enc_lwz(dst, mem) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Load Pointer
++instruct loadP(iRegPdst dst, memoryAlg4 mem) %{
++ match(Set dst (LoadP mem));
++ predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "LD $dst, $mem \t// ptr" %}
++ size(4);
++ ins_encode( enc_ld(dst, mem) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Load Pointer acquire.
++instruct loadP_ac(iRegPdst dst, memoryAlg4 mem) %{
++ match(Set dst (LoadP mem));
++ ins_cost(3*MEMORY_REF_COST);
++
++ format %{ "LD $dst, $mem \t// ptr acquire\n\t"
++ "TWI $dst\n\t"
++ "ISYNC" %}
++ size(12);
++ ins_encode( enc_ld_ac(dst, mem) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// LoadP + CastP2L
++instruct loadP2X(iRegLdst dst, memoryAlg4 mem) %{
++ match(Set dst (CastP2X (LoadP mem)));
++ predicate(_kids[0]->_leaf->as_Load()->is_unordered());
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "LD $dst, $mem \t// ptr + p2x" %}
++ size(4);
++ ins_encode( enc_ld(dst, mem) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Load compressed klass pointer.
++instruct loadNKlass(iRegNdst dst, memory mem) %{
++ match(Set dst (LoadNKlass mem));
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "LWZ $dst, $mem \t// compressed klass ptr" %}
++ size(4);
++ ins_encode( enc_lwz(dst, mem) );
++ ins_pipe(pipe_class_memory);
++%}
++
++//// Load compressed klass and decode it if narrow_klass_shift == 0.
++//// TODO: will narrow_klass_shift ever be 0?
++//instruct decodeNKlass2Klass(iRegPdst dst, memory mem) %{
++// match(Set dst (DecodeNKlass (LoadNKlass mem)));
++// predicate(false /* TODO: PPC port Universe::narrow_klass_shift() == 0*/);
++// ins_cost(MEMORY_REF_COST);
++//
++// format %{ "LWZ $dst, $mem \t// DecodeNKlass (unscaled)" %}
++// size(4);
++// ins_encode( enc_lwz(dst, mem) );
++// ins_pipe(pipe_class_memory);
++//%}
++
++// Load Klass Pointer
++instruct loadKlass(iRegPdst dst, memoryAlg4 mem) %{
++ match(Set dst (LoadKlass mem));
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "LD $dst, $mem \t// klass ptr" %}
++ size(4);
++ ins_encode( enc_ld(dst, mem) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Load Float
++instruct loadF(regF dst, memory mem) %{
++ match(Set dst (LoadF mem));
++ predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "LFS $dst, $mem" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_lfs);
++ int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
++ __ lfs($dst$$FloatRegister, Idisp, $mem$$base$$Register);
++ %}
++ ins_pipe(pipe_class_memory);
++%}
++
++// Load Float acquire.
++instruct loadF_ac(regF dst, memory mem) %{
++ match(Set dst (LoadF mem));
++ ins_cost(3*MEMORY_REF_COST);
++
++ format %{ "LFS $dst, $mem \t// acquire\n\t"
++ "FCMPU cr0, $dst, $dst\n\t"
++ "BNE cr0, next\n"
++ "next:\n\t"
++ "ISYNC" %}
++ size(16);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++ int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
++ Label next;
++ __ lfs($dst$$FloatRegister, Idisp, $mem$$base$$Register);
++ __ fcmpu(CCR0, $dst$$FloatRegister, $dst$$FloatRegister);
++ __ bne(CCR0, next);
++ __ bind(next);
++ __ isync();
++ %}
++ ins_pipe(pipe_class_memory);
++%}
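++
++// FP registers cannot feed a TWI, so the acquire dependency is built
++// with FCMPU $dst, $dst and a BNE to the very next instruction: the
++// branch depends on the loaded value, and it reaches the ISYNC whether
++// taken or not, giving the same ordering as the integer TWI/ISYNC form.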
++
++// Load Double - aligned
++instruct loadD(regD dst, memory mem) %{
++ match(Set dst (LoadD mem));
++ predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "LFD $dst, $mem" %}
++ size(4);
++ ins_encode( enc_lfd(dst, mem) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Load Double - aligned acquire.
++instruct loadD_ac(regD dst, memory mem) %{
++ match(Set dst (LoadD mem));
++ ins_cost(3*MEMORY_REF_COST);
++
++ format %{ "LFD $dst, $mem \t// acquire\n\t"
++ "FCMPU cr0, $dst, $dst\n\t"
++ "BNE cr0, next\n"
++ "next:\n\t"
++ "ISYNC" %}
++ size(16);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++ int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
++ Label next;
++ __ lfd($dst$$FloatRegister, Idisp, $mem$$base$$Register);
++ __ fcmpu(CCR0, $dst$$FloatRegister, $dst$$FloatRegister);
++ __ bne(CCR0, next);
++ __ bind(next);
++ __ isync();
++ %}
++ ins_pipe(pipe_class_memory);
++%}
++
++// Load Double - UNaligned
++instruct loadD_unaligned(regD dst, memory mem) %{
++ match(Set dst (LoadD_unaligned mem));
++ // predicate(...) // Unaligned_ac is not needed (and wouldn't make sense).
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "LFD $dst, $mem" %}
++ size(4);
++ ins_encode( enc_lfd(dst, mem) );
++ ins_pipe(pipe_class_memory);
++%}
++
++//----------Constants--------------------------------------------------------
++
++// Load MachConstantTableBase: add hi offset to global toc.
++// TODO: Handle hidden register r29 in bundler!
++instruct loadToc_hi(iRegLdst dst) %{
++ effect(DEF dst);
++ ins_cost(DEFAULT_COST);
++
++ format %{ "ADDIS $dst, R29, DISP.hi \t// load TOC hi" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_addis);
++ __ calculate_address_from_global_toc_hi16only($dst$$Register, __ method_toc());
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Load MachConstantTableBase: add lo offset to global toc.
++instruct loadToc_lo(iRegLdst dst, iRegLdst src) %{
++ effect(DEF dst, USE src);
++ ins_cost(DEFAULT_COST);
++
++ format %{ "ADDI $dst, $src, DISP.lo \t// load TOC lo" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_ori);
++ __ calculate_address_from_global_toc_lo16only($dst$$Register, __ method_toc());
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Load 16-bit integer constant 0xssss????
++instruct loadConI16(iRegIdst dst, immI16 src) %{
++ match(Set dst src);
++
++ format %{ "LI $dst, $src" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_addi);
++ __ li($dst$$Register, (int)((short)($src$$constant & 0xFFFF)));
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Load integer constant 0x????0000
++instruct loadConIhi16(iRegIdst dst, immIhi16 src) %{
++ match(Set dst src);
++ ins_cost(DEFAULT_COST);
++
++ format %{ "LIS $dst, $src.hi" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_addis);
++ // LIS sign-extends the 16-bit src, then shifts it 16 bits to the left.
++ __ lis($dst$$Register, (int)((short)(($src$$constant & 0xFFFF0000) >> 16)));
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Part 2 of loading 32 bit constant: hi16 is src1 (properly shifted
++// and sign extended), this adds the low 16 bits.
++instruct loadConI32_lo16(iRegIdst dst, iRegIsrc src1, immI16 src2) %{
++ // no match-rule, false predicate
++ effect(DEF dst, USE src1, USE src2);
++ predicate(false);
++
++ format %{ "ORI $dst, $src1.hi, $src2.lo" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_ori);
++ __ ori($dst$$Register, $src1$$Register, ($src2$$constant) & 0xFFFF);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct loadConI_Ex(iRegIdst dst, immI src) %{
++ match(Set dst src);
++ ins_cost(DEFAULT_COST*2);
++
++ expand %{
++ // Would like to use $src$$constant.
++ immI16 srcLo %{ _opnds[1]->constant() %}
++ // srcHi can be 0000 if srcLo sign-extends to a negative number.
++ immIhi16 srcHi %{ _opnds[1]->constant() %}
++ iRegIdst tmpI;
++ loadConIhi16(tmpI, srcHi);
++ loadConI32_lo16(dst, tmpI, srcLo);
++ %}
++%}
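++
++// Example: loading 0x12345678 with the expand above emits
++//   LIS tmp, 0x1234        // tmp = 0x12340000
++//   ORI dst, tmp, 0x5678   // dst = 0x12345678
++// ORI is a logical or with the zero-extended immediate, so the low
++// half never disturbs the high half.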
++
++// No constant pool entries required.
++instruct loadConL16(iRegLdst dst, immL16 src) %{
++ match(Set dst src);
++
++ format %{ "LI $dst, $src \t// long" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_addi);
++ __ li($dst$$Register, (int)((short) ($src$$constant & 0xFFFF)));
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Load long constant 0xssssssss????0000
++instruct loadConL32hi16(iRegLdst dst, immL32hi16 src) %{
++ match(Set dst src);
++ ins_cost(DEFAULT_COST);
++
++ format %{ "LIS $dst, $src.hi \t// long" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_addis);
++ __ lis($dst$$Register, (int)((short)(($src$$constant & 0xFFFF0000) >> 16)));
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// To load a 32 bit constant: merge lower 16 bits into already loaded
++// high 16 bits.
++instruct loadConL32_lo16(iRegLdst dst, iRegLsrc src1, immL16 src2) %{
++ // no match-rule, false predicate
++ effect(DEF dst, USE src1, USE src2);
++ predicate(false);
++
++ format %{ "ORI $dst, $src1, $src2.lo" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_ori);
++ __ ori($dst$$Register, $src1$$Register, ($src2$$constant) & 0xFFFF);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Load 32-bit long constant
++instruct loadConL32_Ex(iRegLdst dst, immL32 src) %{
++ match(Set dst src);
++ ins_cost(DEFAULT_COST*2);
++
++ expand %{
++ // Would like to use $src$$constant.
++ immL16 srcLo %{ _opnds[1]->constant() /*& 0x0000FFFFL */%}
++ // srcHi can be 0000 if srcLo sign-extends to a negative number.
++ immL32hi16 srcHi %{ _opnds[1]->constant() /*& 0xFFFF0000L */%}
++ iRegLdst tmpL;
++ loadConL32hi16(tmpL, srcHi);
++ loadConL32_lo16(dst, tmpL, srcLo);
++ %}
++%}
++
++// Load long constant 0x????000000000000.
++instruct loadConLhighest16_Ex(iRegLdst dst, immLhighest16 src) %{
++ match(Set dst src);
++ ins_cost(DEFAULT_COST);
++
++ expand %{
++ immL32hi16 srcHi %{ _opnds[1]->constant() >> 32 /*& 0xFFFF0000L */%}
++ immI shift32 %{ 32 %}
++ iRegLdst tmpL;
++ loadConL32hi16(tmpL, srcHi);
++ lshiftL_regL_immI(dst, tmpL, shift32);
++ %}
++%}
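++
++// Example: loading 0x1234000000000000 emits
++//   LIS  tmp, 0x1234   // tmp = 0x0000000012340000
++//   SLDI dst, tmp, 32  // dst = 0x1234000000000000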
++
++// Expand node for constant pool load: small offset.
++instruct loadConL(iRegLdst dst, immL src, iRegLdst toc) %{
++ effect(DEF dst, USE src, USE toc);
++ ins_cost(MEMORY_REF_COST);
++
++ ins_num_consts(1);
++ // Needed so that CallDynamicJavaDirect can compute the address of this
++ // instruction for relocation.
++ ins_field_cbuf_insts_offset(int);
++
++ format %{ "LD $dst, offset, $toc \t// load long $src from TOC" %}
++ size(4);
++ ins_encode( enc_load_long_constL(dst, src, toc) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Expand node for constant pool load: large offset.
++instruct loadConL_hi(iRegLdst dst, immL src, iRegLdst toc) %{
++ effect(DEF dst, USE src, USE toc);
++ predicate(false);
++
++ ins_num_consts(1);
++ ins_field_const_toc_offset(int);
++ // Needed so that CallDynamicJavaDirect can compute the address of this
++ // instruction for relocation.
++ ins_field_cbuf_insts_offset(int);
++
++ format %{ "ADDIS $dst, $toc, offset \t// load long $src from TOC (hi)" %}
++ size(4);
++ ins_encode( enc_load_long_constL_hi(dst, toc, src) );
++ ins_pipe(pipe_class_default);
++%}
++
++// Expand node for constant pool load: large offset.
++// No constant pool entries required.
++instruct loadConL_lo(iRegLdst dst, immL src, iRegLdst base) %{
++ effect(DEF dst, USE src, USE base);
++ predicate(false);
++
++ ins_field_const_toc_offset_hi_node(loadConL_hiNode*);
++
++ format %{ "LD $dst, offset, $base \t// load long $src from TOC (lo)" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_ld);
++ int offset = ra_->C->in_scratch_emit_size() ? 0 : _const_toc_offset_hi_node->_const_toc_offset;
++ __ ld($dst$$Register, MacroAssembler::largeoffset_si16_si16_lo(offset), $base$$Register);
++ %}
++ ins_pipe(pipe_class_memory);
++%}
++
++// Load long constant from constant table. Expand in case an
++// offset > 16 bit is needed.
++// Adlc adds toc node MachConstantTableBase.
++instruct loadConL_Ex(iRegLdst dst, immL src) %{
++ match(Set dst src);
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "LD $dst, offset, $constanttablebase\t// load long $src from table, postalloc expanded" %}
++ // We cannot inline the enc_class for the expand as that does not support constanttablebase.
++ postalloc_expand( postalloc_expand_load_long_constant(dst, src, constanttablebase) );
++%}
++
++// Load NULL as compressed oop.
++instruct loadConN0(iRegNdst dst, immN_0 src) %{
++ match(Set dst src);
++ ins_cost(DEFAULT_COST);
++
++ format %{ "LI $dst, $src \t// compressed ptr" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_addi);
++ __ li($dst$$Register, 0);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Load hi part of compressed oop constant.
++instruct loadConN_hi(iRegNdst dst, immN src) %{
++ effect(DEF dst, USE src);
++ ins_cost(DEFAULT_COST);
++
++ format %{ "LIS $dst, $src \t// narrow oop hi" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_addis);
++ __ lis($dst$$Register, (int)(short)(($src$$constant >> 16) & 0xffff));
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Add lo part of compressed oop constant to already loaded hi part.
++instruct loadConN_lo(iRegNdst dst, iRegNsrc src1, immN src2) %{
++ effect(DEF dst, USE src1, USE src2);
++ ins_cost(DEFAULT_COST);
++
++ format %{ "ORI $dst, $src1, $src2 \t// narrow oop lo" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_addi);
++ assert(__ oop_recorder() != NULL, "this assembler needs an OopRecorder");
++ int oop_index = __ oop_recorder()->find_index((jobject)$src2$$constant);
++ RelocationHolder rspec = oop_Relocation::spec(oop_index);
++ __ relocate(rspec, 1);
++ __ ori($dst$$Register, $src1$$Register, $src2$$constant & 0xffff);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Needed to postalloc expand loadConN: ConN is loaded as ConI
++// leaving the upper 32 bits with sign-extension bits.
++// This clears these bits: dst = src & 0xFFFFFFFF.
++// TODO: Eventually call this maskN_regN_FFFFFFFF.
++instruct clearMs32b(iRegNdst dst, iRegNsrc src) %{
++ effect(DEF dst, USE src);
++ predicate(false);
++
++ format %{ "MASK $dst, $src, 0xFFFFFFFF" %} // mask
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
++ __ clrldi($dst$$Register, $src$$Register, 0x20);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Loading ConN must be postalloc expanded so that edges between
++// the nodes are safe. They may not interfere with a safepoint.
++// GL TODO: This needs three instructions: better put this into the constant pool.
++instruct loadConN_Ex(iRegNdst dst, immN src) %{
++ match(Set dst src);
++ ins_cost(DEFAULT_COST*2);
++
++ format %{ "LoadN $dst, $src \t// postalloc expanded" %} // mask
++ postalloc_expand %{
++ MachNode *m1 = new (C) loadConN_hiNode();
++ MachNode *m2 = new (C) loadConN_loNode();
++ MachNode *m3 = new (C) clearMs32bNode();
++ m1->add_req(NULL);
++ m2->add_req(NULL, m1);
++ m3->add_req(NULL, m2);
++ m1->_opnds[0] = op_dst;
++ m1->_opnds[1] = op_src;
++ m2->_opnds[0] = op_dst;
++ m2->_opnds[1] = op_dst;
++ m2->_opnds[2] = op_src;
++ m3->_opnds[0] = op_dst;
++ m3->_opnds[1] = op_dst;
++ ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
++ ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
++ ra_->set_pair(m3->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
++ nodes->push(m1);
++ nodes->push(m2);
++ nodes->push(m3);
++ %}
++%}
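++
++// Resulting sequence, e.g. for the narrow oop constant 0x89abcdef:
++//   LIS    dst, 0x89ab        // dst = 0xffffffff89ab0000 (LIS sign-extends)
++//   ORI    dst, dst, 0xcdef   // dst = 0xffffffff89abcdef
++//   CLRLDI dst, dst, 32       // dst = 0x0000000089abcdef
++// The CLRLDI (clearMs32b) removes the sign-extension bits mentioned above.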
++
++instruct loadConNKlass_hi(iRegNdst dst, immNKlass src) %{
++ effect(DEF dst, USE src);
++ ins_cost(DEFAULT_COST);
++
++ format %{ "LIS $dst, $src \t// narrow oop hi" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_addis);
++ intptr_t Csrc = Klass::encode_klass((Klass *)$src$$constant);
++ __ lis($dst$$Register, (int)(short)((Csrc >> 16) & 0xffff));
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// This needs a match rule so that build_oop_map knows this is
++// not a narrow oop.
++instruct loadConNKlass_lo(iRegNdst dst, immNKlass_NM src1, iRegNsrc src2) %{
++ match(Set dst src1);
++ effect(TEMP src2);
++ ins_cost(DEFAULT_COST);
++
++ format %{ "ADDI $dst, $src1, $src2 \t// narrow oop lo" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_addi);
++ intptr_t Csrc = Klass::encode_klass((Klass *)$src1$$constant);
++ assert(__ oop_recorder() != NULL, "this assembler needs an OopRecorder");
++ int klass_index = __ oop_recorder()->find_index((Klass *)$src1$$constant);
++ RelocationHolder rspec = metadata_Relocation::spec(klass_index);
++
++ __ relocate(rspec, 1);
++ __ ori($dst$$Register, $src2$$Register, Csrc & 0xffff);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Loading ConNKlass must be postalloc expanded so that edges between
++// the nodes are safe. They may not interfere with a safepoint.
++instruct loadConNKlass_Ex(iRegNdst dst, immNKlass src) %{
++ match(Set dst src);
++ ins_cost(DEFAULT_COST*2);
++
++ format %{ "LoadN $dst, $src \t// postalloc expanded" %} // mask
++ postalloc_expand %{
++ // Load high bits into register. Sign extended.
++ MachNode *m1 = new (C) loadConNKlass_hiNode();
++ m1->add_req(NULL);
++ m1->_opnds[0] = op_dst;
++ m1->_opnds[1] = op_src;
++ ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
++ nodes->push(m1);
++
++ MachNode *m2 = m1;
++ if (!Assembler::is_uimm((jlong)Klass::encode_klass((Klass *)op_src->constant()), 31)) {
++ // Value might be 1-extended. Mask out these bits.
++ m2 = new (C) clearMs32bNode();
++ m2->add_req(NULL, m1);
++ m2->_opnds[0] = op_dst;
++ m2->_opnds[1] = op_dst;
++ ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
++ nodes->push(m2);
++ }
++
++ MachNode *m3 = new (C) loadConNKlass_loNode();
++ m3->add_req(NULL, m2);
++ m3->_opnds[0] = op_dst;
++ m3->_opnds[1] = op_src;
++ m3->_opnds[2] = op_dst;
++ ra_->set_pair(m3->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
++ nodes->push(m3);
++ %}
++%}
++
++// 0x1 is used in object initialization (initial object header).
++// No constant pool entries required.
++instruct loadConP0or1(iRegPdst dst, immP_0or1 src) %{
++ match(Set dst src);
++
++ format %{ "LI $dst, $src \t// ptr" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_addi);
++ __ li($dst$$Register, (int)((short)($src$$constant & 0xFFFF)));
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Expand node for constant pool load: small offset.
++// The match rule is needed to generate the correct bottom_type(),
++// however this node should never match. The use of predicate is not
++// possible since ADLC forbids predicates for chain rules. The higher
++// costs do not prevent matching in this case. For that reason the
++// operand immP_NM with predicate(false) is used.
++instruct loadConP(iRegPdst dst, immP_NM src, iRegLdst toc) %{
++ match(Set dst src);
++ effect(TEMP toc);
++
++ ins_num_consts(1);
++
++ format %{ "LD $dst, offset, $toc \t// load ptr $src from TOC" %}
++ size(4);
++ ins_encode( enc_load_long_constP(dst, src, toc) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Expand node for constant pool load: large offset.
++instruct loadConP_hi(iRegPdst dst, immP_NM src, iRegLdst toc) %{
++ effect(DEF dst, USE src, USE toc);
++ predicate(false);
++
++ ins_num_consts(1);
++ ins_field_const_toc_offset(int);
++
++ format %{ "ADDIS $dst, $toc, offset \t// load ptr $src from TOC (hi)" %}
++ size(4);
++ ins_encode( enc_load_long_constP_hi(dst, src, toc) );
++ ins_pipe(pipe_class_default);
++%}
++
++// Expand node for constant pool load: large offset.
++instruct loadConP_lo(iRegPdst dst, immP_NM src, iRegLdst base) %{
++ match(Set dst src);
++ effect(TEMP base);
++
++ ins_field_const_toc_offset_hi_node(loadConP_hiNode*);
++
++ format %{ "LD $dst, offset, $base \t// load ptr $src from TOC (lo)" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_ld);
++ int offset = ra_->C->in_scratch_emit_size() ? 0 : _const_toc_offset_hi_node->_const_toc_offset;
++ __ ld($dst$$Register, MacroAssembler::largeoffset_si16_si16_lo(offset), $base$$Register);
++ %}
++ ins_pipe(pipe_class_memory);
++%}
++
++// Load pointer constant from constant table. Expand in case an
++// offset > 16 bit is needed.
++// Adlc adds toc node MachConstantTableBase.
++instruct loadConP_Ex(iRegPdst dst, immP src) %{
++ match(Set dst src);
++ ins_cost(MEMORY_REF_COST);
++
++ // This rule does not use "expand" because then
++ // the result type is not known to be an Oop. An ADLC
++ // enhancement will be needed to make that work - not worth it!
++
++ // If this instruction rematerializes, it prolongs the live range
++ // of the toc node, causing illegal graphs.
++ // assert(edge_from_to(_reg_node[reg_lo],def)) fails in verify_good_schedule().
++ ins_cannot_rematerialize(true);
++
++ format %{ "LD $dst, offset, $constanttablebase \t// load ptr $src from table, postalloc expanded" %}
++ postalloc_expand( postalloc_expand_load_ptr_constant(dst, src, constanttablebase) );
++%}
++
++// Expand node for constant pool load: small offset.
++instruct loadConF(regF dst, immF src, iRegLdst toc) %{
++ effect(DEF dst, USE src, USE toc);
++ ins_cost(MEMORY_REF_COST);
++
++ ins_num_consts(1);
++
++ format %{ "LFS $dst, offset, $toc \t// load float $src from TOC" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_lfs);
++ address float_address = __ float_constant($src$$constant);
++ __ lfs($dst$$FloatRegister, __ offset_to_method_toc(float_address), $toc$$Register);
++ %}
++ ins_pipe(pipe_class_memory);
++%}
++
++// Expand node for constant pool load: large offset.
++instruct loadConFComp(regF dst, immF src, iRegLdst toc) %{
++ effect(DEF dst, USE src, USE toc);
++ ins_cost(MEMORY_REF_COST);
++
++ ins_num_consts(1);
++
++ format %{ "ADDIS $toc, $toc, offset_hi\n\t"
++ "LFS $dst, offset_lo, $toc \t// load float $src from TOC (hi/lo)\n\t"
++ "ADDIS $toc, $toc, -offset_hi"%}
++ size(12);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++ FloatRegister Rdst = $dst$$FloatRegister;
++ Register Rtoc = $toc$$Register;
++ address float_address = __ float_constant($src$$constant);
++ int offset = __ offset_to_method_toc(float_address);
++ int hi = (offset + (1<<15))>>16;
++ int lo = offset - hi * (1<<16);
++
++ __ addis(Rtoc, Rtoc, hi);
++ __ lfs(Rdst, lo, Rtoc);
++ __ addis(Rtoc, Rtoc, -hi);
++ %}
++ ins_pipe(pipe_class_memory);
++%}
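++
++// The hi/lo split computes offset = hi * 2^16 + lo with lo in
++// [-0x8000, 0x7fff]; rounding hi up via (offset + (1<<15)) >> 16 keeps
++// lo inside the signed 16-bit displacement range. E.g. for
++// offset = 0x18fff: hi = 2, lo = -0x7001, and 2*0x10000 - 0x7001 = 0x18fff.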
++
++// Adlc adds toc node MachConstantTableBase.
++instruct loadConF_Ex(regF dst, immF src) %{
++ match(Set dst src);
++ ins_cost(MEMORY_REF_COST);
++
++ // See loadConP.
++ ins_cannot_rematerialize(true);
++
++ format %{ "LFS $dst, offset, $constanttablebase \t// load $src from table, postalloc expanded" %}
++ postalloc_expand( postalloc_expand_load_float_constant(dst, src, constanttablebase) );
++%}
++
++// Expand node for constant pool load: small offset.
++instruct loadConD(regD dst, immD src, iRegLdst toc) %{
++ effect(DEF dst, USE src, USE toc);
++ ins_cost(MEMORY_REF_COST);
++
++ ins_num_consts(1);
++
++ format %{ "LFD $dst, offset, $toc \t// load double $src from TOC" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_lfd);
++ int offset = __ offset_to_method_toc(__ double_constant($src$$constant));
++ __ lfd($dst$$FloatRegister, offset, $toc$$Register);
++ %}
++ ins_pipe(pipe_class_memory);
++%}
++
++// Expand node for constant pool load: large offset.
++instruct loadConDComp(regD dst, immD src, iRegLdst toc) %{
++ effect(DEF dst, USE src, USE toc);
++ ins_cost(MEMORY_REF_COST);
++
++ ins_num_consts(1);
++
++ format %{ "ADDIS $toc, $toc, offset_hi\n\t"
++ "LFD $dst, offset_lo, $toc \t// load double $src from TOC (hi/lo)\n\t"
++ "ADDIS $toc, $toc, -offset_hi" %}
++ size(12);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++ FloatRegister Rdst = $dst$$FloatRegister;
++ Register Rtoc = $toc$$Register;
++ address float_address = __ double_constant($src$$constant);
++ int offset = __ offset_to_method_toc(float_address);
++ int hi = (offset + (1<<15))>>16;
++ int lo = offset - hi * (1<<16);
++
++ __ addis(Rtoc, Rtoc, hi);
++ __ lfd(Rdst, lo, Rtoc);
++ __ addis(Rtoc, Rtoc, -hi);
++ %}
++ ins_pipe(pipe_class_memory);
++%}
++
++// Adlc adds toc node MachConstantTableBase.
++instruct loadConD_Ex(regD dst, immD src) %{
++ match(Set dst src);
++ ins_cost(MEMORY_REF_COST);
++
++ // See loadConP.
++ ins_cannot_rematerialize(true);
++
++ format %{ "ConD $dst, offset, $constanttablebase \t// load $src from table, postalloc expanded" %}
++ postalloc_expand( postalloc_expand_load_double_constant(dst, src, constanttablebase) );
++%}
++
++// Prefetch instructions.
++// Must be safe to execute with invalid address (cannot fault).
++
++instruct prefetchr(indirectMemory mem, iRegLsrc src) %{
++ match(PrefetchRead (AddP mem src));
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "PREFETCH $mem, 0, $src \t// Prefetch read-many" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_dcbt);
++ __ dcbt($src$$Register, $mem$$base$$Register);
++ %}
++ ins_pipe(pipe_class_memory);
++%}
++
++instruct prefetchr_no_offset(indirectMemory mem) %{
++ match(PrefetchRead mem);
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "PREFETCH $mem" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_dcbt);
++ __ dcbt($mem$$base$$Register);
++ %}
++ ins_pipe(pipe_class_memory);
++%}
++
++instruct prefetchw(indirectMemory mem, iRegLsrc src) %{
++ match(PrefetchWrite (AddP mem src));
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "PREFETCH $mem, 2, $src \t// Prefetch write-many (and read)" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_dcbtst);
++ __ dcbtst($src$$Register, $mem$$base$$Register);
++ %}
++ ins_pipe(pipe_class_memory);
++%}
++
++instruct prefetchw_no_offset(indirectMemory mem) %{
++ match(PrefetchWrite mem);
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "PREFETCH $mem" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_dcbtst);
++ __ dcbtst($mem$$base$$Register);
++ %}
++ ins_pipe(pipe_class_memory);
++%}
++
++// Special prefetch versions which use the dcbz instruction.
++instruct prefetch_alloc_zero(indirectMemory mem, iRegLsrc src) %{
++ match(PrefetchAllocation (AddP mem src));
++ predicate(AllocatePrefetchStyle == 3);
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "PREFETCH $mem, 2, $src \t// Prefetch write-many with zero" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_dcbtst);
++ __ dcbz($src$$Register, $mem$$base$$Register);
++ %}
++ ins_pipe(pipe_class_memory);
++%}
++
++instruct prefetch_alloc_zero_no_offset(indirectMemory mem) %{
++ match(PrefetchAllocation mem);
++ predicate(AllocatePrefetchStyle == 3);
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "PREFETCH $mem, 2 \t// Prefetch write-many with zero" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_dcbtst);
++ __ dcbz($mem$$base$$Register);
++ %}
++ ins_pipe(pipe_class_memory);
++%}
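++
++// DCBZ zeroes the whole cache block and establishes it in the cache
++// without reading it from memory. Since the allocation path is going
++// to overwrite the block anyway, this both prefetches and pre-zeroes it.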
++
++instruct prefetch_alloc(indirectMemory mem, iRegLsrc src) %{
++ match(PrefetchAllocation (AddP mem src));
++ predicate(AllocatePrefetchStyle != 3);
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "PREFETCH $mem, 2, $src \t// Prefetch write-many" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_dcbtst);
++ __ dcbtst($src$$Register, $mem$$base$$Register);
++ %}
++ ins_pipe(pipe_class_memory);
++%}
++
++instruct prefetch_alloc_no_offset(indirectMemory mem) %{
++ match(PrefetchAllocation mem);
++ predicate(AllocatePrefetchStyle != 3);
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "PREFETCH $mem, 2 \t// Prefetch write-many" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_dcbtst);
++ __ dcbtst($mem$$base$$Register);
++ %}
++ ins_pipe(pipe_class_memory);
++%}
++
++//----------Store Instructions-------------------------------------------------
++
++// Store Byte
++instruct storeB(memory mem, iRegIsrc src) %{
++ match(Set mem (StoreB mem src));
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "STB $src, $mem \t// byte" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_stb);
++ int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
++ __ stb($src$$Register, Idisp, $mem$$base$$Register);
++ %}
++ ins_pipe(pipe_class_memory);
++%}
++
++// Store Char/Short
++instruct storeC(memory mem, iRegIsrc src) %{
++ match(Set mem (StoreC mem src));
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "STH $src, $mem \t// short" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_sth);
++ int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
++ __ sth($src$$Register, Idisp, $mem$$base$$Register);
++ %}
++ ins_pipe(pipe_class_memory);
++%}
++
++// Store Integer
++instruct storeI(memory mem, iRegIsrc src) %{
++ match(Set mem (StoreI mem src));
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "STW $src, $mem" %}
++ size(4);
++ ins_encode( enc_stw(src, mem) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// ConvL2I + StoreI.
++instruct storeI_convL2I(memory mem, iRegLsrc src) %{
++ match(Set mem (StoreI mem (ConvL2I src)));
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "STW l2i($src), $mem" %}
++ size(4);
++ ins_encode( enc_stw(src, mem) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Store Long
++instruct storeL(memoryAlg4 mem, iRegLsrc src) %{
++ match(Set mem (StoreL mem src));
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "STD $src, $mem \t// long" %}
++ size(4);
++ ins_encode( enc_std(src, mem) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Store super word nodes.
++
++// Store Aligned Packed Byte long register to memory
++instruct storeA8B(memoryAlg4 mem, iRegLsrc src) %{
++ predicate(n->as_StoreVector()->memory_size() == 8);
++ match(Set mem (StoreVector mem src));
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "STD $mem, $src \t// packed8B" %}
++ size(4);
++ ins_encode( enc_std(src, mem) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Store Compressed Oop
++instruct storeN(memory dst, iRegN_P2N src) %{
++ match(Set dst (StoreN dst src));
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "STW $src, $dst \t// compressed oop" %}
++ size(4);
++ ins_encode( enc_stw(src, dst) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Store Compressed Klass
++instruct storeNKlass(memory dst, iRegN_P2N src) %{
++ match(Set dst (StoreNKlass dst src));
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "STW $src, $dst \t// compressed klass" %}
++ size(4);
++ ins_encode( enc_stw(src, dst) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Store Pointer
++instruct storeP(memoryAlg4 dst, iRegPsrc src) %{
++ match(Set dst (StoreP dst src));
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "STD $src, $dst \t// ptr" %}
++ size(4);
++ ins_encode( enc_std(src, dst) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Store Float
++instruct storeF(memory mem, regF src) %{
++ match(Set mem (StoreF mem src));
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "STFS $src, $mem" %}
++ size(4);
++ ins_encode( enc_stfs(src, mem) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Store Double
++instruct storeD(memory mem, regD src) %{
++ match(Set mem (StoreD mem src));
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "STFD $src, $mem" %}
++ size(4);
++ ins_encode( enc_stfd(src, mem) );
++ ins_pipe(pipe_class_memory);
++%}
++
++//----------Store Instructions With Zeros--------------------------------------
++
++// Card-mark for CMS garbage collection.
++// This card-mark is optimized so that it does not always have to
++// do a releasing store. For this, it gets the address of
++// CMSCollectorCardTableModRefBSExt::_requires_release as input.
++// (Using releaseFieldAddr in the match rule is a hack.)
++instruct storeCM_CMS(memory mem, iRegLdst releaseFieldAddr) %{
++ match(Set mem (StoreCM mem releaseFieldAddr));
++ predicate(false);
++ ins_cost(MEMORY_REF_COST);
++
++ // See loadConP.
++ ins_cannot_rematerialize(true);
++
++ format %{ "STB #0, $mem \t// CMS card-mark byte (must be 0!), checking requires_release in [$releaseFieldAddr]" %}
++ ins_encode( enc_cms_card_mark(mem, releaseFieldAddr) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Card-mark for CMS garbage collection.
++// This card-mark is optimized so that it does not always have to
++// do a releasing store. For this, it needs the constant address of
++// CMSCollectorCardTableModRefBSExt::_requires_release.
++// This constant address is split off here by expand so we can use
++// adlc / matcher functionality to load it from the constant section.
++instruct storeCM_CMS_ExEx(memory mem, immI_0 zero) %{
++ match(Set mem (StoreCM mem zero));
++ predicate(UseConcMarkSweepGC);
++
++ expand %{
++ immL baseImm %{ 0 /* TODO: PPC port (jlong)CMSCollectorCardTableModRefBSExt::requires_release_address() */ %}
++ iRegLdst releaseFieldAddress;
++ loadConL_Ex(releaseFieldAddress, baseImm);
++ storeCM_CMS(mem, releaseFieldAddress);
++ %}
++%}
++
++instruct storeCM_G1(memory mem, immI_0 zero) %{
++ match(Set mem (StoreCM mem zero));
++ predicate(UseG1GC);
++ ins_cost(MEMORY_REF_COST);
++
++ ins_cannot_rematerialize(true);
++
++ format %{ "STB #0, $mem \t// CMS card-mark byte store (G1)" %}
++ size(8);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++ __ li(R0, 0);
++ //__ release(); // G1: oops are allowed to get visible after dirty marking
++ guarantee($mem$$base$$Register != R1_SP, "use frame_slots_bias");
++ __ stb(R0, $mem$$disp, $mem$$base$$Register);
++ %}
++ ins_pipe(pipe_class_memory);
++%}
++
++// Convert oop pointer into compressed form.
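++// With the heap base kept in R30, the full encoding computed by the
++// nodes below is
++//   narrow = (oop - base) >> narrow_oop_shift
++// with NULL preserved (the conditional variants skip the subtraction,
++// or use ISEL on Power7, so 0 stays 0).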
++
++// Nodes for postalloc expand.
++
++// Shift node for expand.
++instruct encodeP_shift(iRegNdst dst, iRegNsrc src) %{
++ // The match rule is needed to make it a 'MachTypeNode'!
++ match(Set dst (EncodeP src));
++ predicate(false);
++
++ format %{ "SRDI $dst, $src, 3 \t// encode" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
++ __ srdi($dst$$Register, $src$$Register, Universe::narrow_oop_shift() & 0x3f);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Sub node for expand.
++instruct encodeP_sub(iRegPdst dst, iRegPdst src) %{
++ // The match rule is needed to make it a 'MachTypeNode'!
++ match(Set dst (EncodeP src));
++ predicate(false);
++
++ format %{ "SUB $dst, $src, oop_base \t// encode" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_subf);
++ __ subf($dst$$Register, R30, $src$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Conditional sub base.
++instruct cond_sub_base(iRegNdst dst, flagsReg crx, iRegPsrc src1) %{
++ // The match rule is needed to make it a 'MachTypeNode'!
++ match(Set dst (EncodeP (Binary crx src1)));
++ predicate(false);
++
++ ins_variable_size_depending_on_alignment(true);
++
++ format %{ "BEQ $crx, done\n\t"
++ "SUB $dst, $src1, R30 \t// encode: subtract base if != NULL\n"
++ "done:" %}
++ size(false /* TODO: PPC PORT (InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling()) */ ? 12 : 8);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
++ Label done;
++ __ beq($crx$$CondRegister, done);
++ __ subf($dst$$Register, R30, $src1$$Register);
++ // TODO PPC port __ endgroup_if_needed(_size == 12);
++ __ bind(done);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Power7 can use the isel instruction.
++instruct cond_set_0_oop(iRegNdst dst, flagsReg crx, iRegPsrc src1) %{
++ // The match rule is needed to make it a 'MachTypeNode'!
++ match(Set dst (EncodeP (Binary crx src1)));
++ predicate(false);
++
++ format %{ "CMOVE $dst, $crx eq, 0, $src1 \t// encode: preserve 0" %}
++ size(4);
++ ins_encode %{
++ // This is a Power7 instruction for which no machine description exists.
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++ __ isel_0($dst$$Register, $crx$$CondRegister, Assembler::equal, $src1$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// base != 0
++// 32G aligned narrow oop base.
++instruct encodeP_32GAligned(iRegNdst dst, iRegPsrc src) %{
++ match(Set dst (EncodeP src));
++ predicate(false /* TODO: PPC port Universe::narrow_oop_base_disjoint()*/);
++
++ format %{ "EXTRDI $dst, $src, #32, #3 \t// encode with 32G aligned base" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
++ __ rldicl($dst$$Register, $src$$Register, 64-Universe::narrow_oop_shift(), 32);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// shift != 0, base != 0
++instruct encodeP_Ex(iRegNdst dst, flagsReg crx, iRegPsrc src) %{
++ match(Set dst (EncodeP src));
++ effect(TEMP crx);
++ predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull &&
++ Universe::narrow_oop_shift() != 0 &&
++ true /* TODO: PPC port Universe::narrow_oop_base_overlaps()*/);
++
++ format %{ "EncodeP $dst, $crx, $src \t// postalloc expanded" %}
++ postalloc_expand( postalloc_expand_encode_oop(dst, src, crx));
++%}
++
++// shift != 0, base != 0
++instruct encodeP_not_null_Ex(iRegNdst dst, iRegPsrc src) %{
++ match(Set dst (EncodeP src));
++ predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull &&
++ Universe::narrow_oop_shift() != 0 &&
++ true /* TODO: PPC port Universe::narrow_oop_base_overlaps()*/);
++
++ format %{ "EncodeP $dst, $src\t// $src != Null, postalloc expanded" %}
++ postalloc_expand( postalloc_expand_encode_oop_not_null(dst, src) );
++%}
++
++// shift != 0, base == 0
++// TODO: This is the same as encodeP_shift. Merge!
++instruct encodeP_not_null_base_null(iRegNdst dst, iRegPsrc src) %{
++ match(Set dst (EncodeP src));
++ predicate(Universe::narrow_oop_shift() != 0 &&
++ Universe::narrow_oop_base() == 0);
++
++ format %{ "SRDI $dst, $src, #3 \t// encodeP, $src != NULL" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
++ __ srdi($dst$$Register, $src$$Register, Universe::narrow_oop_shift() & 0x3f);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Compressed OOPs with narrow_oop_shift == 0.
++// shift == 0, base == 0
++instruct encodeP_narrow_oop_shift_0(iRegNdst dst, iRegPsrc src) %{
++ match(Set dst (EncodeP src));
++ predicate(Universe::narrow_oop_shift() == 0);
++
++ format %{ "MR $dst, $src \t// Ptr->Narrow" %}
++ // variable size, 0 or 4.
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_or);
++ __ mr_if_needed($dst$$Register, $src$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Decode nodes.
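++// Decoding inverts the encoding:
++//   oop = (narrow << narrow_oop_shift) + base
++// again with 0 preserved via the conditional add / ISEL variants.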
++
++// Shift node for expand.
++instruct decodeN_shift(iRegPdst dst, iRegPsrc src) %{
++ // The match rule is needed to make it a 'MachTypeNode'!
++ match(Set dst (DecodeN src));
++ predicate(false);
++
++ format %{ "SLDI $dst, $src, #3 \t// DecodeN" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_rldicr);
++ __ sldi($dst$$Register, $src$$Register, Universe::narrow_oop_shift());
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Add node for expand.
++instruct decodeN_add(iRegPdst dst, iRegPdst src) %{
++ // The match rule is needed to make it a 'MachTypeNode'!
++ match(Set dst (DecodeN src));
++ predicate(false);
++
++ format %{ "ADD $dst, $src, R30 \t// DecodeN, add oop base" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_add);
++ __ add($dst$$Register, $src$$Register, R30);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Conditional add base for expand.
++instruct cond_add_base(iRegPdst dst, flagsReg crx, iRegPsrc src1) %{
++ // The match rule is needed to make it a 'MachTypeNode'!
++ // NOTICE that the rule is nonsense - we just have to make sure that:
++ // - _matrule->_rChild->_opType == "DecodeN" (see InstructForm::captures_bottom_type() in formssel.cpp)
++ // - we have to match 'crx' to avoid an "illegal USE of non-input: flagsReg crx" error in ADLC.
++ match(Set dst (DecodeN (Binary crx src1)));
++ predicate(false);
++
++ ins_variable_size_depending_on_alignment(true);
++
++ format %{ "BEQ $crx, done\n\t"
++ "ADD $dst, $src1, R30 \t// DecodeN: add oop base if $src1 != NULL\n"
++ "done:" %}
++ size(false /* TODO: PPC PORT (InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling()) */ ? 12 : 8);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
++ Label done;
++ __ beq($crx$$CondRegister, done);
++ __ add($dst$$Register, $src1$$Register, R30);
++ // TODO PPC port __ endgroup_if_needed(_size == 12);
++ __ bind(done);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct cond_set_0_ptr(iRegPdst dst, flagsReg crx, iRegPsrc src1) %{
++ // The match rule is needed to make it a 'MachTypeNode'!
++ // NOTICE that the rule is nonsense - we just have to make sure that:
++ // - _matrule->_rChild->_opType == "DecodeN" (see InstructForm::captures_bottom_type() in formssel.cpp)
++ // - we have to match 'crx' to avoid an "illegal USE of non-input: flagsReg crx" error in ADLC.
++ match(Set dst (DecodeN (Binary crx src1)));
++ predicate(false);
++
++ format %{ "CMOVE $dst, $crx eq, 0, $src1 \t// decode: preserve 0" %}
++ size(4);
++ ins_encode %{
++ // This is a Power7 instruction for which no machine description exists.
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++ __ isel_0($dst$$Register, $crx$$CondRegister, Assembler::equal, $src1$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// shift != 0, base != 0
++instruct decodeN_Ex(iRegPdst dst, iRegNsrc src, flagsReg crx) %{
++ match(Set dst (DecodeN src));
++ predicate((n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
++ n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant) &&
++ Universe::narrow_oop_shift() != 0 &&
++ Universe::narrow_oop_base() != 0);
++ effect(TEMP crx);
++
++ format %{ "DecodeN $dst, $src \t// Kills $crx, postalloc expanded" %}
++ postalloc_expand( postalloc_expand_decode_oop(dst, src, crx) );
++%}
++
++// shift != 0, base == 0
++instruct decodeN_nullBase(iRegPdst dst, iRegNsrc src) %{
++ match(Set dst (DecodeN src));
++ predicate(Universe::narrow_oop_shift() != 0 &&
++ Universe::narrow_oop_base() == 0);
++
++ format %{ "SLDI $dst, $src, #3 \t// DecodeN (zerobased)" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_rldicr);
++ __ sldi($dst$$Register, $src$$Register, Universe::narrow_oop_shift());
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// src != 0, shift != 0, base != 0
++instruct decodeN_notNull_addBase_Ex(iRegPdst dst, iRegNsrc src) %{
++ match(Set dst (DecodeN src));
++ predicate((n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
++ n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant) &&
++ Universe::narrow_oop_shift() != 0 &&
++ Universe::narrow_oop_base() != 0);
++
++ format %{ "DecodeN $dst, $src \t// $src != NULL, postalloc expanded" %}
++ postalloc_expand( postalloc_expand_decode_oop_not_null(dst, src));
++%}
++
++// Compressed OOPs with narrow_oop_shift == 0.
++instruct decodeN_unscaled(iRegPdst dst, iRegNsrc src) %{
++ match(Set dst (DecodeN src));
++ predicate(Universe::narrow_oop_shift() == 0);
++ ins_cost(DEFAULT_COST);
++
++ format %{ "MR $dst, $src \t// DecodeN (unscaled)" %}
++ // variable size, 0 or 4.
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_or);
++ __ mr_if_needed($dst$$Register, $src$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Convert compressed oop into int for vectors alignment masking.
++instruct decodeN2I_unscaled(iRegIdst dst, iRegNsrc src) %{
++ match(Set dst (ConvL2I (CastP2X (DecodeN src))));
++ predicate(Universe::narrow_oop_shift() == 0);
++ ins_cost(DEFAULT_COST);
++
++ format %{ "MR $dst, $src \t// (int)DecodeN (unscaled)" %}
++ // variable size, 0 or 4.
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_or);
++ __ mr_if_needed($dst$$Register, $src$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Convert klass pointer into compressed form.
++
++// Nodes for postalloc expand.
++
++// Shift node for expand.
++instruct encodePKlass_shift(iRegNdst dst, iRegNsrc src) %{
++ // The match rule is needed to make it a 'MachTypeNode'!
++ match(Set dst (EncodePKlass src));
++ predicate(false);
++
++ format %{ "SRDI $dst, $src, 3 \t// encode" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
++ __ srdi($dst$$Register, $src$$Register, Universe::narrow_klass_shift());
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Sub node for expand.
++instruct encodePKlass_sub_base(iRegPdst dst, iRegLsrc base, iRegPdst src) %{
++ // The match rule is needed to make it a 'MachTypeNode'!
++ match(Set dst (EncodePKlass (Binary base src)));
++ predicate(false);
++
++ format %{ "SUB $dst, $base, $src \t// encode" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_subf);
++ __ subf($dst$$Register, $base$$Register, $src$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// base != 0
++// 32G aligned narrow klass base.
++instruct encodePKlass_32GAligned(iRegNdst dst, iRegPsrc src) %{
++ match(Set dst (EncodePKlass src));
++ predicate(false /* TODO: PPC port Universe::narrow_klass_base_disjoint()*/);
++
++ format %{ "EXTRDI $dst, $src, #32, #3 \t// encode with 32G aligned base" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
++ __ rldicl($dst$$Register, $src$$Register, 64-Universe::narrow_klass_shift(), 32);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// shift != 0, base != 0
++instruct encodePKlass_not_null_Ex(iRegNdst dst, iRegLsrc base, iRegPsrc src) %{
++ match(Set dst (EncodePKlass (Binary base src)));
++ predicate(false);
++
++ format %{ "EncodePKlass $dst, $src\t// $src != Null, postalloc expanded" %}
++ postalloc_expand %{
++ encodePKlass_sub_baseNode *n1 = new (C) encodePKlass_sub_baseNode();
++ n1->add_req(n_region, n_base, n_src);
++ n1->_opnds[0] = op_dst;
++ n1->_opnds[1] = op_base;
++ n1->_opnds[2] = op_src;
++ n1->_bottom_type = _bottom_type;
++
++ encodePKlass_shiftNode *n2 = new (C) encodePKlass_shiftNode();
++ n2->add_req(n_region, n1);
++ n2->_opnds[0] = op_dst;
++ n2->_opnds[1] = op_dst;
++ n2->_bottom_type = _bottom_type;
++ ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
++ ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
++
++ nodes->push(n1);
++ nodes->push(n2);
++ %}
++%}
++
++// shift != 0, base != 0
++instruct encodePKlass_not_null_ExEx(iRegNdst dst, iRegPsrc src) %{
++ match(Set dst (EncodePKlass src));
++ //predicate(Universe::narrow_klass_shift() != 0 &&
++ // true /* TODO: PPC port Universe::narrow_klass_base_overlaps()*/);
++
++ //format %{ "EncodePKlass $dst, $src\t// $src != Null, postalloc expanded" %}
++ ins_cost(DEFAULT_COST*2); // Don't count constant.
++ expand %{
++ immL baseImm %{ (jlong)(intptr_t)Universe::narrow_klass_base() %}
++ iRegLdst base;
++ loadConL_Ex(base, baseImm);
++ encodePKlass_not_null_Ex(dst, base, src);
++ %}
++%}
++
++// Decode nodes.
++
++// Shift node for expand.
++instruct decodeNKlass_shift(iRegPdst dst, iRegPsrc src) %{
++ // The match rule is needed to make it a 'MachTypeNode'!
++ match(Set dst (DecodeNKlass src));
++ predicate(false);
++
++ format %{ "SLDI $dst, $src, #3 \t// DecodeNKlass" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_rldicr);
++ __ sldi($dst$$Register, $src$$Register, Universe::narrow_klass_shift());
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Add node for expand.
++
++instruct decodeNKlass_add_base(iRegPdst dst, iRegLsrc base, iRegPdst src) %{
++ // The match rule is needed to make it a 'MachTypeNode'!
++ match(Set dst (DecodeNKlass (Binary base src)));
++ predicate(false);
++
++ format %{ "ADD $dst, $base, $src \t// DecodeNKlass, add klass base" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_add);
++ __ add($dst$$Register, $base$$Register, $src$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// src != 0, shift != 0, base != 0
++instruct decodeNKlass_notNull_addBase_Ex(iRegPdst dst, iRegLsrc base, iRegNsrc src) %{
++ match(Set dst (DecodeNKlass (Binary base src)));
++ //effect(kill src); // We need a register for the immediate result after shifting.
++ predicate(false);
++
++ format %{ "DecodeNKlass $dst = $base + ($src << 3) \t// $src != NULL, postalloc expanded" %}
++ postalloc_expand %{
++ decodeNKlass_add_baseNode *n1 = new (C) decodeNKlass_add_baseNode();
++ n1->add_req(n_region, n_base, n_src);
++ n1->_opnds[0] = op_dst;
++ n1->_opnds[1] = op_base;
++ n1->_opnds[2] = op_src;
++ n1->_bottom_type = _bottom_type;
++
++ decodeNKlass_shiftNode *n2 = new (C) decodeNKlass_shiftNode();
++ n2->add_req(n_region, n1);
++ n2->_opnds[0] = op_dst;
++ n2->_opnds[1] = op_dst;
++ n2->_bottom_type = _bottom_type;
++
++ ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
++ ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
++
++ nodes->push(n1);
++ nodes->push(n2);
++ %}
++%}
++
++// src != 0, shift != 0, base != 0
++instruct decodeNKlass_notNull_addBase_ExEx(iRegPdst dst, iRegNsrc src) %{
++ match(Set dst (DecodeNKlass src));
++ // predicate(Universe::narrow_klass_shift() != 0 &&
++ // Universe::narrow_klass_base() != 0);
++
++ //format %{ "DecodeNKlass $dst, $src \t// $src != NULL, expanded" %}
++
++ ins_cost(DEFAULT_COST*2); // Don't count constant.
++ expand %{
++ // We add first, then we shift. That way we need one register fewer,
++ // but we have to load the base pre-shifted.
++ immL baseImm %{ (jlong)((intptr_t)Universe::narrow_klass_base() >> Universe::narrow_klass_shift()) %}
++ iRegLdst base;
++ loadConL_Ex(base, baseImm);
++ decodeNKlass_notNull_addBase_Ex(dst, base, src);
++ %}
++%}
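++
++// Worked equation for the pre-shifted base above (a sketch; it assumes
++// the klass base is aligned to (1 << shift), which a page-aligned
++// metaspace base is for shift == 3):
++//   klass = base + (narrow << shift)
++//         = ((base >> shift) + narrow) << shift
++// so after adding the pre-shifted base, the final shift is free.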
++
++//----------MemBar Instructions-----------------------------------------------
++// Memory barrier flavors
++
++instruct membar_acquire() %{
++ match(LoadFence);
++ ins_cost(4*MEMORY_REF_COST);
++
++ format %{ "MEMBAR-acquire" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_lwsync);
++ __ acquire();
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct unnecessary_membar_acquire() %{
++ match(MemBarAcquire);
++ ins_cost(0);
++
++ format %{ " -- \t// redundant MEMBAR-acquire - empty" %}
++ size(0);
++ ins_encode( /*empty*/ );
++ ins_pipe(pipe_class_default);
++%}
++
++instruct membar_acquire_lock() %{
++ match(MemBarAcquireLock);
++ ins_cost(0);
++
++ format %{ " -- \t// redundant MEMBAR-acquire - empty (acquire as part of CAS in prior FastLock)" %}
++ size(0);
++ ins_encode( /*empty*/ );
++ ins_pipe(pipe_class_default);
++%}
++
++instruct membar_release() %{
++ match(MemBarRelease);
++ match(StoreFence);
++ ins_cost(4*MEMORY_REF_COST);
++
++ format %{ "MEMBAR-release" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_lwsync);
++ __ release();
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct membar_storestore() %{
++ match(MemBarStoreStore);
++ ins_cost(4*MEMORY_REF_COST);
++
++ format %{ "MEMBAR-store-store" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_lwsync);
++ __ membar(Assembler::StoreStore);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct membar_release_lock() %{
++ match(MemBarReleaseLock);
++ ins_cost(0);
++
++ format %{ " -- \t// redundant MEMBAR-release - empty (release in FastUnlock)" %}
++ size(0);
++ ins_encode( /*empty*/ );
++ ins_pipe(pipe_class_default);
++%}
++
++instruct membar_volatile() %{
++ match(MemBarVolatile);
++ ins_cost(4*MEMORY_REF_COST);
++
++ format %{ "MEMBAR-volatile" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_sync);
++ __ fence();
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// This optimization is wrong on PPC. The following pattern is not supported:
++// MemBarVolatile
++// ^ ^
++// | |
++// CtrlProj MemProj
++// ^ ^
++// | |
++// | Load
++// |
++// MemBarVolatile
++//
++// The first MemBarVolatile could get optimized out! According to
++// Vladimir, this pattern cannot occur on Oracle platforms.
++// However, it does occur on PPC64 (because of membars in
++// inline_unsafe_load_store).
++//
++// Add this node again once we find a good solution for inline_unsafe_load_store().
++// Don't forget to look at the implementation of post_store_load_barrier again;
++// we made other fixes in that method.
++//instruct unnecessary_membar_volatile() %{
++// match(MemBarVolatile);
++// predicate(Matcher::post_store_load_barrier(n));
++// ins_cost(0);
++//
++// format %{ " -- \t// redundant MEMBAR-volatile - empty" %}
++// size(0);
++// ins_encode( /*empty*/ );
++// ins_pipe(pipe_class_default);
++//%}
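++
++// Roughly, the unsupported pattern corresponds to a volatile store, an
++// independent load, and another volatile store (a Java-level sketch,
++// assuming the usual mapping of volatile accesses to MemBarVolatile):
++//   volatile int v, w;  int x;
++//   v = 1;       // volatile store; MemBarVolatile #1
++//   int r = x;   // plain load hanging off barrier #1's MemProj
++//   w = 2;       // volatile store; MemBarVolatile #2
++// Eliding barrier #1 would let the load be reordered with the first
++// store on PPC64.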
++
++instruct membar_CPUOrder() %{
++ match(MemBarCPUOrder);
++ ins_cost(0);
++
++ format %{ " -- \t// MEMBAR-CPUOrder - empty: PPC64 processors are self-consistent." %}
++ size(0);
++ ins_encode( /*empty*/ );
++ ins_pipe(pipe_class_default);
++%}
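++
++// Summary of the PPC64 mapping used by the membar rules above (an
++// informational sketch): acquire -> lwsync after the access (LoadLoad |
++// LoadStore), release -> lwsync before the access (LoadStore |
++// StoreStore), full fence -> sync, the only barrier that also orders
++// StoreLoad.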
++
++//----------Conditional Move---------------------------------------------------
++
++// Cmove using isel.
++instruct cmovI_reg_isel(cmpOp cmp, flagsReg crx, iRegIdst dst, iRegIsrc src) %{
++ match(Set dst (CMoveI (Binary cmp crx) (Binary dst src)));
++ predicate(VM_Version::has_isel());
++ ins_cost(DEFAULT_COST);
++
++ format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %}
++ size(4);
++ ins_encode %{
++ // This is a Power7 instruction for which no machine description
++ // exists. Anyway, the scheduler should be off on Power7.
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++ int cc = $cmp$$cmpcode;
++ __ isel($dst$$Register, $crx$$CondRegister,
++ (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
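++
++// isel is a branchless conditional move (a sketch of the semantics of the
++// encoding above): dst = cond(crx) ? src : dst, where cond is selected by
++// (cc & 3) and its sense is flipped by the invert bit (~cc & 8). No branch
++// is emitted, hence the single DEFAULT_COST.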
++
++instruct cmovI_reg(cmpOp cmp, flagsReg crx, iRegIdst dst, iRegIsrc src) %{
++ match(Set dst (CMoveI (Binary cmp crx) (Binary dst src)));
++ predicate(!VM_Version::has_isel());
++ ins_cost(DEFAULT_COST+BRANCH_COST);
++
++ ins_variable_size_depending_on_alignment(true);
++
++ format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %}
++ // Worst case is branch + move + stop, no stop without scheduler.
++ size(false /* TODO: PPC PORT Compile::current()->do_hb_scheduling()*/ ? 12 : 8);
++ ins_encode( enc_cmove_reg(dst, crx, src, cmp) );
++ ins_pipe(pipe_class_default);
++%}
++
++instruct cmovI_imm(cmpOp cmp, flagsReg crx, iRegIdst dst, immI16 src) %{
++ match(Set dst (CMoveI (Binary cmp crx) (Binary dst src)));
++ ins_cost(DEFAULT_COST+BRANCH_COST);
++
++ ins_variable_size_depending_on_alignment(true);
++
++ format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %}
++ // Worst case is branch + move + stop, no stop without scheduler.
++ size(false /* TODO: PPC PORT Compile::current()->do_hb_scheduling()*/ ? 12 : 8);
++ ins_encode( enc_cmove_imm(dst, crx, src, cmp) );
++ ins_pipe(pipe_class_default);
++%}
++
++// Cmove using isel.
++instruct cmovL_reg_isel(cmpOp cmp, flagsReg crx, iRegLdst dst, iRegLsrc src) %{
++ match(Set dst (CMoveL (Binary cmp crx) (Binary dst src)));
++ predicate(VM_Version::has_isel());
++ ins_cost(DEFAULT_COST);
++
++ format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %}
++ size(4);
++ ins_encode %{
++ // This is a Power7 instruction for which no machine description
++ // exists. Anyway, the scheduler should be off on Power7.
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++ int cc = $cmp$$cmpcode;
++ __ isel($dst$$Register, $crx$$CondRegister,
++ (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct cmovL_reg(cmpOp cmp, flagsReg crx, iRegLdst dst, iRegLsrc src) %{
++ match(Set dst (CMoveL (Binary cmp crx) (Binary dst src)));
++ predicate(!VM_Version::has_isel());
++ ins_cost(DEFAULT_COST+BRANCH_COST);
++
++ ins_variable_size_depending_on_alignment(true);
++
++ format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %}
++ // Worst case is branch + move + stop, no stop without scheduler.
++ size(false /* TODO: PPC PORT Compile::current()->do_hb_scheduling()*/ ? 12 : 8);
++ ins_encode( enc_cmove_reg(dst, crx, src, cmp) );
++ ins_pipe(pipe_class_default);
++%}
++
++instruct cmovL_imm(cmpOp cmp, flagsReg crx, iRegLdst dst, immL16 src) %{
++ match(Set dst (CMoveL (Binary cmp crx) (Binary dst src)));
++ ins_cost(DEFAULT_COST+BRANCH_COST);
++
++ ins_variable_size_depending_on_alignment(true);
++
++ format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %}
++ // Worst case is branch + move + stop, no stop without scheduler.
++ size(false /* TODO: PPC PORT Compile::current()->do_hb_scheduling()*/ ? 12 : 8);
++ ins_encode( enc_cmove_imm(dst, crx, src, cmp) );
++ ins_pipe(pipe_class_default);
++%}
++
++// Cmove using isel.
++instruct cmovN_reg_isel(cmpOp cmp, flagsReg crx, iRegNdst dst, iRegNsrc src) %{
++ match(Set dst (CMoveN (Binary cmp crx) (Binary dst src)));
++ predicate(VM_Version::has_isel());
++ ins_cost(DEFAULT_COST);
++
++ format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %}
++ size(4);
++ ins_encode %{
++ // This is a Power7 instruction for which no machine description
++ // exists. Anyway, the scheduler should be off on Power7.
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++ int cc = $cmp$$cmpcode;
++ __ isel($dst$$Register, $crx$$CondRegister,
++ (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Conditional move for RegN. Only cmov(reg, reg).
++instruct cmovN_reg(cmpOp cmp, flagsReg crx, iRegNdst dst, iRegNsrc src) %{
++ match(Set dst (CMoveN (Binary cmp crx) (Binary dst src)));
++ predicate(!VM_Version::has_isel());
++ ins_cost(DEFAULT_COST+BRANCH_COST);
++
++ ins_variable_size_depending_on_alignment(true);
++
++ format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %}
++ // Worst case is branch + move + stop, no stop without scheduler.
++ size(false /* TODO: PPC PORT Compile::current()->do_hb_scheduling()*/ ? 12 : 8);
++ ins_encode( enc_cmove_reg(dst, crx, src, cmp) );
++ ins_pipe(pipe_class_default);
++%}
++
++instruct cmovN_imm(cmpOp cmp, flagsReg crx, iRegNdst dst, immN_0 src) %{
++ match(Set dst (CMoveN (Binary cmp crx) (Binary dst src)));
++ ins_cost(DEFAULT_COST+BRANCH_COST);
++
++ ins_variable_size_depending_on_alignment(true);
++
++ format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %}
++ // Worst case is branch + move + stop, no stop without scheduler.
++ size(false /* TODO: PPC PORT Compile::current()->do_hb_scheduling()*/ ? 12 : 8);
++ ins_encode( enc_cmove_imm(dst, crx, src, cmp) );
++ ins_pipe(pipe_class_default);
++%}
++
++// Cmove using isel.
++instruct cmovP_reg_isel(cmpOp cmp, flagsReg crx, iRegPdst dst, iRegPsrc src) %{
++ match(Set dst (CMoveP (Binary cmp crx) (Binary dst src)));
++ predicate(VM_Version::has_isel());
++ ins_cost(DEFAULT_COST);
++
++ format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %}
++ size(4);
++ ins_encode %{
++ // This is a Power7 instruction for which no machine description
++ // exists. Anyway, the scheduler should be off on Power7.
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++ int cc = $cmp$$cmpcode;
++ __ isel($dst$$Register, $crx$$CondRegister,
++ (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct cmovP_reg(cmpOp cmp, flagsReg crx, iRegPdst dst, iRegP_N2P src) %{
++ match(Set dst (CMoveP (Binary cmp crx) (Binary dst src)));
++ predicate(!VM_Version::has_isel());
++ ins_cost(DEFAULT_COST+BRANCH_COST);
++
++ ins_variable_size_depending_on_alignment(true);
++
++ format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %}
++ // Worst case is branch + move + stop, no stop without scheduler.
++ size(false /* TODO: PPC PORT Compile::current()->do_hb_scheduling()*/ ? 12 : 8);
++ ins_encode( enc_cmove_reg(dst, crx, src, cmp) );
++ ins_pipe(pipe_class_default);
++%}
++
++instruct cmovP_imm(cmpOp cmp, flagsReg crx, iRegPdst dst, immP_0 src) %{
++ match(Set dst (CMoveP (Binary cmp crx) (Binary dst src)));
++ ins_cost(DEFAULT_COST+BRANCH_COST);
++
++ ins_variable_size_depending_on_alignment(true);
++
++ format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %}
++ // Worst case is branch + move + stop, no stop without scheduler.
++ size(false /* TODO: PPC PORT Compile::current()->do_hb_scheduling()*/ ? 12 : 8);
++ ins_encode( enc_cmove_imm(dst, crx, src, cmp) );
++ ins_pipe(pipe_class_default);
++%}
++
++instruct cmovF_reg(cmpOp cmp, flagsReg crx, regF dst, regF src) %{
++ match(Set dst (CMoveF (Binary cmp crx) (Binary dst src)));
++ ins_cost(DEFAULT_COST+BRANCH_COST);
++
++ ins_variable_size_depending_on_alignment(true);
++
++ format %{ "CMOVEF $cmp, $crx, $dst, $src\n\t" %}
++ // Worst case is branch + move + stop, no stop without scheduler.
++ size(false /* TODO: PPC PORT (InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 12 : 8);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_cmovef);
++ Label done;
++ assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
++ // Branch if not (cmp crx).
++ __ bc(cc_to_inverse_boint($cmp$$cmpcode), cc_to_biint($cmp$$cmpcode, $crx$$reg), done);
++ __ fmr($dst$$FloatRegister, $src$$FloatRegister);
++ // TODO PPC port __ endgroup_if_needed(_size == 12);
++ __ bind(done);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct cmovD_reg(cmpOp cmp, flagsReg crx, regD dst, regD src) %{
++ match(Set dst (CMoveD (Binary cmp crx) (Binary dst src)));
++ ins_cost(DEFAULT_COST+BRANCH_COST);
++
++ ins_variable_size_depending_on_alignment(true);
++
++ format %{ "CMOVEF $cmp, $crx, $dst, $src\n\t" %}
++ // Worst case is branch + move + stop, no stop without scheduler.
++ size(false /* TODO: PPC PORT (InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 12 : 8);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_cmovef);
++ Label done;
++ assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
++ // Branch if not (cmp crx).
++ __ bc(cc_to_inverse_boint($cmp$$cmpcode), cc_to_biint($cmp$$cmpcode, $crx$$reg), done);
++ __ fmr($dst$$FloatRegister, $src$$FloatRegister);
++ // TODO PPC port __ endgroup_if_needed(_size == 12);
++ __ bind(done);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++//----------Conditional_store--------------------------------------------------
++// Conditional-store of the updated heap-top.
++// Used during allocation of the shared heap.
++// Sets flags (EQ) on success. Implemented with a CASA on Sparc.
++
++// As compareAndSwapL, but return flag register instead of boolean value in
++// int register.
++// Used by sun/misc/AtomicLongCSImpl.java.
++// Mem_ptr must be a memory operand, else this node does not get
++// Flag_needs_anti_dependence_check set by adlc. If this is not set, this node
++// can be rematerialized, which leads to errors.
++instruct storeLConditional_regP_regL_regL(flagsReg crx, indirect mem_ptr, iRegLsrc oldVal, iRegLsrc newVal) %{
++ match(Set crx (StoreLConditional mem_ptr (Binary oldVal newVal)));
++ format %{ "CMPXCHGD if ($crx = ($oldVal == *$mem_ptr)) *mem_ptr = $newVal; as bool" %}
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++ __ cmpxchgd($crx$$CondRegister, R0, $oldVal$$Register, $newVal$$Register, $mem_ptr$$Register,
++ MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
++ noreg, NULL, true);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// As compareAndSwapP, but return flag register instead of boolean value in
++// int register.
++// This instruction is matched if UseTLAB is off.
++// Mem_ptr must be a memory operand, else this node does not get
++// Flag_needs_anti_dependence_check set by adlc. If this is not set, this node
++// can be rematerialized, which leads to errors.
++instruct storePConditional_regP_regP_regP(flagsReg crx, indirect mem_ptr, iRegPsrc oldVal, iRegPsrc newVal) %{
++ match(Set crx (StorePConditional mem_ptr (Binary oldVal newVal)));
++ format %{ "CMPXCHGD if ($crx = ($oldVal == *$mem_ptr)) *mem_ptr = $newVal; as bool" %}
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++ __ cmpxchgd($crx$$CondRegister, R0, $oldVal$$Register, $newVal$$Register, $mem_ptr$$Register,
++ MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
++ noreg, NULL, true);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Implement LoadPLocked. Must be ordered against changes of the memory location
++// by storePConditional.
++// Don't know whether this is ever used.
++instruct loadPLocked(iRegPdst dst, memory mem) %{
++ match(Set dst (LoadPLocked mem));
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "LD $dst, $mem \t// loadPLocked\n\t"
++ "TWI $dst\n\t"
++ "ISYNC" %}
++ size(12);
++ ins_encode( enc_ld_ac(dst, mem) );
++ ins_pipe(pipe_class_memory);
++%}
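++
++// LD/TWI/ISYNC is the classic PowerPC load-acquire idiom (a sketch of why
++// the sequence works): the never-taken trap on the loaded value creates a
++// data dependency, and isync keeps later accesses from starting until that
++// dependency has resolved, ordering them after the load.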
++
++//----------Compare-And-Swap---------------------------------------------------
++
++// CompareAndSwap{P,I,L} have more than one output, therefore "CmpI
++// (CompareAndSwap ...)" or "If (CmpI (CompareAndSwap ..))" cannot be
++// matched.
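++//
++// On PPC64, cmpxchgw/cmpxchgd presumably expand to a larx/stcx. retry loop
++// along these lines (register and label names are illustrative only):
++//   loop: ldarx  Rcur, 0, Rptr     // load and reserve
++//         cmpd   CCR0, Rcur, Rold
++//         bne    CCR0, fail        // value differs: CAS fails
++//         stdcx. Rnew, 0, Rptr     // store iff reservation still held
++//         bne    CCR0, loop        // reservation lost: retry
++//   fail: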
++
++instruct compareAndSwapI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2) %{
++ match(Set res (CompareAndSwapI mem_ptr (Binary src1 src2)));
++ format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
++ // Variable size: instruction count smaller if regs are disjoint.
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++ // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
++ __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
++ MacroAssembler::MemBarFenceAfter, MacroAssembler::cmpxchgx_hint_atomic_update(),
++ $res$$Register, true);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct compareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2) %{
++ match(Set res (CompareAndSwapN mem_ptr (Binary src1 src2)));
++ format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
++ // Variable size: instruction count smaller if regs are disjoint.
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++ // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
++ __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
++ MacroAssembler::MemBarFenceAfter, MacroAssembler::cmpxchgx_hint_atomic_update(),
++ $res$$Register, true);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct compareAndSwapL_regP_regL_regL(iRegIdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2) %{
++ match(Set res (CompareAndSwapL mem_ptr (Binary src1 src2)));
++ format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool" %}
++ // Variable size: instruction count smaller if regs are disjoint.
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++ // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
++ __ cmpxchgd(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
++ MacroAssembler::MemBarFenceAfter, MacroAssembler::cmpxchgx_hint_atomic_update(),
++ $res$$Register, NULL, true);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct compareAndSwapP_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2) %{
++ match(Set res (CompareAndSwapP mem_ptr (Binary src1 src2)));
++ format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool; ptr" %}
++ // Variable size: instruction count smaller if regs are disjoint.
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++ // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
++ __ cmpxchgd(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
++ MacroAssembler::MemBarFenceAfter, MacroAssembler::cmpxchgx_hint_atomic_update(),
++ $res$$Register, NULL, true);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct getAndAddI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src) %{
++ match(Set res (GetAndAddI mem_ptr src));
++ format %{ "GetAndAddI $res, $mem_ptr, $src" %}
++ // Variable size: instruction count smaller if regs are disjoint.
++ ins_encode( enc_GetAndAddI(res, mem_ptr, src) );
++ ins_pipe(pipe_class_default);
++%}
++
++instruct getAndAddL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src) %{
++ match(Set res (GetAndAddL mem_ptr src));
++ format %{ "GetAndAddL $res, $mem_ptr, $src" %}
++ // Variable size: instruction count smaller if regs are disjoint.
++ ins_encode( enc_GetAndAddL(res, mem_ptr, src) );
++ ins_pipe(pipe_class_default);
++%}
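++
++// The GetAnd* encoders are read-modify-write loops; a minimal sketch of
++// what enc_GetAndAddL presumably emits (names illustrative):
++//   loop: ldarx  Rres, 0, Rptr     // fetch old value with reservation
++//         add    Rtmp, Rres, Rsrc
++//         stdcx. Rtmp, 0, Rptr
++//         bne    CCR0, loop        // reservation lost: retry
++// The GetAndSet variants store Rsrc directly instead of the sum.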
++
++instruct getAndSetI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src) %{
++ match(Set res (GetAndSetI mem_ptr src));
++ format %{ "GetAndSetI $res, $mem_ptr, $src" %}
++ // Variable size: instruction count smaller if regs are disjoint.
++ ins_encode( enc_GetAndSetI(res, mem_ptr, src) );
++ ins_pipe(pipe_class_default);
++%}
++
++instruct getAndSetL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src) %{
++ match(Set res (GetAndSetL mem_ptr src));
++ format %{ "GetAndSetL $res, $mem_ptr, $src" %}
++ // Variable size: instruction count smaller if regs are disjoint.
++ ins_encode( enc_GetAndSetL(res, mem_ptr, src) );
++ ins_pipe(pipe_class_default);
++%}
++
++instruct getAndSetP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src) %{
++ match(Set res (GetAndSetP mem_ptr src));
++ format %{ "GetAndSetP $res, $mem_ptr, $src" %}
++ // Variable size: instruction count smaller if regs are disjoint.
++ ins_encode( enc_GetAndSetL(res, mem_ptr, src) );
++ ins_pipe(pipe_class_default);
++%}
++
++instruct getAndSetN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src) %{
++ match(Set res (GetAndSetN mem_ptr src));
++ format %{ "GetAndSetN $res, $mem_ptr, $src" %}
++ // Variable size: instruction count smaller if regs are disjoint.
++ ins_encode( enc_GetAndSetI(res, mem_ptr, src) );
++ ins_pipe(pipe_class_default);
++%}
++
++//----------Arithmetic Instructions--------------------------------------------
++// Addition Instructions
++
++// Register Addition
++instruct addI_reg_reg(iRegIdst dst, iRegIsrc_iRegL2Isrc src1, iRegIsrc_iRegL2Isrc src2) %{
++ match(Set dst (AddI src1 src2));
++ format %{ "ADD $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_add);
++ __ add($dst$$Register, $src1$$Register, $src2$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Expand does not work with the above instruct (reason unclear).
++instruct addI_reg_reg_2(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
++ // no match-rule
++ effect(DEF dst, USE src1, USE src2);
++ format %{ "ADD $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_add);
++ __ add($dst$$Register, $src1$$Register, $src2$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct tree_addI_addI_addI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, iRegIsrc src3, iRegIsrc src4) %{
++ match(Set dst (AddI (AddI (AddI src1 src2) src3) src4));
++ ins_cost(DEFAULT_COST*3);
++
++ expand %{
++ // FIXME: we should do this in the ideal world.
++ iRegIdst tmp1;
++ iRegIdst tmp2;
++ addI_reg_reg(tmp1, src1, src2);
++ addI_reg_reg_2(tmp2, src3, src4); // Adlc complains about addI_reg_reg.
++ addI_reg_reg(dst, tmp1, tmp2);
++ %}
++%}
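++
++// The expand above rebalances the left-leaning chain ((s1+s2)+s3)+s4 into
++// (s1+s2)+(s3+s4): still three adds, but the critical path shrinks from
++// three to two because the two inner adds are now independent.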
++
++// Immediate Addition
++instruct addI_reg_imm16(iRegIdst dst, iRegIsrc src1, immI16 src2) %{
++ match(Set dst (AddI src1 src2));
++ format %{ "ADDI $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_addi);
++ __ addi($dst$$Register, $src1$$Register, $src2$$constant);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Immediate Addition with 16-bit shifted operand
++instruct addI_reg_immhi16(iRegIdst dst, iRegIsrc src1, immIhi16 src2) %{
++ match(Set dst (AddI src1 src2));
++ format %{ "ADDIS $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_addis);
++ __ addis($dst$$Register, $src1$$Register, ($src2$$constant)>>16);
++ %}
++ ins_pipe(pipe_class_default);
++%}
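++
++// ADDIS adds its 16-bit immediate shifted left by 16, so the encoder
++// passes $src2 >> 16. The immIhi16 operand presumably admits only values
++// whose low 16 bits are zero, which makes dst = src1 + src2 exact.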
++
++// Long Addition
++instruct addL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
++ match(Set dst (AddL src1 src2));
++ format %{ "ADD $dst, $src1, $src2 \t// long" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_add);
++ __ add($dst$$Register, $src1$$Register, $src2$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Expand does not work with the above instruct (reason unclear).
++instruct addL_reg_reg_2(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
++ // no match-rule
++ effect(DEF dst, USE src1, USE src2);
++ format %{ "ADD $dst, $src1, $src2 \t// long" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_add);
++ __ add($dst$$Register, $src1$$Register, $src2$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct tree_addL_addL_addL_reg_reg_Ex(iRegLdst dst, iRegLsrc src1, iRegLsrc src2, iRegLsrc src3, iRegLsrc src4) %{
++ match(Set dst (AddL (AddL (AddL src1 src2) src3) src4));
++ ins_cost(DEFAULT_COST*3);
++
++ expand %{
++ // FIXME: we should do this in the ideal world.
++ iRegLdst tmp1;
++ iRegLdst tmp2;
++ addL_reg_reg(tmp1, src1, src2);
++ addL_reg_reg_2(tmp2, src3, src4); // Adlc complains about addL_reg_reg.
++ addL_reg_reg(dst, tmp1, tmp2);
++ %}
++%}
++
++// AddL + ConvL2I.
++instruct addI_regL_regL(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{
++ match(Set dst (ConvL2I (AddL src1 src2)));
++
++ format %{ "ADD $dst, $src1, $src2 \t// long + l2i" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_add);
++ __ add($dst$$Register, $src1$$Register, $src2$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// No constant pool entries required.
++instruct addL_reg_imm16(iRegLdst dst, iRegLsrc src1, immL16 src2) %{
++ match(Set dst (AddL src1 src2));
++
++ format %{ "ADDI $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_addi);
++ __ addi($dst$$Register, $src1$$Register, $src2$$constant);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Long Immediate Addition with 16-bit shifted operand.
++// No constant pool entries required.
++instruct addL_reg_immhi16(iRegLdst dst, iRegLsrc src1, immL32hi16 src2) %{
++ match(Set dst (AddL src1 src2));
++
++ format %{ "ADDIS $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_addis);
++ __ addis($dst$$Register, $src1$$Register, ($src2$$constant)>>16);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Pointer Register Addition
++instruct addP_reg_reg(iRegPdst dst, iRegP_N2P src1, iRegLsrc src2) %{
++ match(Set dst (AddP src1 src2));
++ format %{ "ADD $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_add);
++ __ add($dst$$Register, $src1$$Register, $src2$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Pointer Immediate Addition
++// No constant pool entries required.
++instruct addP_reg_imm16(iRegPdst dst, iRegP_N2P src1, immL16 src2) %{
++ match(Set dst (AddP src1 src2));
++
++ format %{ "ADDI $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_addi);
++ __ addi($dst$$Register, $src1$$Register, $src2$$constant);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Pointer Immediate Addition with 16-bit shifted operand.
++// No constant pool entries required.
++instruct addP_reg_immhi16(iRegPdst dst, iRegP_N2P src1, immL32hi16 src2) %{
++ match(Set dst (AddP src1 src2));
++
++ format %{ "ADDIS $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_addis);
++ __ addis($dst$$Register, $src1$$Register, ($src2$$constant)>>16);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++//---------------------
++// Subtraction Instructions
++
++// Register Subtraction
++instruct subI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
++ match(Set dst (SubI src1 src2));
++ format %{ "SUBF $dst, $src2, $src1" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_subf);
++ __ subf($dst$$Register, $src2$$Register, $src1$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Immediate Subtraction
++// The compiler converts "x-c0" into "x+ -c0" (see SubINode::Ideal),
++// so this rule seems to be unused.
++instruct subI_reg_imm16(iRegIdst dst, iRegIsrc src1, immI16 src2) %{
++ match(Set dst (SubI src1 src2));
++ format %{ "SUBI $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_addi);
++ __ addi($dst$$Register, $src1$$Register, ($src2$$constant) * (-1));
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// SubI from constant (using subfic).
++instruct subI_imm16_reg(iRegIdst dst, immI16 src1, iRegIsrc src2) %{
++ match(Set dst (SubI src1 src2));
++ format %{ "SUBI $dst, $src1, $src2" %}
++
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_subfic);
++ __ subfic($dst$$Register, $src2$$Register, $src1$$constant);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Turn the sign-bit of an integer into a 32-bit mask, 0x0...0 for
++// positive integers and 0xF...F for negative ones.
++instruct signmask32I_regI(iRegIdst dst, iRegIsrc src) %{
++ // no match-rule, false predicate
++ effect(DEF dst, USE src);
++ predicate(false);
++
++ format %{ "SRAWI $dst, $src, #31" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_srawi);
++ __ srawi($dst$$Register, $src$$Register, 0x1f);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct absI_reg_Ex(iRegIdst dst, iRegIsrc src) %{
++ match(Set dst (AbsI src));
++ ins_cost(DEFAULT_COST*3);
++
++ expand %{
++ iRegIdst tmp1;
++ iRegIdst tmp2;
++ signmask32I_regI(tmp1, src);
++ xorI_reg_reg(tmp2, tmp1, src);
++ subI_reg_reg(dst, tmp2, tmp1);
++ %}
++%}
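++
++// This is the classic branch-free abs (a worked sketch): with
++//   m = src >> 31   // arithmetic shift; all ones iff src < 0
++// the expansion computes (src ^ m) - m, which is src when m == 0 and
++// (~src) + 1 == -src when m == -1.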
++
++instruct negI_regI(iRegIdst dst, immI_0 zero, iRegIsrc src2) %{
++ match(Set dst (SubI zero src2));
++ format %{ "NEG $dst, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_neg);
++ __ neg($dst$$Register, $src2$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Long subtraction
++instruct subL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
++ match(Set dst (SubL src1 src2));
++ format %{ "SUBF $dst, $src2, $src1 \t// long" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_subf);
++ __ subf($dst$$Register, $src2$$Register, $src1$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// SubL + ConvL2I.
++instruct subI_regL_regL(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{
++ match(Set dst (ConvL2I (SubL src1 src2)));
++
++ format %{ "SUBF $dst, $src2, $src1 \t// long + l2i" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_subf);
++ __ subf($dst$$Register, $src2$$Register, $src1$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Immediate Subtraction
++// The compiler converts "x-c0" into "x+ -c0" (see SubLNode::Ideal),
++// so this rule seems to be unused.
++// No constant pool entries required.
++instruct subL_reg_imm16(iRegLdst dst, iRegLsrc src1, immL16 src2) %{
++ match(Set dst (SubL src1 src2));
++
++ format %{ "SUBI $dst, $src1, $src2 \t// long" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_addi);
++ __ addi($dst$$Register, $src1$$Register, ($src2$$constant) * (-1));
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Turn the sign-bit of a long into a 64-bit mask, 0x0...0 for
++// positive longs and 0xF...F for negative ones.
++instruct signmask64I_regL(iRegIdst dst, iRegLsrc src) %{
++ // no match-rule, false predicate
++ effect(DEF dst, USE src);
++ predicate(false);
++
++ format %{ "SRADI $dst, $src, #63" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_sradi);
++ __ sradi($dst$$Register, $src$$Register, 0x3f);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Turn the sign-bit of a long into a 64-bit mask, 0x0...0 for
++// positive longs and 0xF...F for negative ones.
++instruct signmask64L_regL(iRegLdst dst, iRegLsrc src) %{
++ // no match-rule, false predicate
++ effect(DEF dst, USE src);
++ predicate(false);
++
++ format %{ "SRADI $dst, $src, #63" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_sradi);
++ __ sradi($dst$$Register, $src$$Register, 0x3f);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Long negation
++instruct negL_reg_reg(iRegLdst dst, immL_0 zero, iRegLsrc src2) %{
++ match(Set dst (SubL zero src2));
++ format %{ "NEG $dst, $src2 \t// long" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_neg);
++ __ neg($dst$$Register, $src2$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// NegL + ConvL2I.
++instruct negI_con0_regL(iRegIdst dst, immL_0 zero, iRegLsrc src2) %{
++ match(Set dst (ConvL2I (SubL zero src2)));
++
++ format %{ "NEG $dst, $src2 \t// long + l2i" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_neg);
++ __ neg($dst$$Register, $src2$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Multiplication Instructions
++// Integer Multiplication
++
++// Register Multiplication
++instruct mulI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
++ match(Set dst (MulI src1 src2));
++ ins_cost(DEFAULT_COST);
++
++ format %{ "MULLW $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_mullw);
++ __ mullw($dst$$Register, $src1$$Register, $src2$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Immediate Multiplication
++instruct mulI_reg_imm16(iRegIdst dst, iRegIsrc src1, immI16 src2) %{
++ match(Set dst (MulI src1 src2));
++ ins_cost(DEFAULT_COST);
++
++ format %{ "MULLI $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_mulli);
++ __ mulli($dst$$Register, $src1$$Register, $src2$$constant);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct mulL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
++ match(Set dst (MulL src1 src2));
++ ins_cost(DEFAULT_COST);
++
++ format %{ "MULLD $dst $src1, $src2 \t// long" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_mulld);
++ __ mulld($dst$$Register, $src1$$Register, $src2$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Multiply high for optimized long division by constant.
++instruct mulHighL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
++ match(Set dst (MulHiL src1 src2));
++ ins_cost(DEFAULT_COST);
++
++ format %{ "MULHD $dst $src1, $src2 \t// long" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_mulhd);
++ __ mulhd($dst$$Register, $src1$$Register, $src2$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Immediate Multiplication
++instruct mulL_reg_imm16(iRegLdst dst, iRegLsrc src1, immL16 src2) %{
++ match(Set dst (MulL src1 src2));
++ ins_cost(DEFAULT_COST);
++
++ format %{ "MULLI $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_mulli);
++ __ mulli($dst$$Register, $src1$$Register, $src2$$constant);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Integer Division with Immediate -1: Negate.
++instruct divI_reg_immIvalueMinus1(iRegIdst dst, iRegIsrc src1, immI_minus1 src2) %{
++ match(Set dst (DivI src1 src2));
++ ins_cost(DEFAULT_COST);
++
++ format %{ "NEG $dst, $src1 \t// /-1" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_neg);
++ __ neg($dst$$Register, $src1$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Integer Division with constant, but not -1.
++// We should be able to improve this by checking the type of src2.
++// It might well be that src2 is known to be positive.
++instruct divI_reg_regnotMinus1(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
++ match(Set dst (DivI src1 src2));
++ predicate(n->in(2)->find_int_con(-1) != -1); // src2 is a constant, but not -1
++ ins_cost(2*DEFAULT_COST);
++
++ format %{ "DIVW $dst, $src1, $src2 \t// /not-1" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_divw);
++ __ divw($dst$$Register, $src1$$Register, $src2$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct cmovI_bne_negI_reg(iRegIdst dst, flagsReg crx, iRegIsrc src1) %{
++ effect(USE_DEF dst, USE src1, USE crx);
++ predicate(false);
++
++ ins_variable_size_depending_on_alignment(true);
++
++ format %{ "CMOVE $dst, neg($src1), $crx" %}
++ // Worst case is branch + move + stop, no stop without scheduler.
++ size(false /* TODO: PPC PORT (InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 12 : 8);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
++ Label done;
++ __ bne($crx$$CondRegister, done);
++ __ neg($dst$$Register, $src1$$Register);
++ // TODO PPC port __ endgroup_if_needed(_size == 12);
++ __ bind(done);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Integer Division with Registers not containing constants.
++instruct divI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
++ match(Set dst (DivI src1 src2));
++ ins_cost(10*DEFAULT_COST);
++
++ expand %{
++ immI16 imm %{ (int)-1 %}
++ flagsReg tmp1;
++ cmpI_reg_imm16(tmp1, src2, imm); // check src2 == -1
++ divI_reg_regnotMinus1(dst, src1, src2); // dst = src1 / src2
++ cmovI_bne_negI_reg(dst, tmp1, src1); // cmove dst = neg(src1) if src2 == -1
++ %}
++%}
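++
++// C-level sketch of the expansion above; the -1 special case exists
++// because divw's result is undefined for INT_MIN / -1:
++//   int divI(int a, int b) {
++//     return (b == -1) ? -a : a / b;   // the cmove picks -a when b == -1
++//   }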
++
++// Long Division with Immediate -1: Negate.
++instruct divL_reg_immLvalueMinus1(iRegLdst dst, iRegLsrc src1, immL_minus1 src2) %{
++ match(Set dst (DivL src1 src2));
++ ins_cost(DEFAULT_COST);
++
++ format %{ "NEG $dst, $src1 \t// /-1, long" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_neg);
++ __ neg($dst$$Register, $src1$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Long Division with constant, but not -1.
++instruct divL_reg_regnotMinus1(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
++ match(Set dst (DivL src1 src2));
++ predicate(n->in(2)->find_long_con(-1L) != -1L); // Src2 is a constant, but not -1.
++ ins_cost(2*DEFAULT_COST);
++
++ format %{ "DIVD $dst, $src1, $src2 \t// /not-1, long" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_divd);
++ __ divd($dst$$Register, $src1$$Register, $src2$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct cmovL_bne_negL_reg(iRegLdst dst, flagsReg crx, iRegLsrc src1) %{
++ effect(USE_DEF dst, USE src1, USE crx);
++ predicate(false);
++
++ ins_variable_size_depending_on_alignment(true);
++
++ format %{ "CMOVE $dst, neg($src1), $crx" %}
++ // Worst case is branch + move + stop, no stop without scheduler.
++ size(false /* TODO: PPC PORT (InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 12 : 8);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
++ Label done;
++ __ bne($crx$$CondRegister, done);
++ __ neg($dst$$Register, $src1$$Register);
++ // TODO PPC port __ endgroup_if_needed(_size == 12);
++ __ bind(done);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Long Division with Registers not containing constants.
++instruct divL_reg_reg_Ex(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
++ match(Set dst (DivL src1 src2));
++ ins_cost(10*DEFAULT_COST);
++
++ expand %{
++ immL16 imm %{ (int)-1 %}
++ flagsReg tmp1;
++ cmpL_reg_imm16(tmp1, src2, imm); // check src2 == -1
++ divL_reg_regnotMinus1(dst, src1, src2); // dst = src1 / src2
++ cmovL_bne_negL_reg(dst, tmp1, src1); // cmove dst = neg(src1) if src2 == -1
++ %}
++%}
++
++// Integer Remainder with registers.
++instruct modI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
++ match(Set dst (ModI src1 src2));
++ ins_cost(10*DEFAULT_COST);
++
++ expand %{
++ immI16 imm %{ (int)-1 %}
++ flagsReg tmp1;
++ iRegIdst tmp2;
++ iRegIdst tmp3;
++ cmpI_reg_imm16(tmp1, src2, imm); // check src2 == -1
++ divI_reg_regnotMinus1(tmp2, src1, src2); // tmp2 = src1 / src2
++ cmovI_bne_negI_reg(tmp2, tmp1, src1); // cmove tmp2 = neg(src1) if src2 == -1
++ mulI_reg_reg(tmp3, src2, tmp2); // tmp3 = src2 * tmp2
++ subI_reg_reg(dst, src1, tmp3); // dst = src1 - tmp3
++ %}
++%}
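++
++// C-level sketch of the remainder expansion (same -1 guard as division):
++//   int modI(int a, int b) {
++//     int q = (b == -1) ? -a : a / b;  // quotient with the -1 case fixed up
++//     return a - q * b;                // dst = src1 - src2 * q
++//   }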
++
++// Long Remainder with registers
++instruct modL_reg_reg_Ex(iRegLdst dst, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
++ match(Set dst (ModL src1 src2));
++ ins_cost(10*DEFAULT_COST);
++
++ expand %{
++ immL16 imm %{ (int)-1 %}
++ flagsReg tmp1;
++ iRegLdst tmp2;
++ iRegLdst tmp3;
++ cmpL_reg_imm16(tmp1, src2, imm); // check src2 == -1
++ divL_reg_regnotMinus1(tmp2, src1, src2); // tmp2 = src1 / src2
++ cmovL_bne_negL_reg(tmp2, tmp1, src1); // cmove tmp2 = neg(src1) if src2 == -1
++ mulL_reg_reg(tmp3, src2, tmp2); // tmp3 = src2 * tmp2
++ subL_reg_reg(dst, src1, tmp3); // dst = src1 - tmp3
++ %}
++%}
++
++// Integer Shift Instructions
++
++// Register Shift Left
++
++// Clear all but the lowest #mask bits.
++// Used to normalize shift amounts in registers.
++instruct maskI_reg_imm(iRegIdst dst, iRegIsrc src, uimmI6 mask) %{
++ // no match-rule, false predicate
++ effect(DEF dst, USE src, USE mask);
++ predicate(false);
++
++ format %{ "MASK $dst, $src, $mask \t// clear $mask upper bits" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
++ __ clrldi($dst$$Register, $src$$Register, $mask$$constant);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct lShiftI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
++ // no match-rule, false predicate
++ effect(DEF dst, USE src1, USE src2);
++ predicate(false);
++
++ format %{ "SLW $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_slw);
++ __ slw($dst$$Register, $src1$$Register, $src2$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct lShiftI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
++ match(Set dst (LShiftI src1 src2));
++ ins_cost(DEFAULT_COST*2);
++ expand %{
++ uimmI6 mask %{ 0x3b /* clear 59 bits, keep 5 */ %}
++ iRegIdst tmpI;
++ maskI_reg_imm(tmpI, src2, mask);
++ lShiftI_reg_reg(dst, src1, tmpI);
++ %}
++%}
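++
++// Java defines int shifts modulo 32, so the expansion computes
++//   dst = src1 << (src2 & 0x1f)
++// e.g. (1 << 33) == 2. The 0x3b mask operand tells maskI_reg_imm to clear
++// the upper 59 bits of the 64-bit register, keeping the low 5.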
++
++// Register Shift Left Immediate
++instruct lShiftI_reg_imm(iRegIdst dst, iRegIsrc src1, immI src2) %{
++ match(Set dst (LShiftI src1 src2));
++
++ format %{ "SLWI $dst, $src1, ($src2 & 0x1f)" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_rlwinm);
++ __ slwi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x1f);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// AndI with negpow2-constant + LShiftI
++instruct lShiftI_andI_immInegpow2_imm5(iRegIdst dst, iRegIsrc src1, immInegpow2 src2, uimmI5 src3) %{
++ match(Set dst (LShiftI (AndI src1 src2) src3));
++ predicate(UseRotateAndMaskInstructionsPPC64);
++
++ format %{ "RLWINM $dst, lShiftI(AndI($src1, $src2), $src3)" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_rlwinm); // FIXME: assert that rlwinm is equal to addi
++ long src2 = $src2$$constant;
++ long src3 = $src3$$constant;
++ long maskbits = src3 + log2_long((jlong) (julong) (juint) -src2);
++ if (maskbits >= 32) {
++ __ li($dst$$Register, 0); // addi
++ } else {
++ __ rlwinm($dst$$Register, $src1$$Register, src3 & 0x1f, 0, (31-maskbits) & 0x1f);
++ }
++ %}
++ ins_pipe(pipe_class_default);
++%}
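++
++// Worked equation for the combined and+shift (a sketch): with
++// src2 == -(1 << k) and src3 == n,
++//   (x & -(1 << k)) << n  ==  rotlw(x, n) with the low n+k bits cleared
++// so maskbits == n + k; when n + k >= 32 every bit is cleared and the
++// li(dst, 0) fast path applies.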
++
++// RShiftI + AndI with negpow2-constant + LShiftI
++instruct lShiftI_andI_immInegpow2_rShiftI_imm5(iRegIdst dst, iRegIsrc src1, immInegpow2 src2, uimmI5 src3) %{
++ match(Set dst (LShiftI (AndI (RShiftI src1 src3) src2) src3));
++ predicate(UseRotateAndMaskInstructionsPPC64);
++
++ format %{ "RLWINM $dst, lShiftI(AndI(RShiftI($src1, $src3), $src2), $src3)" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_rlwinm); // FIXME: assert that rlwinm is equal to addi
++ long src2 = $src2$$constant;
++ long src3 = $src3$$constant;
++ long maskbits = src3 + log2_long((jlong) (julong) (juint) -src2);
++ if (maskbits >= 32) {
++ __ li($dst$$Register, 0); // addi
++ } else {
++ __ rlwinm($dst$$Register, $src1$$Register, 0, 0, (31-maskbits) & 0x1f);
++ }
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct lShiftL_regL_regI(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
++ // no match-rule, false predicate
++ effect(DEF dst, USE src1, USE src2);
++ predicate(false);
++
++ format %{ "SLD $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_sld);
++ __ sld($dst$$Register, $src1$$Register, $src2$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Register Shift Left
++instruct lShiftL_regL_regI_Ex(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
++ match(Set dst (LShiftL src1 src2));
++ ins_cost(DEFAULT_COST*2);
++ expand %{
++ uimmI6 mask %{ 0x3a /* clear 58 bits, keep 6 */ %}
++ iRegIdst tmpI;
++ maskI_reg_imm(tmpI, src2, mask);
++ lShiftL_regL_regI(dst, src1, tmpI);
++ %}
++%}
++
++// Register Shift Left Immediate
++instruct lshiftL_regL_immI(iRegLdst dst, iRegLsrc src1, immI src2) %{
++ match(Set dst (LShiftL src1 src2));
++ format %{ "SLDI $dst, $src1, ($src2 & 0x3f)" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_rldicr);
++ __ sldi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// If we shift more than 32 bits, we need not convert I2L.
++instruct lShiftL_regI_immGE32(iRegLdst dst, iRegIsrc src1, uimmI6_ge32 src2) %{
++ match(Set dst (LShiftL (ConvI2L src1) src2));
++ ins_cost(DEFAULT_COST);
++
++ size(4);
++ format %{ "SLDI $dst, i2l($src1), $src2" %}
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_rldicr);
++ __ sldi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
++ %}
++ ins_pipe(pipe_class_default);
++%}
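++
++// Why ConvI2L is redundant here (a sketch): sldi by s >= 32 moves all 32
++// source bits above bit 31, and the sign-extension bits extsw would have
++// produced land beyond bit 63 and are discarded, so the result is the
++// same either way.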
++
++// Shift a positive int to the left.
++// Clrlsldi clears the upper 32 bits and shifts.
++instruct scaledPositiveI2L_lShiftL_convI2L_reg_imm6(iRegLdst dst, iRegIsrc src1, uimmI6 src2) %{
++ match(Set dst (LShiftL (ConvI2L src1) src2));
++ predicate(((ConvI2LNode*)(_kids[0]->_leaf))->type()->is_long()->is_positive_int());
++
++ format %{ "SLDI $dst, i2l(positive_int($src1)), $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_rldic);
++ __ clrlsldi($dst$$Register, $src1$$Register, 0x20, $src2$$constant);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct arShiftI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
++ // no match-rule, false predicate
++ effect(DEF dst, USE src1, USE src2);
++ predicate(false);
++
++ format %{ "SRAW $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_sraw);
++ __ sraw($dst$$Register, $src1$$Register, $src2$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Register Arithmetic Shift Right
++instruct arShiftI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
++ match(Set dst (RShiftI src1 src2));
++ ins_cost(DEFAULT_COST*2);
++ expand %{
++ uimmI6 mask %{ 0x3b /* clear 59 bits, keep 5 */ %}
++ iRegIdst tmpI;
++ maskI_reg_imm(tmpI, src2, mask);
++ arShiftI_reg_reg(dst, src1, tmpI);
++ %}
++%}
++
++// Register Arithmetic Shift Right Immediate
++instruct arShiftI_reg_imm(iRegIdst dst, iRegIsrc src1, immI src2) %{
++ match(Set dst (RShiftI src1 src2));
++
++ format %{ "SRAWI $dst, $src1, ($src2 & 0x1f)" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_srawi);
++ __ srawi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x1f);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct arShiftL_regL_regI(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
++ // no match-rule, false predicate
++ effect(DEF dst, USE src1, USE src2);
++ predicate(false);
++
++ format %{ "SRAD $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_srad);
++ __ srad($dst$$Register, $src1$$Register, $src2$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Register Shift Right Arithmetic Long
++instruct arShiftL_regL_regI_Ex(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
++ match(Set dst (RShiftL src1 src2));
++ ins_cost(DEFAULT_COST*2);
++
++ expand %{
++ uimmI6 mask %{ 0x3a /* clear 58 bits, keep 6 */ %}
++ iRegIdst tmpI;
++ maskI_reg_imm(tmpI, src2, mask);
++ arShiftL_regL_regI(dst, src1, tmpI);
++ %}
++%}
++
++// Register Shift Right Immediate
++instruct arShiftL_regL_immI(iRegLdst dst, iRegLsrc src1, immI src2) %{
++ match(Set dst (RShiftL src1 src2));
++
++ format %{ "SRADI $dst, $src1, ($src2 & 0x3f)" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_sradi);
++ __ sradi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// RShiftL + ConvL2I
++instruct convL2I_arShiftL_regL_immI(iRegIdst dst, iRegLsrc src1, immI src2) %{
++ match(Set dst (ConvL2I (RShiftL src1 src2)));
++
++ format %{ "SRADI $dst, $src1, ($src2 & 0x3f) \t// long + l2i" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_sradi);
++ __ sradi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct urShiftI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
++ // no match-rule, false predicate
++ effect(DEF dst, USE src1, USE src2);
++ predicate(false);
++
++ format %{ "SRW $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_srw);
++ __ srw($dst$$Register, $src1$$Register, $src2$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Register Shift Right
++instruct urShiftI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
++ match(Set dst (URShiftI src1 src2));
++ ins_cost(DEFAULT_COST*2);
++
++ expand %{
++ uimmI6 mask %{ 0x3b /* clear 59 bits, keep 5 */ %}
++ iRegIdst tmpI;
++ maskI_reg_imm(tmpI, src2, mask);
++ urShiftI_reg_reg(dst, src1, tmpI);
++ %}
++%}
++
++// Register Shift Right Immediate
++instruct urShiftI_reg_imm(iRegIdst dst, iRegIsrc src1, immI src2) %{
++ match(Set dst (URShiftI src1 src2));
++
++ format %{ "SRWI $dst, $src1, ($src2 & 0x1f)" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_rlwinm);
++ __ srwi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x1f);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct urShiftL_regL_regI(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
++ // no match-rule, false predicate
++ effect(DEF dst, USE src1, USE src2);
++ predicate(false);
++
++ format %{ "SRD $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_srd);
++ __ srd($dst$$Register, $src1$$Register, $src2$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Register Shift Right
++instruct urShiftL_regL_regI_Ex(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
++ match(Set dst (URShiftL src1 src2));
++ ins_cost(DEFAULT_COST*2);
++
++ expand %{
++ uimmI6 mask %{ 0x3a /* clear 58 bits, keep 6 */ %}
++ iRegIdst tmpI;
++ maskI_reg_imm(tmpI, src2, mask);
++ urShiftL_regL_regI(dst, src1, tmpI);
++ %}
++%}
++
++// Register Shift Right Immediate
++instruct urShiftL_regL_immI(iRegLdst dst, iRegLsrc src1, immI src2) %{
++ match(Set dst (URShiftL src1 src2));
++
++ format %{ "SRDI $dst, $src1, ($src2 & 0x3f)" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
++ __ srdi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// URShiftL + ConvL2I.
++instruct convL2I_urShiftL_regL_immI(iRegIdst dst, iRegLsrc src1, immI src2) %{
++ match(Set dst (ConvL2I (URShiftL src1 src2)));
++
++ format %{ "SRDI $dst, $src1, ($src2 & 0x3f) \t// long + l2i" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
++ __ srdi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Register Shift Right Immediate with a CastP2X
++instruct shrP_convP2X_reg_imm6(iRegLdst dst, iRegP_N2P src1, uimmI6 src2) %{
++ match(Set dst (URShiftL (CastP2X src1) src2));
++
++ format %{ "SRDI $dst, $src1, $src2 \t// Cast ptr $src1 to long and shift" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
++ __ srdi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct sxtI_reg(iRegIdst dst, iRegIsrc src) %{
++ match(Set dst (ConvL2I (ConvI2L src)));
++
++ format %{ "EXTSW $dst, $src \t// int->int" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_extsw);
++ __ extsw($dst$$Register, $src$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++//----------Rotate Instructions------------------------------------------------
++
++// Rotate Left by 8-bit immediate
++instruct rotlI_reg_immi8(iRegIdst dst, iRegIsrc src, immI8 lshift, immI8 rshift) %{
++ match(Set dst (OrI (LShiftI src lshift) (URShiftI src rshift)));
++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
++
++ format %{ "ROTLWI $dst, $src, $lshift" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_rlwinm);
++ __ rotlwi($dst$$Register, $src$$Register, $lshift$$constant);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Rotate Right by 8-bit immediate
++instruct rotrI_reg_immi8(iRegIdst dst, iRegIsrc src, immI8 rshift, immI8 lshift) %{
++ match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift)));
++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
++
++ format %{ "ROTRWI $dst, $rshift" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_rlwinm);
++ __ rotrwi($dst$$Register, $src$$Register, $rshift$$constant);
++ %}
++ ins_pipe(pipe_class_default);
++%}
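++
++// Both rules match the standard Java rotate idiom (a sketch of the source
++// pattern; Integer.rotateLeft(x, n) reduces to this shape after shift
++// counts are masked):
++//   (x << n) | (x >>> (32 - n))   // rotate left by n
++// The predicate checks that the two shift amounts sum to 0 mod 32.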
++
++//----------Floating Point Arithmetic Instructions-----------------------------
++
++// Add float single precision
++instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
++ match(Set dst (AddF src1 src2));
++
++ format %{ "FADDS $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_fadds);
++ __ fadds($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Add float double precision
++instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
++ match(Set dst (AddD src1 src2));
++
++ format %{ "FADD $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_fadd);
++ __ fadd($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Sub float single precision
++instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
++ match(Set dst (SubF src1 src2));
++
++ format %{ "FSUBS $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_fsubs);
++ __ fsubs($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Sub float double precision
++instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
++ match(Set dst (SubD src1 src2));
++ format %{ "FSUB $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_fsub);
++ __ fsub($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Mul float single precision
++instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
++ match(Set dst (MulF src1 src2));
++ format %{ "FMULS $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_fmuls);
++ __ fmuls($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Mul float double precision
++instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
++ match(Set dst (MulD src1 src2));
++ format %{ "FMUL $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_fmul);
++ __ fmul($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Div float single precision
++instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
++ match(Set dst (DivF src1 src2));
++ format %{ "FDIVS $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_fdivs);
++ __ fdivs($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Div float double precision
++instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
++ match(Set dst (DivD src1 src2));
++ format %{ "FDIV $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_fdiv);
++ __ fdiv($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Absolute float single precision
++instruct absF_reg(regF dst, regF src) %{
++ match(Set dst (AbsF src));
++ format %{ "FABS $dst, $src \t// float" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_fabs);
++ __ fabs($dst$$FloatRegister, $src$$FloatRegister);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Absolute float double precision
++instruct absD_reg(regD dst, regD src) %{
++ match(Set dst (AbsD src));
++ format %{ "FABS $dst, $src \t// double" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_fabs);
++ __ fabs($dst$$FloatRegister, $src$$FloatRegister);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct negF_reg(regF dst, regF src) %{
++ match(Set dst (NegF src));
++ format %{ "FNEG $dst, $src \t// float" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_fneg);
++ __ fneg($dst$$FloatRegister, $src$$FloatRegister);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct negD_reg(regD dst, regD src) %{
++ match(Set dst (NegD src));
++ format %{ "FNEG $dst, $src \t// double" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_fneg);
++ __ fneg($dst$$FloatRegister, $src$$FloatRegister);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// AbsF + NegF.
++instruct negF_absF_reg(regF dst, regF src) %{
++ match(Set dst (NegF (AbsF src)));
++ format %{ "FNABS $dst, $src \t// float" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_fnabs);
++ __ fnabs($dst$$FloatRegister, $src$$FloatRegister);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// AbsD + NegD.
++instruct negD_absD_reg(regD dst, regD src) %{
++ match(Set dst (NegD (AbsD src)));
++ format %{ "FNABS $dst, $src \t// double" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_fnabs);
++ __ fnabs($dst$$FloatRegister, $src$$FloatRegister);
++ %}
++ ins_pipe(pipe_class_default);
++%}
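++
++// The two fused rules above let an expression such as -Math.abs(x) collapse
++// into a single FNABS instead of an FABS followed by an FNEG.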
++
++// VM_Version::has_fsqrt() decides if this node will be used.
++// Sqrt float double precision
++instruct sqrtD_reg(regD dst, regD src) %{
++ match(Set dst (SqrtD src));
++ format %{ "FSQRT $dst, $src" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_fsqrt);
++ __ fsqrt($dst$$FloatRegister, $src$$FloatRegister);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Single-precision sqrt.
++instruct sqrtF_reg(regF dst, regF src) %{
++ match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
++ predicate(VM_Version::has_fsqrts());
++ ins_cost(DEFAULT_COST);
++
++ format %{ "FSQRTS $dst, $src" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_fsqrts);
++ __ fsqrts($dst$$FloatRegister, $src$$FloatRegister);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct roundDouble_nop(regD dst) %{
++ match(Set dst (RoundDouble dst));
++ ins_cost(0);
++
++ format %{ " -- \t// RoundDouble not needed - empty" %}
++ size(0);
++ // PPC results are already "rounded" (i.e., normal-format IEEE).
++ ins_encode( /*empty*/ );
++ ins_pipe(pipe_class_default);
++%}
++
++instruct roundFloat_nop(regF dst) %{
++ match(Set dst (RoundFloat dst));
++ ins_cost(0);
++
++ format %{ " -- \t// RoundFloat not needed - empty" %}
++ size(0);
++ // PPC results are already "rounded" (i.e., normal-format IEEE).
++ ins_encode( /*empty*/ );
++ ins_pipe(pipe_class_default);
++%}
++
++//----------Logical Instructions-----------------------------------------------
++
++// And Instructions
++
++// Register And
++instruct andI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
++ match(Set dst (AndI src1 src2));
++ format %{ "AND $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_and);
++ __ andr($dst$$Register, $src1$$Register, $src2$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Immediate And
++instruct andI_reg_uimm16(iRegIdst dst, iRegIsrc src1, uimmI16 src2, flagsRegCR0 cr0) %{
++ match(Set dst (AndI src1 src2));
++ effect(KILL cr0);
++
++ format %{ "ANDI $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_andi_);
++ // FIXME: avoid andi_ ?
++ __ andi_($dst$$Register, $src1$$Register, $src2$$constant);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Immediate And where the immediate is a negative power of 2.
++instruct andI_reg_immInegpow2(iRegIdst dst, iRegIsrc src1, immInegpow2 src2) %{
++ match(Set dst (AndI src1 src2));
++ format %{ "ANDWI $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_rldicr);
++ __ clrrdi($dst$$Register, $src1$$Register, log2_long((jlong)(julong)(juint)-($src2$$constant)));
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct andI_reg_immIpow2minus1(iRegIdst dst, iRegIsrc src1, immIpow2minus1 src2) %{
++ match(Set dst (AndI src1 src2));
++ format %{ "ANDWI $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
++ __ clrldi($dst$$Register, $src1$$Register, 64-log2_long((((jlong) $src2$$constant)+1)));
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct andI_reg_immIpowerOf2(iRegIdst dst, iRegIsrc src1, immIpowerOf2 src2) %{
++ match(Set dst (AndI src1 src2));
++ predicate(UseRotateAndMaskInstructionsPPC64);
++ format %{ "ANDWI $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_rlwinm);
++ __ rlwinm($dst$$Register, $src1$$Register, 0,
++ (31-log2_long((jlong) $src2$$constant)) & 0x1f, (31-log2_long((jlong) $src2$$constant)) & 0x1f);
++ %}
++ ins_pipe(pipe_class_default);
++%}
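++
++// Example for the rule above: with src2 == 0x40, log2 == 6, so the emitted
++// RLWINM rotates by 0 and applies the single-bit mask (25,25) (PPC bit
++// numbering, bit 0 is the MSB), computing src1 & 0x40 in one instruction.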
++
++// Register And Long
++instruct andL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
++ match(Set dst (AndL src1 src2));
++ ins_cost(DEFAULT_COST);
++
++ format %{ "AND $dst, $src1, $src2 \t// long" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_and);
++ __ andr($dst$$Register, $src1$$Register, $src2$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Immediate And long
++instruct andL_reg_uimm16(iRegLdst dst, iRegLsrc src1, uimmL16 src2, flagsRegCR0 cr0) %{
++ match(Set dst (AndL src1 src2));
++ effect(KILL cr0);
++ ins_cost(DEFAULT_COST);
++
++ format %{ "ANDI $dst, $src1, $src2 \t// long" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_andi_);
++ // FIXME: avoid andi_ ?
++ __ andi_($dst$$Register, $src1$$Register, $src2$$constant);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Immediate And Long where the immediate is a negative power of 2.
++instruct andL_reg_immLnegpow2(iRegLdst dst, iRegLsrc src1, immLnegpow2 src2) %{
++ match(Set dst (AndL src1 src2));
++ format %{ "ANDDI $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_rldicr);
++ __ clrrdi($dst$$Register, $src1$$Register, log2_long((jlong)-$src2$$constant));
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct andL_reg_immLpow2minus1(iRegLdst dst, iRegLsrc src1, immLpow2minus1 src2) %{
++ match(Set dst (AndL src1 src2));
++ format %{ "ANDDI $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
++ __ clrldi($dst$$Register, $src1$$Register, 64-log2_long((((jlong) $src2$$constant)+1)));
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// AndL + ConvL2I.
++instruct convL2I_andL_reg_immLpow2minus1(iRegIdst dst, iRegLsrc src1, immLpow2minus1 src2) %{
++ match(Set dst (ConvL2I (AndL src1 src2)));
++ ins_cost(DEFAULT_COST);
++
++ format %{ "ANDDI $dst, $src1, $src2 \t// long + l2i" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
++ __ clrldi($dst$$Register, $src1$$Register, 64-log2_long((((jlong) $src2$$constant)+1)));
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Or Instructions
++
++// Register Or
++instruct orI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
++ match(Set dst (OrI src1 src2));
++ format %{ "OR $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_or);
++ __ or_unchecked($dst$$Register, $src1$$Register, $src2$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Duplicate without a match rule: adlc rejects expanding the matched orI_reg_reg above.
++instruct orI_reg_reg_2(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
++ // no match-rule
++ effect(DEF dst, USE src1, USE src2);
++ format %{ "OR $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_or);
++ __ or_unchecked($dst$$Register, $src1$$Register, $src2$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct tree_orI_orI_orI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, iRegIsrc src3, iRegIsrc src4) %{
++ match(Set dst (OrI (OrI (OrI src1 src2) src3) src4));
++ ins_cost(DEFAULT_COST*3);
++
++ expand %{
++ // FIXME: we should do this in the ideal world.
++ iRegIdst tmp1;
++ iRegIdst tmp2;
++ orI_reg_reg(tmp1, src1, src2);
++ orI_reg_reg_2(tmp2, src3, src4); // Adlc complains about orI_reg_reg.
++ orI_reg_reg(dst, tmp1, tmp2);
++ %}
++%}
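++
++// Rebalancing ((src1 | src2) | src3) | src4 into (src1 | src2) | (src3 | src4)
++// shortens the dependency chain from three sequential ORs to two levels, so
++// the first two ORs can issue in parallel.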
++
++// Immediate Or
++instruct orI_reg_uimm16(iRegIdst dst, iRegIsrc src1, uimmI16 src2) %{
++ match(Set dst (OrI src1 src2));
++ format %{ "ORI $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_ori);
++ __ ori($dst$$Register, $src1$$Register, ($src2$$constant) & 0xFFFF);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Register Or Long
++instruct orL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
++ match(Set dst (OrL src1 src2));
++ ins_cost(DEFAULT_COST);
++
++ size(4);
++ format %{ "OR $dst, $src1, $src2 \t// long" %}
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_or);
++ __ or_unchecked($dst$$Register, $src1$$Register, $src2$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// OrL + ConvL2I.
++instruct orI_regL_regL(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{
++ match(Set dst (ConvL2I (OrL src1 src2)));
++ ins_cost(DEFAULT_COST);
++
++ format %{ "OR $dst, $src1, $src2 \t// long + l2i" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_or);
++ __ or_unchecked($dst$$Register, $src1$$Register, $src2$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Immediate Or long
++instruct orL_reg_uimm16(iRegLdst dst, iRegLsrc src1, uimmL16 con) %{
++ match(Set dst (OrL src1 con));
++ ins_cost(DEFAULT_COST);
++
++ format %{ "ORI $dst, $src1, $con \t// long" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_ori);
++ __ ori($dst$$Register, $src1$$Register, ($con$$constant) & 0xFFFF);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Xor Instructions
++
++// Register Xor
++instruct xorI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
++ match(Set dst (XorI src1 src2));
++ format %{ "XOR $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_xor);
++ __ xorr($dst$$Register, $src1$$Register, $src2$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Duplicate without a match rule: adlc rejects expanding the matched xorI_reg_reg above.
++instruct xorI_reg_reg_2(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
++ // no match-rule
++ effect(DEF dst, USE src1, USE src2);
++ format %{ "XOR $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_xor);
++ __ xorr($dst$$Register, $src1$$Register, $src2$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct tree_xorI_xorI_xorI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, iRegIsrc src3, iRegIsrc src4) %{
++ match(Set dst (XorI (XorI (XorI src1 src2) src3) src4));
++ ins_cost(DEFAULT_COST*3);
++
++ expand %{
++ // FIXME: we should do this in the ideal world.
++ iRegIdst tmp1;
++ iRegIdst tmp2;
++ xorI_reg_reg(tmp1, src1, src2);
++ xorI_reg_reg_2(tmp2, src3, src4); // Adlc complains about xorI_reg_reg.
++ xorI_reg_reg(dst, tmp1, tmp2);
++ %}
++%}
++
++// Immediate Xor
++instruct xorI_reg_uimm16(iRegIdst dst, iRegIsrc src1, uimmI16 src2) %{
++ match(Set dst (XorI src1 src2));
++ format %{ "XORI $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_xori);
++ __ xori($dst$$Register, $src1$$Register, $src2$$constant);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Register Xor Long
++instruct xorL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
++ match(Set dst (XorL src1 src2));
++ ins_cost(DEFAULT_COST);
++
++ format %{ "XOR $dst, $src1, $src2 \t// long" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_xor);
++ __ xorr($dst$$Register, $src1$$Register, $src2$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// XorL + ConvL2I.
++instruct xorI_regL_regL(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{
++ match(Set dst (ConvL2I (XorL src1 src2)));
++ ins_cost(DEFAULT_COST);
++
++ format %{ "XOR $dst, $src1, $src2 \t// long + l2i" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_xor);
++ __ xorr($dst$$Register, $src1$$Register, $src2$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Immediate Xor Long
++instruct xorL_reg_uimm16(iRegLdst dst, iRegLsrc src1, uimmL16 src2) %{
++ match(Set dst (XorL src1 src2));
++ ins_cost(DEFAULT_COST);
++
++ format %{ "XORI $dst, $src1, $src2 \t// long" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_xori);
++ __ xori($dst$$Register, $src1$$Register, $src2$$constant);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct notI_reg(iRegIdst dst, iRegIsrc src1, immI_minus1 src2) %{
++ match(Set dst (XorI src1 src2));
++ ins_cost(DEFAULT_COST);
++
++ format %{ "NOT $dst, $src1 ($src2)" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_nor);
++ __ nor($dst$$Register, $src1$$Register, $src1$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct notL_reg(iRegLdst dst, iRegLsrc src1, immL_minus1 src2) %{
++ match(Set dst (XorL src1 src2));
++ ins_cost(DEFAULT_COST);
++
++ format %{ "NOT $dst, $src1 ($src2) \t// long" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_nor);
++ __ nor($dst$$Register, $src1$$Register, $src1$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// And-complement
++instruct andcI_reg_reg(iRegIdst dst, iRegIsrc src1, immI_minus1 src2, iRegIsrc src3) %{
++ match(Set dst (AndI (XorI src1 src2) src3));
++ ins_cost(DEFAULT_COST);
++
++ format %{ "ANDW $dst, xori($src1, $src2), $src3" %}
++ size(4);
++ ins_encode( enc_andc(dst, src3, src1) );
++ ins_pipe(pipe_class_default);
++%}
++
++// And-complement
++instruct andcL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
++ // no match-rule, false predicate
++ effect(DEF dst, USE src1, USE src2);
++ predicate(false);
++
++ format %{ "ANDC $dst, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_andc);
++ __ andc($dst$$Register, $src1$$Register, $src2$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++//----------Moves between int/long and float/double----------------------------
++//
++// The following rules move values from int/long registers/stack-locations
++// to float/double registers/stack-locations and vice versa, without doing any
++// conversions. These rules are used to implement the bit-conversion methods
++// of java.lang.Float etc., e.g.
++// int floatToIntBits(float value)
++// float intBitsToFloat(int bits)
++//
++// Notes on the implementation on ppc64:
++// We only provide rules which move between a register and a stack-location,
++// because we always have to go through memory when moving between a float
++// register and an integer register.
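++//
++// E.g. Float.floatToIntBits becomes a store/load pair: STFS of the float
++// register into a stack slot, then LWZ of the same slot into a GPR. The
++// POWER generations targeted here provide no direct GPR<->FPR move, hence
++// the round trip through memory.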
++
++//---------- Chain stack slots between similar types --------
++
++// These are needed so that the rules below can match.
++
++// Load integer from stack slot
++instruct stkI_to_regI(iRegIdst dst, stackSlotI src) %{
++ match(Set dst src);
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "LWZ $dst, $src" %}
++ size(4);
++ ins_encode( enc_lwz(dst, src) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Store integer to stack slot
++instruct regI_to_stkI(stackSlotI dst, iRegIsrc src) %{
++ match(Set dst src);
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "STW $src, $dst \t// stk" %}
++ size(4);
++ ins_encode( enc_stw(src, dst) ); // rs=rt
++ ins_pipe(pipe_class_memory);
++%}
++
++// Load long from stack slot
++instruct stkL_to_regL(iRegLdst dst, stackSlotL src) %{
++ match(Set dst src);
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "LD $dst, $src \t// long" %}
++ size(4);
++ ins_encode( enc_ld(dst, src) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Store long to stack slot
++instruct regL_to_stkL(stackSlotL dst, iRegLsrc src) %{
++ match(Set dst src);
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "STD $src, $dst \t// long" %}
++ size(4);
++ ins_encode( enc_std(src, dst) ); // rs=rt
++ ins_pipe(pipe_class_memory);
++%}
++
++//----------Moves between int and float
++
++// Move float value from float stack-location to integer register.
++instruct moveF2I_stack_reg(iRegIdst dst, stackSlotF src) %{
++ match(Set dst (MoveF2I src));
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "LWZ $dst, $src \t// MoveF2I" %}
++ size(4);
++ ins_encode( enc_lwz(dst, src) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Move float value from float register to integer stack-location.
++instruct moveF2I_reg_stack(stackSlotI dst, regF src) %{
++ match(Set dst (MoveF2I src));
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "STFS $src, $dst \t// MoveF2I" %}
++ size(4);
++ ins_encode( enc_stfs(src, dst) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Move integer value from integer stack-location to float register.
++instruct moveI2F_stack_reg(regF dst, stackSlotI src) %{
++ match(Set dst (MoveI2F src));
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "LFS $dst, $src \t// MoveI2F" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_lfs);
++ int Idisp = $src$$disp + frame_slots_bias($src$$base, ra_);
++ __ lfs($dst$$FloatRegister, Idisp, $src$$base$$Register);
++ %}
++ ins_pipe(pipe_class_memory);
++%}
++
++// Move integer value from integer register to float stack-location.
++instruct moveI2F_reg_stack(stackSlotF dst, iRegIsrc src) %{
++ match(Set dst (MoveI2F src));
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "STW $src, $dst \t// MoveI2F" %}
++ size(4);
++ ins_encode( enc_stw(src, dst) );
++ ins_pipe(pipe_class_memory);
++%}
++
++//----------Moves between long and float
++
++instruct moveF2L_reg_stack(stackSlotL dst, regF src) %{
++ // no match-rule, false predicate
++ effect(DEF dst, USE src);
++ predicate(false);
++
++ format %{ "storeD $src, $dst \t// STACK" %}
++ size(4);
++ ins_encode( enc_stfd(src, dst) );
++ ins_pipe(pipe_class_default);
++%}
++
++//----------Moves between long and double
++
++// Move double value from double stack-location to long register.
++instruct moveD2L_stack_reg(iRegLdst dst, stackSlotD src) %{
++ match(Set dst (MoveD2L src));
++ ins_cost(MEMORY_REF_COST);
++ size(4);
++ format %{ "LD $dst, $src \t// MoveD2L" %}
++ ins_encode( enc_ld(dst, src) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Move double value from double register to long stack-location.
++instruct moveD2L_reg_stack(stackSlotL dst, regD src) %{
++ match(Set dst (MoveD2L src));
++ effect(DEF dst, USE src);
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "STFD $src, $dst \t// MoveD2L" %}
++ size(4);
++ ins_encode( enc_stfd(src, dst) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Move long value from long stack-location to double register.
++instruct moveL2D_stack_reg(regD dst, stackSlotL src) %{
++ match(Set dst (MoveL2D src));
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "LFD $dst, $src \t// MoveL2D" %}
++ size(4);
++ ins_encode( enc_lfd(dst, src) );
++ ins_pipe(pipe_class_memory);
++%}
++
++// Move long value from long register to double stack-location.
++instruct moveL2D_reg_stack(stackSlotD dst, iRegLsrc src) %{
++ match(Set dst (MoveL2D src));
++ ins_cost(MEMORY_REF_COST);
++
++ format %{ "STD $src, $dst \t// MoveL2D" %}
++ size(4);
++ ins_encode( enc_std(src, dst) );
++ ins_pipe(pipe_class_memory);
++%}
++
++//----------Register Move Instructions-----------------------------------------
++
++// Replicate for Superword
++
++instruct moveReg(iRegLdst dst, iRegIsrc src) %{
++ predicate(false);
++ effect(DEF dst, USE src);
++
++ format %{ "MR $dst, $src \t// replicate " %}
++ // variable size, 0 or 4.
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_or);
++ __ mr_if_needed($dst$$Register, $src$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++//----------Cast instructions (Java-level type cast)---------------------------
++
++// Cast Long to Pointer for unsafe natives.
++instruct castX2P(iRegPdst dst, iRegLsrc src) %{
++ match(Set dst (CastX2P src));
++
++ format %{ "MR $dst, $src \t// Long->Ptr" %}
++ // variable size, 0 or 4.
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_or);
++ __ mr_if_needed($dst$$Register, $src$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Cast Pointer to Long for unsafe natives.
++instruct castP2X(iRegLdst dst, iRegP_N2P src) %{
++ match(Set dst (CastP2X src));
++
++ format %{ "MR $dst, $src \t// Ptr->Long" %}
++ // variable size, 0 or 4.
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_or);
++ __ mr_if_needed($dst$$Register, $src$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct castPP(iRegPdst dst) %{
++ match(Set dst (CastPP dst));
++ format %{ " -- \t// castPP of $dst" %}
++ size(0);
++ ins_encode( /*empty*/ );
++ ins_pipe(pipe_class_default);
++%}
++
++instruct castII(iRegIdst dst) %{
++ match(Set dst (CastII dst));
++ format %{ " -- \t// castII of $dst" %}
++ size(0);
++ ins_encode( /*empty*/ );
++ ins_pipe(pipe_class_default);
++%}
++
++instruct checkCastPP(iRegPdst dst) %{
++ match(Set dst (CheckCastPP dst));
++ format %{ " -- \t// checkcastPP of $dst" %}
++ size(0);
++ ins_encode( /*empty*/ );
++ ins_pipe(pipe_class_default);
++%}
++
++//----------Convert instructions-----------------------------------------------
++
++// Convert to boolean.
++
++// int_to_bool(src) : { 1 if src != 0
++// { 0 else
++//
++// strategy:
++// 1) Count leading zeros of 32 bit-value src,
++// this returns 32 (0b10.0000) iff src == 0 and <32 otherwise.
++// 2) Shift 5 bits to the right, result is 0b1 iff src == 0, 0b0 otherwise.
++// 3) Xori the result to get 0b1 if src != 0 and 0b0 if src == 0.
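++//
++// Worked example: src == 5 -> cntlzw gives 29, 29 >> 5 == 0, 0 ^ 1 == 1;
++// src == 0 -> cntlzw gives 32, 32 >> 5 == 1, 1 ^ 1 == 0.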
++
++// convI2Bool
++instruct convI2Bool_reg__cntlz_Ex(iRegIdst dst, iRegIsrc src) %{
++ match(Set dst (Conv2B src));
++ predicate(UseCountLeadingZerosInstructionsPPC64);
++ ins_cost(DEFAULT_COST);
++
++ expand %{
++ immI shiftAmount %{ 0x5 %}
++ uimmI16 mask %{ 0x1 %}
++ iRegIdst tmp1;
++ iRegIdst tmp2;
++ countLeadingZerosI(tmp1, src);
++ urShiftI_reg_imm(tmp2, tmp1, shiftAmount);
++ xorI_reg_uimm16(dst, tmp2, mask);
++ %}
++%}
++
++instruct convI2Bool_reg__cmove(iRegIdst dst, iRegIsrc src, flagsReg crx) %{
++ match(Set dst (Conv2B src));
++ effect(TEMP crx);
++ predicate(!UseCountLeadingZerosInstructionsPPC64);
++ ins_cost(DEFAULT_COST);
++
++ format %{ "CMPWI $crx, $src, #0 \t// convI2B"
++ "LI $dst, #0\n\t"
++ "BEQ $crx, done\n\t"
++ "LI $dst, #1\n"
++ "done:" %}
++ size(16);
++ ins_encode( enc_convI2B_regI__cmove(dst, src, crx, 0x0, 0x1) );
++ ins_pipe(pipe_class_compare);
++%}
++
++// ConvI2B + XorI
++instruct xorI_convI2Bool_reg_immIvalue1__cntlz_Ex(iRegIdst dst, iRegIsrc src, immI_1 mask) %{
++ match(Set dst (XorI (Conv2B src) mask));
++ predicate(UseCountLeadingZerosInstructionsPPC64);
++ ins_cost(DEFAULT_COST);
++
++ expand %{
++ immI shiftAmount %{ 0x5 %}
++ iRegIdst tmp1;
++ countLeadingZerosI(tmp1, src);
++ urShiftI_reg_imm(dst, tmp1, shiftAmount);
++ %}
++%}
++
++instruct xorI_convI2Bool_reg_immIvalue1__cmove(iRegIdst dst, iRegIsrc src, flagsReg crx, immI_1 mask) %{
++ match(Set dst (XorI (Conv2B src) mask));
++ effect(TEMP crx);
++ predicate(!UseCountLeadingZerosInstructionsPPC64);
++ ins_cost(DEFAULT_COST);
++
++ format %{ "CMPWI $crx, $src, #0 \t// Xor(convI2B($src), $mask)"
++ "LI $dst, #1\n\t"
++ "BEQ $crx, done\n\t"
++ "LI $dst, #0\n"
++ "done:" %}
++ size(16);
++ ins_encode( enc_convI2B_regI__cmove(dst, src, crx, 0x1, 0x0) );
++ ins_pipe(pipe_class_compare);
++%}
++
++// AndI 0b0..010..0 + ConvI2B
++instruct convI2Bool_andI_reg_immIpowerOf2(iRegIdst dst, iRegIsrc src, immIpowerOf2 mask) %{
++ match(Set dst (Conv2B (AndI src mask)));
++ predicate(UseRotateAndMaskInstructionsPPC64);
++ ins_cost(DEFAULT_COST);
++
++ format %{ "RLWINM $dst, $src, $mask \t// convI2B(AndI($src, $mask))" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_rlwinm);
++ __ rlwinm($dst$$Register, $src$$Register, (32-log2_long((jlong)$mask$$constant)) & 0x1f, 31, 31);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Convert pointer to boolean.
++//
++// ptr_to_bool(src) : { 1 if src != 0
++// { 0 else
++//
++// strategy:
++// 1) Count leading zeros of 64 bit-value src,
++// this returns 64 (0b100.0000) iff src == 0 and <64 otherwise.
++// 2) Shift 6 bits to the right, result is 0b1 iff src == 0, 0b0 otherwise.
++// 3) Xori the result to get 0b1 if src != 0 and 0b0 if src == 0.
++
++// ConvP2B
++instruct convP2Bool_reg__cntlz_Ex(iRegIdst dst, iRegP_N2P src) %{
++ match(Set dst (Conv2B src));
++ predicate(UseCountLeadingZerosInstructionsPPC64);
++ ins_cost(DEFAULT_COST);
++
++ expand %{
++ immI shiftAmount %{ 0x6 %}
++ uimmI16 mask %{ 0x1 %}
++ iRegIdst tmp1;
++ iRegIdst tmp2;
++ countLeadingZerosP(tmp1, src);
++ urShiftI_reg_imm(tmp2, tmp1, shiftAmount);
++ xorI_reg_uimm16(dst, tmp2, mask);
++ %}
++%}
++
++instruct convP2Bool_reg__cmove(iRegIdst dst, iRegP_N2P src, flagsReg crx) %{
++ match(Set dst (Conv2B src));
++ effect(TEMP crx);
++ predicate(!UseCountLeadingZerosInstructionsPPC64);
++ ins_cost(DEFAULT_COST);
++
++ format %{ "CMPDI $crx, $src, #0 \t// convP2B"
++ "LI $dst, #0\n\t"
++ "BEQ $crx, done\n\t"
++ "LI $dst, #1\n"
++ "done:" %}
++ size(16);
++ ins_encode( enc_convP2B_regP__cmove(dst, src, crx, 0x0, 0x1) );
++ ins_pipe(pipe_class_compare);
++%}
++
++// ConvP2B + XorI
++instruct xorI_convP2Bool_reg__cntlz_Ex(iRegIdst dst, iRegP_N2P src, immI_1 mask) %{
++ match(Set dst (XorI (Conv2B src) mask));
++ predicate(UseCountLeadingZerosInstructionsPPC64);
++ ins_cost(DEFAULT_COST);
++
++ expand %{
++ immI shiftAmount %{ 0x6 %}
++ iRegIdst tmp1;
++ countLeadingZerosP(tmp1, src);
++ urShiftI_reg_imm(dst, tmp1, shiftAmount);
++ %}
++%}
++
++instruct xorI_convP2Bool_reg_immIvalue1__cmove(iRegIdst dst, iRegP_N2P src, flagsReg crx, immI_1 mask) %{
++ match(Set dst (XorI (Conv2B src) mask));
++ effect(TEMP crx);
++ predicate(!UseCountLeadingZerosInstructionsPPC64);
++ ins_cost(DEFAULT_COST);
++
++ format %{ "CMPDI $crx, $src, #0 \t// XorI(convP2B($src), $mask)"
++ "LI $dst, #1\n\t"
++ "BEQ $crx, done\n\t"
++ "LI $dst, #0\n"
++ "done:" %}
++ size(16);
++ ins_encode( enc_convP2B_regP__cmove(dst, src, crx, 0x1, 0x0) );
++ ins_pipe(pipe_class_compare);
++%}
++
++// if src1 < src2, return -1 else return 0
++instruct cmpLTMask_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
++ match(Set dst (CmpLTMask src1 src2));
++ ins_cost(DEFAULT_COST*4);
++
++ expand %{
++ iRegLdst src1s;
++ iRegLdst src2s;
++ iRegLdst diff;
++ convI2L_reg(src1s, src1); // Ensure proper sign extension.
++ convI2L_reg(src2s, src2); // Ensure proper sign extension.
++ subL_reg_reg(diff, src1s, src2s);
++ // Need to consider >=33 bit result, therefore we need signmaskL.
++ signmask64I_regL(dst, diff);
++ %}
++%}
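++
++// Example: src1 == 3, src2 == 7 gives diff == -4, whose sign bit is smeared
++// into -1. The subtraction is widened to 64 bit because a 32-bit src1 - src2
++// (e.g. min_jint - 1) could overflow and flip the sign.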
++
++instruct cmpLTMask_reg_immI0(iRegIdst dst, iRegIsrc src1, immI_0 src2) %{
++ match(Set dst (CmpLTMask src1 src2)); // if src1 < src2, return -1 else return 0
++ format %{ "SRAWI $dst, $src1, $src2 \t// CmpLTMask" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_srawi);
++ __ srawi($dst$$Register, $src1$$Register, 0x1f);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++//----------Arithmetic Conversion Instructions---------------------------------
++
++// Convert to Byte -- nop
++// Convert to Short -- nop
++
++// Convert to Int
++
++instruct convB2I_reg(iRegIdst dst, iRegIsrc src, immI_24 amount) %{
++ match(Set dst (RShiftI (LShiftI src amount) amount));
++ format %{ "EXTSB $dst, $src \t// byte->int" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_extsb);
++ __ extsb($dst$$Register, $src$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// LShiftI 16 + RShiftI 16 converts short to int.
++instruct convS2I_reg(iRegIdst dst, iRegIsrc src, immI_16 amount) %{
++ match(Set dst (RShiftI (LShiftI src amount) amount));
++ format %{ "EXTSH $dst, $src \t// short->int" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_extsh);
++ __ extsh($dst$$Register, $src$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// ConvL2I + ConvI2L: Sign extend int in long register.
++instruct sxtI_L2L_reg(iRegLdst dst, iRegLsrc src) %{
++ match(Set dst (ConvI2L (ConvL2I src)));
++
++ format %{ "EXTSW $dst, $src \t// long->long" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_extsw);
++ __ extsw($dst$$Register, $src$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct convL2I_reg(iRegIdst dst, iRegLsrc src) %{
++ match(Set dst (ConvL2I src));
++ format %{ "MR $dst, $src \t// long->int" %}
++ // variable size, 0 or 4
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_or);
++ __ mr_if_needed($dst$$Register, $src$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct convD2IRaw_regD(regD dst, regD src) %{
++ // no match-rule, false predicate
++ effect(DEF dst, USE src);
++ predicate(false);
++
++ format %{ "FCTIWZ $dst, $src \t// convD2I, $src != NaN" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_fctiwz);
++ __ fctiwz($dst$$FloatRegister, $src$$FloatRegister);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct cmovI_bso_stackSlotL(iRegIdst dst, flagsReg crx, stackSlotL src) %{
++ // no match-rule, false predicate
++ effect(DEF dst, USE crx, USE src);
++ predicate(false);
++
++ ins_variable_size_depending_on_alignment(true);
++
++ format %{ "cmovI $crx, $dst, $src" %}
++ // Worst case is branch + move + stop, no stop without scheduler.
++ size(false /* TODO: PPC PORT(InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 12 : 8);
++ ins_encode( enc_cmove_bso_stackSlotL(dst, crx, src) );
++ ins_pipe(pipe_class_default);
++%}
++
++instruct cmovI_bso_stackSlotL_conLvalue0_Ex(iRegIdst dst, flagsReg crx, stackSlotL mem) %{
++ // no match-rule, false predicate
++ effect(DEF dst, USE crx, USE mem);
++ predicate(false);
++
++ format %{ "CmovI $dst, $crx, $mem \t// postalloc expanded" %}
++ postalloc_expand %{
++ //
++ // replaces
++ //
++ // region dst crx mem
++ // \ | | /
++ // dst=cmovI_bso_stackSlotL_conLvalue0
++ //
++ // with
++ //
++ // region dst
++ // \ /
++ // dst=loadConI16(0)
++ // |
++ // ^ region dst crx mem
++ // | \ | | /
++ // dst=cmovI_bso_stackSlotL
++ //
++
++ // Create new nodes.
++ MachNode *m1 = new (C) loadConI16Node();
++ MachNode *m2 = new (C) cmovI_bso_stackSlotLNode();
++
++ // inputs for new nodes
++ m1->add_req(n_region);
++ m2->add_req(n_region, n_crx, n_mem);
++
++ // precedences for new nodes
++ m2->add_prec(m1);
++
++ // operands for new nodes
++ m1->_opnds[0] = op_dst;
++ m1->_opnds[1] = new (C) immI16Oper(0);
++
++ m2->_opnds[0] = op_dst;
++ m2->_opnds[1] = op_crx;
++ m2->_opnds[2] = op_mem;
++
++ // registers for new nodes
++ ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
++ ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
++
++ // Insert new nodes.
++ nodes->push(m1);
++ nodes->push(m2);
++ %}
++%}
++
++// Double to Int conversion, NaN is mapped to 0.
++instruct convD2I_reg_ExEx(iRegIdst dst, regD src) %{
++ match(Set dst (ConvD2I src));
++ ins_cost(DEFAULT_COST);
++
++ expand %{
++ regD tmpD;
++ stackSlotL tmpS;
++ flagsReg crx;
++ cmpDUnordered_reg_reg(crx, src, src); // Check whether src is NaN.
++ convD2IRaw_regD(tmpD, src); // Convert double to int (speculated).
++ moveD2L_reg_stack(tmpS, tmpD); // Store double to stack (speculated).
++ cmovI_bso_stackSlotL_conLvalue0_Ex(dst, crx, tmpS); // Cmove based on NaN check.
++ %}
++%}
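++
++// The same speculate-and-fix-up pattern is used for all FP->integer
++// conversions here: FCMPU of src against itself sets the 'unordered' CR bit
++// iff src is NaN, the raw FCTIWZ/FCTIDZ result is staged in a stack slot,
++// and the BSO cmove keeps a preloaded 0 when that bit is set, loading the
++// converted value otherwise.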
++
++instruct convF2IRaw_regF(regF dst, regF src) %{
++ // no match-rule, false predicate
++ effect(DEF dst, USE src);
++ predicate(false);
++
++ format %{ "FCTIWZ $dst, $src \t// convF2I, $src != NaN" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_fctiwz);
++ __ fctiwz($dst$$FloatRegister, $src$$FloatRegister);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Float to Int conversion, NaN is mapped to 0.
++instruct convF2I_regF_ExEx(iRegIdst dst, regF src) %{
++ match(Set dst (ConvF2I src));
++ ins_cost(DEFAULT_COST);
++
++ expand %{
++ regF tmpF;
++ stackSlotL tmpS;
++ flagsReg crx;
++ cmpFUnordered_reg_reg(crx, src, src); // Check whether src is NaN.
++ convF2IRaw_regF(tmpF, src); // Convert float to int (speculated).
++ moveF2L_reg_stack(tmpS, tmpF); // Store float to stack (speculated).
++ cmovI_bso_stackSlotL_conLvalue0_Ex(dst, crx, tmpS); // Cmove based on NaN check.
++ %}
++%}
++
++// Convert to Long
++
++instruct convI2L_reg(iRegLdst dst, iRegIsrc src) %{
++ match(Set dst (ConvI2L src));
++ format %{ "EXTSW $dst, $src \t// int->long" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_extsw);
++ __ extsw($dst$$Register, $src$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Zero-extend: convert unsigned int to long (convUI2L).
++instruct zeroExtendL_regI(iRegLdst dst, iRegIsrc src, immL_32bits mask) %{
++ match(Set dst (AndL (ConvI2L src) mask));
++ ins_cost(DEFAULT_COST);
++
++ format %{ "CLRLDI $dst, $src, #32 \t// zero-extend int to long" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
++ __ clrldi($dst$$Register, $src$$Register, 32);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Zero-extend: convert unsigned int to long in long register.
++instruct zeroExtendL_regL(iRegLdst dst, iRegLsrc src, immL_32bits mask) %{
++ match(Set dst (AndL src mask));
++ ins_cost(DEFAULT_COST);
++
++ format %{ "CLRLDI $dst, $src, #32 \t// zero-extend int to long" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
++ __ clrldi($dst$$Register, $src$$Register, 32);
++ %}
++ ins_pipe(pipe_class_default);
++%}
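++
++// CLRLDI $dst, $src, 32 is RLDICL $dst, $src, 0, 32: rotate by zero, then
++// clear the 32 most significant bits. E.g. 0xFFFFFFFF80000000 becomes
++// 0x0000000080000000, the zero-extended unsigned-int value.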
++
++instruct convF2LRaw_regF(regF dst, regF src) %{
++ // no match-rule, false predicate
++ effect(DEF dst, USE src);
++ predicate(false);
++
++ format %{ "FCTIDZ $dst, $src \t// convF2L, $src != NaN" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_fctidz);
++ __ fctidz($dst$$FloatRegister, $src$$FloatRegister);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct cmovL_bso_stackSlotL(iRegLdst dst, flagsReg crx, stackSlotL src) %{
++ // no match-rule, false predicate
++ effect(DEF dst, USE crx, USE src);
++ predicate(false);
++
++ ins_variable_size_depending_on_alignment(true);
++
++ format %{ "cmovL $crx, $dst, $src" %}
++ // Worst case is branch + move + stop, no stop without scheduler.
++ size(false /* TODO: PPC PORT Compile::current()->do_hb_scheduling()*/ ? 12 : 8);
++ ins_encode( enc_cmove_bso_stackSlotL(dst, crx, src) );
++ ins_pipe(pipe_class_default);
++%}
++
++instruct cmovL_bso_stackSlotL_conLvalue0_Ex(iRegLdst dst, flagsReg crx, stackSlotL mem) %{
++ // no match-rule, false predicate
++ effect(DEF dst, USE crx, USE mem);
++ predicate(false);
++
++ format %{ "CmovL $dst, $crx, $mem \t// postalloc expanded" %}
++ postalloc_expand %{
++ //
++ // replaces
++ //
++ // region dst crx mem
++ // \ | | /
++ // dst=cmovL_bso_stackSlotL_conLvalue0
++ //
++ // with
++ //
++ // region dst
++ // \ /
++ // dst=loadConL16(0)
++ // |
++ // ^ region dst crx mem
++ // | \ | | /
++ // dst=cmovL_bso_stackSlotL
++ //
++
++ // Create new nodes.
++ MachNode *m1 = new (C) loadConL16Node();
++ MachNode *m2 = new (C) cmovL_bso_stackSlotLNode();
++
++ // inputs for new nodes
++ m1->add_req(n_region);
++ m2->add_req(n_region, n_crx, n_mem);
++ m2->add_prec(m1);
++
++ // operands for new nodes
++ m1->_opnds[0] = op_dst;
++ m1->_opnds[1] = new (C) immL16Oper(0);
++ m2->_opnds[0] = op_dst;
++ m2->_opnds[1] = op_crx;
++ m2->_opnds[2] = op_mem;
++
++ // registers for new nodes
++ ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
++ ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
++
++ // Insert new nodes.
++ nodes->push(m1);
++ nodes->push(m2);
++ %}
++%}
++
++// Float to Long conversion, NaN is mapped to 0.
++instruct convF2L_reg_ExEx(iRegLdst dst, regF src) %{
++ match(Set dst (ConvF2L src));
++ ins_cost(DEFAULT_COST);
++
++ expand %{
++ regF tmpF;
++ stackSlotL tmpS;
++ flagsReg crx;
++ cmpFUnordered_reg_reg(crx, src, src); // Check whether src is NaN.
++ convF2LRaw_regF(tmpF, src); // Convert float to long (speculated).
++ moveF2L_reg_stack(tmpS, tmpF); // Store float to stack (speculated).
++ cmovL_bso_stackSlotL_conLvalue0_Ex(dst, crx, tmpS); // Cmove based on NaN check.
++ %}
++%}
++
++instruct convD2LRaw_regD(regD dst, regD src) %{
++ // no match-rule, false predicate
++ effect(DEF dst, USE src);
++ predicate(false);
++
++ format %{ "FCTIDZ $dst, $src \t// convD2L $src != NaN" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_fctidz);
++ __ fctidz($dst$$FloatRegister, $src$$FloatRegister);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Double to Long conversion, NaN is mapped to 0.
++instruct convD2L_reg_ExEx(iRegLdst dst, regD src) %{
++ match(Set dst (ConvD2L src));
++ ins_cost(DEFAULT_COST);
++
++ expand %{
++ regD tmpD;
++ stackSlotL tmpS;
++ flagsReg crx;
++ cmpDUnordered_reg_reg(crx, src, src); // Check whether src is NaN.
++ convD2LRaw_regD(tmpD, src); // Convert double to long (speculated).
++ moveD2L_reg_stack(tmpS, tmpD); // Store double to stack (speculated).
++ cmovL_bso_stackSlotL_conLvalue0_Ex(dst, crx, tmpS); // Cmove based on NaN check.
++ %}
++%}
++
++// Convert to Float
++
++// Placed here as needed in expand.
++instruct convL2DRaw_regD(regD dst, regD src) %{
++ // no match-rule, false predicate
++ effect(DEF dst, USE src);
++ predicate(false);
++
++ format %{ "FCFID $dst, $src \t// convL2D" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_fcfid);
++ __ fcfid($dst$$FloatRegister, $src$$FloatRegister);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Placed here as needed in expand.
++instruct convD2F_reg(regF dst, regD src) %{
++ match(Set dst (ConvD2F src));
++ format %{ "FRSP $dst, $src \t// convD2F" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_frsp);
++ __ frsp($dst$$FloatRegister, $src$$FloatRegister);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Integer to Float conversion.
++instruct convI2F_ireg_Ex(regF dst, iRegIsrc src) %{
++ match(Set dst (ConvI2F src));
++ predicate(!VM_Version::has_fcfids());
++ ins_cost(DEFAULT_COST);
++
++ expand %{
++ iRegLdst tmpL;
++ stackSlotL tmpS;
++ regD tmpD;
++ regD tmpD2;
++ convI2L_reg(tmpL, src); // Sign-extend int to long.
++ regL_to_stkL(tmpS, tmpL); // Store long to stack.
++ moveL2D_stack_reg(tmpD, tmpS); // Load long into double register.
++ convL2DRaw_regD(tmpD2, tmpD); // Convert to double.
++ convD2F_reg(dst, tmpD2); // Convert double to float.
++ %}
++%}
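++
++// Without the single-precision FCFIDS (see the Power7 variant below), the
++// int->float conversion takes the full detour: sign-extend to long, spill to
++// the stack, reload into an FPR, FCFID to double, then FRSP down to float.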
++
++instruct convL2FRaw_regF(regF dst, regD src) %{
++ // no match-rule, false predicate
++ effect(DEF dst, USE src);
++ predicate(false);
++
++ format %{ "FCFIDS $dst, $src \t// convL2F" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_fcfid);
++ __ fcfids($dst$$FloatRegister, $src$$FloatRegister);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Integer to Float conversion. Special version for Power7.
++instruct convI2F_ireg_fcfids_Ex(regF dst, iRegIsrc src) %{
++ match(Set dst (ConvI2F src));
++ predicate(VM_Version::has_fcfids());
++ ins_cost(DEFAULT_COST);
++
++ expand %{
++ iRegLdst tmpL;
++ stackSlotL tmpS;
++ regD tmpD;
++ convI2L_reg(tmpL, src); // Sign-extend int to long.
++ regL_to_stkL(tmpS, tmpL); // Store long to stack.
++ moveL2D_stack_reg(tmpD, tmpS); // Load long into double register.
++ convL2FRaw_regF(dst, tmpD); // Convert to float.
++ %}
++%}
++
++// L2F to avoid runtime call.
++instruct convL2F_ireg_fcfids_Ex(regF dst, iRegLsrc src) %{
++ match(Set dst (ConvL2F src));
++ predicate(VM_Version::has_fcfids());
++ ins_cost(DEFAULT_COST);
++
++ expand %{
++ stackSlotL tmpS;
++ regD tmpD;
++ regL_to_stkL(tmpS, src); // Store long to stack.
++ moveL2D_stack_reg(tmpD, tmpS); // Load long into double register.
++ convL2FRaw_regF(dst, tmpD); // Convert to float.
++ %}
++%}
++
++// Moved up as used in expand.
++//instruct convD2F_reg(regF dst, regD src) %{%}
++
++// Convert to Double
++
++// Integer to Double conversion.
++instruct convI2D_reg_Ex(regD dst, iRegIsrc src) %{
++ match(Set dst (ConvI2D src));
++ ins_cost(DEFAULT_COST);
++
++ expand %{
++ iRegLdst tmpL;
++ stackSlotL tmpS;
++ regD tmpD;
++ convI2L_reg(tmpL, src); // Sign-extend int to long.
++ regL_to_stkL(tmpS, tmpL); // Store long to stack.
++ moveL2D_stack_reg(tmpD, tmpS); // Load long into double register.
++ convL2DRaw_regD(dst, tmpD); // Convert to double.
++ %}
++%}
++
++// Long to Double conversion
++instruct convL2D_reg_Ex(regD dst, stackSlotL src) %{
++ match(Set dst (ConvL2D src));
++ ins_cost(DEFAULT_COST + MEMORY_REF_COST);
++
++ expand %{
++ regD tmpD;
++ moveL2D_stack_reg(tmpD, src);
++ convL2DRaw_regD(dst, tmpD);
++ %}
++%}
++
++instruct convF2D_reg(regD dst, regF src) %{
++ match(Set dst (ConvF2D src));
++ format %{ "FMR $dst, $src \t// float->double" %}
++ // variable size, 0 or 4
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_fmr);
++ __ fmr_if_needed($dst$$FloatRegister, $src$$FloatRegister);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++//----------Control Flow Instructions------------------------------------------
++// Compare Instructions
++
++// Compare Integers
++instruct cmpI_reg_reg(flagsReg crx, iRegIsrc src1, iRegIsrc src2) %{
++ match(Set crx (CmpI src1 src2));
++ size(4);
++ format %{ "CMPW $crx, $src1, $src2" %}
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_cmp);
++ __ cmpw($crx$$CondRegister, $src1$$Register, $src2$$Register);
++ %}
++ ins_pipe(pipe_class_compare);
++%}
++
++instruct cmpI_reg_imm16(flagsReg crx, iRegIsrc src1, immI16 src2) %{
++ match(Set crx (CmpI src1 src2));
++ format %{ "CMPWI $crx, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_cmpi);
++ __ cmpwi($crx$$CondRegister, $src1$$Register, $src2$$constant);
++ %}
++ ins_pipe(pipe_class_compare);
++%}
++
++// (src1 & src2) == 0?
++instruct testI_reg_imm(flagsRegCR0 cr0, iRegIsrc src1, uimmI16 src2, immI_0 zero) %{
++ match(Set cr0 (CmpI (AndI src1 src2) zero));
++ // r0 is killed
++ format %{ "ANDI R0, $src1, $src2 \t// BTST int" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_andi_);
++ // FIXME: avoid andi_ ?
++ __ andi_(R0, $src1$$Register, $src2$$constant);
++ %}
++ ins_pipe(pipe_class_compare);
++%}
++
++instruct cmpL_reg_reg(flagsReg crx, iRegLsrc src1, iRegLsrc src2) %{
++ match(Set crx (CmpL src1 src2));
++ format %{ "CMPD $crx, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_cmp);
++ __ cmpd($crx$$CondRegister, $src1$$Register, $src2$$Register);
++ %}
++ ins_pipe(pipe_class_compare);
++%}
++
++instruct cmpL_reg_imm16(flagsReg crx, iRegLsrc src1, immL16 src2) %{
++ match(Set crx (CmpL src1 src2));
++ format %{ "CMPDI $crx, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_cmpi);
++ __ cmpdi($crx$$CondRegister, $src1$$Register, $src2$$constant);
++ %}
++ ins_pipe(pipe_class_compare);
++%}
++
++instruct testL_reg_reg(flagsRegCR0 cr0, iRegLsrc src1, iRegLsrc src2, immL_0 zero) %{
++ match(Set cr0 (CmpL (AndL src1 src2) zero));
++ // r0 is killed
++ format %{ "AND R0, $src1, $src2 \t// BTST long" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_and_);
++ __ and_(R0, $src1$$Register, $src2$$Register);
++ %}
++ ins_pipe(pipe_class_compare);
++%}
++
++instruct testL_reg_imm(flagsRegCR0 cr0, iRegLsrc src1, uimmL16 src2, immL_0 zero) %{
++ match(Set cr0 (CmpL (AndL src1 src2) zero));
++ // r0 is killed
++ format %{ "ANDI R0, $src1, $src2 \t// BTST long" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_andi_);
++ // FIXME: avoid andi_ ?
++ __ andi_(R0, $src1$$Register, $src2$$constant);
++ %}
++ ins_pipe(pipe_class_compare);
++%}
++
++instruct cmovI_conIvalueMinus1_conIvalue1(iRegIdst dst, flagsReg crx) %{
++ // no match-rule, false predicate
++ effect(DEF dst, USE crx);
++ predicate(false);
++
++ ins_variable_size_depending_on_alignment(true);
++
++ format %{ "cmovI $crx, $dst, -1, 0, +1" %}
++ // Worst case is branch + move + branch + move + stop, no stop without scheduler.
++ size(false /* TODO: PPC PORT(InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 20 : 16);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
++ Label done;
++ // li(Rdst, 0); // equal -> 0
++ __ beq($crx$$CondRegister, done);
++ __ li($dst$$Register, 1); // greater -> +1
++ __ bgt($crx$$CondRegister, done);
++ __ li($dst$$Register, -1); // unordered or less -> -1
++ // TODO: PPC port __ endgroup_if_needed(_size == 20);
++ __ bind(done);
++ %}
++ ins_pipe(pipe_class_compare);
++%}
++
++instruct cmovI_conIvalueMinus1_conIvalue0_conIvalue1_Ex(iRegIdst dst, flagsReg crx) %{
++ // no match-rule, false predicate
++ effect(DEF dst, USE crx);
++ predicate(false);
++
++ format %{ "CmovI $crx, $dst, -1, 0, +1 \t// postalloc expanded" %}
++ postalloc_expand %{
++ //
++ // replaces
++ //
++ // region crx
++ // \ |
++ // dst=cmovI_conIvalueMinus1_conIvalue0_conIvalue1
++ //
++ // with
++ //
++ // region
++ // \
++ // dst=loadConI16(0)
++ // |
++ // ^ region crx
++ // | \ |
++ // dst=cmovI_conIvalueMinus1_conIvalue1
++ //
++
++ // Create new nodes.
++ MachNode *m1 = new (C) loadConI16Node();
++ MachNode *m2 = new (C) cmovI_conIvalueMinus1_conIvalue1Node();
++
++ // inputs for new nodes
++ m1->add_req(n_region);
++ m2->add_req(n_region, n_crx);
++ m2->add_prec(m1);
++
++ // operands for new nodes
++ m1->_opnds[0] = op_dst;
++ m1->_opnds[1] = new (C) immI16Oper(0);
++ m2->_opnds[0] = op_dst;
++ m2->_opnds[1] = op_crx;
++
++ // registers for new nodes
++ ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
++ ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
++
++ // Insert new nodes.
++ nodes->push(m1);
++ nodes->push(m2);
++ %}
++%}
++
++// Manifest a CmpL3 result in an integer register. Very painful.
++// This is the test to avoid.
++// (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
++instruct cmpL3_reg_reg_ExEx(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{
++ match(Set dst (CmpL3 src1 src2));
++ ins_cost(DEFAULT_COST*5+BRANCH_COST);
++
++ expand %{
++ flagsReg tmp1;
++ cmpL_reg_reg(tmp1, src1, src2);
++ cmovI_conIvalueMinus1_conIvalue0_conIvalue1_Ex(dst, tmp1);
++ %}
++%}
++
++// Implicit range checks.
++// A range check in the ideal world has one of the following shapes:
++// - (If le (CmpU length index)), (IfTrue throw exception)
++// - (If lt (CmpU index length)), (IfFalse throw exception)
++//
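++// With TrapBasedRangeChecks the hot path costs a single TW/TWI: the trap
++// fires only when the bound is violated, the resulting SIGTRAP is routed by
++// the VM's signal handler to the uncommon-trap path, and no branch is taken
++// in the common case.
++//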
++// Match range check 'If le (CmpU length index)'.
++instruct rangeCheck_iReg_uimm15(cmpOp cmp, iRegIsrc src_length, uimmI15 index, label labl) %{
++ match(If cmp (CmpU src_length index));
++ effect(USE labl);
++ predicate(TrapBasedRangeChecks &&
++ _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le &&
++ PROB_UNLIKELY(_leaf->as_If()->_prob) >= PROB_ALWAYS &&
++ (Matcher::branches_to_uncommon_trap(_leaf)));
++
++ ins_is_TrapBasedCheckNode(true);
++
++ format %{ "TWI $index $cmp $src_length \t// RangeCheck => trap $labl" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_twi);
++ if ($cmp$$cmpcode == 0x1 /* less_equal */) {
++ __ trap_range_check_le($src_length$$Register, $index$$constant);
++ } else {
++ // Both successors are uncommon traps, probability is 0.
++ // Node got flipped during fixup flow.
++ assert($cmp$$cmpcode == 0x9, "must be greater");
++ __ trap_range_check_g($src_length$$Register, $index$$constant);
++ }
++ %}
++ ins_pipe(pipe_class_trap);
++%}
++
++// Match range check 'If lt (CmpU index length)'.
++instruct rangeCheck_iReg_iReg(cmpOp cmp, iRegIsrc src_index, iRegIsrc src_length, label labl) %{
++ match(If cmp (CmpU src_index src_length));
++ effect(USE labl);
++ predicate(TrapBasedRangeChecks &&
++ _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt &&
++ _leaf->as_If()->_prob >= PROB_ALWAYS &&
++ (Matcher::branches_to_uncommon_trap(_leaf)));
++
++ ins_is_TrapBasedCheckNode(true);
++
++ format %{ "TW $src_index $cmp $src_length \t// RangeCheck => trap $labl" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_tw);
++ if ($cmp$$cmpcode == 0x0 /* greater_equal */) {
++ __ trap_range_check_ge($src_index$$Register, $src_length$$Register);
++ } else {
++ // Both successors are uncommon traps, probability is 0.
++ // Node got flipped during fixup flow.
++ assert($cmp$$cmpcode == 0x8, "must be less");
++ __ trap_range_check_l($src_index$$Register, $src_length$$Register);
++ }
++ %}
++ ins_pipe(pipe_class_trap);
++%}
++
++// Match range check 'If lt (CmpU index length)'.
++instruct rangeCheck_uimm15_iReg(cmpOp cmp, iRegIsrc src_index, uimmI15 length, label labl) %{
++ match(If cmp (CmpU src_index length));
++ effect(USE labl);
++ predicate(TrapBasedRangeChecks &&
++ _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt &&
++ _leaf->as_If()->_prob >= PROB_ALWAYS &&
++ (Matcher::branches_to_uncommon_trap(_leaf)));
++
++ ins_is_TrapBasedCheckNode(true);
++
++ format %{ "TWI $src_index $cmp $length \t// RangeCheck => trap $labl" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_twi);
++ if ($cmp$$cmpcode == 0x0 /* greater_equal */) {
++ __ trap_range_check_ge($src_index$$Register, $length$$constant);
++ } else {
++ // Both successors are uncommon traps, probability is 0.
++ // Node got flipped during fixup flow.
++ assert($cmp$$cmpcode == 0x8, "must be less");
++ __ trap_range_check_l($src_index$$Register, $length$$constant);
++ }
++ %}
++ ins_pipe(pipe_class_trap);
++%}
++
++instruct compU_reg_reg(flagsReg crx, iRegIsrc src1, iRegIsrc src2) %{
++ match(Set crx (CmpU src1 src2));
++ format %{ "CMPLW $crx, $src1, $src2 \t// unsigned" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_cmpl);
++ __ cmplw($crx$$CondRegister, $src1$$Register, $src2$$Register);
++ %}
++ ins_pipe(pipe_class_compare);
++%}
++
++instruct compU_reg_uimm16(flagsReg crx, iRegIsrc src1, uimmI16 src2) %{
++ match(Set crx (CmpU src1 src2));
++ size(4);
++ format %{ "CMPLWI $crx, $src1, $src2" %}
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_cmpli);
++ __ cmplwi($crx$$CondRegister, $src1$$Register, $src2$$constant);
++ %}
++ ins_pipe(pipe_class_compare);
++%}
++
++// Implicit zero checks (more implicit null checks).
++// No constant pool entries required.
++instruct zeroCheckN_iReg_imm0(cmpOp cmp, iRegNsrc value, immN_0 zero, label labl) %{
++ match(If cmp (CmpN value zero));
++ effect(USE labl);
++ predicate(TrapBasedNullChecks &&
++ _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne &&
++ _leaf->as_If()->_prob >= PROB_LIKELY_MAG(4) &&
++ Matcher::branches_to_uncommon_trap(_leaf));
++ ins_cost(1);
++
++ ins_is_TrapBasedCheckNode(true);
++
++ format %{ "TDI $value $cmp $zero \t// ZeroCheckN => trap $labl" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_tdi);
++ if ($cmp$$cmpcode == 0xA) {
++ __ trap_null_check($value$$Register);
++ } else {
++ // Both successors are uncommon traps, probability is 0.
++ // Node got flipped during fixup flow.
++ assert($cmp$$cmpcode == 0x2 , "must be equal(0xA) or notEqual(0x2)");
++ __ trap_null_check($value$$Register, Assembler::traptoGreaterThanUnsigned);
++ }
++ %}
++ ins_pipe(pipe_class_trap);
++%}
++
++// Compare narrow oops.
++instruct cmpN_reg_reg(flagsReg crx, iRegNsrc src1, iRegNsrc src2) %{
++ match(Set crx (CmpN src1 src2));
++
++ size(4);
++ ins_cost(DEFAULT_COST);
++ format %{ "CMPLW $crx, $src1, $src2 \t// compressed ptr" %}
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_cmpl);
++ __ cmplw($crx$$CondRegister, $src1$$Register, $src2$$Register);
++ %}
++ ins_pipe(pipe_class_compare);
++%}
++
++instruct cmpN_reg_imm0(flagsReg crx, iRegNsrc src1, immN_0 src2) %{
++ match(Set crx (CmpN src1 src2));
++ // Make this more expensive than zeroCheckN_iReg_imm0.
++ ins_cost(DEFAULT_COST);
++
++ format %{ "CMPLWI $crx, $src1, $src2 \t// compressed ptr" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_cmpli);
++ __ cmplwi($crx$$CondRegister, $src1$$Register, $src2$$constant);
++ %}
++ ins_pipe(pipe_class_compare);
++%}
++
++// Implicit zero checks (more implicit null checks).
++// No constant pool entries required.
++instruct zeroCheckP_reg_imm0(cmpOp cmp, iRegP_N2P value, immP_0 zero, label labl) %{
++ match(If cmp (CmpP value zero));
++ effect(USE labl);
++ predicate(TrapBasedNullChecks &&
++ _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne &&
++ _leaf->as_If()->_prob >= PROB_LIKELY_MAG(4) &&
++ Matcher::branches_to_uncommon_trap(_leaf));
++
++ ins_is_TrapBasedCheckNode(true);
++
++ format %{ "TDI $value $cmp $zero \t// ZeroCheckP => trap $labl" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_tdi);
++ if ($cmp$$cmpcode == 0xA) {
++ __ trap_null_check($value$$Register);
++ } else {
++ // Both successors are uncommon traps, probability is 0.
++ // Node got flipped during fixup flow.
++ assert($cmp$$cmpcode == 0x2 , "must be equal(0xA) or notEqual(0x2)");
++ __ trap_null_check($value$$Register, Assembler::traptoGreaterThanUnsigned);
++ }
++ %}
++ ins_pipe(pipe_class_trap);
++%}
++
++// Compare Pointers
++instruct cmpP_reg_reg(flagsReg crx, iRegP_N2P src1, iRegP_N2P src2) %{
++ match(Set crx (CmpP src1 src2));
++ format %{ "CMPLD $crx, $src1, $src2 \t// ptr" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_cmpl);
++ __ cmpld($crx$$CondRegister, $src1$$Register, $src2$$Register);
++ %}
++ ins_pipe(pipe_class_compare);
++%}
++
++// Used in postalloc expand.
++instruct cmpP_reg_imm16(flagsReg crx, iRegPsrc src1, immL16 src2) %{
++ // This match rule prevents reordering of the node before a safepoint.
++ // This only makes sense if this instruction is used exclusively
++ // for the expansion of EncodeP!
++ match(Set crx (CmpP src1 src2));
++ predicate(false);
++
++ format %{ "CMPDI $crx, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_cmpi);
++ __ cmpdi($crx$$CondRegister, $src1$$Register, $src2$$constant);
++ %}
++ ins_pipe(pipe_class_compare);
++%}
++
++//----------Float Compares----------------------------------------------------
++
++instruct cmpFUnordered_reg_reg(flagsReg crx, regF src1, regF src2) %{
++ // no match-rule, false predicate
++ effect(DEF crx, USE src1, USE src2);
++ predicate(false);
++
++ format %{ "cmpFUrd $crx, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_fcmpu);
++ __ fcmpu($crx$$CondRegister, $src1$$FloatRegister, $src2$$FloatRegister);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct cmov_bns_less(flagsReg crx) %{
++ // no match-rule, false predicate
++ effect(DEF crx);
++ predicate(false);
++
++ ins_variable_size_depending_on_alignment(true);
++
++ format %{ "cmov $crx" %}
++ // Worst case is branch + move + stop, no stop without scheduler.
++ size(false /* TODO: PPC PORT(InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 16 : 12);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_cmovecr);
++ Label done;
++ __ bns($crx$$CondRegister, done); // not unordered -> keep crx
++ __ li(R0, 0);
++ __ cmpwi($crx$$CondRegister, R0, 1); // unordered -> set crx to 'less'
++ // TODO PPC port __ endgroup_if_needed(_size == 16);
++ __ bind(done);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
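++// Why 'unordered -> less' in cmov_bns_less: FCMPU records an unordered
++// result (some operand is NaN) in the CR bit that BNS tests. The float
++// compares here appear to follow fcmpl semantics, which fold NaN toward
++// 'less than', so the fixup rewrites the unordered case to 'less'.
++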
++// Compare floating, generate condition code.
++instruct cmpF_reg_reg_Ex(flagsReg crx, regF src1, regF src2) %{
++ // FIXME: should we match '(If cmp (CmpF src1 src2))'?
++ //
++ // The following code sequence occurs a lot in mpegaudio:
++ //
++ // block BXX:
++ // 0: instruct cmpFUnordered_reg_reg (cmpF_reg_reg-0):
++ // cmpFUrd CCR6, F11, F9
++ // 4: instruct cmov_bns_less (cmpF_reg_reg-1):
++ // cmov CCR6
++ // 8: instruct branchConSched:
++ // B_FARle CCR6, B56 P=0.500000 C=-1.000000
++ match(Set crx (CmpF src1 src2));
++ ins_cost(DEFAULT_COST+BRANCH_COST);
++
++ format %{ "CmpF $crx, $src1, $src2 \t// postalloc expanded" %}
++ postalloc_expand %{
++ //
++ // replaces
++ //
++ // region src1 src2
++ // \ | |
++ // crx=cmpF_reg_reg
++ //
++ // with
++ //
++ // region src1 src2
++ // \ | |
++ // crx=cmpFUnordered_reg_reg
++ // |
++ // ^ region
++ // | \
++ // crx=cmov_bns_less
++ //
++
++ // Create new nodes.
++ MachNode *m1 = new (C) cmpFUnordered_reg_regNode();
++ MachNode *m2 = new (C) cmov_bns_lessNode();
++
++ // inputs for new nodes
++ m1->add_req(n_region, n_src1, n_src2);
++ m2->add_req(n_region);
++ m2->add_prec(m1);
++
++ // operands for new nodes
++ m1->_opnds[0] = op_crx;
++ m1->_opnds[1] = op_src1;
++ m1->_opnds[2] = op_src2;
++ m2->_opnds[0] = op_crx;
++
++ // registers for new nodes
++ ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // crx
++ ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // crx
++
++ // Insert new nodes.
++ nodes->push(m1);
++ nodes->push(m2);
++ %}
++%}
++
++// Compare float, generate -1,0,1
++instruct cmpF3_reg_reg_ExEx(iRegIdst dst, regF src1, regF src2) %{
++ match(Set dst (CmpF3 src1 src2));
++ ins_cost(DEFAULT_COST*5+BRANCH_COST);
++
++ expand %{
++ flagsReg tmp1;
++ cmpFUnordered_reg_reg(tmp1, src1, src2);
++ cmovI_conIvalueMinus1_conIvalue0_conIvalue1_Ex(dst, tmp1);
++ %}
++%}
++
++instruct cmpDUnordered_reg_reg(flagsReg crx, regD src1, regD src2) %{
++ // no match-rule, false predicate
++ effect(DEF crx, USE src1, USE src2);
++ predicate(false);
++
++ format %{ "cmpFUrd $crx, $src1, $src2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_fcmpu);
++ __ fcmpu($crx$$CondRegister, $src1$$FloatRegister, $src2$$FloatRegister);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct cmpD_reg_reg_Ex(flagsReg crx, regD src1, regD src2) %{
++ match(Set crx (CmpD src1 src2));
++ ins_cost(DEFAULT_COST+BRANCH_COST);
++
++ format %{ "CmpD $crx, $src1, $src2 \t// postalloc expanded" %}
++ postalloc_expand %{
++ //
++ // replaces
++ //
++ // region src1 src2
++ // \ | |
++ // crx=cmpD_reg_reg
++ //
++ // with
++ //
++ // region src1 src2
++ // \ | |
++ // crx=cmpDUnordered_reg_reg
++ // |
++ // ^ region
++ // | \
++ // crx=cmov_bns_less
++ //
++
++ // create new nodes
++ MachNode *m1 = new (C) cmpDUnordered_reg_regNode();
++ MachNode *m2 = new (C) cmov_bns_lessNode();
++
++ // inputs for new nodes
++ m1->add_req(n_region, n_src1, n_src2);
++ m2->add_req(n_region);
++ m2->add_prec(m1);
++
++ // operands for new nodes
++ m1->_opnds[0] = op_crx;
++ m1->_opnds[1] = op_src1;
++ m1->_opnds[2] = op_src2;
++ m2->_opnds[0] = op_crx;
++
++ // registers for new nodes
++ ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // crx
++ ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // crx
++
++ // Insert new nodes.
++ nodes->push(m1);
++ nodes->push(m2);
++ %}
++%}
++
++// Compare double, generate -1,0,1
++instruct cmpD3_reg_reg_ExEx(iRegIdst dst, regD src1, regD src2) %{
++ match(Set dst (CmpD3 src1 src2));
++ ins_cost(DEFAULT_COST*5+BRANCH_COST);
++
++ expand %{
++ flagsReg tmp1;
++ cmpDUnordered_reg_reg(tmp1, src1, src2);
++ cmovI_conIvalueMinus1_conIvalue0_conIvalue1_Ex(dst, tmp1);
++ %}
++%}
++
++//----------Branches---------------------------------------------------------
++// Jump
++
++// Direct Branch.
++instruct branch(label labl) %{
++ match(Goto);
++ effect(USE labl);
++ ins_cost(BRANCH_COST);
++
++ format %{ "B $labl" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_b);
++ Label d; // dummy
++ __ bind(d);
++ Label* p = $labl$$label;
++ // `p' is `NULL' when this encoding class is used only to
++ // determine the size of the encoded instruction.
++ Label& l = (NULL == p)? d : *(p);
++ __ b(l);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Conditional Near Branch
++instruct branchCon(cmpOp cmp, flagsReg crx, label lbl) %{
++ // Same match rule as `branchConFar'.
++ match(If cmp crx);
++ effect(USE lbl);
++ ins_cost(BRANCH_COST);
++
++ // If set to 1 this indicates that the current instruction is a
++ // short variant of a long branch. This avoids using this
++ // instruction in first-pass matching. It will then only be used in
++ // the `Shorten_branches' pass.
++ ins_short_branch(1);
++
++ format %{ "B$cmp $crx, $lbl" %}
++ size(4);
++ ins_encode( enc_bc(crx, cmp, lbl) );
++ ins_pipe(pipe_class_default);
++%}
++
++// This is for cases when the ppc64 `bc' instruction does not
++// reach far enough. So we emit a far branch here, which is more
++// expensive.
++//
++// Conditional Far Branch
++instruct branchConFar(cmpOp cmp, flagsReg crx, label lbl) %{
++ // Same match rule as `branchCon'.
++ match(If cmp crx);
++ effect(USE crx, USE lbl);
++ predicate(!false /* TODO: PPC port HB_Schedule*/);
++ // Higher cost than `branchCon'.
++ ins_cost(5*BRANCH_COST);
++
++ // This is not a short variant of a branch, but the long variant.
++ ins_short_branch(0);
++
++ format %{ "B_FAR$cmp $crx, $lbl" %}
++ size(8);
++ ins_encode( enc_bc_far(crx, cmp, lbl) );
++ ins_pipe(pipe_class_default);
++%}
++
++// Conditional Branch used with Power6 scheduler (can be far or short).
++instruct branchConSched(cmpOp cmp, flagsReg crx, label lbl) %{
++ // Same match rule as `branchCon'.
++ match(If cmp crx);
++ effect(USE crx, USE lbl);
++ predicate(false /* TODO: PPC port HB_Schedule*/);
++ // Higher cost than `branchCon'.
++ ins_cost(5*BRANCH_COST);
++
++ // Actually, the size doesn't depend on alignment but on shortening.
++ ins_variable_size_depending_on_alignment(true);
++ // Long variant.
++ ins_short_branch(0);
++
++ format %{ "B_FAR$cmp $crx, $lbl" %}
++ size(8); // worst case
++ ins_encode( enc_bc_short_far(crx, cmp, lbl) );
++ ins_pipe(pipe_class_default);
++%}
++
++instruct branchLoopEnd(cmpOp cmp, flagsReg crx, label labl) %{
++ match(CountedLoopEnd cmp crx);
++ effect(USE labl);
++ ins_cost(BRANCH_COST);
++
++ // short variant.
++ ins_short_branch(1);
++
++ format %{ "B$cmp $crx, $labl \t// counted loop end" %}
++ size(4);
++ ins_encode( enc_bc(crx, cmp, labl) );
++ ins_pipe(pipe_class_default);
++%}
++
++instruct branchLoopEndFar(cmpOp cmp, flagsReg crx, label labl) %{
++ match(CountedLoopEnd cmp crx);
++ effect(USE labl);
++ predicate(!false /* TODO: PPC port HB_Schedule */);
++ ins_cost(BRANCH_COST);
++
++ // Long variant.
++ ins_short_branch(0);
++
++ format %{ "B_FAR$cmp $crx, $labl \t// counted loop end" %}
++ size(8);
++ ins_encode( enc_bc_far(crx, cmp, labl) );
++ ins_pipe(pipe_class_default);
++%}
++
++// Conditional Branch used with Power6 scheduler (can be far or short).
++instruct branchLoopEndSched(cmpOp cmp, flagsReg crx, label labl) %{
++ match(CountedLoopEnd cmp crx);
++ effect(USE labl);
++ predicate(false /* TODO: PPC port HB_Schedule */);
++ // Higher cost than `branchCon'.
++ ins_cost(5*BRANCH_COST);
++
++ // Actually, the size doesn't depend on alignment but on shortening.
++ ins_variable_size_depending_on_alignment(true);
++ // Long variant.
++ ins_short_branch(0);
++
++ format %{ "B_FAR$cmp $crx, $labl \t// counted loop end" %}
++ size(8); // worst case
++ ins_encode( enc_bc_short_far(crx, cmp, labl) );
++ ins_pipe(pipe_class_default);
++%}
++
++// ============================================================================
++// Java runtime operations, intrinsics and other complex operations.
++
++// The second, slow half of a subtype check. Scan the subklass's secondary
++// superklass array for an instance of the superklass. Set a hidden internal
++// cache on a hit (the cache is checked with exposed code in
++// gen_subtype_check()). Return non-zero for a miss, zero for a hit. The
++// encoding ALSO sets flags.
++//
++// GL TODO: Improve this.
++// - result should not be a TEMP
++// - Add a match rule, as on SPARC, avoiding the additional Cmp.
++instruct partialSubtypeCheck(iRegPdst result, iRegP_N2P subklass, iRegP_N2P superklass,
++ iRegPdst tmp_klass, iRegPdst tmp_arrayptr) %{
++ match(Set result (PartialSubtypeCheck subklass superklass));
++ effect(TEMP result, TEMP tmp_klass, TEMP tmp_arrayptr);
++ ins_cost(DEFAULT_COST*10);
++
++ format %{ "PartialSubtypeCheck $result = ($subklass instanceOf $superklass) tmp: $tmp_klass, $tmp_arrayptr" %}
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++ __ check_klass_subtype_slow_path($subklass$$Register, $superklass$$Register, $tmp_arrayptr$$Register,
++ $tmp_klass$$Register, NULL, $result$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
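++// Background on the slow path above (hedged; the contract lives in the
++// shared gen_subtype_check code, not in this file): it scans the
++// subklass's secondary-supers array and, on a hit, records the
++// superklass in the subklass's secondary-super cache so that the next
++// check can take the fast path.
++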
++// inlined locking and unlocking
++
++instruct cmpFastLock(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
++ match(Set crx (FastLock oop box));
++ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
++ // TODO PPC port predicate(!UseNewFastLockPPC64 || UseBiasedLocking);
++
++ format %{ "FASTLOCK $oop, $box, $tmp1, $tmp2, $tmp3" %}
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++ __ compiler_fast_lock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
++ $tmp3$$Register, $tmp1$$Register, $tmp2$$Register);
++ // If locking was successful, crx should indicate 'EQ'.
++ // The compiler generates a branch to the runtime call to
++ // _complete_monitor_locking_Java for the case where crx is 'NE'.
++ %}
++ ins_pipe(pipe_class_compare);
++%}
++
++instruct cmpFastUnlock(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
++ match(Set crx (FastUnlock oop box));
++ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
++
++ format %{ "FASTUNLOCK $oop, $box, $tmp1, $tmp2" %}
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++ __ compiler_fast_unlock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
++ $tmp3$$Register, $tmp1$$Register, $tmp2$$Register);
++ // If unlocking was successful, crx should indicate 'EQ'.
++ // The compiler generates a branch to the runtime call to
++ // _complete_monitor_unlocking_Java for the case where crx is 'NE'.
++ %}
++ ins_pipe(pipe_class_compare);
++%}
++
++// Align address.
++instruct align_addr(iRegPdst dst, iRegPsrc src, immLnegpow2 mask) %{
++ match(Set dst (CastX2P (AndL (CastP2X src) mask)));
++
++ format %{ "ANDDI $dst, $src, $mask \t// next aligned address" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_rldicr);
++ __ clrrdi($dst$$Register, $src$$Register, log2_long((jlong)-$mask$$constant));
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Array size computation.
++instruct array_size(iRegLdst dst, iRegPsrc end, iRegPsrc start) %{
++ match(Set dst (SubL (CastP2X end) (CastP2X start)));
++
++ format %{ "SUB $dst, $end, $start \t// array size in bytes" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_subf);
++ __ subf($dst$$Register, $start$$Register, $end$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Clear-array with dynamic array-size.
++instruct inlineCallClearArray(rarg1RegL cnt, rarg2RegP base, Universe dummy, regCTR ctr) %{
++ match(Set dummy (ClearArray cnt base));
++ effect(USE_KILL cnt, USE_KILL base, KILL ctr);
++ ins_cost(MEMORY_REF_COST);
++
++ ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted.
++
++ format %{ "ClearArray $cnt, $base" %}
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++ __ clear_memory_doubleword($base$$Register, $cnt$$Register); // kills cnt, base, R0
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// String_IndexOf for needle of length 1.
++//
++// Match needle into immediate operands: no loadConP node needed. Saves one
++// register and two instructions over string_indexOf_imm1Node.
++//
++// Assumes register result differs from all input registers.
++//
++// Preserves registers haystack, haycnt
++// Kills registers tmp1, tmp2
++// Defines registers result
++//
++// Use dst register classes if a register gets killed, as is the case for tmp registers!
++//
++// Unfortunately, this does not match very often. In many situations the AddP is used
++// by several nodes, even several StrIndexOf nodes, breaking the match tree.
++instruct string_indexOf_imm1_char(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
++ immP needleImm, immL offsetImm, immI_1 needlecntImm,
++ iRegIdst tmp1, iRegIdst tmp2,
++ flagsRegCR0 cr0, flagsRegCR1 cr1) %{
++ predicate(SpecialStringIndexOf); // type check implicit by parameter type; see Matcher::match_rule_supported.
++ match(Set result (StrIndexOf (Binary haystack haycnt) (Binary (AddP needleImm offsetImm) needlecntImm)));
++
++ effect(TEMP result, TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1);
++
++ ins_cost(150);
++ format %{ "String IndexOf CSCL1 $haystack[0..$haycnt], $needleImm+$offsetImm[0..$needlecntImm]"
++ "-> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %}
++
++ ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++ immPOper *needleOper = (immPOper *)$needleImm;
++ const TypeOopPtr *t = needleOper->type()->isa_oopptr();
++ ciTypeArray* needle_values = t->const_oop()->as_type_array(); // Pointer to live char *
++
++ __ string_indexof_1($result$$Register,
++ $haystack$$Register, $haycnt$$Register,
++ R0, needle_values->char_at(0),
++ $tmp1$$Register, $tmp2$$Register);
++ %}
++ ins_pipe(pipe_class_compare);
++%}
++
++// String_IndexOf for needle of length 1.
++//
++// Special case requires less registers and emits less instructions.
++//
++// Assumes register result differs from all input registers.
++//
++// Preserves registers haystack, haycnt
++// Kills registers tmp1, tmp2, needle
++// Defines registers result
++//
++// Use dst register classes if a register gets killed, as is the case for tmp registers!
++instruct string_indexOf_imm1(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
++ rscratch2RegP needle, immI_1 needlecntImm,
++ iRegIdst tmp1, iRegIdst tmp2,
++ flagsRegCR0 cr0, flagsRegCR1 cr1) %{
++ match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
++ effect(USE_KILL needle, /* TDEF needle, */ TEMP result,
++ TEMP tmp1, TEMP tmp2);
++ // Required for EA: check if it is still a type_array.
++ predicate(SpecialStringIndexOf && n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
++ n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
++ ins_cost(180);
++
++ ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted.
++
++ format %{ "String IndexOf SCL1 $haystack[0..$haycnt], $needle[0..$needlecntImm]"
++ " -> $result \t// KILL $haycnt, $needle, $tmp1, $tmp2, $cr0, $cr1" %}
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++ Node *ndl = in(operand_index($needle)); // The node that defines needle.
++ ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
++ guarantee(needle_values, "sanity");
++ if (needle_values != NULL) {
++ __ string_indexof_1($result$$Register,
++ $haystack$$Register, $haycnt$$Register,
++ R0, needle_values->char_at(0),
++ $tmp1$$Register, $tmp2$$Register);
++ } else {
++ __ string_indexof_1($result$$Register,
++ $haystack$$Register, $haycnt$$Register,
++ $needle$$Register, 0,
++ $tmp1$$Register, $tmp2$$Register);
++ }
++ %}
++ ins_pipe(pipe_class_compare);
++%}
++
++// String_IndexOf.
++//
++// Length of the needle as an immediate. This saves an instruction loading
++// the constant needle length.
++// @@@ TODO Specify rules for length < 8 or so, and roll out comparison of needle
++// completely or do it in vector instruction. This should save registers for
++// needlecnt and needle.
++//
++// Assumes register result differs from all input registers.
++// Overwrites haycnt, needlecnt.
++// Use dst register classes if a register gets killed, as is the case for tmp registers!
++instruct string_indexOf_imm(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt,
++ iRegPsrc needle, uimmI15 needlecntImm,
++ iRegIdst tmp1, iRegIdst tmp2, iRegIdst tmp3, iRegIdst tmp4, iRegIdst tmp5,
++ flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6) %{
++ match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
++ effect(USE_KILL haycnt, /* better: TDEF haycnt, */ TEMP result,
++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr0, KILL cr1, KILL cr6);
++ // Required for EA: check if it is still a type_array.
++ predicate(SpecialStringIndexOf && n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
++ n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
++ ins_cost(250);
++
++ ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted.
++
++ format %{ "String IndexOf SCL $haystack[0..$haycnt], $needle[0..$needlecntImm]"
++ " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $tmp3, $tmp4, $tmp5, $cr0, $cr1" %}
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++ Node *ndl = in(operand_index($needle)); // The node that defines needle.
++ ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
++
++ __ string_indexof($result$$Register,
++ $haystack$$Register, $haycnt$$Register,
++ $needle$$Register, needle_values, $tmp5$$Register, $needlecntImm$$constant,
++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register);
++ %}
++ ins_pipe(pipe_class_compare);
++%}
++
++// StrIndexOf node.
++//
++// Assumes register result differs from all input registers.
++// Overwrites haycnt, needlecnt.
++// Use dst register classes if a register gets killed, as is the case for tmp registers!
++instruct string_indexOf(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt, iRegPsrc needle, rscratch2RegI needlecnt,
++ iRegLdst tmp1, iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4,
++ flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6) %{
++ match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
++ effect(USE_KILL haycnt, USE_KILL needlecnt, /*better: TDEF haycnt, TDEF needlecnt,*/
++ TEMP result,
++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr0, KILL cr1, KILL cr6);
++ predicate(SpecialStringIndexOf); // See Matcher::match_rule_supported.
++ ins_cost(300);
++
++ ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted.
++
++ format %{ "String IndexOf $haystack[0..$haycnt], $needle[0..$needlecnt]"
++ " -> $result \t// KILL $haycnt, $needlecnt, $tmp1, $tmp2, $tmp3, $tmp4, $cr0, $cr1" %}
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++ __ string_indexof($result$$Register,
++ $haystack$$Register, $haycnt$$Register,
++ $needle$$Register, NULL, $needlecnt$$Register, 0, // needlecnt not constant.
++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register);
++ %}
++ ins_pipe(pipe_class_compare);
++%}
++
++// String equals with immediate.
++instruct string_equals_imm(iRegPsrc str1, iRegPsrc str2, uimmI15 cntImm, iRegIdst result,
++ iRegPdst tmp1, iRegPdst tmp2,
++ flagsRegCR0 cr0, flagsRegCR6 cr6, regCTR ctr) %{
++ match(Set result (StrEquals (Binary str1 str2) cntImm));
++ effect(TEMP result, TEMP tmp1, TEMP tmp2,
++ KILL cr0, KILL cr6, KILL ctr);
++ predicate(SpecialStringEquals); // See Matcher::match_rule_supported.
++ ins_cost(250);
++
++ ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted.
++
++ format %{ "String Equals SCL [0..$cntImm]($str1),[0..$cntImm]($str2)"
++ " -> $result \t// KILL $cr0, $cr6, $ctr, TEMP $result, $tmp1, $tmp2" %}
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++ __ char_arrays_equalsImm($str1$$Register, $str2$$Register, $cntImm$$constant,
++ $result$$Register, $tmp1$$Register, $tmp2$$Register);
++ %}
++ ins_pipe(pipe_class_compare);
++%}
++
++// String equals.
++// Use dst register classes if a register gets killed, as is the case for TEMP operands!
++instruct string_equals(iRegPsrc str1, iRegPsrc str2, iRegIsrc cnt, iRegIdst result,
++ iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3, iRegPdst tmp4, iRegPdst tmp5,
++ flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
++ match(Set result (StrEquals (Binary str1 str2) cnt));
++ effect(TEMP result, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5,
++ KILL cr0, KILL cr1, KILL cr6, KILL ctr);
++ predicate(SpecialStringEquals); // See Matcher::match_rule_supported.
++ ins_cost(300);
++
++ ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted.
++
++ format %{ "String Equals [0..$cnt]($str1),[0..$cnt]($str2) -> $result"
++ " \t// KILL $cr0, $cr1, $cr6, $ctr, TEMP $result, $tmp1, $tmp2, $tmp3, $tmp4, $tmp5" %}
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++ __ char_arrays_equals($str1$$Register, $str2$$Register, $cnt$$Register, $result$$Register,
++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, $tmp5$$Register);
++ %}
++ ins_pipe(pipe_class_compare);
++%}
++
++// String compare.
++// Char[] pointers are passed in.
++// Use dst register classes if a register gets killed, as is the case for TEMP operands!
++instruct string_compare(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt1, rarg4RegI cnt2, iRegIdst result,
++ iRegPdst tmp, flagsRegCR0 cr0, regCTR ctr) %{
++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
++ effect(USE_KILL cnt1, USE_KILL cnt2, USE_KILL str1, USE_KILL str2, TEMP result, TEMP tmp, KILL cr0, KILL ctr);
++ ins_cost(300);
++
++ ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted.
++
++ format %{ "String Compare $str1[0..$cnt1], $str2[0..$cnt2] -> $result"
++ " \t// TEMP $tmp, $result KILLs $str1, $cnt1, $str2, $cnt2, $cr0, $ctr" %}
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++ __ string_compare($str1$$Register, $str2$$Register, $cnt1$$Register, $cnt2$$Register,
++ $result$$Register, $tmp$$Register);
++ %}
++ ins_pipe(pipe_class_compare);
++%}
++
++//---------- Min/Max Instructions ---------------------------------------------
++
++instruct minI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
++ match(Set dst (MinI src1 src2));
++ ins_cost(DEFAULT_COST*6);
++
++ expand %{
++ iRegLdst src1s;
++ iRegLdst src2s;
++ iRegLdst diff;
++ iRegLdst sm;
++ iRegLdst doz; // difference or zero
++ convI2L_reg(src1s, src1); // Ensure proper sign extension.
++ convI2L_reg(src2s, src2); // Ensure proper sign extension.
++ subL_reg_reg(diff, src2s, src1s);
++ // Need to consider >=33 bit result, therefore we need signmaskL.
++ signmask64L_regL(sm, diff);
++ andL_reg_reg(doz, diff, sm); // <=0
++ addI_regL_regL(dst, doz, src1s);
++ %}
++%}
++
++instruct maxI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
++ match(Set dst (MaxI src1 src2));
++ ins_cost(DEFAULT_COST*6);
++
++ expand %{
++ iRegLdst src1s;
++ iRegLdst src2s;
++ iRegLdst diff;
++ iRegLdst sm;
++ iRegLdst doz; // difference or zero
++ convI2L_reg(src1s, src1); // Ensure proper sign extension.
++ convI2L_reg(src2s, src2); // Ensure proper sign extension.
++ subL_reg_reg(diff, src2s, src1s);
++ // Need to consider >=33 bit result, therefore we need signmaskL.
++ signmask64L_regL(sm, diff);
++ andcL_reg_reg(doz, diff, sm); // >=0
++ addI_regL_regL(dst, doz, src1s);
++ %}
++%}
++
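++// How the two branch-free expansions above work: after sign extension,
++// diff = src2 - src1 is exact in 64 bits and sm is its sign mask (all
++// ones iff diff < 0). Hence diff & sm == min(diff, 0) and diff & ~sm ==
++// max(diff, 0), so src1 + min(diff, 0) yields MinI and src1 + max(diff, 0)
++// yields MaxI without any compare or branch.
++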
++//---------- Population Count Instructions ------------------------------------
++
++// Popcnt for Power7.
++instruct popCountI(iRegIdst dst, iRegIsrc src) %{
++ match(Set dst (PopCountI src));
++ predicate(UsePopCountInstruction && VM_Version::has_popcntw());
++ ins_cost(DEFAULT_COST);
++
++ format %{ "POPCNTW $dst, $src" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_popcntb);
++ __ popcntw($dst$$Register, $src$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Popcnt for Power7.
++instruct popCountL(iRegIdst dst, iRegLsrc src) %{
++ predicate(UsePopCountInstruction && VM_Version::has_popcntw());
++ match(Set dst (PopCountL src));
++ ins_cost(DEFAULT_COST);
++
++ format %{ "POPCNTD $dst, $src" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_popcntb);
++ __ popcntd($dst$$Register, $src$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct countLeadingZerosI(iRegIdst dst, iRegIsrc src) %{
++ match(Set dst (CountLeadingZerosI src));
++ predicate(UseCountLeadingZerosInstructionsPPC64); // See Matcher::match_rule_supported.
++ ins_cost(DEFAULT_COST);
++
++ format %{ "CNTLZW $dst, $src" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_cntlzw);
++ __ cntlzw($dst$$Register, $src$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct countLeadingZerosL(iRegIdst dst, iRegLsrc src) %{
++ match(Set dst (CountLeadingZerosL src));
++ predicate(UseCountLeadingZerosInstructionsPPC64); // See Matcher::match_rule_supported.
++ ins_cost(DEFAULT_COST);
++
++ format %{ "CNTLZD $dst, $src" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_cntlzd);
++ __ cntlzd($dst$$Register, $src$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct countLeadingZerosP(iRegIdst dst, iRegPsrc src) %{
++ // no match-rule, false predicate
++ effect(DEF dst, USE src);
++ predicate(false);
++
++ format %{ "CNTLZD $dst, $src" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_cntlzd);
++ __ cntlzd($dst$$Register, $src$$Register);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct countTrailingZerosI_Ex(iRegIdst dst, iRegIsrc src) %{
++ match(Set dst (CountTrailingZerosI src));
++ predicate(UseCountLeadingZerosInstructionsPPC64);
++ ins_cost(DEFAULT_COST);
++
++ expand %{
++ immI16 imm1 %{ (int)-1 %}
++ immI16 imm2 %{ (int)32 %}
++ immI_minus1 m1 %{ -1 %}
++ iRegIdst tmpI1;
++ iRegIdst tmpI2;
++ iRegIdst tmpI3;
++ addI_reg_imm16(tmpI1, src, imm1);
++ andcI_reg_reg(tmpI2, src, m1, tmpI1);
++ countLeadingZerosI(tmpI3, tmpI2);
++ subI_imm16_reg(dst, imm2, tmpI3);
++ %}
++%}
++
++instruct countTrailingZerosL_Ex(iRegIdst dst, iRegLsrc src) %{
++ match(Set dst (CountTrailingZerosL src));
++ predicate(UseCountLeadingZerosInstructionsPPC64);
++ ins_cost(DEFAULT_COST);
++
++ expand %{
++ immL16 imm1 %{ (long)-1 %}
++ immI16 imm2 %{ (int)64 %}
++ iRegLdst tmpL1;
++ iRegLdst tmpL2;
++ iRegIdst tmpL3;
++ addL_reg_imm16(tmpL1, src, imm1);
++ andcL_reg_reg(tmpL2, tmpL1, src);
++ countLeadingZerosL(tmpL3, tmpL2);
++ subI_imm16_reg(dst, imm2, tmpL3);
++ %}
++%}
++
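++// Bit trick used by both expansions above: for x != 0, (x - 1) & ~x sets
++// exactly the bits below the lowest set bit of x, so ntz(x) equals
++// width - nlz((x - 1) & ~x); for x == 0 the mask is all ones, nlz is 0,
++// and the result is the full width (32 or 64), as required.
++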
++// Expand nodes for byte_reverse_int.
++instruct insrwi_a(iRegIdst dst, iRegIsrc src, immI16 pos, immI16 shift) %{
++ effect(DEF dst, USE src, USE pos, USE shift);
++ predicate(false);
++
++ format %{ "INSRWI $dst, $src, $pos, $shift" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_rlwimi);
++ __ insrwi($dst$$Register, $src$$Register, $shift$$constant, $pos$$constant);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// As insrwi_a, but with USE_DEF.
++instruct insrwi(iRegIdst dst, iRegIsrc src, immI16 pos, immI16 shift) %{
++ effect(USE_DEF dst, USE src, USE pos, USE shift);
++ predicate(false);
++
++ format %{ "INSRWI $dst, $src, $pos, $shift" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_rlwimi);
++ __ insrwi($dst$$Register, $src$$Register, $shift$$constant, $pos$$constant);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Just slightly faster than the Java implementation.
++instruct bytes_reverse_int_Ex(iRegIdst dst, iRegIsrc src) %{
++ match(Set dst (ReverseBytesI src));
++ predicate(UseCountLeadingZerosInstructionsPPC64);
++ ins_cost(DEFAULT_COST);
++
++ expand %{
++ immI16 imm24 %{ (int) 24 %}
++ immI16 imm16 %{ (int) 16 %}
++ immI16 imm8 %{ (int) 8 %}
++ immI16 imm4 %{ (int) 4 %}
++ immI16 imm0 %{ (int) 0 %}
++ iRegLdst tmpI1;
++ iRegLdst tmpI2;
++ iRegLdst tmpI3;
++
++ urShiftI_reg_imm(tmpI1, src, imm24);
++ insrwi_a(dst, tmpI1, imm24, imm8);
++ urShiftI_reg_imm(tmpI2, src, imm16);
++ insrwi(dst, tmpI2, imm8, imm16);
++ urShiftI_reg_imm(tmpI3, src, imm8);
++ insrwi(dst, tmpI3, imm8, imm8);
++ insrwi(dst, src, imm0, imm8);
++ %}
++%}
++
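++// Reading the expansion above: each unsigned-shift/INSRWI pair moves one
++// source byte into its mirrored position in $dst, so four
++// insert-into-word steps assemble the byte-reversed word without a
++// dedicated byte-reverse instruction.
++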
++//---------- Replicate Vector Instructions ------------------------------------
++
++// Insrdi does replicate if src == dst.
++instruct repl32(iRegLdst dst) %{
++ predicate(false);
++ effect(USE_DEF dst);
++
++ format %{ "INSRDI $dst, #0, $dst, #32 \t// replicate" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_rldimi);
++ __ insrdi($dst$$Register, $dst$$Register, 32, 0);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Insrdi does replicate if src == dst.
++instruct repl48(iRegLdst dst) %{
++ predicate(false);
++ effect(USE_DEF dst);
++
++ format %{ "INSRDI $dst, #0, $dst, #48 \t// replicate" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_rldimi);
++ __ insrdi($dst$$Register, $dst$$Register, 48, 0);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Insrdi does replicate if src == dst.
++instruct repl56(iRegLdst dst) %{
++ predicate(false);
++ effect(USE_DEF dst);
++
++ format %{ "INSRDI $dst, #0, $dst, #56 \t// replicate" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_rldimi);
++ __ insrdi($dst$$Register, $dst$$Register, 56, 0);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct repl8B_reg_Ex(iRegLdst dst, iRegIsrc src) %{
++ match(Set dst (ReplicateB src));
++ predicate(n->as_Vector()->length() == 8);
++ expand %{
++ moveReg(dst, src);
++ repl56(dst);
++ repl48(dst);
++ repl32(dst);
++ %}
++%}
++
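++// The expansion above doubles the pattern in place: INSRDI with
++// src == dst replicates the existing low bits (see the helpers above),
++// so repl56 widens the byte to 16 bits, repl48 to 32 bits, and repl32
++// to the full 64-bit register.
++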
++instruct repl8B_immI0(iRegLdst dst, immI_0 zero) %{
++ match(Set dst (ReplicateB zero));
++ predicate(n->as_Vector()->length() == 8);
++ format %{ "LI $dst, #0 \t// replicate8B" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_addi);
++ __ li($dst$$Register, (int)((short)($zero$$constant & 0xFFFF)));
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct repl8B_immIminus1(iRegLdst dst, immI_minus1 src) %{
++ match(Set dst (ReplicateB src));
++ predicate(n->as_Vector()->length() == 8);
++ format %{ "LI $dst, #-1 \t// replicate8B" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_addi);
++ __ li($dst$$Register, (int)((short)($src$$constant & 0xFFFF)));
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct repl4S_reg_Ex(iRegLdst dst, iRegIsrc src) %{
++ match(Set dst (ReplicateS src));
++ predicate(n->as_Vector()->length() == 4);
++ expand %{
++ moveReg(dst, src);
++ repl48(dst);
++ repl32(dst);
++ %}
++%}
++
++instruct repl4S_immI0(iRegLdst dst, immI_0 zero) %{
++ match(Set dst (ReplicateS zero));
++ predicate(n->as_Vector()->length() == 4);
++ format %{ "LI $dst, #0 \t// replicate4C" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_addi);
++ __ li($dst$$Register, (int)((short)($zero$$constant & 0xFFFF)));
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct repl4S_immIminus1(iRegLdst dst, immI_minus1 src) %{
++ match(Set dst (ReplicateS src));
++ predicate(n->as_Vector()->length() == 4);
++ format %{ "LI $dst, -1 \t// replicate4C" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_addi);
++ __ li($dst$$Register, (int)((short)($src$$constant & 0xFFFF)));
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct repl2I_reg_Ex(iRegLdst dst, iRegIsrc src) %{
++ match(Set dst (ReplicateI src));
++ predicate(n->as_Vector()->length() == 2);
++ ins_cost(2 * DEFAULT_COST);
++ expand %{
++ moveReg(dst, src);
++ repl32(dst);
++ %}
++%}
++
++instruct repl2I_immI0(iRegLdst dst, immI_0 zero) %{
++ match(Set dst (ReplicateI zero));
++ predicate(n->as_Vector()->length() == 2);
++ format %{ "LI $dst, #0 \t// replicate4C" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_addi);
++ __ li($dst$$Register, (int)((short)($zero$$constant & 0xFFFF)));
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct repl2I_immIminus1(iRegLdst dst, immI_minus1 src) %{
++ match(Set dst (ReplicateI src));
++ predicate(n->as_Vector()->length() == 2);
++ format %{ "LI $dst, -1 \t// replicate4C" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_addi);
++ __ li($dst$$Register, (int)((short)($src$$constant & 0xFFFF)));
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Move float to int register via stack, replicate.
++instruct repl2F_reg_Ex(iRegLdst dst, regF src) %{
++ match(Set dst (ReplicateF src));
++ predicate(n->as_Vector()->length() == 2);
++ ins_cost(2 * MEMORY_REF_COST + DEFAULT_COST);
++ expand %{
++ stackSlotL tmpS;
++ iRegIdst tmpI;
++ moveF2I_reg_stack(tmpS, src); // Move float to stack.
++ moveF2I_stack_reg(tmpI, tmpS); // Move stack to int reg.
++ moveReg(dst, tmpI); // Move int to long reg.
++ repl32(dst); // Replicate bitpattern.
++ %}
++%}
++
++// Replicate scalar constant to packed float values in Double register
++instruct repl2F_immF_Ex(iRegLdst dst, immF src) %{
++ match(Set dst (ReplicateF src));
++ predicate(n->as_Vector()->length() == 2);
++ ins_cost(5 * DEFAULT_COST);
++
++ format %{ "LD $dst, offset, $constanttablebase\t// load replicated float $src $src from table, postalloc expanded" %}
++ postalloc_expand( postalloc_expand_load_replF_constant(dst, src, constanttablebase) );
++%}
++
++// Replicate scalar zero constant to packed float values in Double register
++instruct repl2F_immF0(iRegLdst dst, immF_0 zero) %{
++ match(Set dst (ReplicateF zero));
++ predicate(n->as_Vector()->length() == 2);
++
++ format %{ "LI $dst, #0 \t// replicate2F" %}
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_addi);
++ __ li($dst$$Register, 0x0);
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// ============================================================================
++// Safepoint Instruction
++
++instruct safePoint_poll(iRegPdst poll) %{
++ match(SafePoint poll);
++ predicate(LoadPollAddressFromThread);
++
++ // Adding the effect that r0 is killed caused problems, but the
++ // effect no longer needs to be mentioned, since r0 is not contained
++ // in a reg_class.
++
++ format %{ "LD R0, #0, $poll \t// Safepoint poll for GC" %}
++ size(4);
++ ins_encode( enc_poll(0x0, poll) );
++ ins_pipe(pipe_class_default);
++%}
++
++// Safepoint without per-thread support. Load address of page to poll
++// as constant.
++// Rscratch2RegP is R12.
++// LoadConPollAddr node is added in pd_post_matching_hook(). It must be
++// a separate node so that the oop map is at the right location.
++instruct safePoint_poll_conPollAddr(rscratch2RegP poll) %{
++ match(SafePoint poll);
++ predicate(!LoadPollAddressFromThread);
++
++ // Adding the effect that r0 is killed caused problems, but the
++ // effect no longer needs to be mentioned, since r0 is not contained
++ // in a reg_class.
++
++ format %{ "LD R0, #0, R12 \t// Safepoint poll for GC" %}
++ ins_encode( enc_poll(0x0, poll) );
++ ins_pipe(pipe_class_default);
++%}
++
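++// Mechanism sketch (standard HotSpot safepoint polling, stated here only
++// as background): the load from the polling page is normally harmless;
++// to bring threads to a safepoint the VM protects the page, so the next
++// poll faults and the fault handler parks the thread.
++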
++// ============================================================================
++// Call Instructions
++
++// Call Java Static Instruction
++
++// Schedulable version of call static node.
++instruct CallStaticJavaDirect(method meth) %{
++ match(CallStaticJava);
++ effect(USE meth);
++ predicate(!((CallStaticJavaNode*)n)->is_method_handle_invoke());
++ ins_cost(CALL_COST);
++
++ ins_num_consts(3 /* up to 3 patchable constants: inline cache, 2 call targets. */);
++
++ format %{ "CALL,static $meth \t// ==> " %}
++ size(4);
++ ins_encode( enc_java_static_call(meth) );
++ ins_pipe(pipe_class_call);
++%}
++
++// Schedulable version of call static node.
++instruct CallStaticJavaDirectHandle(method meth) %{
++ match(CallStaticJava);
++ effect(USE meth);
++ predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke());
++ ins_cost(CALL_COST);
++
++ ins_num_consts(3 /* up to 3 patchable constants: inline cache, 2 call targets. */);
++
++ format %{ "CALL,static $meth \t// ==> " %}
++ ins_encode( enc_java_handle_call(meth) );
++ ins_pipe(pipe_class_call);
++%}
++
++// Call Java Dynamic Instruction
++
++// Used by postalloc expand of CallDynamicJavaDirectSchedEx (actual call).
++// Loading of IC was postalloc expanded. The nodes loading the IC are reachable
++// via fields ins_field_load_ic_hi_node and ins_field_load_ic_node.
++// The call destination must still be placed in the constant pool.
++instruct CallDynamicJavaDirectSched(method meth) %{
++ match(CallDynamicJava); // To get all the data fields we need ...
++ effect(USE meth);
++ predicate(false); // ... but never match.
++
++ ins_field_load_ic_hi_node(loadConL_hiNode*);
++ ins_field_load_ic_node(loadConLNode*);
++ ins_num_consts(1 /* 1 patchable constant: call destination */);
++
++ format %{ "BL \t// dynamic $meth ==> " %}
++ size(4);
++ ins_encode( enc_java_dynamic_call_sched(meth) );
++ ins_pipe(pipe_class_call);
++%}
++
++// Schedulable (i.e. postalloc expanded) version of call dynamic java.
++// We use postalloc expanded calls if we use inline caches
++// and do not update method data.
++//
++// This instruction has two constants: inline cache (IC) and call destination.
++// Loading the inline cache will be postalloc expanded, thus leaving a call with
++// one constant.
++instruct CallDynamicJavaDirectSched_Ex(method meth) %{
++ match(CallDynamicJava);
++ effect(USE meth);
++ predicate(UseInlineCaches);
++ ins_cost(CALL_COST);
++
++ ins_num_consts(2 /* 2 patchable constants: inline cache, call destination. */);
++
++ format %{ "CALL,dynamic $meth \t// postalloc expanded" %}
++ postalloc_expand( postalloc_expand_java_dynamic_call_sched(meth, constanttablebase) );
++%}
++
++// Compound version of call dynamic java
++// We use postalloc expanded calls if we use inline caches
++// and do not update method data.
++instruct CallDynamicJavaDirect(method meth) %{
++ match(CallDynamicJava);
++ effect(USE meth);
++ predicate(!UseInlineCaches);
++ ins_cost(CALL_COST);
++
++ // Enc_java_to_runtime_call needs up to 4 constants (method data oop).
++ ins_num_consts(4);
++
++ format %{ "CALL,dynamic $meth \t// ==> " %}
++ ins_encode( enc_java_dynamic_call(meth, constanttablebase) );
++ ins_pipe(pipe_class_call);
++%}
++
++// Call Runtime Instruction
++
++instruct CallRuntimeDirect(method meth) %{
++ match(CallRuntime);
++ effect(USE meth);
++ ins_cost(CALL_COST);
++
++ // Enc_java_to_runtime_call needs up to 3 constants: call target,
++ // env for callee, C-toc.
++ ins_num_consts(3);
++
++ format %{ "CALL,runtime" %}
++ ins_encode( enc_java_to_runtime_call(meth) );
++ ins_pipe(pipe_class_call);
++%}
++
++// Call Leaf
++
++// Used by postalloc expand of CallLeafDirect_Ex (mtctr).
++instruct CallLeafDirect_mtctr(iRegLdst dst, iRegLsrc src) %{
++ effect(DEF dst, USE src);
++
++ ins_num_consts(1);
++
++ format %{ "MTCTR $src" %}
++ size(4);
++ ins_encode( enc_leaf_call_mtctr(src) );
++ ins_pipe(pipe_class_default);
++%}
++
++// Used by postalloc expand of CallLeafDirect_Ex (actual call).
++instruct CallLeafDirect(method meth) %{
++ match(CallLeaf); // To get all the data fields we need ...
++ effect(USE meth);
++ predicate(false); // but never match.
++
++ format %{ "BCTRL \t// leaf call $meth ==> " %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_bctrl);
++ __ bctrl();
++ %}
++ ins_pipe(pipe_class_call);
++%}
++
++// postalloc expand of CallLeafDirect.
++// Load the address to call from the TOC, then bl to it.
++instruct CallLeafDirect_Ex(method meth) %{
++ match(CallLeaf);
++ effect(USE meth);
++ ins_cost(CALL_COST);
++
++ // Postalloc_expand_java_to_runtime_call needs up to 3 constants: call target,
++ // env for callee, C-toc.
++ ins_num_consts(3);
++
++ format %{ "CALL,runtime leaf $meth \t// postalloc expanded" %}
++ postalloc_expand( postalloc_expand_java_to_runtime_call(meth, constanttablebase) );
++%}
++
++// Call runtime without safepoint - same as CallLeaf.
++// postalloc expand of CallLeafNoFPDirect.
++// Load the address to call from the TOC, then bl to it.
++instruct CallLeafNoFPDirect_Ex(method meth) %{
++ match(CallLeafNoFP);
++ effect(USE meth);
++ ins_cost(CALL_COST);
++
++ // Enc_java_to_runtime_call needs up to 3 constants: call target,
++ // env for callee, C-toc.
++ ins_num_consts(3);
++
++ format %{ "CALL,runtime leaf nofp $meth \t// postalloc expanded" %}
++ postalloc_expand( postalloc_expand_java_to_runtime_call(meth, constanttablebase) );
++%}
++
++// Tail Call; Jump from runtime stub to Java code.
++// Also known as an 'interprocedural jump'.
++// Target of jump will eventually return to caller.
++// TailJump below removes the return address.
++instruct TailCalljmpInd(iRegPdstNoScratch jump_target, inline_cache_regP method_oop) %{
++ match(TailCall jump_target method_oop);
++ ins_cost(CALL_COST);
++
++ format %{ "MTCTR $jump_target \t// $method_oop holds method oop\n\t"
++ "BCTR \t// tail call" %}
++ size(8);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++ __ mtctr($jump_target$$Register);
++ __ bctr();
++ %}
++ ins_pipe(pipe_class_call);
++%}
++
++// Return Instruction
++instruct Ret() %{
++ match(Return);
++ format %{ "BLR \t// branch to link register" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_blr);
++ // LR is restored in MachEpilogNode. Just do the RET here.
++ __ blr();
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Tail Jump; remove the return address; jump to target.
++// TailCall above leaves the return address around.
++// TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2).
++// ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a
++// "restore" before this instruction (in Epilogue), we need to materialize it
++// in %i0.
++instruct tailjmpInd(iRegPdstNoScratch jump_target, rarg1RegP ex_oop) %{
++ match(TailJump jump_target ex_oop);
++ ins_cost(CALL_COST);
++
++ format %{ "LD R4_ARG2 = LR\n\t"
++ "MTCTR $jump_target\n\t"
++ "BCTR \t// TailJump, exception oop: $ex_oop" %}
++ size(12);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++ __ ld(R4_ARG2/* issuing pc */, _abi(lr), R1_SP);
++ __ mtctr($jump_target$$Register);
++ __ bctr();
++ %}
++ ins_pipe(pipe_class_call);
++%}
++
++// Create exception oop: created by stack-crawling runtime code.
++// Created exception is now available to this handler, and is setup
++// just prior to jumping to this handler. No code emitted.
++instruct CreateException(rarg1RegP ex_oop) %{
++ match(Set ex_oop (CreateEx));
++ ins_cost(0);
++
++ format %{ " -- \t// exception oop; no code emitted" %}
++ size(0);
++ ins_encode( /*empty*/ );
++ ins_pipe(pipe_class_default);
++%}
++
++// Rethrow exception: The exception oop will come in the first
++// argument position. Then JUMP (not call) to the rethrow stub code.
++instruct RethrowException() %{
++ match(Rethrow);
++ ins_cost(CALL_COST);
++
++ format %{ "Jmp rethrow_stub" %}
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_compound);
++ cbuf.set_insts_mark();
++ __ b64_patchable((address)OptoRuntime::rethrow_stub(), relocInfo::runtime_call_type);
++ %}
++ ins_pipe(pipe_class_call);
++%}
++
++// Die now.
++instruct ShouldNotReachHere() %{
++ match(Halt);
++ ins_cost(CALL_COST);
++
++ format %{ "ShouldNotReachHere" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_tdi);
++ __ trap_should_not_reach_here();
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// This name is KNOWN by the ADLC and cannot be changed. The ADLC
++// forces a 'TypeRawPtr::BOTTOM' output type for this node.
++// Get a DEF on threadRegP, no costs, no encoding, use
++// 'ins_should_rematerialize(true)' to avoid spilling.
++instruct tlsLoadP(threadRegP dst) %{
++ match(Set dst (ThreadLocal));
++ ins_cost(0);
++
++ ins_should_rematerialize(true);
++
++ format %{ " -- \t// $dst=Thread::current(), empty" %}
++ size(0);
++ ins_encode( /*empty*/ );
++ ins_pipe(pipe_class_empty);
++%}
++
++//---Some PPC specific nodes---------------------------------------------------
++
++// Stop a group.
++instruct endGroup() %{
++ ins_cost(0);
++
++ ins_is_nop(true);
++
++ format %{ "End Bundle (ori r1, r1, 0)" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_endgroup);
++ __ endgroup();
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++// Nop instructions
++
++instruct fxNop() %{
++ ins_cost(0);
++
++ ins_is_nop(true);
++
++ format %{ "fxNop" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_fmr);
++ __ nop();
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct fpNop0() %{
++ ins_cost(0);
++
++ ins_is_nop(true);
++
++ format %{ "fpNop0" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_fmr);
++ __ fpnop0();
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct fpNop1() %{
++ ins_cost(0);
++
++ ins_is_nop(true);
++
++ format %{ "fpNop1" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_fmr);
++ __ fpnop1();
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct brNop0() %{
++ ins_cost(0);
++ size(4);
++ format %{ "brNop0" %}
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_mcrf);
++ __ brnop0();
++ %}
++ ins_is_nop(true);
++ ins_pipe(pipe_class_default);
++%}
++
++instruct brNop1() %{
++ ins_cost(0);
++
++ ins_is_nop(true);
++
++ format %{ "brNop1" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_mcrf);
++ __ brnop1();
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++instruct brNop2() %{
++ ins_cost(0);
++
++ ins_is_nop(true);
++
++ format %{ "brNop2" %}
++ size(4);
++ ins_encode %{
++ // TODO: PPC port $archOpcode(ppc64Opcode_mcrf);
++ __ brnop2();
++ %}
++ ins_pipe(pipe_class_default);
++%}
++
++//----------PEEPHOLE RULES-----------------------------------------------------
++// These must follow all instruction definitions as they use the names
++// defined in the instructions definitions.
++//
++// peepmatch ( root_instr_name [preceding_instruction]* );
++//
++// peepconstraint %{
++// (instruction_number.operand_name relational_op instruction_number.operand_name
++// [, ...] );
++// // instruction numbers are zero-based using left to right order in peepmatch
++//
++// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
++// // provide an instruction_number.operand_name for each operand that appears
++// // in the replacement instruction's match rule
++//
++// ---------VM FLAGS---------------------------------------------------------
++//
++// All peephole optimizations can be turned off using -XX:-OptoPeephole
++//
++// Each peephole rule is given an identifying number starting with zero and
++// increasing by one in the order seen by the parser. An individual peephole
++// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
++// on the command-line.
++//
++// ---------CURRENT LIMITATIONS----------------------------------------------
++//
++// Only match adjacent instructions in same basic block
++// Only equality constraints
++// Only constraints between operands, not (0.dest_reg == EAX_enc)
++// Only one replacement instruction
++//
++// ---------EXAMPLE----------------------------------------------------------
++//
++// // pertinent parts of existing instructions in architecture description
++// instruct movI(eRegI dst, eRegI src) %{
++// match(Set dst (CopyI src));
++// %}
++//
++// instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
++// match(Set dst (AddI dst src));
++// effect(KILL cr);
++// %}
++//
++// // Change (inc mov) to lea
++// peephole %{
++// // increment preceded by register-register move
++// peepmatch ( incI_eReg movI );
++// // require that the destination register of the increment
++// // match the destination register of the move
++// peepconstraint ( 0.dst == 1.dst );
++// // construct a replacement instruction that sets
++// // the destination to ( move's source register + one )
++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
++// %}
++//
++// The implementation no longer uses movX instructions, since the
++// machine-independent system no longer uses CopyX nodes.
++//
++// peephole %{
++// peepmatch ( incI_eReg movI );
++// peepconstraint ( 0.dst == 1.dst );
++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
++// %}
++//
++// peephole %{
++// peepmatch ( decI_eReg movI );
++// peepconstraint ( 0.dst == 1.dst );
++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
++// %}
++//
++// peephole %{
++// peepmatch ( addI_eReg_imm movI );
++// peepconstraint ( 0.dst == 1.dst );
++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
++// %}
++//
++// peephole %{
++// peepmatch ( addP_eReg_imm movP );
++// peepconstraint ( 0.dst == 1.dst );
++// peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
++// %}
++
++// // Change load of spilled value to only a spill
++// instruct storeI(memory mem, eRegI src) %{
++// match(Set mem (StoreI mem src));
++// %}
++//
++// instruct loadI(eRegI dst, memory mem) %{
++// match(Set dst (LoadI mem));
++// %}
++//
++peephole %{
++ peepmatch ( loadI storeI );
++ peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
++ peepreplace ( storeI( 1.mem 1.mem 1.src ) );
++%}
++
++peephole %{
++ peepmatch ( loadL storeL );
++ peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
++ peepreplace ( storeL( 1.mem 1.mem 1.src ) );
++%}
++
++peephole %{
++ peepmatch ( loadP storeP );
++ peepconstraint ( 1.src == 0.dst, 1.dst == 0.mem );
++ peepreplace ( storeP( 1.dst 1.dst 1.src ) );
++%}
++
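++// The three peepholes above implement the "load of spilled value" idea
++// sketched in the commented example: when a load reads back what the
++// immediately preceding store wrote, the pair is replaced by the store
++// alone and the redundant load disappears.
++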
++//----------SMARTSPILL RULES---------------------------------------------------
++// These must follow all instruction definitions as they use the names
++// defined in the instructions definitions.
+--- ./hotspot/src/cpu/ppc/vm/ppc_64.ad Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/ppc_64.ad Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,24 @@
++//
++// Copyright (c) 2011, 2013, Oracle and/or its affiliates. All rights reserved.
++// Copyright 2012, 2013 SAP AG. All rights reserved.
++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++//
++// This code is free software; you can redistribute it and/or modify it
++// under the terms of the GNU General Public License version 2 only, as
++// published by the Free Software Foundation.
++//
++// This code is distributed in the hope that it will be useful, but WITHOUT
++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++// version 2 for more details (a copy is included in the LICENSE file that
++// accompanied this code).
++//
++// You should have received a copy of the GNU General Public License version
++// 2 along with this work; if not, write to the Free Software Foundation,
++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++//
++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++// or visit www.oracle.com if you need additional information or have any
++// questions.
++//
++//
+--- ./hotspot/src/cpu/ppc/vm/registerMap_ppc.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/registerMap_ppc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,45 @@
++/*
++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_PPC_VM_REGISTERMAP_PPC_HPP
++#define CPU_PPC_VM_REGISTERMAP_PPC_HPP
++
++// machine-dependent implementation for register maps
++ friend class frame;
++
++ private:
++ // This is the hook for finding a register in a "well-known" location,
++ // such as a register block of a predetermined format.
++ // Since there is none, we just return NULL.
++ // See registerMap_sparc.hpp for an example of grabbing registers
++ // from register save areas of a standard layout.
++ address pd_location(VMReg reg) const { return NULL; }
++
++ // no PD state to clear or copy:
++ void pd_clear() {}
++ void pd_initialize() {}
++ void pd_initialize_from(const RegisterMap* map) {}
++
++#endif // CPU_PPC_VM_REGISTERMAP_PPC_HPP
+--- ./hotspot/src/cpu/ppc/vm/register_definitions_ppc.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/register_definitions_ppc.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,42 @@
++/*
++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++// make sure the defines don't screw up the declarations later on in this file
++#define DONT_USE_REGISTER_DEFINES
++
++#include "precompiled.hpp"
++#include "asm/macroAssembler.hpp"
++#include "asm/register.hpp"
++#include "register_ppc.hpp"
++#ifdef TARGET_ARCH_MODEL_ppc_32
++# include "interp_masm_ppc_32.hpp"
++#endif
++#ifdef TARGET_ARCH_MODEL_ppc_64
++# include "interp_masm_ppc_64.hpp"
++#endif
++
++REGISTER_DEFINITION(Register, noreg);
++
++REGISTER_DEFINITION(FloatRegister, fnoreg);
+--- ./hotspot/src/cpu/ppc/vm/register_ppc.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/register_ppc.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,77 @@
++/*
++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "register_ppc.hpp"
++
++const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers * 2;
++const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr +
++ FloatRegisterImpl::number_of_registers * 2;
++const int ConcreteRegisterImpl::max_cnd = ConcreteRegisterImpl::max_fpr +
++ ConditionRegisterImpl::number_of_registers;
++
++const char* RegisterImpl::name() const {
++ const char* names[number_of_registers] = {
++ "R0", "R1", "R2", "R3", "R4", "R5", "R6", "R7",
++ "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15",
++ "R16", "R17", "R18", "R19", "R20", "R21", "R22", "R23",
++ "R24", "R25", "R26", "R27", "R28", "R29", "R30", "R31"
++ };
++ return is_valid() ? names[encoding()] : "noreg";
++}
++
++const char* ConditionRegisterImpl::name() const {
++ const char* names[number_of_registers] = {
++ "CR0", "CR1", "CR2", "CR3", "CR4", "CR5", "CR6", "CR7"
++ };
++ return is_valid() ? names[encoding()] : "cnoreg";
++}
++
++const char* FloatRegisterImpl::name() const {
++ const char* names[number_of_registers] = {
++ "F0", "F1", "F2", "F3", "F4", "F5", "F6", "F7",
++ "F8", "F9", "F10", "F11", "F12", "F13", "F14", "F15",
++ "F16", "F17", "F18", "F19", "F20", "F21", "F22", "F23",
++ "F24", "F25", "F26", "F27", "F28", "F29", "F30", "F31"
++ };
++ return is_valid() ? names[encoding()] : "fnoreg";
++}
++
++const char* SpecialRegisterImpl::name() const {
++ const char* names[number_of_registers] = {
++ "SR_XER", "SR_LR", "SR_CTR", "SR_VRSAVE", "SR_SPEFSCR", "SR_PPR"
++ };
++ return is_valid() ? names[encoding()] : "snoreg";
++}
++
++const char* VectorRegisterImpl::name() const {
++ const char* names[number_of_registers] = {
++ "VR0", "VR1", "VR2", "VR3", "VR4", "VR5", "VR6", "VR7",
++ "VR8", "VR9", "VR10", "VR11", "VR12", "VR13", "VR14", "VR15",
++ "VR16", "VR17", "VR18", "VR19", "VR20", "VR21", "VR22", "VR23",
++ "VR24", "VR25", "VR26", "VR27", "VR28", "VR29", "VR30", "VR31"
++ };
++ return is_valid() ? names[encoding()] : "vnoreg";
++}
+--- ./hotspot/src/cpu/ppc/vm/register_ppc.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/register_ppc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,663 @@
++/*
++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2014 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_PPC_VM_REGISTER_PPC_HPP
++#define CPU_PPC_VM_REGISTER_PPC_HPP
++
++#include "asm/register.hpp"
++#include "vm_version_ppc.hpp"
++
++// forward declaration
++class Address;
++class VMRegImpl;
++typedef VMRegImpl* VMReg;
++
++// PPC64 registers
++//
++// See "64-bit PowerPC ELF ABI Supplement 1.7", IBM Corp. (2003-10-29).
++// (http://math-atlas.sourceforge.net/devel/assembly/PPC-elf64abi-1.7.pdf)
++//
++// r0 Register used in function prologs (volatile)
++// r1 Stack pointer (nonvolatile)
++// r2 TOC pointer (volatile)
++// r3 Parameter and return value (volatile)
++// r4-r10 Function parameters (volatile)
++// r11 Register used in calls by pointer and as an environment pointer for languages which require one (volatile)
++// r12 Register used for exception handling and glink code (volatile)
++// r13 Reserved for use as system thread ID
++// r14-r31 Local variables (nonvolatile)
++//
++// f0 Scratch register (volatile)
++// f1-f4 Floating point parameters and return value (volatile)
++// f5-f13 Floating point parameters (volatile)
++// f14-f31 Floating point values (nonvolatile)
++//
++// LR Link register for return address (volatile)
++// CTR Loop counter (volatile)
++// XER Fixed point exception register (volatile)
++// FPSCR Floating point status and control register (volatile)
++//
++// CR0-CR1 Condition code fields (volatile)
++// CR2-CR4 Condition code fields (nonvolatile)
++// CR5-CR7 Condition code fields (volatile)
++//
++// ----------------------------------------------
++// On processors with the VMX feature:
++// v0-v1 Volatile scratch registers
++// v2-v13 Volatile vector parameter registers
++// v14-v19 Volatile scratch registers
++// v20-v31 Non-volatile registers
++// vrsave Non-volatile 32-bit register
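++//
++// Example (a sketch of the convention above): for a C function
++// "long f(long a, long b)", 'a' arrives in r3, 'b' in r4, and the
++// result is returned in r3; f1-f13 carry up to thirteen floating
++// point arguments.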
++
++
++// Use Register as shortcut
++class RegisterImpl;
++typedef RegisterImpl* Register;
++
++inline Register as_Register(int encoding) {
++ assert(encoding >= 0 && encoding < 32, "bad register encoding");
++ return (Register)(intptr_t)encoding;
++}
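++// (A hedged usage sketch, not part of the port proper: encodings
++// round-trip through as_Register, e.g. as_Register(3) is the Register
++// whose encoding() is 3 -- declared as R3 below -- and R3->successor()
++// is R4, so assert(as_Register(R3->encoding()) == R3, "round trip")
++// always holds.)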
++
++// The implementation of integer registers for the Power architecture
++class RegisterImpl: public AbstractRegisterImpl {
++ public:
++ enum {
++ number_of_registers = 32
++ };
++
++ // general construction
++ inline friend Register as_Register(int encoding);
++
++ // accessors
++ int encoding() const { assert(is_valid(), "invalid register"); return value(); }
++ VMReg as_VMReg();
++ Register successor() const { return as_Register(encoding() + 1); }
++
++ // testers
++ bool is_valid() const { return ( 0 <= (value()&0x7F) && (value()&0x7F) < number_of_registers); }
++ bool is_volatile() const { return ( 0 <= (value()&0x7F) && (value()&0x7F) <= 13 ); }
++ bool is_nonvolatile() const { return (14 <= (value()&0x7F) && (value()&0x7F) <= 31 ); }
++
++ const char* name() const;
++};
++
++// The integer registers of the PPC architecture
++CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1));
++
++CONSTANT_REGISTER_DECLARATION(Register, R0, (0));
++CONSTANT_REGISTER_DECLARATION(Register, R1, (1));
++CONSTANT_REGISTER_DECLARATION(Register, R2, (2));
++CONSTANT_REGISTER_DECLARATION(Register, R3, (3));
++CONSTANT_REGISTER_DECLARATION(Register, R4, (4));
++CONSTANT_REGISTER_DECLARATION(Register, R5, (5));
++CONSTANT_REGISTER_DECLARATION(Register, R6, (6));
++CONSTANT_REGISTER_DECLARATION(Register, R7, (7));
++CONSTANT_REGISTER_DECLARATION(Register, R8, (8));
++CONSTANT_REGISTER_DECLARATION(Register, R9, (9));
++CONSTANT_REGISTER_DECLARATION(Register, R10, (10));
++CONSTANT_REGISTER_DECLARATION(Register, R11, (11));
++CONSTANT_REGISTER_DECLARATION(Register, R12, (12));
++CONSTANT_REGISTER_DECLARATION(Register, R13, (13));
++CONSTANT_REGISTER_DECLARATION(Register, R14, (14));
++CONSTANT_REGISTER_DECLARATION(Register, R15, (15));
++CONSTANT_REGISTER_DECLARATION(Register, R16, (16));
++CONSTANT_REGISTER_DECLARATION(Register, R17, (17));
++CONSTANT_REGISTER_DECLARATION(Register, R18, (18));
++CONSTANT_REGISTER_DECLARATION(Register, R19, (19));
++CONSTANT_REGISTER_DECLARATION(Register, R20, (20));
++CONSTANT_REGISTER_DECLARATION(Register, R21, (21));
++CONSTANT_REGISTER_DECLARATION(Register, R22, (22));
++CONSTANT_REGISTER_DECLARATION(Register, R23, (23));
++CONSTANT_REGISTER_DECLARATION(Register, R24, (24));
++CONSTANT_REGISTER_DECLARATION(Register, R25, (25));
++CONSTANT_REGISTER_DECLARATION(Register, R26, (26));
++CONSTANT_REGISTER_DECLARATION(Register, R27, (27));
++CONSTANT_REGISTER_DECLARATION(Register, R28, (28));
++CONSTANT_REGISTER_DECLARATION(Register, R29, (29));
++CONSTANT_REGISTER_DECLARATION(Register, R30, (30));
++CONSTANT_REGISTER_DECLARATION(Register, R31, (31));
++
++
++//
++// Because Power has many registers, #define'ing values for them is
++// beneficial in code size and is worth the cost of some of the
++// dangers of defines. If a particular file has a problem with these
++// defines then it's possible to turn them off in that file by
++// defining DONT_USE_REGISTER_DEFINES. register_definitions_ppc.cpp
++// does that so that it's able to provide real definitions of these
++// registers for use in debuggers and such.
++//
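++// For example (a sketch mirroring what register_definitions_ppc.cpp
++// does), a file that needs the real declarations can opt out before
++// including this header:
++//
++// #define DONT_USE_REGISTER_DEFINES
++// #include "register_ppc.hpp"
++// // R3 now names the CONSTANT_REGISTER_DECLARATION, not a macro.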
++
++#ifndef DONT_USE_REGISTER_DEFINES
++#define noreg ((Register)(noreg_RegisterEnumValue))
++
++#define R0 ((Register)(R0_RegisterEnumValue))
++#define R1 ((Register)(R1_RegisterEnumValue))
++#define R2 ((Register)(R2_RegisterEnumValue))
++#define R3 ((Register)(R3_RegisterEnumValue))
++#define R4 ((Register)(R4_RegisterEnumValue))
++#define R5 ((Register)(R5_RegisterEnumValue))
++#define R6 ((Register)(R6_RegisterEnumValue))
++#define R7 ((Register)(R7_RegisterEnumValue))
++#define R8 ((Register)(R8_RegisterEnumValue))
++#define R9 ((Register)(R9_RegisterEnumValue))
++#define R10 ((Register)(R10_RegisterEnumValue))
++#define R11 ((Register)(R11_RegisterEnumValue))
++#define R12 ((Register)(R12_RegisterEnumValue))
++#define R13 ((Register)(R13_RegisterEnumValue))
++#define R14 ((Register)(R14_RegisterEnumValue))
++#define R15 ((Register)(R15_RegisterEnumValue))
++#define R16 ((Register)(R16_RegisterEnumValue))
++#define R17 ((Register)(R17_RegisterEnumValue))
++#define R18 ((Register)(R18_RegisterEnumValue))
++#define R19 ((Register)(R19_RegisterEnumValue))
++#define R20 ((Register)(R20_RegisterEnumValue))
++#define R21 ((Register)(R21_RegisterEnumValue))
++#define R22 ((Register)(R22_RegisterEnumValue))
++#define R23 ((Register)(R23_RegisterEnumValue))
++#define R24 ((Register)(R24_RegisterEnumValue))
++#define R25 ((Register)(R25_RegisterEnumValue))
++#define R26 ((Register)(R26_RegisterEnumValue))
++#define R27 ((Register)(R27_RegisterEnumValue))
++#define R28 ((Register)(R28_RegisterEnumValue))
++#define R29 ((Register)(R29_RegisterEnumValue))
++#define R30 ((Register)(R30_RegisterEnumValue))
++#define R31 ((Register)(R31_RegisterEnumValue))
++#endif
++
++// Use ConditionRegister as shortcut
++class ConditionRegisterImpl;
++typedef ConditionRegisterImpl* ConditionRegister;
++
++inline ConditionRegister as_ConditionRegister(int encoding) {
++ assert(encoding >= 0 && encoding < 8, "bad condition register encoding");
++ return (ConditionRegister)(intptr_t)encoding;
++}
++
++// The implementation of condition register(s) for the PPC architecture
++class ConditionRegisterImpl: public AbstractRegisterImpl {
++ public:
++ enum {
++ number_of_registers = 8
++ };
++
++ // construction.
++ inline friend ConditionRegister as_ConditionRegister(int encoding);
++
++ // accessors
++ int encoding() const { assert(is_valid(), "invalid register"); return value(); }
++ VMReg as_VMReg();
++
++ // testers
++ bool is_valid() const { return (0 <= value() && value() < number_of_registers); }
++ bool is_nonvolatile() const { return (2 <= (value()&0x7F) && (value()&0x7F) <= 4 ); }
++
++ const char* name() const;
++};
++
++// The (parts of the) condition register(s) of the PPC architecture
++// sys/ioctl.h on AIX defines CR0-CR3, so I name these CCR.
++CONSTANT_REGISTER_DECLARATION(ConditionRegister, CCR0, (0));
++CONSTANT_REGISTER_DECLARATION(ConditionRegister, CCR1, (1));
++CONSTANT_REGISTER_DECLARATION(ConditionRegister, CCR2, (2));
++CONSTANT_REGISTER_DECLARATION(ConditionRegister, CCR3, (3));
++CONSTANT_REGISTER_DECLARATION(ConditionRegister, CCR4, (4));
++CONSTANT_REGISTER_DECLARATION(ConditionRegister, CCR5, (5));
++CONSTANT_REGISTER_DECLARATION(ConditionRegister, CCR6, (6));
++CONSTANT_REGISTER_DECLARATION(ConditionRegister, CCR7, (7));
++
++#ifndef DONT_USE_REGISTER_DEFINES
++
++#define CCR0 ((ConditionRegister)(CCR0_ConditionRegisterEnumValue))
++#define CCR1 ((ConditionRegister)(CCR1_ConditionRegisterEnumValue))
++#define CCR2 ((ConditionRegister)(CCR2_ConditionRegisterEnumValue))
++#define CCR3 ((ConditionRegister)(CCR3_ConditionRegisterEnumValue))
++#define CCR4 ((ConditionRegister)(CCR4_ConditionRegisterEnumValue))
++#define CCR5 ((ConditionRegister)(CCR5_ConditionRegisterEnumValue))
++#define CCR6 ((ConditionRegister)(CCR6_ConditionRegisterEnumValue))
++#define CCR7 ((ConditionRegister)(CCR7_ConditionRegisterEnumValue))
++
++#endif // DONT_USE_REGISTER_DEFINES
++
++
++// Use FloatRegister as shortcut
++class FloatRegisterImpl;
++typedef FloatRegisterImpl* FloatRegister;
++
++inline FloatRegister as_FloatRegister(int encoding) {
++ assert(encoding >= 0 && encoding < 32, "bad float register encoding");
++ return (FloatRegister)(intptr_t)encoding;
++}
++
++// The implementation of float registers for the PPC architecture
++class FloatRegisterImpl: public AbstractRegisterImpl {
++ public:
++ enum {
++ number_of_registers = 32
++ };
++
++ // construction
++ inline friend FloatRegister as_FloatRegister(int encoding);
++
++ // accessors
++ int encoding() const { assert(is_valid(), "invalid register"); return value(); }
++ VMReg as_VMReg();
++ FloatRegister successor() const { return as_FloatRegister(encoding() + 1); }
++
++ // testers
++ bool is_valid() const { return (0 <= value() && value() < number_of_registers); }
++
++ const char* name() const;
++};
++
++// The float registers of the PPC architecture
++CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg, (-1));
++
++CONSTANT_REGISTER_DECLARATION(FloatRegister, F0, ( 0));
++CONSTANT_REGISTER_DECLARATION(FloatRegister, F1, ( 1));
++CONSTANT_REGISTER_DECLARATION(FloatRegister, F2, ( 2));
++CONSTANT_REGISTER_DECLARATION(FloatRegister, F3, ( 3));
++CONSTANT_REGISTER_DECLARATION(FloatRegister, F4, ( 4));
++CONSTANT_REGISTER_DECLARATION(FloatRegister, F5, ( 5));
++CONSTANT_REGISTER_DECLARATION(FloatRegister, F6, ( 6));
++CONSTANT_REGISTER_DECLARATION(FloatRegister, F7, ( 7));
++CONSTANT_REGISTER_DECLARATION(FloatRegister, F8, ( 8));
++CONSTANT_REGISTER_DECLARATION(FloatRegister, F9, ( 9));
++CONSTANT_REGISTER_DECLARATION(FloatRegister, F10, (10));
++CONSTANT_REGISTER_DECLARATION(FloatRegister, F11, (11));
++CONSTANT_REGISTER_DECLARATION(FloatRegister, F12, (12));
++CONSTANT_REGISTER_DECLARATION(FloatRegister, F13, (13));
++CONSTANT_REGISTER_DECLARATION(FloatRegister, F14, (14));
++CONSTANT_REGISTER_DECLARATION(FloatRegister, F15, (15));
++CONSTANT_REGISTER_DECLARATION(FloatRegister, F16, (16));
++CONSTANT_REGISTER_DECLARATION(FloatRegister, F17, (17));
++CONSTANT_REGISTER_DECLARATION(FloatRegister, F18, (18));
++CONSTANT_REGISTER_DECLARATION(FloatRegister, F19, (19));
++CONSTANT_REGISTER_DECLARATION(FloatRegister, F20, (20));
++CONSTANT_REGISTER_DECLARATION(FloatRegister, F21, (21));
++CONSTANT_REGISTER_DECLARATION(FloatRegister, F22, (22));
++CONSTANT_REGISTER_DECLARATION(FloatRegister, F23, (23));
++CONSTANT_REGISTER_DECLARATION(FloatRegister, F24, (24));
++CONSTANT_REGISTER_DECLARATION(FloatRegister, F25, (25));
++CONSTANT_REGISTER_DECLARATION(FloatRegister, F26, (26));
++CONSTANT_REGISTER_DECLARATION(FloatRegister, F27, (27));
++CONSTANT_REGISTER_DECLARATION(FloatRegister, F28, (28));
++CONSTANT_REGISTER_DECLARATION(FloatRegister, F29, (29));
++CONSTANT_REGISTER_DECLARATION(FloatRegister, F30, (30));
++CONSTANT_REGISTER_DECLARATION(FloatRegister, F31, (31));
++
++#ifndef DONT_USE_REGISTER_DEFINES
++#define fnoreg ((FloatRegister)(fnoreg_FloatRegisterEnumValue))
++#define F0 ((FloatRegister)( F0_FloatRegisterEnumValue))
++#define F1 ((FloatRegister)( F1_FloatRegisterEnumValue))
++#define F2 ((FloatRegister)( F2_FloatRegisterEnumValue))
++#define F3 ((FloatRegister)( F3_FloatRegisterEnumValue))
++#define F4 ((FloatRegister)( F4_FloatRegisterEnumValue))
++#define F5 ((FloatRegister)( F5_FloatRegisterEnumValue))
++#define F6 ((FloatRegister)( F6_FloatRegisterEnumValue))
++#define F7 ((FloatRegister)( F7_FloatRegisterEnumValue))
++#define F8 ((FloatRegister)( F8_FloatRegisterEnumValue))
++#define F9 ((FloatRegister)( F9_FloatRegisterEnumValue))
++#define F10 ((FloatRegister)( F10_FloatRegisterEnumValue))
++#define F11 ((FloatRegister)( F11_FloatRegisterEnumValue))
++#define F12 ((FloatRegister)( F12_FloatRegisterEnumValue))
++#define F13 ((FloatRegister)( F13_FloatRegisterEnumValue))
++#define F14 ((FloatRegister)( F14_FloatRegisterEnumValue))
++#define F15 ((FloatRegister)( F15_FloatRegisterEnumValue))
++#define F16 ((FloatRegister)( F16_FloatRegisterEnumValue))
++#define F17 ((FloatRegister)( F17_FloatRegisterEnumValue))
++#define F18 ((FloatRegister)( F18_FloatRegisterEnumValue))
++#define F19 ((FloatRegister)( F19_FloatRegisterEnumValue))
++#define F20 ((FloatRegister)( F20_FloatRegisterEnumValue))
++#define F21 ((FloatRegister)( F21_FloatRegisterEnumValue))
++#define F22 ((FloatRegister)( F22_FloatRegisterEnumValue))
++#define F23 ((FloatRegister)( F23_FloatRegisterEnumValue))
++#define F24 ((FloatRegister)( F24_FloatRegisterEnumValue))
++#define F25 ((FloatRegister)( F25_FloatRegisterEnumValue))
++#define F26 ((FloatRegister)( F26_FloatRegisterEnumValue))
++#define F27 ((FloatRegister)( F27_FloatRegisterEnumValue))
++#define F28 ((FloatRegister)( F28_FloatRegisterEnumValue))
++#define F29 ((FloatRegister)( F29_FloatRegisterEnumValue))
++#define F30 ((FloatRegister)( F30_FloatRegisterEnumValue))
++#define F31 ((FloatRegister)( F31_FloatRegisterEnumValue))
++#endif // DONT_USE_REGISTER_DEFINES
++
++// Use SpecialRegister as shortcut
++class SpecialRegisterImpl;
++typedef SpecialRegisterImpl* SpecialRegister;
++
++inline SpecialRegister as_SpecialRegister(int encoding) {
++ return (SpecialRegister)(intptr_t)encoding;
++}
++
++// The implementation of special registers for the Power architecture (LR, CTR and friends)
++class SpecialRegisterImpl: public AbstractRegisterImpl {
++ public:
++ enum {
++ number_of_registers = 6
++ };
++
++ // construction
++ inline friend SpecialRegister as_SpecialRegister(int encoding);
++
++ // accessors
++ int encoding() const { assert(is_valid(), "invalid register"); return value(); }
++ VMReg as_VMReg();
++
++ // testers
++ bool is_valid() const { return 0 <= value() && value() < number_of_registers; }
++
++ const char* name() const;
++};
++
++// The special registers of the PPC architecture
++CONSTANT_REGISTER_DECLARATION(SpecialRegister, SR_XER, (0));
++CONSTANT_REGISTER_DECLARATION(SpecialRegister, SR_LR, (1));
++CONSTANT_REGISTER_DECLARATION(SpecialRegister, SR_CTR, (2));
++CONSTANT_REGISTER_DECLARATION(SpecialRegister, SR_VRSAVE, (3));
++CONSTANT_REGISTER_DECLARATION(SpecialRegister, SR_SPEFSCR, (4));
++CONSTANT_REGISTER_DECLARATION(SpecialRegister, SR_PPR, (5));
++
++#ifndef DONT_USE_REGISTER_DEFINES
++#define SR_XER ((SpecialRegister)(SR_XER_SpecialRegisterEnumValue))
++#define SR_LR ((SpecialRegister)(SR_LR_SpecialRegisterEnumValue))
++#define SR_CTR ((SpecialRegister)(SR_CTR_SpecialRegisterEnumValue))
++#define SR_VRSAVE ((SpecialRegister)(SR_VRSAVE_SpecialRegisterEnumValue))
++#define SR_SPEFSCR ((SpecialRegister)(SR_SPEFSCR_SpecialRegisterEnumValue))
++#define SR_PPR ((SpecialRegister)(SR_PPR_SpecialRegisterEnumValue))
++#endif // DONT_USE_REGISTER_DEFINES
++
++
++// Use VectorRegister as shortcut
++class VectorRegisterImpl;
++typedef VectorRegisterImpl* VectorRegister;
++
++inline VectorRegister as_VectorRegister(int encoding) {
++ return (VectorRegister)(intptr_t)encoding;
++}
++
++// The implementation of vector registers for the Power architecture
++class VectorRegisterImpl: public AbstractRegisterImpl {
++ public:
++ enum {
++ number_of_registers = 32
++ };
++
++ // construction
++ inline friend VectorRegister as_VectorRegister(int encoding);
++
++ // accessors
++ int encoding() const { assert(is_valid(), "invalid register"); return value(); }
++
++ // testers
++ bool is_valid() const { return 0 <= value() && value() < number_of_registers; }
++
++ const char* name() const;
++};
++
++// The Vector registers of the Power architecture
++
++CONSTANT_REGISTER_DECLARATION(VectorRegister, vnoreg, (-1));
++
++CONSTANT_REGISTER_DECLARATION(VectorRegister, VR0, ( 0));
++CONSTANT_REGISTER_DECLARATION(VectorRegister, VR1, ( 1));
++CONSTANT_REGISTER_DECLARATION(VectorRegister, VR2, ( 2));
++CONSTANT_REGISTER_DECLARATION(VectorRegister, VR3, ( 3));
++CONSTANT_REGISTER_DECLARATION(VectorRegister, VR4, ( 4));
++CONSTANT_REGISTER_DECLARATION(VectorRegister, VR5, ( 5));
++CONSTANT_REGISTER_DECLARATION(VectorRegister, VR6, ( 6));
++CONSTANT_REGISTER_DECLARATION(VectorRegister, VR7, ( 7));
++CONSTANT_REGISTER_DECLARATION(VectorRegister, VR8, ( 8));
++CONSTANT_REGISTER_DECLARATION(VectorRegister, VR9, ( 9));
++CONSTANT_REGISTER_DECLARATION(VectorRegister, VR10, (10));
++CONSTANT_REGISTER_DECLARATION(VectorRegister, VR11, (11));
++CONSTANT_REGISTER_DECLARATION(VectorRegister, VR12, (12));
++CONSTANT_REGISTER_DECLARATION(VectorRegister, VR13, (13));
++CONSTANT_REGISTER_DECLARATION(VectorRegister, VR14, (14));
++CONSTANT_REGISTER_DECLARATION(VectorRegister, VR15, (15));
++CONSTANT_REGISTER_DECLARATION(VectorRegister, VR16, (16));
++CONSTANT_REGISTER_DECLARATION(VectorRegister, VR17, (17));
++CONSTANT_REGISTER_DECLARATION(VectorRegister, VR18, (18));
++CONSTANT_REGISTER_DECLARATION(VectorRegister, VR19, (19));
++CONSTANT_REGISTER_DECLARATION(VectorRegister, VR20, (20));
++CONSTANT_REGISTER_DECLARATION(VectorRegister, VR21, (21));
++CONSTANT_REGISTER_DECLARATION(VectorRegister, VR22, (22));
++CONSTANT_REGISTER_DECLARATION(VectorRegister, VR23, (23));
++CONSTANT_REGISTER_DECLARATION(VectorRegister, VR24, (24));
++CONSTANT_REGISTER_DECLARATION(VectorRegister, VR25, (25));
++CONSTANT_REGISTER_DECLARATION(VectorRegister, VR26, (26));
++CONSTANT_REGISTER_DECLARATION(VectorRegister, VR27, (27));
++CONSTANT_REGISTER_DECLARATION(VectorRegister, VR28, (28));
++CONSTANT_REGISTER_DECLARATION(VectorRegister, VR29, (29));
++CONSTANT_REGISTER_DECLARATION(VectorRegister, VR30, (30));
++CONSTANT_REGISTER_DECLARATION(VectorRegister, VR31, (31));
++
++#ifndef DONT_USE_REGISTER_DEFINES
++#define vnoreg ((VectorRegister)(vnoreg_VectorRegisterEnumValue))
++#define VR0 ((VectorRegister)( VR0_VectorRegisterEnumValue))
++#define VR1 ((VectorRegister)( VR1_VectorRegisterEnumValue))
++#define VR2 ((VectorRegister)( VR2_VectorRegisterEnumValue))
++#define VR3 ((VectorRegister)( VR3_VectorRegisterEnumValue))
++#define VR4 ((VectorRegister)( VR4_VectorRegisterEnumValue))
++#define VR5 ((VectorRegister)( VR5_VectorRegisterEnumValue))
++#define VR6 ((VectorRegister)( VR6_VectorRegisterEnumValue))
++#define VR7 ((VectorRegister)( VR7_VectorRegisterEnumValue))
++#define VR8 ((VectorRegister)( VR8_VectorRegisterEnumValue))
++#define VR9 ((VectorRegister)( VR9_VectorRegisterEnumValue))
++#define VR10 ((VectorRegister)( VR10_VectorRegisterEnumValue))
++#define VR11 ((VectorRegister)( VR11_VectorRegisterEnumValue))
++#define VR12 ((VectorRegister)( VR12_VectorRegisterEnumValue))
++#define VR13 ((VectorRegister)( VR13_VectorRegisterEnumValue))
++#define VR14 ((VectorRegister)( VR14_VectorRegisterEnumValue))
++#define VR15 ((VectorRegister)( VR15_VectorRegisterEnumValue))
++#define VR16 ((VectorRegister)( VR16_VectorRegisterEnumValue))
++#define VR17 ((VectorRegister)( VR17_VectorRegisterEnumValue))
++#define VR18 ((VectorRegister)( VR18_VectorRegisterEnumValue))
++#define VR19 ((VectorRegister)( VR19_VectorRegisterEnumValue))
++#define VR20 ((VectorRegister)( VR20_VectorRegisterEnumValue))
++#define VR21 ((VectorRegister)( VR21_VectorRegisterEnumValue))
++#define VR22 ((VectorRegister)( VR22_VectorRegisterEnumValue))
++#define VR23 ((VectorRegister)( VR23_VectorRegisterEnumValue))
++#define VR24 ((VectorRegister)( VR24_VectorRegisterEnumValue))
++#define VR25 ((VectorRegister)( VR25_VectorRegisterEnumValue))
++#define VR26 ((VectorRegister)( VR26_VectorRegisterEnumValue))
++#define VR27 ((VectorRegister)( VR27_VectorRegisterEnumValue))
++#define VR28 ((VectorRegister)( VR28_VectorRegisterEnumValue))
++#define VR29 ((VectorRegister)( VR29_VectorRegisterEnumValue))
++#define VR30 ((VectorRegister)( VR30_VectorRegisterEnumValue))
++#define VR31 ((VectorRegister)( VR31_VectorRegisterEnumValue))
++#endif // DONT_USE_REGISTER_DEFINES
++
++
++// Maximum number of incoming arguments that can be passed in integer registers.
++const int PPC_ARGS_IN_REGS_NUM = 8;
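++// (This matches the ABI summary above: the eight GPRs r3..r10 carry
++// the first eight integer/pointer arguments.)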
++
++
++// Need to know the total number of registers of all sorts for SharedInfo.
++// Define a class that exports it.
++class ConcreteRegisterImpl : public AbstractRegisterImpl {
++ public:
++ enum {
++ // This number must be large enough to cover REG_COUNT (defined by c2) registers.
++ // There is no requirement that any ordering here matches any ordering c2 gives
++ // its optoregs.
++ number_of_registers =
++ ( RegisterImpl::number_of_registers +
++ FloatRegisterImpl::number_of_registers )
++ * 2 // register halves
++ + ConditionRegisterImpl::number_of_registers // condition code registers
++ + SpecialRegisterImpl::number_of_registers // special registers
++ + VectorRegisterImpl::number_of_registers // vector registers
++ };
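++
++ // (Consistency sketch from the counts above: number_of_registers =
++ // (32 + 32) * 2 + 8 + 6 + 32 = 174. register_ppc.cpp follows the same
++ // scheme: max_gpr = 32 * 2 = 64, max_fpr = 64 + 32 * 2 = 128 and
++ // max_cnd = 128 + 8 = 136.)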
++
++ static const int max_gpr;
++ static const int max_fpr;
++ static const int max_cnd;
++};
++
++// Common register declarations used in assembler code.
++REGISTER_DECLARATION(Register, R0_SCRATCH, R0); // volatile
++REGISTER_DECLARATION(Register, R1_SP, R1); // non-volatile
++REGISTER_DECLARATION(Register, R2_TOC, R2); // volatile
++REGISTER_DECLARATION(Register, R3_RET, R3); // volatile
++REGISTER_DECLARATION(Register, R3_ARG1, R3); // volatile
++REGISTER_DECLARATION(Register, R4_ARG2, R4); // volatile
++REGISTER_DECLARATION(Register, R5_ARG3, R5); // volatile
++REGISTER_DECLARATION(Register, R6_ARG4, R6); // volatile
++REGISTER_DECLARATION(Register, R7_ARG5, R7); // volatile
++REGISTER_DECLARATION(Register, R8_ARG6, R8); // volatile
++REGISTER_DECLARATION(Register, R9_ARG7, R9); // volatile
++REGISTER_DECLARATION(Register, R10_ARG8, R10); // volatile
++REGISTER_DECLARATION(FloatRegister, F0_SCRATCH, F0); // volatile
++REGISTER_DECLARATION(FloatRegister, F1_RET, F1); // volatile
++REGISTER_DECLARATION(FloatRegister, F1_ARG1, F1); // volatile
++REGISTER_DECLARATION(FloatRegister, F2_ARG2, F2); // volatile
++REGISTER_DECLARATION(FloatRegister, F3_ARG3, F3); // volatile
++REGISTER_DECLARATION(FloatRegister, F4_ARG4, F4); // volatile
++REGISTER_DECLARATION(FloatRegister, F5_ARG5, F5); // volatile
++REGISTER_DECLARATION(FloatRegister, F6_ARG6, F6); // volatile
++REGISTER_DECLARATION(FloatRegister, F7_ARG7, F7); // volatile
++REGISTER_DECLARATION(FloatRegister, F8_ARG8, F8); // volatile
++REGISTER_DECLARATION(FloatRegister, F9_ARG9, F9); // volatile
++REGISTER_DECLARATION(FloatRegister, F10_ARG10, F10); // volatile
++REGISTER_DECLARATION(FloatRegister, F11_ARG11, F11); // volatile
++REGISTER_DECLARATION(FloatRegister, F12_ARG12, F12); // volatile
++REGISTER_DECLARATION(FloatRegister, F13_ARG13, F13); // volatile
++
++#ifndef DONT_USE_REGISTER_DEFINES
++#define R0_SCRATCH AS_REGISTER(Register, R0)
++#define R1_SP AS_REGISTER(Register, R1)
++#define R2_TOC AS_REGISTER(Register, R2)
++#define R3_RET AS_REGISTER(Register, R3)
++#define R3_ARG1 AS_REGISTER(Register, R3)
++#define R4_ARG2 AS_REGISTER(Register, R4)
++#define R5_ARG3 AS_REGISTER(Register, R5)
++#define R6_ARG4 AS_REGISTER(Register, R6)
++#define R7_ARG5 AS_REGISTER(Register, R7)
++#define R8_ARG6 AS_REGISTER(Register, R8)
++#define R9_ARG7 AS_REGISTER(Register, R9)
++#define R10_ARG8 AS_REGISTER(Register, R10)
++#define F0_SCRATCH AS_REGISTER(FloatRegister, F0)
++#define F1_RET AS_REGISTER(FloatRegister, F1)
++#define F1_ARG1 AS_REGISTER(FloatRegister, F1)
++#define F2_ARG2 AS_REGISTER(FloatRegister, F2)
++#define F3_ARG3 AS_REGISTER(FloatRegister, F3)
++#define F4_ARG4 AS_REGISTER(FloatRegister, F4)
++#define F5_ARG5 AS_REGISTER(FloatRegister, F5)
++#define F6_ARG6 AS_REGISTER(FloatRegister, F6)
++#define F7_ARG7 AS_REGISTER(FloatRegister, F7)
++#define F8_ARG8 AS_REGISTER(FloatRegister, F8)
++#define F9_ARG9 AS_REGISTER(FloatRegister, F9)
++#define F10_ARG10 AS_REGISTER(FloatRegister, F10)
++#define F11_ARG11 AS_REGISTER(FloatRegister, F11)
++#define F12_ARG12 AS_REGISTER(FloatRegister, F12)
++#define F13_ARG13 AS_REGISTER(FloatRegister, F13)
++#endif
++
++// Register declarations to be used in frame manager assembly code.
++// Use only non-volatile registers in order to keep values across C-calls.
++#ifdef CC_INTERP
++REGISTER_DECLARATION(Register, R14_state, R14); // address of new cInterpreter.
++REGISTER_DECLARATION(Register, R15_prev_state, R15); // address of old cInterpreter
++#else // CC_INTERP
++REGISTER_DECLARATION(Register, R14_bcp, R14);
++REGISTER_DECLARATION(Register, R15_esp, R15);
++REGISTER_DECLARATION(FloatRegister, F15_ftos, F15);
++#endif // CC_INTERP
++REGISTER_DECLARATION(Register, R16_thread, R16); // address of current thread
++REGISTER_DECLARATION(Register, R17_tos, R17); // address of Java tos (prepushed).
++REGISTER_DECLARATION(Register, R18_locals, R18); // address of first param slot (receiver).
++REGISTER_DECLARATION(Register, R19_method, R19); // address of current method
++#ifndef DONT_USE_REGISTER_DEFINES
++#ifdef CC_INTERP
++#define R14_state AS_REGISTER(Register, R14)
++#define R15_prev_state AS_REGISTER(Register, R15)
++#else // CC_INTERP
++#define R14_bcp AS_REGISTER(Register, R14)
++#define R15_esp AS_REGISTER(Register, R15)
++#define F15_ftos AS_REGISTER(FloatRegister, F15)
++#endif // CC_INTERP
++#define R16_thread AS_REGISTER(Register, R16)
++#define R17_tos AS_REGISTER(Register, R17)
++#define R18_locals AS_REGISTER(Register, R18)
++#define R19_method AS_REGISTER(Register, R19)
++#define R21_sender_SP AS_REGISTER(Register, R21)
++#define R23_method_handle AS_REGISTER(Register, R23)
++#endif
++
++// Temporary registers to be used within frame manager. We can use
++// the non-volatiles because the call stub has saved them.
++// Use only non-volatile registers in order to keep values across C-calls.
++REGISTER_DECLARATION(Register, R21_tmp1, R21);
++REGISTER_DECLARATION(Register, R22_tmp2, R22);
++REGISTER_DECLARATION(Register, R23_tmp3, R23);
++REGISTER_DECLARATION(Register, R24_tmp4, R24);
++REGISTER_DECLARATION(Register, R25_tmp5, R25);
++REGISTER_DECLARATION(Register, R26_tmp6, R26);
++REGISTER_DECLARATION(Register, R27_tmp7, R27);
++REGISTER_DECLARATION(Register, R28_tmp8, R28);
++REGISTER_DECLARATION(Register, R29_tmp9, R29);
++#ifndef CC_INTERP
++REGISTER_DECLARATION(Register, R24_dispatch_addr, R24);
++REGISTER_DECLARATION(Register, R25_templateTableBase, R25);
++REGISTER_DECLARATION(Register, R26_monitor, R26);
++REGISTER_DECLARATION(Register, R27_constPoolCache, R27);
++REGISTER_DECLARATION(Register, R28_mdx, R28);
++#endif // CC_INTERP
++
++#ifndef DONT_USE_REGISTER_DEFINES
++#define R21_tmp1 AS_REGISTER(Register, R21)
++#define R22_tmp2 AS_REGISTER(Register, R22)
++#define R23_tmp3 AS_REGISTER(Register, R23)
++#define R24_tmp4 AS_REGISTER(Register, R24)
++#define R25_tmp5 AS_REGISTER(Register, R25)
++#define R26_tmp6 AS_REGISTER(Register, R26)
++#define R27_tmp7 AS_REGISTER(Register, R27)
++#define R28_tmp8 AS_REGISTER(Register, R28)
++#define R29_tmp9 AS_REGISTER(Register, R29)
++#ifndef CC_INTERP
++// Lmonitors : monitor pointer
++// LcpoolCache: constant pool cache
++// mdx: method data index
++#define R24_dispatch_addr AS_REGISTER(Register, R24)
++#define R25_templateTableBase AS_REGISTER(Register, R25)
++#define R26_monitor AS_REGISTER(Register, R26)
++#define R27_constPoolCache AS_REGISTER(Register, R27)
++#define R28_mdx AS_REGISTER(Register, R28)
++#endif
++
++#define CCR4_is_synced AS_REGISTER(ConditionRegister, CCR4)
++#endif
++
++// Scratch registers are volatile.
++REGISTER_DECLARATION(Register, R11_scratch1, R11);
++REGISTER_DECLARATION(Register, R12_scratch2, R12);
++#ifndef DONT_USE_REGISTER_DEFINES
++#define R11_scratch1 AS_REGISTER(Register, R11)
++#define R12_scratch2 AS_REGISTER(Register, R12)
++#endif
++
++#endif // CPU_PPC_VM_REGISTER_PPC_HPP
+--- ./hotspot/src/cpu/ppc/vm/relocInfo_ppc.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/relocInfo_ppc.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,133 @@
++/*
++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "asm/assembler.inline.hpp"
++#include "assembler_ppc.inline.hpp"
++#include "code/relocInfo.hpp"
++#include "nativeInst_ppc.hpp"
++#include "oops/oop.inline.hpp"
++#include "runtime/safepoint.hpp"
++
++void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) {
++ bool copy_back_to_oop_pool = true; // TODO: PPC port
++ // The following comment is from the declaration of DataRelocation:
++ //
++ // "The "o" (displacement) argument is relevant only to split relocations
++ // on RISC machines. In some CPUs (SPARC), the set-hi and set-lo ins'ns
++ // can encode more than 32 bits between them. This allows compilers to
++ // share set-hi instructions between addresses that differ by a small
++ // offset (e.g., different static variables in the same class).
++ // On such machines, the "x" argument to set_value on all set-lo
++ // instructions must be the same as the "x" argument for the
++ // corresponding set-hi instructions. The "o" arguments for the
++ // set-hi instructions are ignored, and must not affect the high-half
++ // immediate constant. The "o" arguments for the set-lo instructions are
++ // added into the low-half immediate constant, and must not overflow it."
++ //
++ // Currently we don't support splitting of relocations, so o must be
++ // zero:
++ assert(o == 0, "tried to split relocations");
++
++ if (!verify_only) {
++ if (format() != 1) {
++ nativeMovConstReg_at(addr())->set_data_plain(((intptr_t)x), code());
++ } else {
++ assert(type() == relocInfo::oop_type || type() == relocInfo::metadata_type,
++ "how to encode else?");
++ narrowOop no = (type() == relocInfo::oop_type) ?
++ oopDesc::encode_heap_oop((oop)x) : Klass::encode_klass((Klass*)x);
++ nativeMovConstReg_at(addr())->set_narrow_oop(no, code());
++ }
++ } else {
++ assert((address) (nativeMovConstReg_at(addr())->data()) == x, "data must match");
++ }
++}
++
++address Relocation::pd_call_destination(address orig_addr) {
++ intptr_t adj = 0;
++ address inst_loc = addr();
++
++ if (orig_addr != NULL) {
++ // We just moved this call instruction from orig_addr to addr().
++ // This means its target will appear to have grown by addr() - orig_addr.
++ adj = -(inst_loc - orig_addr);
++ }
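++ // (Illustration: if the instruction moved from 0x1000 to 0x1040, a
++ // pc-relative destination would appear 0x40 too high; adj = -0x40
++ // cancels that out below.)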
++ if (NativeFarCall::is_far_call_at(inst_loc)) {
++ NativeFarCall* call = nativeFarCall_at(inst_loc);
++ return call->destination() + (intptr_t)(call->is_pcrelative() ? adj : 0);
++ } else if (NativeJump::is_jump_at(inst_loc)) {
++ NativeJump* jump = nativeJump_at(inst_loc);
++ return jump->jump_destination() + (intptr_t)(jump->is_pcrelative() ? adj : 0);
++ } else if (NativeConditionalFarBranch::is_conditional_far_branch_at(inst_loc)) {
++ NativeConditionalFarBranch* branch = NativeConditionalFarBranch_at(inst_loc);
++ return branch->branch_destination();
++ } else {
++ // There are two instructions at the beginning of a stub; therefore we
++ // load at orig_addr + 8.
++ orig_addr = nativeCall_at(inst_loc)->get_trampoline();
++ if (orig_addr == NULL) {
++ return (address) -1;
++ } else {
++ return (address) nativeMovConstReg_at(orig_addr + 8)->data();
++ }
++ }
++}
++
++void Relocation::pd_set_call_destination(address x) {
++ address inst_loc = addr();
++
++ if (NativeFarCall::is_far_call_at(inst_loc)) {
++ NativeFarCall* call = nativeFarCall_at(inst_loc);
++ call->set_destination(x);
++ } else if (NativeJump::is_jump_at(inst_loc)) {
++ NativeJump* jump= nativeJump_at(inst_loc);
++ jump->set_jump_destination(x);
++ } else if (NativeConditionalFarBranch::is_conditional_far_branch_at(inst_loc)) {
++ NativeConditionalFarBranch* branch = NativeConditionalFarBranch_at(inst_loc);
++ branch->set_branch_destination(x);
++ } else {
++ NativeCall* call = nativeCall_at(inst_loc);
++ call->set_destination_mt_safe(x, false);
++ }
++}
++
++address* Relocation::pd_address_in_code() {
++ ShouldNotReachHere();
++ return 0;
++}
++
++address Relocation::pd_get_address_from_code() {
++ return (address)(nativeMovConstReg_at(addr())->data());
++}
++
++void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) {
++}
++
++void poll_return_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) {
++}
++
++void metadata_Relocation::pd_fix_value(address x) {
++}
+--- ./hotspot/src/cpu/ppc/vm/relocInfo_ppc.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/relocInfo_ppc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,46 @@
++/*
++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_PPC_VM_RELOCINFO_PPC_HPP
++#define CPU_PPC_VM_RELOCINFO_PPC_HPP
++
++ // machine-dependent parts of class relocInfo
++ private:
++ enum {
++ // Since Power instructions are whole words,
++ // the two low-order offset bits can always be discarded.
++ offset_unit = 4,
++
++ // There is no need for format bits; the instructions are
++ // sufficiently self-identifying.
++#ifndef _LP64
++ format_width = 0
++#else
++ // Except narrow oops in 64-bits VM.
++ format_width = 1
++#endif
++ };
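++
++ // Example (illustrative only): with offset_unit = 4, a byte offset of
++ // 12 between two relocated instructions is stored as 12 / 4 = 3 units,
++ // since every Power instruction starts on a 4-byte boundary.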
++
++#endif // CPU_PPC_VM_RELOCINFO_PPC_HPP
+--- ./hotspot/src/cpu/ppc/vm/runtime_ppc.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/runtime_ppc.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,191 @@
++/*
++ * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#ifdef COMPILER2
++#include "asm/assembler.inline.hpp"
++#include "asm/macroAssembler.inline.hpp"
++#include "classfile/systemDictionary.hpp"
++#include "code/vmreg.hpp"
++#include "interpreter/interpreter.hpp"
++#include "nativeInst_ppc.hpp"
++#include "opto/runtime.hpp"
++#include "runtime/interfaceSupport.hpp"
++#include "runtime/sharedRuntime.hpp"
++#include "runtime/stubRoutines.hpp"
++#include "runtime/vframeArray.hpp"
++#include "utilities/globalDefinitions.hpp"
++#include "vmreg_ppc.inline.hpp"
++#endif
++
++#define __ masm->
++
++
++#ifdef COMPILER2
++
++// SP adjustment (must use unextended SP) for method handle call sites
++// during exception handling.
++static intptr_t adjust_SP_for_methodhandle_callsite(JavaThread *thread) {
++ RegisterMap map(thread, false);
++ // The frame constructor will do the correction for us (see frame::adjust_unextended_SP).
++ frame mh_caller_frame = thread->last_frame().sender(&map);
++ assert(mh_caller_frame.is_compiled_frame(), "Only may reach here for compiled MH call sites");
++ return (intptr_t) mh_caller_frame.unextended_sp();
++}
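++// (This helper is used below on the mh_callsite path: the exception
++// blob calls it via call_c and then sets R1_SP to the returned
++// unextended SP.)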
++
++//------------------------------generate_exception_blob---------------------------
++// Creates the exception blob at the end.
++// Compiled methods jump to this code via the exception blob.
++//
++// Given an exception pc at a call, we call into the runtime for the
++// handler in this method. This handler might merely restore state
++// (i.e. callee-save registers), unwind the frame, and jump to the
++// exception handler for the nmethod if there is no Java-level handler
++// for the nmethod.
++//
++// This code is entered with a jmp.
++//
++// Arguments:
++// R3_ARG1: exception oop
++// R4_ARG2: exception pc
++//
++// Results:
++// R3_ARG1: exception oop
++// R4_ARG2: exception pc in caller
++// destination: exception handler of caller
++//
++// Note: the exception pc MUST be at a call (precise debug information)
++//
++void OptoRuntime::generate_exception_blob() {
++ // Allocate space for the code.
++ ResourceMark rm;
++ // Setup code generation tools.
++ CodeBuffer buffer("exception_blob", 2048, 1024);
++ InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
++
++ address start = __ pc();
++
++ int frame_size_in_bytes = frame::abi_reg_args_size;
++ OopMap* map = new OopMap(frame_size_in_bytes / sizeof(jint), 0);
++
++ // Exception pc is 'return address' for stack walker.
++ __ std(R4_ARG2/*exception pc*/, _abi(lr), R1_SP);
++
++ // Store the exception in the Thread object.
++ __ std(R3_ARG1/*exception oop*/, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
++ __ std(R4_ARG2/*exception pc*/, in_bytes(JavaThread::exception_pc_offset()), R16_thread);
++
++ // Save callee-saved registers.
++ // Push a C frame for the exception blob. It is needed for the C call later on.
++ __ push_frame_reg_args(0, R11_scratch1);
++
++ // This call does all the hard work. It checks if an exception handler
++ // exists in the method.
++ // If so, it returns the handler address.
++ // If not, it prepares for stack-unwinding, restoring the callee-save
++ // registers of the frame being removed.
++ __ set_last_Java_frame(/*sp=*/R1_SP, noreg);
++
++ __ mr(R3_ARG1, R16_thread);
++#if defined(ABI_ELFv2)
++ __ call_c((address) OptoRuntime::handle_exception_C, relocInfo::none);
++#else
++ __ call_c(CAST_FROM_FN_PTR(FunctionDescriptor*, OptoRuntime::handle_exception_C),
++ relocInfo::none);
++#endif
++ address calls_return_pc = __ last_calls_return_pc();
++# ifdef ASSERT
++ __ cmpdi(CCR0, R3_RET, 0);
++ __ asm_assert_ne("handle_exception_C must not return NULL", 0x601);
++# endif
++
++ // Set an oopmap for the call site. This oopmap will only be used if we
++ // are unwinding the stack. Hence, all locations will be dead.
++ // Callee-saved registers will be the same as the frame above (i.e.,
++ // handle_exception_stub), since they were restored when we got the
++ // exception.
++ OopMapSet* oop_maps = new OopMapSet();
++ oop_maps->add_gc_map(calls_return_pc - start, map);
++
++ // Get unextended_sp for method handle call sites.
++ Label mh_callsite, mh_done; // Use a 2nd C call if it's a method handle call site.
++ __ lwa(R4_ARG2, in_bytes(JavaThread::is_method_handle_return_offset()), R16_thread);
++ __ cmpwi(CCR0, R4_ARG2, 0);
++ __ bne(CCR0, mh_callsite);
++
++ __ mtctr(R3_RET); // Move address of exception handler to SR_CTR.
++ __ reset_last_Java_frame();
++ __ pop_frame();
++
++ __ bind(mh_done);
++ // We have a handler in register SR_CTR (could be deopt blob).
++
++ // Get the exception oop.
++ __ ld(R3_ARG1, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
++
++ // Get the exception pc in case we are deoptimized.
++ __ ld(R4_ARG2, in_bytes(JavaThread::exception_pc_offset()), R16_thread);
++
++ // Reset thread values.
++ __ li(R0, 0);
++#ifdef ASSERT
++ __ std(R0, in_bytes(JavaThread::exception_handler_pc_offset()), R16_thread);
++ __ std(R0, in_bytes(JavaThread::exception_pc_offset()), R16_thread);
++#endif
++ // Clear the exception oop so GC no longer processes it as a root.
++ __ std(R0, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
++
++ // Move exception pc into SR_LR.
++ __ mtlr(R4_ARG2);
++ __ bctr();
++
++
++ // Same as above, but also set sp to unextended_sp.
++ __ bind(mh_callsite);
++ __ mr(R31, R3_RET); // Save branch address.
++ __ mr(R3_ARG1, R16_thread);
++#if defined(ABI_ELFv2)
++ __ call_c((address) adjust_SP_for_methodhandle_callsite, relocInfo::none);
++#else
++ __ call_c(CAST_FROM_FN_PTR(FunctionDescriptor*, adjust_SP_for_methodhandle_callsite), relocInfo::none);
++#endif
++ // Returns unextended_sp in R3_RET.
++
++ __ mtctr(R31); // Move address of exception handler to SR_CTR.
++ __ reset_last_Java_frame();
++
++ __ mr(R1_SP, R3_RET); // Set sp to unextended_sp.
++ __ b(mh_done);
++
++
++ // Make sure all code is generated.
++ masm->flush();
++
++ // Set exception blob.
++ _exception_blob = ExceptionBlob::create(&buffer, oop_maps,
++ frame_size_in_bytes/wordSize);
++}
++
++#endif // COMPILER2
+--- ./hotspot/src/cpu/ppc/vm/sharedRuntime_ppc.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/sharedRuntime_ppc.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,3255 @@
++/*
++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2014 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "asm/macroAssembler.inline.hpp"
++#include "code/debugInfoRec.hpp"
++#include "code/icBuffer.hpp"
++#include "code/vtableStubs.hpp"
++#include "interpreter/interpreter.hpp"
++#include "oops/compiledICHolder.hpp"
++#include "prims/jvmtiRedefineClassesTrace.hpp"
++#include "runtime/sharedRuntime.hpp"
++#include "runtime/vframeArray.hpp"
++#include "vmreg_ppc.inline.hpp"
++#include "adfiles/ad_ppc_64.hpp"
++#ifdef COMPILER1
++#include "c1/c1_Runtime1.hpp"
++#endif
++#ifdef COMPILER2
++#include "opto/runtime.hpp"
++#endif
++
++#define __ masm->
++
++#ifdef PRODUCT
++#define BLOCK_COMMENT(str) // nothing
++#else
++#define BLOCK_COMMENT(str) __ block_comment(str)
++#endif
++
++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
++
++
++class RegisterSaver {
++ // Used for saving volatile registers.
++ public:
++
++ // Support different return pc locations.
++ enum ReturnPCLocation {
++ return_pc_is_lr,
++ return_pc_is_r4,
++ return_pc_is_thread_saved_exception_pc
++ };
++
++ static OopMap* push_frame_reg_args_and_save_live_registers(MacroAssembler* masm,
++ int* out_frame_size_in_bytes,
++ bool generate_oop_map,
++ int return_pc_adjustment,
++ ReturnPCLocation return_pc_location);
++ static void restore_live_registers_and_pop_frame(MacroAssembler* masm,
++ int frame_size_in_bytes,
++ bool restore_ctr);
++
++ static void push_frame_and_save_argument_registers(MacroAssembler* masm,
++ Register r_temp,
++ int frame_size,
++ int total_args,
++ const VMRegPair *regs, const VMRegPair *regs2 = NULL);
++ static void restore_argument_registers_and_pop_frame(MacroAssembler*masm,
++ int frame_size,
++ int total_args,
++ const VMRegPair *regs, const VMRegPair *regs2 = NULL);
++
++ // During deoptimization only the result registers need to be restored;
++ // all the other values have already been extracted.
++ static void restore_result_registers(MacroAssembler* masm, int frame_size_in_bytes);
++
++ // Constants and data structures:
++
++ typedef enum {
++ int_reg = 0,
++ float_reg = 1,
++ special_reg = 2
++ } RegisterType;
++
++ typedef enum {
++ reg_size = 8,
++ half_reg_size = reg_size / 2,
++ } RegisterConstants;
++
++ typedef struct {
++ RegisterType reg_type;
++ int reg_num;
++ VMReg vmreg;
++ } LiveRegType;
++};
++
++
++#define RegisterSaver_LiveSpecialReg(regname) \
++ { RegisterSaver::special_reg, regname->encoding(), regname->as_VMReg() }
++
++#define RegisterSaver_LiveIntReg(regname) \
++ { RegisterSaver::int_reg, regname->encoding(), regname->as_VMReg() }
++
++#define RegisterSaver_LiveFloatReg(regname) \
++ { RegisterSaver::float_reg, regname->encoding(), regname->as_VMReg() }
++
++static const RegisterSaver::LiveRegType RegisterSaver_LiveRegs[] = {
++ // Live registers which get spilled to the stack. Register
++ // positions in this array correspond directly to the stack layout.
++
++ //
++ // live special registers:
++ //
++ RegisterSaver_LiveSpecialReg(SR_CTR),
++ //
++ // live float registers:
++ //
++ RegisterSaver_LiveFloatReg( F0 ),
++ RegisterSaver_LiveFloatReg( F1 ),
++ RegisterSaver_LiveFloatReg( F2 ),
++ RegisterSaver_LiveFloatReg( F3 ),
++ RegisterSaver_LiveFloatReg( F4 ),
++ RegisterSaver_LiveFloatReg( F5 ),
++ RegisterSaver_LiveFloatReg( F6 ),
++ RegisterSaver_LiveFloatReg( F7 ),
++ RegisterSaver_LiveFloatReg( F8 ),
++ RegisterSaver_LiveFloatReg( F9 ),
++ RegisterSaver_LiveFloatReg( F10 ),
++ RegisterSaver_LiveFloatReg( F11 ),
++ RegisterSaver_LiveFloatReg( F12 ),
++ RegisterSaver_LiveFloatReg( F13 ),
++ RegisterSaver_LiveFloatReg( F14 ),
++ RegisterSaver_LiveFloatReg( F15 ),
++ RegisterSaver_LiveFloatReg( F16 ),
++ RegisterSaver_LiveFloatReg( F17 ),
++ RegisterSaver_LiveFloatReg( F18 ),
++ RegisterSaver_LiveFloatReg( F19 ),
++ RegisterSaver_LiveFloatReg( F20 ),
++ RegisterSaver_LiveFloatReg( F21 ),
++ RegisterSaver_LiveFloatReg( F22 ),
++ RegisterSaver_LiveFloatReg( F23 ),
++ RegisterSaver_LiveFloatReg( F24 ),
++ RegisterSaver_LiveFloatReg( F25 ),
++ RegisterSaver_LiveFloatReg( F26 ),
++ RegisterSaver_LiveFloatReg( F27 ),
++ RegisterSaver_LiveFloatReg( F28 ),
++ RegisterSaver_LiveFloatReg( F29 ),
++ RegisterSaver_LiveFloatReg( F30 ),
++ RegisterSaver_LiveFloatReg( F31 ),
++ //
++ // live integer registers:
++ //
++ RegisterSaver_LiveIntReg( R0 ),
++ //RegisterSaver_LiveIntReg( R1 ), // stack pointer
++ RegisterSaver_LiveIntReg( R2 ),
++ RegisterSaver_LiveIntReg( R3 ),
++ RegisterSaver_LiveIntReg( R4 ),
++ RegisterSaver_LiveIntReg( R5 ),
++ RegisterSaver_LiveIntReg( R6 ),
++ RegisterSaver_LiveIntReg( R7 ),
++ RegisterSaver_LiveIntReg( R8 ),
++ RegisterSaver_LiveIntReg( R9 ),
++ RegisterSaver_LiveIntReg( R10 ),
++ RegisterSaver_LiveIntReg( R11 ),
++ RegisterSaver_LiveIntReg( R12 ),
++ //RegisterSaver_LiveIntReg( R13 ), // system thread id
++ RegisterSaver_LiveIntReg( R14 ),
++ RegisterSaver_LiveIntReg( R15 ),
++ RegisterSaver_LiveIntReg( R16 ),
++ RegisterSaver_LiveIntReg( R17 ),
++ RegisterSaver_LiveIntReg( R18 ),
++ RegisterSaver_LiveIntReg( R19 ),
++ RegisterSaver_LiveIntReg( R20 ),
++ RegisterSaver_LiveIntReg( R21 ),
++ RegisterSaver_LiveIntReg( R22 ),
++ RegisterSaver_LiveIntReg( R23 ),
++ RegisterSaver_LiveIntReg( R24 ),
++ RegisterSaver_LiveIntReg( R25 ),
++ RegisterSaver_LiveIntReg( R26 ),
++ RegisterSaver_LiveIntReg( R27 ),
++ RegisterSaver_LiveIntReg( R28 ),
++ RegisterSaver_LiveIntReg( R29 ),
++ RegisterSaver_LiveIntReg( R31 ),
++ RegisterSaver_LiveIntReg( R30 ), // r30 must be the last register
++};
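++
++// (Count check, as a sketch: 1 special + 32 float + 30 integer entries
++// -- R1 and R13 are deliberately omitted above -- give regstosave_num
++// = 63, i.e. 63 * 8 = 504 bytes of register save area before rounding
++// to the frame alignment and adding the abi_reg_args header.)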
++
++OopMap* RegisterSaver::push_frame_reg_args_and_save_live_registers(MacroAssembler* masm,
++ int* out_frame_size_in_bytes,
++ bool generate_oop_map,
++ int return_pc_adjustment,
++ ReturnPCLocation return_pc_location) {
++ // Push an abi_reg_args-frame and store all registers which may be live.
++ // If requested, create an OopMap: Record volatile registers as
++ // callee-save values in an OopMap so their save locations will be
++ // propagated to the RegisterMap of the caller frame during
++ // StackFrameStream construction (needed for deoptimization; see
++ // compiledVFrame::create_stack_value).
++ // If return_pc_adjustment != 0, adjust the return pc by return_pc_adjustment.
++
++ int i;
++ int offset;
++
++ // calculate frame size
++ const int regstosave_num = sizeof(RegisterSaver_LiveRegs) /
++ sizeof(RegisterSaver::LiveRegType);
++ const int register_save_size = regstosave_num * reg_size;
++ const int frame_size_in_bytes = round_to(register_save_size, frame::alignment_in_bytes)
++ + frame::abi_reg_args_size;
++ *out_frame_size_in_bytes = frame_size_in_bytes;
++ const int frame_size_in_slots = frame_size_in_bytes / sizeof(jint);
++ const int register_save_offset = frame_size_in_bytes - register_save_size;
++
++ // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words.
++ OopMap* map = generate_oop_map ? new OopMap(frame_size_in_slots, 0) : NULL;
++
++ BLOCK_COMMENT("push_frame_reg_args_and_save_live_registers {");
++
++ // Save r30 in the last slot of the not-yet-pushed frame so that we
++ // can use it as a scratch reg.
++ __ std(R30, -reg_size, R1_SP);
++ assert(-reg_size == register_save_offset - frame_size_in_bytes + ((regstosave_num-1)*reg_size),
++ "consistency check");
++
++ // save the flags
++ // Do the save_LR_CR by hand and adjust the return pc if requested.
++ __ mfcr(R30);
++ __ std(R30, _abi(cr), R1_SP);
++ switch (return_pc_location) {
++ case return_pc_is_lr: __ mflr(R30); break;
++ case return_pc_is_r4: __ mr(R30, R4); break;
++ case return_pc_is_thread_saved_exception_pc:
++ __ ld(R30, thread_(saved_exception_pc)); break;
++ default: ShouldNotReachHere();
++ }
++ if (return_pc_adjustment != 0)
++ __ addi(R30, R30, return_pc_adjustment);
++ __ std(R30, _abi(lr), R1_SP);
++
++ // push a new frame
++ __ push_frame(frame_size_in_bytes, R30);
++
++ // save all registers (ints, floats and special regs)
++ offset = register_save_offset;
++ for (int i = 0; i < regstosave_num; i++) {
++ int reg_num = RegisterSaver_LiveRegs[i].reg_num;
++ int reg_type = RegisterSaver_LiveRegs[i].reg_type;
++
++ switch (reg_type) {
++ case RegisterSaver::int_reg: {
++ if (reg_num != 30) { // We spilled R30 right at the beginning.
++ __ std(as_Register(reg_num), offset, R1_SP);
++ }
++ break;
++ }
++ case RegisterSaver::float_reg: {
++ __ stfd(as_FloatRegister(reg_num), offset, R1_SP);
++ break;
++ }
++ case RegisterSaver::special_reg: {
++ if (reg_num == SR_CTR_SpecialRegisterEnumValue) {
++ __ mfctr(R30);
++ __ std(R30, offset, R1_SP);
++ } else {
++ Unimplemented();
++ }
++ break;
++ }
++ default:
++ ShouldNotReachHere();
++ }
++
++ if (generate_oop_map) {
++ map->set_callee_saved(VMRegImpl::stack2reg(offset>>2),
++ RegisterSaver_LiveRegs[i].vmreg);
++ map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size)>>2),
++ RegisterSaver_LiveRegs[i].vmreg->next());
++ }
++ offset += reg_size;
++ }
++
++ BLOCK_COMMENT("} push_frame_reg_args_and_save_live_registers");
++
++ // And we're done.
++ return map;
++}
++
++
++// Pop the current frame and restore all the registers that we
++// saved.
++void RegisterSaver::restore_live_registers_and_pop_frame(MacroAssembler* masm,
++ int frame_size_in_bytes,
++ bool restore_ctr) {
++ int i;
++ int offset;
++ const int regstosave_num = sizeof(RegisterSaver_LiveRegs) /
++ sizeof(RegisterSaver::LiveRegType);
++ const int register_save_size = regstosave_num * reg_size;
++ const int register_save_offset = frame_size_in_bytes - register_save_size;
++
++ BLOCK_COMMENT("restore_live_registers_and_pop_frame {");
++
++ // restore all registers (ints and floats)
++ offset = register_save_offset;
++ for (int i = 0; i < regstosave_num; i++) {
++ int reg_num = RegisterSaver_LiveRegs[i].reg_num;
++ int reg_type = RegisterSaver_LiveRegs[i].reg_type;
++
++ switch (reg_type) {
++ case RegisterSaver::int_reg: {
++ if (reg_num != 30) // R30 restored at the end, it's the tmp reg!
++ __ ld(as_Register(reg_num), offset, R1_SP);
++ break;
++ }
++ case RegisterSaver::float_reg: {
++ __ lfd(as_FloatRegister(reg_num), offset, R1_SP);
++ break;
++ }
++ case RegisterSaver::special_reg: {
++ if (reg_num == SR_CTR_SpecialRegisterEnumValue) {
++ if (restore_ctr) { // Nothing to do here if ctr already contains the next address.
++ __ ld(R30, offset, R1_SP);
++ __ mtctr(R30);
++ }
++ } else {
++ Unimplemented();
++ }
++ break;
++ }
++ default:
++ ShouldNotReachHere();
++ }
++ offset += reg_size;
++ }
++
++ // pop the frame
++ __ pop_frame();
++
++ // restore the flags
++ __ restore_LR_CR(R30);
++
++ // restore scratch register's value
++ __ ld(R30, -reg_size, R1_SP);
++
++ BLOCK_COMMENT("} restore_live_registers_and_pop_frame");
++}
++
++void RegisterSaver::push_frame_and_save_argument_registers(MacroAssembler* masm, Register r_temp,
++ int frame_size, int total_args, const VMRegPair *regs,
++ const VMRegPair *regs2) {
++ __ push_frame(frame_size, r_temp);
++ int st_off = frame_size - wordSize;
++ for (int i = 0; i < total_args; i++) {
++ VMReg r_1 = regs[i].first();
++ VMReg r_2 = regs[i].second();
++ if (!r_1->is_valid()) {
++ assert(!r_2->is_valid(), "");
++ continue;
++ }
++ if (r_1->is_Register()) {
++ Register r = r_1->as_Register();
++ __ std(r, st_off, R1_SP);
++ st_off -= wordSize;
++ } else if (r_1->is_FloatRegister()) {
++ FloatRegister f = r_1->as_FloatRegister();
++ __ stfd(f, st_off, R1_SP);
++ st_off -= wordSize;
++ }
++ }
++ if (regs2 != NULL) {
++ for (int i = 0; i < total_args; i++) {
++ VMReg r_1 = regs2[i].first();
++ VMReg r_2 = regs2[i].second();
++ if (!r_1->is_valid()) {
++ assert(!r_2->is_valid(), "");
++ continue;
++ }
++ if (r_1->is_Register()) {
++ Register r = r_1->as_Register();
++ __ std(r, st_off, R1_SP);
++ st_off -= wordSize;
++ } else if (r_1->is_FloatRegister()) {
++ FloatRegister f = r_1->as_FloatRegister();
++ __ stfd(f, st_off, R1_SP);
++ st_off -= wordSize;
++ }
++ }
++ }
++}
++
++void RegisterSaver::restore_argument_registers_and_pop_frame(MacroAssembler*masm, int frame_size,
++ int total_args, const VMRegPair *regs,
++ const VMRegPair *regs2) {
++ int st_off = frame_size - wordSize;
++ for (int i = 0; i < total_args; i++) {
++ VMReg r_1 = regs[i].first();
++ VMReg r_2 = regs[i].second();
++ if (r_1->is_Register()) {
++ Register r = r_1->as_Register();
++ __ ld(r, st_off, R1_SP);
++ st_off -= wordSize;
++ } else if (r_1->is_FloatRegister()) {
++ FloatRegister f = r_1->as_FloatRegister();
++ __ lfd(f, st_off, R1_SP);
++ st_off -= wordSize;
++ }
++ }
++ if (regs2 != NULL)
++ for (int i = 0; i < total_args; i++) {
++ VMReg r_1 = regs2[i].first();
++ VMReg r_2 = regs2[i].second();
++ if (r_1->is_Register()) {
++ Register r = r_1->as_Register();
++ __ ld(r, st_off, R1_SP);
++ st_off -= wordSize;
++ } else if (r_1->is_FloatRegister()) {
++ FloatRegister f = r_1->as_FloatRegister();
++ __ lfd(f, st_off, R1_SP);
++ st_off -= wordSize;
++ }
++ }
++ __ pop_frame();
++}
++
++// Restore the registers that might be holding a result.
++void RegisterSaver::restore_result_registers(MacroAssembler* masm, int frame_size_in_bytes) {
++ int i;
++ int offset;
++ const int regstosave_num = sizeof(RegisterSaver_LiveRegs) /
++ sizeof(RegisterSaver::LiveRegType);
++ const int register_save_size = regstosave_num * reg_size;
++ const int register_save_offset = frame_size_in_bytes - register_save_size;
++
++ // restore all result registers (ints and floats)
++ offset = register_save_offset;
++ for (int i = 0; i < regstosave_num; i++) {
++ int reg_num = RegisterSaver_LiveRegs[i].reg_num;
++ int reg_type = RegisterSaver_LiveRegs[i].reg_type;
++ switch (reg_type) {
++ case RegisterSaver::int_reg: {
++ if (as_Register(reg_num)==R3_RET) // int result_reg
++ __ ld(as_Register(reg_num), offset, R1_SP);
++ break;
++ }
++ case RegisterSaver::float_reg: {
++ if (as_FloatRegister(reg_num)==F1_RET) // float result_reg
++ __ lfd(as_FloatRegister(reg_num), offset, R1_SP);
++ break;
++ }
++ case RegisterSaver::special_reg: {
++ // Special registers don't hold a result.
++ break;
++ }
++ default:
++ ShouldNotReachHere();
++ }
++ offset += reg_size;
++ }
++}
++
++// Is vector's size (in bytes) bigger than a size saved by default?
++bool SharedRuntime::is_wide_vector(int size) {
++ ResourceMark rm;
++ // Note, MaxVectorSize == 8 on PPC64.
++ assert(size <= 8, err_msg_res("%d bytes vectors are not supported", size));
++ return size > 8;
++}
++#ifdef COMPILER2
++static int reg2slot(VMReg r) {
++ return r->reg2stack() + SharedRuntime::out_preserve_stack_slots();
++}
++
++static int reg2offset(VMReg r) {
++ return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
++}
++#endif
++
++// ---------------------------------------------------------------------------
++// Read the array of BasicTypes from a signature, and compute where the
++// arguments should go. Values in the VMRegPair regs array refer to 4-byte
++// quantities. Values less than VMRegImpl::stack0 are registers, those above
++// refer to 4-byte stack slots. All stack slots are based off of the stack pointer
++// as framesizes are fixed.
++// VMRegImpl::stack0 refers to the first slot 0(sp),
++// and VMRegImpl::stack0+1 refers to the memory word 4 bytes higher. Register
++// values up to RegisterImpl::number_of_registers are the 64-bit
++// integer registers.
++
++// Note: the INPUTS in sig_bt are in units of Java argument words, which are
++// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit
++// units regardless of build. Of course for i486 there is no 64 bit build
++
++// The Java calling convention is a "shifted" version of the C ABI.
++// By skipping the first C ABI register we can call non-static jni methods
++// with small numbers of arguments without having to shuffle the arguments
++// at all. Since we control the java ABI we ought to at least get some
++// advantage out of it.
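++//
++// For example, a method with signature (IJD)V passes the int in R3, the
++// long in R4 and the double in F1 (see the register tables below): in the
++// Java convention the int and float argument registers are counted
++// independently, so the double does not consume an integer register.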
++
++const VMReg java_iarg_reg[8] = {
++ R3->as_VMReg(),
++ R4->as_VMReg(),
++ R5->as_VMReg(),
++ R6->as_VMReg(),
++ R7->as_VMReg(),
++ R8->as_VMReg(),
++ R9->as_VMReg(),
++ R10->as_VMReg()
++};
++
++const VMReg java_farg_reg[13] = {
++ F1->as_VMReg(),
++ F2->as_VMReg(),
++ F3->as_VMReg(),
++ F4->as_VMReg(),
++ F5->as_VMReg(),
++ F6->as_VMReg(),
++ F7->as_VMReg(),
++ F8->as_VMReg(),
++ F9->as_VMReg(),
++ F10->as_VMReg(),
++ F11->as_VMReg(),
++ F12->as_VMReg(),
++ F13->as_VMReg()
++};
++
++const int num_java_iarg_registers = sizeof(java_iarg_reg) / sizeof(java_iarg_reg[0]);
++const int num_java_farg_registers = sizeof(java_farg_reg) / sizeof(java_farg_reg[0]);
++
++int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
++ VMRegPair *regs,
++ int total_args_passed,
++ int is_outgoing) {
++ // C2c calling conventions for compiled-compiled calls.
++ // Put 8 ints/longs into registers _AND_ 13 float/doubles into
++ // registers _AND_ put the rest on the stack.
++
++ const int inc_stk_for_intfloat = 1; // 1 slot for ints and floats
++ const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles
++
++ int i;
++ VMReg reg;
++ int stk = 0;
++ int ireg = 0;
++ int freg = 0;
++
++ // We put the first 8 arguments into registers and the rest on the
++ // stack; float arguments are already in their argument registers
++ // due to c2c calling conventions (see calling_convention).
++ for (int i = 0; i < total_args_passed; ++i) {
++ switch(sig_bt[i]) {
++ case T_BOOLEAN:
++ case T_CHAR:
++ case T_BYTE:
++ case T_SHORT:
++ case T_INT:
++ if (ireg < num_java_iarg_registers) {
++ // Put int/ptr in register
++ reg = java_iarg_reg[ireg];
++ ++ireg;
++ } else {
++ // Put int/ptr on stack.
++ reg = VMRegImpl::stack2reg(stk);
++ stk += inc_stk_for_intfloat;
++ }
++ regs[i].set1(reg);
++ break;
++ case T_LONG:
++ assert(sig_bt[i+1] == T_VOID, "expecting half");
++ if (ireg < num_java_iarg_registers) {
++ // Put long in register.
++ reg = java_iarg_reg[ireg];
++ ++ireg;
++ } else {
++ // Put long on stack. They must be aligned to 2 slots.
++ if (stk & 0x1) ++stk;
++ reg = VMRegImpl::stack2reg(stk);
++ stk += inc_stk_for_longdouble;
++ }
++ regs[i].set2(reg);
++ break;
++ case T_OBJECT:
++ case T_ARRAY:
++ case T_ADDRESS:
++ if (ireg < num_java_iarg_registers) {
++ // Put ptr in register.
++ reg = java_iarg_reg[ireg];
++ ++ireg;
++ } else {
++ // Put ptr on stack. Objects must be aligned to 2 slots too,
++ // because "64-bit pointers record oop-ishness on 2 aligned
++ // adjacent registers." (see OopFlow::build_oop_map).
++ if (stk & 0x1) ++stk;
++ reg = VMRegImpl::stack2reg(stk);
++ stk += inc_stk_for_longdouble;
++ }
++ regs[i].set2(reg);
++ break;
++ case T_FLOAT:
++ if (freg < num_java_farg_registers) {
++ // Put float in register.
++ reg = java_farg_reg[freg];
++ ++freg;
++ } else {
++ // Put float on stack.
++ reg = VMRegImpl::stack2reg(stk);
++ stk += inc_stk_for_intfloat;
++ }
++ regs[i].set1(reg);
++ break;
++ case T_DOUBLE:
++ assert(sig_bt[i+1] == T_VOID, "expecting half");
++ if (freg < num_java_farg_registers) {
++ // Put double in register.
++ reg = java_farg_reg[freg];
++ ++freg;
++ } else {
++ // Put double on stack. They must be aligned to 2 slots.
++ if (stk & 0x1) ++stk;
++ reg = VMRegImpl::stack2reg(stk);
++ stk += inc_stk_for_longdouble;
++ }
++ regs[i].set2(reg);
++ break;
++ case T_VOID:
++ // Do not count halves.
++ regs[i].set_bad();
++ break;
++ default:
++ ShouldNotReachHere();
++ }
++ }
++ return round_to(stk, 2);
++}
++
++#ifdef COMPILER2
++// Calling convention for calling C code.
++int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
++ VMRegPair *regs,
++ VMRegPair *regs2,
++ int total_args_passed) {
++ // Calling conventions for C runtime calls and calls to JNI native methods.
++ //
++ // PPC64 convention: Hoist the first 8 int/ptr/long's in the first 8
++ // int regs, leaving int regs undefined if the arg is flt/dbl. Hoist
++ // the first 13 flt/dbl's in the first 13 fp regs but additionally
++ // copy flt/dbl to the stack if they are beyond the 8th argument.
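++ //
++ // For example, for a C signature (long, double, long) the first long
++ // goes in R3 and the second long in R5: the double is passed in F1 but
++ // still consumes the second integer argument position, leaving R4
++ // undefined. Contrast this with the Java convention above, where the
++ // int and float register counts advance independently.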
++
++ const VMReg iarg_reg[8] = {
++ R3->as_VMReg(),
++ R4->as_VMReg(),
++ R5->as_VMReg(),
++ R6->as_VMReg(),
++ R7->as_VMReg(),
++ R8->as_VMReg(),
++ R9->as_VMReg(),
++ R10->as_VMReg()
++ };
++
++ const VMReg farg_reg[13] = {
++ F1->as_VMReg(),
++ F2->as_VMReg(),
++ F3->as_VMReg(),
++ F4->as_VMReg(),
++ F5->as_VMReg(),
++ F6->as_VMReg(),
++ F7->as_VMReg(),
++ F8->as_VMReg(),
++ F9->as_VMReg(),
++ F10->as_VMReg(),
++ F11->as_VMReg(),
++ F12->as_VMReg(),
++ F13->as_VMReg()
++ };
++
++ // Check calling conventions consistency.
++ assert(sizeof(iarg_reg) / sizeof(iarg_reg[0]) == Argument::n_int_register_parameters_c &&
++ sizeof(farg_reg) / sizeof(farg_reg[0]) == Argument::n_float_register_parameters_c,
++ "consistency");
++
++ // `Stk' counts stack slots. Due to alignment, 32 bit values occupy
++ // 2 such slots, like 64 bit values do.
++ const int inc_stk_for_intfloat = 2; // 2 slots for ints and floats
++ const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles
++
++ int i;
++ VMReg reg;
++ // Leave room for C-compatible ABI_REG_ARGS.
++ int stk = (frame::abi_reg_args_size - frame::jit_out_preserve_size) / VMRegImpl::stack_slot_size;
++ int arg = 0;
++ int freg = 0;
++
++ // Avoid passing C arguments in the wrong stack slots.
++#if defined(ABI_ELFv2)
++ assert((SharedRuntime::out_preserve_stack_slots() + stk) * VMRegImpl::stack_slot_size == 96,
++ "passing C arguments in wrong stack slots");
++#else
++ assert((SharedRuntime::out_preserve_stack_slots() + stk) * VMRegImpl::stack_slot_size == 112,
++ "passing C arguments in wrong stack slots");
++#endif
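++ // (112 and 96 are the ABI_REG_ARGS sizes of the ELFv1 and ELFv2 ABIs
++ // respectively: the 48-byte resp. 32-byte fixed stack header plus the
++ // 64-byte parameter save area for the 8 integer argument registers.)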
++ // We fill out regs AND regs2 if an argument must be passed in a
++ // register AND in a stack slot. If regs2 is NULL in such a
++ // situation, we bail out with a fatal error.
++ for (int i = 0; i < total_args_passed; ++i, ++arg) {
++ // Initialize regs2 to BAD.
++ if (regs2 != NULL) regs2[i].set_bad();
++
++ switch(sig_bt[i]) {
++
++ //
++ // If arguments 0-7 are integers, they are passed in integer registers.
++ // Argument i is placed in iarg_reg[i].
++ //
++ case T_BOOLEAN:
++ case T_CHAR:
++ case T_BYTE:
++ case T_SHORT:
++ case T_INT:
++ // We must cast ints to longs and use full 64 bit stack slots
++ // here. We do the cast in GraphKit::gen_stub() and just guard
++ // here against losing that change.
++ assert(CCallingConventionRequiresIntsAsLongs,
++ "argument of type int should be promoted to type long");
++ guarantee(i > 0 && sig_bt[i-1] == T_LONG,
++ "argument of type (bt) should have been promoted to type (T_LONG,bt) for bt in "
++ "{T_BOOLEAN, T_CHAR, T_BYTE, T_SHORT, T_INT}");
++ // Do not count halves.
++ regs[i].set_bad();
++ --arg;
++ break;
++ case T_LONG:
++ guarantee(sig_bt[i+1] == T_VOID ||
++ sig_bt[i+1] == T_BOOLEAN || sig_bt[i+1] == T_CHAR ||
++ sig_bt[i+1] == T_BYTE || sig_bt[i+1] == T_SHORT ||
++ sig_bt[i+1] == T_INT,
++ "expecting type (T_LONG,half) or type (T_LONG,bt) with bt in {T_BOOLEAN, T_CHAR, T_BYTE, T_SHORT, T_INT}");
++ case T_OBJECT:
++ case T_ARRAY:
++ case T_ADDRESS:
++ case T_METADATA:
++ // Oops are already boxed if required (JNI).
++ if (arg < Argument::n_int_register_parameters_c) {
++ reg = iarg_reg[arg];
++ } else {
++ reg = VMRegImpl::stack2reg(stk);
++ stk += inc_stk_for_longdouble;
++ }
++ regs[i].set2(reg);
++ break;
++
++ //
++ // Floats are treated differently from int regs: The first 13 float arguments
++ // are passed in registers (not the float args among the first 13 args).
++ // Thus argument i is NOT passed in farg_reg[i] if it is float. It is passed
++ // in farg_reg[j] if argument i is the j-th float argument of this call.
++ //
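++ // For example, if the first float of the call is also the 9th or
++ // later argument overall, it is passed in a float register _and_
++ // written to its stack slot via regs2 (see the branch below guarded
++ // by Argument::n_regs_not_on_stack_c).
++ //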
++ case T_FLOAT:
++ if (freg < Argument::n_float_register_parameters_c) {
++ // Put float in register ...
++ reg = farg_reg[freg];
++ ++freg;
++
++ // Argument i for i > 8 is placed on the stack even if it's
++ // placed in a register (if it's a float arg). Aix disassembly
++ // shows that xlC places these float args on the stack AND in
++ // a register. This is not documented, but we follow this
++ // convention, too.
++ if (arg >= Argument::n_regs_not_on_stack_c) {
++ // ... and on the stack.
++ guarantee(regs2 != NULL, "must pass float in register and stack slot");
++ VMReg reg2 = VMRegImpl::stack2reg(stk LINUX_ONLY(+1));
++ regs2[i].set1(reg2);
++ stk += inc_stk_for_intfloat;
++ }
++
++ } else {
++ // Put float on stack.
++ reg = VMRegImpl::stack2reg(stk LINUX_ONLY(+1));
++ stk += inc_stk_for_intfloat;
++ }
++ regs[i].set1(reg);
++ break;
++ case T_DOUBLE:
++ assert(sig_bt[i+1] == T_VOID, "expecting half");
++ if (freg < Argument::n_float_register_parameters_c) {
++ // Put double in register ...
++ reg = farg_reg[freg];
++ ++freg;
++
++ // Argument i for i > 8 is placed on the stack even if it's
++ // placed in a register (if it's a double arg). Aix disassembly
++ // shows that xlC places these args on the stack AND in
++ // a register. This is not documented, but we follow this
++ // convention, too.
++ if (arg >= Argument::n_regs_not_on_stack_c) {
++ // ... and on the stack.
++ guarantee(regs2 != NULL, "must pass double in register and stack slot");
++ VMReg reg2 = VMRegImpl::stack2reg(stk);
++ regs2[i].set2(reg2);
++ stk += inc_stk_for_longdouble;
++ }
++ } else {
++ // Put double on stack.
++ reg = VMRegImpl::stack2reg(stk);
++ stk += inc_stk_for_longdouble;
++ }
++ regs[i].set2(reg);
++ break;
++
++ case T_VOID:
++ // Do not count halves.
++ regs[i].set_bad();
++ --arg;
++ break;
++ default:
++ ShouldNotReachHere();
++ }
++ }
++
++ return round_to(stk, 2);
++}
++#endif // COMPILER2
++
++static address gen_c2i_adapter(MacroAssembler *masm,
++ int total_args_passed,
++ int comp_args_on_stack,
++ const BasicType *sig_bt,
++ const VMRegPair *regs,
++ Label& call_interpreter,
++ const Register& ientry) {
++
++ address c2i_entrypoint;
++
++ const Register sender_SP = R21_sender_SP; // == R21_tmp1
++ const Register code = R22_tmp2;
++ //const Register ientry = R23_tmp3;
++ const Register value_regs[] = { R24_tmp4, R25_tmp5, R26_tmp6 };
++ const int num_value_regs = sizeof(value_regs) / sizeof(Register);
++ int value_regs_index = 0;
++
++ const Register return_pc = R27_tmp7;
++ const Register tmp = R28_tmp8;
++
++ assert_different_registers(sender_SP, code, ientry, return_pc, tmp);
++
++ // Adapter needs TOP_IJAVA_FRAME_ABI.
++ const int adapter_size = frame::top_ijava_frame_abi_size +
++ round_to(total_args_passed * wordSize, frame::alignment_in_bytes);
++
++ // regular (verified) c2i entry point
++ c2i_entrypoint = __ pc();
++
++ // Does compiled code exist? If yes, patch the caller's callsite.
++ __ ld(code, method_(code));
++ __ cmpdi(CCR0, code, 0);
++ __ ld(ientry, method_(interpreter_entry)); // preloaded
++ __ beq(CCR0, call_interpreter);
++
++
++ // Patch caller's callsite, method_(code) was not NULL which means that
++ // compiled code exists.
++ __ mflr(return_pc);
++ __ std(return_pc, _abi(lr), R1_SP);
++ RegisterSaver::push_frame_and_save_argument_registers(masm, tmp, adapter_size, total_args_passed, regs);
++
++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), R19_method, return_pc);
++
++ RegisterSaver::restore_argument_registers_and_pop_frame(masm, adapter_size, total_args_passed, regs);
++ __ ld(return_pc, _abi(lr), R1_SP);
++ __ ld(ientry, method_(interpreter_entry)); // preloaded
++ __ mtlr(return_pc);
++
++
++ // Call the interpreter.
++ __ BIND(call_interpreter);
++ __ mtctr(ientry);
++
++ // Get a copy of the current SP for loading caller's arguments.
++ __ mr(sender_SP, R1_SP);
++
++ // Add space for the adapter.
++ __ resize_frame(-adapter_size, R12_scratch2);
++
++ int st_off = adapter_size - wordSize;
++
++ // Write the args into the outgoing interpreter space.
++ for (int i = 0; i < total_args_passed; i++) {
++ VMReg r_1 = regs[i].first();
++ VMReg r_2 = regs[i].second();
++ if (!r_1->is_valid()) {
++ assert(!r_2->is_valid(), "");
++ continue;
++ }
++ if (r_1->is_stack()) {
++ Register tmp_reg = value_regs[value_regs_index];
++ value_regs_index = (value_regs_index + 1) % num_value_regs;
++ // The calling convention produces OptoRegs that ignore the out
++ // preserve area (JIT's ABI). We must account for it here.
++ int ld_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
++ if (!r_2->is_valid()) {
++ __ lwz(tmp_reg, ld_off, sender_SP);
++ } else {
++ __ ld(tmp_reg, ld_off, sender_SP);
++ }
++ // Pretend stack targets were loaded into tmp_reg.
++ r_1 = tmp_reg->as_VMReg();
++ }
++
++ if (r_1->is_Register()) {
++ Register r = r_1->as_Register();
++ if (!r_2->is_valid()) {
++ __ stw(r, st_off, R1_SP);
++ st_off-=wordSize;
++ } else {
++ // Longs are given 2 64-bit slots in the interpreter, but the
++ // data is passed in only 1 slot.
++ if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
++ DEBUG_ONLY( __ li(tmp, 0); __ std(tmp, st_off, R1_SP); )
++ st_off-=wordSize;
++ }
++ __ std(r, st_off, R1_SP);
++ st_off-=wordSize;
++ }
++ } else {
++ assert(r_1->is_FloatRegister(), "");
++ FloatRegister f = r_1->as_FloatRegister();
++ if (!r_2->is_valid()) {
++ __ stfs(f, st_off, R1_SP);
++ st_off-=wordSize;
++ } else {
++ // In 64bit, doubles are given 2 64-bit slots in the interpreter, but the
++ // data is passed in only 1 slot.
++ // One of these should get known junk...
++ DEBUG_ONLY( __ li(tmp, 0); __ std(tmp, st_off, R1_SP); )
++ st_off-=wordSize;
++ __ stfd(f, st_off, R1_SP);
++ st_off-=wordSize;
++ }
++ }
++ }
++
++ // Jump to the interpreter just as if interpreter was doing it.
++
++#ifdef CC_INTERP
++ const Register tos = R17_tos;
++#else
++ const Register tos = R15_esp;
++ __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R11_scratch1);
++#endif
++
++ // load TOS
++ __ addi(tos, R1_SP, st_off);
++
++ // Frame_manager expects initial_caller_sp (= SP without resize by c2i) in R21_tmp1.
++ assert(sender_SP == R21_sender_SP, "passing initial caller's SP in wrong register");
++ __ bctr();
++
++ return c2i_entrypoint;
++}
++
++static void gen_i2c_adapter(MacroAssembler *masm,
++ int total_args_passed,
++ int comp_args_on_stack,
++ const BasicType *sig_bt,
++ const VMRegPair *regs) {
++
++ // Load method's entry-point from method.
++ __ ld(R12_scratch2, in_bytes(Method::from_compiled_offset()), R19_method);
++ __ mtctr(R12_scratch2);
++
++ // We will only enter here from an interpreted frame and never from after
++ // passing thru a c2i. Azul allowed this but we do not. If we lose the
++ // race and use a c2i we will remain interpreted for the race loser(s).
++ // This removes all sorts of headaches on the x86 side and also eliminates
++ // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.
++
++ // Note: the sender SP is live on entry. We must preserve it since we
++ // may do an i2c -> c2i transition if we lose a race where compiled
++ // code goes non-entrant while we get args ready.
++ // In addition we use the sender SP to locate all the interpreter args,
++ // and we must align the stack to 16 bytes on an i2c entry, else we
++ // lose the alignment all compiled code expects and the register save
++ // code can fault on an improperly aligned stack pointer.
++
++#ifdef CC_INTERP
++ const Register ld_ptr = R17_tos;
++#else
++ const Register ld_ptr = R15_esp;
++#endif
++
++ const Register value_regs[] = { R22_tmp2, R23_tmp3, R24_tmp4, R25_tmp5, R26_tmp6 };
++ const int num_value_regs = sizeof(value_regs) / sizeof(Register);
++ int value_regs_index = 0;
++
++ int ld_offset = total_args_passed*wordSize;
++
++ // Cut-out for having no stack args. Since up to 2 int/oop args are passed
++ // in registers, we will occasionally have no stack args.
++ int comp_words_on_stack = 0;
++ if (comp_args_on_stack) {
++ // Sig words on the stack are greater than VMRegImpl::stack0. Those in
++ // registers are below. By subtracting stack0, we either get a negative
++ // number (all values in registers) or the maximum stack slot accessed.
++
++ // Convert 4-byte c2 stack slots to words.
++ comp_words_on_stack = round_to(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
++ // Round up to minimum stack alignment, in wordSize units.
++ comp_words_on_stack = round_to(comp_words_on_stack, 2);
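++ // Example: comp_args_on_stack == 3 gives round_to(12, 8) >> 3 == 2
++ // words (LogBytesPerWord == 3 on PPC64), and round_to(2, 2) == 2, so
++ // the frame grows by two words.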
++ __ resize_frame(-comp_words_on_stack * wordSize, R11_scratch1);
++ }
++
++ // Now generate the shuffle code. Pick up all register args and move the
++ // rest through the value_regs temporaries.
++ BLOCK_COMMENT("Shuffle arguments");
++ for (int i = 0; i < total_args_passed; i++) {
++ if (sig_bt[i] == T_VOID) {
++ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
++ continue;
++ }
++
++ // Pick up 0, 1 or 2 words from ld_ptr.
++ assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
++ "scrambled load targets?");
++ VMReg r_1 = regs[i].first();
++ VMReg r_2 = regs[i].second();
++ if (!r_1->is_valid()) {
++ assert(!r_2->is_valid(), "");
++ continue;
++ }
++ if (r_1->is_FloatRegister()) {
++ if (!r_2->is_valid()) {
++ __ lfs(r_1->as_FloatRegister(), ld_offset, ld_ptr);
++ ld_offset-=wordSize;
++ } else {
++ // Skip the unused interpreter slot.
++ __ lfd(r_1->as_FloatRegister(), ld_offset-wordSize, ld_ptr);
++ ld_offset-=2*wordSize;
++ }
++ } else {
++ Register r;
++ if (r_1->is_stack()) {
++ // Must do a memory to memory move thru "value".
++ r = value_regs[value_regs_index];
++ value_regs_index = (value_regs_index + 1) % num_value_regs;
++ } else {
++ r = r_1->as_Register();
++ }
++ if (!r_2->is_valid()) {
++ // Not sure we need to do this but it shouldn't hurt.
++ if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ADDRESS || sig_bt[i] == T_ARRAY) {
++ __ ld(r, ld_offset, ld_ptr);
++ ld_offset-=wordSize;
++ } else {
++ __ lwz(r, ld_offset, ld_ptr);
++ ld_offset-=wordSize;
++ }
++ } else {
++ // In 64bit, longs are given 2 64-bit slots in the interpreter, but the
++ // data is passed in only 1 slot.
++ if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
++ ld_offset-=wordSize;
++ }
++ __ ld(r, ld_offset, ld_ptr);
++ ld_offset-=wordSize;
++ }
++
++ if (r_1->is_stack()) {
++ // Now store value where the compiler expects it
++ int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots())*VMRegImpl::stack_slot_size;
++
++ if (sig_bt[i] == T_INT || sig_bt[i] == T_FLOAT ||sig_bt[i] == T_BOOLEAN ||
++ sig_bt[i] == T_SHORT || sig_bt[i] == T_CHAR || sig_bt[i] == T_BYTE) {
++ __ stw(r, st_off, R1_SP);
++ } else {
++ __ std(r, st_off, R1_SP);
++ }
++ }
++ }
++ }
++
++ BLOCK_COMMENT("Store method");
++ // Store method into thread->callee_target.
++ // We might end up in handle_wrong_method if the callee is
++ // deoptimized as we race thru here. If that happens we don't want
++ // to take a safepoint because the caller frame will look
++ // interpreted and arguments are now "compiled" so it is much better
++ // to make this transition invisible to the stack walking
++ // code. Unfortunately if we try and find the callee by normal means
++ // a safepoint is possible. So we stash the desired callee in the
++ // thread and the vm will find there should this case occur.
++ __ std(R19_method, thread_(callee_target));
++
++ // Jump to the compiled code just as if compiled code was doing it.
++ __ bctr();
++}
++
++AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
++ int total_args_passed,
++ int comp_args_on_stack,
++ const BasicType *sig_bt,
++ const VMRegPair *regs,
++ AdapterFingerPrint* fingerprint) {
++ address i2c_entry;
++ address c2i_unverified_entry;
++ address c2i_entry;
++
++
++ // entry: i2c
++
++ __ align(CodeEntryAlignment);
++ i2c_entry = __ pc();
++ gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
++
++
++ // entry: c2i unverified
++
++ __ align(CodeEntryAlignment);
++ BLOCK_COMMENT("c2i unverified entry");
++ c2i_unverified_entry = __ pc();
++
++ // inline_cache contains a compiledICHolder
++ const Register ic = R19_method;
++ const Register ic_klass = R11_scratch1;
++ const Register receiver_klass = R12_scratch2;
++ const Register code = R21_tmp1;
++ const Register ientry = R23_tmp3;
++
++ assert_different_registers(ic, ic_klass, receiver_klass, R3_ARG1, code, ientry);
++ assert(R11_scratch1 == R11, "need prologue scratch register");
++
++ Label call_interpreter;
++
++ assert(!MacroAssembler::needs_explicit_null_check(oopDesc::klass_offset_in_bytes()),
++ "klass offset should reach into any page");
++ // Check for NULL argument if we don't have implicit null checks.
++ if (!ImplicitNullChecks || !os::zero_page_read_protected()) {
++ if (TrapBasedNullChecks) {
++ __ trap_null_check(R3_ARG1);
++ } else {
++ Label valid;
++ __ cmpdi(CCR0, R3_ARG1, 0);
++ __ bne_predict_taken(CCR0, valid);
++ // We have a null argument, branch to ic_miss_stub.
++ __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(),
++ relocInfo::runtime_call_type);
++ __ BIND(valid);
++ }
++ }
++ // Assume argument is not NULL, load klass from receiver.
++ __ load_klass(receiver_klass, R3_ARG1);
++
++ __ ld(ic_klass, CompiledICHolder::holder_klass_offset(), ic);
++
++ if (TrapBasedICMissChecks) {
++ __ trap_ic_miss_check(receiver_klass, ic_klass);
++ } else {
++ Label valid;
++ __ cmpd(CCR0, receiver_klass, ic_klass);
++ __ beq_predict_taken(CCR0, valid);
++ // We have an unexpected klass, branch to ic_miss_stub.
++ __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(),
++ relocInfo::runtime_call_type);
++ __ BIND(valid);
++ }
++
++ // Argument is valid and klass is as expected, continue.
++
++ // Extract method from inline cache, verified entry point needs it.
++ __ ld(R19_method, CompiledICHolder::holder_method_offset(), ic);
++ assert(R19_method == ic, "the inline cache register is dead here");
++
++ __ ld(code, method_(code));
++ __ cmpdi(CCR0, code, 0);
++ __ ld(ientry, method_(interpreter_entry)); // preloaded
++ __ beq_predict_taken(CCR0, call_interpreter);
++
++ // Branch to ic_miss_stub.
++ __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type);
++
++ // entry: c2i
++
++ c2i_entry = gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, call_interpreter, ientry);
++
++ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
++}
++
++#ifdef COMPILER2
++// An oop arg. Must pass a handle not the oop itself.
++static void object_move(MacroAssembler* masm,
++ int frame_size_in_slots,
++ OopMap* oop_map, int oop_handle_offset,
++ bool is_receiver, int* receiver_offset,
++ VMRegPair src, VMRegPair dst,
++ Register r_caller_sp, Register r_temp_1, Register r_temp_2) {
++ assert(!is_receiver || (is_receiver && (*receiver_offset == -1)),
++ "receiver has already been moved");
++
++ // We must pass a handle. First figure out the location we use as a handle.
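++ //
++ // In both branches below the handle ends up being the address of a
++ // stack slot that holds the oop: either the slot in the caller's frame
++ // (stack source) or a slot in this frame's oop handle area (register
++ // source). A NULL oop yields a NULL handle, as JNI requires.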
++
++ if (src.first()->is_stack()) {
++ // stack to stack or reg
++
++ const Register r_handle = dst.first()->is_stack() ? r_temp_1 : dst.first()->as_Register();
++ Label skip;
++ const int oop_slot_in_callers_frame = reg2slot(src.first());
++
++ guarantee(!is_receiver, "expecting receiver in register");
++ oop_map->set_oop(VMRegImpl::stack2reg(oop_slot_in_callers_frame + frame_size_in_slots));
++
++ __ addi(r_handle, r_caller_sp, reg2offset(src.first()));
++ __ ld( r_temp_2, reg2offset(src.first()), r_caller_sp);
++ __ cmpdi(CCR0, r_temp_2, 0);
++ __ bne(CCR0, skip);
++ // Use a NULL handle if oop is NULL.
++ __ li(r_handle, 0);
++ __ bind(skip);
++
++ if (dst.first()->is_stack()) {
++ // stack to stack
++ __ std(r_handle, reg2offset(dst.first()), R1_SP);
++ } else {
++ // stack to reg
++ // Nothing to do, r_handle is already the dst register.
++ }
++ } else {
++ // reg to stack or reg
++ const Register r_oop = src.first()->as_Register();
++ const Register r_handle = dst.first()->is_stack() ? r_temp_1 : dst.first()->as_Register();
++ const int oop_slot = (r_oop->encoding()-R3_ARG1->encoding()) * VMRegImpl::slots_per_word
++ + oop_handle_offset; // in slots
++ const int oop_offset = oop_slot * VMRegImpl::stack_slot_size;
++ Label skip;
++
++ if (is_receiver) {
++ *receiver_offset = oop_offset;
++ }
++ oop_map->set_oop(VMRegImpl::stack2reg(oop_slot));
++
++ __ std( r_oop, oop_offset, R1_SP);
++ __ addi(r_handle, R1_SP, oop_offset);
++
++ __ cmpdi(CCR0, r_oop, 0);
++ __ bne(CCR0, skip);
++ // Use a NULL handle if oop is NULL.
++ __ li(r_handle, 0);
++ __ bind(skip);
++
++ if (dst.first()->is_stack()) {
++ // reg to stack
++ __ std(r_handle, reg2offset(dst.first()), R1_SP);
++ } else {
++ // reg to reg
++ // Nothing to do, r_handle is already the dst register.
++ }
++ }
++}
++
++static void int_move(MacroAssembler*masm,
++ VMRegPair src, VMRegPair dst,
++ Register r_caller_sp, Register r_temp) {
++ assert(src.first()->is_valid() && src.second() == src.first()->next(), "incoming must be long-int");
++ assert(dst.first()->is_valid() && dst.second() == dst.first()->next(), "outgoing must be long");
++
++ if (src.first()->is_stack()) {
++ if (dst.first()->is_stack()) {
++ // stack to stack
++ __ lwa(r_temp, reg2offset(src.first()), r_caller_sp);
++ __ std(r_temp, reg2offset(dst.first()), R1_SP);
++ } else {
++ // stack to reg
++ __ lwa(dst.first()->as_Register(), reg2offset(src.first()), r_caller_sp);
++ }
++ } else if (dst.first()->is_stack()) {
++ // reg to stack
++ __ extsw(r_temp, src.first()->as_Register());
++ __ std(r_temp, reg2offset(dst.first()), R1_SP);
++ } else {
++ // reg to reg
++ __ extsw(dst.first()->as_Register(), src.first()->as_Register());
++ }
++}
++
++static void long_move(MacroAssembler*masm,
++ VMRegPair src, VMRegPair dst,
++ Register r_caller_sp, Register r_temp) {
++ assert(src.first()->is_valid() && src.second() == src.first()->next(), "incoming must be long");
++ assert(dst.first()->is_valid() && dst.second() == dst.first()->next(), "outgoing must be long");
++
++ if (src.first()->is_stack()) {
++ if (dst.first()->is_stack()) {
++ // stack to stack
++ __ ld( r_temp, reg2offset(src.first()), r_caller_sp);
++ __ std(r_temp, reg2offset(dst.first()), R1_SP);
++ } else {
++ // stack to reg
++ __ ld(dst.first()->as_Register(), reg2offset(src.first()), r_caller_sp);
++ }
++ } else if (dst.first()->is_stack()) {
++ // reg to stack
++ __ std(src.first()->as_Register(), reg2offset(dst.first()), R1_SP);
++ } else {
++ // reg to reg
++ if (dst.first()->as_Register() != src.first()->as_Register())
++ __ mr(dst.first()->as_Register(), src.first()->as_Register());
++ }
++}
++
++static void float_move(MacroAssembler*masm,
++ VMRegPair src, VMRegPair dst,
++ Register r_caller_sp, Register r_temp) {
++ assert(src.first()->is_valid() && !src.second()->is_valid(), "incoming must be float");
++ assert(dst.first()->is_valid() && !dst.second()->is_valid(), "outgoing must be float");
++
++ if (src.first()->is_stack()) {
++ if (dst.first()->is_stack()) {
++ // stack to stack
++ __ lwz(r_temp, reg2offset(src.first()), r_caller_sp);
++ __ stw(r_temp, reg2offset(dst.first()), R1_SP);
++ } else {
++ // stack to reg
++ __ lfs(dst.first()->as_FloatRegister(), reg2offset(src.first()), r_caller_sp);
++ }
++ } else if (dst.first()->is_stack()) {
++ // reg to stack
++ __ stfs(src.first()->as_FloatRegister(), reg2offset(dst.first()), R1_SP);
++ } else {
++ // reg to reg
++ if (dst.first()->as_FloatRegister() != src.first()->as_FloatRegister())
++ __ fmr(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
++ }
++}
++
++static void double_move(MacroAssembler*masm,
++ VMRegPair src, VMRegPair dst,
++ Register r_caller_sp, Register r_temp) {
++ assert(src.first()->is_valid() && src.second() == src.first()->next(), "incoming must be double");
++ assert(dst.first()->is_valid() && dst.second() == dst.first()->next(), "outgoing must be double");
++
++ if (src.first()->is_stack()) {
++ if (dst.first()->is_stack()) {
++ // stack to stack
++ __ ld( r_temp, reg2offset(src.first()), r_caller_sp);
++ __ std(r_temp, reg2offset(dst.first()), R1_SP);
++ } else {
++ // stack to reg
++ __ lfd(dst.first()->as_FloatRegister(), reg2offset(src.first()), r_caller_sp);
++ }
++ } else if (dst.first()->is_stack()) {
++ // reg to stack
++ __ stfd(src.first()->as_FloatRegister(), reg2offset(dst.first()), R1_SP);
++ } else {
++ // reg to reg
++ if (dst.first()->as_FloatRegister() != src.first()->as_FloatRegister())
++ __ fmr(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
++ }
++}
++
++void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
++ switch (ret_type) {
++ case T_BOOLEAN:
++ case T_CHAR:
++ case T_BYTE:
++ case T_SHORT:
++ case T_INT:
++ __ stw (R3_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
++ break;
++ case T_ARRAY:
++ case T_OBJECT:
++ case T_LONG:
++ __ std (R3_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
++ break;
++ case T_FLOAT:
++ __ stfs(F1_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
++ break;
++ case T_DOUBLE:
++ __ stfd(F1_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
++ break;
++ case T_VOID:
++ break;
++ default:
++ ShouldNotReachHere();
++ break;
++ }
++}
++
++void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
++ switch (ret_type) {
++ case T_BOOLEAN:
++ case T_CHAR:
++ case T_BYTE:
++ case T_SHORT:
++ case T_INT:
++ __ lwz(R3_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
++ break;
++ case T_ARRAY:
++ case T_OBJECT:
++ case T_LONG:
++ __ ld (R3_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
++ break;
++ case T_FLOAT:
++ __ lfs(F1_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
++ break;
++ case T_DOUBLE:
++ __ lfd(F1_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
++ break;
++ case T_VOID:
++ break;
++ default:
++ ShouldNotReachHere();
++ break;
++ }
++}
++
++static void save_or_restore_arguments(MacroAssembler* masm,
++ const int stack_slots,
++ const int total_in_args,
++ const int arg_save_area,
++ OopMap* map,
++ VMRegPair* in_regs,
++ BasicType* in_sig_bt) {
++ // If map is non-NULL then the code should store the values,
++ // otherwise it should load them.
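++ // Callers invoke this twice: first with a fresh OopMap to spill the
++ // arguments and record any oops, then with map == NULL to reload them
++ // (see check_needs_gc_for_critical_native below).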
++ int slot = arg_save_area;
++ // Save down double words first.
++ for (int i = 0; i < total_in_args; i++) {
++ if (in_regs[i].first()->is_FloatRegister() && in_sig_bt[i] == T_DOUBLE) {
++ int offset = slot * VMRegImpl::stack_slot_size;
++ slot += VMRegImpl::slots_per_word;
++ assert(slot <= stack_slots, "overflow (after DOUBLE stack slot)");
++ if (map != NULL) {
++ __ stfd(in_regs[i].first()->as_FloatRegister(), offset, R1_SP);
++ } else {
++ __ lfd(in_regs[i].first()->as_FloatRegister(), offset, R1_SP);
++ }
++ } else if (in_regs[i].first()->is_Register() &&
++ (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_ARRAY)) {
++ int offset = slot * VMRegImpl::stack_slot_size;
++ if (map != NULL) {
++ __ std(in_regs[i].first()->as_Register(), offset, R1_SP);
++ if (in_sig_bt[i] == T_ARRAY) {
++ map->set_oop(VMRegImpl::stack2reg(slot));
++ }
++ } else {
++ __ ld(in_regs[i].first()->as_Register(), offset, R1_SP);
++ }
++ slot += VMRegImpl::slots_per_word;
++ assert(slot <= stack_slots, "overflow (after LONG/ARRAY stack slot)");
++ }
++ }
++ // Save or restore single word registers.
++ for (int i = 0; i < total_in_args; i++) {
++ // PPC64: pass ints as longs: must only deal with floats here.
++ if (in_regs[i].first()->is_FloatRegister()) {
++ if (in_sig_bt[i] == T_FLOAT) {
++ int offset = slot * VMRegImpl::stack_slot_size;
++ slot++;
++ assert(slot <= stack_slots, "overflow (after FLOAT stack slot)");
++ if (map != NULL) {
++ __ stfs(in_regs[i].first()->as_FloatRegister(), offset, R1_SP);
++ } else {
++ __ lfs(in_regs[i].first()->as_FloatRegister(), offset, R1_SP);
++ }
++ }
++ } else if (in_regs[i].first()->is_stack()) {
++ if (in_sig_bt[i] == T_ARRAY && map != NULL) {
++ int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
++ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
++ }
++ }
++ }
++}
++
++// Check GC_locker::needs_gc and enter the runtime if it's true. This
++// keeps a new JNI critical region from starting until a GC has been
++// forced. Save down any oops in registers and describe them in an
++// OopMap.
++static void check_needs_gc_for_critical_native(MacroAssembler* masm,
++ const int stack_slots,
++ const int total_in_args,
++ const int arg_save_area,
++ OopMapSet* oop_maps,
++ VMRegPair* in_regs,
++ BasicType* in_sig_bt,
++ Register tmp_reg ) {
++ __ block_comment("check GC_locker::needs_gc");
++ Label cont;
++ __ lbz(tmp_reg, (RegisterOrConstant)(intptr_t)GC_locker::needs_gc_address());
++ __ cmplwi(CCR0, tmp_reg, 0);
++ __ beq(CCR0, cont);
++
++ // Save down any values that are live in registers and call into the
++ // runtime to halt for a GC.
++ OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
++ save_or_restore_arguments(masm, stack_slots, total_in_args,
++ arg_save_area, map, in_regs, in_sig_bt);
++
++ __ mr(R3_ARG1, R16_thread);
++ __ set_last_Java_frame(R1_SP, noreg);
++
++ __ block_comment("block_for_jni_critical");
++ address entry_point = CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical);
++#if defined(ABI_ELFv2)
++ __ call_c(entry_point, relocInfo::runtime_call_type);
++#else
++ __ call_c(CAST_FROM_FN_PTR(FunctionDescriptor*, entry_point), relocInfo::runtime_call_type);
++#endif
++ address start = __ pc() - __ offset();
++ address calls_return_pc = __ last_calls_return_pc();
++ oop_maps->add_gc_map(calls_return_pc - start, map);
++
++ __ reset_last_Java_frame();
++
++ // Reload all the register arguments.
++ save_or_restore_arguments(masm, stack_slots, total_in_args,
++ arg_save_area, NULL, in_regs, in_sig_bt);
++
++ __ BIND(cont);
++
++#ifdef ASSERT
++ if (StressCriticalJNINatives) {
++ // Stress register saving.
++ OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
++ save_or_restore_arguments(masm, stack_slots, total_in_args,
++ arg_save_area, map, in_regs, in_sig_bt);
++ // Destroy argument registers.
++ for (int i = 0; i < total_in_args; i++) {
++ if (in_regs[i].first()->is_Register()) {
++ const Register reg = in_regs[i].first()->as_Register();
++ __ neg(reg, reg);
++ } else if (in_regs[i].first()->is_FloatRegister()) {
++ __ fneg(in_regs[i].first()->as_FloatRegister(), in_regs[i].first()->as_FloatRegister());
++ }
++ }
++
++ save_or_restore_arguments(masm, stack_slots, total_in_args,
++ arg_save_area, NULL, in_regs, in_sig_bt);
++ }
++#endif
++}
++
++static void move_ptr(MacroAssembler* masm, VMRegPair src, VMRegPair dst, Register r_caller_sp, Register r_temp) {
++ if (src.first()->is_stack()) {
++ if (dst.first()->is_stack()) {
++ // stack to stack
++ __ ld(r_temp, reg2offset(src.first()), r_caller_sp);
++ __ std(r_temp, reg2offset(dst.first()), R1_SP);
++ } else {
++ // stack to reg
++ __ ld(dst.first()->as_Register(), reg2offset(src.first()), r_caller_sp);
++ }
++ } else if (dst.first()->is_stack()) {
++ // reg to stack
++ __ std(src.first()->as_Register(), reg2offset(dst.first()), R1_SP);
++ } else {
++ if (dst.first() != src.first()) {
++ __ mr(dst.first()->as_Register(), src.first()->as_Register());
++ }
++ }
++}
++
++// Unpack an array argument into a pointer to the body and the length
++// if the array is non-null, otherwise pass 0 for both.
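++// For a jint[] argument, for instance, the callee receives the array
++// length in length_arg and a pointer to the first element in body_arg,
++// or zero for both if the array reference is NULL.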
++static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type,
++ VMRegPair body_arg, VMRegPair length_arg, Register r_caller_sp,
++ Register tmp_reg, Register tmp2_reg) {
++ assert(!body_arg.first()->is_Register() || body_arg.first()->as_Register() != tmp_reg,
++ "possible collision");
++ assert(!length_arg.first()->is_Register() || length_arg.first()->as_Register() != tmp_reg,
++ "possible collision");
++
++ // Pass the length, ptr pair.
++ Label set_out_args;
++ VMRegPair tmp, tmp2;
++ tmp.set_ptr(tmp_reg->as_VMReg());
++ tmp2.set_ptr(tmp2_reg->as_VMReg());
++ if (reg.first()->is_stack()) {
++ // Load the arg up from the stack.
++ move_ptr(masm, reg, tmp, r_caller_sp, /*unused*/ R0);
++ reg = tmp;
++ }
++ __ li(tmp2_reg, 0); // Pass zeros if Array=null.
++ if (tmp_reg != reg.first()->as_Register()) __ li(tmp_reg, 0);
++ __ cmpdi(CCR0, reg.first()->as_Register(), 0);
++ __ beq(CCR0, set_out_args);
++ __ lwa(tmp2_reg, arrayOopDesc::length_offset_in_bytes(), reg.first()->as_Register());
++ __ addi(tmp_reg, reg.first()->as_Register(), arrayOopDesc::base_offset_in_bytes(in_elem_type));
++ __ bind(set_out_args);
++ move_ptr(masm, tmp, body_arg, r_caller_sp, /*unused*/ R0);
++ move_ptr(masm, tmp2, length_arg, r_caller_sp, /*unused*/ R0); // Same as move32_64 on PPC64.
++}
++
++static void verify_oop_args(MacroAssembler* masm,
++ methodHandle method,
++ const BasicType* sig_bt,
++ const VMRegPair* regs) {
++ Register temp_reg = R19_method; // not part of any compiled calling seq
++ if (VerifyOops) {
++ for (int i = 0; i < method->size_of_parameters(); i++) {
++ if (sig_bt[i] == T_OBJECT ||
++ sig_bt[i] == T_ARRAY) {
++ VMReg r = regs[i].first();
++ assert(r->is_valid(), "bad oop arg");
++ if (r->is_stack()) {
++ __ ld(temp_reg, reg2offset(r), R1_SP);
++ __ verify_oop(temp_reg);
++ } else {
++ __ verify_oop(r->as_Register());
++ }
++ }
++ }
++ }
++}
++
++static void gen_special_dispatch(MacroAssembler* masm,
++ methodHandle method,
++ const BasicType* sig_bt,
++ const VMRegPair* regs) {
++ verify_oop_args(masm, method, sig_bt, regs);
++ vmIntrinsics::ID iid = method->intrinsic_id();
++
++ // Now write the args into the outgoing interpreter space
++ bool has_receiver = false;
++ Register receiver_reg = noreg;
++ int member_arg_pos = -1;
++ Register member_reg = noreg;
++ int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
++ if (ref_kind != 0) {
++ member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument
++ member_reg = R19_method; // known to be free at this point
++ has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
++ } else if (iid == vmIntrinsics::_invokeBasic) {
++ has_receiver = true;
++ } else {
++ fatal(err_msg_res("unexpected intrinsic id %d", iid));
++ }
++
++ if (member_reg != noreg) {
++ // Load the member_arg into register, if necessary.
++ SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
++ VMReg r = regs[member_arg_pos].first();
++ if (r->is_stack()) {
++ __ ld(member_reg, reg2offset(r), R1_SP);
++ } else {
++ // no data motion is needed
++ member_reg = r->as_Register();
++ }
++ }
++
++ if (has_receiver) {
++ // Make sure the receiver is loaded into a register.
++ assert(method->size_of_parameters() > 0, "oob");
++ assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
++ VMReg r = regs[0].first();
++ assert(r->is_valid(), "bad receiver arg");
++ if (r->is_stack()) {
++ // Porting note: This assumes that compiled calling conventions always
++ // pass the receiver oop in a register. If this is not true on some
++ // platform, pick a temp and load the receiver from stack.
++ fatal("receiver always in a register");
++ receiver_reg = R11_scratch1; // TODO (hs24): is R11_scratch1 really free at this point?
++ __ ld(receiver_reg, reg2offset(r), R1_SP);
++ } else {
++ // no data motion is needed
++ receiver_reg = r->as_Register();
++ }
++ }
++
++ // Figure out which address we are really jumping to:
++ MethodHandles::generate_method_handle_dispatch(masm, iid,
++ receiver_reg, member_reg, /*for_compiler_entry:*/ true);
++}
++
++#endif // COMPILER2
++
++// ---------------------------------------------------------------------------
++// Generate a native wrapper for a given method. The method takes arguments
++// in the Java compiled code convention, marshals them to the native
++// convention (handlizes oops, etc), transitions to native, makes the call,
++// returns to java state (possibly blocking), unhandlizes any result and
++// returns.
++//
++// Critical native functions are a shorthand for the use of
++// GetPrimitiveArrayCritical and disallow the use of any other JNI
++// functions. The wrapper is expected to unpack the arguments before
++// passing them to the callee and perform checks before and after the
++// native call to ensure that the GC_locker
++// lock_critical/unlock_critical semantics are followed. Some other
++// parts of JNI setup are skipped, like the tear-down of the JNI handle
++// block and the check for pending exceptions, since it's impossible for
++// them to be thrown.
++//
++// They are roughly structured like this:
++//   if (GC_locker::needs_gc())
++//     SharedRuntime::block_for_jni_critical();
++//   transition to thread_in_native
++//   unpack array arguments and call native entry point
++//   check for safepoint in progress
++//   check if any thread suspend flags are set
++//   call into the JVM and possibly unlock the JNI critical
++//   if a GC was suppressed while in the critical native.
++// transition back to thread_in_Java
++// return to caller
++//
++nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
++ methodHandle method,
++ int compile_id,
++ BasicType *in_sig_bt,
++ VMRegPair *in_regs,
++ BasicType ret_type) {
++#ifdef COMPILER2
++ if (method->is_method_handle_intrinsic()) {
++ vmIntrinsics::ID iid = method->intrinsic_id();
++ intptr_t start = (intptr_t)__ pc();
++ int vep_offset = ((intptr_t)__ pc()) - start;
++ gen_special_dispatch(masm,
++ method,
++ in_sig_bt,
++ in_regs);
++ int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period
++ __ flush();
++ int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually
++ return nmethod::new_native_nmethod(method,
++ compile_id,
++ masm->code(),
++ vep_offset,
++ frame_complete,
++ stack_slots / VMRegImpl::slots_per_word,
++ in_ByteSize(-1),
++ in_ByteSize(-1),
++ (OopMapSet*)NULL);
++ }
++
++ bool is_critical_native = true;
++ address native_func = method->critical_native_function();
++ if (native_func == NULL) {
++ native_func = method->native_function();
++ is_critical_native = false;
++ }
++ assert(native_func != NULL, "must have function");
++
++ // First, create signature for outgoing C call
++ // --------------------------------------------------------------------------
++
++ int total_in_args = method->size_of_parameters();
++ // We have received a description of where all the java args are located
++ // on entry to the wrapper. We need to convert these args to where
++ // the jni function will expect them. To figure out where they go
++ // we convert the java signature to a C signature by inserting
++ // the hidden arguments as arg[0] and possibly arg[1] (static method)
++ //
++ // Additionally, on ppc64 we must convert integers to longs in the C
++ // signature. We do this in advance in order to have no trouble with
++ // indexes into the bt-arrays.
++ // So convert the signature and registers now, and adjust the total number
++ // of in-arguments accordingly.
++ int i2l_argcnt = convert_ints_to_longints_argcnt(total_in_args, in_sig_bt); // PPC64: pass ints as longs.
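++ // For example, a Java signature (I)V arrives here with in_sig_bt ==
++ // { T_INT } and leaves the conversion as { T_LONG, T_INT }, the
++ // trailing T_INT acting as the unused half of the promoted long (see
++ // the corresponding guarantee in c_calling_convention above).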
++
++ // Calculate the total number of C arguments and create arrays for the
++ // signature and the outgoing registers.
++ // On ppc64, we have two arrays for the outgoing registers, because
++ // some floating-point arguments must be passed in registers _and_
++ // in stack locations.
++ bool method_is_static = method->is_static();
++ int total_c_args = i2l_argcnt;
++
++ if (!is_critical_native) {
++ int n_hidden_args = method_is_static ? 2 : 1;
++ total_c_args += n_hidden_args;
++ } else {
++ // No JNIEnv*, no this*, but unpacked arrays (base+length).
++ for (int i = 0; i < total_in_args; i++) {
++ if (in_sig_bt[i] == T_ARRAY) {
++ total_c_args += 2; // PPC64: T_LONG, T_INT, T_ADDRESS (see convert_ints_to_longints and c_calling_convention)
++ }
++ }
++ }
++
++ BasicType *out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
++ VMRegPair *out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
++ VMRegPair *out_regs2 = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
++ BasicType* in_elem_bt = NULL;
++
++ // Create the signature for the C call:
++ // 1) add the JNIEnv*
++ // 2) add the class if the method is static
++ // 3) copy the rest of the incoming signature (shifted by the number of
++ // hidden arguments).
++
++ int argc = 0;
++ if (!is_critical_native) {
++ convert_ints_to_longints(i2l_argcnt, total_in_args, in_sig_bt, in_regs); // PPC64: pass ints as longs.
++
++ out_sig_bt[argc++] = T_ADDRESS;
++ if (method->is_static()) {
++ out_sig_bt[argc++] = T_OBJECT;
++ }
++
++ for (int i = 0; i < total_in_args ; i++ ) {
++ out_sig_bt[argc++] = in_sig_bt[i];
++ }
++ } else {
++ Thread* THREAD = Thread::current();
++ in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, i2l_argcnt);
++ SignatureStream ss(method->signature());
++ int o = 0;
++ for (int i = 0; i < total_in_args ; i++, o++) {
++ if (in_sig_bt[i] == T_ARRAY) {
++ // Arrays are passed as int, elem* pair
++ Symbol* atype = ss.as_symbol(CHECK_NULL);
++ const char* at = atype->as_C_string();
++ if (strlen(at) == 2) {
++ assert(at[0] == '[', "must be");
++ switch (at[1]) {
++ case 'B': in_elem_bt[o] = T_BYTE; break;
++ case 'C': in_elem_bt[o] = T_CHAR; break;
++ case 'D': in_elem_bt[o] = T_DOUBLE; break;
++ case 'F': in_elem_bt[o] = T_FLOAT; break;
++ case 'I': in_elem_bt[o] = T_INT; break;
++ case 'J': in_elem_bt[o] = T_LONG; break;
++ case 'S': in_elem_bt[o] = T_SHORT; break;
++ case 'Z': in_elem_bt[o] = T_BOOLEAN; break;
++ default: ShouldNotReachHere();
++ }
++ }
++ } else {
++ in_elem_bt[o] = T_VOID;
++ switch(in_sig_bt[i]) { // PPC64: pass ints as longs.
++ case T_BOOLEAN:
++ case T_CHAR:
++ case T_BYTE:
++ case T_SHORT:
++ case T_INT: in_elem_bt[++o] = T_VOID; break;
++ default: break;
++ }
++ }
++ if (in_sig_bt[i] != T_VOID) {
++ assert(in_sig_bt[i] == ss.type(), "must match");
++ ss.next();
++ }
++ }
++ assert(i2l_argcnt==o, "must match");
++
++ convert_ints_to_longints(i2l_argcnt, total_in_args, in_sig_bt, in_regs); // PPC64: pass ints as longs.
++
++ for (int i = 0; i < total_in_args ; i++ ) {
++ if (in_sig_bt[i] == T_ARRAY) {
++ // Arrays are passed as int, elem* pair.
++ out_sig_bt[argc++] = T_LONG; // PPC64: pass ints as longs.
++ out_sig_bt[argc++] = T_INT;
++ out_sig_bt[argc++] = T_ADDRESS;
++ } else {
++ out_sig_bt[argc++] = in_sig_bt[i];
++ }
++ }
++ }
++
++
++ // Compute the wrapper's frame size.
++ // --------------------------------------------------------------------------
++
++ // Now figure out where the args must be stored and how much stack space
++ // they require.
++ //
++ // Compute framesize for the wrapper. We need to handlize all oops in
++ // incoming registers.
++ //
++ // Calculate the total number of stack slots we will need:
++ // 1) abi requirements
++ // 2) outgoing arguments
++ // 3) space for inbound oop handle area
++ // 4) space for handlizing a klass if static method
++ // 5) space for a lock if synchronized method
++ // 6) workspace for saving return values, int <-> float reg moves, etc.
++ // 7) alignment
++ //
++ // Layout of the native wrapper frame:
++ // (stack grows upwards, memory grows downwards)
++ //
++ // NW [ABI_REG_ARGS] <-- 1) R1_SP
++ // [outgoing arguments] <-- 2) R1_SP + out_arg_slot_offset
++ // [oopHandle area] <-- 3) R1_SP + oop_handle_offset (save area for critical natives)
++ // klass <-- 4) R1_SP + klass_offset
++ // lock <-- 5) R1_SP + lock_offset
++ // [workspace] <-- 6) R1_SP + workspace_offset
++ // [alignment] (optional) <-- 7)
++ // caller [JIT_TOP_ABI_48] <-- r_callers_sp
++ //
++ // - *_slot_offset Indicates offset from SP in number of stack slots.
++ // - *_offset Indicates offset from SP in bytes.
++
++ int stack_slots = c_calling_convention(out_sig_bt, out_regs, out_regs2, total_c_args) // 1+2)
++ + SharedRuntime::out_preserve_stack_slots(); // See c_calling_convention.
++
++ // Now the space for the inbound oop handle area.
++ int total_save_slots = num_java_iarg_registers * VMRegImpl::slots_per_word;
++ if (is_critical_native) {
++ // Critical natives may have to call out so they need a save area
++ // for register arguments.
++ int double_slots = 0;
++ int single_slots = 0;
++ for (int i = 0; i < total_in_args; i++) {
++ if (in_regs[i].first()->is_Register()) {
++ const Register reg = in_regs[i].first()->as_Register();
++ switch (in_sig_bt[i]) {
++ case T_BOOLEAN:
++ case T_BYTE:
++ case T_SHORT:
++ case T_CHAR:
++ case T_INT: /*single_slots++;*/ break; // PPC64: pass ints as longs.
++ case T_ARRAY:
++ case T_LONG: double_slots++; break;
++ default: ShouldNotReachHere();
++ }
++ } else if (in_regs[i].first()->is_FloatRegister()) {
++ switch (in_sig_bt[i]) {
++ case T_FLOAT: single_slots++; break;
++ case T_DOUBLE: double_slots++; break;
++ default: ShouldNotReachHere();
++ }
++ }
++ }
++ total_save_slots = double_slots * 2 + round_to(single_slots, 2); // round to even
++ }
++
++ int oop_handle_slot_offset = stack_slots;
++ stack_slots += total_save_slots; // 3)
++
++ int klass_slot_offset = 0;
++ int klass_offset = -1;
++ if (method_is_static && !is_critical_native) { // 4)
++ klass_slot_offset = stack_slots;
++ klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size;
++ stack_slots += VMRegImpl::slots_per_word;
++ }
++
++ int lock_slot_offset = 0;
++ int lock_offset = -1;
++ if (method->is_synchronized()) { // 5)
++ lock_slot_offset = stack_slots;
++ lock_offset = lock_slot_offset * VMRegImpl::stack_slot_size;
++ stack_slots += VMRegImpl::slots_per_word;
++ }
++
++ int workspace_slot_offset = stack_slots; // 6)
++ stack_slots += 2;
++
++ // Now compute actual number of stack words we need.
++ // Rounding to make stack properly aligned.
++ stack_slots = round_to(stack_slots, // 7)
++ frame::alignment_in_bytes / VMRegImpl::stack_slot_size);
++ int frame_size_in_bytes = stack_slots * VMRegImpl::stack_slot_size;
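++
++  // Worked example (illustrative, assuming 4-byte stack slots and
++  // 16-byte frame alignment): 51 raw slots round up to 52, i.e. a
++  // frame of 52 * 4 = 208 bytes.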
++
++
++ // Now we can start generating code.
++ // --------------------------------------------------------------------------
++
++ intptr_t start_pc = (intptr_t)__ pc();
++ intptr_t vep_start_pc;
++ intptr_t frame_done_pc;
++ intptr_t oopmap_pc;
++
++ Label ic_miss;
++ Label handle_pending_exception;
++
++ Register r_callers_sp = R21;
++ Register r_temp_1 = R22;
++ Register r_temp_2 = R23;
++ Register r_temp_3 = R24;
++ Register r_temp_4 = R25;
++ Register r_temp_5 = R26;
++ Register r_temp_6 = R27;
++ Register r_return_pc = R28;
++
++ Register r_carg1_jnienv = noreg;
++ Register r_carg2_classorobject = noreg;
++ if (!is_critical_native) {
++ r_carg1_jnienv = out_regs[0].first()->as_Register();
++ r_carg2_classorobject = out_regs[1].first()->as_Register();
++ }
++
++
++ // Generate the Unverified Entry Point (UEP).
++ // --------------------------------------------------------------------------
++ assert(start_pc == (intptr_t)__ pc(), "uep must be at start");
++
++ // Check ic: object class == cached class?
++ if (!method_is_static) {
++ Register ic = as_Register(Matcher::inline_cache_reg_encode());
++ Register receiver_klass = r_temp_1;
++
++ __ cmpdi(CCR0, R3_ARG1, 0);
++ __ beq(CCR0, ic_miss);
++ __ verify_oop(R3_ARG1);
++ __ load_klass(receiver_klass, R3_ARG1);
++
++ __ cmpd(CCR0, receiver_klass, ic);
++ __ bne(CCR0, ic_miss);
++ }
++
++
++ // Generate the Verified Entry Point (VEP).
++ // --------------------------------------------------------------------------
++ vep_start_pc = (intptr_t)__ pc();
++
++ __ save_LR_CR(r_temp_1);
++ __ generate_stack_overflow_check(frame_size_in_bytes); // Check before creating frame.
++ __ mr(r_callers_sp, R1_SP); // Remember frame pointer.
++ __ push_frame(frame_size_in_bytes, r_temp_1); // Push the c2n adapter's frame.
++ frame_done_pc = (intptr_t)__ pc();
++
++  // Native nmethod wrappers never take possession of the oop arguments.
++ // So the caller will gc the arguments.
++ // The only thing we need an oopMap for is if the call is static.
++ //
++ // An OopMap for lock (and class if static), and one for the VM call itself.
++ OopMapSet *oop_maps = new OopMapSet();
++ OopMap *oop_map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
++
++ if (is_critical_native) {
++ check_needs_gc_for_critical_native(masm, stack_slots, total_in_args, oop_handle_slot_offset, oop_maps, in_regs, in_sig_bt, r_temp_1);
++ }
++
++ // Move arguments from register/stack to register/stack.
++ // --------------------------------------------------------------------------
++ //
++ // We immediately shuffle the arguments so that for any vm call we have
++ // to make from here on out (sync slow path, jvmti, etc.) we will have
++ // captured the oops from our caller and have a valid oopMap for them.
++ //
++ // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
++ // (derived from JavaThread* which is in R16_thread) and, if static,
++ // the class mirror instead of a receiver. This pretty much guarantees that
++ // register layout will not match. We ignore these extra arguments during
++ // the shuffle. The shuffle is described by the two calling convention
++ // vectors we have in our possession. We simply walk the java vector to
++ // get the source locations and the c vector to get the destinations.
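++  //
++  // Illustrative example (not part of the original comment, and glossing
++  // over the int-as-long rewriting above): for a non-static native method
++  // with one int argument i, the Java-side vector describes (receiver, i)
++  // while the C-side vector describes (JNIEnv*, receiver, i); the loop
++  // below moves i and the receiver, and the extra JNIEnv* slot is filled
++  // in separately afterwards.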
++
++ // Record sp-based slot for receiver on stack for non-static methods.
++ int receiver_offset = -1;
++
++  // We move the arguments backwards, because the destination of a
++  // floating point argument is always a register with a greater or
++  // equal register number, or the stack.
++ // in is the index of the incoming Java arguments
++ // out is the index of the outgoing C arguments
++
++#ifdef ASSERT
++ bool reg_destroyed[RegisterImpl::number_of_registers];
++ bool freg_destroyed[FloatRegisterImpl::number_of_registers];
++ for (int r = 0 ; r < RegisterImpl::number_of_registers ; r++) {
++ reg_destroyed[r] = false;
++ }
++ for (int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++) {
++ freg_destroyed[f] = false;
++ }
++#endif // ASSERT
++
++ for (int in = total_in_args - 1, out = total_c_args - 1; in >= 0 ; in--, out--) {
++
++#ifdef ASSERT
++ if (in_regs[in].first()->is_Register()) {
++ assert(!reg_destroyed[in_regs[in].first()->as_Register()->encoding()], "ack!");
++ } else if (in_regs[in].first()->is_FloatRegister()) {
++ assert(!freg_destroyed[in_regs[in].first()->as_FloatRegister()->encoding()], "ack!");
++ }
++ if (out_regs[out].first()->is_Register()) {
++ reg_destroyed[out_regs[out].first()->as_Register()->encoding()] = true;
++ } else if (out_regs[out].first()->is_FloatRegister()) {
++ freg_destroyed[out_regs[out].first()->as_FloatRegister()->encoding()] = true;
++ }
++ if (out_regs2[out].first()->is_Register()) {
++ reg_destroyed[out_regs2[out].first()->as_Register()->encoding()] = true;
++ } else if (out_regs2[out].first()->is_FloatRegister()) {
++ freg_destroyed[out_regs2[out].first()->as_FloatRegister()->encoding()] = true;
++ }
++#endif // ASSERT
++
++ switch (in_sig_bt[in]) {
++ case T_BOOLEAN:
++ case T_CHAR:
++ case T_BYTE:
++ case T_SHORT:
++ case T_INT:
++ guarantee(in > 0 && in_sig_bt[in-1] == T_LONG,
++ "expecting type (T_LONG,bt) for bt in {T_BOOLEAN, T_CHAR, T_BYTE, T_SHORT, T_INT}");
++ break;
++ case T_LONG:
++ if (in_sig_bt[in+1] == T_VOID) {
++ long_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1);
++ } else {
++ guarantee(in_sig_bt[in+1] == T_BOOLEAN || in_sig_bt[in+1] == T_CHAR ||
++ in_sig_bt[in+1] == T_BYTE || in_sig_bt[in+1] == T_SHORT ||
++ in_sig_bt[in+1] == T_INT,
++ "expecting type (T_LONG,bt) for bt in {T_BOOLEAN, T_CHAR, T_BYTE, T_SHORT, T_INT}");
++ int_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1);
++ }
++ break;
++ case T_ARRAY:
++ if (is_critical_native) {
++ int body_arg = out;
++ out -= 2; // Point to length arg. PPC64: pass ints as longs.
++ unpack_array_argument(masm, in_regs[in], in_elem_bt[in], out_regs[body_arg], out_regs[out],
++ r_callers_sp, r_temp_1, r_temp_2);
++ break;
++ }
++ case T_OBJECT:
++ assert(!is_critical_native, "no oop arguments");
++ object_move(masm, stack_slots,
++ oop_map, oop_handle_slot_offset,
++ ((in == 0) && (!method_is_static)), &receiver_offset,
++ in_regs[in], out_regs[out],
++ r_callers_sp, r_temp_1, r_temp_2);
++ break;
++ case T_VOID:
++ break;
++ case T_FLOAT:
++ float_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1);
++ if (out_regs2[out].first()->is_valid()) {
++ float_move(masm, in_regs[in], out_regs2[out], r_callers_sp, r_temp_1);
++ }
++ break;
++ case T_DOUBLE:
++ double_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1);
++ if (out_regs2[out].first()->is_valid()) {
++ double_move(masm, in_regs[in], out_regs2[out], r_callers_sp, r_temp_1);
++ }
++ break;
++ case T_ADDRESS:
++ fatal("found type (T_ADDRESS) in java args");
++ break;
++ default:
++ ShouldNotReachHere();
++ break;
++ }
++ }
++
++ // Pre-load a static method's oop into ARG2.
++ // Used both by locking code and the normal JNI call code.
++ if (method_is_static && !is_critical_native) {
++ __ set_oop_constant(JNIHandles::make_local(method->method_holder()->java_mirror()),
++ r_carg2_classorobject);
++
++ // Now handlize the static class mirror in carg2. It's known not-null.
++ __ std(r_carg2_classorobject, klass_offset, R1_SP);
++ oop_map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
++ __ addi(r_carg2_classorobject, R1_SP, klass_offset);
++ }
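++
++  // (In other words, "handlizing" above means storing the oop into the
++  // frame and passing the address of that stack slot instead of the raw
++  // oop.)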
++
++ // Get JNIEnv* which is first argument to native.
++ if (!is_critical_native) {
++ __ addi(r_carg1_jnienv, R16_thread, in_bytes(JavaThread::jni_environment_offset()));
++ }
++
++ // NOTE:
++ //
++ // We have all of the arguments setup at this point.
++ // We MUST NOT touch any outgoing regs from this point on.
++ // So if we must call out we must push a new frame.
++
++ // Get current pc for oopmap, and load it patchable relative to global toc.
++ oopmap_pc = (intptr_t) __ pc();
++ __ calculate_address_from_global_toc(r_return_pc, (address)oopmap_pc, true, true, true, true);
++
++ // We use the same pc/oopMap repeatedly when we call out.
++ oop_maps->add_gc_map(oopmap_pc - start_pc, oop_map);
++
++ // r_return_pc now has the pc loaded that we will use when we finally call
++ // to native.
++
++ // Make sure that thread is non-volatile; it crosses a bunch of VM calls below.
++ assert(R16_thread->is_nonvolatile(), "thread must be in non-volatile register");
++
++
++# if 0
++ // DTrace method entry
++# endif
++
++ // Lock a synchronized method.
++ // --------------------------------------------------------------------------
++
++ if (method->is_synchronized()) {
++ assert(!is_critical_native, "unhandled");
++ ConditionRegister r_flag = CCR1;
++ Register r_oop = r_temp_4;
++ const Register r_box = r_temp_5;
++ Label done, locked;
++
++ // Load the oop for the object or class. r_carg2_classorobject contains
++ // either the handlized oop from the incoming arguments or the handlized
++ // class mirror (if the method is static).
++ __ ld(r_oop, 0, r_carg2_classorobject);
++
++ // Get the lock box slot's address.
++ __ addi(r_box, R1_SP, lock_offset);
++
++# ifdef ASSERT
++ if (UseBiasedLocking) {
++ // Making the box point to itself will make it clear it went unused
++ // but also be obviously invalid.
++ __ std(r_box, 0, r_box);
++ }
++# endif // ASSERT
++
++ // Try fastpath for locking.
++ // fast_lock kills r_temp_1, r_temp_2, r_temp_3.
++ __ compiler_fast_lock_object(r_flag, r_oop, r_box, r_temp_1, r_temp_2, r_temp_3);
++ __ beq(r_flag, locked);
++
++ // None of the above fast optimizations worked so we have to get into the
++ // slow case of monitor enter. Inline a special case of call_VM that
++ // disallows any pending_exception.
++
++ // Save argument registers and leave room for C-compatible ABI_REG_ARGS.
++ int frame_size = frame::abi_reg_args_size +
++ round_to(total_c_args * wordSize, frame::alignment_in_bytes);
++ __ mr(R11_scratch1, R1_SP);
++ RegisterSaver::push_frame_and_save_argument_registers(masm, R12_scratch2, frame_size, total_c_args, out_regs, out_regs2);
++
++ // Do the call.
++ __ set_last_Java_frame(R11_scratch1, r_return_pc);
++ assert(r_return_pc->is_nonvolatile(), "expecting return pc to be in non-volatile register");
++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), r_oop, r_box, R16_thread);
++ __ reset_last_Java_frame();
++
++ RegisterSaver::restore_argument_registers_and_pop_frame(masm, frame_size, total_c_args, out_regs, out_regs2);
++
++ __ asm_assert_mem8_is_zero(thread_(pending_exception),
++ "no pending exception allowed on exit from SharedRuntime::complete_monitor_locking_C", 0);
++
++ __ bind(locked);
++ }
++
++
++ // Publish thread state
++ // --------------------------------------------------------------------------
++
++ // Use that pc we placed in r_return_pc a while back as the current frame anchor.
++ __ set_last_Java_frame(R1_SP, r_return_pc);
++
++ // Transition from _thread_in_Java to _thread_in_native.
++ __ li(R0, _thread_in_native);
++ __ release();
++ // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
++ __ stw(R0, thread_(thread_state));
++ if (UseMembar) {
++ __ fence();
++ }
++
++
++ // The JNI call
++ // --------------------------------------------------------------------------
++#if defined(ABI_ELFv2)
++ __ call_c(native_func, relocInfo::runtime_call_type);
++#else
++ FunctionDescriptor* fd_native_method = (FunctionDescriptor*) native_func;
++ __ call_c(fd_native_method, relocInfo::runtime_call_type);
++#endif
++
++
++ // Now, we are back from the native code.
++
++
++ // Unpack the native result.
++ // --------------------------------------------------------------------------
++
++ // For int-types, we do any needed sign-extension required.
++ // Care must be taken that the return values (R3_RET and F1_RET)
++ // will survive any VM calls for blocking or unlocking.
++ // An OOP result (handle) is done specially in the slow-path code.
++
++ switch (ret_type) {
++ case T_VOID: break; // Nothing to do!
++ case T_FLOAT: break; // Got it where we want it (unless slow-path).
++ case T_DOUBLE: break; // Got it where we want it (unless slow-path).
++ case T_LONG: break; // Got it where we want it (unless slow-path).
++ case T_OBJECT: break; // Really a handle.
++ // Cannot de-handlize until after reclaiming jvm_lock.
++ case T_ARRAY: break;
++
++ case T_BOOLEAN: { // 0 -> false(0); !0 -> true(1)
++ Label skip_modify;
++ __ cmpwi(CCR0, R3_RET, 0);
++ __ beq(CCR0, skip_modify);
++ __ li(R3_RET, 1);
++ __ bind(skip_modify);
++ break;
++ }
++ case T_BYTE: { // sign extension
++ __ extsb(R3_RET, R3_RET);
++ break;
++ }
++ case T_CHAR: { // unsigned result
++ __ andi(R3_RET, R3_RET, 0xffff);
++ break;
++ }
++ case T_SHORT: { // sign extension
++ __ extsh(R3_RET, R3_RET);
++ break;
++ }
++ case T_INT: // nothing to do
++ break;
++ default:
++ ShouldNotReachHere();
++ break;
++ }
++
++
++ // Publish thread state
++ // --------------------------------------------------------------------------
++
++ // Switch thread to "native transition" state before reading the
++ // synchronization state. This additional state is necessary because reading
++ // and testing the synchronization state is not atomic w.r.t. GC, as this
++ // scenario demonstrates:
++ // - Java thread A, in _thread_in_native state, loads _not_synchronized
++ // and is preempted.
++ // - VM thread changes sync state to synchronizing and suspends threads
++ // for GC.
++ // - Thread A is resumed to finish this native method, but doesn't block
++ // here since it didn't see any synchronization in progress, and escapes.
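++  //
++  // Summarized, the intended sequence on this path is:
++  // _thread_in_native -> _thread_in_native_trans -> (block if required)
++  // -> _thread_in_Java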
++
++ // Transition from _thread_in_native to _thread_in_native_trans.
++ __ li(R0, _thread_in_native_trans);
++ __ release();
++ // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
++ __ stw(R0, thread_(thread_state));
++
++
++ // Must we block?
++ // --------------------------------------------------------------------------
++
++ // Block, if necessary, before resuming in _thread_in_Java state.
++ // In order for GC to work, don't clear the last_Java_sp until after blocking.
++ Label after_transition;
++ {
++ Label no_block, sync;
++
++ if (os::is_MP()) {
++ if (UseMembar) {
++ // Force this write out before the read below.
++ __ fence();
++ } else {
++ // Write serialization page so VM thread can do a pseudo remote membar.
++ // We use the current thread pointer to calculate a thread specific
++ // offset to write to within the page. This minimizes bus traffic
++ // due to cache line collision.
++ __ serialize_memory(R16_thread, r_temp_4, r_temp_5);
++ }
++ }
++
++ Register sync_state_addr = r_temp_4;
++ Register sync_state = r_temp_5;
++ Register suspend_flags = r_temp_6;
++
++ __ load_const(sync_state_addr, SafepointSynchronize::address_of_state(), /*temp*/ sync_state);
++
++ // TODO: PPC port assert(4 == SafepointSynchronize::sz_state(), "unexpected field size");
++ __ lwz(sync_state, 0, sync_state_addr);
++
++ // TODO: PPC port assert(4 == Thread::sz_suspend_flags(), "unexpected field size");
++ __ lwz(suspend_flags, thread_(suspend_flags));
++
++ __ acquire();
++
++ Label do_safepoint;
++ // No synchronization in progress nor yet synchronized.
++ __ cmpwi(CCR0, sync_state, SafepointSynchronize::_not_synchronized);
++ // Not suspended.
++ __ cmpwi(CCR1, suspend_flags, 0);
++
++ __ bne(CCR0, sync);
++ __ beq(CCR1, no_block);
++
++ // Block. Save any potential method result value before the operation and
++ // use a leaf call to leave the last_Java_frame setup undisturbed. Doing this
++ // lets us share the oopMap we used when we went native rather than create
++ // a distinct one for this pc.
++ __ bind(sync);
++
++ address entry_point = is_critical_native
++ ? CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)
++ : CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans);
++ save_native_result(masm, ret_type, workspace_slot_offset);
++ __ call_VM_leaf(entry_point, R16_thread);
++ restore_native_result(masm, ret_type, workspace_slot_offset);
++
++ if (is_critical_native) {
++ __ b(after_transition); // No thread state transition here.
++ }
++ __ bind(no_block);
++ }
++
++ // Publish thread state.
++ // --------------------------------------------------------------------------
++
++ // Thread state is thread_in_native_trans. Any safepoint blocking has
++ // already happened so we can now change state to _thread_in_Java.
++
++ // Transition from _thread_in_native_trans to _thread_in_Java.
++ __ li(R0, _thread_in_Java);
++ __ release();
++ // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
++ __ stw(R0, thread_(thread_state));
++ if (UseMembar) {
++ __ fence();
++ }
++ __ bind(after_transition);
++
++ // Reguard any pages if necessary.
++ // --------------------------------------------------------------------------
++
++ Label no_reguard;
++ __ lwz(r_temp_1, thread_(stack_guard_state));
++ __ cmpwi(CCR0, r_temp_1, JavaThread::stack_guard_yellow_disabled);
++ __ bne(CCR0, no_reguard);
++
++ save_native_result(masm, ret_type, workspace_slot_offset);
++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
++ restore_native_result(masm, ret_type, workspace_slot_offset);
++
++ __ bind(no_reguard);
++
++
++ // Unlock
++ // --------------------------------------------------------------------------
++
++ if (method->is_synchronized()) {
++
++ ConditionRegister r_flag = CCR1;
++ const Register r_oop = r_temp_4;
++ const Register r_box = r_temp_5;
++ const Register r_exception = r_temp_6;
++ Label done;
++
++ // Get oop and address of lock object box.
++ if (method_is_static) {
++ assert(klass_offset != -1, "");
++ __ ld(r_oop, klass_offset, R1_SP);
++ } else {
++ assert(receiver_offset != -1, "");
++ __ ld(r_oop, receiver_offset, R1_SP);
++ }
++ __ addi(r_box, R1_SP, lock_offset);
++
++ // Try fastpath for unlocking.
++ __ compiler_fast_unlock_object(r_flag, r_oop, r_box, r_temp_1, r_temp_2, r_temp_3);
++ __ beq(r_flag, done);
++
++ // Save and restore any potential method result value around the unlocking operation.
++ save_native_result(masm, ret_type, workspace_slot_offset);
++
++ // Must save pending exception around the slow-path VM call. Since it's a
++ // leaf call, the pending exception (if any) can be kept in a register.
++ __ ld(r_exception, thread_(pending_exception));
++ assert(r_exception->is_nonvolatile(), "exception register must be non-volatile");
++ __ li(R0, 0);
++ __ std(R0, thread_(pending_exception));
++
++    // Slow case of monitor exit.
++    // Inline a special case of call_VM that disallows any pending_exception.
++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), r_oop, r_box);
++
++ __ asm_assert_mem8_is_zero(thread_(pending_exception),
++ "no pending exception allowed on exit from SharedRuntime::complete_monitor_unlocking_C", 0);
++
++ restore_native_result(masm, ret_type, workspace_slot_offset);
++
++    // The check below jumps to forward_exception if any pending exception
++    // is set. The forward_exception routine expects the exception in
++    // pending_exception and not in a register. Kind of clumsy, since
++    // everyone who branches to forward_exception must have tested
++    // pending_exception first and hence already has it in a register.
++ __ std(r_exception, thread_(pending_exception));
++
++ __ bind(done);
++ }
++
++# if 0
++ // DTrace method exit
++# endif
++
++ // Clear "last Java frame" SP and PC.
++ // --------------------------------------------------------------------------
++
++ __ reset_last_Java_frame();
++
++ // Unpack oop result.
++ // --------------------------------------------------------------------------
++
++ if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
++ Label skip_unboxing;
++ __ cmpdi(CCR0, R3_RET, 0);
++ __ beq(CCR0, skip_unboxing);
++ __ ld(R3_RET, 0, R3_RET);
++ __ bind(skip_unboxing);
++ __ verify_oop(R3_RET);
++ }
++
++
++ // Reset handle block.
++ // --------------------------------------------------------------------------
++ if (!is_critical_native) {
++ __ ld(r_temp_1, thread_(active_handles));
++ // TODO: PPC port assert(4 == JNIHandleBlock::top_size_in_bytes(), "unexpected field size");
++ __ li(r_temp_2, 0);
++ __ stw(r_temp_2, JNIHandleBlock::top_offset_in_bytes(), r_temp_1);
++
++
++ // Check for pending exceptions.
++ // --------------------------------------------------------------------------
++ __ ld(r_temp_2, thread_(pending_exception));
++ __ cmpdi(CCR0, r_temp_2, 0);
++ __ bne(CCR0, handle_pending_exception);
++ }
++
++ // Return
++ // --------------------------------------------------------------------------
++
++ __ pop_frame();
++ __ restore_LR_CR(R11);
++ __ blr();
++
++
++ // Handler for pending exceptions (out-of-line).
++ // --------------------------------------------------------------------------
++
++ // Since this is a native call, we know the proper exception handler
++ // is the empty function. We just pop this frame and then jump to
++ // forward_exception_entry.
++ if (!is_critical_native) {
++ __ align(InteriorEntryAlignment);
++ __ bind(handle_pending_exception);
++
++ __ pop_frame();
++ __ restore_LR_CR(R11);
++ __ b64_patchable((address)StubRoutines::forward_exception_entry(),
++ relocInfo::runtime_call_type);
++ }
++
++  // Handler for an inline cache (IC) miss (out-of-line).
++ // --------------------------------------------------------------------------
++
++ if (!method_is_static) {
++ __ align(InteriorEntryAlignment);
++ __ bind(ic_miss);
++
++ __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(),
++ relocInfo::runtime_call_type);
++ }
++
++ // Done.
++ // --------------------------------------------------------------------------
++
++ __ flush();
++
++ nmethod *nm = nmethod::new_native_nmethod(method,
++ compile_id,
++ masm->code(),
++ vep_start_pc-start_pc,
++ frame_done_pc-start_pc,
++ stack_slots / VMRegImpl::slots_per_word,
++ (method_is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
++ in_ByteSize(lock_offset),
++ oop_maps);
++
++ if (is_critical_native) {
++ nm->set_lazy_critical_native(true);
++ }
++
++ return nm;
++#else
++ ShouldNotReachHere();
++ return NULL;
++#endif // COMPILER2
++}
++
++// This function returns the adjustment size (in number of words) applied
++// to a c2i adapter activation, for use during deoptimization.
++int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
++ return round_to((callee_locals - callee_parameters) * Interpreter::stackElementWords, frame::alignment_in_bytes);
++}
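++
++// Worked example (illustrative): with callee_locals = 10 and
++// callee_parameters = 3, the adjustment is (10 - 3) stack-element words,
++// rounded up to the frame alignment.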
++
++uint SharedRuntime::out_preserve_stack_slots() {
++#ifdef COMPILER2
++ return frame::jit_out_preserve_size / VMRegImpl::stack_slot_size;
++#else
++ return 0;
++#endif
++}
++
++#ifdef COMPILER2
++// Frame generation for deopt and uncommon trap blobs.
++static void push_skeleton_frame(MacroAssembler* masm, bool deopt,
++ /* Read */
++ Register unroll_block_reg,
++ /* Update */
++ Register frame_sizes_reg,
++ Register number_of_frames_reg,
++ Register pcs_reg,
++ /* Invalidate */
++ Register frame_size_reg,
++ Register pc_reg) {
++
++ __ ld(pc_reg, 0, pcs_reg);
++ __ ld(frame_size_reg, 0, frame_sizes_reg);
++ __ std(pc_reg, _abi(lr), R1_SP);
++ __ push_frame(frame_size_reg, R0/*tmp*/);
++#ifdef CC_INTERP
++ __ std(R1_SP, _parent_ijava_frame_abi(initial_caller_sp), R1_SP);
++#else
++#ifdef ASSERT
++ __ load_const_optimized(pc_reg, 0x5afe);
++ __ std(pc_reg, _ijava_state_neg(ijava_reserved), R1_SP);
++#endif
++ __ std(R1_SP, _ijava_state_neg(sender_sp), R1_SP);
++#endif // CC_INTERP
++ __ addi(number_of_frames_reg, number_of_frames_reg, -1);
++ __ addi(frame_sizes_reg, frame_sizes_reg, wordSize);
++ __ addi(pcs_reg, pcs_reg, wordSize);
++}
++
++// Loop through the UnrollBlock info and create new frames.
++static void push_skeleton_frames(MacroAssembler* masm, bool deopt,
++ /* read */
++ Register unroll_block_reg,
++ /* invalidate */
++ Register frame_sizes_reg,
++ Register number_of_frames_reg,
++ Register pcs_reg,
++ Register frame_size_reg,
++ Register pc_reg) {
++ Label loop;
++
++ // _number_of_frames is of type int (deoptimization.hpp)
++ __ lwa(number_of_frames_reg,
++ Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes(),
++ unroll_block_reg);
++ __ ld(pcs_reg,
++ Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes(),
++ unroll_block_reg);
++ __ ld(frame_sizes_reg,
++ Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes(),
++ unroll_block_reg);
++
++ // stack: (caller_of_deoptee, ...).
++
++  // At this point we either have an interpreter frame or a compiled
++  // frame on top of the stack. If it is a compiled frame, we push a new
++  // c2i adapter here.
++
++ // Memorize top-frame stack-pointer.
++ __ mr(frame_size_reg/*old_sp*/, R1_SP);
++
++ // Resize interpreter top frame OR C2I adapter.
++
++ // At this moment, the top frame (which is the caller of the deoptee) is
++ // an interpreter frame or a newly pushed C2I adapter or an entry frame.
++ // The top frame has a TOP_IJAVA_FRAME_ABI and the frame contains the
++ // outgoing arguments.
++ //
++ // In order to push the interpreter frame for the deoptee, we need to
++ // resize the top frame such that we are able to place the deoptee's
++ // locals in the frame.
++ // Additionally, we have to turn the top frame's TOP_IJAVA_FRAME_ABI
++ // into a valid PARENT_IJAVA_FRAME_ABI.
++
++ __ lwa(R11_scratch1,
++ Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes(),
++ unroll_block_reg);
++ __ neg(R11_scratch1, R11_scratch1);
++
++ // R11_scratch1 contains size of locals for frame resizing.
++ // R12_scratch2 contains top frame's lr.
++
++  // Resizing by the complete frame size prevents the TOC from being
++  // overwritten by locals. A more stack-space-saving approach would be
++  // to copy the TOC to its location in the new ABI.
++ __ addi(R11_scratch1, R11_scratch1, - frame::parent_ijava_frame_abi_size);
++
++ // now, resize the frame
++ __ resize_frame(R11_scratch1, pc_reg/*tmp*/);
++
++ // In the case where we have resized a c2i frame above, the optional
++ // alignment below the locals has size 32 (why?).
++ __ std(R12_scratch2, _abi(lr), R1_SP);
++
++ // Initialize initial_caller_sp.
++#ifdef CC_INTERP
++ __ std(frame_size_reg/*old_sp*/, _parent_ijava_frame_abi(initial_caller_sp), R1_SP);
++#else
++#ifdef ASSERT
++ __ load_const_optimized(pc_reg, 0x5afe);
++ __ std(pc_reg, _ijava_state_neg(ijava_reserved), R1_SP);
++#endif
++ __ std(frame_size_reg, _ijava_state_neg(sender_sp), R1_SP);
++#endif // CC_INTERP
++
++#ifdef ASSERT
++ // Make sure that there is at least one entry in the array.
++ __ cmpdi(CCR0, number_of_frames_reg, 0);
++ __ asm_assert_ne("array_size must be > 0", 0x205);
++#endif
++
++ // Now push the new interpreter frames.
++ //
++ __ bind(loop);
++ // Allocate a new frame, fill in the pc.
++ push_skeleton_frame(masm, deopt,
++ unroll_block_reg,
++ frame_sizes_reg,
++ number_of_frames_reg,
++ pcs_reg,
++ frame_size_reg,
++ pc_reg);
++ __ cmpdi(CCR0, number_of_frames_reg, 0);
++ __ bne(CCR0, loop);
++
++ // Get the return address pointing into the frame manager.
++ __ ld(R0, 0, pcs_reg);
++ // Store it in the top interpreter frame.
++ __ std(R0, _abi(lr), R1_SP);
++ // Initialize frame_manager_lr of interpreter top frame.
++#ifdef CC_INTERP
++ __ std(R0, _top_ijava_frame_abi(frame_manager_lr), R1_SP);
++#endif
++}
++#endif
++
++void SharedRuntime::generate_deopt_blob() {
++ // Allocate space for the code
++ ResourceMark rm;
++ // Setup code generation tools
++ CodeBuffer buffer("deopt_blob", 2048, 1024);
++ InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
++ Label exec_mode_initialized;
++ int frame_size_in_words;
++ OopMap* map = NULL;
++ OopMapSet *oop_maps = new OopMapSet();
++
++ // size of ABI112 plus spill slots for R3_RET and F1_RET.
++ const int frame_size_in_bytes = frame::abi_reg_args_spill_size;
++ const int frame_size_in_slots = frame_size_in_bytes / sizeof(jint);
++ int first_frame_size_in_bytes = 0; // frame size of "unpack frame" for call to fetch_unroll_info.
++
++ const Register exec_mode_reg = R21_tmp1;
++
++ const address start = __ pc();
++
++#ifdef COMPILER2
++ // --------------------------------------------------------------------------
++  // Prolog for the non-exception case.
++
++ // We have been called from the deopt handler of the deoptee.
++ //
++ // deoptee:
++ // ...
++ // call X
++ // ...
++ // deopt_handler: call_deopt_stub
++ // cur. return pc --> ...
++ //
++ // So currently SR_LR points behind the call in the deopt handler.
++ // We adjust it such that it points to the start of the deopt handler.
++ // The return_pc has been stored in the frame of the deoptee and
++ // will replace the address of the deopt_handler in the call
++ // to Deoptimization::fetch_unroll_info below.
++ // We can't grab a free register here, because all registers may
++ // contain live values, so let the RegisterSaver do the adjustment
++ // of the return pc.
++ const int return_pc_adjustment_no_exception = -HandlerImpl::size_deopt_handler();
++
++ // Push the "unpack frame"
++ // Save everything in sight.
++ map = RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
++ &first_frame_size_in_bytes,
++ /*generate_oop_map=*/ true,
++ return_pc_adjustment_no_exception,
++ RegisterSaver::return_pc_is_lr);
++ assert(map != NULL, "OopMap must have been created");
++
++ __ li(exec_mode_reg, Deoptimization::Unpack_deopt);
++ // Save exec mode for unpack_frames.
++ __ b(exec_mode_initialized);
++
++ // --------------------------------------------------------------------------
++ // Prolog for exception case
++
++ // An exception is pending.
++ // We have been called with a return (interpreter) or a jump (exception blob).
++ //
++ // - R3_ARG1: exception oop
++ // - R4_ARG2: exception pc
++
++ int exception_offset = __ pc() - start;
++
++ BLOCK_COMMENT("Prolog for exception case");
++
++  // The RegisterSaver doesn't need to adjust the return pc for this situation.
++ const int return_pc_adjustment_exception = 0;
++
++ // Push the "unpack frame".
++ // Save everything in sight.
++ assert(R4 == R4_ARG2, "exception pc must be in r4");
++ RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
++ &first_frame_size_in_bytes,
++ /*generate_oop_map=*/ false,
++ return_pc_adjustment_exception,
++ RegisterSaver::return_pc_is_r4);
++
++ // Deopt during an exception. Save exec mode for unpack_frames.
++ __ li(exec_mode_reg, Deoptimization::Unpack_exception);
++
++ // Store exception oop and pc in thread (location known to GC).
++ // This is needed since the call to "fetch_unroll_info()" may safepoint.
++ __ std(R3_ARG1, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
++ __ std(R4_ARG2, in_bytes(JavaThread::exception_pc_offset()), R16_thread);
++
++ // fall through
++
++ // --------------------------------------------------------------------------
++ __ BIND(exec_mode_initialized);
++
++ {
++ const Register unroll_block_reg = R22_tmp2;
++
++ // We need to set `last_Java_frame' because `fetch_unroll_info' will
++ // call `last_Java_frame()'. The value of the pc in the frame is not
++ // particularly important. It just needs to identify this blob.
++ __ set_last_Java_frame(R1_SP, noreg);
++
++ // With EscapeAnalysis turned on, this call may safepoint!
++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), R16_thread);
++ address calls_return_pc = __ last_calls_return_pc();
++ // Set an oopmap for the call site that describes all our saved registers.
++ oop_maps->add_gc_map(calls_return_pc - start, map);
++
++ __ reset_last_Java_frame();
++ // Save the return value.
++ __ mr(unroll_block_reg, R3_RET);
++
++ // Restore only the result registers that have been saved
++ // by save_volatile_registers(...).
++ RegisterSaver::restore_result_registers(masm, first_frame_size_in_bytes);
++
++ // In excp_deopt_mode, restore and clear exception oop which we
++ // stored in the thread during exception entry above. The exception
++ // oop will be the return value of this stub.
++ Label skip_restore_excp;
++ __ cmpdi(CCR0, exec_mode_reg, Deoptimization::Unpack_exception);
++ __ bne(CCR0, skip_restore_excp);
++ __ ld(R3_RET, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
++ __ ld(R4_ARG2, in_bytes(JavaThread::exception_pc_offset()), R16_thread);
++ __ li(R0, 0);
++ __ std(R0, in_bytes(JavaThread::exception_pc_offset()), R16_thread);
++ __ std(R0, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
++ __ BIND(skip_restore_excp);
++
++    // Reload narrow_oop_base.
++ if (UseCompressedOops && Universe::narrow_oop_base() != 0) {
++ __ load_const_optimized(R30, Universe::narrow_oop_base());
++ }
++
++ __ pop_frame();
++
++ // stack: (deoptee, optional i2c, caller of deoptee, ...).
++
++ // pop the deoptee's frame
++ __ pop_frame();
++
++ // stack: (caller_of_deoptee, ...).
++
++ // Loop through the `UnrollBlock' info and create interpreter frames.
++ push_skeleton_frames(masm, true/*deopt*/,
++ unroll_block_reg,
++ R23_tmp3,
++ R24_tmp4,
++ R25_tmp5,
++ R26_tmp6,
++ R27_tmp7);
++
++ // stack: (skeletal interpreter frame, ..., optional skeletal
++ // interpreter frame, optional c2i, caller of deoptee, ...).
++ }
++
++ // push an `unpack_frame' taking care of float / int return values.
++ __ push_frame(frame_size_in_bytes, R0/*tmp*/);
++
++ // stack: (unpack frame, skeletal interpreter frame, ..., optional
++ // skeletal interpreter frame, optional c2i, caller of deoptee,
++ // ...).
++
++ // Spill live volatile registers since we'll do a call.
++ __ std( R3_RET, _abi_reg_args_spill(spill_ret), R1_SP);
++ __ stfd(F1_RET, _abi_reg_args_spill(spill_fret), R1_SP);
++
++  // Let the unpacker lay out information in the skeletal frames just
++  // allocated.
++ __ get_PC_trash_LR(R3_RET);
++ __ set_last_Java_frame(/*sp*/R1_SP, /*pc*/R3_RET);
++ // This is a call to a LEAF method, so no oop map is required.
++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames),
++ R16_thread/*thread*/, exec_mode_reg/*exec_mode*/);
++ __ reset_last_Java_frame();
++
++ // Restore the volatiles saved above.
++ __ ld( R3_RET, _abi_reg_args_spill(spill_ret), R1_SP);
++ __ lfd(F1_RET, _abi_reg_args_spill(spill_fret), R1_SP);
++
++ // Pop the unpack frame.
++ __ pop_frame();
++ __ restore_LR_CR(R0);
++
++ // stack: (top interpreter frame, ..., optional interpreter frame,
++ // optional c2i, caller of deoptee, ...).
++
++ // Initialize R14_state.
++#ifdef CC_INTERP
++ __ ld(R14_state, 0, R1_SP);
++ __ addi(R14_state, R14_state, -frame::interpreter_frame_cinterpreterstate_size_in_bytes());
++  // Also initialize R15_prev_state.
++ __ restore_prev_state();
++#else
++ __ restore_interpreter_state(R11_scratch1);
++ __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R11_scratch1);
++#endif // CC_INTERP
++
++
++ // Return to the interpreter entry point.
++ __ blr();
++ __ flush();
++#else // COMPILER2
++ __ unimplemented("deopt blob needed only with compiler");
++ int exception_offset = __ pc() - start;
++#endif // COMPILER2
++
++ _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, 0, first_frame_size_in_bytes / wordSize);
++}
++
++#ifdef COMPILER2
++void SharedRuntime::generate_uncommon_trap_blob() {
++ // Allocate space for the code.
++ ResourceMark rm;
++ // Setup code generation tools.
++ CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
++ InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
++ address start = __ pc();
++
++ Register unroll_block_reg = R21_tmp1;
++ Register klass_index_reg = R22_tmp2;
++ Register unc_trap_reg = R23_tmp3;
++
++ OopMapSet* oop_maps = new OopMapSet();
++ int frame_size_in_bytes = frame::abi_reg_args_size;
++ OopMap* map = new OopMap(frame_size_in_bytes / sizeof(jint), 0);
++
++ // stack: (deoptee, optional i2c, caller_of_deoptee, ...).
++
++ // Push a dummy `unpack_frame' and call
++ // `Deoptimization::uncommon_trap' to pack the compiled frame into a
++ // vframe array and return the `UnrollBlock' information.
++
++ // Save LR to compiled frame.
++ __ save_LR_CR(R11_scratch1);
++
++ // Push an "uncommon_trap" frame.
++ __ push_frame_reg_args(0, R11_scratch1);
++
++ // stack: (unpack frame, deoptee, optional i2c, caller_of_deoptee, ...).
++
++ // Set the `unpack_frame' as last_Java_frame.
++ // `Deoptimization::uncommon_trap' expects it and considers its
++ // sender frame as the deoptee frame.
++ // Remember the offset of the instruction whose address will be
++ // moved to R11_scratch1.
++ address gc_map_pc = __ get_PC_trash_LR(R11_scratch1);
++
++ __ set_last_Java_frame(/*sp*/R1_SP, /*pc*/R11_scratch1);
++
++ __ mr(klass_index_reg, R3);
++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap),
++ R16_thread, klass_index_reg);
++
++ // Set an oopmap for the call site.
++ oop_maps->add_gc_map(gc_map_pc - start, map);
++
++ __ reset_last_Java_frame();
++
++ // Pop the `unpack frame'.
++ __ pop_frame();
++
++ // stack: (deoptee, optional i2c, caller_of_deoptee, ...).
++
++ // Save the return value.
++ __ mr(unroll_block_reg, R3_RET);
++
++ // Pop the uncommon_trap frame.
++ __ pop_frame();
++
++ // stack: (caller_of_deoptee, ...).
++
++ // Allocate new interpreter frame(s) and possibly a c2i adapter
++ // frame.
++ push_skeleton_frames(masm, false/*deopt*/,
++ unroll_block_reg,
++ R22_tmp2,
++ R23_tmp3,
++ R24_tmp4,
++ R25_tmp5,
++ R26_tmp6);
++
++ // stack: (skeletal interpreter frame, ..., optional skeletal
++ // interpreter frame, optional c2i, caller of deoptee, ...).
++
++ // Push a dummy `unpack_frame' taking care of float return values.
++  // Call `Deoptimization::unpack_frames' to lay out information in the
++  // interpreter frames just created.
++
++ // Push a simple "unpack frame" here.
++ __ push_frame_reg_args(0, R11_scratch1);
++
++ // stack: (unpack frame, skeletal interpreter frame, ..., optional
++ // skeletal interpreter frame, optional c2i, caller of deoptee,
++ // ...).
++
++ // Set the "unpack_frame" as last_Java_frame.
++ __ get_PC_trash_LR(R11_scratch1);
++ __ set_last_Java_frame(/*sp*/R1_SP, /*pc*/R11_scratch1);
++
++ // Indicate it is the uncommon trap case.
++ __ li(unc_trap_reg, Deoptimization::Unpack_uncommon_trap);
++  // Let the unpacker lay out information in the skeletal frames just
++  // allocated.
++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames),
++ R16_thread, unc_trap_reg);
++
++ __ reset_last_Java_frame();
++ // Pop the `unpack frame'.
++ __ pop_frame();
++ // Restore LR from top interpreter frame.
++ __ restore_LR_CR(R11_scratch1);
++
++ // stack: (top interpreter frame, ..., optional interpreter frame,
++ // optional c2i, caller of deoptee, ...).
++
++#ifdef CC_INTERP
++ // Initialize R14_state, ...
++ __ ld(R11_scratch1, 0, R1_SP);
++ __ addi(R14_state, R11_scratch1, -frame::interpreter_frame_cinterpreterstate_size_in_bytes());
++ // also initialize R15_prev_state.
++ __ restore_prev_state();
++#else
++ __ restore_interpreter_state(R11_scratch1);
++ __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R11_scratch1);
++#endif // CC_INTERP
++
++ // Return to the interpreter entry point.
++ __ blr();
++
++ masm->flush();
++
++ _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, frame_size_in_bytes/wordSize);
++}
++#endif // COMPILER2
++
++// Generate a special Compile2Runtime blob that saves all registers and sets up the oopmap.
++SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
++ assert(StubRoutines::forward_exception_entry() != NULL,
++ "must be generated before");
++
++ ResourceMark rm;
++ OopMapSet *oop_maps = new OopMapSet();
++ OopMap* map;
++
++ // Allocate space for the code. Setup code generation tools.
++ CodeBuffer buffer("handler_blob", 2048, 1024);
++ MacroAssembler* masm = new MacroAssembler(&buffer);
++
++ address start = __ pc();
++ int frame_size_in_bytes = 0;
++
++ RegisterSaver::ReturnPCLocation return_pc_location;
++ bool cause_return = (poll_type == POLL_AT_RETURN);
++ if (cause_return) {
++ // Nothing to do here. The frame has already been popped in MachEpilogNode.
++ // Register LR already contains the return pc.
++ return_pc_location = RegisterSaver::return_pc_is_lr;
++ } else {
++ // Use thread()->saved_exception_pc() as return pc.
++ return_pc_location = RegisterSaver::return_pc_is_thread_saved_exception_pc;
++ }
++
++ // Save registers, fpu state, and flags.
++ map = RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
++ &frame_size_in_bytes,
++ /*generate_oop_map=*/ true,
++ /*return_pc_adjustment=*/0,
++ return_pc_location);
++
++ // The following is basically a call_VM. However, we need the precise
++ // address of the call in order to generate an oopmap. Hence, we do all the
++  // work ourselves.
++ __ set_last_Java_frame(/*sp=*/R1_SP, /*pc=*/noreg);
++
++ // The return address must always be correct so that the frame constructor
++ // never sees an invalid pc.
++
++ // Do the call
++ __ call_VM_leaf(call_ptr, R16_thread);
++ address calls_return_pc = __ last_calls_return_pc();
++
++ // Set an oopmap for the call site. This oopmap will map all
++ // oop-registers and debug-info registers as callee-saved. This
++ // will allow deoptimization at this safepoint to find all possible
++ // debug-info recordings, as well as let GC find all oops.
++ oop_maps->add_gc_map(calls_return_pc - start, map);
++
++ Label noException;
++
++ // Clear the last Java frame.
++ __ reset_last_Java_frame();
++
++ BLOCK_COMMENT(" Check pending exception.");
++ const Register pending_exception = R0;
++ __ ld(pending_exception, thread_(pending_exception));
++ __ cmpdi(CCR0, pending_exception, 0);
++ __ beq(CCR0, noException);
++
++ // Exception pending
++ RegisterSaver::restore_live_registers_and_pop_frame(masm,
++ frame_size_in_bytes,
++ /*restore_ctr=*/true);
++
++ BLOCK_COMMENT(" Jump to forward_exception_entry.");
++ // Jump to forward_exception_entry, with the issuing PC in LR
++ // so it looks like the original nmethod called forward_exception_entry.
++ __ b64_patchable(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
++
++ // No exception case.
++ __ BIND(noException);
++
++
++ // Normal exit, restore registers and exit.
++ RegisterSaver::restore_live_registers_and_pop_frame(masm,
++ frame_size_in_bytes,
++ /*restore_ctr=*/true);
++
++ __ blr();
++
++ // Make sure all code is generated
++ masm->flush();
++
++  // Fill out other meta info.
++ // CodeBlob frame size is in words.
++ return SafepointBlob::create(&buffer, oop_maps, frame_size_in_bytes / wordSize);
++}
++
++// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
++//
++// Generate a stub that calls into the vm to find out the proper destination
++// of a java call. All the argument registers are live at this point
++// but since this is generic code we don't know what they are and the caller
++// must do any gc of the args.
++//
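++// A typical call site (illustrative; the actual registrations live in the
++// shared runtime setup code) would be:
++//
++//   generate_resolve_blob(
++//       CAST_FROM_FN_PTR(address, SharedRuntime::resolve_static_call_C),
++//       "resolve_static_call");
++//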
++RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
++
++ // allocate space for the code
++ ResourceMark rm;
++
++ CodeBuffer buffer(name, 1000, 512);
++ MacroAssembler* masm = new MacroAssembler(&buffer);
++
++ int frame_size_in_bytes;
++
++ OopMapSet *oop_maps = new OopMapSet();
++ OopMap* map = NULL;
++
++ address start = __ pc();
++
++ map = RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
++ &frame_size_in_bytes,
++ /*generate_oop_map*/ true,
++ /*return_pc_adjustment*/ 0,
++ RegisterSaver::return_pc_is_lr);
++
++ // Use noreg as last_Java_pc, the return pc will be reconstructed
++ // from the physical frame.
++ __ set_last_Java_frame(/*sp*/R1_SP, noreg);
++
++ int frame_complete = __ offset();
++
++ // Pass R19_method as 2nd (optional) argument, used by
++ // counter_overflow_stub.
++ __ call_VM_leaf(destination, R16_thread, R19_method);
++ address calls_return_pc = __ last_calls_return_pc();
++ // Set an oopmap for the call site.
++ // We need this not only for callee-saved registers, but also for volatile
++ // registers that the compiler might be keeping live across a safepoint.
++ // Create the oopmap for the call's return pc.
++ oop_maps->add_gc_map(calls_return_pc - start, map);
++
++ // R3_RET contains the address we are going to jump to assuming no exception got installed.
++
++ // clear last_Java_sp
++ __ reset_last_Java_frame();
++
++ // Check for pending exceptions.
++ BLOCK_COMMENT("Check for pending exceptions.");
++ Label pending;
++ __ ld(R11_scratch1, thread_(pending_exception));
++ __ cmpdi(CCR0, R11_scratch1, 0);
++ __ bne(CCR0, pending);
++
++ __ mtctr(R3_RET); // Ctr will not be touched by restore_live_registers_and_pop_frame.
++
++ RegisterSaver::restore_live_registers_and_pop_frame(masm, frame_size_in_bytes, /*restore_ctr*/ false);
++
++ // Get the returned method.
++ __ get_vm_result_2(R19_method);
++
++ __ bctr();
++
++
++ // Pending exception after the safepoint.
++ __ BIND(pending);
++
++ RegisterSaver::restore_live_registers_and_pop_frame(masm, frame_size_in_bytes, /*restore_ctr*/ true);
++
++ // exception pending => remove activation and forward to exception handler
++
++ __ li(R11_scratch1, 0);
++ __ ld(R3_ARG1, thread_(pending_exception));
++ __ std(R11_scratch1, in_bytes(JavaThread::vm_result_offset()), R16_thread);
++ __ b64_patchable(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
++
++ // -------------
++ // Make sure all code is generated.
++ masm->flush();
++
++  // Return the blob. Note that the CodeBlob frame size is given in
++  // words, hence the division by wordSize below.
++ return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_bytes/wordSize,
++ oop_maps, true);
++}
+--- ./hotspot/src/cpu/ppc/vm/stubGenerator_ppc.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/stubGenerator_ppc.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,2117 @@
++/*
++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2014 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "asm/assembler.hpp"
++#include "asm/macroAssembler.inline.hpp"
++#include "interpreter/interpreter.hpp"
++#include "nativeInst_ppc.hpp"
++#include "oops/instanceOop.hpp"
++#include "oops/method.hpp"
++#include "oops/objArrayKlass.hpp"
++#include "oops/oop.inline.hpp"
++#include "prims/methodHandles.hpp"
++#include "runtime/frame.inline.hpp"
++#include "runtime/handles.inline.hpp"
++#include "runtime/sharedRuntime.hpp"
++#include "runtime/stubCodeGenerator.hpp"
++#include "runtime/stubRoutines.hpp"
++#include "utilities/top.hpp"
++#ifdef COMPILER2
++#include "opto/runtime.hpp"
++#endif
++#include "runtime/thread.inline.hpp"
++
++#define __ _masm->
++
++#ifdef PRODUCT
++#define BLOCK_COMMENT(str) // nothing
++#else
++#define BLOCK_COMMENT(str) __ block_comment(str)
++#endif
++
++class StubGenerator: public StubCodeGenerator {
++ private:
++
++ // Call stubs are used to call Java from C
++ //
++ // Arguments:
++ //
++ // R3 - call wrapper address : address
++ // R4 - result : intptr_t*
++ // R5 - result type : BasicType
++ // R6 - method : Method
++ // R7 - frame mgr entry point : address
++ // R8 - parameter block : intptr_t*
++ // R9 - parameter count in words : int
++ // R10 - thread : Thread*
++ //
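++  //
++  // Conceptually (a sketch, not the authoritative typedef), the VM
++  // invokes this stub through a function pointer along the lines of:
++  //
++  //   typedef void (*CallStub)(address call_wrapper, intptr_t* result,
++  //                            BasicType result_type, Method* method,
++  //                            address entry, intptr_t* parameters,
++  //                            int parameter_words, Thread* thread);
++  //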
++ address generate_call_stub(address& return_address) {
++    // Set up a new C frame, copy Java arguments, call the frame manager
++    // or native_entry, and process the result.
++
++ StubCodeMark mark(this, "StubRoutines", "call_stub");
++
++ address start = __ function_entry();
++
++ // some sanity checks
++ assert((sizeof(frame::abi_minframe) % 16) == 0, "unaligned");
++ assert((sizeof(frame::abi_reg_args) % 16) == 0, "unaligned");
++ assert((sizeof(frame::spill_nonvolatiles) % 16) == 0, "unaligned");
++ assert((sizeof(frame::parent_ijava_frame_abi) % 16) == 0, "unaligned");
++ assert((sizeof(frame::entry_frame_locals) % 16) == 0, "unaligned");
++
++ Register r_arg_call_wrapper_addr = R3;
++ Register r_arg_result_addr = R4;
++ Register r_arg_result_type = R5;
++ Register r_arg_method = R6;
++ Register r_arg_entry = R7;
++ Register r_arg_thread = R10;
++
++ Register r_temp = R24;
++ Register r_top_of_arguments_addr = R25;
++ Register r_entryframe_fp = R26;
++
++ {
++ // Stack on entry to call_stub:
++ //
++ // F1 [C_FRAME]
++ // ...
++
++ Register r_arg_argument_addr = R8;
++ Register r_arg_argument_count = R9;
++ Register r_frame_alignment_in_bytes = R27;
++ Register r_argument_addr = R28;
++ Register r_argumentcopy_addr = R29;
++ Register r_argument_size_in_bytes = R30;
++ Register r_frame_size = R23;
++
++ Label arguments_copied;
++
++ // Save LR/CR to caller's C_FRAME.
++ __ save_LR_CR(R0);
++
++ // Zero extend arg_argument_count.
++ __ clrldi(r_arg_argument_count, r_arg_argument_count, 32);
++
++      // Save non-volatile GPRs to ENTRY_FRAME (not yet pushed, but it's safe).
++ __ save_nonvolatile_gprs(R1_SP, _spill_nonvolatiles_neg(r14));
++
++ // Keep copy of our frame pointer (caller's SP).
++ __ mr(r_entryframe_fp, R1_SP);
++
++ BLOCK_COMMENT("Push ENTRY_FRAME including arguments");
++ // Push ENTRY_FRAME including arguments:
++ //
++ // F0 [TOP_IJAVA_FRAME_ABI]
++ // alignment (optional)
++ // [outgoing Java arguments]
++ // [ENTRY_FRAME_LOCALS]
++ // F1 [C_FRAME]
++ // ...
++
++ // calculate frame size
++
++ // unaligned size of arguments
++ __ sldi(r_argument_size_in_bytes,
++ r_arg_argument_count, Interpreter::logStackElementSize);
++ // arguments alignment (max 1 slot)
++ // FIXME: use round_to() here
++ __ andi_(r_frame_alignment_in_bytes, r_arg_argument_count, 1);
++ __ sldi(r_frame_alignment_in_bytes,
++ r_frame_alignment_in_bytes, Interpreter::logStackElementSize);
++
++ // size = unaligned size of arguments + top abi's size
++ __ addi(r_frame_size, r_argument_size_in_bytes,
++ frame::top_ijava_frame_abi_size);
++ // size += arguments alignment
++ __ add(r_frame_size,
++ r_frame_size, r_frame_alignment_in_bytes);
++ // size += size of call_stub locals
++ __ addi(r_frame_size,
++ r_frame_size, frame::entry_frame_locals_size);
++
++ // push ENTRY_FRAME
++ __ push_frame(r_frame_size, r_temp);
++
++ // initialize call_stub locals (step 1)
++ __ std(r_arg_call_wrapper_addr,
++ _entry_frame_locals_neg(call_wrapper_address), r_entryframe_fp);
++ __ std(r_arg_result_addr,
++ _entry_frame_locals_neg(result_address), r_entryframe_fp);
++ __ std(r_arg_result_type,
++ _entry_frame_locals_neg(result_type), r_entryframe_fp);
++ // we will save arguments_tos_address later
++
++
++ BLOCK_COMMENT("Copy Java arguments");
++ // copy Java arguments
++
++ // Calculate top_of_arguments_addr which will be R17_tos (not prepushed) later.
++ // FIXME: why not simply use SP+frame::top_ijava_frame_size?
++ __ addi(r_top_of_arguments_addr,
++ R1_SP, frame::top_ijava_frame_abi_size);
++ __ add(r_top_of_arguments_addr,
++ r_top_of_arguments_addr, r_frame_alignment_in_bytes);
++
++ // any arguments to copy?
++ __ cmpdi(CCR0, r_arg_argument_count, 0);
++ __ beq(CCR0, arguments_copied);
++
++ // prepare loop and copy arguments in reverse order
++ {
++ // init CTR with arg_argument_count
++ __ mtctr(r_arg_argument_count);
++
++      // let r_argumentcopy_addr point to the last outgoing Java argument
++ __ mr(r_argumentcopy_addr, r_top_of_arguments_addr);
++
++ // let r_argument_addr point to last incoming java argument
++ __ add(r_argument_addr,
++ r_arg_argument_addr, r_argument_size_in_bytes);
++ __ addi(r_argument_addr, r_argument_addr, -BytesPerWord);
++
++ // now loop while CTR > 0 and copy arguments
++ {
++ Label next_argument;
++ __ bind(next_argument);
++
++ __ ld(r_temp, 0, r_argument_addr);
++ // argument_addr--;
++ __ addi(r_argument_addr, r_argument_addr, -BytesPerWord);
++ __ std(r_temp, 0, r_argumentcopy_addr);
++ // argumentcopy_addr++;
++ __ addi(r_argumentcopy_addr, r_argumentcopy_addr, BytesPerWord);
++
++ __ bdnz(next_argument);
++ }
++ }
++
++ // Arguments copied, continue.
++ __ bind(arguments_copied);
++ }
++
++ {
++ BLOCK_COMMENT("Call frame manager or native entry.");
++ // Call frame manager or native entry.
++ Register r_new_arg_entry = R14; // PPC_state;
++ assert_different_registers(r_new_arg_entry, r_top_of_arguments_addr,
++ r_arg_method, r_arg_thread);
++
++ __ mr(r_new_arg_entry, r_arg_entry);
++
++ // Register state on entry to frame manager / native entry:
++ //
++ // tos - intptr_t* sender tos (prepushed) Lesp = (SP) + copied_arguments_offset - 8
++ // R19_method - Method
++ // R16_thread - JavaThread*
++
++ // Tos must point to last argument - element_size.
++#ifdef CC_INTERP
++ const Register tos = R17_tos;
++#else
++ const Register tos = R15_esp;
++#endif
++ __ addi(tos, r_top_of_arguments_addr, -Interpreter::stackElementSize);
++
++ // initialize call_stub locals (step 2)
++ // now save tos as arguments_tos_address
++ __ std(tos, _entry_frame_locals_neg(arguments_tos_address), r_entryframe_fp);
++
++ // load argument registers for call
++ __ mr(R19_method, r_arg_method);
++ __ mr(R16_thread, r_arg_thread);
++ assert(tos != r_arg_method, "trashed r_arg_method");
++ assert(tos != r_arg_thread && R19_method != r_arg_thread, "trashed r_arg_thread");
++
++ // Set R15_prev_state to 0 for simplifying checks in callee.
++#ifdef CC_INTERP
++ __ li(R15_prev_state, 0);
++#else
++ __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R11_scratch1);
++#endif
++ // Stack on entry to frame manager / native entry:
++ //
++ // F0 [TOP_IJAVA_FRAME_ABI]
++ // alignment (optional)
++ // [outgoing Java arguments]
++ // [ENTRY_FRAME_LOCALS]
++ // F1 [C_FRAME]
++ // ...
++ //
++
++ // global toc register
++ __ load_const(R29, MacroAssembler::global_toc(), R11_scratch1);
++
++ // Load narrow oop base.
++ __ reinit_heapbase(R30, R11_scratch1);
++
++    // Remember the senderSP so the interpreter can pop c2i arguments off
++    // the stack when called via a c2i.
++
++ // Pass initial_caller_sp to framemanager.
++ __ mr(R21_tmp1, R1_SP);
++
++ // Do a light-weight C-call here, r_new_arg_entry holds the address
++ // of the interpreter entry point (frame manager or native entry)
++ // and save runtime-value of LR in return_address.
++ assert(r_new_arg_entry != tos && r_new_arg_entry != R19_method && r_new_arg_entry != R16_thread,
++ "trashed r_new_arg_entry");
++ return_address = __ call_stub(r_new_arg_entry);
++ }
++
++ {
++ BLOCK_COMMENT("Returned from frame manager or native entry.");
++ // Returned from frame manager or native entry.
++ // Now pop frame, process result, and return to caller.
++
++ // Stack on exit from frame manager / native entry:
++ //
++ // F0 [ABI]
++ // ...
++ // [ENTRY_FRAME_LOCALS]
++ // F1 [C_FRAME]
++ // ...
++ //
++ // Just pop the topmost frame ...
++ //
++
++ Label ret_is_object;
++ Label ret_is_long;
++ Label ret_is_float;
++ Label ret_is_double;
++
++ Register r_entryframe_fp = R30;
++ Register r_lr = R7_ARG5;
++ Register r_cr = R8_ARG6;
++
++ // Reload some volatile registers which we've spilled before the call
++ // to frame manager / native entry.
++ // Access all locals via frame pointer, because we know nothing about
++ // the topmost frame's size.
++ __ ld(r_entryframe_fp, _abi(callers_sp), R1_SP);
++ assert_different_registers(r_entryframe_fp, R3_RET, r_arg_result_addr, r_arg_result_type, r_cr, r_lr);
++ __ ld(r_arg_result_addr,
++ _entry_frame_locals_neg(result_address), r_entryframe_fp);
++ __ ld(r_arg_result_type,
++ _entry_frame_locals_neg(result_type), r_entryframe_fp);
++ __ ld(r_cr, _abi(cr), r_entryframe_fp);
++ __ ld(r_lr, _abi(lr), r_entryframe_fp);
++
++ // pop frame and restore non-volatiles, LR and CR
++ __ mr(R1_SP, r_entryframe_fp);
++ __ mtcr(r_cr);
++ __ mtlr(r_lr);
++
++ // Store result depending on type. Everything that is not
++ // T_OBJECT, T_LONG, T_FLOAT, or T_DOUBLE is treated as T_INT.
++ __ cmpwi(CCR0, r_arg_result_type, T_OBJECT);
++ __ cmpwi(CCR1, r_arg_result_type, T_LONG);
++ __ cmpwi(CCR5, r_arg_result_type, T_FLOAT);
++ __ cmpwi(CCR6, r_arg_result_type, T_DOUBLE);
++
++ // restore non-volatile registers
++ __ restore_nonvolatile_gprs(R1_SP, _spill_nonvolatiles_neg(r14));
++
++
++ // Stack on exit from call_stub:
++ //
++ // 0 [C_FRAME]
++ // ...
++ //
++ // no call_stub frames left.
++
++ // All non-volatiles have been restored at this point!!
++ assert(R3_RET == R3, "R3_RET should be R3");
++
++ __ beq(CCR0, ret_is_object);
++ __ beq(CCR1, ret_is_long);
++ __ beq(CCR5, ret_is_float);
++ __ beq(CCR6, ret_is_double);
++
++ // default:
++ __ stw(R3_RET, 0, r_arg_result_addr);
++ __ blr(); // return to caller
++
++ // case T_OBJECT:
++ __ bind(ret_is_object);
++ __ std(R3_RET, 0, r_arg_result_addr);
++ __ blr(); // return to caller
++
++ // case T_LONG:
++ __ bind(ret_is_long);
++ __ std(R3_RET, 0, r_arg_result_addr);
++ __ blr(); // return to caller
++
++ // case T_FLOAT:
++ __ bind(ret_is_float);
++ __ stfs(F1_RET, 0, r_arg_result_addr);
++ __ blr(); // return to caller
++
++ // case T_DOUBLE:
++ __ bind(ret_is_double);
++ __ stfd(F1_RET, 0, r_arg_result_addr);
++ __ blr(); // return to caller
++ }
++
++ return start;
++ }
++
++ // Return point for a Java call if there's an exception thrown in
++ // Java code. The exception is caught and transformed into a
++ // pending exception stored in JavaThread that can be tested from
++ // within the VM.
++ //
++ address generate_catch_exception() {
++ StubCodeMark mark(this, "StubRoutines", "catch_exception");
++
++ address start = __ pc();
++
++ // Registers alive
++ //
++ // R16_thread
++ // R3_ARG1 - address of pending exception
++ // R4_ARG2 - return address in call stub
++
++ const Register exception_file = R21_tmp1;
++ const Register exception_line = R22_tmp2;
++
++ __ load_const(exception_file, (void*)__FILE__);
++ __ load_const(exception_line, (void*)__LINE__);
++
++ __ std(R3_ARG1, thread_(pending_exception));
++ // store into `char *'
++ __ std(exception_file, thread_(exception_file));
++ // store into `int'
++ __ stw(exception_line, thread_(exception_line));
++
++ // complete return to VM
++ assert(StubRoutines::_call_stub_return_address != NULL, "must have been generated before");
++
++ __ mtlr(R4_ARG2);
++ // continue in call stub
++ __ blr();
++
++ return start;
++ }
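
For orientation, the effect of this stub can be modeled in a few lines of C++. This is an illustrative sketch only: ThreadModel and its fields are stand-ins for JavaThread's exception bookkeeping, not HotSpot types.

    // Illustrative stand-ins for the JavaThread fields written above.
    struct ThreadModel {
      void*       pending_exception;  // thread_(pending_exception)
      const char* exception_file;     // thread_(exception_file)
      int         exception_line;     // thread_(exception_line)
    };

    // What generate_catch_exception() records before resuming the call stub.
    inline void catch_exception(ThreadModel* t, void* exception_oop) {
      t->pending_exception = exception_oop;
      t->exception_file    = __FILE__;
      t->exception_line    = __LINE__;
      // The stub then executes mtlr(R4_ARG2); blr() to continue in the call stub.
    }
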
++
++ // Continuation point for runtime calls returning with a pending
++ // exception. The pending exception check happened in the runtime
++ // or native call stub. The pending exception in Thread is
++ // converted into a Java-level exception.
++ //
++ address generate_forward_exception() {
++ StubCodeMark mark(this, "StubRoutines", "forward_exception");
++ address start = __ pc();
++
++#if !defined(PRODUCT)
++ if (VerifyOops) {
++ // Get pending exception oop.
++ __ ld(R3_ARG1,
++ in_bytes(Thread::pending_exception_offset()),
++ R16_thread);
++ // Make sure that this code is only executed if there is a pending exception.
++ {
++ Label L;
++ __ cmpdi(CCR0, R3_ARG1, 0);
++ __ bne(CCR0, L);
++ __ stop("StubRoutines::forward exception: no pending exception (1)");
++ __ bind(L);
++ }
++ __ verify_oop(R3_ARG1, "StubRoutines::forward exception: not an oop");
++ }
++#endif
++
++ // Save LR/CR and copy exception pc (LR) into R4_ARG2.
++ __ save_LR_CR(R4_ARG2);
++ __ push_frame_reg_args(0, R0);
++ // Find exception handler.
++ __ call_VM_leaf(CAST_FROM_FN_PTR(address,
++ SharedRuntime::exception_handler_for_return_address),
++ R16_thread,
++ R4_ARG2);
++ // Copy handler's address.
++ __ mtctr(R3_RET);
++ __ pop_frame();
++ __ restore_LR_CR(R0);
++
++ // Set up the arguments for the exception handler:
++ // - R3_ARG1: exception oop
++ // - R4_ARG2: exception pc.
++
++ // Load pending exception oop.
++ __ ld(R3_ARG1,
++ in_bytes(Thread::pending_exception_offset()),
++ R16_thread);
++
++ // The exception pc is the return address in the caller.
++ // Must load it into R4_ARG2.
++ __ mflr(R4_ARG2);
++
++#ifdef ASSERT
++ // Make sure exception is set.
++ {
++ Label L;
++ __ cmpdi(CCR0, R3_ARG1, 0);
++ __ bne(CCR0, L);
++ __ stop("StubRoutines::forward exception: no pending exception (2)");
++ __ bind(L);
++ }
++#endif
++
++ // Clear the pending exception.
++ __ li(R0, 0);
++ __ std(R0,
++ in_bytes(Thread::pending_exception_offset()),
++ R16_thread);
++ // Jump to exception handler.
++ __ bctr();
++
++ return start;
++ }
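
The same control flow, restated as a C++ sketch. ThreadModel and the free function below are hypothetical stand-ins; the generated code performs these steps in assembly without building a C++ frame.

    struct ThreadModel { void* pending_exception; };
    // Models SharedRuntime::exception_handler_for_return_address.
    extern "C" void* exception_handler_for_return_address(ThreadModel* t, void* ret_pc);

    typedef void (*handler_fn)(void* exception_oop, void* exception_pc);

    inline void forward_exception(ThreadModel* thread, void* return_pc) {
      // 1. Ask the runtime which handler covers the caller's return address.
      handler_fn handler =
          (handler_fn)exception_handler_for_return_address(thread, return_pc);
      // 2. Load and clear the pending exception (li(R0, 0); std in the stub).
      void* exception_oop = thread->pending_exception;
      thread->pending_exception = nullptr;
      // 3. Jump to the handler with (oop, pc) in R3_ARG1/R4_ARG2 (bctr in asm).
      handler(exception_oop, return_pc);
    }
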
++
++#undef __
++#define __ masm->
++ // Continuation point for throwing of implicit exceptions that are
++ // not handled in the current activation. Fabricates an exception
++ // oop and initiates normal exception dispatching in this
++ // frame. Only callee-saved registers are preserved (through the
++ // normal register window / RegisterMap handling). If the compiler
++ // needs all registers to be preserved between the fault point and
++ // the exception handler then it must assume responsibility for that
++ // in AbstractCompiler::continuation_for_implicit_null_exception or
++ // continuation_for_implicit_division_by_zero_exception. All other
++ // implicit exceptions (e.g., NullPointerException or
++ // AbstractMethodError on entry) are either at call sites or
++ // otherwise assume that stack unwinding will be initiated, so
++ // caller saved registers were assumed volatile in the compiler.
++ //
++ // Note that we generate only this stub into a RuntimeStub, because
++ // it needs to be properly traversed and ignored during GC, so we
++ // change the meaning of the "__" macro within this method.
++ //
++ // Note: the routine set_pc_not_at_call_for_caller in
++ // SharedRuntime.cpp requires that this code be generated into a
++ // RuntimeStub.
++ address generate_throw_exception(const char* name, address runtime_entry, bool restore_saved_exception_pc,
++ Register arg1 = noreg, Register arg2 = noreg) {
++ CodeBuffer code(name, 1024 DEBUG_ONLY(+ 512), 0);
++ MacroAssembler* masm = new MacroAssembler(&code);
++
++ OopMapSet* oop_maps = new OopMapSet();
++ int frame_size_in_bytes = frame::abi_reg_args_size;
++ OopMap* map = new OopMap(frame_size_in_bytes / sizeof(jint), 0);
++
++ StubCodeMark mark(this, "StubRoutines", "throw_exception");
++
++ address start = __ pc();
++
++ __ save_LR_CR(R11_scratch1);
++
++ // Push a frame.
++ __ push_frame_reg_args(0, R11_scratch1);
++
++ address frame_complete_pc = __ pc();
++
++ if (restore_saved_exception_pc) {
++ __ unimplemented("StubGenerator::throw_exception with restore_saved_exception_pc", 74);
++ }
++
++ // Note that we always have a runtime stub frame on the top of
++ // stack by this point. Remember the offset of the instruction
++ // whose address will be moved to R11_scratch1.
++ address gc_map_pc = __ get_PC_trash_LR(R11_scratch1);
++
++ __ set_last_Java_frame(/*sp*/R1_SP, /*pc*/R11_scratch1);
++
++ __ mr(R3_ARG1, R16_thread);
++ if (arg1 != noreg) {
++ __ mr(R4_ARG2, arg1);
++ }
++ if (arg2 != noreg) {
++ __ mr(R5_ARG3, arg2);
++ }
++#if defined(ABI_ELFv2)
++ __ call_c(runtime_entry, relocInfo::none);
++#else
++ __ call_c(CAST_FROM_FN_PTR(FunctionDescriptor*, runtime_entry), relocInfo::none);
++#endif
++
++ // Set an oopmap for the call site.
++ oop_maps->add_gc_map((int)(gc_map_pc - start), map);
++
++ __ reset_last_Java_frame();
++
++#ifdef ASSERT
++ // Make sure that this code is only executed if there is a pending
++ // exception.
++ {
++ Label L;
++ __ ld(R0,
++ in_bytes(Thread::pending_exception_offset()),
++ R16_thread);
++ __ cmpdi(CCR0, R0, 0);
++ __ bne(CCR0, L);
++ __ stop("StubRoutines::throw_exception: no pending exception");
++ __ bind(L);
++ }
++#endif
++
++ // Pop frame.
++ __ pop_frame();
++
++ __ restore_LR_CR(R11_scratch1);
++
++ __ load_const(R11_scratch1, StubRoutines::forward_exception_entry());
++ __ mtctr(R11_scratch1);
++ __ bctr();
++
++ // Create runtime stub with OopMap.
++ RuntimeStub* stub =
++ RuntimeStub::new_runtime_stub(name, &code,
++ /*frame_complete=*/ (int)(frame_complete_pc - start),
++ frame_size_in_bytes/wordSize,
++ oop_maps,
++ false);
++ return stub->entry_point();
++ }
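
Condensed to C++, the generated sequence amounts to the sketch below; runtime_throw and forward_exception_entry are illustrative names for runtime_entry and StubRoutines::forward_exception_entry(), and the real stub additionally records an oop map at gc_map_pc and tail-branches rather than calling.

    extern "C" void runtime_throw(void* thread, void* arg1, void* arg2);
    extern "C" void forward_exception_entry();

    inline void throw_exception_stub(void* thread, void* arg1, void* arg2) {
      runtime_throw(thread, arg1, arg2);  // installs thread->pending_exception
      forward_exception_entry();          // dispatches it; never returns
    }
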
++#undef __
++#define __ _masm->
++
++ // Generate G1 pre-write barrier for array.
++ //
++ // Input:
++ // from - register containing src address (only needed for spilling)
++ // to - register containing starting address
++ // count - register containing element count
++ // tmp - scratch register
++ //
++ // Kills:
++ // nothing
++ //
++ void gen_write_ref_array_pre_barrier(Register from, Register to, Register count, bool dest_uninitialized, Register Rtmp1) {
++ BarrierSet* const bs = Universe::heap()->barrier_set();
++ switch (bs->kind()) {
++ case BarrierSet::G1SATBCT:
++ case BarrierSet::G1SATBCTLogging:
++ // With G1, don't generate the call if we statically know that the target is uninitialized.
++ if (!dest_uninitialized) {
++ const int spill_slots = 4 * wordSize;
++ const int frame_size = frame::abi_reg_args_size + spill_slots;
++ Label filtered;
++
++ // Is marking active?
++ if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
++ __ lwz(Rtmp1, in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_active()), R16_thread);
++ } else {
++ guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
++ __ lbz(Rtmp1, in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_active()), R16_thread);
++ }
++ __ cmpdi(CCR0, Rtmp1, 0);
++ __ beq(CCR0, filtered);
++
++ __ save_LR_CR(R0);
++ __ push_frame_reg_args(spill_slots, R0);
++ __ std(from, frame_size - 1 * wordSize, R1_SP);
++ __ std(to, frame_size - 2 * wordSize, R1_SP);
++ __ std(count, frame_size - 3 * wordSize, R1_SP);
++
++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), to, count);
++
++ __ ld(from, frame_size - 1 * wordSize, R1_SP);
++ __ ld(to, frame_size - 2 * wordSize, R1_SP);
++ __ ld(count, frame_size - 3 * wordSize, R1_SP);
++ __ pop_frame();
++ __ restore_LR_CR(R0);
++
++ __ bind(filtered);
++ }
++ break;
++ case BarrierSet::CardTableModRef:
++ case BarrierSet::CardTableExtension:
++ case BarrierSet::ModRef:
++ break;
++ default:
++ ShouldNotReachHere();
++ }
++ }
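
The filtering above reduces to the following C++ sketch. Names are illustrative: satb_marking_active models the per-thread SATB "active" byte the stub loads with lwz/lbz, and write_ref_array_pre models BarrierSet::static_write_ref_array_pre.

    #include <cstddef>

    typedef void* oop_model;                 // stand-in for oop
    extern bool satb_marking_active;         // per-thread SATB active byte
    extern "C" void write_ref_array_pre(oop_model* dst, size_t count);

    inline void pre_barrier(oop_model* dst, size_t count, bool dest_uninitialized) {
      if (dest_uninitialized) return;    // statically known: no old values to record
      if (!satb_marking_active) return;  // fast path, the beq to 'filtered'
      write_ref_array_pre(dst, count);   // slow path: log old values for SATB
    }
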
++
++ // Generate CMS/G1 post-write barrier for array.
++ //
++ // Input:
++ // addr - register containing starting address
++ // count - register containing element count
++ // tmp - scratch register
++ //
++ // The input registers and R0 are overwritten.
++ //
++ void gen_write_ref_array_post_barrier(Register addr, Register count, Register tmp, bool branchToEnd) {
++ BarrierSet* const bs = Universe::heap()->barrier_set();
++
++ switch (bs->kind()) {
++ case BarrierSet::G1SATBCT:
++ case BarrierSet::G1SATBCTLogging:
++ {
++ if (branchToEnd) {
++ __ save_LR_CR(R0);
++ // We need this frame only to spill LR.
++ __ push_frame_reg_args(0, R0);
++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), addr, count);
++ __ pop_frame();
++ __ restore_LR_CR(R0);
++ } else {
++ // Tail call: fake call from stub caller by branching without linking.
++ address entry_point = (address)CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post);
++ __ mr_if_needed(R3_ARG1, addr);
++ __ mr_if_needed(R4_ARG2, count);
++ __ load_const(R11, entry_point, R0);
++ __ call_c_and_return_to_caller(R11);
++ }
++ }
++ break;
++ case BarrierSet::CardTableModRef:
++ case BarrierSet::CardTableExtension:
++ {
++ Label Lskip_loop, Lstore_loop;
++ if (UseConcMarkSweepGC) {
++ // TODO PPC port: contribute optimization / requires shared changes
++ __ release();
++ }
++
++ CardTableModRefBS* const ct = (CardTableModRefBS*)bs;
++ assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
++ assert_different_registers(addr, count, tmp);
++
++ __ sldi(count, count, LogBytesPerHeapOop);
++ __ addi(count, count, -BytesPerHeapOop);
++ __ add(count, addr, count);
++ // Use two shifts to clear out those low order two bits! (Cannot be optimized into one shift.)
++ __ srdi(addr, addr, CardTableModRefBS::card_shift);
++ __ srdi(count, count, CardTableModRefBS::card_shift);
++ __ subf(count, addr, count);
++ assert_different_registers(R0, addr, count, tmp);
++ __ load_const(tmp, (address)ct->byte_map_base);
++ __ addic_(count, count, 1);
++ __ beq(CCR0, Lskip_loop);
++ __ li(R0, 0);
++ __ mtctr(count);
++ // Byte store loop
++ __ bind(Lstore_loop);
++ __ stbx(R0, tmp, addr);
++ __ addi(addr, addr, 1);
++ __ bdnz(Lstore_loop);
++ __ bind(Lskip_loop);
++
++ if (!branchToEnd) __ blr();
++ }
++ break;
++ case BarrierSet::ModRef:
++ if (!branchToEnd) __ blr();
++ break;
++ default:
++ ShouldNotReachHere();
++ }
++ }
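
The card-index arithmetic in the CardTableModRef branch (sldi/add, two srdi, subf, addic_) computes an inclusive card range and dirties every card in it. A C++ restatement with illustrative names:

    #include <cstddef>
    #include <cstdint>

    const int card_shift = 9;        // 512-byte cards, as in CardTableModRefBS
    extern int8_t* byte_map_base;    // biased card table base (ct->byte_map_base)

    inline void post_barrier(uintptr_t addr, size_t count, size_t heap_oop_size) {
      uintptr_t last       = addr + count * heap_oop_size - heap_oop_size;
      uintptr_t first_card = addr >> card_shift;
      uintptr_t last_card  = last >> card_shift;
      // addic_(count, count, 1) makes the trip count cover both end cards.
      for (uintptr_t c = first_card; c <= last_card; c++) {
        byte_map_base[c] = 0;        // 0 == dirty; the stub uses stbx
      }
    }
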
++
++ // Support for void zero_words_aligned8(HeapWord* to, size_t count)
++ //
++ // Arguments:
++ // to: R3_ARG1, address of the block to clear (must be 8-byte aligned)
++ // count: R4_ARG2, number of 8-byte words (dwords) to clear
++ //
++ // Destroys:
++ // R5_ARG3, R6_ARG4, R7_ARG5 and CTR
++ //
++ address generate_zero_words_aligned8() {
++ StubCodeMark mark(this, "StubRoutines", "zero_words_aligned8");
++
++ // Implemented as in ClearArray.
++ address start = __ function_entry();
++
++ Register base_ptr_reg = R3_ARG1; // tohw (needs to be 8b aligned)
++ Register cnt_dwords_reg = R4_ARG2; // count (in dwords)
++ Register tmp1_reg = R5_ARG3;
++ Register tmp2_reg = R6_ARG4;
++ Register zero_reg = R7_ARG5;
++
++ // Procedure for large arrays (uses data cache block zero instruction).
++ Label dwloop, fast, fastloop, restloop, lastdword, done;
++ int cl_size=VM_Version::get_cache_line_size(), cl_dwords=cl_size>>3, cl_dwordaddr_bits=exact_log2(cl_dwords);
++ int min_dcbz=2; // Needs to be positive, apply dcbz only to at least min_dcbz cache lines.
++
++ // Clear up to 128byte boundary if long enough, dword_cnt=(16-(base>>3))%16.
++ __ dcbtst(base_ptr_reg); // Indicate write access to first cache line ...
++ __ andi(tmp2_reg, cnt_dwords_reg, 1); // to check if number of dwords is even.
++ __ srdi_(tmp1_reg, cnt_dwords_reg, 1); // number of double dwords
++ __ load_const_optimized(zero_reg, 0L); // Use as zero register.
++
++ __ cmpdi(CCR1, tmp2_reg, 0); // cnt_dwords even?
++ __ beq(CCR0, lastdword); // size <= 1
++ __ mtctr(tmp1_reg); // Speculatively preload counter for rest loop (>0).
++ __ cmpdi(CCR0, cnt_dwords_reg, (min_dcbz+1)*cl_dwords-1); // Big enough to ensure >=min_dcbz cache lines are included?
++ __ neg(tmp1_reg, base_ptr_reg); // bit 0..58: bogus, bit 57..60: (16-(base>>3))%16, bit 61..63: 000
++
++ __ blt(CCR0, restloop); // Too small. (<31=(2*cl_dwords)-1 is sufficient, but bigger performs better.)
++ __ rldicl_(tmp1_reg, tmp1_reg, 64-3, 64-cl_dwordaddr_bits); // Extract number of dwords to 128byte boundary=(16-(base>>3))%16.
++
++ __ beq(CCR0, fast); // already 128byte aligned
++ __ mtctr(tmp1_reg); // Set ctr to hit 128byte boundary (00 since size>=256-8)
++
++ // Clear in first cache line dword-by-dword if not already 128byte aligned.
++ __ bind(dwloop);
++ __ std(zero_reg, 0, base_ptr_reg); // Clear 8byte aligned block.
++ __ addi(base_ptr_reg, base_ptr_reg, 8);
++ __ bdnz(dwloop);
++
++ // clear 128byte blocks
++ __ bind(fast);
++ __ srdi(tmp1_reg, cnt_dwords_reg, cl_dwordaddr_bits); // loop count for 128byte loop (>0 since size>=256-8)
++ __ andi(tmp2_reg, cnt_dwords_reg, 1); // to check if rest even
++
++ __ mtctr(tmp1_reg); // load counter
++ __ cmpdi(CCR1, tmp2_reg, 0); // rest even?
++ __ rldicl_(tmp1_reg, cnt_dwords_reg, 63, 65-cl_dwordaddr_bits); // rest in double dwords
++
++ __ bind(fastloop);
++ __ dcbz(base_ptr_reg); // Clear 128byte aligned block.
++ __ addi(base_ptr_reg, base_ptr_reg, cl_size);
++ __ bdnz(fastloop);
++
++ //__ dcbtst(base_ptr_reg); // Indicate write access to last cache line.
++ __ beq(CCR0, lastdword); // rest<=1
++ __ mtctr(tmp1_reg); // load counter
++
++ // Clear rest.
++ __ bind(restloop);
++ __ std(zero_reg, 0, base_ptr_reg); // Clear 8byte aligned block.
++ __ std(zero_reg, 8, base_ptr_reg); // Clear 8byte aligned block.
++ __ addi(base_ptr_reg, base_ptr_reg, 16);
++ __ bdnz(restloop);
++
++ __ bind(lastdword);
++ __ beq(CCR1, done);
++ __ std(zero_reg, 0, base_ptr_reg);
++ __ bind(done);
++ __ blr(); // return
++
++ return start;
++ }
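
The three phases of the stub map onto the C++ outline below, ignoring the min_dcbz small-size cutoff. memset stands in for the dcbz instruction, and cl_size (a power of two) would come from VM_Version::get_cache_line_size().

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    inline void zero_words_aligned8(uint64_t* to, size_t cnt_dwords, size_t cl_size) {
      const size_t cl_dwords = cl_size / 8;
      // 1. Head: clear dword-by-dword up to a cache-line boundary (dwloop).
      while (cnt_dwords > 0 && ((uintptr_t)to & (cl_size - 1)) != 0) {
        *to++ = 0; cnt_dwords--;
      }
      // 2. Bulk: one dcbz per whole cache line (fastloop).
      while (cnt_dwords >= cl_dwords) {
        memset(to, 0, cl_size);      // models dcbz
        to += cl_dwords; cnt_dwords -= cl_dwords;
      }
      // 3. Tail: remaining dwords (restloop / lastdword).
      while (cnt_dwords > 0) { *to++ = 0; cnt_dwords--; }
    }
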
++
++ // The following routine generates a subroutine to throw an asynchronous
++ // UnknownError when an unsafe access gets a fault that could not be
++ // reasonably prevented by the programmer. (Example: SIGBUS/OBJERR.)
++ //
++ address generate_handler_for_unsafe_access() {
++ StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access");
++ address start = __ function_entry();
++ __ unimplemented("StubRoutines::handler_for_unsafe_access", 93);
++ return start;
++ }
++
++#if !defined(PRODUCT)
++ // Wrapper which calls oopDesc::is_oop_or_null()
++ // Only called by MacroAssembler::verify_oop
++ static void verify_oop_helper(const char* message, oop o) {
++ if (!o->is_oop_or_null()) {
++ fatal(message);
++ }
++ ++ StubRoutines::_verify_oop_count;
++ }
++#endif
++
++ // Return address of code to be called from code generated by
++ // MacroAssembler::verify_oop.
++ //
++ // Don't generate, rather use C++ code.
++ address generate_verify_oop() {
++ StubCodeMark mark(this, "StubRoutines", "verify_oop");
++
++ // this is actually a `FunctionDescriptor*'.
++ address start = 0;
++
++#if !defined(PRODUCT)
++ start = CAST_FROM_FN_PTR(address, verify_oop_helper);
++#endif
++
++ return start;
++ }
++
++ // Fairer handling of safepoints for native methods.
++ //
++ // Generate code which reads from the polling page. This special handling is needed as the
++ // linux-ppc64 kernel before 2.6.6 doesn't set si_addr on some segfaults in 64bit mode
++ // (cf. http://www.kernel.org/pub/linux/kernel/v2.6/ChangeLog-2.6.6), especially when we try
++ // to read from the safepoint polling page.
++ address generate_load_from_poll() {
++ StubCodeMark mark(this, "StubRoutines", "generate_load_from_poll");
++ address start = __ function_entry();
++ __ unimplemented("StubRoutines::verify_oop", 95); // TODO PPC port
++ return start;
++ }
++
++ // -XX:+OptimizeFill : convert fill/copy loops into intrinsic
++ //
++ // The code is implemented (ported from SPARC) in the belief that it benefits JVM98; however,
++ // tracing (-XX:+TraceOptimizeFill) shows that the intrinsic replacement doesn't happen at all!
++ //
++ // Source code in function is_range_check_if() shows that OptimizeFill relaxed the condition
++ // for turning on loop predication optimization, and hence the behavior of "array range check"
++ // and "loop invariant check" could be influenced, which potentially boosted JVM98.
++ //
++ // Generate stub for disjoint short fill. If "aligned" is true, the
++ // "to" address is assumed to be heapword aligned.
++ //
++ // Arguments for generated stub:
++ // to: R3_ARG1
++ // value: R4_ARG2
++ // count: R5_ARG3 treated as signed
++ //
++ address generate_fill(BasicType t, bool aligned, const char* name) {
++ StubCodeMark mark(this, "StubRoutines", name);
++ address start = __ function_entry();
++
++ const Register to = R3_ARG1; // destination array address
++ const Register value = R4_ARG2; // fill value
++ const Register count = R5_ARG3; // elements count
++ const Register temp = R6_ARG4; // temp register
++
++ //assert_clean_int(count, O3); // Make sure 'count' is clean int.
++
++ Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
++ Label L_fill_2_bytes, L_fill_4_bytes, L_fill_elements, L_fill_32_bytes;
++
++ int shift = -1;
++ switch (t) {
++ case T_BYTE:
++ shift = 2;
++ // Clone bytes (zero extend not needed because store instructions below ignore high order bytes).
++ __ rldimi(value, value, 8, 48); // 8 bit -> 16 bit
++ __ cmpdi(CCR0, count, 2<<shift); // Short arrays (< 8 bytes) fill by element.
++ __ blt(CCR0, L_fill_elements);
++ __ rldimi(value, value, 16, 32); // 16 bit -> 32 bit
++ break;
++ case T_SHORT:
++ shift = 1;
++ // Clone bytes (zero extend not needed because store instructions below ignore high order bytes).
++ __ rldimi(value, value, 16, 32); // 16 bit -> 32 bit
++ __ cmpdi(CCR0, count, 2<<shift); // Short arrays (< 8 bytes) fill by element.
++ __ blt(CCR0, L_fill_elements);
++ break;
++ case T_INT:
++ shift = 0;
++ __ cmpdi(CCR0, count, 2<<shift); // Short arrays (< 8 bytes) fill by element.
++ __ blt(CCR0, L_fill_4_bytes);
++ break;
++ default: ShouldNotReachHere();
++ }
++
++ if (!aligned && (t == T_BYTE || t == T_SHORT)) {
++ // Align source address at 4 bytes address boundary.
++ if (t == T_BYTE) {
++ // One byte misalignment happens only for byte arrays.
++ __ andi_(temp, to, 1);
++ __ beq(CCR0, L_skip_align1);
++ __ stb(value, 0, to);
++ __ addi(to, to, 1);
++ __ addi(count, count, -1);
++ __ bind(L_skip_align1);
++ }
++ // Two bytes misalignment happens only for byte and short (char) arrays.
++ __ andi_(temp, to, 2);
++ __ beq(CCR0, L_skip_align2);
++ __ sth(value, 0, to);
++ __ addi(to, to, 2);
++ __ addi(count, count, -(1 << (shift - 1)));
++ __ bind(L_skip_align2);
++ }
++
++ if (!aligned) {
++ // Align to 8 bytes, we know we are 4 byte aligned to start.
++ __ andi_(temp, to, 7);
++ __ beq(CCR0, L_fill_32_bytes);
++ __ stw(value, 0, to);
++ __ addi(to, to, 4);
++ __ addi(count, count, -(1 << shift));
++ __ bind(L_fill_32_bytes);
++ }
++
++ __ li(temp, 8<<shift); // Prepare for 32 byte loop.
++ // Clone bytes int->long as above.
++ __ rldimi(value, value, 32, 0); // 32 bit -> 64 bit
++
++ Label L_check_fill_8_bytes;
++ // Fill 32-byte chunks.
++ __ subf_(count, temp, count);
++ __ blt(CCR0, L_check_fill_8_bytes);
++
++ Label L_fill_32_bytes_loop;
++ __ align(32);
++ __ bind(L_fill_32_bytes_loop);
++
++ __ std(value, 0, to);
++ __ std(value, 8, to);
++ __ subf_(count, temp, count); // Update count.
++ __ std(value, 16, to);
++ __ std(value, 24, to);
++
++ __ addi(to, to, 32);
++ __ bge(CCR0, L_fill_32_bytes_loop);
++
++ __ bind(L_check_fill_8_bytes);
++ __ add_(count, temp, count);
++ __ beq(CCR0, L_exit);
++ __ addic_(count, count, -(2 << shift));
++ __ blt(CCR0, L_fill_4_bytes);
++
++ //
++ // Length is too short, just fill 8 bytes at a time.
++ //
++ Label L_fill_8_bytes_loop;
++ __ bind(L_fill_8_bytes_loop);
++ __ std(value, 0, to);
++ __ addic_(count, count, -(2 << shift));
++ __ addi(to, to, 8);
++ __ bge(CCR0, L_fill_8_bytes_loop);
++
++ // Fill trailing 4 bytes.
++ __ bind(L_fill_4_bytes);
++ __ andi_(temp, count, 1<<shift);
++ __ beq(CCR0, L_fill_2_bytes);
++
++ __ stw(value, 0, to);
++ if (t == T_BYTE || t == T_SHORT) {
++ __ addi(to, to, 4);
++ // Fill trailing 2 bytes.
++ __ bind(L_fill_2_bytes);
++ __ andi_(temp, count, 1<<(shift-1));
++ __ beq(CCR0, L_fill_byte);
++ __ sth(value, 0, to);
++ if (t == T_BYTE) {
++ __ addi(to, to, 2);
++ // Fill trailing byte.
++ __ bind(L_fill_byte);
++ __ andi_(count, count, 1);
++ __ beq(CCR0, L_exit);
++ __ stb(value, 0, to);
++ } else {
++ __ bind(L_fill_byte);
++ }
++ } else {
++ __ bind(L_fill_2_bytes);
++ }
++ __ bind(L_exit);
++ __ blr();
++
++ // Handle short arrays (fewer than 2<<shift elements) one element at a time.
++ if (t == T_BYTE || t == T_SHORT) {
++ Label L_fill_loop;
++ __ bind(L_fill_elements);
++ __ cmpdi(CCR0, count, 0);
++ __ beq(CCR0, L_exit);
++ __ mtctr(count);
++ __ bind(L_fill_loop);
++ if (t == T_BYTE) {
++ __ stb(value, 0, to);
++ __ addi(to, to, 1);
++ } else {
++ __ sth(value, 0, to);
++ __ addi(to, to, 2);
++ }
++ __ bdnz(L_fill_loop);
++ __ blr();
++ }
++
++ return start;
++ }
++
++ // Generate stub for disjoint byte copy. If "aligned" is true, the
++ // "from" and "to" addresses are assumed to be heapword aligned.
++ //
++ // Arguments for generated stub:
++ // from: R3_ARG1
++ // to: R4_ARG2
++ // count: R5_ARG3 treated as signed
++ //
++ address generate_disjoint_byte_copy(bool aligned, const char * name) {
++ StubCodeMark mark(this, "StubRoutines", name);
++ address start = __ function_entry();
++
++ Register tmp1 = R6_ARG4;
++ Register tmp2 = R7_ARG5;
++ Register tmp3 = R8_ARG6;
++ Register tmp4 = R9_ARG7;
++
++ Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9;
++
++ // Don't try anything fancy if arrays don't have many elements.
++ __ li(tmp3, 0);
++ __ cmpwi(CCR0, R5_ARG3, 17);
++ __ ble(CCR0, l_6); // copy 4 at a time
++
++ if (!aligned) {
++ __ xorr(tmp1, R3_ARG1, R4_ARG2);
++ __ andi_(tmp1, tmp1, 3);
++ __ bne(CCR0, l_6); // If arrays don't have the same alignment mod 4, do 4 element copy.
++
++ // Copy elements if necessary to align to 4 bytes.
++ __ neg(tmp1, R3_ARG1); // Compute distance to alignment boundary.
++ __ andi_(tmp1, tmp1, 3);
++ __ beq(CCR0, l_2);
++
++ __ subf(R5_ARG3, tmp1, R5_ARG3);
++ __ bind(l_9);
++ __ lbz(tmp2, 0, R3_ARG1);
++ __ addic_(tmp1, tmp1, -1);
++ __ stb(tmp2, 0, R4_ARG2);
++ __ addi(R3_ARG1, R3_ARG1, 1);
++ __ addi(R4_ARG2, R4_ARG2, 1);
++ __ bne(CCR0, l_9);
++
++ __ bind(l_2);
++ }
++
++ // Copy 8 elements at a time if from and to have the same alignment mod 8.
++ __ xorr(tmp2, R3_ARG1, R4_ARG2);
++ __ andi_(tmp1, tmp2, 7);
++ __ bne(CCR0, l_7); // not same alignment mod 8 -> to or from is aligned -> copy 8
++
++ // copy a 2-element word if necessary to align to 8 bytes
++ __ andi_(R0, R3_ARG1, 7);
++ __ beq(CCR0, l_7);
++
++ __ lwzx(tmp2, R3_ARG1, tmp3);
++ __ addi(R5_ARG3, R5_ARG3, -4);
++ __ stwx(tmp2, R4_ARG2, tmp3);
++ { // FasterArrayCopy
++ __ addi(R3_ARG1, R3_ARG1, 4);
++ __ addi(R4_ARG2, R4_ARG2, 4);
++ }
++ __ bind(l_7);
++
++ { // FasterArrayCopy
++ __ cmpwi(CCR0, R5_ARG3, 31);
++ __ ble(CCR0, l_6); // copy 4 at a time if fewer than 32 elements remain
++
++ __ srdi(tmp1, R5_ARG3, 5);
++ __ andi_(R5_ARG3, R5_ARG3, 31);
++ __ mtctr(tmp1);
++
++ __ bind(l_8);
++ // Use unrolled version for mass copying (copy 32 elements a time)
++ // Load feeding store gets zero latency on Power6, however not on Power5.
++ // Therefore, the following sequence is made for the good of both.
++ __ ld(tmp1, 0, R3_ARG1);
++ __ ld(tmp2, 8, R3_ARG1);
++ __ ld(tmp3, 16, R3_ARG1);
++ __ ld(tmp4, 24, R3_ARG1);
++ __ std(tmp1, 0, R4_ARG2);
++ __ std(tmp2, 8, R4_ARG2);
++ __ std(tmp3, 16, R4_ARG2);
++ __ std(tmp4, 24, R4_ARG2);
++ __ addi(R3_ARG1, R3_ARG1, 32);
++ __ addi(R4_ARG2, R4_ARG2, 32);
++ __ bdnz(l_8);
++ }
++
++ __ bind(l_6);
++
++ // copy 4 elements at a time
++ __ cmpwi(CCR0, R5_ARG3, 4);
++ __ blt(CCR0, l_1);
++ __ srdi(tmp1, R5_ARG3, 2);
++ __ mtctr(tmp1); // is > 0
++ __ andi_(R5_ARG3, R5_ARG3, 3);
++
++ { // FasterArrayCopy
++ __ addi(R3_ARG1, R3_ARG1, -4);
++ __ addi(R4_ARG2, R4_ARG2, -4);
++ __ bind(l_3);
++ __ lwzu(tmp2, 4, R3_ARG1);
++ __ stwu(tmp2, 4, R4_ARG2);
++ __ bdnz(l_3);
++ __ addi(R3_ARG1, R3_ARG1, 4);
++ __ addi(R4_ARG2, R4_ARG2, 4);
++ }
++
++ // do single element copy
++ __ bind(l_1);
++ __ cmpwi(CCR0, R5_ARG3, 0);
++ __ beq(CCR0, l_4);
++
++ { // FasterArrayCopy
++ __ mtctr(R5_ARG3);
++ __ addi(R3_ARG1, R3_ARG1, -1);
++ __ addi(R4_ARG2, R4_ARG2, -1);
++
++ __ bind(l_5);
++ __ lbzu(tmp2, 1, R3_ARG1);
++ __ stbu(tmp2, 1, R4_ARG2);
++ __ bdnz(l_5);
++ }
++
++ __ bind(l_4);
++ __ blr();
++
++ return start;
++ }
++
++ // Generate stub for conjoint byte copy. If "aligned" is true, the
++ // "from" and "to" addresses are assumed to be heapword aligned.
++ //
++ // Arguments for generated stub:
++ // from: R3_ARG1
++ // to: R4_ARG2
++ // count: R5_ARG3 treated as signed
++ //
++ address generate_conjoint_byte_copy(bool aligned, const char * name) {
++ StubCodeMark mark(this, "StubRoutines", name);
++ address start = __ function_entry();
++
++ Register tmp1 = R6_ARG4;
++ Register tmp2 = R7_ARG5;
++ Register tmp3 = R8_ARG6;
++
++#if defined(ABI_ELFv2)
++ address nooverlap_target = aligned ?
++ StubRoutines::arrayof_jbyte_disjoint_arraycopy() :
++ StubRoutines::jbyte_disjoint_arraycopy();
++#else
++ address nooverlap_target = aligned ?
++ ((FunctionDescriptor*)StubRoutines::arrayof_jbyte_disjoint_arraycopy())->entry() :
++ ((FunctionDescriptor*)StubRoutines::jbyte_disjoint_arraycopy())->entry();
++#endif
++
++ array_overlap_test(nooverlap_target, 0);
++ // Do reverse copy. We assume the case of actual overlap is rare enough
++ // that we don't have to optimize it.
++ Label l_1, l_2;
++
++ __ b(l_2);
++ __ bind(l_1);
++ __ stbx(tmp1, R4_ARG2, R5_ARG3);
++ __ bind(l_2);
++ __ addic_(R5_ARG3, R5_ARG3, -1);
++ __ lbzx(tmp1, R3_ARG1, R5_ARG3);
++ __ bge(CCR0, l_1);
++
++ __ blr();
++
++ return start;
++ }
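
In C++ terms the backward loop is simply the overlapping case of memmove at byte granularity:

    #include <cstddef>
    #include <cstdint>

    // Copy backwards so a destination overlapping the tail of the source is
    // handled correctly; mirrors the lbzx/stbx loop driven by addic_/bge.
    inline void conjoint_byte_copy(const uint8_t* from, uint8_t* to, ptrdiff_t count) {
      for (ptrdiff_t i = count - 1; i >= 0; i--) {
        to[i] = from[i];
      }
    }
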
++
++ // Generate stub for disjoint short copy. If "aligned" is true, the
++ // "from" and "to" addresses are assumed to be heapword aligned.
++ //
++ // Arguments for generated stub:
++ // from: R3_ARG1
++ // to: R4_ARG2
++ // elm.count: R5_ARG3 treated as signed
++ //
++ // Strategy for aligned==true:
++ //
++ // If length <= 9:
++ // 1. copy 2 elements at a time (l_6)
++ // 2. copy last element if original element count was odd (l_1)
++ //
++ // If length > 9:
++ // 1. copy 4 elements at a time until less than 4 elements are left (l_7)
++ // 2. copy 2 elements at a time until less than 2 elements are left (l_6)
++ // 3. copy last element if one was left in step 2. (l_1)
++ //
++ //
++ // Strategy for aligned==false:
++ //
++ // If length <= 9: same as aligned==true case, but NOTE: load/stores
++ // can be unaligned (see comment below)
++ //
++ // If length > 9:
++ // 1. continue with step 6. if the alignment of from and to mod 4
++ // is different.
++ // 2. align from and to to 4 bytes by copying 1 element if necessary
++ // 3. at l_2 from and to are 4 byte aligned; continue with
++ // step 5 if they cannot be aligned to 8 bytes because they
++ // have different alignment mod 8.
++ // 4. at this point we know that both from and to have the same
++ // alignment mod 8; now copy one element if necessary to get
++ // 8 byte alignment of from and to.
++ // 5. copy 4 elements at a time until less than 4 elements are
++ // left; depending on step 3. all load/stores are aligned or
++ // either all loads or all stores are unaligned.
++ // 6. copy 2 elements at a time until less than 2 elements are
++ // left (l_6); arriving here from step 1., there is a chance
++ // that all accesses are unaligned.
++ // 7. copy last element if one was left in step 6. (l_1)
++ //
++ // There are unaligned data accesses using integer load/store
++ // instructions in this stub. POWER allows such accesses.
++ //
++ // According to the manuals (PowerISA_V2.06_PUBLIC, Book II,
++ // Chapter 2: Effect of Operand Placement on Performance) unaligned
++ // integer load/stores have good performance. Only unaligned
++ // floating point load/stores can have poor performance.
++ //
++ // TODO:
++ //
++ // 1. check if aligning the backbranch target of loops is beneficial
++ //
++ address generate_disjoint_short_copy(bool aligned, const char * name) {
++ StubCodeMark mark(this, "StubRoutines", name);
++
++ Register tmp1 = R6_ARG4;
++ Register tmp2 = R7_ARG5;
++ Register tmp3 = R8_ARG6;
++ Register tmp4 = R9_ARG7;
++
++ address start = __ function_entry();
++
++ Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8;
++ // don't try anything fancy if arrays don't have many elements
++ __ li(tmp3, 0);
++ __ cmpwi(CCR0, R5_ARG3, 9);
++ __ ble(CCR0, l_6); // copy 2 at a time
++
++ if (!aligned) {
++ __ xorr(tmp1, R3_ARG1, R4_ARG2);
++ __ andi_(tmp1, tmp1, 3);
++ __ bne(CCR0, l_6); // if arrays don't have the same alignment mod 4, do 2 element copy
++
++ // At this point it is guaranteed that both from and to have the same alignment mod 4.
++
++ // Copy 1 element if necessary to align to 4 bytes.
++ __ andi_(tmp1, R3_ARG1, 3);
++ __ beq(CCR0, l_2);
++
++ __ lhz(tmp2, 0, R3_ARG1);
++ __ addi(R3_ARG1, R3_ARG1, 2);
++ __ sth(tmp2, 0, R4_ARG2);
++ __ addi(R4_ARG2, R4_ARG2, 2);
++ __ addi(R5_ARG3, R5_ARG3, -1);
++ __ bind(l_2);
++
++ // At this point both from and to are at least 4 byte aligned.
++
++ // Copy 4 elements at a time.
++ // Align to 8 bytes, but only if both from and to have the same alignment mod 8.
++ __ xorr(tmp2, R3_ARG1, R4_ARG2);
++ __ andi_(tmp1, tmp2, 7);
++ __ bne(CCR0, l_7); // not same alignment mod 8 -> copy 4, either from or to will be unaligned
++
++ // Copy a 2-element word if necessary to align to 8 bytes.
++ __ andi_(R0, R3_ARG1, 7);
++ __ beq(CCR0, l_7);
++
++ __ lwzx(tmp2, R3_ARG1, tmp3);
++ __ addi(R5_ARG3, R5_ARG3, -2);
++ __ stwx(tmp2, R4_ARG2, tmp3);
++ { // FasterArrayCopy
++ __ addi(R3_ARG1, R3_ARG1, 4);
++ __ addi(R4_ARG2, R4_ARG2, 4);
++ }
++ }
++
++ __ bind(l_7);
++
++ // Copy 4 elements at a time; either the loads or the stores can
++ // be unaligned if aligned == false.
++
++ { // FasterArrayCopy
++ __ cmpwi(CCR0, R5_ARG3, 15);
++ __ ble(CCR0, l_6); // copy 2 at a time if less than 16 elements remain
++
++ __ srdi(tmp1, R5_ARG3, 4);
++ __ andi_(R5_ARG3, R5_ARG3, 15);
++ __ mtctr(tmp1);
++
++ __ bind(l_8);
++ // Use unrolled version for mass copying (copy 16 elements a time).
++ // Load feeding store gets zero latency on Power6, however not on Power5.
++ // Therefore, the following sequence is made for the good of both.
++ __ ld(tmp1, 0, R3_ARG1);
++ __ ld(tmp2, 8, R3_ARG1);
++ __ ld(tmp3, 16, R3_ARG1);
++ __ ld(tmp4, 24, R3_ARG1);
++ __ std(tmp1, 0, R4_ARG2);
++ __ std(tmp2, 8, R4_ARG2);
++ __ std(tmp3, 16, R4_ARG2);
++ __ std(tmp4, 24, R4_ARG2);
++ __ addi(R3_ARG1, R3_ARG1, 32);
++ __ addi(R4_ARG2, R4_ARG2, 32);
++ __ bdnz(l_8);
++ }
++ __ bind(l_6);
++
++ // copy 2 elements at a time
++ { // FasterArrayCopy
++ __ cmpwi(CCR0, R5_ARG3, 2);
++ __ blt(CCR0, l_1);
++ __ srdi(tmp1, R5_ARG3, 1);
++ __ andi_(R5_ARG3, R5_ARG3, 1);
++
++ __ addi(R3_ARG1, R3_ARG1, -4);
++ __ addi(R4_ARG2, R4_ARG2, -4);
++ __ mtctr(tmp1);
++
++ __ bind(l_3);
++ __ lwzu(tmp2, 4, R3_ARG1);
++ __ stwu(tmp2, 4, R4_ARG2);
++ __ bdnz(l_3);
++
++ __ addi(R3_ARG1, R3_ARG1, 4);
++ __ addi(R4_ARG2, R4_ARG2, 4);
++ }
++
++ // do single element copy
++ __ bind(l_1);
++ __ cmpwi(CCR0, R5_ARG3, 0);
++ __ beq(CCR0, l_4);
++
++ { // FasterArrayCopy
++ __ mtctr(R5_ARG3);
++ __ addi(R3_ARG1, R3_ARG1, -2);
++ __ addi(R4_ARG2, R4_ARG2, -2);
++
++ __ bind(l_5);
++ __ lhzu(tmp2, 2, R3_ARG1);
++ __ sthu(tmp2, 2, R4_ARG2);
++ __ bdnz(l_5);
++ }
++ __ bind(l_4);
++ __ blr();
++
++ return start;
++ }
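
A compact C++ model of the aligned==false strategy described above; memcpy stands in for the 4- and 8-byte integer load/store pairs, and the step numbers refer to the comment. Illustrative only, and it assumes, like the stub, that unaligned integer accesses are legal.

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    inline void disjoint_short_copy(const uint16_t* from, uint16_t* to, size_t count) {
      if (count > 9 && ((((uintptr_t)from ^ (uintptr_t)to) & 3) == 0)) {
        if ((uintptr_t)from & 3) {                      // step 2: 4-byte align
          *to++ = *from++; count--;
        }
        if (((((uintptr_t)from ^ (uintptr_t)to) & 7) == 0) &&
            ((uintptr_t)from & 7)) {                    // step 4: 8-byte align
          memcpy(to, from, 4); from += 2; to += 2; count -= 2;
        }
        while (count >= 4) {                            // step 5: 4 elements at a time
          memcpy(to, from, 8); from += 4; to += 4; count -= 4;
        }
      }
      while (count >= 2) {                              // step 6: 2 elements at a time
        memcpy(to, from, 4); from += 2; to += 2; count -= 2;
      }
      if (count) *to = *from;                           // step 7: last element
    }
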
++
++ // Generate stub for conjoint short copy. If "aligned" is true, the
++ // "from" and "to" addresses are assumed to be heapword aligned.
++ //
++ // Arguments for generated stub:
++ // from: R3_ARG1
++ // to: R4_ARG2
++ // count: R5_ARG3 treated as signed
++ //
++ address generate_conjoint_short_copy(bool aligned, const char * name) {
++ StubCodeMark mark(this, "StubRoutines", name);
++ address start = __ function_entry();
++
++ Register tmp1 = R6_ARG4;
++ Register tmp2 = R7_ARG5;
++ Register tmp3 = R8_ARG6;
++
++#if defined(ABI_ELFv2)
++ address nooverlap_target = aligned ?
++ StubRoutines::arrayof_jshort_disjoint_arraycopy() :
++ StubRoutines::jshort_disjoint_arraycopy();
++#else
++ address nooverlap_target = aligned ?
++ ((FunctionDescriptor*)StubRoutines::arrayof_jshort_disjoint_arraycopy())->entry() :
++ ((FunctionDescriptor*)StubRoutines::jshort_disjoint_arraycopy())->entry();
++#endif
++
++ array_overlap_test(nooverlap_target, 1);
++
++ Label l_1, l_2;
++ __ sldi(tmp1, R5_ARG3, 1);
++ __ b(l_2);
++ __ bind(l_1);
++ __ sthx(tmp2, R4_ARG2, tmp1);
++ __ bind(l_2);
++ __ addic_(tmp1, tmp1, -2);
++ __ lhzx(tmp2, R3_ARG1, tmp1);
++ __ bge(CCR0, l_1);
++
++ __ blr();
++
++ return start;
++ }
++
++ // Generate core code for disjoint int copy (and oop copy on 32-bit). If "aligned"
++ // is true, the "from" and "to" addresses are assumed to be heapword aligned.
++ //
++ // Arguments:
++ // from: R3_ARG1
++ // to: R4_ARG2
++ // count: R5_ARG3 treated as signed
++ //
++ void generate_disjoint_int_copy_core(bool aligned) {
++ Register tmp1 = R6_ARG4;
++ Register tmp2 = R7_ARG5;
++ Register tmp3 = R8_ARG6;
++ Register tmp4 = R0;
++
++ Label l_1, l_2, l_3, l_4, l_5, l_6;
++ // for short arrays, just do single element copy
++ __ li(tmp3, 0);
++ __ cmpwi(CCR0, R5_ARG3, 5);
++ __ ble(CCR0, l_2);
++
++ if (!aligned) {
++ // check if arrays have same alignment mod 8.
++ __ xorr(tmp1, R3_ARG1, R4_ARG2);
++ __ andi_(R0, tmp1, 7);
++ // Not the same alignment, but ld and std just need to be 4 byte aligned.
++ __ bne(CCR0, l_4); // to OR from is 8 byte aligned -> copy 2 at a time
++
++ // copy 1 element to align to and from on an 8 byte boundary
++ __ andi_(R0, R3_ARG1, 7);
++ __ beq(CCR0, l_4);
++
++ __ lwzx(tmp2, R3_ARG1, tmp3);
++ __ addi(R5_ARG3, R5_ARG3, -1);
++ __ stwx(tmp2, R4_ARG2, tmp3);
++ { // FasterArrayCopy
++ __ addi(R3_ARG1, R3_ARG1, 4);
++ __ addi(R4_ARG2, R4_ARG2, 4);
++ }
++ __ bind(l_4);
++ }
++
++ { // FasterArrayCopy
++ __ cmpwi(CCR0, R5_ARG3, 7);
++ __ ble(CCR0, l_2); // copy 1 at a time if less than 8 elements remain
++
++ __ srdi(tmp1, R5_ARG3, 3);
++ __ andi_(R5_ARG3, R5_ARG3, 7);
++ __ mtctr(tmp1);
++
++ __ bind(l_6);
++ // Use unrolled version for mass copying (copy 8 elements a time).
++ // Load feeding store gets zero latency on Power6, however not on Power5.
++ // Therefore, the following sequence is made for the good of both.
++ __ ld(tmp1, 0, R3_ARG1);
++ __ ld(tmp2, 8, R3_ARG1);
++ __ ld(tmp3, 16, R3_ARG1);
++ __ ld(tmp4, 24, R3_ARG1);
++ __ std(tmp1, 0, R4_ARG2);
++ __ std(tmp2, 8, R4_ARG2);
++ __ std(tmp3, 16, R4_ARG2);
++ __ std(tmp4, 24, R4_ARG2);
++ __ addi(R3_ARG1, R3_ARG1, 32);
++ __ addi(R4_ARG2, R4_ARG2, 32);
++ __ bdnz(l_6);
++ }
++
++ // copy 1 element at a time
++ __ bind(l_2);
++ __ cmpwi(CCR0, R5_ARG3, 0);
++ __ beq(CCR0, l_1);
++
++ { // FasterArrayCopy
++ __ mtctr(R5_ARG3);
++ __ addi(R3_ARG1, R3_ARG1, -4);
++ __ addi(R4_ARG2, R4_ARG2, -4);
++
++ __ bind(l_3);
++ __ lwzu(tmp2, 4, R3_ARG1);
++ __ stwu(tmp2, 4, R4_ARG2);
++ __ bdnz(l_3);
++ }
++
++ __ bind(l_1);
++ return;
++ }
++
++ // Generate stub for disjoint int copy. If "aligned" is true, the
++ // "from" and "to" addresses are assumed to be heapword aligned.
++ //
++ // Arguments for generated stub:
++ // from: R3_ARG1
++ // to: R4_ARG2
++ // count: R5_ARG3 treated as signed
++ //
++ address generate_disjoint_int_copy(bool aligned, const char * name) {
++ StubCodeMark mark(this, "StubRoutines", name);
++ address start = __ function_entry();
++ generate_disjoint_int_copy_core(aligned);
++ __ blr();
++ return start;
++ }
++
++ // Generate core code for conjoint int copy (and oop copy on
++ // 32-bit). If "aligned" is true, the "from" and "to" addresses
++ // are assumed to be heapword aligned.
++ //
++ // Arguments:
++ // from: R3_ARG1
++ // to: R4_ARG2
++ // count: R5_ARG3 treated as signed
++ //
++ void generate_conjoint_int_copy_core(bool aligned) {
++ // Do reverse copy. We assume the case of actual overlap is rare enough
++ // that we don't have to optimize it.
++
++ Label l_1, l_2, l_3, l_4, l_5, l_6;
++
++ Register tmp1 = R6_ARG4;
++ Register tmp2 = R7_ARG5;
++ Register tmp3 = R8_ARG6;
++ Register tmp4 = R0;
++
++ { // FasterArrayCopy
++ __ cmpwi(CCR0, R5_ARG3, 0);
++ __ beq(CCR0, l_6);
++
++ __ sldi(R5_ARG3, R5_ARG3, 2);
++ __ add(R3_ARG1, R3_ARG1, R5_ARG3);
++ __ add(R4_ARG2, R4_ARG2, R5_ARG3);
++ __ srdi(R5_ARG3, R5_ARG3, 2);
++
++ __ cmpwi(CCR0, R5_ARG3, 7);
++ __ ble(CCR0, l_5); // copy 1 at a time if less than 8 elements remain
++
++ __ srdi(tmp1, R5_ARG3, 3);
++ __ andi(R5_ARG3, R5_ARG3, 7);
++ __ mtctr(tmp1);
++
++ __ bind(l_4);
++ // Use unrolled version for mass copying (copy 4 elements a time).
++ // Load feeding store gets zero latency on Power6, however not on Power5.
++ // Therefore, the following sequence is made for the good of both.
++ __ addi(R3_ARG1, R3_ARG1, -32);
++ __ addi(R4_ARG2, R4_ARG2, -32);
++ __ ld(tmp4, 24, R3_ARG1);
++ __ ld(tmp3, 16, R3_ARG1);
++ __ ld(tmp2, 8, R3_ARG1);
++ __ ld(tmp1, 0, R3_ARG1);
++ __ std(tmp4, 24, R4_ARG2);
++ __ std(tmp3, 16, R4_ARG2);
++ __ std(tmp2, 8, R4_ARG2);
++ __ std(tmp1, 0, R4_ARG2);
++ __ bdnz(l_4);
++
++ __ cmpwi(CCR0, R5_ARG3, 0);
++ __ beq(CCR0, l_6);
++
++ __ bind(l_5);
++ __ mtctr(R5_ARG3);
++ __ bind(l_3);
++ __ lwz(R0, -4, R3_ARG1);
++ __ stw(R0, -4, R4_ARG2);
++ __ addi(R3_ARG1, R3_ARG1, -4);
++ __ addi(R4_ARG2, R4_ARG2, -4);
++ __ bdnz(l_3);
++
++ __ bind(l_6);
++ }
++ }
++
++ // Generate stub for conjoint int copy. If "aligned" is true, the
++ // "from" and "to" addresses are assumed to be heapword aligned.
++ //
++ // Arguments for generated stub:
++ // from: R3_ARG1
++ // to: R4_ARG2
++ // count: R5_ARG3 treated as signed
++ //
++ address generate_conjoint_int_copy(bool aligned, const char * name) {
++ StubCodeMark mark(this, "StubRoutines", name);
++ address start = __ function_entry();
++
++#if defined(ABI_ELFv2)
++ address nooverlap_target = aligned ?
++ StubRoutines::arrayof_jint_disjoint_arraycopy() :
++ StubRoutines::jint_disjoint_arraycopy();
++#else
++ address nooverlap_target = aligned ?
++ ((FunctionDescriptor*)StubRoutines::arrayof_jint_disjoint_arraycopy())->entry() :
++ ((FunctionDescriptor*)StubRoutines::jint_disjoint_arraycopy())->entry();
++#endif
++
++ array_overlap_test(nooverlap_target, 2);
++
++ generate_conjoint_int_copy_core(aligned);
++
++ __ blr();
++
++ return start;
++ }
++
++ // Generate core code for disjoint long copy (and oop copy on
++ // 64-bit). If "aligned" is true, the "from" and "to" addresses
++ // are assumed to be heapword aligned.
++ //
++ // Arguments:
++ // from: R3_ARG1
++ // to: R4_ARG2
++ // count: R5_ARG3 treated as signed
++ //
++ void generate_disjoint_long_copy_core(bool aligned) {
++ Register tmp1 = R6_ARG4;
++ Register tmp2 = R7_ARG5;
++ Register tmp3 = R8_ARG6;
++ Register tmp4 = R0;
++
++ Label l_1, l_2, l_3, l_4;
++
++ { // FasterArrayCopy
++ __ cmpwi(CCR0, R5_ARG3, 3);
++ __ ble(CCR0, l_3); // copy 1 at a time if less than 4 elements remain
++
++ __ srdi(tmp1, R5_ARG3, 2);
++ __ andi_(R5_ARG3, R5_ARG3, 3);
++ __ mtctr(tmp1);
++
++ __ bind(l_4);
++ // Use unrolled version for mass copying (copy 4 elements a time).
++ // Load feeding store gets zero latency on Power6, however not on Power5.
++ // Therefore, the following sequence is made for the good of both.
++ __ ld(tmp1, 0, R3_ARG1);
++ __ ld(tmp2, 8, R3_ARG1);
++ __ ld(tmp3, 16, R3_ARG1);
++ __ ld(tmp4, 24, R3_ARG1);
++ __ std(tmp1, 0, R4_ARG2);
++ __ std(tmp2, 8, R4_ARG2);
++ __ std(tmp3, 16, R4_ARG2);
++ __ std(tmp4, 24, R4_ARG2);
++ __ addi(R3_ARG1, R3_ARG1, 32);
++ __ addi(R4_ARG2, R4_ARG2, 32);
++ __ bdnz(l_4);
++ }
++
++ // copy 1 element at a time
++ __ bind(l_3);
++ __ cmpwi(CCR0, R5_ARG3, 0);
++ __ beq(CCR0, l_1);
++
++ { // FasterArrayCopy
++ __ mtctr(R5_ARG3);
++ __ addi(R3_ARG1, R3_ARG1, -8);
++ __ addi(R4_ARG2, R4_ARG2, -8);
++
++ __ bind(l_2);
++ __ ldu(R0, 8, R3_ARG1);
++ __ stdu(R0, 8, R4_ARG2);
++ __ bdnz(l_2);
++
++ }
++ __ bind(l_1);
++ }
++
++ // Generate stub for disjoint long copy. If "aligned" is true, the
++ // "from" and "to" addresses are assumed to be heapword aligned.
++ //
++ // Arguments for generated stub:
++ // from: R3_ARG1
++ // to: R4_ARG2
++ // count: R5_ARG3 treated as signed
++ //
++ address generate_disjoint_long_copy(bool aligned, const char * name) {
++ StubCodeMark mark(this, "StubRoutines", name);
++ address start = __ function_entry();
++ generate_disjoint_long_copy_core(aligned);
++ __ blr();
++
++ return start;
++ }
++
++ // Generate core code for conjoint long copy (and oop copy on
++ // 64-bit). If "aligned" is true, the "from" and "to" addresses
++ // are assumed to be heapword aligned.
++ //
++ // Arguments:
++ // from: R3_ARG1
++ // to: R4_ARG2
++ // count: R5_ARG3 treated as signed
++ //
++ void generate_conjoint_long_copy_core(bool aligned) {
++ Register tmp1 = R6_ARG4;
++ Register tmp2 = R7_ARG5;
++ Register tmp3 = R8_ARG6;
++ Register tmp4 = R0;
++
++ Label l_1, l_2, l_3, l_4, l_5;
++
++ __ cmpwi(CCR0, R5_ARG3, 0);
++ __ beq(CCR0, l_1);
++
++ { // FasterArrayCopy
++ __ sldi(R5_ARG3, R5_ARG3, 3);
++ __ add(R3_ARG1, R3_ARG1, R5_ARG3);
++ __ add(R4_ARG2, R4_ARG2, R5_ARG3);
++ __ srdi(R5_ARG3, R5_ARG3, 3);
++
++ __ cmpwi(CCR0, R5_ARG3, 3);
++ __ ble(CCR0, l_5); // copy 1 at a time if less than 4 elements remain
++
++ __ srdi(tmp1, R5_ARG3, 2);
++ __ andi(R5_ARG3, R5_ARG3, 3);
++ __ mtctr(tmp1);
++
++ __ bind(l_4);
++ // Use unrolled version for mass copying (copy 4 elements a time).
++ // Load feeding store gets zero latency on Power6, however not on Power5.
++ // Therefore, the following sequence is made for the good of both.
++ __ addi(R3_ARG1, R3_ARG1, -32);
++ __ addi(R4_ARG2, R4_ARG2, -32);
++ __ ld(tmp4, 24, R3_ARG1);
++ __ ld(tmp3, 16, R3_ARG1);
++ __ ld(tmp2, 8, R3_ARG1);
++ __ ld(tmp1, 0, R3_ARG1);
++ __ std(tmp4, 24, R4_ARG2);
++ __ std(tmp3, 16, R4_ARG2);
++ __ std(tmp2, 8, R4_ARG2);
++ __ std(tmp1, 0, R4_ARG2);
++ __ bdnz(l_4);
++
++ __ cmpwi(CCR0, R5_ARG3, 0);
++ __ beq(CCR0, l_1);
++
++ __ bind(l_5);
++ __ mtctr(R5_ARG3);
++ __ bind(l_3);
++ __ ld(R0, -8, R3_ARG1);
++ __ std(R0, -8, R4_ARG2);
++ __ addi(R3_ARG1, R3_ARG1, -8);
++ __ addi(R4_ARG2, R4_ARG2, -8);
++ __ bdnz(l_3);
++
++ }
++ __ bind(l_1);
++ }
++
++ // Generate stub for conjoint long copy. If "aligned" is true, the
++ // "from" and "to" addresses are assumed to be heapword aligned.
++ //
++ // Arguments for generated stub:
++ // from: R3_ARG1
++ // to: R4_ARG2
++ // count: R5_ARG3 treated as signed
++ //
++ address generate_conjoint_long_copy(bool aligned, const char * name) {
++ StubCodeMark mark(this, "StubRoutines", name);
++ address start = __ function_entry();
++
++#if defined(ABI_ELFv2)
++ address nooverlap_target = aligned ?
++ StubRoutines::arrayof_jlong_disjoint_arraycopy() :
++ StubRoutines::jlong_disjoint_arraycopy();
++#else
++ address nooverlap_target = aligned ?
++ ((FunctionDescriptor*)StubRoutines::arrayof_jlong_disjoint_arraycopy())->entry() :
++ ((FunctionDescriptor*)StubRoutines::jlong_disjoint_arraycopy())->entry();
++#endif
++
++ array_overlap_test(nooverlap_target, 3);
++ generate_conjoint_long_copy_core(aligned);
++
++ __ blr();
++
++ return start;
++ }
++
++ // Generate stub for conjoint oop copy. If "aligned" is true, the
++ // "from" and "to" addresses are assumed to be heapword aligned.
++ //
++ // Arguments for generated stub:
++ // from: R3_ARG1
++ // to: R4_ARG2
++ // count: R5_ARG3 treated as signed
++ // dest_uninitialized: G1 support
++ //
++ address generate_conjoint_oop_copy(bool aligned, const char * name, bool dest_uninitialized) {
++ StubCodeMark mark(this, "StubRoutines", name);
++
++ address start = __ function_entry();
++
++#if defined(ABI_ELFv2)
++ address nooverlap_target = aligned ?
++ StubRoutines::arrayof_oop_disjoint_arraycopy() :
++ StubRoutines::oop_disjoint_arraycopy();
++#else
++ address nooverlap_target = aligned ?
++ ((FunctionDescriptor*)StubRoutines::arrayof_oop_disjoint_arraycopy())->entry() :
++ ((FunctionDescriptor*)StubRoutines::oop_disjoint_arraycopy())->entry();
++#endif
++
++ gen_write_ref_array_pre_barrier(R3_ARG1, R4_ARG2, R5_ARG3, dest_uninitialized, R9_ARG7);
++
++ // Save arguments.
++ __ mr(R9_ARG7, R4_ARG2);
++ __ mr(R10_ARG8, R5_ARG3);
++
++ if (UseCompressedOops) {
++ array_overlap_test(nooverlap_target, 2);
++ generate_conjoint_int_copy_core(aligned);
++ } else {
++ array_overlap_test(nooverlap_target, 3);
++ generate_conjoint_long_copy_core(aligned);
++ }
++
++ gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1, /*branchToEnd*/ false);
++ return start;
++ }
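
Both oop-copy stubs choose the copy core by element width: with compressed oops an element is a 4-byte narrow oop, otherwise an 8-byte oop, and the log2 element size passed to array_overlap_test has to agree with that choice. As a one-line sketch:

    // Illustrative: element size drives both the overlap-test shift and the core.
    inline int oop_copy_log2_element_size(bool use_compressed_oops) {
      return use_compressed_oops ? 2   // 4-byte narrow oops -> int copy core
                                 : 3;  // 8-byte oops        -> long copy core
    }
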
++
++ // Generate stub for disjoint oop copy. If "aligned" is true, the
++ // "from" and "to" addresses are assumed to be heapword aligned.
++ //
++ // Arguments for generated stub:
++ // from: R3_ARG1
++ // to: R4_ARG2
++ // count: R5_ARG3 treated as signed
++ // dest_uninitialized: G1 support
++ //
++ address generate_disjoint_oop_copy(bool aligned, const char * name, bool dest_uninitialized) {
++ StubCodeMark mark(this, "StubRoutines", name);
++ address start = __ function_entry();
++
++ gen_write_ref_array_pre_barrier(R3_ARG1, R4_ARG2, R5_ARG3, dest_uninitialized, R9_ARG7);
++
++ // Save some arguments for the post barrier; the copy cores destroy them.
++ __ mr(R9_ARG7, R4_ARG2);
++ __ mr(R10_ARG8, R5_ARG3);
++
++ if (UseCompressedOops) {
++ generate_disjoint_int_copy_core(aligned);
++ } else {
++ generate_disjoint_long_copy_core(aligned);
++ }
++
++ gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1, /*branchToEnd*/ false);
++
++ return start;
++ }
++
++ void generate_arraycopy_stubs() {
++ // Note: the disjoint stubs must be generated first, as some of
++ // the conjoint stubs use them.
++
++ // non-aligned disjoint versions
++ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy");
++ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy");
++ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy(false, "jint_disjoint_arraycopy");
++ StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy");
++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_oop_copy(false, "oop_disjoint_arraycopy", false);
++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy(false, "oop_disjoint_arraycopy_uninit", true);
++
++ // aligned disjoint versions
++ StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(true, "arrayof_jbyte_disjoint_arraycopy");
++ StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, "arrayof_jshort_disjoint_arraycopy");
++ StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, "arrayof_jint_disjoint_arraycopy");
++ StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, "arrayof_jlong_disjoint_arraycopy");
++ StubRoutines::_arrayof_oop_disjoint_arraycopy = generate_disjoint_oop_copy(true, "arrayof_oop_disjoint_arraycopy", false);
++ StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy(true, "arrayof_oop_disjoint_arraycopy_uninit", true);
++
++ // non-aligned conjoint versions
++ StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, "jbyte_arraycopy");
++ StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy");
++ StubRoutines::_jint_arraycopy = generate_conjoint_int_copy(false, "jint_arraycopy");
++ StubRoutines::_jlong_arraycopy = generate_conjoint_long_copy(false, "jlong_arraycopy");
++ StubRoutines::_oop_arraycopy = generate_conjoint_oop_copy(false, "oop_arraycopy", false);
++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_oop_copy(false, "oop_arraycopy_uninit", true);
++
++ // aligned conjoint versions
++ StubRoutines::_arrayof_jbyte_arraycopy = generate_conjoint_byte_copy(true, "arrayof_jbyte_arraycopy");
++ StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, "arrayof_jshort_arraycopy");
++ StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_copy(true, "arrayof_jint_arraycopy");
++ StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_long_copy(true, "arrayof_jlong_arraycopy");
++ StubRoutines::_arrayof_oop_arraycopy = generate_conjoint_oop_copy(true, "arrayof_oop_arraycopy", false);
++ StubRoutines::_arrayof_oop_arraycopy_uninit = generate_conjoint_oop_copy(true, "arrayof_oop_arraycopy_uninit", true);
++
++ // fill routines
++ StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
++ StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
++ StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill");
++ StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
++ StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
++ StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
++ }
++
++ // Safefetch stubs.
++ void generate_safefetch(const char* name, int size, address* entry, address* fault_pc, address* continuation_pc) {
++ // safefetch signatures:
++ // int SafeFetch32(int* adr, int errValue);
++ // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue);
++ //
++ // arguments:
++ // R3_ARG1 = adr
++ // R4_ARG2 = errValue
++ //
++ // result:
++ // R3_RET = *adr or errValue
++
++ StubCodeMark mark(this, "StubRoutines", name);
++
++ // Entry point, pc or function descriptor.
++ *entry = __ function_entry();
++
++ // Load *adr into R4_ARG2, may fault.
++ *fault_pc = __ pc();
++ switch (size) {
++ case 4:
++ // int32_t, sign-extended
++ __ lwa(R4_ARG2, 0, R3_ARG1);
++ break;
++ case 8:
++ // int64_t
++ __ ld(R4_ARG2, 0, R3_ARG1);
++ break;
++ default:
++ ShouldNotReachHere();
++ }
++
++ // return errValue or *adr
++ *continuation_pc = __ pc();
++ __ mr(R3_RET, R4_ARG2);
++ __ blr();
++ }
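
Typical use of the generated entry points, sketched below. SafeFetch32 is the routine these stubs back; the helper and the double probe with two different error values (to tell a fault from memory that really contains the sentinel) are illustrative, not HotSpot code.

    // Returns *adr, or errValue if the load at fault_pc traps and the signal
    // handler resumes execution at continuation_pc.
    extern "C" int SafeFetch32(int* adr, int errValue);

    inline bool is_readable_int(int* adr) {
      const int e1 = 0x5A5A5A5A, e2 = ~e1;
      // Only a faulting address returns the error value for both probes.
      return !(SafeFetch32(adr, e1) == e1 && SafeFetch32(adr, e2) == e2);
    }
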
++
++ // Initialization
++ void generate_initial() {
++ // Generates all stubs and initializes the entry points
++
++ // Entry points that exist in all platforms.
++ // Note: This is code that could be shared among different platforms - however the
++ // benefit seems to be smaller than the disadvantage of having a
++ // much more complicated generator structure. See also comment in
++ // stubRoutines.hpp.
++
++ StubRoutines::_forward_exception_entry = generate_forward_exception();
++ StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address);
++ StubRoutines::_catch_exception_entry = generate_catch_exception();
++
++ // Build this early so it's available for the interpreter.
++ StubRoutines::_throw_StackOverflowError_entry =
++ generate_throw_exception("StackOverflowError throw_exception",
++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), false);
++ }
++
++ void generate_all() {
++ // Generates all stubs and initializes the entry points
++
++ // These entry points require SharedInfo::stack0 to be set up in
++ // non-core builds
++ StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false);
++ // Handle IncompatibleClassChangeError in itable stubs.
++ StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError), false);
++ StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false);
++
++ StubRoutines::_handler_for_unsafe_access_entry = generate_handler_for_unsafe_access();
++
++ // support for verify_oop (must happen after universe_init)
++ StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
++
++ // arraycopy stubs used by compilers
++ generate_arraycopy_stubs();
++
++ if (UseAESIntrinsics) {
++ guarantee(!UseAESIntrinsics, "not yet implemented.");
++ }
++
++ // Safefetch stubs.
++ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
++ &StubRoutines::_safefetch32_fault_pc,
++ &StubRoutines::_safefetch32_continuation_pc);
++ generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
++ &StubRoutines::_safefetchN_fault_pc,
++ &StubRoutines::_safefetchN_continuation_pc);
++ }
++
++ public:
++ StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
++ // replace the standard masm with a special one:
++ _masm = new MacroAssembler(code);
++ if (all) {
++ generate_all();
++ } else {
++ generate_initial();
++ }
++ }
++};
++
++void StubGenerator_generate(CodeBuffer* code, bool all) {
++ StubGenerator g(code, all);
++}
+--- ./hotspot/src/cpu/ppc/vm/stubRoutines_ppc_64.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/stubRoutines_ppc_64.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,29 @@
++/*
++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2014 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++// Implementation of the platform-specific part of StubRoutines - for
++// a description of how to extend it, see the stubRoutines.hpp file.
++
++
+--- ./hotspot/src/cpu/ppc/vm/stubRoutines_ppc_64.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/stubRoutines_ppc_64.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,40 @@
++/*
++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_PPC_VM_STUBROUTINES_PPC_64_HPP
++#define CPU_PPC_VM_STUBROUTINES_PPC_64_HPP
++
++// This file holds the platform specific parts of the StubRoutines
++// definition. See stubRoutines.hpp for a description on how to
++// extend it.
++
++static bool returns_to_call_stub(address return_pc) { return return_pc == _call_stub_return_address; }
++
++enum platform_dependent_constants {
++ code_size1 = 20000, // simply increase if too small (assembler will crash if too small)
++ code_size2 = 20000 // simply increase if too small (assembler will crash if too small)
++};
++
++#endif // CPU_PPC_VM_STUBROUTINES_PPC_64_HPP
+--- ./hotspot/src/cpu/ppc/vm/templateInterpreterGenerator_ppc.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/templateInterpreterGenerator_ppc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,44 @@
++/*
++ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2013, 2014 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_PPC_VM_TEMPLATEINTERPRETERGENERATOR_PPC_HPP
++#define CPU_PPC_VM_TEMPLATEINTERPRETERGENERATOR_PPC_HPP
++
++ protected:
++ address generate_normal_entry(bool synchronized);
++ address generate_native_entry(bool synchronized);
++ address generate_math_entry(AbstractInterpreter::MethodKind kind);
++ address generate_empty_entry(void);
++
++ void lock_method(Register Rflags, Register Rscratch1, Register Rscratch2, bool flags_preloaded=false);
++ void unlock_method(bool check_exceptions = true);
++
++ void generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue);
++ void generate_counter_overflow(Label& continue_entry);
++
++ void generate_fixed_frame(bool native_call, Register Rsize_of_parameters, Register Rsize_of_locals);
++ void generate_stack_overflow_check(Register Rframe_size, Register Rscratch1);
++
++#endif // CPU_PPC_VM_TEMPLATEINTERPRETERGENERATOR_PPC_HPP
+--- ./hotspot/src/cpu/ppc/vm/templateInterpreter_ppc.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/templateInterpreter_ppc.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,1828 @@
++/*
++ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2013, 2014 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#ifndef CC_INTERP
++#include "asm/macroAssembler.inline.hpp"
++#include "interpreter/bytecodeHistogram.hpp"
++#include "interpreter/interpreter.hpp"
++#include "interpreter/interpreterGenerator.hpp"
++#include "interpreter/interpreterRuntime.hpp"
++#include "interpreter/templateTable.hpp"
++#include "oops/arrayOop.hpp"
++#include "oops/methodData.hpp"
++#include "oops/method.hpp"
++#include "oops/oop.inline.hpp"
++#include "prims/jvmtiExport.hpp"
++#include "prims/jvmtiThreadState.hpp"
++#include "runtime/arguments.hpp"
++#include "runtime/deoptimization.hpp"
++#include "runtime/frame.inline.hpp"
++#include "runtime/sharedRuntime.hpp"
++#include "runtime/stubRoutines.hpp"
++#include "runtime/synchronizer.hpp"
++#include "runtime/timer.hpp"
++#include "runtime/vframeArray.hpp"
++#include "utilities/debug.hpp"
++#include "utilities/macros.hpp"
++
++#undef __
++#define __ _masm->
++
++#ifdef PRODUCT
++#define BLOCK_COMMENT(str) /* nothing */
++#else
++#define BLOCK_COMMENT(str) __ block_comment(str)
++#endif
++
++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
++
++//-----------------------------------------------------------------------------
++
++// Actually we should never reach here since we do stack overflow checks before pushing any frame.
++address TemplateInterpreterGenerator::generate_StackOverflowError_handler() {
++ address entry = __ pc();
++ __ unimplemented("generate_StackOverflowError_handler");
++ return entry;
++}
++
++address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler(const char* name) {
++ address entry = __ pc();
++ __ empty_expression_stack();
++ __ load_const_optimized(R4_ARG2, (address) name);
++ // Index is in R17_tos.
++ __ mr(R5_ARG3, R17_tos);
++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ArrayIndexOutOfBoundsException));
++ return entry;
++}
++
++#if 0
++// Call special ClassCastException constructor taking object to cast
++// and target class as arguments.
++address TemplateInterpreterGenerator::generate_ClassCastException_verbose_handler() {
++ address entry = __ pc();
++
++ // Expression stack must be empty before entering the VM if an
++ // exception happened.
++ __ empty_expression_stack();
++
++ // Thread will be loaded to R3_ARG1.
++ // Target class oop is in register R5_ARG3 by convention!
++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException_verbose, R17_tos, R5_ARG3));
++ // Above call must not return here since exception pending.
++ DEBUG_ONLY(__ should_not_reach_here();)
++ return entry;
++}
++#endif
++
++address TemplateInterpreterGenerator::generate_ClassCastException_handler() {
++ address entry = __ pc();
++ // Expression stack must be empty before entering the VM if an
++ // exception happened.
++ __ empty_expression_stack();
++
++ // Load exception object.
++ // Thread will be loaded to R3_ARG1.
++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException), R17_tos);
++#ifdef ASSERT
++ // Above call must not return here since exception pending.
++ __ should_not_reach_here();
++#endif
++ return entry;
++}
++
++address TemplateInterpreterGenerator::generate_exception_handler_common(const char* name, const char* message, bool pass_oop) {
++ address entry = __ pc();
++ //__ untested("generate_exception_handler_common");
++ Register Rexception = R17_tos;
++
++ // Expression stack must be empty before entering the VM if an exception happened.
++ __ empty_expression_stack();
++
++ __ load_const_optimized(R4_ARG2, (address) name, R11_scratch1);
++ if (pass_oop) {
++ __ mr(R5_ARG3, Rexception);
++ __ call_VM(Rexception, CAST_FROM_FN_PTR(address, InterpreterRuntime::create_klass_exception), false);
++ } else {
++ __ load_const_optimized(R5_ARG3, (address) message, R11_scratch1);
++ __ call_VM(Rexception, CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), false);
++ }
++
++ // Throw exception.
++ __ mr(R3_ARG1, Rexception);
++ __ load_const_optimized(R11_scratch1, Interpreter::throw_exception_entry(), R12_scratch2);
++ __ mtctr(R11_scratch1);
++ __ bctr();
++
++ return entry;
++}
++
++address TemplateInterpreterGenerator::generate_continuation_for(TosState state) {
++ address entry = __ pc();
++ __ unimplemented("generate_continuation_for");
++ return entry;
++}
++
++// This entry is returned to when a call returns to the interpreter.
++// When we arrive here, we expect that the callee stack frame is already popped.
++address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) {
++ address entry = __ pc();
++
++ // Move the value out of the return register back to the TOS cache of current frame.
++ switch (state) {
++ case ltos:
++ case btos:
++ case ctos:
++ case stos:
++ case atos:
++ case itos: __ mr(R17_tos, R3_RET); break; // RET -> TOS cache
++ case ftos:
++ case dtos: __ fmr(F15_ftos, F1_RET); break; // GR_FRET -> TOS cache
++ case vtos: break; // Nothing to do, this was a void return.
++ default : ShouldNotReachHere();
++ }
++
++ __ restore_interpreter_state(R11_scratch1); // Sets R11_scratch1 = fp.
++ __ ld(R12_scratch2, _ijava_state_neg(top_frame_sp), R11_scratch1);
++ __ resize_frame_absolute(R12_scratch2, R11_scratch1, R0);
++
++ // Compiled code destroys templateTableBase, reload.
++ __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R12_scratch2);
++
++ const Register cache = R11_scratch1;
++ const Register size = R12_scratch2;
++ __ get_cache_and_index_at_bcp(cache, 1, index_size);
++
++ // Big Endian (get least significant byte of 64 bit value):
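++ // For reference: the flags field is a 64-bit word whose least significant
++ // byte holds the parameter size; on big-endian PPC64 that byte sits at
++ // byte offset 7, hence the '+ 7' in the lbz below.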
++ __ lbz(size, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset()) + 7, cache);
++ __ sldi(size, size, Interpreter::logStackElementSize);
++ __ add(R15_esp, R15_esp, size);
++ __ dispatch_next(state, step);
++ return entry;
++}
++
++address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, int step) {
++ address entry = __ pc();
++ // If state != vtos, we're returning from a native method, which put its result
++ // into the result register. So move the value out of the return register back
++ // to the TOS cache of the current frame.
++
++ switch (state) {
++ case ltos:
++ case btos:
++ case ctos:
++ case stos:
++ case atos:
++ case itos: __ mr(R17_tos, R3_RET); break; // GR_RET -> TOS cache
++ case ftos:
++ case dtos: __ fmr(F15_ftos, F1_RET); break; // GR_FRET -> TOS cache
++ case vtos: break; // Nothing to do, this was a void return.
++ default : ShouldNotReachHere();
++ }
++
++ // Load LcpoolCache @@@ should be already set!
++ __ get_constant_pool_cache(R27_constPoolCache);
++
++ // Handle a pending exception, fall through if none.
++ __ check_and_forward_exception(R11_scratch1, R12_scratch2);
++
++ // Start executing bytecodes.
++ __ dispatch_next(state, step);
++
++ return entry;
++}
++
++// A result handler converts the native result into Java format.
++// Use the code shared between the C++ and template interpreters.
++address TemplateInterpreterGenerator::generate_result_handler_for(BasicType type) {
++ return AbstractInterpreterGenerator::generate_result_handler_for(type);
++}
++
++address TemplateInterpreterGenerator::generate_safept_entry_for(TosState state, address runtime_entry) {
++ address entry = __ pc();
++
++ __ push(state);
++ __ call_VM(noreg, runtime_entry);
++ __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos));
++
++ return entry;
++}
++
++// Helpers for commoning out cases in the various types of method entries.
++
++// Increment invocation count & check for overflow.
++//
++// Note: checking for negative value instead of overflow
++// so we have a 'sticky' overflow test.
++//
++void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue) {
++ // Note: In tiered we increment either the counters in the method or in the MDO, depending on whether we're profiling or not.
++ Register Rscratch1 = R11_scratch1;
++ Register Rscratch2 = R12_scratch2;
++ Register R3_counters = R3_ARG1;
++ Label done;
++
++ if (TieredCompilation) {
++ const int increment = InvocationCounter::count_increment;
++ const int mask = ((1 << Tier0InvokeNotifyFreqLog) - 1) << InvocationCounter::count_shift;
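++ // Illustration (hypothetical value): with Tier0InvokeNotifyFreqLog == 7 the
++ // mask covers the low seven count bits, so the and_ checks below see zero --
++ // and the overflow path is taken -- once every 128 increments.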
++ Label no_mdo;
++ if (ProfileInterpreter) {
++ const Register Rmdo = Rscratch1;
++ // If no method data exists, go to profile_continue.
++ __ ld(Rmdo, in_bytes(Method::method_data_offset()), R19_method);
++ __ cmpdi(CCR0, Rmdo, 0);
++ __ beq(CCR0, no_mdo);
++
++ // Increment backedge counter in the MDO.
++ const int mdo_bc_offs = in_bytes(MethodData::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
++ __ lwz(Rscratch2, mdo_bc_offs, Rmdo);
++ __ addi(Rscratch2, Rscratch2, increment);
++ __ stw(Rscratch2, mdo_bc_offs, Rmdo);
++ __ load_const_optimized(Rscratch1, mask, R0);
++ __ and_(Rscratch1, Rscratch2, Rscratch1);
++ __ bne(CCR0, done);
++ __ b(*overflow);
++ }
++
++ // Increment counter in MethodCounters*.
++ const int mo_bc_offs = in_bytes(MethodCounters::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
++ __ bind(no_mdo);
++ __ get_method_counters(R19_method, R3_counters, done);
++ __ lwz(Rscratch2, mo_bc_offs, R3_counters);
++ __ addi(Rscratch2, Rscratch2, increment);
++ __ stw(Rscratch2, mo_bc_offs, R3_counters);
++ __ load_const_optimized(Rscratch1, mask, R0);
++ __ and_(Rscratch1, Rscratch2, Rscratch1);
++ __ beq(CCR0, *overflow);
++
++ __ bind(done);
++
++ } else {
++
++ // Update standard invocation counters.
++ Register Rsum_ivc_bec = R4_ARG2;
++ __ get_method_counters(R19_method, R3_counters, done);
++ __ increment_invocation_counter(R3_counters, Rsum_ivc_bec, R12_scratch2);
++ // Increment interpreter invocation counter.
++ if (ProfileInterpreter) { // %%% Merge this into methodDataOop.
++ __ lwz(R12_scratch2, in_bytes(MethodCounters::interpreter_invocation_counter_offset()), R3_counters);
++ __ addi(R12_scratch2, R12_scratch2, 1);
++ __ stw(R12_scratch2, in_bytes(MethodCounters::interpreter_invocation_counter_offset()), R3_counters);
++ }
++ // Check if we must create a method data obj.
++ if (ProfileInterpreter && profile_method != NULL) {
++ const Register profile_limit = Rscratch1;
++ int pl_offs = __ load_const_optimized(profile_limit, &InvocationCounter::InterpreterProfileLimit, R0, true);
++ __ lwz(profile_limit, pl_offs, profile_limit);
++ // Test to see if we should create a method data oop.
++ __ cmpw(CCR0, Rsum_ivc_bec, profile_limit);
++ __ blt(CCR0, *profile_method_continue);
++ // If no method data exists, go to profile_method.
++ __ test_method_data_pointer(*profile_method);
++ }
++ // Finally check for counter overflow.
++ if (overflow) {
++ const Register invocation_limit = Rscratch1;
++ int il_offs = __ load_const_optimized(invocation_limit, &InvocationCounter::InterpreterInvocationLimit, R0, true);
++ __ lwz(invocation_limit, il_offs, invocation_limit);
++ assert(4 == sizeof(InvocationCounter::InterpreterInvocationLimit), "unexpected field size");
++ __ cmpw(CCR0, Rsum_ivc_bec, invocation_limit);
++ __ bge(CCR0, *overflow);
++ }
++
++ __ bind(done);
++ }
++}
++
++// Generate code to initiate compilation on invocation counter overflow.
++void TemplateInterpreterGenerator::generate_counter_overflow(Label& continue_entry) {
++ // Generate code to initiate compilation on the counter overflow.
++
++ // InterpreterRuntime::frequency_counter_overflow takes one argument,
++ // which indicates if the counter overflow occurs at a backwards branch (NULL bcp).
++ // We pass zero in.
++ // The call returns the address of the verified entry point for the method or NULL
++ // if the compilation did not complete (either went background or bailed out).
++ //
++ // Unlike the C++ interpreter above: Check exceptions!
++ // Assumption: Caller must set the flag "do_not_unlock_if_synchronized" if the monitor of a sync'ed
++ // method has not yet been created. Thus, no unlocking of a non-existing monitor can occur.
++
++ __ li(R4_ARG2, 0);
++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), R4_ARG2, true);
++
++ // Returns verified_entry_point or NULL.
++ // We ignore it in any case.
++ __ b(continue_entry);
++}
++
++void TemplateInterpreterGenerator::generate_stack_overflow_check(Register Rmem_frame_size, Register Rscratch1) {
++ assert_different_registers(Rmem_frame_size, Rscratch1);
++ __ generate_stack_overflow_check_with_compare_and_throw(Rmem_frame_size, Rscratch1);
++}
++
++void TemplateInterpreterGenerator::unlock_method(bool check_exceptions) {
++ __ unlock_object(R26_monitor, check_exceptions);
++}
++
++// Lock the current method, interpreter register window must be set up!
++void TemplateInterpreterGenerator::lock_method(Register Rflags, Register Rscratch1, Register Rscratch2, bool flags_preloaded) {
++ const Register Robj_to_lock = Rscratch2;
++
++ {
++ if (!flags_preloaded) {
++ __ lwz(Rflags, method_(access_flags));
++ }
++
++#ifdef ASSERT
++ // Check if the method needs synchronization.
++ {
++ Label Lok;
++ __ testbitdi(CCR0, R0, Rflags, JVM_ACC_SYNCHRONIZED_BIT);
++ __ btrue(CCR0,Lok);
++ __ stop("method doesn't need synchronization");
++ __ bind(Lok);
++ }
++#endif // ASSERT
++ }
++
++ // Get synchronization object to Rscratch2.
++ {
++ const int mirror_offset = in_bytes(Klass::java_mirror_offset());
++ Label Lstatic;
++ Label Ldone;
++
++ __ testbitdi(CCR0, R0, Rflags, JVM_ACC_STATIC_BIT);
++ __ btrue(CCR0, Lstatic);
++
++ // Non-static case: load receiver obj from stack and we're done.
++ __ ld(Robj_to_lock, R18_locals);
++ __ b(Ldone);
++
++ __ bind(Lstatic); // Static case: Lock the java mirror
++ __ ld(Robj_to_lock, in_bytes(Method::const_offset()), R19_method);
++ __ ld(Robj_to_lock, in_bytes(ConstMethod::constants_offset()), Robj_to_lock);
++ __ ld(Robj_to_lock, ConstantPool::pool_holder_offset_in_bytes(), Robj_to_lock);
++ __ ld(Robj_to_lock, mirror_offset, Robj_to_lock);
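++ // Chain just walked (for reference): Method* -> ConstMethod* ->
++ // ConstantPool* -> pool holder Klass* -> java mirror, i.e. the
++ // java.lang.Class oop that a static synchronized method locks.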
++
++ __ bind(Ldone);
++ __ verify_oop(Robj_to_lock);
++ }
++
++ // Got the oop to lock => execute!
++ __ add_monitor_to_stack(true, Rscratch1, R0);
++
++ __ std(Robj_to_lock, BasicObjectLock::obj_offset_in_bytes(), R26_monitor);
++ __ lock_object(R26_monitor, Robj_to_lock);
++}
++
++// Generate a fixed interpreter frame for pure interpreter
++// and I2N native transition frames.
++//
++// Before (stack grows downwards):
++//
++// | ... |
++// |------------- |
++// | java arg0 |
++// | ... |
++// | java argn |
++// | | <- R15_esp
++// | |
++// |--------------|
++// | abi_112 |
++// | | <- R1_SP
++// |==============|
++//
++//
++// After:
++//
++// | ... |
++// | java arg0 |<- R18_locals
++// | ... |
++// | java argn |
++// |--------------|
++// | |
++// | java locals |
++// | |
++// |--------------|
++// | abi_48 |
++// |==============|
++// | |
++// | istate |
++// | |
++// |--------------|
++// | monitor |<- R26_monitor
++// |--------------|
++// | |<- R15_esp
++// | expression |
++// | stack |
++// | |
++// |--------------|
++// | |
++// | abi_112 |<- R1_SP
++// |==============|
++//
++// The topmost frame needs an ABI space of 112 bytes. This space is needed
++// since we call to C. The C function may spill its arguments to the caller
++// frame. When we call to Java, we don't need these spill slots. In order to save
++// space on the stack, we resize the caller frame. However, Java locals reside in
++// the caller frame, so that frame has to be enlarged. The frame_size for the
++// current frame was calculated based on max_stack as the size of the expression
++// stack. At the call, only a part of the expression stack might be used.
++// We don't want to waste this space, so we cut the frame back accordingly.
++// The resulting amount of resizing is calculated as follows:
++// resize = (number_of_locals - number_of_arguments) * slot_size
++// + (R1_SP - R15_esp) + 48
++//
++// The size of the callee frame is calculated as:
++// framesize = 112 + max_stack + monitor + state_size
++//
++// max_stack: Max number of slots on the expression stack, loaded from the method.
++// monitor: We statically reserve room for one monitor object.
++// state_size: We save the current state of the interpreter to this area.
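++//
++// Worked example (hypothetical numbers, purely illustrative): with
++// 2 arguments, 5 locals, 8-byte slots and R1_SP - R15_esp == -24, the
++// resize formula gives (5 - 2) * 8 + (-24) + 48 = 48 bytes by which
++// the caller frame grows.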
++//
++void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call, Register Rsize_of_parameters, Register Rsize_of_locals) {
++ Register parent_frame_resize = R6_ARG4, // Frame will grow by this number of bytes.
++ top_frame_size = R7_ARG5,
++ Rconst_method = R8_ARG6;
++
++ assert_different_registers(Rsize_of_parameters, Rsize_of_locals, parent_frame_resize, top_frame_size);
++
++ __ ld(Rconst_method, method_(const));
++ __ lhz(Rsize_of_parameters /* number of params */,
++ in_bytes(ConstMethod::size_of_parameters_offset()), Rconst_method);
++ if (native_call) {
++ // If we're calling a native method, we reserve space for the worst-case signature
++ // handler varargs vector, which is max(Argument::n_register_parameters, parameter_count+2).
++ // We add two slots to the parameter_count, one for the jni
++ // environment and one for a possible native mirror.
++ Label skip_native_calculate_max_stack;
++ __ addi(top_frame_size, Rsize_of_parameters, 2);
++ __ cmpwi(CCR0, top_frame_size, Argument::n_register_parameters);
++ __ bge(CCR0, skip_native_calculate_max_stack);
++ __ li(top_frame_size, Argument::n_register_parameters);
++ __ bind(skip_native_calculate_max_stack);
++ __ sldi(Rsize_of_parameters, Rsize_of_parameters, Interpreter::logStackElementSize);
++ __ sldi(top_frame_size, top_frame_size, Interpreter::logStackElementSize);
++ __ sub(parent_frame_resize, R1_SP, R15_esp); // <0, off by Interpreter::stackElementSize!
++ assert(Rsize_of_locals == noreg, "Rsize_of_locals not initialized"); // Only relevant value is Rsize_of_parameters.
++ } else {
++ __ lhz(Rsize_of_locals /* number of params */, in_bytes(ConstMethod::size_of_locals_offset()), Rconst_method);
++ __ sldi(Rsize_of_parameters, Rsize_of_parameters, Interpreter::logStackElementSize);
++ __ sldi(Rsize_of_locals, Rsize_of_locals, Interpreter::logStackElementSize);
++ __ lhz(top_frame_size, in_bytes(ConstMethod::max_stack_offset()), Rconst_method);
++ __ sub(R11_scratch1, Rsize_of_locals, Rsize_of_parameters); // >=0
++ __ sub(parent_frame_resize, R1_SP, R15_esp); // <0, off by Interpreter::stackElementSize!
++ __ sldi(top_frame_size, top_frame_size, Interpreter::logStackElementSize);
++ __ add(parent_frame_resize, parent_frame_resize, R11_scratch1);
++ }
++
++ // Compute top frame size.
++ __ addi(top_frame_size, top_frame_size, frame::abi_reg_args_size + frame::ijava_state_size);
++
++ // Cut back area between esp and max_stack.
++ __ addi(parent_frame_resize, parent_frame_resize, frame::abi_minframe_size - Interpreter::stackElementSize);
++
++ __ round_to(top_frame_size, frame::alignment_in_bytes);
++ __ round_to(parent_frame_resize, frame::alignment_in_bytes);
++ // parent_frame_resize = (locals-parameters) - (ESP-SP-ABI48) Rounded to frame alignment size.
++ // Enlarge by locals-parameters (not in case of native_call), shrink by ESP-SP-ABI48.
++
++ {
++ // --------------------------------------------------------------------------
++ // Stack overflow check
++
++ Label cont;
++ __ add(R11_scratch1, parent_frame_resize, top_frame_size);
++ generate_stack_overflow_check(R11_scratch1, R12_scratch2);
++ }
++
++ // Set up interpreter state registers.
++
++ __ add(R18_locals, R15_esp, Rsize_of_parameters);
++ __ ld(R27_constPoolCache, in_bytes(ConstMethod::constants_offset()), Rconst_method);
++ __ ld(R27_constPoolCache, ConstantPool::cache_offset_in_bytes(), R27_constPoolCache);
++
++ // Set method data pointer.
++ if (ProfileInterpreter) {
++ Label zero_continue;
++ __ ld(R28_mdx, method_(method_data));
++ __ cmpdi(CCR0, R28_mdx, 0);
++ __ beq(CCR0, zero_continue);
++ __ addi(R28_mdx, R28_mdx, in_bytes(MethodData::data_offset()));
++ __ bind(zero_continue);
++ }
++
++ if (native_call) {
++ __ li(R14_bcp, 0); // Must initialize.
++ } else {
++ __ add(R14_bcp, in_bytes(ConstMethod::codes_offset()), Rconst_method);
++ }
++
++ // Resize parent frame.
++ __ mflr(R12_scratch2);
++ __ neg(parent_frame_resize, parent_frame_resize);
++ __ resize_frame(parent_frame_resize, R11_scratch1);
++ __ std(R12_scratch2, _abi(lr), R1_SP);
++
++ __ addi(R26_monitor, R1_SP, - frame::ijava_state_size);
++ __ addi(R15_esp, R26_monitor, - Interpreter::stackElementSize);
++
++ // Store values.
++ // R15_esp, R14_bcp, R26_monitor, R28_mdx are saved at java calls
++ // in InterpreterMacroAssembler::call_from_interpreter.
++ __ std(R19_method, _ijava_state_neg(method), R1_SP);
++ __ std(R21_sender_SP, _ijava_state_neg(sender_sp), R1_SP);
++ __ std(R27_constPoolCache, _ijava_state_neg(cpoolCache), R1_SP);
++ __ std(R18_locals, _ijava_state_neg(locals), R1_SP);
++
++ // Note: esp, bcp, monitor, mdx live in registers. Hence, the correct version can only
++ // be found in the frame after save_interpreter_state is done. This is always true
++ // for non-top frames. But when a signal occurs, dumping the top frame can go wrong,
++ // because e.g. frame::interpreter_frame_bcp() will not access the correct value
++ // (Enhanced Stack Trace).
++ // The signal handler does not save the interpreter state into the frame.
++ __ li(R0, 0);
++#ifdef ASSERT
++ // Fill remaining slots with constants.
++ __ load_const_optimized(R11_scratch1, 0x5afe);
++ __ load_const_optimized(R12_scratch2, 0xdead);
++#endif
++ // We have to initialize some frame slots for native calls (accessed by GC).
++ if (native_call) {
++ __ std(R26_monitor, _ijava_state_neg(monitors), R1_SP);
++ __ std(R14_bcp, _ijava_state_neg(bcp), R1_SP);
++ if (ProfileInterpreter) { __ std(R28_mdx, _ijava_state_neg(mdx), R1_SP); }
++ }
++#ifdef ASSERT
++ else {
++ __ std(R12_scratch2, _ijava_state_neg(monitors), R1_SP);
++ __ std(R12_scratch2, _ijava_state_neg(bcp), R1_SP);
++ __ std(R12_scratch2, _ijava_state_neg(mdx), R1_SP);
++ }
++ __ std(R11_scratch1, _ijava_state_neg(ijava_reserved), R1_SP);
++ __ std(R12_scratch2, _ijava_state_neg(esp), R1_SP);
++ __ std(R12_scratch2, _ijava_state_neg(lresult), R1_SP);
++ __ std(R12_scratch2, _ijava_state_neg(fresult), R1_SP);
++#endif
++ __ subf(R12_scratch2, top_frame_size, R1_SP);
++ __ std(R0, _ijava_state_neg(oop_tmp), R1_SP);
++ __ std(R12_scratch2, _ijava_state_neg(top_frame_sp), R1_SP);
++
++ // Push top frame.
++ __ push_frame(top_frame_size, R11_scratch1);
++}
++
++// End of helpers
++
++// ============================================================================
++// Various method entries
++//
++
++// Empty method, generate a very fast return. We must skip this entry if
++// someone's debugging, indicated by the flag
++// "interp_only_mode" in the Thread obj.
++// Note: empty methods are mostly generated from methods that do assertions, which are
++// disabled in the "java opt build".
++address TemplateInterpreterGenerator::generate_empty_entry(void) {
++ if (!UseFastEmptyMethods) {
++ NOT_PRODUCT(__ should_not_reach_here();)
++ return Interpreter::entry_for_kind(Interpreter::zerolocals);
++ }
++
++ Label Lslow_path;
++ const Register Rjvmti_mode = R11_scratch1;
++ address entry = __ pc();
++
++ __ lwz(Rjvmti_mode, thread_(interp_only_mode));
++ __ cmpwi(CCR0, Rjvmti_mode, 0);
++ __ bne(CCR0, Lslow_path); // jvmti_mode!=0
++
++ // No one's debugging: Simply return.
++ // Pop c2i arguments (if any) off when we return.
++#ifdef ASSERT
++ __ ld(R9_ARG7, 0, R1_SP);
++ __ ld(R10_ARG8, 0, R21_sender_SP);
++ __ cmpd(CCR0, R9_ARG7, R10_ARG8);
++ __ asm_assert_eq("backlink", 0x545);
++#endif // ASSERT
++ __ mr(R1_SP, R21_sender_SP); // Cut the stack back to where the caller started.
++
++ // And we're done.
++ __ blr();
++
++ __ bind(Lslow_path);
++ __ branch_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals), R11_scratch1);
++ __ flush();
++
++ return entry;
++}
++
++// Support abs and sqrt like the compiler does.
++// For the others we can use a normal (native) entry.
++
++inline bool math_entry_available(AbstractInterpreter::MethodKind kind) {
++ // Provide math entry with debugging on demand.
++ // Note: Debugging changes which code will get executed:
++ // Debugging or disabled InlineIntrinsics: the Java method will get interpreted and performs a native call.
++ // Not debugging and enabled InlineIntrinsics: the processor instruction will get used.
++ // Result might differ slightly due to rounding etc.
++ if (!InlineIntrinsics && (!FLAG_IS_ERGO(InlineIntrinsics))) return false; // Generate a vanilla entry.
++
++ return ((kind==Interpreter::java_lang_math_sqrt && VM_Version::has_fsqrt()) ||
++ (kind==Interpreter::java_lang_math_abs));
++}
++
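++// Example: with fsqrt available, Math.sqrt is served by the single fsqrt
++// instruction emitted in generate_math_entry below, while Math.sin and
++// friends fall back to the normal (zerolocals) interpreter entry.
++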
++address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) {
++ if (!math_entry_available(kind)) {
++ NOT_PRODUCT(__ should_not_reach_here();)
++ return Interpreter::entry_for_kind(Interpreter::zerolocals);
++ }
++
++ Label Lslow_path;
++ const Register Rjvmti_mode = R11_scratch1;
++ address entry = __ pc();
++
++ // Provide math entry with debugging on demand.
++ __ lwz(Rjvmti_mode, thread_(interp_only_mode));
++ __ cmpwi(CCR0, Rjvmti_mode, 0);
++ __ bne(CCR0, Lslow_path); // jvmti_mode!=0
++
++ __ lfd(F1_RET, Interpreter::stackElementSize, R15_esp);
++
++ // Pop c2i arguments (if any) off when we return.
++#ifdef ASSERT
++ __ ld(R9_ARG7, 0, R1_SP);
++ __ ld(R10_ARG8, 0, R21_sender_SP);
++ __ cmpd(CCR0, R9_ARG7, R10_ARG8);
++ __ asm_assert_eq("backlink", 0x545);
++#endif // ASSERT
++ __ mr(R1_SP, R21_sender_SP); // Cut the stack back to where the caller started.
++
++ if (kind == Interpreter::java_lang_math_sqrt) {
++ __ fsqrt(F1_RET, F1_RET);
++ } else if (kind == Interpreter::java_lang_math_abs) {
++ __ fabs(F1_RET, F1_RET);
++ } else {
++ ShouldNotReachHere();
++ }
++
++ // And we're done.
++ __ blr();
++
++ // Provide slow path for JVMTI case.
++ __ bind(Lslow_path);
++ __ branch_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals), R12_scratch2);
++ __ flush();
++
++ return entry;
++}
++
++// Interpreter stub for calling a native method. (asm interpreter)
++// This sets up a somewhat different looking stack for calling the
++// native method than the typical interpreter frame setup.
++//
++// On entry:
++// R19_method - method
++// R16_thread - JavaThread*
++// R15_esp - intptr_t* sender tos
++//
++// abstract stack (grows up)
++// [ IJava (caller of JNI callee) ] <-- ASP
++// ...
++address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
++
++ address entry = __ pc();
++
++ const bool inc_counter = UseCompiler || CountCompiledCalls;
++
++ // -----------------------------------------------------------------------------
++ // Allocate a new frame that represents the native callee (i2n frame).
++ // This is not a full-blown interpreter frame, but in particular, the
++ // following registers are valid after this:
++ // - R19_method
++ // - R18_locals (points to start of arguments to the native function)
++ //
++ // abstract stack (grows up)
++ // [ IJava (caller of JNI callee) ] <-- ASP
++ // ...
++
++ const Register signature_handler_fd = R11_scratch1;
++ const Register pending_exception = R0;
++ const Register result_handler_addr = R31;
++ const Register native_method_fd = R11_scratch1;
++ const Register access_flags = R22_tmp2;
++ const Register active_handles = R11_scratch1; // R26_monitor saved to state.
++ const Register sync_state = R12_scratch2;
++ const Register sync_state_addr = sync_state; // Address is dead after use.
++ const Register suspend_flags = R11_scratch1;
++
++ //=============================================================================
++ // Allocate new frame and initialize interpreter state.
++
++ Label exception_return;
++ Label exception_return_sync_check;
++ Label stack_overflow_return;
++
++ // Generate new interpreter state and jump to stack_overflow_return in case of
++ // a stack overflow.
++ //generate_compute_interpreter_state(stack_overflow_return);
++
++ Register size_of_parameters = R22_tmp2;
++
++ generate_fixed_frame(true, size_of_parameters, noreg /* unused */);
++
++ //=============================================================================
++ // Increment invocation counter. On overflow, entry to JNI method
++ // will be compiled.
++ Label invocation_counter_overflow, continue_after_compile;
++ if (inc_counter) {
++ if (synchronized) {
++ // Since at this point in the method invocation the exception handler
++ // would try to exit the monitor of a synchronized method which hasn't
++ // been entered yet, we set the thread-local variable
++ // _do_not_unlock_if_synchronized to true. If any exception was thrown by
++ // the runtime, exception handling, i.e. unlock_if_synchronized_method, will
++ // check this thread-local flag.
++ // The flag forces an unwind in the topmost interpreter frame without
++ // performing an unlock while doing so.
++ __ li(R0, 1);
++ __ stb(R0, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()), R16_thread);
++ }
++ generate_counter_incr(&invocation_counter_overflow, NULL, NULL);
++
++ __ BIND(continue_after_compile);
++ // Reset the _do_not_unlock_if_synchronized flag.
++ if (synchronized) {
++ __ li(R0, 0);
++ __ stb(R0, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()), R16_thread);
++ }
++ }
++
++ // access_flags = method->access_flags();
++ // Load access flags.
++ assert(access_flags->is_nonvolatile(),
++ "access_flags must be in a non-volatile register");
++ // Type check.
++ assert(4 == sizeof(AccessFlags), "unexpected field size");
++ __ lwz(access_flags, method_(access_flags));
++
++ // We don't want to reload R19_method and access_flags after calls
++ // to some helper functions.
++ assert(R19_method->is_nonvolatile(),
++ "R19_method must be a non-volatile register");
++
++ // Check for synchronized methods. Must happen AFTER invocation counter
++ // check, so method is not locked if counter overflows.
++
++ if (synchronized) {
++ lock_method(access_flags, R11_scratch1, R12_scratch2, true);
++
++ // Update monitor in state.
++ __ ld(R11_scratch1, 0, R1_SP);
++ __ std(R26_monitor, _ijava_state_neg(monitors), R11_scratch1);
++ }
++
++ // jvmti/jvmpi support
++ __ notify_method_entry();
++
++ //=============================================================================
++ // Get and call the signature handler.
++
++ __ ld(signature_handler_fd, method_(signature_handler));
++ Label call_signature_handler;
++
++ __ cmpdi(CCR0, signature_handler_fd, 0);
++ __ bne(CCR0, call_signature_handler);
++
++ // Method has never been called. Either generate a specialized
++ // handler or point to the slow one.
++ //
++ // Pass parameter 'false' to avoid exception check in call_VM.
++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), R19_method, false);
++
++ // Check for an exception while looking up the target method. If we
++ // incurred one, bail.
++ __ ld(pending_exception, thread_(pending_exception));
++ __ cmpdi(CCR0, pending_exception, 0);
++ __ bne(CCR0, exception_return_sync_check); // Has pending exception.
++
++ // Reload signature handler, it may have been created/assigned in the meantime.
++ __ ld(signature_handler_fd, method_(signature_handler));
++ __ twi_0(signature_handler_fd); // Order wrt. load of klass mirror and entry point (isync is below).
++
++ __ BIND(call_signature_handler);
++
++ // Before we call the signature handler we push a new frame to
++ // protect the interpreter frame volatile registers when we return
++ // from jni but before we can get back to Java.
++
++ // First set the frame anchor while the SP/FP registers are
++ // convenient and the slow signature handler can use this same frame
++ // anchor.
++
++ // We have a TOP_IJAVA_FRAME here, which belongs to us.
++ __ set_top_ijava_frame_at_SP_as_last_Java_frame(R1_SP, R12_scratch2/*tmp*/);
++
++ // Now the interpreter frame (and its call chain) has been
++ // invalidated and flushed. We are now protected against eager
++ // being enabled in native code. Even if it goes eager the
++ // registers will be reloaded as clean and we will invalidate after
++ // the call so no spurious flush should be possible.
++
++ // Call signature handler and pass locals address.
++ //
++ // Our signature handlers copy required arguments to the C stack
++ // (outgoing C args), R3_ARG1 to R10_ARG8, and FARG1 to FARG13.
++ __ mr(R3_ARG1, R18_locals);
++ __ ld(signature_handler_fd, 0, signature_handler_fd);
++
++ __ call_stub(signature_handler_fd);
++
++ // Remove the register parameter varargs slots we allocated in
++ // compute_interpreter_state. SP+16 ends up pointing to the ABI
++ // outgoing argument area.
++ //
++ // Not needed on PPC64.
++ //__ add(SP, SP, Argument::n_register_parameters*BytesPerWord);
++
++ assert(result_handler_addr->is_nonvolatile(), "result_handler_addr must be in a non-volatile register");
++ // Save across call to native method.
++ __ mr(result_handler_addr, R3_RET);
++
++ __ isync(); // Acquire signature handler before trying to fetch the native entry point and klass mirror.
++
++ // Set up fixed parameters and call the native method.
++ // If the method is static, get mirror into R4_ARG2.
++ {
++ Label method_is_not_static;
++ // access_flags is non-volatile and still valid, no need to restore it.
++
++ // Restore access flags.
++ __ testbitdi(CCR0, R0, access_flags, JVM_ACC_STATIC_BIT);
++ __ bfalse(CCR0, method_is_not_static);
++
++ // constants = method->constants();
++ __ ld(R11_scratch1, in_bytes(Method::const_offset()), R19_method);
++ __ ld(R11_scratch1, in_bytes(ConstMethod::constants_offset()), R11_scratch1);
++ // pool_holder = method->constants()->pool_holder();
++ __ ld(R11_scratch1/*pool_holder*/, ConstantPool::pool_holder_offset_in_bytes(),
++ R11_scratch1/*constants*/);
++
++ const int mirror_offset = in_bytes(Klass::java_mirror_offset());
++
++ // mirror = pool_holder->klass_part()->java_mirror();
++ __ ld(R0/*mirror*/, mirror_offset, R11_scratch1/*pool_holder*/);
++ // state->_native_mirror = mirror;
++
++ __ ld(R11_scratch1, 0, R1_SP);
++ __ std(R0/*mirror*/, _ijava_state_neg(oop_tmp), R11_scratch1);
++ // R4_ARG2 = &state->_oop_temp;
++ __ addi(R4_ARG2, R11_scratch1, _ijava_state_neg(oop_tmp));
++ __ BIND(method_is_not_static);
++ }
++
++ // At this point, arguments have been copied off the stack into
++ // their JNI positions. Oops are boxed in-place on the stack, with
++ // handles copied to arguments. The result handler address is in a
++ // register.
++
++ // Pass JNIEnv address as first parameter.
++ __ addir(R3_ARG1, thread_(jni_environment));
++
++ // Load the native_method entry before we change the thread state.
++ __ ld(native_method_fd, method_(native_function));
++
++ //=============================================================================
++ // Transition from _thread_in_Java to _thread_in_native. As soon as
++ // we make this change the safepoint code needs to be certain that
++ // the last Java frame we established is good. The pc in that frame
++ // just needs to be near here not an actual return address.
++
++ // We use release_store_fence to update values like the thread state, where
++ // we don't want the current thread to continue until all our prior memory
++ // accesses (including the new thread state) are visible to other threads.
++ __ li(R0, _thread_in_native);
++ __ release();
++
++ // TODO PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
++ __ stw(R0, thread_(thread_state));
++
++ if (UseMembar) {
++ __ fence();
++ }
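++
++ // Ordering sketch (illustrative): on PPC64 release is an lwsync, which makes
++ // all prior accesses visible before the state store; the optional fence is a
++ // full sync, which also orders the store against subsequent loads.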
++
++ //=============================================================================
++ // Call the native method. Argument registers must not have been
++ // overwritten since "__ call_stub(signature_handler);" (except for
++ // ARG1 and ARG2 for static methods).
++ __ call_c(native_method_fd);
++
++ __ li(R0, 0);
++ __ ld(R11_scratch1, 0, R1_SP);
++ __ std(R3_RET, _ijava_state_neg(lresult), R11_scratch1);
++ __ stfd(F1_RET, _ijava_state_neg(fresult), R11_scratch1);
++ __ std(R0/*mirror*/, _ijava_state_neg(oop_tmp), R11_scratch1); // reset
++
++ // Note: C++ interpreter needs the following here:
++ // The frame_manager_lr field, which we use for setting the last
++ // java frame, gets overwritten by the signature handler. Restore
++ // it now.
++ //__ get_PC_trash_LR(R11_scratch1);
++ //__ std(R11_scratch1, _top_ijava_frame_abi(frame_manager_lr), R1_SP);
++
++ // Because of GC R19_method may no longer be valid.
++
++ // Block, if necessary, before resuming in _thread_in_Java state.
++ // In order for GC to work, don't clear the last_Java_sp until after
++ // blocking.
++
++ //=============================================================================
++ // Switch thread to "native transition" state before reading the
++ // synchronization state. This additional state is necessary
++ // because reading and testing the synchronization state is not
++ // atomic w.r.t. GC, as this scenario demonstrates: Java thread A,
++ // in _thread_in_native state, loads _not_synchronized and is
++ // preempted. VM thread changes sync state to synchronizing and
++ // suspends threads for GC. Thread A is resumed to finish this
++ // native method, but doesn't block here since it didn't see any
++ // synchronization in progress, and escapes.
++
++ // We use release_store_fence to update values like the thread state, where
++ // we don't want the current thread to continue until all our prior memory
++ // accesses (including the new thread state) are visible to other threads.
++ __ li(R0/*thread_state*/, _thread_in_native_trans);
++ __ release();
++ __ stw(R0/*thread_state*/, thread_(thread_state));
++ if (UseMembar) {
++ __ fence();
++ }
++ // Write serialization page so that the VM thread can do a pseudo remote
++ // membar. We use the current thread pointer to calculate a thread
++ // specific offset to write to within the page. This minimizes bus
++ // traffic due to cache line collision.
++ else {
++ __ serialize_memory(R16_thread, R11_scratch1, R12_scratch2);
++ }
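++
++ // Background (illustrative): the serialization page lets the VM thread issue
++ // a pseudo remote membar by mprotect-ing the page; a thread writing to it
++ // while the protection flips takes a fault and is thereby serialized with
++ // the VM thread's safepoint bookkeeping.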
++
++ // Now before we return to Java we must look for a current safepoint
++ // (a new safepoint cannot start since we entered native_trans).
++ // We must check here because a current safepoint could be modifying
++ // the caller's registers right this moment.
++
++ // Acquire isn't strictly necessary here because of the fence, but
++ // sync_state is declared to be volatile, so we do it anyway
++ // (cmp-br-isync on one path, release (same as acquire on PPC64) on the other path).
++ int sync_state_offs = __ load_const_optimized(sync_state_addr, SafepointSynchronize::address_of_state(), /*temp*/R0, true);
++
++ // TODO PPC port assert(4 == SafepointSynchronize::sz_state(), "unexpected field size");
++ __ lwz(sync_state, sync_state_offs, sync_state_addr);
++
++ // TODO PPC port assert(4 == Thread::sz_suspend_flags(), "unexpected field size");
++ __ lwz(suspend_flags, thread_(suspend_flags));
++
++ Label sync_check_done;
++ Label do_safepoint;
++ // No synchronization in progress nor yet synchronized.
++ __ cmpwi(CCR0, sync_state, SafepointSynchronize::_not_synchronized);
++ // Not suspended.
++ __ cmpwi(CCR1, suspend_flags, 0);
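++ // Taken together: fall into do_safepoint iff a safepoint is in progress
++ // (CCR0) or this thread has suspend flags set (CCR1); otherwise skip to
++ // sync_check_done.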
++
++ __ bne(CCR0, do_safepoint);
++ __ beq(CCR1, sync_check_done);
++ __ bind(do_safepoint);
++ __ isync();
++ // Block. We do the call directly and leave the current
++ // last_Java_frame setup undisturbed. We must save any possible
++ // native result across the call. No oop is present.
++
++ __ mr(R3_ARG1, R16_thread);
++ __ call_c(CAST_FROM_FN_PTR(FunctionDescriptor*, JavaThread::check_special_condition_for_native_trans),
++ relocInfo::none);
++
++ __ bind(sync_check_done);
++
++ //=============================================================================
++ // <<<<<< Back in Interpreter Frame >>>>>
++
++ // We are in thread_in_native_trans here and back in the normal
++ // interpreter frame. We don't have to do anything special about
++ // safepoints and we can switch to Java mode anytime we are ready.
++
++ // Note: frame::interpreter_frame_result has a dependency on how the
++ // method result is saved across the call to post_method_exit. For
++ // native methods it assumes that the non-FPU/non-void result is
++ // saved in _native_lresult and a FPU result in _native_fresult. If
++ // this changes then the interpreter_frame_result implementation
++ // will need to be updated too.
++
++ // On PPC64, we have stored the result directly after the native call.
++
++ //=============================================================================
++ // Back in Java
++
++ // We use release_store_fence to update values like the thread state, where
++ // we don't want the current thread to continue until all our prior memory
++ // accesses (including the new thread state) are visible to other threads.
++ __ li(R0/*thread_state*/, _thread_in_Java);
++ __ release();
++ __ stw(R0/*thread_state*/, thread_(thread_state));
++ if (UseMembar) {
++ __ fence();
++ }
++
++ __ reset_last_Java_frame();
++
++ // Jvmdi/jvmpi support. Whether we've got an exception pending or
++ // not, and whether unlocking throws an exception or not, we notify
++ // on native method exit. If we do have an exception, we'll end up
++ // in the caller's context to handle it, so if we don't do the
++ // notify here, we'll drop it on the floor.
++ __ notify_method_exit(true/*native method*/,
++ ilgl /*illegal state (not used for native methods)*/,
++ InterpreterMacroAssembler::NotifyJVMTI,
++ false /*check_exceptions*/);
++
++ //=============================================================================
++ // Handle exceptions
++
++ if (synchronized) {
++ // Don't check for exceptions since we're still in the i2n frame. Do that
++ // manually afterwards.
++ unlock_method(false);
++ }
++
++ // Reset active handles after returning from native.
++ // thread->active_handles()->clear();
++ __ ld(active_handles, thread_(active_handles));
++ // TODO PPC port assert(4 == JNIHandleBlock::top_size_in_bytes(), "unexpected field size");
++ __ li(R0, 0);
++ __ stw(R0, JNIHandleBlock::top_offset_in_bytes(), active_handles);
++
++ Label exception_return_sync_check_already_unlocked;
++ __ ld(R0/*pending_exception*/, thread_(pending_exception));
++ __ cmpdi(CCR0, R0/*pending_exception*/, 0);
++ __ bne(CCR0, exception_return_sync_check_already_unlocked);
++
++ //-----------------------------------------------------------------------------
++ // No exception pending.
++
++ // Move native method result back into proper registers and return.
++ // Invoke result handler (may unbox/promote).
++ __ ld(R11_scratch1, 0, R1_SP);
++ __ ld(R3_RET, _ijava_state_neg(lresult), R11_scratch1);
++ __ lfd(F1_RET, _ijava_state_neg(fresult), R11_scratch1);
++ __ call_stub(result_handler_addr);
++
++ __ merge_frames(/*top_frame_sp*/ R21_sender_SP, /*return_pc*/ R0, R11_scratch1, R12_scratch2);
++
++ // Must use the return pc which was loaded from the caller's frame
++ // as the VM uses return-pc-patching for deoptimization.
++ __ mtlr(R0);
++ __ blr();
++
++ //-----------------------------------------------------------------------------
++ // An exception is pending. We call into the runtime only if the
++ // caller was not interpreted. If it was interpreted the
++ // interpreter will do the correct thing. If it isn't interpreted
++ // (call stub/compiled code) we will change our return and continue.
++
++ __ BIND(exception_return_sync_check);
++
++ if (synchronized) {
++ // Don't check for exceptions since we're still in the i2n frame. Do that
++ // manually afterwards.
++ unlock_method(false);
++ }
++ __ BIND(exception_return_sync_check_already_unlocked);
++
++ const Register return_pc = R31;
++
++ __ ld(return_pc, 0, R1_SP);
++ __ ld(return_pc, _abi(lr), return_pc);
++
++ // Get the address of the exception handler.
++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address),
++ R16_thread,
++ return_pc /* return pc */);
++ __ merge_frames(/*top_frame_sp*/ R21_sender_SP, noreg, R11_scratch1, R12_scratch2);
++
++ // Load the PC of the exception handler into LR.
++ __ mtlr(R3_RET);
++
++ // Load exception into R3_ARG1 and clear pending exception in thread.
++ __ ld(R3_ARG1/*exception*/, thread_(pending_exception));
++ __ li(R4_ARG2, 0);
++ __ std(R4_ARG2, thread_(pending_exception));
++
++ // Load the original return pc into R4_ARG2.
++ __ mr(R4_ARG2/*issuing_pc*/, return_pc);
++
++ // Return to exception handler.
++ __ blr();
++
++ //=============================================================================
++ // Counter overflow.
++
++ if (inc_counter) {
++ // Handle invocation counter overflow.
++ __ bind(invocation_counter_overflow);
++
++ generate_counter_overflow(continue_after_compile);
++ }
++
++ return entry;
++}
++
++// Generic interpreted method entry to (asm) interpreter.
++//
++address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) {
++ bool inc_counter = UseCompiler || CountCompiledCalls;
++ address entry = __ pc();
++ // Generate the code to allocate the interpreter stack frame.
++ Register Rsize_of_parameters = R4_ARG2, // Written by generate_fixed_frame.
++ Rsize_of_locals = R5_ARG3; // Written by generate_fixed_frame.
++
++ generate_fixed_frame(false, Rsize_of_parameters, Rsize_of_locals);
++
++#ifdef FAST_DISPATCH
++ __ unimplemented("Fast dispatch in generate_normal_entry");
++#if 0
++ __ set((intptr_t)Interpreter::dispatch_table(), IdispatchTables);
++ // Set bytecode dispatch table base.
++#endif
++#endif
++
++ // --------------------------------------------------------------------------
++ // Zero out non-parameter locals.
++ // Note: *Always* zero out non-parameter locals as Sparc does. It's not
++ // worth asking the flag, just do it.
++ Register Rslot_addr = R6_ARG4,
++ Rnum = R7_ARG5;
++ Label Lno_locals, Lzero_loop;
++
++ // Set up the zeroing loop.
++ __ subf(Rnum, Rsize_of_parameters, Rsize_of_locals);
++ __ subf(Rslot_addr, Rsize_of_parameters, R18_locals);
++ __ srdi_(Rnum, Rnum, Interpreter::logStackElementSize);
++ __ beq(CCR0, Lno_locals);
++ __ li(R0, 0);
++ __ mtctr(Rnum);
++
++ // The zero locals loop.
++ __ bind(Lzero_loop);
++ __ std(R0, 0, Rslot_addr);
++ __ addi(Rslot_addr, Rslot_addr, -Interpreter::stackElementSize);
++ __ bdnz(Lzero_loop);
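++ // bdnz decrements CTR and branches while it is non-zero: a counted loop
++ // zeroing one 8-byte stack slot per iteration, walking downwards.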
++
++ __ bind(Lno_locals);
++
++ // --------------------------------------------------------------------------
++ // Counter increment and overflow check.
++ Label invocation_counter_overflow,
++ profile_method,
++ profile_method_continue;
++ if (inc_counter || ProfileInterpreter) {
++
++ Register Rdo_not_unlock_if_synchronized_addr = R11_scratch1;
++ if (synchronized) {
++ // Since at this point in the method invocation the exception handler
++ // would try to exit the monitor of a synchronized method which hasn't
++ // been entered yet, we set the thread-local variable
++ // _do_not_unlock_if_synchronized to true. If any exception was thrown by
++ // the runtime, exception handling, i.e. unlock_if_synchronized_method, will
++ // check this thread-local flag.
++ // The flag forces an unwind in the topmost interpreter frame without
++ // performing an unlock while doing so.
++ __ li(R0, 1);
++ __ stb(R0, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()), R16_thread);
++ }
++ // Increment invocation counter and check for overflow.
++ if (inc_counter) {
++ generate_counter_incr(&invocation_counter_overflow, &profile_method, &profile_method_continue);
++ }
++
++ __ bind(profile_method_continue);
++
++ // Reset the _do_not_unlock_if_synchronized flag.
++ if (synchronized) {
++ __ li(R0, 0);
++ __ stb(R0, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()), R16_thread);
++ }
++ }
++
++ // --------------------------------------------------------------------------
++ // Locking of synchronized methods. Must happen AFTER invocation_counter
++ // check and stack overflow check, so the method is not locked if the counter overflows.
++ if (synchronized) {
++ lock_method(R3_ARG1, R4_ARG2, R5_ARG3);
++ }
++#ifdef ASSERT
++ else {
++ Label Lok;
++ __ lwz(R0, in_bytes(Method::access_flags_offset()), R19_method);
++ __ andi_(R0, R0, JVM_ACC_SYNCHRONIZED);
++ __ asm_assert_eq("method needs synchronization", 0x8521);
++ __ bind(Lok);
++ }
++#endif // ASSERT
++
++ __ verify_thread();
++
++ // --------------------------------------------------------------------------
++ // JVMTI support
++ __ notify_method_entry();
++
++ // --------------------------------------------------------------------------
++ // Start executing instructions.
++ __ dispatch_next(vtos);
++
++ // --------------------------------------------------------------------------
++ // Out of line counter overflow and MDO creation code.
++ if (ProfileInterpreter) {
++ // We have decided to profile this method in the interpreter.
++ __ bind(profile_method);
++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method));
++ __ set_method_data_pointer_for_bcp();
++ __ b(profile_method_continue);
++ }
++
++ if (inc_counter) {
++ // Handle invocation counter overflow.
++ __ bind(invocation_counter_overflow);
++ generate_counter_overflow(profile_method_continue);
++ }
++ return entry;
++}
++
++// =============================================================================
++// Entry points
++
++address AbstractInterpreterGenerator::generate_method_entry(
++ AbstractInterpreter::MethodKind kind) {
++ // Determine code generation flags.
++ bool synchronized = false;
++ address entry_point = NULL;
++
++ switch (kind) {
++ case Interpreter::zerolocals : break;
++ case Interpreter::zerolocals_synchronized: synchronized = true; break;
++ case Interpreter::native : entry_point = ((InterpreterGenerator*) this)->generate_native_entry(false); break;
++ case Interpreter::native_synchronized : entry_point = ((InterpreterGenerator*) this)->generate_native_entry(true); break;
++ case Interpreter::empty : entry_point = ((InterpreterGenerator*) this)->generate_empty_entry(); break;
++ case Interpreter::accessor : entry_point = ((InterpreterGenerator*) this)->generate_accessor_entry(); break;
++ case Interpreter::abstract : entry_point = ((InterpreterGenerator*) this)->generate_abstract_entry(); break;
++
++ case Interpreter::java_lang_math_sin : // fall thru
++ case Interpreter::java_lang_math_cos : // fall thru
++ case Interpreter::java_lang_math_tan : // fall thru
++ case Interpreter::java_lang_math_abs : // fall thru
++ case Interpreter::java_lang_math_log : // fall thru
++ case Interpreter::java_lang_math_log10 : // fall thru
++ case Interpreter::java_lang_math_sqrt : // fall thru
++ case Interpreter::java_lang_math_pow : // fall thru
++ case Interpreter::java_lang_math_exp : entry_point = ((InterpreterGenerator*) this)->generate_math_entry(kind); break;
++ case Interpreter::java_lang_ref_reference_get
++ : entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break;
++ default : ShouldNotReachHere(); break;
++ }
++
++ if (entry_point) {
++ return entry_point;
++ }
++
++ return ((InterpreterGenerator*) this)->generate_normal_entry(synchronized);
++}
++
++// These should never be compiled since the interpreter will prefer
++// the compiled version to the intrinsic version.
++bool AbstractInterpreter::can_be_compiled(methodHandle m) {
++ return !math_entry_available(method_kind(m));
++}
++
++// How much stack a method activation needs in stack slots.
++// We must calculate this exactly as in generate_fixed_frame.
++// Note: This returns the conservative size assuming maximum alignment.
++int AbstractInterpreter::size_top_interpreter_activation(Method* method) {
++ const int max_alignment_size = 2;
++ const int abi_scratch = frame::abi_reg_args_size;
++ return method->max_locals() + method->max_stack() +
++ frame::interpreter_frame_monitor_size() + max_alignment_size + abi_scratch;
++}
++
++// Returns number of stackElementWords needed for the interpreter frame with the
++// given sections.
++// This overestimates the stack by one slot in case of alignments.
++int AbstractInterpreter::size_activation(int max_stack,
++ int temps,
++ int extra_args,
++ int monitors,
++ int callee_params,
++ int callee_locals,
++ bool is_top_frame) {
++ // Note: This calculation must exactly parallel the frame setup
++ // in AbstractInterpreterGenerator::generate_method_entry.
++ assert(Interpreter::stackElementWords == 1, "sanity");
++ const int max_alignment_space = StackAlignmentInBytes / Interpreter::stackElementSize;
++ const int abi_scratch = is_top_frame ? (frame::abi_reg_args_size / Interpreter::stackElementSize) :
++ (frame::abi_minframe_size / Interpreter::stackElementSize);
++ const int size =
++ max_stack +
++ (callee_locals - callee_params) +
++ monitors * frame::interpreter_frame_monitor_size() +
++ max_alignment_space +
++ abi_scratch +
++ frame::ijava_state_size / Interpreter::stackElementSize;
++
++ // Fixed size of an interpreter frame, aligned to a 16-byte boundary.
++ return (size & -2);
++}
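++
++// Example reading (illustrative): all terms above are 8-byte stack slots; with
++// StackAlignmentInBytes == 16, max_alignment_space is 2 slots and 'size & -2'
++// rounds down to an even slot count, i.e. a 16-byte aligned frame size.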
++
++// Fills a skeletal interpreter frame generated during deoptimizations.
++//
++// Parameters:
++//
++// interpreter_frame != NULL:
++// set up the method, locals, and monitors.
++// The frame interpreter_frame, if not NULL, is guaranteed to be the
++// right size, as determined by a previous call to this method.
++// It is also guaranteed to be walkable even though it is in a skeletal state
++//
++// is_top_frame == true:
++// We're processing the *oldest* interpreter frame!
++//
++// popframe_extra_args:
++// If this is != 0 we are returning to a deoptimized frame by popping
++// off the callee frame. We want to re-execute the call that called the
++// callee interpreted, but since the return to the interpreter would pop
++// the arguments off, we advance the esp by dummy popframe_extra_args slots.
++// Popping those off will establish the stack layout as it was before the call.
++//
++void AbstractInterpreter::layout_activation(Method* method,
++ int tempcount,
++ int popframe_extra_args,
++ int moncount,
++ int caller_actual_parameters,
++ int callee_param_count,
++ int callee_locals_count,
++ frame* caller,
++ frame* interpreter_frame,
++ bool is_top_frame,
++ bool is_bottom_frame) {
++
++ const int abi_scratch = is_top_frame ? (frame::abi_reg_args_size / Interpreter::stackElementSize) :
++ (frame::abi_minframe_size / Interpreter::stackElementSize);
++
++ intptr_t* locals_base = (caller->is_interpreted_frame()) ?
++ caller->interpreter_frame_esp() + caller_actual_parameters :
++ caller->sp() + method->max_locals() - 1 + (frame::abi_minframe_size / Interpreter::stackElementSize) ;
++
++ intptr_t* monitor_base = caller->sp() - frame::ijava_state_size / Interpreter::stackElementSize ;
++ intptr_t* monitor = monitor_base - (moncount * frame::interpreter_frame_monitor_size());
++ intptr_t* esp_base = monitor - 1;
++ intptr_t* esp = esp_base - tempcount - popframe_extra_args;
++ intptr_t* sp = (intptr_t *) (((intptr_t) (esp_base - callee_locals_count + callee_param_count - method->max_stack()- abi_scratch)) & -StackAlignmentInBytes);
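++ // The '& -StackAlignmentInBytes' masks the raw SP value down to the next
++ // 16-byte boundary below it; the stack grows downwards, so rounding down
++ // can only enlarge the frame, never truncate it.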
++ intptr_t* sender_sp = caller->sp() + (frame::abi_minframe_size - frame::abi_reg_args_size) / Interpreter::stackElementSize;
++ intptr_t* top_frame_sp = is_top_frame ? sp : sp + (frame::abi_minframe_size - frame::abi_reg_args_size) / Interpreter::stackElementSize;
++
++ interpreter_frame->interpreter_frame_set_method(method);
++ interpreter_frame->interpreter_frame_set_locals(locals_base);
++ interpreter_frame->interpreter_frame_set_cpcache(method->constants()->cache());
++ interpreter_frame->interpreter_frame_set_esp(esp);
++ interpreter_frame->interpreter_frame_set_monitor_end((BasicObjectLock *)monitor);
++ interpreter_frame->interpreter_frame_set_top_frame_sp(top_frame_sp);
++ if (!is_bottom_frame) {
++ interpreter_frame->interpreter_frame_set_sender_sp(sender_sp);
++ }
++}
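++// Skeletal frame picture (addresses decrease downwards), as computed above:
++//
++//   locals_base   (inside, or overlapping, the caller's frame)
++//   caller->sp()
++//   [ijava state] (ijava_state_size / stackElementSize slots)
++//   monitor_base
++//   [moncount monitors]
++//   esp_base = monitor - 1
++//   esp = esp_base - tempcount - popframe_extra_args
++//   sp  (16-byte aligned; leaves room for max_stack and abi_scratch)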
++
++// =============================================================================
++// Exceptions
++
++void TemplateInterpreterGenerator::generate_throw_exception() {
++ Register Rexception = R17_tos,
++ Rcontinuation = R3_RET;
++
++ // --------------------------------------------------------------------------
++ // Entry point if a method returns with a pending exception (rethrow).
++ Interpreter::_rethrow_exception_entry = __ pc();
++ {
++ __ restore_interpreter_state(R11_scratch1); // Sets R11_scratch1 = fp.
++ __ ld(R12_scratch2, _ijava_state_neg(top_frame_sp), R11_scratch1);
++ __ resize_frame_absolute(R12_scratch2, R11_scratch1, R0);
++
++ // Compiled code destroys templateTableBase, reload.
++ __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R11_scratch1);
++ }
++
++ // Entry point if an interpreted method throws an exception (throw).
++ Interpreter::_throw_exception_entry = __ pc();
++ {
++ __ mr(Rexception, R3_RET);
++
++ __ verify_thread();
++ __ verify_oop(Rexception);
++
++ // Expression stack must be empty before entering the VM in case of an exception.
++ __ empty_expression_stack();
++ // Find exception handler address and preserve exception oop.
++ // Call C routine to find handler and jump to it.
++ __ call_VM(Rexception, CAST_FROM_FN_PTR(address, InterpreterRuntime::exception_handler_for_exception), Rexception);
++ __ mtctr(Rcontinuation);
++ // Push exception for exception handler bytecodes.
++ __ push_ptr(Rexception);
++
++ // Jump to the exception handler (this may be the remove-activation entry!).
++ __ bctr();
++ }
++
++ // If the exception is not handled in the current frame the frame is
++ // removed and the exception is rethrown (i.e. exception
++ // continuation is _rethrow_exception).
++ //
++ // Note: At this point the bci is still the bci of the instruction
++ // which caused the exception, and the expression stack is
++ // empty. Thus, for any VM calls at this point, GC will find a legal
++ // oop map (with empty expression stack).
++
++ // In current activation
++ // tos: exception
++ // bcp: exception bcp
++
++ // --------------------------------------------------------------------------
++ // JVMTI PopFrame support
++
++ Interpreter::_remove_activation_preserving_args_entry = __ pc();
++ {
++ // Set the popframe_processing bit in popframe_condition indicating that we are
++ // currently handling popframe, so that call_VMs that may happen later do not
++ // trigger new popframe handling cycles.
++ __ lwz(R11_scratch1, in_bytes(JavaThread::popframe_condition_offset()), R16_thread);
++ __ ori(R11_scratch1, R11_scratch1, JavaThread::popframe_processing_bit);
++ __ stw(R11_scratch1, in_bytes(JavaThread::popframe_condition_offset()), R16_thread);
++
++ // Empty the expression stack, as in normal exception handling.
++ __ empty_expression_stack();
++ __ unlock_if_synchronized_method(vtos, /* throw_monitor_exception */ false, /* install_monitor_exception */ false);
++
++ // Check to see whether we are returning to a deoptimized frame.
++ // (The PopFrame call ensures that the caller of the popped frame is
++ // either interpreted or compiled and deoptimizes it if compiled.)
++ // Note that we don't compare the return PC against the
++ // deoptimization blob's unpack entry because of the presence of
++ // adapter frames in C2.
++ Label Lcaller_not_deoptimized;
++ Register return_pc = R3_ARG1;
++ __ ld(return_pc, 0, R1_SP);
++ __ ld(return_pc, _abi(lr), return_pc);
++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), return_pc);
++ __ cmpdi(CCR0, R3_RET, 0);
++ __ bne(CCR0, Lcaller_not_deoptimized);
++
++ // The deoptimized case.
++ // In this case, we can't call dispatch_next() after the frame is
++ // popped, but instead must save the incoming arguments and restore
++ // them after deoptimization has occurred.
++ __ ld(R4_ARG2, in_bytes(Method::const_offset()), R19_method);
++ __ lhz(R4_ARG2 /* number of params */, in_bytes(ConstMethod::size_of_parameters_offset()), R4_ARG2);
++ __ slwi(R4_ARG2, R4_ARG2, Interpreter::logStackElementSize);
++ __ addi(R5_ARG3, R18_locals, Interpreter::stackElementSize);
++ __ subf(R5_ARG3, R4_ARG2, R5_ARG3);
++ // Save these arguments.
++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::popframe_preserve_args), R16_thread, R4_ARG2, R5_ARG3);
++
++ // Inform deoptimization that it is responsible for restoring these arguments.
++ __ load_const_optimized(R11_scratch1, JavaThread::popframe_force_deopt_reexecution_bit);
++ __ stw(R11_scratch1, in_bytes(JavaThread::popframe_condition_offset()), R16_thread);
++
++ // Return from the current method into the deoptimization blob. We will
++ // eventually end up at the deopt interpreter entry; deoptimization has
++ // prepared everything so that we re-execute the call that called us.
++ __ merge_frames(/*top_frame_sp*/ R21_sender_SP, /*reload return_pc*/ return_pc, R11_scratch1, R12_scratch2);
++ __ mtlr(return_pc);
++ __ blr();
++
++ // The non-deoptimized case.
++ __ bind(Lcaller_not_deoptimized);
++
++ // Clear the popframe condition flag.
++ __ li(R0, 0);
++ __ stw(R0, in_bytes(JavaThread::popframe_condition_offset()), R16_thread);
++
++ // Get out of the current method and re-execute the call that called us.
++ __ merge_frames(/*top_frame_sp*/ R21_sender_SP, /*return_pc*/ noreg, R11_scratch1, R12_scratch2);
++ __ restore_interpreter_state(R11_scratch1);
++ __ ld(R12_scratch2, _ijava_state_neg(top_frame_sp), R11_scratch1);
++ __ resize_frame_absolute(R12_scratch2, R11_scratch1, R0);
++ if (ProfileInterpreter) {
++ __ set_method_data_pointer_for_bcp();
++ }
++#if INCLUDE_JVMTI
++ Label L_done;
++
++ __ lbz(R11_scratch1, 0, R14_bcp);
++ __ cmpwi(CCR0, R11_scratch1, Bytecodes::_invokestatic);
++ __ bne(CCR0, L_done);
++
++ // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call.
++ // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL.
++ __ ld(R4_ARG2, 0, R18_locals);
++ __ call_VM(R11_scratch1, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null),
++ R4_ARG2, R19_method, R14_bcp);
++
++ __ cmpdi(CCR0, R11_scratch1, 0);
++ __ beq(CCR0, L_done);
++
++ __ std(R11_scratch1, wordSize, R15_esp);
++ __ bind(L_done);
++#endif // INCLUDE_JVMTI
++ __ dispatch_next(vtos);
++ }
++ // end of JVMTI PopFrame support
++
++ // --------------------------------------------------------------------------
++ // Remove activation exception entry.
++ // This is jumped to if an interpreted method can't handle an exception itself
++ // (we come from the throw/rethrow exception entry above). We're going to call
++ // into the VM to find the exception handler in the caller, pop the current
++ // frame and return the handler we calculated.
++ Interpreter::_remove_activation_entry = __ pc();
++ {
++ __ pop_ptr(Rexception);
++ __ verify_thread();
++ __ verify_oop(Rexception);
++ __ std(Rexception, in_bytes(JavaThread::vm_result_offset()), R16_thread);
++
++ __ unlock_if_synchronized_method(vtos, /* throw_monitor_exception */ false, true);
++ __ notify_method_exit(false, vtos, InterpreterMacroAssembler::SkipNotifyJVMTI, false);
++
++ __ get_vm_result(Rexception);
++
++ // We are done with this activation frame; find out where to go next.
++ // The continuation point will be an exception handler, which expects
++ // the following registers set up:
++ //
++ // RET: exception oop
++ // ARG2: Issuing PC (see generate_exception_blob()), only used if the caller is compiled.
++
++ Register return_pc = R31; // Needs to survive the runtime call.
++ __ ld(return_pc, 0, R1_SP);
++ __ ld(return_pc, _abi(lr), return_pc);
++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), R16_thread, return_pc);
++
++ // Remove the current activation.
++ __ merge_frames(/*top_frame_sp*/ R21_sender_SP, /*return_pc*/ noreg, R11_scratch1, R12_scratch2);
++
++ __ mr(R4_ARG2, return_pc);
++ __ mtlr(R3_RET);
++ __ mr(R3_RET, Rexception);
++ __ blr();
++ }
++}
++
++// JVMTI ForceEarlyReturn support.
++// Returns "in the middle" of a method with a "fake" return value.
++address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) {
++
++ Register Rscratch1 = R11_scratch1,
++ Rscratch2 = R12_scratch2;
++
++ address entry = __ pc();
++ __ empty_expression_stack();
++
++ __ load_earlyret_value(state, Rscratch1);
++
++ __ ld(Rscratch1, in_bytes(JavaThread::jvmti_thread_state_offset()), R16_thread);
++ // Clear the earlyret state.
++ __ li(R0, 0);
++ __ stw(R0, in_bytes(JvmtiThreadState::earlyret_state_offset()), Rscratch1);
++
++ __ remove_activation(state, false, false);
++ // Copied from TemplateTable::_return.
++ // Restoration of lr done by remove_activation.
++ switch (state) {
++ case ltos:
++ case btos:
++ case ctos:
++ case stos:
++ case atos:
++ case itos: __ mr(R3_RET, R17_tos); break;
++ case ftos:
++ case dtos: __ fmr(F1_RET, F15_ftos); break;
++ case vtos: // This might be a constructor. Final fields (and volatile fields on PPC64) need
++ // to become visible before the reference to the object gets stored anywhere.
++ __ membar(Assembler::StoreStore); break;
++ default : ShouldNotReachHere();
++ }
++ __ blr();
++
++ return entry;
++} // end of ForceEarlyReturn support
++
++//-----------------------------------------------------------------------------
++// Helper for vtos entry point generation
++
++void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t,
++ address& bep,
++ address& cep,
++ address& sep,
++ address& aep,
++ address& iep,
++ address& lep,
++ address& fep,
++ address& dep,
++ address& vep) {
++ assert(t->is_valid() && t->tos_in() == vtos, "illegal template");
++ Label L;
++
++ aep = __ pc(); __ push_ptr(); __ b(L);
++ fep = __ pc(); __ push_f(); __ b(L);
++ dep = __ pc(); __ push_d(); __ b(L);
++ lep = __ pc(); __ push_l(); __ b(L);
++ __ align(32, 12, 24); // align L
++ bep = cep = sep =
++ iep = __ pc(); __ push_i();
++ vep = __ pc();
++ __ bind(L);
++ generate_and_dispatch(t);
++}
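++// Generated layout (sketch): every non-vtos entry spills the tos cache
++// onto the expression stack and falls into the common label L, so the
++// template body only has to be generated once, for an empty tos cache:
++//   aep: push_ptr; b L    fep: push_f; b L    dep: push_d; b L
++//   lep: push_l; b L      bep/cep/sep/iep: push_i (falls through)
++//   vep:                  L: <template body via generate_and_dispatch>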
++
++//-----------------------------------------------------------------------------
++// Generation of individual instructions
++
++// helpers for generate_and_dispatch
++
++InterpreterGenerator::InterpreterGenerator(StubQueue* code)
++ : TemplateInterpreterGenerator(code) {
++ generate_all(); // Down here so it can be "virtual".
++}
++
++//-----------------------------------------------------------------------------
++
++// Non-product code
++#ifndef PRODUCT
++address TemplateInterpreterGenerator::generate_trace_code(TosState state) {
++ //__ flush_bundle();
++ address entry = __ pc();
++
++ const char *bname = NULL;
++ uint tsize = 0;
++ switch(state) {
++ case ftos:
++ bname = "trace_code_ftos {";
++ tsize = 2;
++ break;
++ case btos:
++ bname = "trace_code_btos {";
++ tsize = 2;
++ break;
++ case ctos:
++ bname = "trace_code_ctos {";
++ tsize = 2;
++ break;
++ case stos:
++ bname = "trace_code_stos {";
++ tsize = 2;
++ break;
++ case itos:
++ bname = "trace_code_itos {";
++ tsize = 2;
++ break;
++ case ltos:
++ bname = "trace_code_ltos {";
++ tsize = 3;
++ break;
++ case atos:
++ bname = "trace_code_atos {";
++ tsize = 2;
++ break;
++ case vtos:
++ // Note: In case of vtos, the topmost stack value could be an int or a double.
++ // In case of a double (2 slots) we won't see the 2nd stack value.
++ // Maybe we should simply print the topmost 3 stack slots to cope with this.
++ bname = "trace_code_vtos {";
++ tsize = 2;
++ break;
++ case dtos:
++ bname = "trace_code_dtos {";
++ tsize = 3;
++ break;
++ default:
++ ShouldNotReachHere();
++ }
++ BLOCK_COMMENT(bname);
++
++ // Support short-cut for TraceBytecodesAt.
++ // To speed things up, don't call into the VM if we don't want to trace.
++ Label Lskip_vm_call;
++ if (TraceBytecodesAt > 0 && TraceBytecodesAt < max_intx) {
++ int offs1 = __ load_const_optimized(R11_scratch1, (address) &TraceBytecodesAt, R0, true);
++ int offs2 = __ load_const_optimized(R12_scratch2, (address) &BytecodeCounter::_counter_value, R0, true);
++ __ ld(R11_scratch1, offs1, R11_scratch1);
++ __ lwa(R12_scratch2, offs2, R12_scratch2);
++ __ cmpd(CCR0, R12_scratch2, R11_scratch1);
++ __ blt(CCR0, Lskip_vm_call);
++ }
++
++ __ push(state);
++ // Load 2 topmost expression stack values.
++ __ ld(R6_ARG4, tsize*Interpreter::stackElementSize, R15_esp);
++ __ ld(R5_ARG3, Interpreter::stackElementSize, R15_esp);
++ __ mflr(R31);
++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::trace_bytecode), /* unused */ R4_ARG2, R5_ARG3, R6_ARG4, false);
++ __ mtlr(R31);
++ __ pop(state);
++
++ if (TraceBytecodesAt > 0 && TraceBytecodesAt < max_intx) {
++ __ bind(Lskip_vm_call);
++ }
++ __ blr();
++ BLOCK_COMMENT("} trace_code");
++ return entry;
++}
++
++void TemplateInterpreterGenerator::count_bytecode() {
++ int offs = __ load_const_optimized(R11_scratch1, (address) &BytecodeCounter::_counter_value, R12_scratch2, true);
++ __ lwz(R12_scratch2, offs, R11_scratch1);
++ __ addi(R12_scratch2, R12_scratch2, 1);
++ __ stw(R12_scratch2, offs, R11_scratch1);
++}
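++// The code generated above is equivalent to the C++ statement
++//   BytecodeCounter::_counter_value++;
++// load_const_optimized (with its last argument true) materializes the
++// address except for its low 16 bits and returns those as the simm16
++// displacement 'offs' applied by the following lwz/stw, saving one
++// instruction per access.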
++
++void TemplateInterpreterGenerator::histogram_bytecode(Template* t) {
++ int offs = __ load_const_optimized(R11_scratch1, (address) &BytecodeHistogram::_counters[t->bytecode()], R12_scratch2, true);
++ __ lwz(R12_scratch2, offs, R11_scratch1);
++ __ addi(R12_scratch2, R12_scratch2, 1);
++ __ stw(R12_scratch2, offs, R11_scratch1);
++}
++
++void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) {
++ const Register addr = R11_scratch1,
++ tmp = R12_scratch2;
++ // Get index, shift out old bytecode, bring in new bytecode, and store it.
++ // _index = (_index >> log2_number_of_codes) |
++ // (bytecode << log2_number_of_codes);
++ int offs1 = __ load_const_optimized(addr, (address)&BytecodePairHistogram::_index, tmp, true);
++ __ lwz(tmp, offs1, addr);
++ __ srwi(tmp, tmp, BytecodePairHistogram::log2_number_of_codes);
++ __ ori(tmp, tmp, ((int) t->bytecode()) << BytecodePairHistogram::log2_number_of_codes);
++ __ stw(tmp, offs1, addr);
++
++ // Bump bucket contents.
++ // _counters[_index] ++;
++ int offs2 = __ load_const_optimized(addr, (address)&BytecodePairHistogram::_counters, R0, true);
++ __ sldi(tmp, tmp, LogBytesPerInt);
++ __ add(addr, tmp, addr);
++ __ lwz(tmp, offs2, addr);
++ __ addi(tmp, tmp, 1);
++ __ stw(tmp, offs2, addr);
++}
++
++void TemplateInterpreterGenerator::trace_bytecode(Template* t) {
++ // Call a little run-time stub to avoid blow-up for each bytecode.
++ // The run-time stub saves the right registers, depending on
++ // the tosca in-state for the given template.
++
++ assert(Interpreter::trace_code(t->tos_in()) != NULL,
++ "entry must have been generated");
++
++ // Note: we destroy LR here.
++ __ bl(Interpreter::trace_code(t->tos_in()));
++}
++
++void TemplateInterpreterGenerator::stop_interpreter_at() {
++ Label L;
++ int offs1 = __ load_const_optimized(R11_scratch1, (address) &StopInterpreterAt, R0, true);
++ int offs2 = __ load_const_optimized(R12_scratch2, (address) &BytecodeCounter::_counter_value, R0, true);
++ __ ld(R11_scratch1, offs1, R11_scratch1);
++ __ lwa(R12_scratch2, offs2, R12_scratch2);
++ __ cmpd(CCR0, R12_scratch2, R11_scratch1);
++ __ bne(CCR0, L);
++ __ illtrap();
++ __ bind(L);
++}
++
++#endif // !PRODUCT
++#endif // !CC_INTERP
+--- ./hotspot/src/cpu/ppc/vm/templateInterpreter_ppc.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/templateInterpreter_ppc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,41 @@
++/*
++ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2013, 2014 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_PPC_VM_TEMPLATEINTERPRETER_PPC_HPP
++#define CPU_PPC_VM_TEMPLATEINTERPRETER_PPC_HPP
++
++ protected:
++
++ // Size of interpreter code. Increase if too small. The interpreter will
++ // fail with a guarantee ("not enough space for interpreter generation")
++ // if it is too small.
++ // Run with -XX:+PrintInterpreter to get the VM to print out the size.
++ // Max size with JVMTI.
++
++ const static int InterpreterCodeSize = 210*K;
++
++#endif // CPU_PPC_VM_TEMPLATEINTERPRETER_PPC_HPP
++
++
+--- ./hotspot/src/cpu/ppc/vm/templateTable_ppc_64.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/templateTable_ppc_64.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,4082 @@
++/*
++ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2013, 2014 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "asm/macroAssembler.inline.hpp"
++#include "interpreter/interpreter.hpp"
++#include "interpreter/interpreterRuntime.hpp"
++#include "interpreter/templateInterpreter.hpp"
++#include "interpreter/templateTable.hpp"
++#include "memory/universe.inline.hpp"
++#include "oops/objArrayKlass.hpp"
++#include "oops/oop.inline.hpp"
++#include "prims/methodHandles.hpp"
++#include "runtime/sharedRuntime.hpp"
++#include "runtime/stubRoutines.hpp"
++#include "runtime/synchronizer.hpp"
++#include "utilities/macros.hpp"
++
++#ifndef CC_INTERP
++
++#undef __
++#define __ _masm->
++
++// ============================================================================
++// Misc helpers
++
++// Do an oop store like *(base + index) = val OR *(base + offset) = val
++// (only one of the two variants is possible at a time).
++// Index can be noreg.
++// Kills:
++// Rbase, Rtmp1, Rtmp2, Rtmp3
++static void do_oop_store(InterpreterMacroAssembler* _masm,
++ Register Rbase,
++ RegisterOrConstant offset,
++ Register Rval, // Noreg means always null.
++ Register Rtmp1,
++ Register Rtmp2,
++ Register Rtmp3,
++ BarrierSet::Name barrier,
++ bool precise,
++ bool check_null) {
++ assert_different_registers(Rtmp1, Rtmp2, Rtmp3, Rval, Rbase);
++
++ switch (barrier) {
++#if INCLUDE_ALL_GCS
++ case BarrierSet::G1SATBCT:
++ case BarrierSet::G1SATBCTLogging:
++ {
++ // Load and record the previous value.
++ __ g1_write_barrier_pre(Rbase, offset,
++ Rtmp3, /* holder of pre_val ? */
++ Rtmp1, Rtmp2, false /* frame */);
++
++ Label Lnull, Ldone;
++ if (Rval != noreg) {
++ if (check_null) {
++ __ cmpdi(CCR0, Rval, 0);
++ __ beq(CCR0, Lnull);
++ }
++ __ store_heap_oop_not_null(Rval, offset, Rbase, /*Rval must stay uncompressed.*/ Rtmp1);
++ // Mark the card.
++ if (!(offset.is_constant() && offset.as_constant() == 0) && precise) {
++ __ add(Rbase, offset, Rbase);
++ }
++ __ g1_write_barrier_post(Rbase, Rval, Rtmp1, Rtmp2, Rtmp3, /*filtered (fast path)*/ &Ldone);
++ if (check_null) { __ b(Ldone); }
++ }
++
++ if (Rval == noreg || check_null) { // Store null oop.
++ Register Rnull = Rval;
++ __ bind(Lnull);
++ if (Rval == noreg) {
++ Rnull = Rtmp1;
++ __ li(Rnull, 0);
++ }
++ if (UseCompressedOops) {
++ __ stw(Rnull, offset, Rbase);
++ } else {
++ __ std(Rnull, offset, Rbase);
++ }
++ }
++ __ bind(Ldone);
++ }
++ break;
++#endif // INCLUDE_ALL_GCS
++ case BarrierSet::CardTableModRef:
++ case BarrierSet::CardTableExtension:
++ {
++ Label Lnull, Ldone;
++ if (Rval != noreg) {
++ if (check_null) {
++ __ cmpdi(CCR0, Rval, 0);
++ __ beq(CCR0, Lnull);
++ }
++ __ store_heap_oop_not_null(Rval, offset, Rbase, /*Rval must stay uncompressed.*/ Rtmp1);
++ // Mark the card.
++ if (!(offset.is_constant() && offset.as_constant() == 0) && precise) {
++ __ add(Rbase, offset, Rbase);
++ }
++ __ card_write_barrier_post(Rbase, Rval, Rtmp1);
++ if (check_null) {
++ __ b(Ldone);
++ }
++ }
++
++ if (Rval == noreg || check_null) { // Store null oop.
++ Register Rnull = Rval;
++ __ bind(Lnull);
++ if (Rval == noreg) {
++ Rnull = Rtmp1;
++ __ li(Rnull, 0);
++ }
++ if (UseCompressedOops) {
++ __ stw(Rnull, offset, Rbase);
++ } else {
++ __ std(Rnull, offset, Rbase);
++ }
++ }
++ __ bind(Ldone);
++ }
++ break;
++ case BarrierSet::ModRef:
++ case BarrierSet::Other:
++ ShouldNotReachHere();
++ break;
++ default:
++ ShouldNotReachHere();
++ }
++}
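++// do_oop_store is the common helper for reference stores in this file;
++// see aastore() below for a typical use: a null store on its Lis_null
++// path and a checked store on its Lstore_ok path.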
++
++// ============================================================================
++// Platform-dependent initialization
++
++void TemplateTable::pd_initialize() {
++ // No ppc64 specific initialization.
++}
++
++Address TemplateTable::at_bcp(int offset) {
++ // Not used on ppc.
++ ShouldNotReachHere();
++ return Address();
++}
++
++// Patches the current bytecode (the pointer to it is in bcp)
++// in the bytecode stream with a new one.
++void TemplateTable::patch_bytecode(Bytecodes::Code new_bc, Register Rnew_bc, Register Rtemp, bool load_bc_into_bc_reg /*=true*/, int byte_no) {
++ // With sharing on, may need to test method flag.
++ if (!RewriteBytecodes) return;
++ Label L_patch_done;
++
++ switch (new_bc) {
++ case Bytecodes::_fast_aputfield:
++ case Bytecodes::_fast_bputfield:
++ case Bytecodes::_fast_cputfield:
++ case Bytecodes::_fast_dputfield:
++ case Bytecodes::_fast_fputfield:
++ case Bytecodes::_fast_iputfield:
++ case Bytecodes::_fast_lputfield:
++ case Bytecodes::_fast_sputfield:
++ {
++ // We skip bytecode quickening for putfield instructions when
++ // the put_code written to the constant pool cache is zero.
++ // This is required so that every execution of this instruction
++ // calls out to InterpreterRuntime::resolve_get_put to do
++ // additional, required work.
++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
++ assert(load_bc_into_bc_reg, "we use bc_reg as temp");
++ __ get_cache_and_index_at_bcp(Rtemp /* dst = cache */, 1);
++ // Big Endian: ((*(cache+indices))>>((1+byte_no)*8))&0xFF
++ __ lbz(Rnew_bc, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()) + 7 - (1 + byte_no), Rtemp);
++ __ cmpwi(CCR0, Rnew_bc, 0);
++ __ li(Rnew_bc, (unsigned int)(unsigned char)new_bc);
++ __ beq(CCR0, L_patch_done);
++ // __ isync(); // acquire not needed
++ break;
++ }
++
++ default:
++ assert(byte_no == -1, "sanity");
++ if (load_bc_into_bc_reg) {
++ __ li(Rnew_bc, (unsigned int)(unsigned char)new_bc);
++ }
++ }
++
++ if (JvmtiExport::can_post_breakpoint()) {
++ Label L_fast_patch;
++ __ lbz(Rtemp, 0, R14_bcp);
++ __ cmpwi(CCR0, Rtemp, (unsigned int)(unsigned char)Bytecodes::_breakpoint);
++ __ bne(CCR0, L_fast_patch);
++ // Perform the quickening, slowly, in the bowels of the breakpoint table.
++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), R19_method, R14_bcp, Rnew_bc);
++ __ b(L_patch_done);
++ __ bind(L_fast_patch);
++ }
++
++ // Patch bytecode.
++ __ stb(Rnew_bc, 0, R14_bcp);
++
++ __ bind(L_patch_done);
++}
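++// Worked example for the big-endian access in the putfield case above:
++// for byte_no == 1 the formula reads byte (1 + 1) == 2, counted from the
++// least significant end of the 8-byte indices word; on big-endian PPC64
++// that byte sits at offset 7 - 2 == 5 from the word's base address,
++// which is exactly the lbz displacement 7 - (1 + byte_no).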
++
++// ============================================================================
++// Individual instructions
++
++void TemplateTable::nop() {
++ transition(vtos, vtos);
++ // Nothing to do.
++}
++
++void TemplateTable::shouldnotreachhere() {
++ transition(vtos, vtos);
++ __ stop("shouldnotreachhere bytecode");
++}
++
++void TemplateTable::aconst_null() {
++ transition(vtos, atos);
++ __ li(R17_tos, 0);
++}
++
++void TemplateTable::iconst(int value) {
++ transition(vtos, itos);
++ assert(value >= -1 && value <= 5, "");
++ __ li(R17_tos, value);
++}
++
++void TemplateTable::lconst(int value) {
++ transition(vtos, ltos);
++ assert(value >= -1 && value <= 5, "");
++ __ li(R17_tos, value);
++}
++
++void TemplateTable::fconst(int value) {
++ transition(vtos, ftos);
++ static float zero = 0.0;
++ static float one = 1.0;
++ static float two = 2.0;
++ switch (value) {
++ default: ShouldNotReachHere();
++ case 0: {
++ int simm16_offset = __ load_const_optimized(R11_scratch1, (address*)&zero, R0, true);
++ __ lfs(F15_ftos, simm16_offset, R11_scratch1);
++ break;
++ }
++ case 1: {
++ int simm16_offset = __ load_const_optimized(R11_scratch1, (address*)&one, R0, true);
++ __ lfs(F15_ftos, simm16_offset, R11_scratch1);
++ break;
++ }
++ case 2: {
++ int simm16_offset = __ load_const_optimized(R11_scratch1, (address*)&two, R0, true);
++ __ lfs(F15_ftos, simm16_offset, R11_scratch1);
++ break;
++ }
++ }
++}
++
++void TemplateTable::dconst(int value) {
++ transition(vtos, dtos);
++ static double zero = 0.0;
++ static double one = 1.0;
++ switch (value) {
++ case 0: {
++ int simm16_offset = __ load_const_optimized(R11_scratch1, (address*)&zero, R0, true);
++ __ lfd(F15_ftos, simm16_offset, R11_scratch1);
++ break;
++ }
++ case 1: {
++ int simm16_offset = __ load_const_optimized(R11_scratch1, (address*)&one, R0, true);
++ __ lfd(F15_ftos, simm16_offset, R11_scratch1);
++ break;
++ }
++ default: ShouldNotReachHere();
++ }
++}
++
++void TemplateTable::bipush() {
++ transition(vtos, itos);
++ __ lbz(R17_tos, 1, R14_bcp);
++ __ extsb(R17_tos, R17_tos);
++}
++
++void TemplateTable::sipush() {
++ transition(vtos, itos);
++ __ get_2_byte_integer_at_bcp(1, R17_tos, InterpreterMacroAssembler::Signed);
++}
++
++void TemplateTable::ldc(bool wide) {
++ Register Rscratch1 = R11_scratch1,
++ Rscratch2 = R12_scratch2,
++ Rcpool = R3_ARG1;
++
++ transition(vtos, vtos);
++ Label notInt, notClass, exit;
++
++ __ get_cpool_and_tags(Rcpool, Rscratch2); // Set Rscratch2 = &tags.
++ if (wide) { // Read index.
++ __ get_2_byte_integer_at_bcp(1, Rscratch1, InterpreterMacroAssembler::Unsigned);
++ } else {
++ __ lbz(Rscratch1, 1, R14_bcp);
++ }
++
++ const int base_offset = ConstantPool::header_size() * wordSize;
++ const int tags_offset = Array<u1>::base_offset_in_bytes();
++
++ // Get type from tags.
++ __ addi(Rscratch2, Rscratch2, tags_offset);
++ __ lbzx(Rscratch2, Rscratch2, Rscratch1);
++
++ __ cmpwi(CCR0, Rscratch2, JVM_CONSTANT_UnresolvedClass); // Unresolved class?
++ __ cmpwi(CCR1, Rscratch2, JVM_CONSTANT_UnresolvedClassInError); // Unresolved class in error state?
++ __ cror(/*CR0 eq*/2, /*CR1 eq*/4+2, /*CR0 eq*/2);
++
++ // Resolved class - need to call vm to get java mirror of the class.
++ __ cmpwi(CCR1, Rscratch2, JVM_CONSTANT_Class);
++ __ crnor(/*CR0 eq*/2, /*CR1 eq*/4+2, /*CR0 eq*/2); // Neither resolved class nor unresolved case from above?
++ __ beq(CCR0, notClass);
++
++ __ li(R4, wide ? 1 : 0);
++ call_VM(R17_tos, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), R4);
++ __ push(atos);
++ __ b(exit);
++
++ __ align(32, 12);
++ __ bind(notClass);
++ __ addi(Rcpool, Rcpool, base_offset);
++ __ sldi(Rscratch1, Rscratch1, LogBytesPerWord);
++ __ cmpdi(CCR0, Rscratch2, JVM_CONSTANT_Integer);
++ __ bne(CCR0, notInt);
++ __ isync(); // Order load of constant wrt. tags.
++ __ lwax(R17_tos, Rcpool, Rscratch1);
++ __ push(itos);
++ __ b(exit);
++
++ __ align(32, 12);
++ __ bind(notInt);
++#ifdef ASSERT
++ // String and Object are rewritten to fast_aldc
++ __ cmpdi(CCR0, Rscratch2, JVM_CONSTANT_Float);
++ __ asm_assert_eq("unexpected type", 0x8765);
++#endif
++ __ isync(); // Order load of constant wrt. tags.
++ __ lfsx(F15_ftos, Rcpool, Rscratch1);
++ __ push(ftos);
++
++ __ align(32, 12);
++ __ bind(exit);
++}
++
++// Fast path for caching oop constants.
++void TemplateTable::fast_aldc(bool wide) {
++ transition(vtos, atos);
++
++ int index_size = wide ? sizeof(u2) : sizeof(u1);
++ const Register Rscratch = R11_scratch1;
++ Label resolved;
++
++ // We are resolved if the resolved reference cache entry contains a
++ // non-null object (CallSite, etc.)
++ __ get_cache_index_at_bcp(Rscratch, 1, index_size); // Load index.
++ __ load_resolved_reference_at_index(R17_tos, Rscratch);
++ __ cmpdi(CCR0, R17_tos, 0);
++ __ bne(CCR0, resolved);
++ __ load_const_optimized(R3_ARG1, (int)bytecode());
++
++ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);
++
++ // First time invocation - must resolve first.
++ __ call_VM(R17_tos, entry, R3_ARG1);
++
++ __ align(32, 12);
++ __ bind(resolved);
++ __ verify_oop(R17_tos);
++}
++
++void TemplateTable::ldc2_w() {
++ transition(vtos, vtos);
++ Label Llong, Lexit;
++
++ Register Rindex = R11_scratch1,
++ Rcpool = R12_scratch2,
++ Rtag = R3_ARG1;
++ __ get_cpool_and_tags(Rcpool, Rtag);
++ __ get_2_byte_integer_at_bcp(1, Rindex, InterpreterMacroAssembler::Unsigned);
++
++ const int base_offset = ConstantPool::header_size() * wordSize;
++ const int tags_offset = Array<u1>::base_offset_in_bytes();
++ // Get type from tags.
++ __ addi(Rcpool, Rcpool, base_offset);
++ __ addi(Rtag, Rtag, tags_offset);
++
++ __ lbzx(Rtag, Rtag, Rindex);
++
++ __ sldi(Rindex, Rindex, LogBytesPerWord);
++ __ cmpdi(CCR0, Rtag, JVM_CONSTANT_Double);
++ __ bne(CCR0, Llong);
++ // A double can be placed at word-aligned locations in the constant pool.
++ // Check out Conversions.java for an example.
++ // Also ConstantPool::header_size() is 20, which makes it very difficult
++ // to double-align double on the constant pool. SG, 11/7/97
++ __ isync(); // Order load of constant wrt. tags.
++ __ lfdx(F15_ftos, Rcpool, Rindex);
++ __ push(dtos);
++ __ b(Lexit);
++
++ __ bind(Llong);
++ __ isync(); // Order load of constant wrt. tags.
++ __ ldx(R17_tos, Rcpool, Rindex);
++ __ push(ltos);
++
++ __ bind(Lexit);
++}
++
++// Get the locals index located in the bytecode stream at bcp + offset.
++void TemplateTable::locals_index(Register Rdst, int offset) {
++ __ lbz(Rdst, offset, R14_bcp);
++}
++
++void TemplateTable::iload() {
++ transition(vtos, itos);
++
++ // Get the local value into tos
++ const Register Rindex = R22_tmp2;
++ locals_index(Rindex);
++
++ // Rewrite the iload,iload pair into fast_iload2 and the
++ // iload,caload pair into fast_icaload.
++ if (RewriteFrequentPairs) {
++ Label Lrewrite, Ldone;
++ Register Rnext_byte = R3_ARG1,
++ Rrewrite_to = R6_ARG4,
++ Rscratch = R11_scratch1;
++
++ // get next byte
++ __ lbz(Rnext_byte, Bytecodes::length_for(Bytecodes::_iload), R14_bcp);
++
++ // If _iload, wait to rewrite to fast_iload2. We only want to rewrite the
++ // last two iloads in a pair. Comparing against fast_iload means that
++ // the next bytecode is neither an iload nor a caload, and that this is
++ // therefore the end of an iload pair.
++ __ cmpwi(CCR0, Rnext_byte, (unsigned int)(unsigned char)Bytecodes::_iload);
++ __ beq(CCR0, Ldone);
++
++ __ cmpwi(CCR1, Rnext_byte, (unsigned int)(unsigned char)Bytecodes::_fast_iload);
++ __ li(Rrewrite_to, (unsigned int)(unsigned char)Bytecodes::_fast_iload2);
++ __ beq(CCR1, Lrewrite);
++
++ __ cmpwi(CCR0, Rnext_byte, (unsigned int)(unsigned char)Bytecodes::_caload);
++ __ li(Rrewrite_to, (unsigned int)(unsigned char)Bytecodes::_fast_icaload);
++ __ beq(CCR0, Lrewrite);
++
++ __ li(Rrewrite_to, (unsigned int)(unsigned char)Bytecodes::_fast_iload);
++
++ __ bind(Lrewrite);
++ patch_bytecode(Bytecodes::_iload, Rrewrite_to, Rscratch, false);
++ __ bind(Ldone);
++ }
++
++ __ load_local_int(R17_tos, Rindex, Rindex);
++}
++
++// Load 2 integers in a row without dispatching
++void TemplateTable::fast_iload2() {
++ transition(vtos, itos);
++
++ __ lbz(R3_ARG1, 1, R14_bcp);
++ __ lbz(R17_tos, Bytecodes::length_for(Bytecodes::_iload) + 1, R14_bcp);
++
++ __ load_local_int(R3_ARG1, R11_scratch1, R3_ARG1);
++ __ load_local_int(R17_tos, R12_scratch2, R17_tos);
++ __ push_i(R3_ARG1);
++}
++
++void TemplateTable::fast_iload() {
++ transition(vtos, itos);
++ // Get the local value into tos
++
++ const Register Rindex = R11_scratch1;
++ locals_index(Rindex);
++ __ load_local_int(R17_tos, Rindex, Rindex);
++}
++
++// Load a local variable of type long from the locals area into the TOS cache register.
++// The local index resides in the bytecode stream.
++void TemplateTable::lload() {
++ transition(vtos, ltos);
++
++ const Register Rindex = R11_scratch1;
++ locals_index(Rindex);
++ __ load_local_long(R17_tos, Rindex, Rindex);
++}
++
++void TemplateTable::fload() {
++ transition(vtos, ftos);
++
++ const Register Rindex = R11_scratch1;
++ locals_index(Rindex);
++ __ load_local_float(F15_ftos, Rindex, Rindex);
++}
++
++void TemplateTable::dload() {
++ transition(vtos, dtos);
++
++ const Register Rindex = R11_scratch1;
++ locals_index(Rindex);
++ __ load_local_double(F15_ftos, Rindex, Rindex);
++}
++
++void TemplateTable::aload() {
++ transition(vtos, atos);
++
++ const Register Rindex = R11_scratch1;
++ locals_index(Rindex);
++ __ load_local_ptr(R17_tos, Rindex, Rindex);
++}
++
++void TemplateTable::locals_index_wide(Register Rdst) {
++ // Offset is 2, not 1, because bcp points to the wide prefix code.
++ __ get_2_byte_integer_at_bcp(2, Rdst, InterpreterMacroAssembler::Unsigned);
++}
++
++void TemplateTable::wide_iload() {
++ // Get the local value into tos.
++
++ const Register Rindex = R11_scratch1;
++ locals_index_wide(Rindex);
++ __ load_local_int(R17_tos, Rindex, Rindex);
++}
++
++void TemplateTable::wide_lload() {
++ transition(vtos, ltos);
++
++ const Register Rindex = R11_scratch1;
++ locals_index_wide(Rindex);
++ __ load_local_long(R17_tos, Rindex, Rindex);
++}
++
++void TemplateTable::wide_fload() {
++ transition(vtos, ftos);
++
++ const Register Rindex = R11_scratch1;
++ locals_index_wide(Rindex);
++ __ load_local_float(F15_ftos, Rindex, Rindex);
++}
++
++void TemplateTable::wide_dload() {
++ transition(vtos, dtos);
++
++ const Register Rindex = R11_scratch1;
++ locals_index_wide(Rindex);
++ __ load_local_double(F15_ftos, Rindex, Rindex);
++}
++
++void TemplateTable::wide_aload() {
++ transition(vtos, atos);
++
++ const Register Rindex = R11_scratch1;
++ locals_index_wide(Rindex);
++ __ load_local_ptr(R17_tos, Rindex, Rindex);
++}
++
++void TemplateTable::iaload() {
++ transition(itos, itos);
++
++ const Register Rload_addr = R3_ARG1,
++ Rarray = R4_ARG2,
++ Rtemp = R5_ARG3;
++ __ index_check(Rarray, R17_tos /* index */, LogBytesPerInt, Rtemp, Rload_addr);
++ __ lwa(R17_tos, arrayOopDesc::base_offset_in_bytes(T_INT), Rload_addr);
++}
++
++void TemplateTable::laload() {
++ transition(itos, ltos);
++
++ const Register Rload_addr = R3_ARG1,
++ Rarray = R4_ARG2,
++ Rtemp = R5_ARG3;
++ __ index_check(Rarray, R17_tos /* index */, LogBytesPerLong, Rtemp, Rload_addr);
++ __ ld(R17_tos, arrayOopDesc::base_offset_in_bytes(T_LONG), Rload_addr);
++}
++
++void TemplateTable::faload() {
++ transition(itos, ftos);
++
++ const Register Rload_addr = R3_ARG1,
++ Rarray = R4_ARG2,
++ Rtemp = R5_ARG3;
++ __ index_check(Rarray, R17_tos /* index */, LogBytesPerInt, Rtemp, Rload_addr);
++ __ lfs(F15_ftos, arrayOopDesc::base_offset_in_bytes(T_FLOAT), Rload_addr);
++}
++
++void TemplateTable::daload() {
++ transition(itos, dtos);
++
++ const Register Rload_addr = R3_ARG1,
++ Rarray = R4_ARG2,
++ Rtemp = R5_ARG3;
++ __ index_check(Rarray, R17_tos /* index */, LogBytesPerLong, Rtemp, Rload_addr);
++ __ lfd(F15_ftos, arrayOopDesc::base_offset_in_bytes(T_DOUBLE), Rload_addr);
++}
++
++void TemplateTable::aaload() {
++ transition(itos, atos);
++
++ // tos: index
++ // result tos: array element
++ const Register Rload_addr = R3_ARG1,
++ Rarray = R4_ARG2,
++ Rtemp = R5_ARG3;
++ __ index_check(Rarray, R17_tos /* index */, UseCompressedOops ? 2 : LogBytesPerWord, Rtemp, Rload_addr);
++ __ load_heap_oop(R17_tos, arrayOopDesc::base_offset_in_bytes(T_OBJECT), Rload_addr);
++ __ verify_oop(R17_tos);
++ //__ dcbt(R17_tos); // prefetch
++}
++
++void TemplateTable::baload() {
++ transition(itos, itos);
++
++ const Register Rload_addr = R3_ARG1,
++ Rarray = R4_ARG2,
++ Rtemp = R5_ARG3;
++ __ index_check(Rarray, R17_tos /* index */, 0, Rtemp, Rload_addr);
++ __ lbz(R17_tos, arrayOopDesc::base_offset_in_bytes(T_BYTE), Rload_addr);
++ __ extsb(R17_tos, R17_tos);
++}
++
++void TemplateTable::caload() {
++ transition(itos, itos);
++
++ const Register Rload_addr = R3_ARG1,
++ Rarray = R4_ARG2,
++ Rtemp = R5_ARG3;
++ __ index_check(Rarray, R17_tos /* index */, LogBytesPerShort, Rtemp, Rload_addr);
++ __ lhz(R17_tos, arrayOopDesc::base_offset_in_bytes(T_CHAR), Rload_addr);
++}
++
++// Iload followed by caload frequent pair.
++void TemplateTable::fast_icaload() {
++ transition(vtos, itos);
++
++ const Register Rload_addr = R3_ARG1,
++ Rarray = R4_ARG2,
++ Rtemp = R11_scratch1;
++
++ locals_index(R17_tos);
++ __ load_local_int(R17_tos, Rtemp, R17_tos);
++ __ index_check(Rarray, R17_tos /* index */, LogBytesPerShort, Rtemp, Rload_addr);
++ __ lhz(R17_tos, arrayOopDesc::base_offset_in_bytes(T_CHAR), Rload_addr);
++}
++
++void TemplateTable::saload() {
++ transition(itos, itos);
++
++ const Register Rload_addr = R11_scratch1,
++ Rarray = R12_scratch2,
++ Rtemp = R3_ARG1;
++ __ index_check(Rarray, R17_tos /* index */, LogBytesPerShort, Rtemp, Rload_addr);
++ __ lha(R17_tos, arrayOopDesc::base_offset_in_bytes(T_SHORT), Rload_addr);
++}
++
++void TemplateTable::iload(int n) {
++ transition(vtos, itos);
++
++ __ lwz(R17_tos, Interpreter::local_offset_in_bytes(n), R18_locals);
++}
++
++void TemplateTable::lload(int n) {
++ transition(vtos, ltos);
++
++ __ ld(R17_tos, Interpreter::local_offset_in_bytes(n + 1), R18_locals);
++}
++
++void TemplateTable::fload(int n) {
++ transition(vtos, ftos);
++
++ __ lfs(F15_ftos, Interpreter::local_offset_in_bytes(n), R18_locals);
++}
++
++void TemplateTable::dload(int n) {
++ transition(vtos, dtos);
++
++ __ lfd(F15_ftos, Interpreter::local_offset_in_bytes(n + 1), R18_locals);
++}
++
++void TemplateTable::aload(int n) {
++ transition(vtos, atos);
++
++ __ ld(R17_tos, Interpreter::local_offset_in_bytes(n), R18_locals);
++}
++
++void TemplateTable::aload_0() {
++ transition(vtos, atos);
++ // According to bytecode histograms, the pairs:
++ //
++ // _aload_0, _fast_igetfield
++ // _aload_0, _fast_agetfield
++ // _aload_0, _fast_fgetfield
++ //
++ // occur frequently. If RewriteFrequentPairs is set, the (slow)
++ // _aload_0 bytecode checks if the next bytecode is either
++ // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then
++ // rewrites the current bytecode into a pair bytecode; otherwise it
++ // rewrites the current bytecode into _fast_aload_0, which doesn't do
++ // the pair check anymore.
++ //
++ // Note: If the next bytecode is _getfield, the rewrite must be
++ // delayed, otherwise we may miss an opportunity for a pair.
++ //
++ // Also rewrite frequent pairs
++ // aload_0, aload_1
++ // aload_0, iload_1
++ // These pairs need only a small amount of code and are the most
++ // profitable to rewrite.
++
++ if (RewriteFrequentPairs) {
++
++ Label Lrewrite, Ldont_rewrite;
++ Register Rnext_byte = R3_ARG1,
++ Rrewrite_to = R6_ARG4,
++ Rscratch = R11_scratch1;
++
++ // Get next byte.
++ __ lbz(Rnext_byte, Bytecodes::length_for(Bytecodes::_aload_0), R14_bcp);
++
++ // If _getfield, wait to rewrite. We only want to rewrite the last two bytecodes in a pair.
++ __ cmpwi(CCR0, Rnext_byte, (unsigned int)(unsigned char)Bytecodes::_getfield);
++ __ beq(CCR0, Ldont_rewrite);
++
++ __ cmpwi(CCR1, Rnext_byte, (unsigned int)(unsigned char)Bytecodes::_fast_igetfield);
++ __ li(Rrewrite_to, (unsigned int)(unsigned char)Bytecodes::_fast_iaccess_0);
++ __ beq(CCR1, Lrewrite);
++
++ __ cmpwi(CCR0, Rnext_byte, (unsigned int)(unsigned char)Bytecodes::_fast_agetfield);
++ __ li(Rrewrite_to, (unsigned int)(unsigned char)Bytecodes::_fast_aaccess_0);
++ __ beq(CCR0, Lrewrite);
++
++ __ cmpwi(CCR1, Rnext_byte, (unsigned int)(unsigned char)Bytecodes::_fast_fgetfield);
++ __ li(Rrewrite_to, (unsigned int)(unsigned char)Bytecodes::_fast_faccess_0);
++ __ beq(CCR1, Lrewrite);
++
++ __ li(Rrewrite_to, (unsigned int)(unsigned char)Bytecodes::_fast_aload_0);
++
++ __ bind(Lrewrite);
++ patch_bytecode(Bytecodes::_aload_0, Rrewrite_to, Rscratch, false);
++ __ bind(Ldont_rewrite);
++ }
++
++ // Do the actual aload_0 (must do this after patch_bytecode, which might call the VM, where a GC might move the oop).
++ aload(0);
++}
++
++void TemplateTable::istore() {
++ transition(itos, vtos);
++
++ const Register Rindex = R11_scratch1;
++ locals_index(Rindex);
++ __ store_local_int(R17_tos, Rindex);
++}
++
++void TemplateTable::lstore() {
++ transition(ltos, vtos);
++ const Register Rindex = R11_scratch1;
++ locals_index(Rindex);
++ __ store_local_long(R17_tos, Rindex);
++}
++
++void TemplateTable::fstore() {
++ transition(ftos, vtos);
++
++ const Register Rindex = R11_scratch1;
++ locals_index(Rindex);
++ __ store_local_float(F15_ftos, Rindex);
++}
++
++void TemplateTable::dstore() {
++ transition(dtos, vtos);
++
++ const Register Rindex = R11_scratch1;
++ locals_index(Rindex);
++ __ store_local_double(F15_ftos, Rindex);
++}
++
++void TemplateTable::astore() {
++ transition(vtos, vtos);
++
++ const Register Rindex = R11_scratch1;
++ __ pop_ptr();
++ __ verify_oop_or_return_address(R17_tos, Rindex);
++ locals_index(Rindex);
++ __ store_local_ptr(R17_tos, Rindex);
++}
++
++void TemplateTable::wide_istore() {
++ transition(vtos, vtos);
++
++ const Register Rindex = R11_scratch1;
++ __ pop_i();
++ locals_index_wide(Rindex);
++ __ store_local_int(R17_tos, Rindex);
++}
++
++void TemplateTable::wide_lstore() {
++ transition(vtos, vtos);
++
++ const Register Rindex = R11_scratch1;
++ __ pop_l();
++ locals_index_wide(Rindex);
++ __ store_local_long(R17_tos, Rindex);
++}
++
++void TemplateTable::wide_fstore() {
++ transition(vtos, vtos);
++
++ const Register Rindex = R11_scratch1;
++ __ pop_f();
++ locals_index_wide(Rindex);
++ __ store_local_float(F15_ftos, Rindex);
++}
++
++void TemplateTable::wide_dstore() {
++ transition(vtos, vtos);
++
++ const Register Rindex = R11_scratch1;
++ __ pop_d();
++ locals_index_wide(Rindex);
++ __ store_local_double(F15_ftos, Rindex);
++}
++
++void TemplateTable::wide_astore() {
++ transition(vtos, vtos);
++
++ const Register Rindex = R11_scratch1;
++ __ pop_ptr();
++ __ verify_oop_or_return_address(R17_tos, Rindex);
++ locals_index_wide(Rindex);
++ __ store_local_ptr(R17_tos, Rindex);
++}
++
++void TemplateTable::iastore() {
++ transition(itos, vtos);
++
++ const Register Rindex = R3_ARG1,
++ Rstore_addr = R4_ARG2,
++ Rarray = R5_ARG3,
++ Rtemp = R6_ARG4;
++ __ pop_i(Rindex);
++ __ index_check(Rarray, Rindex, LogBytesPerInt, Rtemp, Rstore_addr);
++ __ stw(R17_tos, arrayOopDesc::base_offset_in_bytes(T_INT), Rstore_addr);
++}
++
++void TemplateTable::lastore() {
++ transition(ltos, vtos);
++
++ const Register Rindex = R3_ARG1,
++ Rstore_addr = R4_ARG2,
++ Rarray = R5_ARG3,
++ Rtemp = R6_ARG4;
++ __ pop_i(Rindex);
++ __ index_check(Rarray, Rindex, LogBytesPerLong, Rtemp, Rstore_addr);
++ __ std(R17_tos, arrayOopDesc::base_offset_in_bytes(T_LONG), Rstore_addr);
++}
++
++void TemplateTable::fastore() {
++ transition(ftos, vtos);
++
++ const Register Rindex = R3_ARG1,
++ Rstore_addr = R4_ARG2,
++ Rarray = R5_ARG3,
++ Rtemp = R6_ARG4;
++ __ pop_i(Rindex);
++ __ index_check(Rarray, Rindex, LogBytesPerInt, Rtemp, Rstore_addr);
++ __ stfs(F15_ftos, arrayOopDesc::base_offset_in_bytes(T_FLOAT), Rstore_addr);
++}
++
++void TemplateTable::dastore() {
++ transition(dtos, vtos);
++
++ const Register Rindex = R3_ARG1,
++ Rstore_addr = R4_ARG2,
++ Rarray = R5_ARG3,
++ Rtemp = R6_ARG4;
++ __ pop_i(Rindex);
++ __ index_check(Rarray, Rindex, LogBytesPerLong, Rtemp, Rstore_addr);
++ __ stfd(F15_ftos, arrayOopDesc::base_offset_in_bytes(T_DOUBLE), Rstore_addr);
++}
++
++// Pop 3 values (value, index, array) from the stack and store the value into the array.
++void TemplateTable::aastore() {
++ transition(vtos, vtos);
++
++ Label Lstore_ok, Lis_null, Ldone;
++ const Register Rindex = R3_ARG1,
++ Rarray = R4_ARG2,
++ Rscratch = R11_scratch1,
++ Rscratch2 = R12_scratch2,
++ Rarray_klass = R5_ARG3,
++ Rarray_element_klass = Rarray_klass,
++ Rvalue_klass = R6_ARG4,
++ Rstore_addr = R31; // Use register which survives VM call.
++
++ __ ld(R17_tos, Interpreter::expr_offset_in_bytes(0), R15_esp); // Get value to store.
++ __ lwz(Rindex, Interpreter::expr_offset_in_bytes(1), R15_esp); // Get index.
++ __ ld(Rarray, Interpreter::expr_offset_in_bytes(2), R15_esp); // Get array.
++
++ __ verify_oop(R17_tos);
++ __ index_check_without_pop(Rarray, Rindex, UseCompressedOops ? 2 : LogBytesPerWord, Rscratch, Rstore_addr);
++ // Rindex is dead!
++ Register Rscratch3 = Rindex;
++
++ // Do array store check - check for NULL value first.
++ __ cmpdi(CCR0, R17_tos, 0);
++ __ beq(CCR0, Lis_null);
++
++ __ load_klass(Rarray_klass, Rarray);
++ __ load_klass(Rvalue_klass, R17_tos);
++
++ // Do fast instanceof cache test.
++ __ ld(Rarray_element_klass, in_bytes(ObjArrayKlass::element_klass_offset()), Rarray_klass);
++
++ // Generate a fast subtype check. Branch to store_ok if no failure. Throw if failure.
++ __ gen_subtype_check(Rvalue_klass /*subklass*/, Rarray_element_klass /*superklass*/, Rscratch, Rscratch2, Rscratch3, Lstore_ok);
++
++ // Fell through: subtype check failed => throw an exception.
++ __ load_dispatch_table(R11_scratch1, (address*)Interpreter::_throw_ArrayStoreException_entry);
++ __ mtctr(R11_scratch1);
++ __ bctr();
++
++ __ bind(Lis_null);
++ do_oop_store(_masm, Rstore_addr, arrayOopDesc::base_offset_in_bytes(T_OBJECT), noreg /* 0 */,
++ Rscratch, Rscratch2, Rscratch3, _bs->kind(), true /* precise */, false /* check_null */);
++ __ profile_null_seen(Rscratch, Rscratch2);
++ __ b(Ldone);
++
++ // Store is OK.
++ __ bind(Lstore_ok);
++ do_oop_store(_masm, Rstore_addr, arrayOopDesc::base_offset_in_bytes(T_OBJECT), R17_tos /* value */,
++ Rscratch, Rscratch2, Rscratch3, _bs->kind(), true /* precise */, false /* check_null */);
++
++ __ bind(Ldone);
++ // Adjust esp (pops array, index and value).
++ __ addi(R15_esp, R15_esp, 3 * Interpreter::stackElementSize);
++}
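++// Note the peek-then-pop pattern above: value, index and array are read
++// via expr_offset_in_bytes() and esp is adjusted only at the very end,
++// so the operands stay on the expression stack, and thus reachable for
++// GC, across the subtype check and the possible exception path.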
++
++void TemplateTable::bastore() {
++ transition(itos, vtos);
++
++ const Register Rindex = R11_scratch1,
++ Rarray = R12_scratch2,
++ Rscratch = R3_ARG1;
++ __ pop_i(Rindex);
++ // tos: val
++ // Rarray: array ptr (popped by index_check)
++ __ index_check(Rarray, Rindex, 0, Rscratch, Rarray);
++ __ stb(R17_tos, arrayOopDesc::base_offset_in_bytes(T_BYTE), Rarray);
++}
++
++void TemplateTable::castore() {
++ transition(itos, vtos);
++
++ const Register Rindex = R11_scratch1,
++ Rarray = R12_scratch2,
++ Rscratch = R3_ARG1;
++ __ pop_i(Rindex);
++ // tos: val
++ // Rarray: array ptr (popped by index_check)
++ __ index_check(Rarray, Rindex, LogBytesPerShort, Rscratch, Rarray);
++ __ sth(R17_tos, arrayOopDesc::base_offset_in_bytes(T_CHAR), Rarray);
++}
++
++void TemplateTable::sastore() {
++ castore();
++}
++
++void TemplateTable::istore(int n) {
++ transition(itos, vtos);
++ __ stw(R17_tos, Interpreter::local_offset_in_bytes(n), R18_locals);
++}
++
++void TemplateTable::lstore(int n) {
++ transition(ltos, vtos);
++ __ std(R17_tos, Interpreter::local_offset_in_bytes(n + 1), R18_locals);
++}
++
++void TemplateTable::fstore(int n) {
++ transition(ftos, vtos);
++ __ stfs(F15_ftos, Interpreter::local_offset_in_bytes(n), R18_locals);
++}
++
++void TemplateTable::dstore(int n) {
++ transition(dtos, vtos);
++ __ stfd(F15_ftos, Interpreter::local_offset_in_bytes(n + 1), R18_locals);
++}
++
++void TemplateTable::astore(int n) {
++ transition(vtos, vtos);
++
++ __ pop_ptr();
++ __ verify_oop_or_return_address(R17_tos, R11_scratch1);
++ __ std(R17_tos, Interpreter::local_offset_in_bytes(n), R18_locals);
++}
++
++void TemplateTable::pop() {
++ transition(vtos, vtos);
++
++ __ addi(R15_esp, R15_esp, Interpreter::stackElementSize);
++}
++
++void TemplateTable::pop2() {
++ transition(vtos, vtos);
++
++ __ addi(R15_esp, R15_esp, Interpreter::stackElementSize * 2);
++}
++
++void TemplateTable::dup() {
++ transition(vtos, vtos);
++
++ __ ld(R11_scratch1, Interpreter::stackElementSize, R15_esp);
++ __ push_ptr(R11_scratch1);
++}
++
++void TemplateTable::dup_x1() {
++ transition(vtos, vtos);
++
++ Register Ra = R11_scratch1,
++ Rb = R12_scratch2;
++ // stack: ..., a, b
++ __ ld(Rb, Interpreter::stackElementSize, R15_esp);
++ __ ld(Ra, Interpreter::stackElementSize * 2, R15_esp);
++ __ std(Rb, Interpreter::stackElementSize * 2, R15_esp);
++ __ std(Ra, Interpreter::stackElementSize, R15_esp);
++ __ push_ptr(Rb);
++ // stack: ..., b, a, b
++}
++
++void TemplateTable::dup_x2() {
++ transition(vtos, vtos);
++
++ Register Ra = R11_scratch1,
++ Rb = R12_scratch2,
++ Rc = R3_ARG1;
++
++ // stack: ..., a, b, c
++ __ ld(Rc, Interpreter::stackElementSize, R15_esp); // load c
++ __ ld(Ra, Interpreter::stackElementSize * 3, R15_esp); // load a
++ __ std(Rc, Interpreter::stackElementSize * 3, R15_esp); // store c in a
++ __ ld(Rb, Interpreter::stackElementSize * 2, R15_esp); // load b
++ // stack: ..., c, b, c
++ __ std(Ra, Interpreter::stackElementSize * 2, R15_esp); // store a in b
++ // stack: ..., c, a, c
++ __ std(Rb, Interpreter::stackElementSize, R15_esp); // store b in c
++ __ push_ptr(Rc); // push c
++ // stack: ..., c, a, b, c
++}
++
++void TemplateTable::dup2() {
++ transition(vtos, vtos);
++
++ Register Ra = R11_scratch1,
++ Rb = R12_scratch2;
++ // stack: ..., a, b
++ __ ld(Rb, Interpreter::stackElementSize, R15_esp);
++ __ ld(Ra, Interpreter::stackElementSize * 2, R15_esp);
++ __ push_2ptrs(Ra, Rb);
++ // stack: ..., a, b, a, b
++}
++
++void TemplateTable::dup2_x1() {
++ transition(vtos, vtos);
++
++ Register Ra = R11_scratch1,
++ Rb = R12_scratch2,
++ Rc = R3_ARG1;
++ // stack: ..., a, b, c
++ __ ld(Rc, Interpreter::stackElementSize, R15_esp);
++ __ ld(Rb, Interpreter::stackElementSize * 2, R15_esp);
++ __ std(Rc, Interpreter::stackElementSize * 2, R15_esp);
++ __ ld(Ra, Interpreter::stackElementSize * 3, R15_esp);
++ __ std(Ra, Interpreter::stackElementSize, R15_esp);
++ __ std(Rb, Interpreter::stackElementSize * 3, R15_esp);
++ // stack: ..., b, c, a
++ __ push_2ptrs(Rb, Rc);
++ // stack: ..., b, c, a, b, c
++}
++
++void TemplateTable::dup2_x2() {
++ transition(vtos, vtos);
++
++ Register Ra = R11_scratch1,
++ Rb = R12_scratch2,
++ Rc = R3_ARG1,
++ Rd = R4_ARG2;
++ // stack: ..., a, b, c, d
++ __ ld(Rb, Interpreter::stackElementSize * 3, R15_esp);
++ __ ld(Rd, Interpreter::stackElementSize, R15_esp);
++ __ std(Rb, Interpreter::stackElementSize, R15_esp); // store b in d
++ __ std(Rd, Interpreter::stackElementSize * 3, R15_esp); // store d in b
++ __ ld(Ra, Interpreter::stackElementSize * 4, R15_esp);
++ __ ld(Rc, Interpreter::stackElementSize * 2, R15_esp);
++ __ std(Ra, Interpreter::stackElementSize * 2, R15_esp); // store a in c
++ __ std(Rc, Interpreter::stackElementSize * 4, R15_esp); // store c in a
++ // stack: ..., c, d, a, b
++ __ push_2ptrs(Rc, Rd);
++ // stack: ..., c, d, a, b, c, d
++}
++
++void TemplateTable::swap() {
++ transition(vtos, vtos);
++ // stack: ..., a, b
++
++ Register Ra = R11_scratch1,
++ Rb = R12_scratch2;
++ // stack: ..., a, b
++ __ ld(Rb, Interpreter::stackElementSize, R15_esp);
++ __ ld(Ra, Interpreter::stackElementSize * 2, R15_esp);
++ __ std(Rb, Interpreter::stackElementSize * 2, R15_esp);
++ __ std(Ra, Interpreter::stackElementSize, R15_esp);
++ // stack: ..., b, a
++}
++
++void TemplateTable::iop2(Operation op) {
++ transition(itos, itos);
++
++ Register Rscratch = R11_scratch1;
++
++ __ pop_i(Rscratch);
++ // For the shift operations:
++ // tos = number of bits to shift
++ // Rscratch = value to be shifted
++ switch (op) {
++ case add: __ add(R17_tos, Rscratch, R17_tos); break;
++ case sub: __ sub(R17_tos, Rscratch, R17_tos); break;
++ case mul: __ mullw(R17_tos, Rscratch, R17_tos); break;
++ case _and: __ andr(R17_tos, Rscratch, R17_tos); break;
++ case _or: __ orr(R17_tos, Rscratch, R17_tos); break;
++ case _xor: __ xorr(R17_tos, Rscratch, R17_tos); break;
++ case shl: __ rldicl(R17_tos, R17_tos, 0, 64-5); __ slw(R17_tos, Rscratch, R17_tos); break;
++ case shr: __ rldicl(R17_tos, R17_tos, 0, 64-5); __ sraw(R17_tos, Rscratch, R17_tos); break;
++ case ushr: __ rldicl(R17_tos, R17_tos, 0, 64-5); __ srw(R17_tos, Rscratch, R17_tos); break;
++ default: ShouldNotReachHere();
++ }
++}
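++// For the shift cases above, rldicl(R17_tos, R17_tos, 0, 64-5) clears all
++// but the 5 least significant bits of the shift count, i.e. the
++// "count & 0x1f" masking the JVM spec mandates for 32-bit shifts.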
++
++void TemplateTable::lop2(Operation op) {
++ transition(ltos, ltos);
++
++ Register Rscratch = R11_scratch1;
++ __ pop_l(Rscratch);
++ switch (op) {
++ case add: __ add(R17_tos, Rscratch, R17_tos); break;
++ case sub: __ sub(R17_tos, Rscratch, R17_tos); break;
++ case _and: __ andr(R17_tos, Rscratch, R17_tos); break;
++ case _or: __ orr(R17_tos, Rscratch, R17_tos); break;
++ case _xor: __ xorr(R17_tos, Rscratch, R17_tos); break;
++ default: ShouldNotReachHere();
++ }
++}
++
++void TemplateTable::idiv() {
++ transition(itos, itos);
++
++ Label Lnormal, Lexception, Ldone;
++ Register Rdividend = R11_scratch1; // Used by irem.
++
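++ // Range-check trick: divisor + 1 maps {-1, 0, 1} to {0, 1, 2}, so a
++ // single unsigned compare against 2 filters out all special cases
++ // (divisors < -1 wrap around to large unsigned values and also take
++ // the bgt below).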
++ __ addi(R0, R17_tos, 1);
++ __ cmplwi(CCR0, R0, 2);
++ __ bgt(CCR0, Lnormal); // divisor < -1 or > 1
++
++ __ cmpwi(CCR1, R17_tos, 0);
++ __ beq(CCR1, Lexception); // divisor == 0
++
++ __ pop_i(Rdividend);
++ __ mullw(R17_tos, Rdividend, R17_tos); // div by +/-1
++ __ b(Ldone);
++
++ __ bind(Lexception);
++ __ load_dispatch_table(R11_scratch1, (address*)Interpreter::_throw_ArithmeticException_entry);
++ __ mtctr(R11_scratch1);
++ __ bctr();
++
++ __ align(32, 12);
++ __ bind(Lnormal);
++ __ pop_i(Rdividend);
++ __ divw(R17_tos, Rdividend, R17_tos); // min_jint/-1 can't reach this point (handled above).
++ __ bind(Ldone);
++}
++
++void TemplateTable::irem() {
++ transition(itos, itos);
++
++ __ mr(R12_scratch2, R17_tos);
++ idiv();
++ __ mullw(R17_tos, R17_tos, R12_scratch2);
++ __ subf(R17_tos, R17_tos, R11_scratch1); // Dividend set by idiv.
++}
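++// irem piggybacks on idiv: the quotient ends up in tos and the dividend
++// is still in R11_scratch1 (see the Rdividend comment in idiv), so the
++// remainder follows from dividend - (dividend / divisor) * divisor,
++// with the divisor preserved in R12_scratch2 across the idiv() call.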
++
++void TemplateTable::lmul() {
++ transition(ltos, ltos);
++
++ __ pop_l(R11_scratch1);
++ __ mulld(R17_tos, R11_scratch1, R17_tos);
++}
++
++void TemplateTable::ldiv() {
++ transition(ltos, ltos);
++
++ Label Lnormal, Lexception, Ldone;
++ Register Rdividend = R11_scratch1; // Used by lrem.
++
++ __ addi(R0, R17_tos, 1);
++ __ cmpldi(CCR0, R0, 2);
++ __ bgt(CCR0, Lnormal); // divisor < -1 or > 1
++
++ __ cmpdi(CCR1, R17_tos, 0);
++ __ beq(CCR1, Lexception); // divisor == 0
++
++ __ pop_l(Rdividend);
++ __ mulld(R17_tos, Rdividend, R17_tos); // div by +/-1
++ __ b(Ldone);
++
++ __ bind(Lexception);
++ __ load_dispatch_table(R11_scratch1, (address*)Interpreter::_throw_ArithmeticException_entry);
++ __ mtctr(R11_scratch1);
++ __ bctr();
++
++ __ align(32, 12);
++ __ bind(Lnormal);
++ __ pop_l(Rdividend);
++ __ divd(R17_tos, Rdividend, R17_tos); // min_jlong/-1 can't reach this point (handled above).
++ __ bind(Ldone);
++}
++
++void TemplateTable::lrem() {
++ transition(ltos, ltos);
++
++ __ mr(R12_scratch2, R17_tos);
++ ldiv();
++ __ mulld(R17_tos, R17_tos, R12_scratch2);
++ __ subf(R17_tos, R17_tos, R11_scratch1); // Dividend set by ldiv.
++}
++
++void TemplateTable::lshl() {
++ transition(itos, ltos);
++
++ __ rldicl(R17_tos, R17_tos, 0, 64-6); // Extract least significant bits.
++ __ pop_l(R11_scratch1);
++ __ sld(R17_tos, R11_scratch1, R17_tos);
++}
++
++void TemplateTable::lshr() {
++ transition(itos, ltos);
++
++ __ rldicl(R17_tos, R17_tos, 0, 64-6); // Extract least significant bits.
++ __ pop_l(R11_scratch1);
++ __ srad(R17_tos, R11_scratch1, R17_tos);
++}
++
++void TemplateTable::lushr() {
++ transition(itos, ltos);
++
++ __ rldicl(R17_tos, R17_tos, 0, 64-6); // Extract least significant bits.
++ __ pop_l(R11_scratch1);
++ __ srd(R17_tos, R11_scratch1, R17_tos);
++}
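++// lshl, lshr and lushr mask the shift count with rldicl(..., 0, 64-6),
++// keeping its 6 least significant bits ("count & 0x3f") as the JVM spec
++// mandates for 64-bit shifts; the count arrives as an int in tos.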
++
++void TemplateTable::fop2(Operation op) {
++ transition(ftos, ftos);
++
++ switch (op) {
++ case add: __ pop_f(F0_SCRATCH); __ fadds(F15_ftos, F0_SCRATCH, F15_ftos); break;
++ case sub: __ pop_f(F0_SCRATCH); __ fsubs(F15_ftos, F0_SCRATCH, F15_ftos); break;
++ case mul: __ pop_f(F0_SCRATCH); __ fmuls(F15_ftos, F0_SCRATCH, F15_ftos); break;
++ case div: __ pop_f(F0_SCRATCH); __ fdivs(F15_ftos, F0_SCRATCH, F15_ftos); break;
++ case rem:
++ __ pop_f(F1_ARG1);
++ __ fmr(F2_ARG2, F15_ftos);
++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem));
++ __ fmr(F15_ftos, F1_RET);
++ break;
++
++ default: ShouldNotReachHere();
++ }
++}
++
++void TemplateTable::dop2(Operation op) {
++ transition(dtos, dtos);
++
++ switch (op) {
++ case add: __ pop_d(F0_SCRATCH); __ fadd(F15_ftos, F0_SCRATCH, F15_ftos); break;
++ case sub: __ pop_d(F0_SCRATCH); __ fsub(F15_ftos, F0_SCRATCH, F15_ftos); break;
++ case mul: __ pop_d(F0_SCRATCH); __ fmul(F15_ftos, F0_SCRATCH, F15_ftos); break;
++ case div: __ pop_d(F0_SCRATCH); __ fdiv(F15_ftos, F0_SCRATCH, F15_ftos); break;
++ case rem:
++ __ pop_d(F1_ARG1);
++ __ fmr(F2_ARG2, F15_ftos);
++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem));
++ __ fmr(F15_ftos, F1_RET);
++ break;
++
++ default: ShouldNotReachHere();
++ }
++}
++
++// Negate the value in the TOS cache.
++void TemplateTable::ineg() {
++ transition(itos, itos);
++
++ __ neg(R17_tos, R17_tos);
++}
++
++// Negate the value in the TOS cache.
++void TemplateTable::lneg() {
++ transition(ltos, ltos);
++
++ __ neg(R17_tos, R17_tos);
++}
++
++void TemplateTable::fneg() {
++ transition(ftos, ftos);
++
++ __ fneg(F15_ftos, F15_ftos);
++}
++
++void TemplateTable::dneg() {
++ transition(dtos, dtos);
++
++ __ fneg(F15_ftos, F15_ftos);
++}
++
++// Increments a local variable in place.
++void TemplateTable::iinc() {
++ transition(vtos, vtos);
++
++ const Register Rindex = R11_scratch1,
++ Rincrement = R0,
++ Rvalue = R12_scratch2;
++
++ locals_index(Rindex); // Load locals index from bytecode stream.
++ __ lbz(Rincrement, 2, R14_bcp); // Load increment from the bytecode stream.
++ __ extsb(Rincrement, Rincrement);
++
++ __ load_local_int(Rvalue, Rindex, Rindex); // Puts address of local into Rindex.
++
++ __ add(Rvalue, Rincrement, Rvalue);
++ __ stw(Rvalue, 0, Rindex);
++}
++
++void TemplateTable::wide_iinc() {
++ transition(vtos, vtos);
++
++ Register Rindex = R11_scratch1,
++ Rlocals_addr = Rindex,
++ Rincr = R12_scratch2;
++ locals_index_wide(Rindex);
++ __ get_2_byte_integer_at_bcp(4, Rincr, InterpreterMacroAssembler::Signed);
++ __ load_local_int(R17_tos, Rlocals_addr, Rindex);
++ __ add(R17_tos, Rincr, R17_tos);
++ __ stw(R17_tos, 0, Rlocals_addr);
++}
++
++void TemplateTable::convert() {
++  // %%%%% Factor this first part across platforms
++#ifdef ASSERT
++ TosState tos_in = ilgl;
++ TosState tos_out = ilgl;
++ switch (bytecode()) {
++ case Bytecodes::_i2l: // fall through
++ case Bytecodes::_i2f: // fall through
++ case Bytecodes::_i2d: // fall through
++ case Bytecodes::_i2b: // fall through
++ case Bytecodes::_i2c: // fall through
++ case Bytecodes::_i2s: tos_in = itos; break;
++ case Bytecodes::_l2i: // fall through
++ case Bytecodes::_l2f: // fall through
++ case Bytecodes::_l2d: tos_in = ltos; break;
++ case Bytecodes::_f2i: // fall through
++ case Bytecodes::_f2l: // fall through
++ case Bytecodes::_f2d: tos_in = ftos; break;
++ case Bytecodes::_d2i: // fall through
++ case Bytecodes::_d2l: // fall through
++ case Bytecodes::_d2f: tos_in = dtos; break;
++ default : ShouldNotReachHere();
++ }
++ switch (bytecode()) {
++ case Bytecodes::_l2i: // fall through
++ case Bytecodes::_f2i: // fall through
++ case Bytecodes::_d2i: // fall through
++ case Bytecodes::_i2b: // fall through
++ case Bytecodes::_i2c: // fall through
++ case Bytecodes::_i2s: tos_out = itos; break;
++ case Bytecodes::_i2l: // fall through
++ case Bytecodes::_f2l: // fall through
++ case Bytecodes::_d2l: tos_out = ltos; break;
++ case Bytecodes::_i2f: // fall through
++ case Bytecodes::_l2f: // fall through
++ case Bytecodes::_d2f: tos_out = ftos; break;
++ case Bytecodes::_i2d: // fall through
++ case Bytecodes::_l2d: // fall through
++ case Bytecodes::_f2d: tos_out = dtos; break;
++ default : ShouldNotReachHere();
++ }
++ transition(tos_in, tos_out);
++#endif
++
++ // Conversion
++ Label done;
++ switch (bytecode()) {
++ case Bytecodes::_i2l:
++ __ extsw(R17_tos, R17_tos);
++ break;
++
++ case Bytecodes::_l2i:
++ // Nothing to do, we'll continue to work with the lower bits.
++ break;
++
++ case Bytecodes::_i2b:
++ __ extsb(R17_tos, R17_tos);
++ break;
++
++ case Bytecodes::_i2c:
++ __ rldicl(R17_tos, R17_tos, 0, 64-2*8);
++ break;
++
++ case Bytecodes::_i2s:
++ __ extsh(R17_tos, R17_tos);
++ break;
++
++ case Bytecodes::_i2d:
++ __ extsw(R17_tos, R17_tos);
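++      // fall through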
++ case Bytecodes::_l2d:
++ __ push_l_pop_d();
++ __ fcfid(F15_ftos, F15_ftos);
++ break;
++
++ case Bytecodes::_i2f:
++ __ extsw(R17_tos, R17_tos);
++ __ push_l_pop_d();
++ if (VM_Version::has_fcfids()) { // fcfids is >= Power7 only
++ // Comment: alternatively, load with sign extend could be done by lfiwax.
++ __ fcfids(F15_ftos, F15_ftos);
++ } else {
++ __ fcfid(F15_ftos, F15_ftos);
++ __ frsp(F15_ftos, F15_ftos);
++ }
++ break;
++
++ case Bytecodes::_l2f:
++ if (VM_Version::has_fcfids()) { // fcfids is >= Power7 only
++ __ push_l_pop_d();
++ __ fcfids(F15_ftos, F15_ftos);
++ } else {
++ // Avoid rounding problem when result should be 0x3f800001: need fixup code before fcfid+frsp.
++ __ mr(R3_ARG1, R17_tos);
++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::l2f));
++ __ fmr(F15_ftos, F1_RET);
++ }
++ break;
++
++ case Bytecodes::_f2d:
++ // empty
++ break;
++
++ case Bytecodes::_d2f:
++ __ frsp(F15_ftos, F15_ftos);
++ break;
++
++ case Bytecodes::_d2i:
++ case Bytecodes::_f2i:
++ __ fcmpu(CCR0, F15_ftos, F15_ftos);
++ __ li(R17_tos, 0); // 0 in case of NAN
++ __ bso(CCR0, done);
++ __ fctiwz(F15_ftos, F15_ftos);
++ __ push_d_pop_l();
++ break;
++
++ case Bytecodes::_d2l:
++ case Bytecodes::_f2l:
++ __ fcmpu(CCR0, F15_ftos, F15_ftos);
++ __ li(R17_tos, 0); // 0 in case of NAN
++ __ bso(CCR0, done);
++ __ fctidz(F15_ftos, F15_ftos);
++ __ push_d_pop_l();
++ break;
++
++ default: ShouldNotReachHere();
++ }
++ __ bind(done);
++}
++
++// Long compare
++void TemplateTable::lcmp() {
++ transition(ltos, itos);
++
++ const Register Rscratch = R11_scratch1;
++ __ pop_l(Rscratch); // first operand, deeper in stack
++
++ __ cmpd(CCR0, Rscratch, R17_tos); // compare
++ __ mfcr(R17_tos); // set bit 32..33 as follows: <: 0b10, =: 0b00, >: 0b01
++ __ srwi(Rscratch, R17_tos, 30);
++ __ srawi(R17_tos, R17_tos, 31);
++ __ orr(R17_tos, Rscratch, R17_tos); // set result as follows: <: -1, =: 0, >: 1
++}
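++
++// Worked example (illustrative, not from the original sources) of the
++// branch-free -1/0/1 computation above: mfcr copies CR0's LT bit to bit 31
++// and its GT bit to bit 30 of the low word of R17_tos. With cr denoting
++// that word:
++//
++//   jint lcmp_sketch(juint cr) {
++//     juint lt_gt = cr >> 30;       // 0b10 if <, 0b00 if ==, 0b01 if >
++//     jint  sign  = (jint)cr >> 31; // -1 if <, 0 otherwise
++//     return sign | (jint)lt_gt;    // yields -1 / 0 / 1
++//   }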
++
++// fcmpl/fcmpg and dcmpl/dcmpg bytecodes
++// unordered_result == -1 => fcmpl or dcmpl
++// unordered_result == 1 => fcmpg or dcmpg
++void TemplateTable::float_cmp(bool is_float, int unordered_result) {
++ const FloatRegister Rfirst = F0_SCRATCH,
++ Rsecond = F15_ftos;
++ const Register Rscratch = R11_scratch1;
++
++ if (is_float) {
++ __ pop_f(Rfirst);
++ } else {
++ __ pop_d(Rfirst);
++ }
++
++ Label Lunordered, Ldone;
++ __ fcmpu(CCR0, Rfirst, Rsecond); // compare
++ if (unordered_result) {
++ __ bso(CCR0, Lunordered);
++ }
++ __ mfcr(R17_tos); // set bit 32..33 as follows: <: 0b10, =: 0b00, >: 0b01
++ __ srwi(Rscratch, R17_tos, 30);
++ __ srawi(R17_tos, R17_tos, 31);
++ __ orr(R17_tos, Rscratch, R17_tos); // set result as follows: <: -1, =: 0, >: 1
++ if (unordered_result) {
++ __ b(Ldone);
++ __ bind(Lunordered);
++ __ load_const_optimized(R17_tos, unordered_result);
++ }
++ __ bind(Ldone);
++}
++
++// Branch_conditional which takes TemplateTable::Condition.
++void TemplateTable::branch_conditional(ConditionRegister crx, TemplateTable::Condition cc, Label& L, bool invert) {
++ bool positive = false;
++ Assembler::Condition cond = Assembler::equal;
++ switch (cc) {
++ case TemplateTable::equal: positive = true ; cond = Assembler::equal ; break;
++ case TemplateTable::not_equal: positive = false; cond = Assembler::equal ; break;
++ case TemplateTable::less: positive = true ; cond = Assembler::less ; break;
++ case TemplateTable::less_equal: positive = false; cond = Assembler::greater; break;
++ case TemplateTable::greater: positive = true ; cond = Assembler::greater; break;
++ case TemplateTable::greater_equal: positive = false; cond = Assembler::less ; break;
++ default: ShouldNotReachHere();
++ }
++ int bo = (positive != invert) ? Assembler::bcondCRbiIs1 : Assembler::bcondCRbiIs0;
++ int bi = Assembler::bi0(crx, cond);
++ __ bc(bo, bi, L);
++}
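++
++// Note: the condition register has no dedicated bits for not_equal,
++// less_equal or greater_equal, so the table above tests the complementary
++// bit (e.g. less_equal tests the "greater" bit) with positive == false;
++// (positive != invert) then selects branch-on-bit-set (bcondCRbiIs1)
++// vs. branch-on-bit-clear (bcondCRbiIs0).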
++
++void TemplateTable::branch(bool is_jsr, bool is_wide) {
++
++ // Note: on SPARC, we use InterpreterMacroAssembler::if_cmp also.
++ __ verify_thread();
++
++ const Register Rscratch1 = R11_scratch1,
++ Rscratch2 = R12_scratch2,
++ Rscratch3 = R3_ARG1,
++ R4_counters = R4_ARG2,
++ bumped_count = R31,
++ Rdisp = R22_tmp2;
++
++ __ profile_taken_branch(Rscratch1, bumped_count);
++
++ // Get (wide) offset.
++ if (is_wide) {
++ __ get_4_byte_integer_at_bcp(1, Rdisp, InterpreterMacroAssembler::Signed);
++ } else {
++ __ get_2_byte_integer_at_bcp(1, Rdisp, InterpreterMacroAssembler::Signed);
++ }
++
++ // --------------------------------------------------------------------------
++ // Handle all the JSR stuff here, then exit.
++ // It's much shorter and cleaner than intermingling with the
++ // non-JSR normal-branch stuff occurring below.
++ if (is_jsr) {
++    // Compute return address as bci in R17_tos.
++ __ ld(Rscratch1, in_bytes(Method::const_offset()), R19_method);
++ __ addi(Rscratch2, R14_bcp, -in_bytes(ConstMethod::codes_offset()) + (is_wide ? 5 : 3));
++ __ subf(R17_tos, Rscratch1, Rscratch2);
++
++ // Bump bcp to target of JSR.
++ __ add(R14_bcp, Rdisp, R14_bcp);
++ // Push returnAddress for "ret" on stack.
++ __ push_ptr(R17_tos);
++ // And away we go!
++ __ dispatch_next(vtos);
++ return;
++ }
++
++ // --------------------------------------------------------------------------
++ // Normal (non-jsr) branch handling
++
++ const bool increment_invocation_counter_for_backward_branches = UseCompiler && UseLoopCounter;
++ if (increment_invocation_counter_for_backward_branches) {
++ //__ unimplemented("branch invocation counter");
++
++ Label Lforward;
++ __ add(R14_bcp, Rdisp, R14_bcp); // Add to bc addr.
++
++ // Check branch direction.
++ __ cmpdi(CCR0, Rdisp, 0);
++ __ bgt(CCR0, Lforward);
++
++ __ get_method_counters(R19_method, R4_counters, Lforward);
++
++ if (TieredCompilation) {
++ Label Lno_mdo, Loverflow;
++ const int increment = InvocationCounter::count_increment;
++ const int mask = ((1 << Tier0BackedgeNotifyFreqLog) - 1) << InvocationCounter::count_shift;
++ if (ProfileInterpreter) {
++ Register Rmdo = Rscratch1;
++
++ // If no method data exists, go to profile_continue.
++ __ ld(Rmdo, in_bytes(Method::method_data_offset()), R19_method);
++ __ cmpdi(CCR0, Rmdo, 0);
++ __ beq(CCR0, Lno_mdo);
++
++ // Increment backedge counter in the MDO.
++ const int mdo_bc_offs = in_bytes(MethodData::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
++ __ lwz(Rscratch2, mdo_bc_offs, Rmdo);
++ __ load_const_optimized(Rscratch3, mask, R0);
++ __ addi(Rscratch2, Rscratch2, increment);
++ __ stw(Rscratch2, mdo_bc_offs, Rmdo);
++ __ and_(Rscratch3, Rscratch2, Rscratch3);
++ __ bne(CCR0, Lforward);
++ __ b(Loverflow);
++ }
++
++ // If there's no MDO, increment counter in method.
++ const int mo_bc_offs = in_bytes(MethodCounters::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
++ __ bind(Lno_mdo);
++ __ lwz(Rscratch2, mo_bc_offs, R4_counters);
++ __ load_const_optimized(Rscratch3, mask, R0);
++ __ addi(Rscratch2, Rscratch2, increment);
++      __ stw(Rscratch2, mo_bc_offs, R4_counters);
++ __ and_(Rscratch3, Rscratch2, Rscratch3);
++ __ bne(CCR0, Lforward);
++
++ __ bind(Loverflow);
++
++ // Notify point for loop, pass branch bytecode.
++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), R14_bcp, true);
++
++ // Was an OSR adapter generated?
++      // R3_RET = osr nmethod
++ __ cmpdi(CCR0, R3_RET, 0);
++ __ beq(CCR0, Lforward);
++
++ // Has the nmethod been invalidated already?
++ __ lwz(R0, nmethod::entry_bci_offset(), R3_RET);
++ __ cmpwi(CCR0, R0, InvalidOSREntryBci);
++ __ beq(CCR0, Lforward);
++
++ // Migrate the interpreter frame off of the stack.
++ // We can use all registers because we will not return to interpreter from this point.
++
++ // Save nmethod.
++ const Register osr_nmethod = R31;
++ __ mr(osr_nmethod, R3_RET);
++ __ set_top_ijava_frame_at_SP_as_last_Java_frame(R1_SP, R11_scratch1);
++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin), R16_thread);
++ __ reset_last_Java_frame();
++ // OSR buffer is in ARG1.
++
++ // Remove the interpreter frame.
++ __ merge_frames(/*top_frame_sp*/ R21_sender_SP, /*return_pc*/ R0, R11_scratch1, R12_scratch2);
++
++ // Jump to the osr code.
++ __ ld(R11_scratch1, nmethod::osr_entry_point_offset(), osr_nmethod);
++ __ mtlr(R0);
++ __ mtctr(R11_scratch1);
++ __ bctr();
++
++ } else {
++
++ const Register invoke_ctr = Rscratch1;
++ // Update Backedge branch separately from invocations.
++ __ increment_backedge_counter(R4_counters, invoke_ctr, Rscratch2, Rscratch3);
++
++ if (ProfileInterpreter) {
++ __ test_invocation_counter_for_mdp(invoke_ctr, Rscratch2, Lforward);
++ if (UseOnStackReplacement) {
++ __ test_backedge_count_for_osr(bumped_count, R14_bcp, Rscratch2);
++ }
++ } else {
++ if (UseOnStackReplacement) {
++ __ test_backedge_count_for_osr(invoke_ctr, R14_bcp, Rscratch2);
++ }
++ }
++ }
++
++ __ bind(Lforward);
++
++ } else {
++ // Bump bytecode pointer by displacement (take the branch).
++ __ add(R14_bcp, Rdisp, R14_bcp); // Add to bc addr.
++ }
++ // Continue with bytecode @ target.
++ // %%%%% Like Intel, could speed things up by moving bytecode fetch to code above,
++ // %%%%% and changing dispatch_next to dispatch_only.
++ __ dispatch_next(vtos);
++}
++
++// Helper function for if_cmp* methods below.
++// Factored out common compare and branch code.
++void TemplateTable::if_cmp_common(Register Rfirst, Register Rsecond, Register Rscratch1, Register Rscratch2, Condition cc, bool is_jint, bool cmp0) {
++ Label Lnot_taken;
++ // Note: The condition code we get is the condition under which we
++  // *fall through*! So we have to invert the CC here.
++
++ if (is_jint) {
++ if (cmp0) {
++ __ cmpwi(CCR0, Rfirst, 0);
++ } else {
++ __ cmpw(CCR0, Rfirst, Rsecond);
++ }
++ } else {
++ if (cmp0) {
++ __ cmpdi(CCR0, Rfirst, 0);
++ } else {
++ __ cmpd(CCR0, Rfirst, Rsecond);
++ }
++ }
++ branch_conditional(CCR0, cc, Lnot_taken, /*invert*/ true);
++
++  // Condition is false => Jump!
++ branch(false, false);
++
++ // Condition is not true => Continue.
++ __ align(32, 12);
++ __ bind(Lnot_taken);
++ __ profile_not_taken_branch(Rscratch1, Rscratch2);
++}
++
++// Compare integer values with zero and fall through if CC holds, branch away otherwise.
++void TemplateTable::if_0cmp(Condition cc) {
++ transition(itos, vtos);
++
++ if_cmp_common(R17_tos, noreg, R11_scratch1, R12_scratch2, cc, true, true);
++}
++
++// Compare integer values and fall through if CC holds, branch away otherwise.
++//
++// Interface:
++// - Rfirst: First operand (older stack value)
++// - tos: Second operand (younger stack value)
++void TemplateTable::if_icmp(Condition cc) {
++ transition(itos, vtos);
++
++ const Register Rfirst = R0,
++ Rsecond = R17_tos;
++
++ __ pop_i(Rfirst);
++ if_cmp_common(Rfirst, Rsecond, R11_scratch1, R12_scratch2, cc, true, false);
++}
++
++void TemplateTable::if_nullcmp(Condition cc) {
++ transition(atos, vtos);
++
++ if_cmp_common(R17_tos, noreg, R11_scratch1, R12_scratch2, cc, false, true);
++}
++
++void TemplateTable::if_acmp(Condition cc) {
++ transition(atos, vtos);
++
++ const Register Rfirst = R0,
++ Rsecond = R17_tos;
++
++ __ pop_ptr(Rfirst);
++ if_cmp_common(Rfirst, Rsecond, R11_scratch1, R12_scratch2, cc, false, false);
++}
++
++void TemplateTable::ret() {
++ locals_index(R11_scratch1);
++ __ load_local_ptr(R17_tos, R11_scratch1, R11_scratch1);
++
++ __ profile_ret(vtos, R17_tos, R11_scratch1, R12_scratch2);
++
++ __ ld(R11_scratch1, in_bytes(Method::const_offset()), R19_method);
++ __ add(R11_scratch1, R17_tos, R11_scratch1);
++ __ addi(R14_bcp, R11_scratch1, in_bytes(ConstMethod::codes_offset()));
++ __ dispatch_next(vtos);
++}
++
++void TemplateTable::wide_ret() {
++ transition(vtos, vtos);
++
++ const Register Rindex = R3_ARG1,
++ Rscratch1 = R11_scratch1,
++ Rscratch2 = R12_scratch2;
++
++ locals_index_wide(Rindex);
++ __ load_local_ptr(R17_tos, R17_tos, Rindex);
++ __ profile_ret(vtos, R17_tos, Rscratch1, R12_scratch2);
++ // Tos now contains the bci, compute the bcp from that.
++ __ ld(Rscratch1, in_bytes(Method::const_offset()), R19_method);
++ __ addi(Rscratch2, R17_tos, in_bytes(ConstMethod::codes_offset()));
++ __ add(R14_bcp, Rscratch1, Rscratch2);
++ __ dispatch_next(vtos);
++}
++
++void TemplateTable::tableswitch() {
++ transition(itos, vtos);
++
++ Label Ldispatch, Ldefault_case;
++ Register Rlow_byte = R3_ARG1,
++ Rindex = Rlow_byte,
++ Rhigh_byte = R4_ARG2,
++ Rdef_offset_addr = R5_ARG3, // is going to contain address of default offset
++ Rscratch1 = R11_scratch1,
++ Rscratch2 = R12_scratch2,
++ Roffset = R6_ARG4;
++
++ // Align bcp.
++ __ addi(Rdef_offset_addr, R14_bcp, BytesPerInt);
++ __ clrrdi(Rdef_offset_addr, Rdef_offset_addr, log2_long((jlong)BytesPerInt));
++
++ // Load lo & hi.
++ __ lwz(Rlow_byte, BytesPerInt, Rdef_offset_addr);
++ __ lwz(Rhigh_byte, BytesPerInt * 2, Rdef_offset_addr);
++
++ // Check for default case (=index outside [low,high]).
++ __ cmpw(CCR0, R17_tos, Rlow_byte);
++ __ cmpw(CCR1, R17_tos, Rhigh_byte);
++ __ blt(CCR0, Ldefault_case);
++ __ bgt(CCR1, Ldefault_case);
++
++ // Lookup dispatch offset.
++ __ sub(Rindex, R17_tos, Rlow_byte);
++ __ extsw(Rindex, Rindex);
++ __ profile_switch_case(Rindex, Rhigh_byte /* scratch */, Rscratch1, Rscratch2);
++ __ sldi(Rindex, Rindex, LogBytesPerInt);
++ __ addi(Rindex, Rindex, 3 * BytesPerInt);
++ __ lwax(Roffset, Rdef_offset_addr, Rindex);
++ __ b(Ldispatch);
++
++ __ bind(Ldefault_case);
++ __ profile_switch_default(Rhigh_byte, Rscratch1);
++ __ lwa(Roffset, 0, Rdef_offset_addr);
++
++ __ bind(Ldispatch);
++
++ __ add(R14_bcp, Roffset, R14_bcp);
++ __ dispatch_next(vtos);
++}
++
++void TemplateTable::lookupswitch() {
++ transition(itos, itos);
++ __ stop("lookupswitch bytecode should have been rewritten");
++}
++
++// Table switch using linear search through cases.
++// Bytecode stream format:
++// Bytecode (1) | 4-byte padding | default offset (4) | count (4) | value/offset pair1 (8) | value/offset pair2 (8) | ...
++// Note: Everything is in big-endian format here, so on little-endian machines
++// we would have to reverse the offset, the count and the compare value.
++void TemplateTable::fast_linearswitch() {
++ transition(itos, vtos);
++
++ Label Lloop_entry, Lsearch_loop, Lfound, Lcontinue_execution, Ldefault_case;
++
++ Register Rcount = R3_ARG1,
++ Rcurrent_pair = R4_ARG2,
++ Rdef_offset_addr = R5_ARG3, // Is going to contain address of default offset.
++ Roffset = R31, // Might need to survive C call.
++ Rvalue = R12_scratch2,
++ Rscratch = R11_scratch1,
++ Rcmp_value = R17_tos;
++
++ // Align bcp.
++ __ addi(Rdef_offset_addr, R14_bcp, BytesPerInt);
++ __ clrrdi(Rdef_offset_addr, Rdef_offset_addr, log2_long((jlong)BytesPerInt));
++
++ // Setup loop counter and limit.
++ __ lwz(Rcount, BytesPerInt, Rdef_offset_addr); // Load count.
++ __ addi(Rcurrent_pair, Rdef_offset_addr, 2 * BytesPerInt); // Rcurrent_pair now points to first pair.
++
++ // Set up search loop.
++ __ cmpwi(CCR0, Rcount, 0);
++ __ beq(CCR0, Ldefault_case);
++
++ __ mtctr(Rcount);
++
++ // linear table search
++ __ bind(Lsearch_loop);
++
++ __ lwz(Rvalue, 0, Rcurrent_pair);
++ __ lwa(Roffset, 1 * BytesPerInt, Rcurrent_pair);
++
++ __ cmpw(CCR0, Rvalue, Rcmp_value);
++ __ beq(CCR0, Lfound);
++
++ __ addi(Rcurrent_pair, Rcurrent_pair, 2 * BytesPerInt);
++ __ bdnz(Lsearch_loop);
++
++ // default case
++ __ bind(Ldefault_case);
++
++ __ lwa(Roffset, 0, Rdef_offset_addr);
++ if (ProfileInterpreter) {
++ __ profile_switch_default(Rdef_offset_addr, Rcount/* scratch */);
++ __ b(Lcontinue_execution);
++ }
++
++ // Entry found, skip Roffset bytecodes and continue.
++ __ bind(Lfound);
++ if (ProfileInterpreter) {
++    // Compute the number of the pair we hit. The pairs start 2 ints past
++    // Rdef_offset_addr, so strip that header before dividing by the pair size.
++ __ sub(Rcurrent_pair, Rcurrent_pair, Rdef_offset_addr);
++ __ addi(Rcurrent_pair, Rcurrent_pair, - 2 * BytesPerInt);
++ __ srdi(Rcurrent_pair, Rcurrent_pair, LogBytesPerInt + 1);
++ __ profile_switch_case(Rcurrent_pair, Rcount /*scratch*/, Rdef_offset_addr/*scratch*/, Rscratch);
++ __ bind(Lcontinue_execution);
++ }
++ __ add(R14_bcp, Roffset, R14_bcp);
++ __ dispatch_next(vtos);
++}
++
++// Table switch using binary search (value/offset pairs are ordered).
++// Bytecode stream format:
++// Bytecode (1) | 4-byte padding | default offset (4) | count (4) | value/offset pair1 (8) | value/offset pair2 (8) | ...
++// Note: Everything is in big-endian format here, so on little-endian machines
++// we would have to reverse the offset, the count and the compare value.
++void TemplateTable::fast_binaryswitch() {
++
++ transition(itos, vtos);
++ // Implementation using the following core algorithm: (copied from Intel)
++ //
++ // int binary_search(int key, LookupswitchPair* array, int n) {
++ // // Binary search according to "Methodik des Programmierens" by
++ // // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985.
++ // int i = 0;
++ // int j = n;
++ // while (i+1 < j) {
++ // // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q)
++ // // with Q: for all i: 0 <= i < n: key < a[i]
++  //     // where a stands for the array and assuming that the (nonexistent)
++ // // element a[n] is infinitely big.
++ // int h = (i + j) >> 1;
++ // // i < h < j
++ // if (key < array[h].fast_match()) {
++ // j = h;
++ // } else {
++ // i = h;
++ // }
++ // }
++ // // R: a[i] <= key < a[i+1] or Q
++ // // (i.e., if key is within array, i is the correct index)
++ // return i;
++ // }
++
++ // register allocation
++ const Register Rkey = R17_tos; // already set (tosca)
++ const Register Rarray = R3_ARG1;
++ const Register Ri = R4_ARG2;
++ const Register Rj = R5_ARG3;
++ const Register Rh = R6_ARG4;
++ const Register Rscratch = R11_scratch1;
++
++ const int log_entry_size = 3;
++ const int entry_size = 1 << log_entry_size;
++
++ Label found;
++
++  // Find array start.
++ __ addi(Rarray, R14_bcp, 3 * BytesPerInt);
++ __ clrrdi(Rarray, Rarray, log2_long((jlong)BytesPerInt));
++
++ // initialize i & j
++ __ li(Ri,0);
++ __ lwz(Rj, -BytesPerInt, Rarray);
++
++ // and start.
++ Label entry;
++ __ b(entry);
++
++ // binary search loop
++ { Label loop;
++ __ bind(loop);
++ // int h = (i + j) >> 1;
++ __ srdi(Rh, Rh, 1);
++ // if (key < array[h].fast_match()) {
++ // j = h;
++ // } else {
++ // i = h;
++ // }
++ __ sldi(Rscratch, Rh, log_entry_size);
++ __ lwzx(Rscratch, Rscratch, Rarray);
++
++    // if (key < current value)
++    //   Rj = Rh
++    // else
++    //   Ri = Rh
++ Label Lgreater;
++ __ cmpw(CCR0, Rkey, Rscratch);
++ __ bge(CCR0, Lgreater);
++ __ mr(Rj, Rh);
++ __ b(entry);
++ __ bind(Lgreater);
++ __ mr(Ri, Rh);
++
++ // while (i+1 < j)
++ __ bind(entry);
++ __ addi(Rscratch, Ri, 1);
++ __ cmpw(CCR0, Rscratch, Rj);
++    __ add(Rh, Ri, Rj); // start h = (i + j) >> 1;
++
++ __ blt(CCR0, loop);
++ }
++
++ // End of binary search, result index is i (must check again!).
++ Label default_case;
++ Label continue_execution;
++ if (ProfileInterpreter) {
++ __ mr(Rh, Ri); // Save index in i for profiling.
++ }
++ // Ri = value offset
++ __ sldi(Ri, Ri, log_entry_size);
++ __ add(Ri, Ri, Rarray);
++ __ lwz(Rscratch, 0, Ri);
++
++ Label not_found;
++ // Ri = offset offset
++ __ cmpw(CCR0, Rkey, Rscratch);
++ __ beq(CCR0, not_found);
++ // entry not found -> j = default offset
++ __ lwz(Rj, -2 * BytesPerInt, Rarray);
++ __ b(default_case);
++
++ __ bind(not_found);
++ // entry found -> j = offset
++ __ profile_switch_case(Rh, Rj, Rscratch, Rkey);
++ __ lwz(Rj, BytesPerInt, Ri);
++
++ if (ProfileInterpreter) {
++ __ b(continue_execution);
++ }
++
++ __ bind(default_case); // fall through (if not profiling)
++ __ profile_switch_default(Ri, Rscratch);
++
++ __ bind(continue_execution);
++
++ __ extsw(Rj, Rj);
++ __ add(R14_bcp, Rj, R14_bcp);
++ __ dispatch_next(vtos);
++}
++
++void TemplateTable::_return(TosState state) {
++ transition(state, state);
++ assert(_desc->calls_vm(),
++ "inconsistent calls_vm information"); // call in remove_activation
++
++ if (_desc->bytecode() == Bytecodes::_return_register_finalizer) {
++
++ Register Rscratch = R11_scratch1,
++ Rklass = R12_scratch2,
++ Rklass_flags = Rklass;
++ Label Lskip_register_finalizer;
++
++ // Check if the method has the FINALIZER flag set and call into the VM to finalize in this case.
++ assert(state == vtos, "only valid state");
++ __ ld(R17_tos, 0, R18_locals);
++
++ // Load klass of this obj.
++ __ load_klass(Rklass, R17_tos);
++ __ lwz(Rklass_flags, in_bytes(Klass::access_flags_offset()), Rklass);
++ __ testbitdi(CCR0, R0, Rklass_flags, exact_log2(JVM_ACC_HAS_FINALIZER));
++ __ bfalse(CCR0, Lskip_register_finalizer);
++
++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::register_finalizer), R17_tos /* obj */);
++
++ __ align(32, 12);
++ __ bind(Lskip_register_finalizer);
++ }
++
++ // Move the result value into the correct register and remove memory stack frame.
++ __ remove_activation(state, /* throw_monitor_exception */ true);
++ // Restoration of lr done by remove_activation.
++ switch (state) {
++ case ltos:
++ case btos:
++ case ctos:
++ case stos:
++ case atos:
++ case itos: __ mr(R3_RET, R17_tos); break;
++ case ftos:
++ case dtos: __ fmr(F1_RET, F15_ftos); break;
++ case vtos: // This might be a constructor. Final fields (and volatile fields on PPC64) need
++               //   to become visible before the reference to the object gets stored anywhere.
++ __ membar(Assembler::StoreStore); break;
++ default : ShouldNotReachHere();
++ }
++ __ blr();
++}
++
++// ============================================================================
++// Constant pool cache access
++//
++// Memory ordering:
++//
++// As in the C++ interpreter, we load the fields
++//   - _indices
++//   - _f12_oop
++// with acquire semantics, because they are queried to decide whether the cache
++// is already resolved. We don't want later loads to float above this check.
++// See also the comments in ConstantPoolCacheEntry::bytecode_1(),
++// ConstantPoolCacheEntry::bytecode_2() and ConstantPoolCacheEntry::f1().
++
++// Call into the VM if call site is not yet resolved
++//
++// Input regs:
++// - None, all passed regs are outputs.
++//
++// Returns:
++// - Rcache: The const pool cache entry that contains the resolved result.
++//
++// Kills:
++// - Rscratch
++void TemplateTable::resolve_cache_and_index(int byte_no, Register Rcache, Register Rscratch, size_t index_size) {
++
++ __ get_cache_and_index_at_bcp(Rcache, 1, index_size);
++ Label Lresolved, Ldone;
++
++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
++ // We are resolved if the indices offset contains the current bytecode.
++ // Big Endian:
++ __ lbz(Rscratch, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()) + 7 - (byte_no + 1), Rcache);
++ // Acquire by cmp-br-isync (see below).
++ __ cmpdi(CCR0, Rscratch, (int)bytecode());
++ __ beq(CCR0, Lresolved);
++
++ address entry = NULL;
++ switch (bytecode()) {
++ case Bytecodes::_getstatic : // fall through
++ case Bytecodes::_putstatic : // fall through
++ case Bytecodes::_getfield : // fall through
++ case Bytecodes::_putfield : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put); break;
++ case Bytecodes::_invokevirtual : // fall through
++ case Bytecodes::_invokespecial : // fall through
++ case Bytecodes::_invokestatic : // fall through
++ case Bytecodes::_invokeinterface: entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke); break;
++ case Bytecodes::_invokehandle : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokehandle); break;
++ case Bytecodes::_invokedynamic : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic); break;
++ default : ShouldNotReachHere(); break;
++ }
++ __ li(R4_ARG2, (int)bytecode());
++ __ call_VM(noreg, entry, R4_ARG2, true);
++
++ // Update registers with resolved info.
++ __ get_cache_and_index_at_bcp(Rcache, 1, index_size);
++ __ b(Ldone);
++
++ __ bind(Lresolved);
++ __ isync(); // Order load wrt. succeeding loads.
++ __ bind(Ldone);
++}
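++
++// Illustrative note (not from the original sources) on the byte offset
++// "+ 7 - (byte_no + 1)" above: within the 8-byte _indices word, bytecode_1
++// (byte_no == f1_byte) lives in byte 2 and bytecode_2 (f2_byte) in byte 3,
++// counted from the least significant end. Big-endian lbz offsets count from
++// the most significant end, so the offset is mirrored:
++//
++//   f1_byte = 1  ->  offset 7 - 2 = 5;   f2_byte = 2  ->  offset 7 - 3 = 4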
++
++// Load the constant pool cache entry at field accesses into registers.
++// The Rcache and Rindex registers must be set before call.
++// Input:
++// - Rcache, Rindex
++// Output:
++// - Robj, Roffset, Rflags
++void TemplateTable::load_field_cp_cache_entry(Register Robj,
++ Register Rcache,
++ Register Rindex /* unused on PPC64 */,
++ Register Roffset,
++ Register Rflags,
++ bool is_static = false) {
++ assert_different_registers(Rcache, Rflags, Roffset);
++ // assert(Rindex == noreg, "parameter not used on PPC64");
++
++ ByteSize cp_base_offset = ConstantPoolCache::base_offset();
++ __ ld(Rflags, in_bytes(cp_base_offset) + in_bytes(ConstantPoolCacheEntry::flags_offset()), Rcache);
++ __ ld(Roffset, in_bytes(cp_base_offset) + in_bytes(ConstantPoolCacheEntry::f2_offset()), Rcache);
++ if (is_static) {
++ __ ld(Robj, in_bytes(cp_base_offset) + in_bytes(ConstantPoolCacheEntry::f1_offset()), Rcache);
++ __ ld(Robj, in_bytes(Klass::java_mirror_offset()), Robj);
++ // Acquire not needed here. Following access has an address dependency on this value.
++ }
++}
++
++// Load the constant pool cache entry at invokes into registers.
++// Resolve if necessary.
++
++// Input Registers:
++// - None, bcp is used, though
++//
++// Return registers:
++// - Rmethod (f1 field or f2 if invokevirtual)
++// - Ritable_index (f2 field)
++// - Rflags (flags field)
++//
++// Kills:
++// - R21
++//
++void TemplateTable::load_invoke_cp_cache_entry(int byte_no,
++ Register Rmethod,
++ Register Ritable_index,
++ Register Rflags,
++ bool is_invokevirtual,
++ bool is_invokevfinal,
++ bool is_invokedynamic) {
++
++ ByteSize cp_base_offset = ConstantPoolCache::base_offset();
++ // Determine constant pool cache field offsets.
++ assert(is_invokevirtual == (byte_no == f2_byte), "is_invokevirtual flag redundant");
++ const int method_offset = in_bytes(cp_base_offset + (is_invokevirtual ? ConstantPoolCacheEntry::f2_offset() : ConstantPoolCacheEntry::f1_offset()));
++ const int flags_offset = in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset());
++ // Access constant pool cache fields.
++ const int index_offset = in_bytes(cp_base_offset + ConstantPoolCacheEntry::f2_offset());
++
++ Register Rcache = R21_tmp1; // Note: same register as R21_sender_SP.
++
++ if (is_invokevfinal) {
++ assert(Ritable_index == noreg, "register not used");
++ // Already resolved.
++ __ get_cache_and_index_at_bcp(Rcache, 1);
++ } else {
++ resolve_cache_and_index(byte_no, Rcache, R0, is_invokedynamic ? sizeof(u4) : sizeof(u2));
++ }
++
++ __ ld(Rmethod, method_offset, Rcache);
++ __ ld(Rflags, flags_offset, Rcache);
++
++ if (Ritable_index != noreg) {
++ __ ld(Ritable_index, index_offset, Rcache);
++ }
++}
++
++// ============================================================================
++// Field access
++
++// Volatile variables demand their effects be made known to all CPUs
++// in order. Store buffers on most chips allow reads & writes to
++// reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode
++// without some kind of memory barrier (i.e., it's not sufficient that
++// the interpreter does not reorder volatile references, the hardware
++// also must not reorder them).
++//
++// According to the new Java Memory Model (JMM):
++// (1) All volatiles are serialized with respect to each other. ALSO reads &
++//     writes act as acquire & release, so:
++// (2) A read cannot let unrelated NON-volatile memory refs that
++// happen after the read float up to before the read. It's OK for
++// non-volatile memory refs that happen before the volatile read to
++// float down below it.
++// (3) Similarly, a volatile write cannot let unrelated NON-volatile
++// memory refs that happen BEFORE the write float down to after the
++// write. It's OK for non-volatile memory refs that happen after the
++// volatile write to float up before it.
++//
++// We only put in barriers around volatile refs (they are expensive),
++// not _between_ memory refs (that would require us to track the
++// flavor of the previous memory refs). Requirements (2) and (3)
++// require some barriers before volatile stores and after volatile
++// loads. These nearly cover requirement (1) but miss the
++// volatile-store-volatile-load case. This final case is placed after
++// volatile-stores although it could just as well go before
++// volatile-loads.
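++
++// As a rough sketch (not an exhaustive mapping), the templates below
++// realize this on PPC64 as:
++//
++//   volatile load:   sync; load; twi/cmp; isync   (fence, then acquire via
++//                                                  the load-twi-isync idiom)
++//   volatile store:  lwsync; store                (release-store)
++//
++// The full sync ahead of volatile loads also covers the
++// volatile-store-volatile-load case mentioned above.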
++
++// The registers cache and index expected to be set before call.
++// Correct values of the cache and index registers are preserved.
++// Kills:
++// Rcache (if has_tos)
++// Rscratch
++void TemplateTable::jvmti_post_field_access(Register Rcache, Register Rscratch, bool is_static, bool has_tos) {
++
++ assert_different_registers(Rcache, Rscratch);
++
++ if (JvmtiExport::can_post_field_access()) {
++ ByteSize cp_base_offset = ConstantPoolCache::base_offset();
++ Label Lno_field_access_post;
++
++    // Check if post field access is enabled.
++ int offs = __ load_const_optimized(Rscratch, JvmtiExport::get_field_access_count_addr(), R0, true);
++ __ lwz(Rscratch, offs, Rscratch);
++
++ __ cmpwi(CCR0, Rscratch, 0);
++ __ beq(CCR0, Lno_field_access_post);
++
++ // Post access enabled - do it!
++ __ addi(Rcache, Rcache, in_bytes(cp_base_offset));
++ if (is_static) {
++ __ li(R17_tos, 0);
++ } else {
++ if (has_tos) {
++ // The fast bytecode versions have obj ptr in register.
++        // Thus, save the object pointer before call_VM() clobbers it and
++        // put the object on tos, where the GC wants it.
++ __ push_ptr(R17_tos);
++ } else {
++ // Load top of stack (do not pop the value off the stack).
++ __ ld(R17_tos, Interpreter::expr_offset_in_bytes(0), R15_esp);
++ }
++ __ verify_oop(R17_tos);
++ }
++ // tos: object pointer or NULL if static
++ // cache: cache entry pointer
++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), R17_tos, Rcache);
++ if (!is_static && has_tos) {
++ // Restore object pointer.
++ __ pop_ptr(R17_tos);
++ __ verify_oop(R17_tos);
++ } else {
++ // Cache is still needed to get class or obj.
++ __ get_cache_and_index_at_bcp(Rcache, 1);
++ }
++
++ __ align(32, 12);
++ __ bind(Lno_field_access_post);
++ }
++}
++
++// kills R11_scratch1
++void TemplateTable::pop_and_check_object(Register Roop) {
++ Register Rtmp = R11_scratch1;
++
++ assert_different_registers(Rtmp, Roop);
++ __ pop_ptr(Roop);
++ // For field access must check obj.
++ __ null_check_throw(Roop, -1, Rtmp);
++ __ verify_oop(Roop);
++}
++
++// PPC64: implement volatile loads as fence-load-acquire.
++void TemplateTable::getfield_or_static(int byte_no, bool is_static) {
++ transition(vtos, vtos);
++
++ Label Lacquire, Lisync;
++
++ const Register Rcache = R3_ARG1,
++ Rclass_or_obj = R22_tmp2,
++ Roffset = R23_tmp3,
++ Rflags = R31,
++ Rbtable = R5_ARG3,
++ Rbc = R6_ARG4,
++ Rscratch = R12_scratch2;
++
++ static address field_branch_table[number_of_states],
++ static_branch_table[number_of_states];
++
++ address* branch_table = is_static ? static_branch_table : field_branch_table;
++
++ // Get field offset.
++ resolve_cache_and_index(byte_no, Rcache, Rscratch, sizeof(u2));
++
++ // JVMTI support
++ jvmti_post_field_access(Rcache, Rscratch, is_static, false);
++
++ // Load after possible GC.
++ load_field_cp_cache_entry(Rclass_or_obj, Rcache, noreg, Roffset, Rflags, is_static);
++
++ // Load pointer to branch table.
++ __ load_const_optimized(Rbtable, (address)branch_table, Rscratch);
++
++ // Get volatile flag.
++ __ rldicl(Rscratch, Rflags, 64-ConstantPoolCacheEntry::is_volatile_shift, 63); // Extract volatile bit.
++ // Note: sync is needed before volatile load on PPC64.
++
++ // Check field type.
++ __ rldicl(Rflags, Rflags, 64-ConstantPoolCacheEntry::tos_state_shift, 64-ConstantPoolCacheEntry::tos_state_bits);
++
++#ifdef ASSERT
++ Label LFlagInvalid;
++ __ cmpldi(CCR0, Rflags, number_of_states);
++ __ bge(CCR0, LFlagInvalid);
++#endif
++
++ // Load from branch table and dispatch (volatile case: one instruction ahead).
++ __ sldi(Rflags, Rflags, LogBytesPerWord);
++ __ cmpwi(CCR6, Rscratch, 1); // Volatile?
++ if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
++ __ sldi(Rscratch, Rscratch, exact_log2(BytesPerInstWord)); // Volatile ? size of 1 instruction : 0.
++ }
++ __ ldx(Rbtable, Rbtable, Rflags);
++
++ // Get the obj from stack.
++ if (!is_static) {
++ pop_and_check_object(Rclass_or_obj); // Kills R11_scratch1.
++ } else {
++ __ verify_oop(Rclass_or_obj);
++ }
++
++ if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
++ __ subf(Rbtable, Rscratch, Rbtable); // Point to volatile/non-volatile entry point.
++ }
++ __ mtctr(Rbtable);
++ __ bctr();
++
++#ifdef ASSERT
++ __ bind(LFlagInvalid);
++ __ stop("got invalid flag", 0x654);
++
++ // __ bind(Lvtos);
++ address pc_before_fence = __ pc();
++ __ fence(); // Volatile entry point (one instruction before non-volatile_entry point).
++ assert(__ pc() - pc_before_fence == (ptrdiff_t)BytesPerInstWord, "must be single instruction");
++ assert(branch_table[vtos] == 0, "can't compute twice");
++ branch_table[vtos] = __ pc(); // non-volatile_entry point
++ __ stop("vtos unexpected", 0x655);
++#endif
++
++ __ align(32, 28, 28); // Align load.
++ // __ bind(Ldtos);
++ __ fence(); // Volatile entry point (one instruction before non-volatile_entry point).
++ assert(branch_table[dtos] == 0, "can't compute twice");
++ branch_table[dtos] = __ pc(); // non-volatile_entry point
++ __ lfdx(F15_ftos, Rclass_or_obj, Roffset);
++ __ push(dtos);
++ if (!is_static) patch_bytecode(Bytecodes::_fast_dgetfield, Rbc, Rscratch);
++ {
++ Label acquire_double;
++ __ beq(CCR6, acquire_double); // Volatile?
++ __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
++
++ __ bind(acquire_double);
++ __ fcmpu(CCR0, F15_ftos, F15_ftos); // Acquire by cmp-br-isync.
++ __ beq_predict_taken(CCR0, Lisync);
++ __ b(Lisync); // In case of NAN.
++ }
++
++ __ align(32, 28, 28); // Align load.
++ // __ bind(Lftos);
++ __ fence(); // Volatile entry point (one instruction before non-volatile_entry point).
++ assert(branch_table[ftos] == 0, "can't compute twice");
++ branch_table[ftos] = __ pc(); // non-volatile_entry point
++ __ lfsx(F15_ftos, Rclass_or_obj, Roffset);
++ __ push(ftos);
++ if (!is_static) { patch_bytecode(Bytecodes::_fast_fgetfield, Rbc, Rscratch); }
++ {
++ Label acquire_float;
++ __ beq(CCR6, acquire_float); // Volatile?
++ __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
++
++ __ bind(acquire_float);
++ __ fcmpu(CCR0, F15_ftos, F15_ftos); // Acquire by cmp-br-isync.
++ __ beq_predict_taken(CCR0, Lisync);
++ __ b(Lisync); // In case of NAN.
++ }
++
++ __ align(32, 28, 28); // Align load.
++ // __ bind(Litos);
++ __ fence(); // Volatile entry point (one instruction before non-volatile_entry point).
++ assert(branch_table[itos] == 0, "can't compute twice");
++ branch_table[itos] = __ pc(); // non-volatile_entry point
++ __ lwax(R17_tos, Rclass_or_obj, Roffset);
++ __ push(itos);
++ if (!is_static) patch_bytecode(Bytecodes::_fast_igetfield, Rbc, Rscratch);
++ __ beq(CCR6, Lacquire); // Volatile?
++ __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
++
++ __ align(32, 28, 28); // Align load.
++ // __ bind(Lltos);
++ __ fence(); // Volatile entry point (one instruction before non-volatile_entry point).
++ assert(branch_table[ltos] == 0, "can't compute twice");
++ branch_table[ltos] = __ pc(); // non-volatile_entry point
++ __ ldx(R17_tos, Rclass_or_obj, Roffset);
++ __ push(ltos);
++ if (!is_static) patch_bytecode(Bytecodes::_fast_lgetfield, Rbc, Rscratch);
++ __ beq(CCR6, Lacquire); // Volatile?
++ __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
++
++ __ align(32, 28, 28); // Align load.
++ // __ bind(Lbtos);
++ __ fence(); // Volatile entry point (one instruction before non-volatile_entry point).
++ assert(branch_table[btos] == 0, "can't compute twice");
++ branch_table[btos] = __ pc(); // non-volatile_entry point
++ __ lbzx(R17_tos, Rclass_or_obj, Roffset);
++ __ extsb(R17_tos, R17_tos);
++ __ push(btos);
++ if (!is_static) patch_bytecode(Bytecodes::_fast_bgetfield, Rbc, Rscratch);
++ __ beq(CCR6, Lacquire); // Volatile?
++ __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
++
++ __ align(32, 28, 28); // Align load.
++ // __ bind(Lctos);
++ __ fence(); // Volatile entry point (one instruction before non-volatile_entry point).
++ assert(branch_table[ctos] == 0, "can't compute twice");
++ branch_table[ctos] = __ pc(); // non-volatile_entry point
++ __ lhzx(R17_tos, Rclass_or_obj, Roffset);
++ __ push(ctos);
++ if (!is_static) patch_bytecode(Bytecodes::_fast_cgetfield, Rbc, Rscratch);
++ __ beq(CCR6, Lacquire); // Volatile?
++ __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
++
++ __ align(32, 28, 28); // Align load.
++ // __ bind(Lstos);
++ __ fence(); // Volatile entry point (one instruction before non-volatile_entry point).
++ assert(branch_table[stos] == 0, "can't compute twice");
++ branch_table[stos] = __ pc(); // non-volatile_entry point
++ __ lhax(R17_tos, Rclass_or_obj, Roffset);
++ __ push(stos);
++ if (!is_static) patch_bytecode(Bytecodes::_fast_sgetfield, Rbc, Rscratch);
++ __ beq(CCR6, Lacquire); // Volatile?
++ __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
++
++ __ align(32, 28, 28); // Align load.
++ // __ bind(Latos);
++ __ fence(); // Volatile entry point (one instruction before non-volatile_entry point).
++ assert(branch_table[atos] == 0, "can't compute twice");
++ branch_table[atos] = __ pc(); // non-volatile_entry point
++ __ load_heap_oop(R17_tos, (RegisterOrConstant)Roffset, Rclass_or_obj);
++ __ verify_oop(R17_tos);
++ __ push(atos);
++ //__ dcbt(R17_tos); // prefetch
++ if (!is_static) patch_bytecode(Bytecodes::_fast_agetfield, Rbc, Rscratch);
++ __ beq(CCR6, Lacquire); // Volatile?
++ __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
++
++ __ align(32, 12);
++ __ bind(Lacquire);
++ __ twi_0(R17_tos);
++ __ bind(Lisync);
++ __ isync(); // acquire
++
++#ifdef ASSERT
++  for (int i = 0; i < number_of_states; i++) {
++    assert(branch_table[i], "get initialization");
++    //tty->print_cr("get: %s_branch_table[%d] = 0x%llx (opcode 0x%llx)",
++    //              is_static ? "static" : "field", i, branch_table[i], *((unsigned int*)branch_table[i]));
++ }
++#endif
++}
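++
++// Layout note (illustrative): branch_table[tos] records the non-volatile
++// entry point of each load template; the volatile variant sits exactly one
++// instruction earlier, at the fence emitted just before it. The dispatch
++// above therefore computes, roughly,
++//
++//   address target = branch_table[tos_state]
++//                  - (is_volatile ? BytesPerInstWord : 0);
++//
++// where the subtraction is only applied when
++// support_IRIW_for_not_multiple_copy_atomic_cpu; otherwise the getfield
++// templates acquire after the load via the CCR6 check instead.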
++
++void TemplateTable::getfield(int byte_no) {
++ getfield_or_static(byte_no, false);
++}
++
++void TemplateTable::getstatic(int byte_no) {
++ getfield_or_static(byte_no, true);
++}
++
++// The registers cache and index expected to be set before call.
++// The function may destroy various registers, just not the cache and index registers.
++void TemplateTable::jvmti_post_field_mod(Register Rcache, Register Rscratch, bool is_static) {
++
++ assert_different_registers(Rcache, Rscratch, R6_ARG4);
++
++ if (JvmtiExport::can_post_field_modification()) {
++ Label Lno_field_mod_post;
++
++    // Check if post field modification is enabled.
++ int offs = __ load_const_optimized(Rscratch, JvmtiExport::get_field_modification_count_addr(), R0, true);
++ __ lwz(Rscratch, offs, Rscratch);
++
++ __ cmpwi(CCR0, Rscratch, 0);
++ __ beq(CCR0, Lno_field_mod_post);
++
++ // Do the post
++ ByteSize cp_base_offset = ConstantPoolCache::base_offset();
++ const Register Robj = Rscratch;
++
++ __ addi(Rcache, Rcache, in_bytes(cp_base_offset));
++ if (is_static) {
++ // Life is simple. Null out the object pointer.
++ __ li(Robj, 0);
++ } else {
++ // In case of the fast versions, value lives in registers => put it back on tos.
++ int offs = Interpreter::expr_offset_in_bytes(0);
++ Register base = R15_esp;
++ switch(bytecode()) {
++ case Bytecodes::_fast_aputfield: __ push_ptr(); offs+= Interpreter::stackElementSize; break;
++ case Bytecodes::_fast_iputfield: // Fall through
++ case Bytecodes::_fast_bputfield: // Fall through
++ case Bytecodes::_fast_cputfield: // Fall through
++ case Bytecodes::_fast_sputfield: __ push_i(); offs+= Interpreter::stackElementSize; break;
++ case Bytecodes::_fast_lputfield: __ push_l(); offs+=2*Interpreter::stackElementSize; break;
++ case Bytecodes::_fast_fputfield: __ push_f(); offs+= Interpreter::stackElementSize; break;
++ case Bytecodes::_fast_dputfield: __ push_d(); offs+=2*Interpreter::stackElementSize; break;
++ default: {
++ offs = 0;
++ base = Robj;
++ const Register Rflags = Robj;
++ Label is_one_slot;
++ // Life is harder. The stack holds the value on top, followed by the
++ // object. We don't know the size of the value, though; it could be
++ // one or two words depending on its type. As a result, we must find
++ // the type to determine where the object is.
++ __ ld(Rflags, in_bytes(ConstantPoolCacheEntry::flags_offset()), Rcache); // Big Endian
++ __ rldicl(Rflags, Rflags, 64-ConstantPoolCacheEntry::tos_state_shift, 64-ConstantPoolCacheEntry::tos_state_bits);
++
++ __ cmpwi(CCR0, Rflags, ltos);
++ __ cmpwi(CCR1, Rflags, dtos);
++ __ addi(base, R15_esp, Interpreter::expr_offset_in_bytes(1));
++ __ crnor(/*CR0 eq*/2, /*CR1 eq*/4+2, /*CR0 eq*/2);
++ __ beq(CCR0, is_one_slot);
++ __ addi(base, R15_esp, Interpreter::expr_offset_in_bytes(2));
++ __ bind(is_one_slot);
++ break;
++ }
++ }
++ __ ld(Robj, offs, base);
++ __ verify_oop(Robj);
++ }
++
++ __ addi(R6_ARG4, R15_esp, Interpreter::expr_offset_in_bytes(0));
++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_modification), Robj, Rcache, R6_ARG4);
++ __ get_cache_and_index_at_bcp(Rcache, 1);
++
++ // In case of the fast versions, value lives in registers => put it back on tos.
++ switch(bytecode()) {
++ case Bytecodes::_fast_aputfield: __ pop_ptr(); break;
++ case Bytecodes::_fast_iputfield: // Fall through
++ case Bytecodes::_fast_bputfield: // Fall through
++ case Bytecodes::_fast_cputfield: // Fall through
++ case Bytecodes::_fast_sputfield: __ pop_i(); break;
++ case Bytecodes::_fast_lputfield: __ pop_l(); break;
++ case Bytecodes::_fast_fputfield: __ pop_f(); break;
++ case Bytecodes::_fast_dputfield: __ pop_d(); break;
++ default: break; // Nothin' to do.
++ }
++
++ __ align(32, 12);
++ __ bind(Lno_field_mod_post);
++ }
++}
++
++// PPC64: implement volatile stores as release-store (return bytecode contains an additional release).
++void TemplateTable::putfield_or_static(int byte_no, bool is_static) {
++ Label Lvolatile;
++
++ const Register Rcache = R5_ARG3, // Do not use ARG1/2 (causes trouble in jvmti_post_field_mod).
++ Rclass_or_obj = R31, // Needs to survive C call.
++ Roffset = R22_tmp2, // Needs to survive C call.
++ Rflags = R3_ARG1,
++ Rbtable = R4_ARG2,
++ Rscratch = R11_scratch1,
++ Rscratch2 = R12_scratch2,
++ Rscratch3 = R6_ARG4,
++ Rbc = Rscratch3;
++ const ConditionRegister CR_is_vol = CCR2; // Non-volatile condition register (survives runtime call in do_oop_store).
++
++ static address field_branch_table[number_of_states],
++ static_branch_table[number_of_states];
++
++ address* branch_table = is_static ? static_branch_table : field_branch_table;
++
++ // Stack (grows up):
++ // value
++ // obj
++
++ // Load the field offset.
++ resolve_cache_and_index(byte_no, Rcache, Rscratch, sizeof(u2));
++ jvmti_post_field_mod(Rcache, Rscratch, is_static);
++ load_field_cp_cache_entry(Rclass_or_obj, Rcache, noreg, Roffset, Rflags, is_static);
++
++ // Load pointer to branch table.
++ __ load_const_optimized(Rbtable, (address)branch_table, Rscratch);
++
++ // Get volatile flag.
++ __ rldicl(Rscratch, Rflags, 64-ConstantPoolCacheEntry::is_volatile_shift, 63); // Extract volatile bit.
++
++ // Check the field type.
++ __ rldicl(Rflags, Rflags, 64-ConstantPoolCacheEntry::tos_state_shift, 64-ConstantPoolCacheEntry::tos_state_bits);
++
++#ifdef ASSERT
++ Label LFlagInvalid;
++ __ cmpldi(CCR0, Rflags, number_of_states);
++ __ bge(CCR0, LFlagInvalid);
++#endif
++
++ // Load from branch table and dispatch (volatile case: one instruction ahead).
++ __ sldi(Rflags, Rflags, LogBytesPerWord);
++ if (!support_IRIW_for_not_multiple_copy_atomic_cpu) { __ cmpwi(CR_is_vol, Rscratch, 1); } // Volatile?
++  __ sldi(Rscratch, Rscratch, exact_log2(BytesPerInstWord)); // Volatile ? size of 1 instruction : 0.
++ __ ldx(Rbtable, Rbtable, Rflags);
++
++ __ subf(Rbtable, Rscratch, Rbtable); // Point to volatile/non-volatile entry point.
++ __ mtctr(Rbtable);
++ __ bctr();
++
++#ifdef ASSERT
++ __ bind(LFlagInvalid);
++ __ stop("got invalid flag", 0x656);
++
++ // __ bind(Lvtos);
++ address pc_before_release = __ pc();
++ __ release(); // Volatile entry point (one instruction before non-volatile_entry point).
++ assert(__ pc() - pc_before_release == (ptrdiff_t)BytesPerInstWord, "must be single instruction");
++ assert(branch_table[vtos] == 0, "can't compute twice");
++ branch_table[vtos] = __ pc(); // non-volatile_entry point
++ __ stop("vtos unexpected", 0x657);
++#endif
++
++ __ align(32, 28, 28); // Align pop.
++ // __ bind(Ldtos);
++ __ release(); // Volatile entry point (one instruction before non-volatile_entry point).
++ assert(branch_table[dtos] == 0, "can't compute twice");
++ branch_table[dtos] = __ pc(); // non-volatile_entry point
++ __ pop(dtos);
++ if (!is_static) { pop_and_check_object(Rclass_or_obj); } // Kills R11_scratch1.
++ __ stfdx(F15_ftos, Rclass_or_obj, Roffset);
++ if (!is_static) { patch_bytecode(Bytecodes::_fast_dputfield, Rbc, Rscratch, true, byte_no); }
++ if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
++ __ beq(CR_is_vol, Lvolatile); // Volatile?
++ }
++ __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
++
++ __ align(32, 28, 28); // Align pop.
++ // __ bind(Lftos);
++ __ release(); // Volatile entry point (one instruction before non-volatile_entry point).
++ assert(branch_table[ftos] == 0, "can't compute twice");
++ branch_table[ftos] = __ pc(); // non-volatile_entry point
++ __ pop(ftos);
++ if (!is_static) { pop_and_check_object(Rclass_or_obj); } // Kills R11_scratch1.
++ __ stfsx(F15_ftos, Rclass_or_obj, Roffset);
++ if (!is_static) { patch_bytecode(Bytecodes::_fast_fputfield, Rbc, Rscratch, true, byte_no); }
++ if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
++ __ beq(CR_is_vol, Lvolatile); // Volatile?
++ }
++ __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
++
++ __ align(32, 28, 28); // Align pop.
++ // __ bind(Litos);
++ __ release(); // Volatile entry point (one instruction before non-volatile_entry point).
++ assert(branch_table[itos] == 0, "can't compute twice");
++ branch_table[itos] = __ pc(); // non-volatile_entry point
++ __ pop(itos);
++ if (!is_static) { pop_and_check_object(Rclass_or_obj); } // Kills R11_scratch1.
++ __ stwx(R17_tos, Rclass_or_obj, Roffset);
++ if (!is_static) { patch_bytecode(Bytecodes::_fast_iputfield, Rbc, Rscratch, true, byte_no); }
++ if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
++ __ beq(CR_is_vol, Lvolatile); // Volatile?
++ }
++ __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
++
++ __ align(32, 28, 28); // Align pop.
++ // __ bind(Lltos);
++ __ release(); // Volatile entry point (one instruction before non-volatile_entry point).
++ assert(branch_table[ltos] == 0, "can't compute twice");
++ branch_table[ltos] = __ pc(); // non-volatile_entry point
++ __ pop(ltos);
++ if (!is_static) { pop_and_check_object(Rclass_or_obj); } // Kills R11_scratch1.
++ __ stdx(R17_tos, Rclass_or_obj, Roffset);
++ if (!is_static) { patch_bytecode(Bytecodes::_fast_lputfield, Rbc, Rscratch, true, byte_no); }
++ if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
++ __ beq(CR_is_vol, Lvolatile); // Volatile?
++ }
++ __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
++
++ __ align(32, 28, 28); // Align pop.
++ // __ bind(Lbtos);
++ __ release(); // Volatile entry point (one instruction before non-volatile_entry point).
++ assert(branch_table[btos] == 0, "can't compute twice");
++ branch_table[btos] = __ pc(); // non-volatile_entry point
++ __ pop(btos);
++ if (!is_static) { pop_and_check_object(Rclass_or_obj); } // Kills R11_scratch1.
++ __ stbx(R17_tos, Rclass_or_obj, Roffset);
++ if (!is_static) { patch_bytecode(Bytecodes::_fast_bputfield, Rbc, Rscratch, true, byte_no); }
++ if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
++ __ beq(CR_is_vol, Lvolatile); // Volatile?
++ }
++ __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
++
++ __ align(32, 28, 28); // Align pop.
++ // __ bind(Lctos);
++ __ release(); // Volatile entry point (one instruction before non-volatile_entry point).
++ assert(branch_table[ctos] == 0, "can't compute twice");
++ branch_table[ctos] = __ pc(); // non-volatile_entry point
++ __ pop(ctos);
++  if (!is_static) { pop_and_check_object(Rclass_or_obj); } // Kills R11_scratch1.
++ __ sthx(R17_tos, Rclass_or_obj, Roffset);
++ if (!is_static) { patch_bytecode(Bytecodes::_fast_cputfield, Rbc, Rscratch, true, byte_no); }
++ if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
++ __ beq(CR_is_vol, Lvolatile); // Volatile?
++ }
++ __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
++
++ __ align(32, 28, 28); // Align pop.
++ // __ bind(Lstos);
++ __ release(); // Volatile entry point (one instruction before non-volatile_entry point).
++ assert(branch_table[stos] == 0, "can't compute twice");
++ branch_table[stos] = __ pc(); // non-volatile_entry point
++ __ pop(stos);
++ if (!is_static) { pop_and_check_object(Rclass_or_obj); } // Kills R11_scratch1.
++ __ sthx(R17_tos, Rclass_or_obj, Roffset);
++ if (!is_static) { patch_bytecode(Bytecodes::_fast_sputfield, Rbc, Rscratch, true, byte_no); }
++ if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
++ __ beq(CR_is_vol, Lvolatile); // Volatile?
++ }
++ __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
++
++ __ align(32, 28, 28); // Align pop.
++ // __ bind(Latos);
++ __ release(); // Volatile entry point (one instruction before non-volatile_entry point).
++ assert(branch_table[atos] == 0, "can't compute twice");
++ branch_table[atos] = __ pc(); // non-volatile_entry point
++ __ pop(atos);
++ if (!is_static) { pop_and_check_object(Rclass_or_obj); } // kills R11_scratch1
++ do_oop_store(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, Rscratch2, Rscratch3, _bs->kind(), false /* precise */, true /* check null */);
++ if (!is_static) { patch_bytecode(Bytecodes::_fast_aputfield, Rbc, Rscratch, true, byte_no); }
++ if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
++ __ beq(CR_is_vol, Lvolatile); // Volatile?
++ __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
++
++ __ align(32, 12);
++ __ bind(Lvolatile);
++ __ fence();
++ }
++ // fallthru: __ b(Lexit);
++
++#ifdef ASSERT
++  for (int i = 0; i < number_of_states; i++) {
++    assert(branch_table[i], "put initialization");
++    //tty->print_cr("put: %s_branch_table[%d] = 0x%llx (opcode 0x%llx)",
++    //              is_static ? "static" : "field", i, branch_table[i], *((unsigned int*)branch_table[i]));
++ }
++#endif
++}
++
++void TemplateTable::putfield(int byte_no) {
++ putfield_or_static(byte_no, false);
++}
++
++void TemplateTable::putstatic(int byte_no) {
++ putfield_or_static(byte_no, true);
++}
++
++// See SPARC. On PPC64, we have a different jvmti_post_field_mod which does the job.
++void TemplateTable::jvmti_post_fast_field_mod() {
++ __ should_not_reach_here();
++}
++
++void TemplateTable::fast_storefield(TosState state) {
++ transition(state, vtos);
++
++ const Register Rcache = R5_ARG3, // Do not use ARG1/2 (causes trouble in jvmti_post_field_mod).
++ Rclass_or_obj = R31, // Needs to survive C call.
++ Roffset = R22_tmp2, // Needs to survive C call.
++ Rflags = R3_ARG1,
++ Rscratch = R11_scratch1,
++ Rscratch2 = R12_scratch2,
++ Rscratch3 = R4_ARG2;
++ const ConditionRegister CR_is_vol = CCR2; // Non-volatile condition register (survives runtime call in do_oop_store).
++
++ // Constant pool already resolved => Load flags and offset of field.
++ __ get_cache_and_index_at_bcp(Rcache, 1);
++ jvmti_post_field_mod(Rcache, Rscratch, false /* not static */);
++ load_field_cp_cache_entry(noreg, Rcache, noreg, Roffset, Rflags, false);
++
++ // Get the obj and the final store addr.
++ pop_and_check_object(Rclass_or_obj); // Kills R11_scratch1.
++
++ // Get volatile flag.
++ __ rldicl_(Rscratch, Rflags, 64-ConstantPoolCacheEntry::is_volatile_shift, 63); // Extract volatile bit.
++ if (!support_IRIW_for_not_multiple_copy_atomic_cpu) { __ cmpdi(CR_is_vol, Rscratch, 1); }
++ {
++ Label LnotVolatile;
++ __ beq(CCR0, LnotVolatile);
++ __ release();
++ __ align(32, 12);
++ __ bind(LnotVolatile);
++ }
++
++ // Do the store and fencing.
++ switch(bytecode()) {
++ case Bytecodes::_fast_aputfield:
++ // Store into the field.
++ do_oop_store(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, Rscratch2, Rscratch3, _bs->kind(), false /* precise */, true /* check null */);
++ break;
++
++ case Bytecodes::_fast_iputfield:
++ __ stwx(R17_tos, Rclass_or_obj, Roffset);
++ break;
++
++ case Bytecodes::_fast_lputfield:
++ __ stdx(R17_tos, Rclass_or_obj, Roffset);
++ break;
++
++ case Bytecodes::_fast_bputfield:
++ __ stbx(R17_tos, Rclass_or_obj, Roffset);
++ break;
++
++ case Bytecodes::_fast_cputfield:
++ case Bytecodes::_fast_sputfield:
++ __ sthx(R17_tos, Rclass_or_obj, Roffset);
++ break;
++
++ case Bytecodes::_fast_fputfield:
++ __ stfsx(F15_ftos, Rclass_or_obj, Roffset);
++ break;
++
++ case Bytecodes::_fast_dputfield:
++ __ stfdx(F15_ftos, Rclass_or_obj, Roffset);
++ break;
++
++ default: ShouldNotReachHere();
++ }
++
++ if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
++ Label LVolatile;
++ __ beq(CR_is_vol, LVolatile);
++ __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
++
++ __ align(32, 12);
++ __ bind(LVolatile);
++ __ fence();
++ }
++}
++
++void TemplateTable::fast_accessfield(TosState state) {
++ transition(atos, state);
++
++ Label LisVolatile;
++ ByteSize cp_base_offset = ConstantPoolCache::base_offset();
++
++ const Register Rcache = R3_ARG1,
++ Rclass_or_obj = R17_tos,
++ Roffset = R22_tmp2,
++ Rflags = R23_tmp3,
++ Rscratch = R12_scratch2;
++
++ // Constant pool already resolved. Get the field offset.
++ __ get_cache_and_index_at_bcp(Rcache, 1);
++ load_field_cp_cache_entry(noreg, Rcache, noreg, Roffset, Rflags, false);
++
++ // JVMTI support
++ jvmti_post_field_access(Rcache, Rscratch, false, true);
++
++ // Get the load address.
++ __ null_check_throw(Rclass_or_obj, -1, Rscratch);
++
++ // Get volatile flag.
++ __ rldicl_(Rscratch, Rflags, 64-ConstantPoolCacheEntry::is_volatile_shift, 63); // Extract volatile bit.
++ __ bne(CCR0, LisVolatile);
++
++ switch(bytecode()) {
++ case Bytecodes::_fast_agetfield:
++ {
++ __ load_heap_oop(R17_tos, (RegisterOrConstant)Roffset, Rclass_or_obj);
++ __ verify_oop(R17_tos);
++ __ dispatch_epilog(state, Bytecodes::length_for(bytecode()));
++
++ __ bind(LisVolatile);
++ if (support_IRIW_for_not_multiple_copy_atomic_cpu) { __ fence(); }
++ __ load_heap_oop(R17_tos, (RegisterOrConstant)Roffset, Rclass_or_obj);
++ __ verify_oop(R17_tos);
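++      // Acquire: twi_0 is a never-taken trap that creates a data dependency
++      // on the loaded value; together with the isync below it forms the
++      // load/compare-or-trap/isync acquire idiom.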
++ __ twi_0(R17_tos);
++ __ isync();
++ break;
++ }
++ case Bytecodes::_fast_igetfield:
++ {
++ __ lwax(R17_tos, Rclass_or_obj, Roffset);
++ __ dispatch_epilog(state, Bytecodes::length_for(bytecode()));
++
++ __ bind(LisVolatile);
++ if (support_IRIW_for_not_multiple_copy_atomic_cpu) { __ fence(); }
++ __ lwax(R17_tos, Rclass_or_obj, Roffset);
++ __ twi_0(R17_tos);
++ __ isync();
++ break;
++ }
++ case Bytecodes::_fast_lgetfield:
++ {
++ __ ldx(R17_tos, Rclass_or_obj, Roffset);
++ __ dispatch_epilog(state, Bytecodes::length_for(bytecode()));
++
++ __ bind(LisVolatile);
++ if (support_IRIW_for_not_multiple_copy_atomic_cpu) { __ fence(); }
++ __ ldx(R17_tos, Rclass_or_obj, Roffset);
++ __ twi_0(R17_tos);
++ __ isync();
++ break;
++ }
++ case Bytecodes::_fast_bgetfield:
++ {
++ __ lbzx(R17_tos, Rclass_or_obj, Roffset);
++ __ extsb(R17_tos, R17_tos);
++ __ dispatch_epilog(state, Bytecodes::length_for(bytecode()));
++
++ __ bind(LisVolatile);
++ if (support_IRIW_for_not_multiple_copy_atomic_cpu) { __ fence(); }
++ __ lbzx(R17_tos, Rclass_or_obj, Roffset);
++ __ twi_0(R17_tos);
++ __ extsb(R17_tos, R17_tos);
++ __ isync();
++ break;
++ }
++ case Bytecodes::_fast_cgetfield:
++ {
++ __ lhzx(R17_tos, Rclass_or_obj, Roffset);
++ __ dispatch_epilog(state, Bytecodes::length_for(bytecode()));
++
++ __ bind(LisVolatile);
++ if (support_IRIW_for_not_multiple_copy_atomic_cpu) { __ fence(); }
++ __ lhzx(R17_tos, Rclass_or_obj, Roffset);
++ __ twi_0(R17_tos);
++ __ isync();
++ break;
++ }
++ case Bytecodes::_fast_sgetfield:
++ {
++ __ lhax(R17_tos, Rclass_or_obj, Roffset);
++ __ dispatch_epilog(state, Bytecodes::length_for(bytecode()));
++
++ __ bind(LisVolatile);
++ if (support_IRIW_for_not_multiple_copy_atomic_cpu) { __ fence(); }
++ __ lhax(R17_tos, Rclass_or_obj, Roffset);
++ __ twi_0(R17_tos);
++ __ isync();
++ break;
++ }
++ case Bytecodes::_fast_fgetfield:
++ {
++ __ lfsx(F15_ftos, Rclass_or_obj, Roffset);
++ __ dispatch_epilog(state, Bytecodes::length_for(bytecode()));
++
++ __ bind(LisVolatile);
++ Label Ldummy;
++ if (support_IRIW_for_not_multiple_copy_atomic_cpu) { __ fence(); }
++ __ lfsx(F15_ftos, Rclass_or_obj, Roffset);
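++      // FP loads can't use the twi/isync idiom, so the acquire is built from
++      // a self-compare, a branch (only taken for NaN) to the immediately
++      // following label, and isync.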
++ __ fcmpu(CCR0, F15_ftos, F15_ftos); // Acquire by cmp-br-isync.
++ __ bne_predict_not_taken(CCR0, Ldummy);
++ __ bind(Ldummy);
++ __ isync();
++ break;
++ }
++ case Bytecodes::_fast_dgetfield:
++ {
++ __ lfdx(F15_ftos, Rclass_or_obj, Roffset);
++ __ dispatch_epilog(state, Bytecodes::length_for(bytecode()));
++
++ __ bind(LisVolatile);
++ Label Ldummy;
++ if (support_IRIW_for_not_multiple_copy_atomic_cpu) { __ fence(); }
++ __ lfdx(F15_ftos, Rclass_or_obj, Roffset);
++ __ fcmpu(CCR0, F15_ftos, F15_ftos); // Acquire by cmp-br-isync.
++ __ bne_predict_not_taken(CCR0, Ldummy);
++ __ bind(Ldummy);
++ __ isync();
++ break;
++ }
++ default: ShouldNotReachHere();
++ }
++}
++
++void TemplateTable::fast_xaccess(TosState state) {
++ transition(vtos, state);
++
++ Label LisVolatile;
++ ByteSize cp_base_offset = ConstantPoolCache::base_offset();
++ const Register Rcache = R3_ARG1,
++ Rclass_or_obj = R17_tos,
++ Roffset = R22_tmp2,
++ Rflags = R23_tmp3,
++ Rscratch = R12_scratch2;
++
++ __ ld(Rclass_or_obj, 0, R18_locals);
++
++ // Constant pool already resolved. Get the field offset.
++ __ get_cache_and_index_at_bcp(Rcache, 2);
++ load_field_cp_cache_entry(noreg, Rcache, noreg, Roffset, Rflags, false);
++
++ // JVMTI support not needed, since we switch back to single bytecode as soon as debugger attaches.
++
++ // Needed to report exception at the correct bcp.
++ __ addi(R14_bcp, R14_bcp, 1);
++
++ // Get the load address.
++ __ null_check_throw(Rclass_or_obj, -1, Rscratch);
++
++ // Get volatile flag.
++ __ rldicl_(Rscratch, Rflags, 64-ConstantPoolCacheEntry::is_volatile_shift, 63); // Extract volatile bit.
++ __ bne(CCR0, LisVolatile);
++
++ switch(state) {
++ case atos:
++ {
++ __ load_heap_oop(R17_tos, (RegisterOrConstant)Roffset, Rclass_or_obj);
++ __ verify_oop(R17_tos);
++ __ dispatch_epilog(state, Bytecodes::length_for(bytecode()) - 1); // Undo bcp increment.
++
++ __ bind(LisVolatile);
++ if (support_IRIW_for_not_multiple_copy_atomic_cpu) { __ fence(); }
++ __ load_heap_oop(R17_tos, (RegisterOrConstant)Roffset, Rclass_or_obj);
++ __ verify_oop(R17_tos);
++ __ twi_0(R17_tos);
++ __ isync();
++ break;
++ }
++ case itos:
++ {
++ __ lwax(R17_tos, Rclass_or_obj, Roffset);
++ __ dispatch_epilog(state, Bytecodes::length_for(bytecode()) - 1); // Undo bcp increment.
++
++ __ bind(LisVolatile);
++ if (support_IRIW_for_not_multiple_copy_atomic_cpu) { __ fence(); }
++ __ lwax(R17_tos, Rclass_or_obj, Roffset);
++ __ twi_0(R17_tos);
++ __ isync();
++ break;
++ }
++ case ftos:
++ {
++ __ lfsx(F15_ftos, Rclass_or_obj, Roffset);
++ __ dispatch_epilog(state, Bytecodes::length_for(bytecode()) - 1); // Undo bcp increment.
++
++ __ bind(LisVolatile);
++ Label Ldummy;
++ if (support_IRIW_for_not_multiple_copy_atomic_cpu) { __ fence(); }
++ __ lfsx(F15_ftos, Rclass_or_obj, Roffset);
++ __ fcmpu(CCR0, F15_ftos, F15_ftos); // Acquire by cmp-br-isync.
++ __ bne_predict_not_taken(CCR0, Ldummy);
++ __ bind(Ldummy);
++ __ isync();
++ break;
++ }
++ default: ShouldNotReachHere();
++ }
++ __ addi(R14_bcp, R14_bcp, -1);
++}
++
++// ============================================================================
++// Calls
++
++// Common code for invoke
++//
++// Input:
++// - byte_no
++//
++// Output:
++// - Rmethod: The method to invoke next.
++// - Rret_addr: The return address to return to.
++// - Rindex: MethodType (invokehandle) or CallSite obj (invokedynamic)
++// - Rrecv: Cache for "this" pointer, might be noreg if static call.
++// - Rflags: Method flags from const pool cache.
++//
++// Kills:
++// - Rscratch1
++//
++void TemplateTable::prepare_invoke(int byte_no,
++ Register Rmethod, // linked method (or i-klass)
++ Register Rret_addr,// return address
++ Register Rindex, // itable index, MethodType, etc.
++ Register Rrecv, // If caller wants to see it.
++ Register Rflags, // If caller wants to test it.
++ Register Rscratch
++ ) {
++ // Determine flags.
++ const Bytecodes::Code code = bytecode();
++ const bool is_invokeinterface = code == Bytecodes::_invokeinterface;
++ const bool is_invokedynamic = code == Bytecodes::_invokedynamic;
++ const bool is_invokehandle = code == Bytecodes::_invokehandle;
++ const bool is_invokevirtual = code == Bytecodes::_invokevirtual;
++ const bool is_invokespecial = code == Bytecodes::_invokespecial;
++ const bool load_receiver = (Rrecv != noreg);
++ assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic), "");
++
++ assert_different_registers(Rmethod, Rindex, Rflags, Rscratch);
++ assert_different_registers(Rmethod, Rrecv, Rflags, Rscratch);
++ assert_different_registers(Rret_addr, Rscratch);
++
++ load_invoke_cp_cache_entry(byte_no, Rmethod, Rindex, Rflags, is_invokevirtual, false, is_invokedynamic);
++
++ // Saving of SP done in call_from_interpreter.
++
++ // Maybe push "appendix" to arguments.
++ if (is_invokedynamic || is_invokehandle) {
++ Label Ldone;
++ __ rldicl_(R0, Rflags, 64-ConstantPoolCacheEntry::has_appendix_shift, 63);
++ __ beq(CCR0, Ldone);
++ // Push "appendix" (MethodType, CallSite, etc.).
++ // This must be done before we get the receiver,
++ // since the parameter_size includes it.
++ __ load_resolved_reference_at_index(Rscratch, Rindex);
++ __ verify_oop(Rscratch);
++ __ push_ptr(Rscratch);
++ __ bind(Ldone);
++ }
++
++ // Load receiver if needed (after appendix is pushed so parameter size is correct).
++ if (load_receiver) {
++ const Register Rparam_count = Rscratch;
++ __ andi(Rparam_count, Rflags, ConstantPoolCacheEntry::parameter_size_mask);
++ __ load_receiver(Rparam_count, Rrecv);
++ __ verify_oop(Rrecv);
++ }
++
++ // Get return address.
++ {
++ Register Rtable_addr = Rscratch;
++ Register Rret_type = Rret_addr;
++ address table_addr = (address) Interpreter::invoke_return_entry_table_for(code);
++
++ // Get return type. It's coded into the upper 4 bits of the lower half of the 64 bit value.
++ __ rldicl(Rret_type, Rflags, 64-ConstantPoolCacheEntry::tos_state_shift, 64-ConstantPoolCacheEntry::tos_state_bits);
++ __ load_dispatch_table(Rtable_addr, (address*)table_addr);
++ __ sldi(Rret_type, Rret_type, LogBytesPerWord);
++ // Get return address.
++ __ ldx(Rret_addr, Rtable_addr, Rret_type);
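++    // The return entry table has one entry per TosState; the entry selected
++    // by the tos_state bits handles the callee's result type on return.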
++ }
++}
++
++// Helper for virtual calls. Load target out of vtable and jump off!
++// Kills all passed registers.
++void TemplateTable::generate_vtable_call(Register Rrecv_klass, Register Rindex, Register Rret, Register Rtemp) {
++
++ assert_different_registers(Rrecv_klass, Rtemp, Rret);
++ const Register Rtarget_method = Rindex;
++
++ // Get target method & entry point.
++ const int base = InstanceKlass::vtable_start_offset() * wordSize;
++  // Calc vtable addr: scale the vtable index by the vtable entry size (8 bytes).
++ __ sldi(Rindex, Rindex, exact_log2(vtableEntry::size() * wordSize));
++ // Load target.
++ __ addi(Rrecv_klass, Rrecv_klass, base + vtableEntry::method_offset_in_bytes());
++ __ ldx(Rtarget_method, Rindex, Rrecv_klass);
++ __ call_from_interpreter(Rtarget_method, Rret, Rrecv_klass /* scratch1 */, Rtemp /* scratch2 */);
++}
++
++// Virtual or final call. Final calls are rewritten on the fly to run through "fast_finalcall" next time.
++void TemplateTable::invokevirtual(int byte_no) {
++ transition(vtos, vtos);
++
++ Register Rtable_addr = R11_scratch1,
++ Rret_type = R12_scratch2,
++ Rret_addr = R5_ARG3,
++ Rflags = R22_tmp2, // Should survive C call.
++ Rrecv = R3_ARG1,
++ Rrecv_klass = Rrecv,
++ Rvtableindex_or_method = R31, // Should survive C call.
++ Rnum_params = R4_ARG2,
++ Rnew_bc = R6_ARG4;
++
++ Label LnotFinal;
++
++ load_invoke_cp_cache_entry(byte_no, Rvtableindex_or_method, noreg, Rflags, /*virtual*/ true, false, false);
++
++ __ testbitdi(CCR0, R0, Rflags, ConstantPoolCacheEntry::is_vfinal_shift);
++ __ bfalse(CCR0, LnotFinal);
++
++ patch_bytecode(Bytecodes::_fast_invokevfinal, Rnew_bc, R12_scratch2);
++ invokevfinal_helper(Rvtableindex_or_method, Rflags, R11_scratch1, R12_scratch2);
++
++ __ align(32, 12);
++ __ bind(LnotFinal);
++ // Load "this" pointer (receiver).
++ __ rldicl(Rnum_params, Rflags, 64, 48);
++ __ load_receiver(Rnum_params, Rrecv);
++ __ verify_oop(Rrecv);
++
++ // Get return type. It's coded into the upper 4 bits of the lower half of the 64 bit value.
++ __ rldicl(Rret_type, Rflags, 64-ConstantPoolCacheEntry::tos_state_shift, 64-ConstantPoolCacheEntry::tos_state_bits);
++ __ load_dispatch_table(Rtable_addr, Interpreter::invoke_return_entry_table());
++ __ sldi(Rret_type, Rret_type, LogBytesPerWord);
++ __ ldx(Rret_addr, Rret_type, Rtable_addr);
++ __ null_check_throw(Rrecv, oopDesc::klass_offset_in_bytes(), R11_scratch1);
++ __ load_klass(Rrecv_klass, Rrecv);
++ __ verify_klass_ptr(Rrecv_klass);
++ __ profile_virtual_call(Rrecv_klass, R11_scratch1, R12_scratch2, false);
++
++ generate_vtable_call(Rrecv_klass, Rvtableindex_or_method, Rret_addr, R11_scratch1);
++}
++
++void TemplateTable::fast_invokevfinal(int byte_no) {
++ transition(vtos, vtos);
++
++ assert(byte_no == f2_byte, "use this argument");
++ Register Rflags = R22_tmp2,
++ Rmethod = R31;
++ load_invoke_cp_cache_entry(byte_no, Rmethod, noreg, Rflags, /*virtual*/ true, /*is_invokevfinal*/ true, false);
++ invokevfinal_helper(Rmethod, Rflags, R11_scratch1, R12_scratch2);
++}
++
++void TemplateTable::invokevfinal_helper(Register Rmethod, Register Rflags, Register Rscratch1, Register Rscratch2) {
++
++ assert_different_registers(Rmethod, Rflags, Rscratch1, Rscratch2);
++
++ // Load receiver from stack slot.
++ Register Rrecv = Rscratch2;
++ Register Rnum_params = Rrecv;
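++  // Note that Rnum_params aliases Rrecv: load_receiver below consumes the
++  // parameter count and leaves the receiver in the same register.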
++
++ __ ld(Rnum_params, in_bytes(Method::const_offset()), Rmethod);
++ __ lhz(Rnum_params /* number of params */, in_bytes(ConstMethod::size_of_parameters_offset()), Rnum_params);
++
++ // Get return address.
++ Register Rtable_addr = Rscratch1,
++ Rret_addr = Rflags,
++ Rret_type = Rret_addr;
++ // Get return type. It's coded into the upper 4 bits of the lower half of the 64 bit value.
++ __ rldicl(Rret_type, Rflags, 64-ConstantPoolCacheEntry::tos_state_shift, 64-ConstantPoolCacheEntry::tos_state_bits);
++ __ load_dispatch_table(Rtable_addr, Interpreter::invoke_return_entry_table());
++ __ sldi(Rret_type, Rret_type, LogBytesPerWord);
++ __ ldx(Rret_addr, Rret_type, Rtable_addr);
++
++ // Load receiver and receiver NULL check.
++ __ load_receiver(Rnum_params, Rrecv);
++ __ null_check_throw(Rrecv, -1, Rscratch1);
++
++ __ profile_final_call(Rrecv, Rscratch1);
++
++ // Do the call.
++ __ call_from_interpreter(Rmethod, Rret_addr, Rscratch1, Rscratch2);
++}
++
++void TemplateTable::invokespecial(int byte_no) {
++ assert(byte_no == f1_byte, "use this argument");
++ transition(vtos, vtos);
++
++ Register Rtable_addr = R3_ARG1,
++ Rret_addr = R4_ARG2,
++ Rflags = R5_ARG3,
++ Rreceiver = R6_ARG4,
++ Rmethod = R31;
++
++ prepare_invoke(byte_no, Rmethod, Rret_addr, noreg, Rreceiver, Rflags, R11_scratch1);
++
++ // Receiver NULL check.
++ __ null_check_throw(Rreceiver, -1, R11_scratch1);
++
++ __ profile_call(R11_scratch1, R12_scratch2);
++ __ call_from_interpreter(Rmethod, Rret_addr, R11_scratch1, R12_scratch2);
++}
++
++void TemplateTable::invokestatic(int byte_no) {
++ assert(byte_no == f1_byte, "use this argument");
++ transition(vtos, vtos);
++
++ Register Rtable_addr = R3_ARG1,
++ Rret_addr = R4_ARG2,
++ Rflags = R5_ARG3;
++
++ prepare_invoke(byte_no, R19_method, Rret_addr, noreg, noreg, Rflags, R11_scratch1);
++
++ __ profile_call(R11_scratch1, R12_scratch2);
++ __ call_from_interpreter(R19_method, Rret_addr, R11_scratch1, R12_scratch2);
++}
++
++void TemplateTable::invokeinterface_object_method(Register Rrecv_klass,
++ Register Rret,
++ Register Rflags,
++ Register Rindex,
++ Register Rtemp1,
++ Register Rtemp2) {
++
++ assert_different_registers(Rindex, Rret, Rrecv_klass, Rflags, Rtemp1, Rtemp2);
++ Label LnotFinal;
++
++ // Check for vfinal.
++ __ testbitdi(CCR0, R0, Rflags, ConstantPoolCacheEntry::is_vfinal_shift);
++ __ bfalse(CCR0, LnotFinal);
++
++ Register Rscratch = Rflags; // Rflags is dead now.
++
++ // Final call case.
++ __ profile_final_call(Rtemp1, Rscratch);
++ // Do the final call - the index (f2) contains the method.
++ __ call_from_interpreter(Rindex, Rret, Rscratch, Rrecv_klass /* scratch */);
++
++  // Non-final call case.
++ __ bind(LnotFinal);
++ __ profile_virtual_call(Rrecv_klass, Rtemp1, Rscratch, false);
++ generate_vtable_call(Rrecv_klass, Rindex, Rret, Rscratch);
++}
++
++void TemplateTable::invokeinterface(int byte_no) {
++ assert(byte_no == f1_byte, "use this argument");
++ transition(vtos, vtos);
++
++ const Register Rscratch1 = R11_scratch1,
++ Rscratch2 = R12_scratch2,
++ Rscratch3 = R9_ARG7,
++ Rscratch4 = R10_ARG8,
++ Rtable_addr = Rscratch2,
++ Rinterface_klass = R5_ARG3,
++ Rret_type = R8_ARG6,
++ Rret_addr = Rret_type,
++ Rindex = R6_ARG4,
++ Rreceiver = R4_ARG2,
++ Rrecv_klass = Rreceiver,
++ Rflags = R7_ARG5;
++
++ prepare_invoke(byte_no, Rinterface_klass, Rret_addr, Rindex, Rreceiver, Rflags, Rscratch1);
++
++ // Get receiver klass.
++ __ null_check_throw(Rreceiver, oopDesc::klass_offset_in_bytes(), Rscratch3);
++ __ load_klass(Rrecv_klass, Rreceiver);
++
++ // Check corner case object method.
++ Label LobjectMethod;
++
++ __ testbitdi(CCR0, R0, Rflags, ConstantPoolCacheEntry::is_forced_virtual_shift);
++ __ btrue(CCR0, LobjectMethod);
++
++ // Fallthrough: The normal invokeinterface case.
++ __ profile_virtual_call(Rrecv_klass, Rscratch1, Rscratch2, false);
++
++ // Find entry point to call.
++ Label Lthrow_icc, Lthrow_ame;
++ // Result will be returned in Rindex.
++ __ mr(Rscratch4, Rrecv_klass);
++ __ mr(Rscratch3, Rindex);
++ __ lookup_interface_method(Rrecv_klass, Rinterface_klass, Rindex, Rindex, Rscratch1, Rscratch2, Lthrow_icc);
++
++ __ cmpdi(CCR0, Rindex, 0);
++ __ beq(CCR0, Lthrow_ame);
++ // Found entry. Jump off!
++ __ call_from_interpreter(Rindex, Rret_addr, Rscratch1, Rscratch2);
++
++ // Vtable entry was NULL => Throw abstract method error.
++ __ bind(Lthrow_ame);
++ __ mr(Rrecv_klass, Rscratch4);
++ __ mr(Rindex, Rscratch3);
++ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError));
++
++ // Interface was not found => Throw incompatible class change error.
++ __ bind(Lthrow_icc);
++ __ mr(Rrecv_klass, Rscratch4);
++ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_IncompatibleClassChangeError));
++
++ __ should_not_reach_here();
++
++ // Special case of invokeinterface called for virtual method of
++ // java.lang.Object. See ConstantPoolCacheEntry::set_method() for details:
++  // The invokeinterface was rewritten to an invokevirtual, hence we have
++ // to handle this corner case. This code isn't produced by javac, but could
++ // be produced by another compliant java compiler.
++ __ bind(LobjectMethod);
++ invokeinterface_object_method(Rrecv_klass, Rret_addr, Rflags, Rindex, Rscratch1, Rscratch2);
++}
++
++void TemplateTable::invokedynamic(int byte_no) {
++ transition(vtos, vtos);
++
++ const Register Rret_addr = R3_ARG1,
++ Rflags = R4_ARG2,
++ Rmethod = R22_tmp2,
++ Rscratch1 = R11_scratch1,
++ Rscratch2 = R12_scratch2;
++
++ if (!EnableInvokeDynamic) {
++ // We should not encounter this bytecode if !EnableInvokeDynamic.
++ // The verifier will stop it. However, if we get past the verifier,
++ // this will stop the thread in a reasonable way, without crashing the JVM.
++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_IncompatibleClassChangeError));
++ // The call_VM checks for exception, so we should never return here.
++ __ should_not_reach_here();
++ return;
++ }
++
++ prepare_invoke(byte_no, Rmethod, Rret_addr, Rscratch1, noreg, Rflags, Rscratch2);
++
++ // Profile this call.
++ __ profile_call(Rscratch1, Rscratch2);
++
++ // Off we go. With the new method handles, we don't jump to a method handle
++  // entry any more. Instead, we pushed an "appendix" in prepare_invoke, which happens
++  // to be the CallSite object the bootstrap method returned. This is passed to a
++  // "link" method which does the dispatch (most likely it just grabs the MH stored
++  // inside the CallSite and does an invokehandle).
++ __ call_from_interpreter(Rmethod, Rret_addr, Rscratch1 /* scratch1 */, Rscratch2 /* scratch2 */);
++}
++
++void TemplateTable::invokehandle(int byte_no) {
++ transition(vtos, vtos);
++
++ const Register Rret_addr = R3_ARG1,
++ Rflags = R4_ARG2,
++ Rrecv = R5_ARG3,
++ Rmethod = R22_tmp2,
++ Rscratch1 = R11_scratch1,
++ Rscratch2 = R12_scratch2;
++
++ if (!EnableInvokeDynamic) {
++ // Rewriter does not generate this bytecode.
++ __ should_not_reach_here();
++ return;
++ }
++
++ prepare_invoke(byte_no, Rmethod, Rret_addr, Rscratch1, Rrecv, Rflags, Rscratch2);
++ __ verify_method_ptr(Rmethod);
++ __ null_check_throw(Rrecv, -1, Rscratch2);
++
++ __ profile_final_call(Rrecv, Rscratch1);
++
++ // Still no call from handle => We call the method handle interpreter here.
++ __ call_from_interpreter(Rmethod, Rret_addr, Rscratch1 /* scratch1 */, Rscratch2 /* scratch2 */);
++}
++
++// =============================================================================
++// Allocation
++
++// Puts allocated obj ref onto the expression stack.
++void TemplateTable::_new() {
++ transition(vtos, atos);
++
++ Label Lslow_case,
++ Ldone,
++ Linitialize_header,
++ Lallocate_shared,
++ Linitialize_object; // Including clearing the fields.
++
++ const Register RallocatedObject = R17_tos,
++ RinstanceKlass = R9_ARG7,
++ Rscratch = R11_scratch1,
++ Roffset = R8_ARG6,
++ Rinstance_size = Roffset,
++ Rcpool = R4_ARG2,
++ Rtags = R3_ARG1,
++ Rindex = R5_ARG3;
++
++ const bool allow_shared_alloc = Universe::heap()->supports_inline_contig_alloc() && !CMSIncrementalMode;
++
++ // --------------------------------------------------------------------------
++ // Check if fast case is possible.
++
++ // Load pointers to const pool and const pool's tags array.
++ __ get_cpool_and_tags(Rcpool, Rtags);
++ // Load index of constant pool entry.
++ __ get_2_byte_integer_at_bcp(1, Rindex, InterpreterMacroAssembler::Unsigned);
++
++ if (UseTLAB) {
++    // Make sure the class we're about to instantiate has been resolved.
++    // This is done before loading the InstanceKlass to be consistent with the order
++    // in which the constant pool is updated (see ConstantPoolCache::klass_at_put).
++    __ addi(Rtags, Rtags, Array<u1>::base_offset_in_bytes());
++ __ lbzx(Rtags, Rindex, Rtags);
++
++ __ cmpdi(CCR0, Rtags, JVM_CONSTANT_Class);
++ __ bne(CCR0, Lslow_case);
++
++ // Get instanceKlass (load from Rcpool + sizeof(ConstantPool) + Rindex*BytesPerWord).
++ __ sldi(Roffset, Rindex, LogBytesPerWord);
++ __ addi(Rscratch, Rcpool, sizeof(ConstantPool));
++ __ isync(); // Order load of instance Klass wrt. tags.
++ __ ldx(RinstanceKlass, Roffset, Rscratch);
++
++ // Make sure klass is fully initialized and get instance_size.
++ __ lbz(Rscratch, in_bytes(InstanceKlass::init_state_offset()), RinstanceKlass);
++ __ lwz(Rinstance_size, in_bytes(Klass::layout_helper_offset()), RinstanceKlass);
++
++ __ cmpdi(CCR1, Rscratch, InstanceKlass::fully_initialized);
++    // Make sure klass has no finalizer and is not abstract, an interface, or java/lang/Class.
++ __ andi_(R0, Rinstance_size, Klass::_lh_instance_slow_path_bit); // slow path bit equals 0?
++
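++    // crnand combines both checks: CR0.eq := !(CR1.eq && CR0.eq), so the beq
++    // below takes the slow case if the klass is not fully initialized or the
++    // slow-path bit is set.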
++ __ crnand(/*CR0 eq*/2, /*CR1 eq*/4+2, /*CR0 eq*/2); // slow path bit set or not fully initialized?
++ __ beq(CCR0, Lslow_case);
++
++ // --------------------------------------------------------------------------
++ // Fast case:
++ // Allocate the instance.
++ // 1) Try to allocate in the TLAB.
++ // 2) If fail, and the TLAB is not full enough to discard, allocate in the shared Eden.
++ // 3) If the above fails (or is not applicable), go to a slow case (creates a new TLAB, etc.).
++
++ Register RoldTopValue = RallocatedObject; // Object will be allocated here if it fits.
++ Register RnewTopValue = R6_ARG4;
++ Register RendValue = R7_ARG5;
++
++ // Check if we can allocate in the TLAB.
++ __ ld(RoldTopValue, in_bytes(JavaThread::tlab_top_offset()), R16_thread);
++ __ ld(RendValue, in_bytes(JavaThread::tlab_end_offset()), R16_thread);
++
++ __ add(RnewTopValue, Rinstance_size, RoldTopValue);
++
++ // If there is enough space, we do not CAS and do not clear.
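++    // Unsigned compare: the object fits if RnewTopValue <= RendValue;
++    // otherwise branch out to shared-eden or slow-case allocation.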
++ __ cmpld(CCR0, RnewTopValue, RendValue);
++ __ bgt(CCR0, allow_shared_alloc ? Lallocate_shared : Lslow_case);
++
++ __ std(RnewTopValue, in_bytes(JavaThread::tlab_top_offset()), R16_thread);
++
++ if (ZeroTLAB) {
++ // The fields have already been cleared.
++ __ b(Linitialize_header);
++ } else {
++ // Initialize both the header and fields.
++ __ b(Linitialize_object);
++ }
++
++ // Fall through: TLAB was too small.
++ if (allow_shared_alloc) {
++ Register RtlabWasteLimitValue = R10_ARG8;
++ Register RfreeValue = RnewTopValue;
++
++ __ bind(Lallocate_shared);
++ // Check if tlab should be discarded (refill_waste_limit >= free).
++ __ ld(RtlabWasteLimitValue, in_bytes(JavaThread::tlab_refill_waste_limit_offset()), R16_thread);
++ __ subf(RfreeValue, RoldTopValue, RendValue);
++ __ srdi(RfreeValue, RfreeValue, LogHeapWordSize); // in dwords
++ __ cmpld(CCR0, RtlabWasteLimitValue, RfreeValue);
++ __ bge(CCR0, Lslow_case);
++
++ // Increment waste limit to prevent getting stuck on this slow path.
++ __ addi(RtlabWasteLimitValue, RtlabWasteLimitValue, (int)ThreadLocalAllocBuffer::refill_waste_limit_increment());
++ __ std(RtlabWasteLimitValue, in_bytes(JavaThread::tlab_refill_waste_limit_offset()), R16_thread);
++ }
++ // else: No allocation in the shared eden. // fallthru: __ b(Lslow_case);
++ }
++ // else: Always go the slow path.
++
++ // --------------------------------------------------------------------------
++ // slow case
++ __ bind(Lslow_case);
++ call_VM(R17_tos, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), Rcpool, Rindex);
++
++ if (UseTLAB) {
++ __ b(Ldone);
++ // --------------------------------------------------------------------------
++ // Init1: Zero out newly allocated memory.
++
++ if (!ZeroTLAB || allow_shared_alloc) {
++ // Clear object fields.
++ __ bind(Linitialize_object);
++
++ // Initialize remaining object fields.
++ Register Rbase = Rtags;
++ __ addi(Rinstance_size, Rinstance_size, 7 - (int)sizeof(oopDesc));
++ __ addi(Rbase, RallocatedObject, sizeof(oopDesc));
++ __ srdi(Rinstance_size, Rinstance_size, 3);
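++      // Rinstance_size now holds ceil((instance size - header size) / 8),
++      // i.e. the number of doublewords of field data to clear.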
++
++      // Clear out object, skipping the header. Also takes care of the zero-length case.
++ __ clear_memory_doubleword(Rbase, Rinstance_size);
++ // fallthru: __ b(Linitialize_header);
++ }
++
++ // --------------------------------------------------------------------------
++ // Init2: Initialize the header: mark, klass
++ __ bind(Linitialize_header);
++
++ // Init mark.
++ if (UseBiasedLocking) {
++ __ ld(Rscratch, in_bytes(Klass::prototype_header_offset()), RinstanceKlass);
++ } else {
++ __ load_const_optimized(Rscratch, markOopDesc::prototype(), R0);
++ }
++ __ std(Rscratch, oopDesc::mark_offset_in_bytes(), RallocatedObject);
++
++ // Init klass.
++ __ store_klass_gap(RallocatedObject);
++ __ store_klass(RallocatedObject, RinstanceKlass, Rscratch); // klass (last for cms)
++
++ // Check and trigger dtrace event.
++ {
++ SkipIfEqualZero skip_if(_masm, Rscratch, &DTraceAllocProbes);
++ __ push(atos);
++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc));
++ __ pop(atos);
++ }
++ }
++
++ // continue
++ __ bind(Ldone);
++
++ // Must prevent reordering of stores for object initialization with stores that publish the new object.
++ __ membar(Assembler::StoreStore);
++}
++
++void TemplateTable::newarray() {
++ transition(itos, atos);
++
++  __ lbz(R4, 1, R14_bcp); // Array element type operand.
++ __ extsw(R5, R17_tos);
++ call_VM(R17_tos, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), R4, R5 /* size */);
++
++ // Must prevent reordering of stores for object initialization with stores that publish the new object.
++ __ membar(Assembler::StoreStore);
++}
++
++void TemplateTable::anewarray() {
++ transition(itos, atos);
++
++ __ get_constant_pool(R4);
++ __ get_2_byte_integer_at_bcp(1, R5, InterpreterMacroAssembler::Unsigned);
++ __ extsw(R6, R17_tos); // size
++ call_VM(R17_tos, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), R4 /* pool */, R5 /* index */, R6 /* size */);
++
++ // Must prevent reordering of stores for object initialization with stores that publish the new object.
++ __ membar(Assembler::StoreStore);
++}
++
++// Allocate a multi dimensional array
++void TemplateTable::multianewarray() {
++ transition(vtos, atos);
++
++ Register Rptr = R31; // Needs to survive C call.
++
++  // Compute ndims * wordSize, the size of the dimension arguments on the expression stack.
++ __ lbz(Rptr, 3, R14_bcp);
++ __ sldi(Rptr, Rptr, Interpreter::logStackElementSize);
++  // Esp points past last_dim, so set R4 to the first_dim address.
++ __ add(R4, Rptr, R15_esp);
++ call_VM(R17_tos, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), R4 /* first_size_address */);
++ // Pop all dimensions off the stack.
++ __ add(R15_esp, Rptr, R15_esp);
++
++ // Must prevent reordering of stores for object initialization with stores that publish the new object.
++ __ membar(Assembler::StoreStore);
++}
++
++void TemplateTable::arraylength() {
++ transition(atos, itos);
++
++ Label LnoException;
++ __ verify_oop(R17_tos);
++ __ null_check_throw(R17_tos, arrayOopDesc::length_offset_in_bytes(), R11_scratch1);
++ __ lwa(R17_tos, arrayOopDesc::length_offset_in_bytes(), R17_tos);
++}
++
++// ============================================================================
++// Typechecks
++
++void TemplateTable::checkcast() {
++ transition(atos, atos);
++
++ Label Ldone, Lis_null, Lquicked, Lresolved;
++ Register Roffset = R6_ARG4,
++ RobjKlass = R4_ARG2,
++ RspecifiedKlass = R5_ARG3, // Generate_ClassCastException_verbose_handler will read value from this register.
++ Rcpool = R11_scratch1,
++ Rtags = R12_scratch2;
++
++ // Null does not pass.
++ __ cmpdi(CCR0, R17_tos, 0);
++ __ beq(CCR0, Lis_null);
++
++ // Get constant pool tag to find out if the bytecode has already been "quickened".
++ __ get_cpool_and_tags(Rcpool, Rtags);
++
++ __ get_2_byte_integer_at_bcp(1, Roffset, InterpreterMacroAssembler::Unsigned);
++
++  __ addi(Rtags, Rtags, Array<u1>::base_offset_in_bytes());
++ __ lbzx(Rtags, Rtags, Roffset);
++
++ __ cmpdi(CCR0, Rtags, JVM_CONSTANT_Class);
++ __ beq(CCR0, Lquicked);
++
++ // Call into the VM to "quicken" instanceof.
++ __ push_ptr(); // for GC
++ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
++ __ get_vm_result_2(RspecifiedKlass);
++ __ pop_ptr(); // Restore receiver.
++ __ b(Lresolved);
++
++ // Extract target class from constant pool.
++ __ bind(Lquicked);
++ __ sldi(Roffset, Roffset, LogBytesPerWord);
++ __ addi(Rcpool, Rcpool, sizeof(ConstantPool));
++ __ isync(); // Order load of specified Klass wrt. tags.
++ __ ldx(RspecifiedKlass, Rcpool, Roffset);
++
++ // Do the checkcast.
++ __ bind(Lresolved);
++ // Get value klass in RobjKlass.
++ __ load_klass(RobjKlass, R17_tos);
++ // Generate a fast subtype check. Branch to cast_ok if no failure. Return 0 if failure.
++ __ gen_subtype_check(RobjKlass, RspecifiedKlass, /*3 temp regs*/ Roffset, Rcpool, Rtags, /*target if subtype*/ Ldone);
++
++ // Not a subtype; so must throw exception
++ // Target class oop is in register R6_ARG4 == RspecifiedKlass by convention.
++ __ load_dispatch_table(R11_scratch1, (address*)Interpreter::_throw_ClassCastException_entry);
++ __ mtctr(R11_scratch1);
++ __ bctr();
++
++ // Profile the null case.
++ __ align(32, 12);
++ __ bind(Lis_null);
++ __ profile_null_seen(R11_scratch1, Rtags); // Rtags used as scratch.
++
++ __ align(32, 12);
++ __ bind(Ldone);
++}
++
++// Output:
++// - tos == 0: Obj was null or not an instance of class.
++// - tos == 1: Obj was an instance of class.
++void TemplateTable::instanceof() {
++ transition(atos, itos);
++
++ Label Ldone, Lis_null, Lquicked, Lresolved;
++ Register Roffset = R5_ARG3,
++ RobjKlass = R4_ARG2,
++ RspecifiedKlass = R6_ARG4, // Generate_ClassCastException_verbose_handler will expect the value in this register.
++ Rcpool = R11_scratch1,
++ Rtags = R12_scratch2;
++
++ // Null does not pass.
++ __ cmpdi(CCR0, R17_tos, 0);
++ __ beq(CCR0, Lis_null);
++
++ // Get constant pool tag to find out if the bytecode has already been "quickened".
++ __ get_cpool_and_tags(Rcpool, Rtags);
++
++ __ get_2_byte_integer_at_bcp(1, Roffset, InterpreterMacroAssembler::Unsigned);
++
++  __ addi(Rtags, Rtags, Array<u1>::base_offset_in_bytes());
++ __ lbzx(Rtags, Rtags, Roffset);
++
++ __ cmpdi(CCR0, Rtags, JVM_CONSTANT_Class);
++ __ beq(CCR0, Lquicked);
++
++ // Call into the VM to "quicken" instanceof.
++ __ push_ptr(); // for GC
++ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
++ __ get_vm_result_2(RspecifiedKlass);
++ __ pop_ptr(); // Restore receiver.
++ __ b(Lresolved);
++
++ // Extract target class from constant pool.
++ __ bind(Lquicked);
++ __ sldi(Roffset, Roffset, LogBytesPerWord);
++ __ addi(Rcpool, Rcpool, sizeof(ConstantPool));
++ __ isync(); // Order load of specified Klass wrt. tags.
++ __ ldx(RspecifiedKlass, Rcpool, Roffset);
++
++ // Do the checkcast.
++ __ bind(Lresolved);
++ // Get value klass in RobjKlass.
++ __ load_klass(RobjKlass, R17_tos);
++ // Generate a fast subtype check. Branch to cast_ok if no failure. Return 0 if failure.
++ __ li(R17_tos, 1);
++ __ gen_subtype_check(RobjKlass, RspecifiedKlass, /*3 temp regs*/ Roffset, Rcpool, Rtags, /*target if subtype*/ Ldone);
++ __ li(R17_tos, 0);
++
++ if (ProfileInterpreter) {
++ __ b(Ldone);
++ }
++
++ // Profile the null case.
++ __ align(32, 12);
++ __ bind(Lis_null);
++ __ profile_null_seen(Rcpool, Rtags); // Rcpool and Rtags used as scratch.
++
++ __ align(32, 12);
++ __ bind(Ldone);
++}
++
++// =============================================================================
++// Breakpoints
++
++void TemplateTable::_breakpoint() {
++ transition(vtos, vtos);
++
++ // Get the unpatched byte code.
++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::get_original_bytecode_at), R19_method, R14_bcp);
++ __ mr(R31, R3_RET);
++
++ // Post the breakpoint event.
++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), R19_method, R14_bcp);
++
++ // Complete the execution of original bytecode.
++ __ dispatch_Lbyte_code(vtos, R31, Interpreter::normal_table(vtos));
++}
++
++// =============================================================================
++// Exceptions
++
++void TemplateTable::athrow() {
++ transition(atos, vtos);
++
++ // Exception oop is in tos
++ __ verify_oop(R17_tos);
++
++ __ null_check_throw(R17_tos, -1, R11_scratch1);
++
++ // Throw exception interpreter entry expects exception oop to be in R3.
++ __ mr(R3_RET, R17_tos);
++ __ load_dispatch_table(R11_scratch1, (address*)Interpreter::throw_exception_entry());
++ __ mtctr(R11_scratch1);
++ __ bctr();
++}
++
++// =============================================================================
++// Synchronization
++// Searches the basic object lock list on the stack for a free slot
++// and uses it to lock the object in tos.
++//
++// Recursive locking is enabled by exiting the search if the same
++// object is already found in the list. Thus, a new basic object lock
++// is allocated "higher up" in the stack and is found first
++// at the next monitor exit.
++void TemplateTable::monitorenter() {
++ transition(atos, vtos);
++
++ __ verify_oop(R17_tos);
++
++ Register Rcurrent_monitor = R11_scratch1,
++ Rcurrent_obj = R12_scratch2,
++ Robj_to_lock = R17_tos,
++ Rscratch1 = R3_ARG1,
++ Rscratch2 = R4_ARG2,
++ Rscratch3 = R5_ARG3,
++ Rcurrent_obj_addr = R6_ARG4;
++
++ // ------------------------------------------------------------------------------
++ // Null pointer exception.
++ __ null_check_throw(Robj_to_lock, -1, R11_scratch1);
++
++ // Try to acquire a lock on the object.
++ // Repeat until succeeded (i.e., until monitorenter returns true).
++
++ // ------------------------------------------------------------------------------
++ // Find a free slot in the monitor block.
++ Label Lfound, Lexit, Lallocate_new;
++ ConditionRegister found_free_slot = CCR0,
++ found_same_obj = CCR1,
++ reached_limit = CCR6;
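++  // Using three distinct condition registers lets the loop below issue all
++  // three compares before any of the dependent branches are tested.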
++ {
++ Label Lloop, Lentry;
++ Register Rlimit = Rcurrent_monitor;
++
++ // Set up search loop - start with topmost monitor.
++ __ add(Rcurrent_obj_addr, BasicObjectLock::obj_offset_in_bytes(), R26_monitor);
++
++ __ ld(Rlimit, 0, R1_SP);
++ __ addi(Rlimit, Rlimit, - (frame::ijava_state_size + frame::interpreter_frame_monitor_size_in_bytes() - BasicObjectLock::obj_offset_in_bytes())); // Monitor base
++
++    // Check if any slot is present => shortcut to allocation if not.
++ __ cmpld(reached_limit, Rcurrent_obj_addr, Rlimit);
++ __ bgt(reached_limit, Lallocate_new);
++
++ // Pre-load topmost slot.
++ __ ld(Rcurrent_obj, 0, Rcurrent_obj_addr);
++ __ addi(Rcurrent_obj_addr, Rcurrent_obj_addr, frame::interpreter_frame_monitor_size() * wordSize);
++ // The search loop.
++ __ bind(Lloop);
++ // Found free slot?
++ __ cmpdi(found_free_slot, Rcurrent_obj, 0);
++ // Is this entry for same obj? If so, stop the search and take the found
++ // free slot or allocate a new one to enable recursive locking.
++ __ cmpd(found_same_obj, Rcurrent_obj, Robj_to_lock);
++ __ cmpld(reached_limit, Rcurrent_obj_addr, Rlimit);
++ __ beq(found_free_slot, Lexit);
++ __ beq(found_same_obj, Lallocate_new);
++ __ bgt(reached_limit, Lallocate_new);
++    // Check if the last allocated BasicObjectLock has been reached.
++ __ ld(Rcurrent_obj, 0, Rcurrent_obj_addr);
++ __ addi(Rcurrent_obj_addr, Rcurrent_obj_addr, frame::interpreter_frame_monitor_size() * wordSize);
++ // Next iteration if unchecked BasicObjectLocks exist on the stack.
++ __ b(Lloop);
++ }
++
++ // ------------------------------------------------------------------------------
++ // Check if we found a free slot.
++ __ bind(Lexit);
++
++ __ addi(Rcurrent_monitor, Rcurrent_obj_addr, -(frame::interpreter_frame_monitor_size() * wordSize) - BasicObjectLock::obj_offset_in_bytes());
++ __ addi(Rcurrent_obj_addr, Rcurrent_obj_addr, - frame::interpreter_frame_monitor_size() * wordSize);
++ __ b(Lfound);
++
++ // We didn't find a free BasicObjLock => allocate one.
++ __ align(32, 12);
++ __ bind(Lallocate_new);
++ __ add_monitor_to_stack(false, Rscratch1, Rscratch2);
++ __ mr(Rcurrent_monitor, R26_monitor);
++ __ addi(Rcurrent_obj_addr, R26_monitor, BasicObjectLock::obj_offset_in_bytes());
++
++ // ------------------------------------------------------------------------------
++ // We now have a slot to lock.
++ __ bind(Lfound);
++
++  // Increment bcp to point to the next bytecode, so exception handling for async exceptions works correctly.
++  // The object has already been popped from the stack, so the expression stack looks correct.
++ __ addi(R14_bcp, R14_bcp, 1);
++
++ __ std(Robj_to_lock, 0, Rcurrent_obj_addr);
++ __ lock_object(Rcurrent_monitor, Robj_to_lock);
++
++ // Check if there's enough space on the stack for the monitors after locking.
++ Label Lskip_stack_check;
++  // Optimization: If the monitors stack section is less than a standard page size (4K), don't run
++  // the stack check. There should be enough shadow pages to fit that in.
++ __ ld(Rscratch3, 0, R1_SP);
++ __ sub(Rscratch3, Rscratch3, R26_monitor);
++ __ cmpdi(CCR0, Rscratch3, 4*K);
++ __ blt(CCR0, Lskip_stack_check);
++
++ DEBUG_ONLY(__ untested("stack overflow check during monitor enter");)
++ __ li(Rscratch1, 0);
++ __ generate_stack_overflow_check_with_compare_and_throw(Rscratch1, Rscratch2);
++
++ __ align(32, 12);
++ __ bind(Lskip_stack_check);
++
++ // The bcp has already been incremented. Just need to dispatch to next instruction.
++ __ dispatch_next(vtos);
++}
++
++void TemplateTable::monitorexit() {
++ transition(atos, vtos);
++ __ verify_oop(R17_tos);
++
++ Register Rcurrent_monitor = R11_scratch1,
++ Rcurrent_obj = R12_scratch2,
++ Robj_to_lock = R17_tos,
++ Rcurrent_obj_addr = R3_ARG1,
++ Rlimit = R4_ARG2;
++ Label Lfound, Lillegal_monitor_state;
++
++ // Check corner case: unbalanced monitorEnter / Exit.
++ __ ld(Rlimit, 0, R1_SP);
++ __ addi(Rlimit, Rlimit, - (frame::ijava_state_size + frame::interpreter_frame_monitor_size_in_bytes())); // Monitor base
++
++ // Null pointer check.
++ __ null_check_throw(Robj_to_lock, -1, R11_scratch1);
++
++ __ cmpld(CCR0, R26_monitor, Rlimit);
++ __ bgt(CCR0, Lillegal_monitor_state);
++
++ // Find the corresponding slot in the monitors stack section.
++ {
++ Label Lloop;
++
++ // Start with topmost monitor.
++ __ addi(Rcurrent_obj_addr, R26_monitor, BasicObjectLock::obj_offset_in_bytes());
++ __ addi(Rlimit, Rlimit, BasicObjectLock::obj_offset_in_bytes());
++ __ ld(Rcurrent_obj, 0, Rcurrent_obj_addr);
++ __ addi(Rcurrent_obj_addr, Rcurrent_obj_addr, frame::interpreter_frame_monitor_size() * wordSize);
++
++ __ bind(Lloop);
++ // Is this entry for same obj?
++ __ cmpd(CCR0, Rcurrent_obj, Robj_to_lock);
++ __ beq(CCR0, Lfound);
++
++    // Check if the last allocated BasicObjectLock has been reached.
++
++ __ ld(Rcurrent_obj, 0, Rcurrent_obj_addr);
++ __ cmpld(CCR0, Rcurrent_obj_addr, Rlimit);
++ __ addi(Rcurrent_obj_addr, Rcurrent_obj_addr, frame::interpreter_frame_monitor_size() * wordSize);
++
++ // Next iteration if unchecked BasicObjectLocks exist on the stack.
++ __ ble(CCR0, Lloop);
++ }
++
++ // Fell through without finding the basic obj lock => throw up!
++ __ bind(Lillegal_monitor_state);
++ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_illegal_monitor_state_exception));
++ __ should_not_reach_here();
++
++ __ align(32, 12);
++ __ bind(Lfound);
++ __ addi(Rcurrent_monitor, Rcurrent_obj_addr,
++ -(frame::interpreter_frame_monitor_size() * wordSize) - BasicObjectLock::obj_offset_in_bytes());
++ __ unlock_object(Rcurrent_monitor);
++}
++
++// ============================================================================
++// Wide bytecodes
++
++// Wide instructions. Simply redirects to the wide entry point for that instruction.
++void TemplateTable::wide() {
++ transition(vtos, vtos);
++
++ const Register Rtable = R11_scratch1,
++ Rindex = R12_scratch2,
++ Rtmp = R0;
++
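++  // The opcode to be widened follows the wide prefix byte; it indexes the
++  // table of wide entry points.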
++ __ lbz(Rindex, 1, R14_bcp);
++
++ __ load_dispatch_table(Rtable, Interpreter::_wentry_point);
++
++ __ slwi(Rindex, Rindex, LogBytesPerWord);
++ __ ldx(Rtmp, Rtable, Rindex);
++ __ mtctr(Rtmp);
++ __ bctr();
++ // Note: the bcp increment step is part of the individual wide bytecode implementations.
++}
++#endif // !CC_INTERP
+--- ./hotspot/src/cpu/ppc/vm/templateTable_ppc_64.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/templateTable_ppc_64.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,38 @@
++/*
++ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2013, 2014 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_PPC_VM_TEMPLATETABLE_PPC_64_HPP
++#define CPU_PPC_VM_TEMPLATETABLE_PPC_64_HPP
++
++ static void prepare_invoke(int byte_no, Register Rmethod, Register Rret_addr, Register Rindex, Register Rrecv, Register Rflags, Register Rscratch);
++ static void invokevfinal_helper(Register Rmethod, Register Rflags, Register Rscratch1, Register Rscratch2);
++ static void generate_vtable_call(Register Rrecv_klass, Register Rindex, Register Rret, Register Rtemp);
++ static void invokeinterface_object_method(Register Rrecv_klass, Register Rret, Register Rflags, Register Rindex, Register Rtemp, Register Rtemp2);
++
++ // Branch_conditional which takes TemplateTable::Condition.
++ static void branch_conditional(ConditionRegister crx, TemplateTable::Condition cc, Label& L, bool invert = false);
++ static void if_cmp_common(Register Rfirst, Register Rsecond, Register Rscratch1, Register Rscratch2, Condition cc, bool is_jint, bool cmp0);
++
++#endif // CPU_PPC_VM_TEMPLATETABLE_PPC_64_HPP
+--- ./hotspot/src/cpu/ppc/vm/vmStructs_ppc.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/vmStructs_ppc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,41 @@
++/*
++ * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_PPC_VM_VMSTRUCTS_PPC_HPP
++#define CPU_PPC_VM_VMSTRUCTS_PPC_HPP
++
++// These are the CPU-specific fields, types and integer
++// constants required by the Serviceability Agent. This file is
++// referenced by vmStructs.cpp.
++
++#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field)
++
++#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type)
++
++#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
++
++#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
++
++#endif // CPU_PPC_VM_VMSTRUCTS_PPC_HPP
+--- ./hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,488 @@
++/*
++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2014 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "asm/assembler.inline.hpp"
++#include "asm/macroAssembler.inline.hpp"
++#include "compiler/disassembler.hpp"
++#include "memory/resourceArea.hpp"
++#include "runtime/java.hpp"
++#include "runtime/stubCodeGenerator.hpp"
++#include "utilities/defaultStream.hpp"
++#include "vm_version_ppc.hpp"
++#ifdef TARGET_OS_FAMILY_aix
++# include "os_aix.inline.hpp"
++#endif
++#ifdef TARGET_OS_FAMILY_linux
++# include "os_linux.inline.hpp"
++#endif
++
++# include <sys/sysinfo.h>
++
++int VM_Version::_features = VM_Version::unknown_m;
++int VM_Version::_measured_cache_line_size = 128; // default value
++const char* VM_Version::_features_str = "";
++bool VM_Version::_is_determine_features_test_running = false;
++
++
++#define MSG(flag) \
++ if (flag && !FLAG_IS_DEFAULT(flag)) \
++ jio_fprintf(defaultStream::error_stream(), \
++ "warning: -XX:+" #flag " requires -XX:+UseSIGTRAP\n" \
++ " -XX:+" #flag " will be disabled!\n");
++
++void VM_Version::initialize() {
++
++ // Test which instructions are supported and measure cache line size.
++ determine_features();
++
++  // If PowerArchitecturePPC64 hasn't been specified explicitly, determine it from the features.
++ if (FLAG_IS_DEFAULT(PowerArchitecturePPC64)) {
++ if (VM_Version::has_popcntw()) {
++ FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 7);
++ } else if (VM_Version::has_cmpb()) {
++ FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 6);
++ } else if (VM_Version::has_popcntb()) {
++ FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 5);
++ } else {
++ FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 0);
++ }
++ }
++ guarantee(PowerArchitecturePPC64 == 0 || PowerArchitecturePPC64 == 5 ||
++ PowerArchitecturePPC64 == 6 || PowerArchitecturePPC64 == 7,
++ "PowerArchitecturePPC64 should be 0, 5, 6 or 7");
++
++ if (!UseSIGTRAP) {
++ MSG(TrapBasedICMissChecks);
++ MSG(TrapBasedNotEntrantChecks);
++ MSG(TrapBasedNullChecks);
++ FLAG_SET_ERGO(bool, TrapBasedNotEntrantChecks, false);
++ FLAG_SET_ERGO(bool, TrapBasedNullChecks, false);
++ FLAG_SET_ERGO(bool, TrapBasedICMissChecks, false);
++ }
++
++#ifdef COMPILER2
++ if (!UseSIGTRAP) {
++ MSG(TrapBasedRangeChecks);
++ FLAG_SET_ERGO(bool, TrapBasedRangeChecks, false);
++ }
++
++ // On Power6 test for section size.
++ if (PowerArchitecturePPC64 == 6) {
++ determine_section_size();
++ // TODO: PPC port } else {
++ // TODO: PPC port PdScheduling::power6SectorSize = 0x20;
++ }
++
++ MaxVectorSize = 8;
++#endif
++
++ // Create and print feature-string.
++ char buf[(num_features+1) * 16]; // Max 16 chars per feature.
++ jio_snprintf(buf, sizeof(buf),
++ "ppc64%s%s%s%s%s%s%s%s",
++ (has_fsqrt() ? " fsqrt" : ""),
++ (has_isel() ? " isel" : ""),
++ (has_lxarxeh() ? " lxarxeh" : ""),
++ (has_cmpb() ? " cmpb" : ""),
++ //(has_mftgpr()? " mftgpr" : ""),
++ (has_popcntb() ? " popcntb" : ""),
++ (has_popcntw() ? " popcntw" : ""),
++ (has_fcfids() ? " fcfids" : ""),
++ (has_vand() ? " vand" : "")
++ // Make sure number of %s matches num_features!
++ );
++ _features_str = strdup(buf);
++ NOT_PRODUCT(if (Verbose) print_features(););
++
++ // PPC64 supports 8-byte compare-exchange operations (see
++ // Atomic::cmpxchg and StubGenerator::generate_atomic_cmpxchg_ptr)
++ // and 'atomic long memory ops' (see Unsafe_GetLongVolatile).
++ _supports_cx8 = true;
++
++ UseSSE = 0; // Only on x86 and x64
++
++ intx cache_line_size = _measured_cache_line_size;
++
++ if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) AllocatePrefetchStyle = 1;
++
++ if (AllocatePrefetchStyle == 4) {
++ AllocatePrefetchStepSize = cache_line_size; // Need exact value.
++ if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) AllocatePrefetchLines = 12; // Use larger blocks by default.
++ if (AllocatePrefetchDistance < 0) AllocatePrefetchDistance = 2*cache_line_size; // Default is not defined?
++ } else {
++ if (cache_line_size > AllocatePrefetchStepSize) AllocatePrefetchStepSize = cache_line_size;
++ if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) AllocatePrefetchLines = 3; // Optimistic value.
++ if (AllocatePrefetchDistance < 0) AllocatePrefetchDistance = 3*cache_line_size; // Default is not defined?
++ }
++
++ assert(AllocatePrefetchLines > 0, "invalid value");
++ if (AllocatePrefetchLines < 1) // Set valid value in product VM.
++ AllocatePrefetchLines = 1; // Conservative value.
++
++ if (AllocatePrefetchStyle == 3 && AllocatePrefetchDistance < cache_line_size)
++ AllocatePrefetchStyle = 1; // Fall back if inappropriate.
++
++ assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive");
++}
++
++void VM_Version::print_features() {
++ tty->print_cr("Version: %s cache_line_size = %d", cpu_features(), (int) get_cache_line_size());
++}
++
++#ifdef COMPILER2
++// Determine section size on power6: If the section size is 8 instructions,
++// there should be a difference of ~15% between the two test loops. If
++// no difference is detected, the section is assumed to be 32 instructions.
++void VM_Version::determine_section_size() {
++
++ int unroll = 80;
++
++ const int code_size = (2* unroll * 32 + 100)*BytesPerInstWord;
++
++ // Allocate space for the code.
++ ResourceMark rm;
++ CodeBuffer cb("detect_section_size", code_size, 0);
++ MacroAssembler* a = new MacroAssembler(&cb);
++
++ uint32_t *code = (uint32_t *)a->pc();
++ // Emit code.
++ void (*test1)() = (void(*)())(void *)a->function_entry();
++
++ Label l1;
++
++ a->li(R4, 1);
++ a->sldi(R4, R4, 28);
++ a->b(l1);
++ a->align(CodeEntryAlignment);
++
++ a->bind(l1);
++
++ for (int i = 0; i < unroll; i++) {
++    // Loop 1
++ // ------- sector 0 ------------
++ // ;; 0
++ a->nop(); // 1
++ a->fpnop0(); // 2
++ a->fpnop1(); // 3
++ a->addi(R4,R4, -1); // 4
++
++ // ;; 1
++ a->nop(); // 5
++ a->fmr(F6, F6); // 6
++ a->fmr(F7, F7); // 7
++ a->endgroup(); // 8
++ // ------- sector 8 ------------
++
++ // ;; 2
++ a->nop(); // 9
++ a->nop(); // 10
++ a->fmr(F8, F8); // 11
++ a->fmr(F9, F9); // 12
++
++ // ;; 3
++ a->nop(); // 13
++ a->fmr(F10, F10); // 14
++ a->fmr(F11, F11); // 15
++ a->endgroup(); // 16
++ // -------- sector 16 -------------
++
++ // ;; 4
++ a->nop(); // 17
++ a->nop(); // 18
++ a->fmr(F15, F15); // 19
++ a->fmr(F16, F16); // 20
++
++ // ;; 5
++ a->nop(); // 21
++ a->fmr(F17, F17); // 22
++ a->fmr(F18, F18); // 23
++ a->endgroup(); // 24
++ // ------- sector 24 ------------
++
++ // ;; 6
++ a->nop(); // 25
++ a->nop(); // 26
++ a->fmr(F19, F19); // 27
++ a->fmr(F20, F20); // 28
++
++ // ;; 7
++ a->nop(); // 29
++ a->fmr(F21, F21); // 30
++ a->fmr(F22, F22); // 31
++ a->brnop0(); // 32
++
++ // ------- sector 32 ------------
++ }
++
++ // ;; 8
++ a->cmpdi(CCR0, R4, unroll); // 33
++ a->bge(CCR0, l1); // 34
++ a->blr();
++
++ // Emit code.
++ void (*test2)() = (void(*)())(void *)a->function_entry();
++ // uint32_t *code = (uint32_t *)a->pc();
++
++ Label l2;
++
++ a->li(R4, 1);
++ a->sldi(R4, R4, 28);
++ a->b(l2);
++ a->align(CodeEntryAlignment);
++
++ a->bind(l2);
++
++ for (int i = 0; i < unroll; i++) {
++    // Loop 2
++ // ------- sector 0 ------------
++ // ;; 0
++ a->brnop0(); // 1
++ a->nop(); // 2
++ //a->cmpdi(CCR0, R4, unroll);
++ a->fpnop0(); // 3
++ a->fpnop1(); // 4
++ a->addi(R4,R4, -1); // 5
++
++ // ;; 1
++
++ a->nop(); // 6
++ a->fmr(F6, F6); // 7
++ a->fmr(F7, F7); // 8
++ // ------- sector 8 ---------------
++
++ // ;; 2
++ a->endgroup(); // 9
++
++ // ;; 3
++ a->nop(); // 10
++ a->nop(); // 11
++ a->fmr(F8, F8); // 12
++
++ // ;; 4
++ a->fmr(F9, F9); // 13
++ a->nop(); // 14
++ a->fmr(F10, F10); // 15
++
++ // ;; 5
++ a->fmr(F11, F11); // 16
++ // -------- sector 16 -------------
++
++ // ;; 6
++ a->endgroup(); // 17
++
++ // ;; 7
++ a->nop(); // 18
++ a->nop(); // 19
++ a->fmr(F15, F15); // 20
++
++ // ;; 8
++ a->fmr(F16, F16); // 21
++ a->nop(); // 22
++ a->fmr(F17, F17); // 23
++
++ // ;; 9
++ a->fmr(F18, F18); // 24
++ // -------- sector 24 -------------
++
++ // ;; 10
++ a->endgroup(); // 25
++
++ // ;; 11
++ a->nop(); // 26
++ a->nop(); // 27
++ a->fmr(F19, F19); // 28
++
++ // ;; 12
++ a->fmr(F20, F20); // 29
++ a->nop(); // 30
++ a->fmr(F21, F21); // 31
++
++ // ;; 13
++ a->fmr(F22, F22); // 32
++ }
++
++ // -------- sector 32 -------------
++ // ;; 14
++ a->cmpdi(CCR0, R4, unroll); // 33
++ a->bge(CCR0, l2); // 34
++
++ a->blr();
++ uint32_t *code_end = (uint32_t *)a->pc();
++ a->flush();
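++ // Flush the code buffer and instruction cache before executing the stubs.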
++
++ double loop1_seconds, loop2_seconds, rel_diff;
++ uint64_t start1, stop1;
++
++ start1 = os::current_thread_cpu_time(false);
++ (*test1)();
++ stop1 = os::current_thread_cpu_time(false);
++ loop1_seconds = (stop1 - start1) / (1000 * 1000 * 1000.0);
++
++ start1 = os::current_thread_cpu_time(false);
++ (*test2)();
++ stop1 = os::current_thread_cpu_time(false);
++
++ loop2_seconds = (stop1 - start1) / (1000 * 1000 * 1000.0);
++
++ rel_diff = (loop2_seconds - loop1_seconds) / loop1_seconds * 100;
++
++ if (PrintAssembly) {
++ ttyLocker ttyl;
++ tty->print_cr("Decoding section size detection stub at " INTPTR_FORMAT " before execution:", code);
++ Disassembler::decode((u_char*)code, (u_char*)code_end, tty);
++ tty->print_cr("Time loop1 :%f", loop1_seconds);
++ tty->print_cr("Time loop2 :%f", loop2_seconds);
++ tty->print_cr("(time2 - time1) / time1 = %f %%", rel_diff);
++
++ if (rel_diff > 12.0) {
++ tty->print_cr("Section Size 8 Instructions");
++ } else {
++ tty->print_cr("Section Size 32 Instructions or Power5");
++ }
++ }
++
++#if 0 // TODO: PPC port
++ // Set sector size (if not set explicitly).
++ if (FLAG_IS_DEFAULT(Power6SectorSize128PPC64)) {
++ if (rel_diff > 12.0) {
++ PdScheduling::power6SectorSize = 0x20;
++ } else {
++ PdScheduling::power6SectorSize = 0x80;
++ }
++ } else if (Power6SectorSize128PPC64) {
++ PdScheduling::power6SectorSize = 0x80;
++ } else {
++ PdScheduling::power6SectorSize = 0x20;
++ }
++#endif
++ if (UsePower6SchedulerPPC64) Unimplemented();
++}
++#endif // COMPILER2
++
++void VM_Version::determine_features() {
++#if defined(ABI_ELFv2)
++ const int code_size = (num_features+1+2*7)*BytesPerInstWord; // TODO(asmundak): calculation is incorrect.
++#else
++ // 7 InstWords for each call (function descriptor + blr instruction).
++ const int code_size = (num_features+1+2*7)*BytesPerInstWord;
++#endif
++ int features = 0;
++
++ // Create the test area.
++ enum { BUFFER_SIZE = 2*4*K }; // Needs to be >=2* max cache line size (cache line size can't exceed min page size).
++ char test_area[BUFFER_SIZE];
++ char *mid_of_test_area = &test_area[BUFFER_SIZE>>1];
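++ // Point dcbz at the middle so the zeroed cache line stays fully inside the buffer.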
++
++ // Allocate space for the code.
++ ResourceMark rm;
++ CodeBuffer cb("detect_cpu_features", code_size, 0);
++ MacroAssembler* a = new MacroAssembler(&cb);
++
++ // Must be set to true so we can generate the test code.
++ _features = VM_Version::all_features_m;
++
++ // Emit code.
++ void (*test)(address addr, uint64_t offset) = (void(*)(address addr, uint64_t offset))(void *)a->function_entry();
++ uint32_t *code = (uint32_t *)a->pc();
++ // Don't use R0 in ldarx.
++ // Keep R3_ARG1 unmodified, it contains &field (see below).
++ // Keep R4_ARG2 unmodified, it contains offset = 0 (see below).
++ a->fsqrt(F3, F4); // code[0] -> fsqrt_m
++ a->fsqrts(F3, F4); // code[1] -> fsqrts_m
++ a->isel(R7, R5, R6, 0); // code[2] -> isel_m
++ a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[3] -> lxarxeh_m
++ a->cmpb(R7, R5, R6); // code[4] -> cmpb_m
++ // a->mftgpr(R7, F3); // code[5] -> mftgpr
++ a->popcntb(R7, R5); // code[6] -> popcntb
++ a->popcntw(R7, R5); // code[7] -> popcntw
++ a->fcfids(F3, F4); // code[8] -> fcfids
++ a->vand(VR0, VR0, VR0); // code[9] -> vand
++ a->blr();
++
++ // Emit function to set one cache line to zero. Emit function descriptor and get pointer to it.
++ void (*zero_cacheline_func_ptr)(char*) = (void(*)(char*))(void *)a->function_entry();
++ a->dcbz(R3_ARG1); // R3_ARG1 = addr
++ a->blr();
++
++ uint32_t *code_end = (uint32_t *)a->pc();
++ a->flush();
++ _features = VM_Version::unknown_m;
++
++ // Print the detection code.
++ if (PrintAssembly) {
++ ttyLocker ttyl;
++ tty->print_cr("Decoding cpu-feature detection stub at " INTPTR_FORMAT " before execution:", code);
++ Disassembler::decode((u_char*)code, (u_char*)code_end, tty);
++ }
++
++ // Measure cache line size.
++ memset(test_area, 0xFF, BUFFER_SIZE); // Fill test area with 0xFF.
++ (*zero_cacheline_func_ptr)(mid_of_test_area); // Call function which executes dcbz to the middle.
++ int count = 0; // count zeroed bytes
++ for (int i = 0; i < BUFFER_SIZE; i++) if (test_area[i] == 0) count++;
++ guarantee(is_power_of_2(count), "cache line size needs to be a power of 2");
++ _measured_cache_line_size = count;
++
++ // Execute code. Illegal instructions will be replaced by 0 in the signal handler.
++ VM_Version::_is_determine_features_test_running = true;
++ (*test)((address)mid_of_test_area, (uint64_t)0);
++ VM_Version::_is_determine_features_test_running = false;
++
++ // Determine which instructions are legal.
++ int feature_cntr = 0;
++ if (code[feature_cntr++]) features |= fsqrt_m;
++ if (code[feature_cntr++]) features |= fsqrts_m;
++ if (code[feature_cntr++]) features |= isel_m;
++ if (code[feature_cntr++]) features |= lxarxeh_m;
++ if (code[feature_cntr++]) features |= cmpb_m;
++ // if (code[feature_cntr++]) features |= mftgpr_m;
++ if (code[feature_cntr++]) features |= popcntb_m;
++ if (code[feature_cntr++]) features |= popcntw_m;
++ if (code[feature_cntr++]) features |= fcfids_m;
++ if (code[feature_cntr++]) features |= vand_m;
++
++ // Print the detection code.
++ if (PrintAssembly) {
++ ttyLocker ttyl;
++ tty->print_cr("Decoding cpu-feature detection stub at " INTPTR_FORMAT " after execution:", code);
++ Disassembler::decode((u_char*)code, (u_char*)code_end, tty);
++ }
++
++ _features = features;
++}
++
++
++static int saved_features = 0;
++
++void VM_Version::allow_all() {
++ saved_features = _features;
++ _features = all_features_m;
++}
++
++void VM_Version::revert() {
++ _features = saved_features;
++}
+--- ./hotspot/src/cpu/ppc/vm/vm_version_ppc.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/vm_version_ppc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,96 @@
++/*
++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2014 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_PPC_VM_VM_VERSION_PPC_HPP
++#define CPU_PPC_VM_VM_VERSION_PPC_HPP
++
++#include "runtime/globals_extension.hpp"
++#include "runtime/vm_version.hpp"
++
++class VM_Version: public Abstract_VM_Version {
++protected:
++ enum Feature_Flag {
++ fsqrt,
++ fsqrts,
++ isel,
++ lxarxeh,
++ cmpb,
++ popcntb,
++ popcntw,
++ fcfids,
++ vand,
++ dcba,
++ num_features // last entry to count features
++ };
++ enum Feature_Flag_Set {
++ unknown_m = 0,
++ fsqrt_m = (1 << fsqrt ),
++ fsqrts_m = (1 << fsqrts ),
++ isel_m = (1 << isel ),
++ lxarxeh_m = (1 << lxarxeh),
++ cmpb_m = (1 << cmpb ),
++ popcntb_m = (1 << popcntb),
++ popcntw_m = (1 << popcntw),
++ fcfids_m = (1 << fcfids ),
++ vand_m = (1 << vand ),
++ dcba_m = (1 << dcba ),
++ all_features_m = -1
++ };
++ static int _features;
++ static int _measured_cache_line_size;
++ static const char* _features_str;
++ static bool _is_determine_features_test_running;
++
++ static void print_features();
++ static void determine_features(); // also measures cache line size
++ static void determine_section_size();
++ static void power6_micro_bench();
++public:
++ // Initialization
++ static void initialize();
++
++ static bool is_determine_features_test_running() { return _is_determine_features_test_running; }
++ // CPU instruction support
++ static bool has_fsqrt() { return (_features & fsqrt_m) != 0; }
++ static bool has_fsqrts() { return (_features & fsqrts_m) != 0; }
++ static bool has_isel() { return (_features & isel_m) != 0; }
++ static bool has_lxarxeh() { return (_features & lxarxeh_m) != 0; }
++ static bool has_cmpb() { return (_features & cmpb_m) != 0; }
++ static bool has_popcntb() { return (_features & popcntb_m) != 0; }
++ static bool has_popcntw() { return (_features & popcntw_m) != 0; }
++ static bool has_fcfids() { return (_features & fcfids_m) != 0; }
++ static bool has_vand() { return (_features & vand_m) != 0; }
++ static bool has_dcba() { return (_features & dcba_m) != 0; }
++
++ static const char* cpu_features() { return _features_str; }
++
++ static int get_cache_line_size() { return _measured_cache_line_size; }
++
++ // Assembler testing
++ static void allow_all();
++ static void revert();
++};
++
++#endif // CPU_PPC_VM_VM_VERSION_PPC_HPP
+--- ./hotspot/src/cpu/ppc/vm/vmreg_ppc.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/vmreg_ppc.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,51 @@
++/*
++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "asm/assembler.hpp"
++#include "code/vmreg.hpp"
++
++void VMRegImpl::set_regName() {
++ Register reg = ::as_Register(0);
++ int i;
++ for (i = 0; i < ConcreteRegisterImpl::max_gpr; ) {
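++ // Each 64-bit register spans two VMReg slots, so its name is entered twice.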
++ regName[i++] = reg->name();
++ regName[i++] = reg->name();
++ if (reg->encoding() < RegisterImpl::number_of_registers-1)
++ reg = reg->successor();
++ }
++
++ FloatRegister freg = ::as_FloatRegister(0);
++ for ( ; i < ConcreteRegisterImpl::max_fpr; ) {
++ regName[i++] = freg->name();
++ regName[i++] = freg->name();
++ if (freg->encoding() < FloatRegisterImpl::number_of_registers-1)
++ freg = freg->successor();
++ }
++ for ( ; i < ConcreteRegisterImpl::number_of_registers; i++) {
++ regName[i] = "NON-GPR-FPR";
++ }
++}
++
+--- ./hotspot/src/cpu/ppc/vm/vmreg_ppc.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/vmreg_ppc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,35 @@
++/*
++ * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_PPC_VM_VMREG_PPC_HPP
++#define CPU_PPC_VM_VMREG_PPC_HPP
++
++ bool is_Register();
++ Register as_Register();
++
++ bool is_FloatRegister();
++ FloatRegister as_FloatRegister();
++
++#endif // CPU_PPC_VM_VMREG_PPC_HPP
+--- ./hotspot/src/cpu/ppc/vm/vmreg_ppc.inline.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/vmreg_ppc.inline.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,71 @@
++/*
++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_PPC_VM_VMREG_PPC_INLINE_HPP
++#define CPU_PPC_VM_VMREG_PPC_INLINE_HPP
++
++inline VMReg RegisterImpl::as_VMReg() {
++ if (this == noreg) return VMRegImpl::Bad();
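++ // Each GPR occupies two VMReg slots, hence the shift by one.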
++ return VMRegImpl::as_VMReg(encoding() << 1);
++}
++
++ // Since we don't have two halves here, don't multiply by 2.
++inline VMReg ConditionRegisterImpl::as_VMReg() {
++ return VMRegImpl::as_VMReg((encoding()) + ConcreteRegisterImpl::max_fpr);
++}
++
++inline VMReg FloatRegisterImpl::as_VMReg() {
++ return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_gpr);
++}
++
++inline VMReg SpecialRegisterImpl::as_VMReg() {
++ return VMRegImpl::as_VMReg((encoding()) + ConcreteRegisterImpl::max_cnd);
++}
++
++inline bool VMRegImpl::is_Register() {
++ return (unsigned int)value() < (unsigned int)ConcreteRegisterImpl::max_gpr;
++}
++
++inline bool VMRegImpl::is_FloatRegister() {
++ return value() >= ConcreteRegisterImpl::max_gpr &&
++ value() < ConcreteRegisterImpl::max_fpr;
++}
++
++inline Register VMRegImpl::as_Register() {
++ assert(is_Register() && is_even(value()), "even-aligned GPR name");
++ return ::as_Register(value()>>1);
++}
++
++inline FloatRegister VMRegImpl::as_FloatRegister() {
++ assert(is_FloatRegister() && is_even(value()), "must be");
++ return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) >> 1);
++}
++
++inline bool VMRegImpl::is_concrete() {
++ assert(is_reg(), "must be");
++ return is_even(value());
++}
++
++#endif // CPU_PPC_VM_VMREG_PPC_INLINE_HPP
+--- ./hotspot/src/cpu/ppc/vm/vtableStubs_ppc_64.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/ppc/vm/vtableStubs_ppc_64.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,268 @@
++/*
++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2014 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "asm/assembler.hpp"
++#include "asm/macroAssembler.inline.hpp"
++#include "code/vtableStubs.hpp"
++#include "interp_masm_ppc_64.hpp"
++#include "memory/resourceArea.hpp"
++#include "oops/instanceKlass.hpp"
++#include "oops/klassVtable.hpp"
++#include "runtime/sharedRuntime.hpp"
++#include "vmreg_ppc.inline.hpp"
++#ifdef COMPILER2
++#include "opto/runtime.hpp"
++#endif
++
++#define __ masm->
++
++#ifdef PRODUCT
++#define BLOCK_COMMENT(str) // nothing
++#else
++#define BLOCK_COMMENT(str) __ block_comment(str)
++#endif
++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
++
++#ifndef PRODUCT
++extern "C" void bad_compiled_vtable_index(JavaThread* thread, oopDesc* receiver, int index);
++#endif
++
++// Used by compiler only; may use only caller saved, non-argument
++// registers.
++VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
++ // PPC port: use fixed size.
++ const int code_length = VtableStub::pd_code_size_limit(true);
++ VtableStub* s = new (code_length) VtableStub(true, vtable_index);
++ ResourceMark rm;
++ CodeBuffer cb(s->entry_point(), code_length);
++ MacroAssembler* masm = new MacroAssembler(&cb);
++ address start_pc;
++
++#ifndef PRODUCT
++ if (CountCompiledCalls) {
++ __ load_const(R11_scratch1, SharedRuntime::nof_megamorphic_calls_addr());
++ __ lwz(R12_scratch2, 0, R11_scratch1);
++ __ addi(R12_scratch2, R12_scratch2, 1);
++ __ stw(R12_scratch2, 0, R11_scratch1);
++ }
++#endif
++
++ assert(VtableStub::receiver_location() == R3_ARG1->as_VMReg(), "receiver expected in R3_ARG1");
++
++ // Get receiver klass.
++ const Register rcvr_klass = R11_scratch1;
++
++ // We might take an implicit NULL fault here.
++ address npe_addr = __ pc(); // npe = null pointer exception
++ __ load_klass_with_trap_null_check(rcvr_klass, R3);
++
++ // Set method (in case of interpreted method), and destination address.
++ int entry_offset = InstanceKlass::vtable_start_offset() + vtable_index*vtableEntry::size();
++
++#ifndef PRODUCT
++ if (DebugVtables) {
++ Label L;
++ // Check offset vs vtable length.
++ const Register vtable_len = R12_scratch2;
++ __ lwz(vtable_len, InstanceKlass::vtable_length_offset()*wordSize, rcvr_klass);
++ __ cmpwi(CCR0, vtable_len, vtable_index*vtableEntry::size());
++ __ bge(CCR0, L);
++ __ li(R12_scratch2, vtable_index);
++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), R3_ARG1, R12_scratch2, false);
++ __ bind(L);
++ }
++#endif
++
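++ // Byte offset of the Method* slot within the selected vtable entry.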
++ int v_off = entry_offset*wordSize + vtableEntry::method_offset_in_bytes();
++
++ __ ld(R19_method, v_off, rcvr_klass);
++
++#ifndef PRODUCT
++ if (DebugVtables) {
++ Label L;
++ __ cmpdi(CCR0, R19_method, 0);
++ __ bne(CCR0, L);
++ __ stop("Vtable entry is ZERO", 102);
++ __ bind(L);
++ }
++#endif
++
++ // If the vtable entry is null, the method is abstract.
++ address ame_addr = __ pc(); // ame = abstract method error
++
++ __ load_with_trap_null_check(R12_scratch2, in_bytes(Method::from_compiled_offset()), R19_method);
++ __ mtctr(R12_scratch2);
++ __ bctr();
++ masm->flush();
++
++ guarantee(__ pc() <= s->code_end(), "overflowed buffer");
++
++ s->set_exception_points(npe_addr, ame_addr);
++
++ return s;
++}
++
++VtableStub* VtableStubs::create_itable_stub(int vtable_index) {
++ // PPC port: use fixed size.
++ const int code_length = VtableStub::pd_code_size_limit(false);
++ VtableStub* s = new (code_length) VtableStub(false, vtable_index);
++ ResourceMark rm;
++ CodeBuffer cb(s->entry_point(), code_length);
++ MacroAssembler* masm = new MacroAssembler(&cb);
++ address start_pc;
++
++#ifndef PRODUCT
++ if (CountCompiledCalls) {
++ __ load_const(R11_scratch1, SharedRuntime::nof_megamorphic_calls_addr());
++ __ lwz(R12_scratch2, 0, R11_scratch1);
++ __ addi(R12_scratch2, R12_scratch2, 1);
++ __ stw(R12_scratch2, 0, R11_scratch1);
++ }
++#endif
++
++ assert(VtableStub::receiver_location() == R3_ARG1->as_VMReg(), "receiver expected in R3_ARG1");
++
++ // Entry arguments:
++ // R19_method: Interface
++ // R3_ARG1: Receiver
++ //
++
++ const Register rcvr_klass = R11_scratch1;
++ const Register vtable_len = R12_scratch2;
++ const Register itable_entry_addr = R21_tmp1;
++ const Register itable_interface = R22_tmp2;
++
++ // Get receiver klass.
++
++ // We might take an implicit NULL fault here.
++ address npe_addr = __ pc(); // npe = null pointer exception
++ __ load_klass_with_trap_null_check(rcvr_klass, R3_ARG1);
++
++ BLOCK_COMMENT("Load start of itable entries into itable_entry.");
++ __ lwz(vtable_len, InstanceKlass::vtable_length_offset() * wordSize, rcvr_klass);
++ __ slwi(vtable_len, vtable_len, exact_log2(vtableEntry::size() * wordSize));
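++ // vtable_len now holds the vtable size in bytes; the itable entries follow the vtable.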
++ __ add(itable_entry_addr, vtable_len, rcvr_klass);
++
++ // Loop over all itable entries until the desired interfaceOop (Rinterface) is found.
++ BLOCK_COMMENT("Increment itable_entry_addr in loop.");
++ const int vtable_base_offset = InstanceKlass::vtable_start_offset() * wordSize;
++ __ addi(itable_entry_addr, itable_entry_addr, vtable_base_offset + itableOffsetEntry::interface_offset_in_bytes());
++
++ const int itable_offset_search_inc = itableOffsetEntry::size() * wordSize;
++ Label search;
++ __ bind(search);
++ __ ld(itable_interface, 0, itable_entry_addr);
++
++ // Handle IncompatibleClassChangeError in itable stubs.
++ // If the entry is NULL then we've reached the end of the table
++ // without finding the expected interface, so throw an exception.
++ BLOCK_COMMENT("Handle IncompatibleClassChangeError in itable stubs.");
++ Label throw_icce;
++ __ cmpdi(CCR1, itable_interface, 0);
++ __ cmpd(CCR0, itable_interface, R19_method);
++ __ addi(itable_entry_addr, itable_entry_addr, itable_offset_search_inc);
++ __ beq(CCR1, throw_icce);
++ __ bne(CCR0, search);
++
++ // Entry found and itable_entry_addr points to it, get offset of vtable for interface.
++
++ const Register vtable_offset = R12_scratch2;
++ const Register itable_method = R11_scratch1;
++
++ const int vtable_offset_offset = (itableOffsetEntry::offset_offset_in_bytes() -
++ itableOffsetEntry::interface_offset_in_bytes()) -
++ itable_offset_search_inc;
++ __ lwz(vtable_offset, vtable_offset_offset, itable_entry_addr);
++
++ // Compute itableMethodEntry and get method and entry point for compiler.
++ const int method_offset = (itableMethodEntry::size() * wordSize * vtable_index) +
++ itableMethodEntry::method_offset_in_bytes();
++
++ __ add(itable_method, rcvr_klass, vtable_offset);
++ __ ld(R19_method, method_offset, itable_method);
++
++#ifndef PRODUCT
++ if (DebugVtables) {
++ Label ok;
++ __ cmpd(CCR0, R19_method, 0);
++ __ bne(CCR0, ok);
++ __ stop("method is null", 103);
++ __ bind(ok);
++ }
++#endif
++
++ // If the vtable entry is null, the method is abstract.
++ address ame_addr = __ pc(); // ame = abstract method error
++
++ // Must do an explicit check if implicit checks are disabled.
++ assert(!MacroAssembler::needs_explicit_null_check(in_bytes(Method::from_compiled_offset())), "sanity");
++ if (!ImplicitNullChecks || !os::zero_page_read_protected()) {
++ if (TrapBasedNullChecks) {
++ __ trap_null_check(R19_method);
++ } else {
++ __ cmpdi(CCR0, R19_method, 0);
++ __ beq(CCR0, throw_icce);
++ }
++ }
++ __ ld(R12_scratch2, in_bytes(Method::from_compiled_offset()), R19_method);
++ __ mtctr(R12_scratch2);
++ __ bctr();
++
++ // Handle IncompatibleClassChangeError in itable stubs.
++ // More detailed error message.
++ // We force resolving of the call site by jumping to the "handle
++ // wrong method" stub, and so let the interpreter runtime do all the
++ // dirty work.
++ __ bind(throw_icce);
++ __ load_const(R11_scratch1, SharedRuntime::get_handle_wrong_method_stub());
++ __ mtctr(R11_scratch1);
++ __ bctr();
++
++ masm->flush();
++
++ guarantee(__ pc() <= s->code_end(), "overflowed buffer");
++
++ s->set_exception_points(npe_addr, ame_addr);
++ return s;
++}
++
++int VtableStub::pd_code_size_limit(bool is_vtable_stub) {
++ if (TraceJumps || DebugVtables || CountCompiledCalls || VerifyOops) {
++ return 1000;
++ } else {
++ int decode_klass_size = MacroAssembler::instr_size_for_decode_klass_not_null();
++ if (is_vtable_stub) {
++ return 20 + decode_klass_size + 8 + 8; // Plain + cOops + Traps + safety
++ } else {
++ return 96 + decode_klass_size + 12 + 8; // Plain + cOops + Traps + safety
++ }
++ }
++}
++
++int VtableStub::pd_code_alignment() {
++ const unsigned int icache_line_size = 32;
++ return icache_line_size;
++}
+--- ./hotspot/src/cpu/sparc/vm/assembler_sparc.hpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/sparc/vm/assembler_sparc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+@@ -88,6 +88,7 @@
+ orncc_op3 = 0x16,
+ xnorcc_op3 = 0x17,
+ addccc_op3 = 0x18,
++ aes4_op3 = 0x19,
+ umulcc_op3 = 0x1a,
+ smulcc_op3 = 0x1b,
+ subccc_op3 = 0x1c,
+@@ -121,7 +122,14 @@
+ fpop1_op3 = 0x34,
+ fpop2_op3 = 0x35,
+ impdep1_op3 = 0x36,
++ aes3_op3 = 0x36,
++ alignaddr_op3 = 0x36,
++ faligndata_op3 = 0x36,
++ flog3_op3 = 0x36,
++ edge_op3 = 0x36,
++ fsrc_op3 = 0x36,
+ impdep2_op3 = 0x37,
++ stpartialf_op3 = 0x37,
+ jmpl_op3 = 0x38,
+ rett_op3 = 0x39,
+ trap_op3 = 0x3a,
+@@ -172,41 +180,62 @@
+
+ enum opfs {
+ // selected opfs
+- fmovs_opf = 0x01,
+- fmovd_opf = 0x02,
++ edge8n_opf = 0x01,
+
+- fnegs_opf = 0x05,
+- fnegd_opf = 0x06,
++ fmovs_opf = 0x01,
++ fmovd_opf = 0x02,
+
+- fadds_opf = 0x41,
+- faddd_opf = 0x42,
+- fsubs_opf = 0x45,
+- fsubd_opf = 0x46,
++ fnegs_opf = 0x05,
++ fnegd_opf = 0x06,
+
+- fmuls_opf = 0x49,
+- fmuld_opf = 0x4a,
+- fdivs_opf = 0x4d,
+- fdivd_opf = 0x4e,
++ alignaddr_opf = 0x18,
+
+- fcmps_opf = 0x51,
+- fcmpd_opf = 0x52,
++ fadds_opf = 0x41,
++ faddd_opf = 0x42,
++ fsubs_opf = 0x45,
++ fsubd_opf = 0x46,
+
+- fstox_opf = 0x81,
+- fdtox_opf = 0x82,
+- fxtos_opf = 0x84,
+- fxtod_opf = 0x88,
+- fitos_opf = 0xc4,
+- fdtos_opf = 0xc6,
+- fitod_opf = 0xc8,
+- fstod_opf = 0xc9,
+- fstoi_opf = 0xd1,
+- fdtoi_opf = 0xd2,
++ faligndata_opf = 0x48,
+
+- mdtox_opf = 0x110,
+- mstouw_opf = 0x111,
+- mstosw_opf = 0x113,
+- mxtod_opf = 0x118,
+- mwtos_opf = 0x119
++ fmuls_opf = 0x49,
++ fmuld_opf = 0x4a,
++ fdivs_opf = 0x4d,
++ fdivd_opf = 0x4e,
++
++ fcmps_opf = 0x51,
++ fcmpd_opf = 0x52,
++
++ fstox_opf = 0x81,
++ fdtox_opf = 0x82,
++ fxtos_opf = 0x84,
++ fxtod_opf = 0x88,
++ fitos_opf = 0xc4,
++ fdtos_opf = 0xc6,
++ fitod_opf = 0xc8,
++ fstod_opf = 0xc9,
++ fstoi_opf = 0xd1,
++ fdtoi_opf = 0xd2,
++
++ mdtox_opf = 0x110,
++ mstouw_opf = 0x111,
++ mstosw_opf = 0x113,
++ mxtod_opf = 0x118,
++ mwtos_opf = 0x119,
++
++ aes_kexpand0_opf = 0x130,
++ aes_kexpand2_opf = 0x131
++ };
++
++ enum op5s {
++ aes_eround01_op5 = 0x00,
++ aes_eround23_op5 = 0x01,
++ aes_dround01_op5 = 0x02,
++ aes_dround23_op5 = 0x03,
++ aes_eround01_l_op5 = 0x04,
++ aes_eround23_l_op5 = 0x05,
++ aes_dround01_l_op5 = 0x06,
++ aes_dround23_l_op5 = 0x07,
++ aes_kexpand1_op5 = 0x08
+ };
+
+ enum RCondition { rc_z = 1, rc_lez = 2, rc_lz = 3, rc_nz = 5, rc_gz = 6, rc_gez = 7, rc_last = rc_gez };
+@@ -330,6 +359,8 @@
+ ASI_PRIMARY = 0x80,
+ ASI_PRIMARY_NOFAULT = 0x82,
+ ASI_PRIMARY_LITTLE = 0x88,
++ // 8x8-bit partial store
++ ASI_PST8_PRIMARY = 0xC0,
+ // Block initializing store
+ ASI_ST_BLKINIT_PRIMARY = 0xE2,
+ // Most-Recently-Used (MRU) BIS variant
+@@ -427,6 +458,7 @@
+ static int immed( bool i) { return u_field(i ? 1 : 0, 13, 13); }
+ static int opf_low6( int w) { return u_field(w, 10, 5); }
+ static int opf_low5( int w) { return u_field(w, 9, 5); }
++ static int op5( int x) { return u_field(x, 8, 5); }
+ static int trapcc( CC cc) { return u_field(cc, 12, 11); }
+ static int sx( int i) { return u_field(i, 12, 12); } // shift x=1 means 64-bit
+ static int opf( int x) { return u_field(x, 13, 5); }
+@@ -451,6 +483,7 @@
+ static int fd( FloatRegister r, FloatRegisterImpl::Width fwa) { return u_field(r->encoding(fwa), 29, 25); };
+ static int fs1(FloatRegister r, FloatRegisterImpl::Width fwa) { return u_field(r->encoding(fwa), 18, 14); };
+ static int fs2(FloatRegister r, FloatRegisterImpl::Width fwa) { return u_field(r->encoding(fwa), 4, 0); };
++ static int fs3(FloatRegister r, FloatRegisterImpl::Width fwa) { return u_field(r->encoding(fwa), 13, 9); };
+
+ // some float instructions use this encoding on the op3 field
+ static int alt_op3(int op, FloatRegisterImpl::Width w) {
+@@ -559,6 +592,15 @@
+ return x & ((1 << 10) - 1);
+ }
+
++ // AES crypto instructions supported only on certain processors
++ static void aes_only() { assert( VM_Version::has_aes(), "This instruction only works on SPARC with AES instructions support"); }
++
++ // instruction only in VIS1
++ static void vis1_only() { assert( VM_Version::has_vis1(), "This instruction only works on SPARC with VIS1"); }
++
++ // instruction only in VIS2
++ static void vis2_only() { assert( VM_Version::has_vis2(), "This instruction only works on SPARC with VIS2"); }
++
+ // instruction only in VIS3
+ static void vis3_only() { assert( VM_Version::has_vis3(), "This instruction only works on SPARC with VIS3"); }
+
+@@ -604,11 +646,20 @@
+ }
+
+ protected:
++ // Insert a nop if the previous is cbcond
++ void insert_nop_after_cbcond() {
++ if (UseCBCond && cbcond_before()) {
++ nop();
++ }
++ }
+ // Delay slot helpers
+ // cti is called when emitting control-transfer instruction,
+ // BEFORE doing the emitting.
+ // Only effective when assertion-checking is enabled.
+ void cti() {
++ // A cbcond instruction immediately followed by a CTI
++ // instruction introduces pipeline stalls, we need to avoid that.
++ no_cbcond_before();
+ #ifdef CHECK_DELAY
+ assert_not_delayed("cti should not be in delay slot");
+ #endif
+@@ -632,7 +683,6 @@
+ void no_cbcond_before() {
+ assert(offset() == 0 || !cbcond_before(), "cbcond should not follow another cbcond");
+ }
+-
+ public:
+
+ bool use_cbcond(Label& L) {
+@@ -682,6 +732,24 @@
+ void addccc( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(addc_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+
+
++ // 4-operand AES instructions
++
++ void aes_eround01( FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_eround01_op5) | fs2(s2, FloatRegisterImpl::D) ); }
++ void aes_eround23( FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_eround23_op5) | fs2(s2, FloatRegisterImpl::D) ); }
++ void aes_dround01( FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_dround01_op5) | fs2(s2, FloatRegisterImpl::D) ); }
++ void aes_dround23( FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_dround23_op5) | fs2(s2, FloatRegisterImpl::D) ); }
++ void aes_eround01_l( FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_eround01_l_op5) | fs2(s2, FloatRegisterImpl::D) ); }
++ void aes_eround23_l( FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_eround23_l_op5) | fs2(s2, FloatRegisterImpl::D) ); }
++ void aes_dround01_l( FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_dround01_l_op5) | fs2(s2, FloatRegisterImpl::D) ); }
++ void aes_dround23_l( FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_dround23_l_op5) | fs2(s2, FloatRegisterImpl::D) ); }
++ void aes_kexpand1( FloatRegister s1, FloatRegister s2, int imm5a, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | u_field(imm5a, 13, 9) | op5(aes_kexpand1_op5) | fs2(s2, FloatRegisterImpl::D) ); }
++
++
++ // 3-operand AES instructions
++
++ void aes_kexpand0( FloatRegister s1, FloatRegister s2, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes3_op3) | fs1(s1, FloatRegisterImpl::D) | opf(aes_kexpand0_opf) | fs2(s2, FloatRegisterImpl::D) ); }
++ void aes_kexpand2( FloatRegister s1, FloatRegister s2, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes3_op3) | fs1(s1, FloatRegisterImpl::D) | opf(aes_kexpand2_opf) | fs2(s2, FloatRegisterImpl::D) ); }
++
+ // pp 136
+
+ inline void bpr(RCondition c, bool a, Predict p, Register s1, address d, relocInfo::relocType rt = relocInfo::none);
+@@ -784,6 +852,10 @@
+ void fmul( FloatRegisterImpl::Width sw, FloatRegisterImpl::Width dw, FloatRegister s1, FloatRegister s2, FloatRegister d ) { emit_int32( op(arith_op) | fd(d, dw) | op3(fpop1_op3) | fs1(s1, sw) | opf(0x60 + sw + dw*4) | fs2(s2, sw)); }
+ void fdiv( FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister d ) { emit_int32( op(arith_op) | fd(d, w) | op3(fpop1_op3) | fs1(s1, w) | opf(0x4c + w) | fs2(s2, w)); }
+
++ // FXORs/FXORd instructions
++
++ void fxor( FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister d ) { vis1_only(); emit_int32( op(arith_op) | fd(d, w) | op3(flog3_op3) | fs1(s1, w) | opf(0x6E - w) | fs2(s2, w)); }
++
+ // pp 164
+
+ void fsqrt( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d ) { emit_int32( op(arith_op) | fd(d, w) | op3(fpop1_op3) | opf(0x28 + w) | fs2(s, w)); }
+@@ -1108,6 +1180,20 @@
+ inline void wrfprs( Register d) { v9_only(); emit_int32( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(6, 29, 25)); }
+
+
++ // VIS1 instructions
++
++ void alignaddr( Register s1, Register s2, Register d ) { vis1_only(); emit_int32( op(arith_op) | rd(d) | op3(alignaddr_op3) | rs1(s1) | opf(alignaddr_opf) | rs2(s2)); }
++
++ void faligndata( FloatRegister s1, FloatRegister s2, FloatRegister d ) { vis1_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(faligndata_op3) | fs1(s1, FloatRegisterImpl::D) | opf(faligndata_opf) | fs2(s2, FloatRegisterImpl::D)); }
++
++ void fsrc2( FloatRegisterImpl::Width w, FloatRegister s2, FloatRegister d ) { vis1_only(); emit_int32( op(arith_op) | fd(d, w) | op3(fsrc_op3) | opf(0x7A - w) | fs2(s2, w)); }
++
++ void stpartialf( Register s1, Register s2, FloatRegister d, int ia = -1 ) { vis1_only(); emit_int32( op(ldst_op) | fd(d, FloatRegisterImpl::D) | op3(stpartialf_op3) | rs1(s1) | imm_asi(ia) | rs2(s2)); }
++
++ // VIS2 instructions
++
++ void edge8n( Register s1, Register s2, Register d ) { vis2_only(); emit_int32( op(arith_op) | rd(d) | op3(edge_op3) | rs1(s1) | opf(edge8n_opf) | rs2(s2)); }
++
+ // VIS3 instructions
+
+ void movstosw( FloatRegister s, Register d ) { vis3_only(); emit_int32( op(arith_op) | rd(d) | op3(mftoi_op3) | opf(mstosw_opf) | fs2(s, FloatRegisterImpl::S)); }
+--- ./hotspot/src/cpu/sparc/vm/assembler_sparc.inline.hpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/sparc/vm/assembler_sparc.inline.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -54,33 +54,33 @@
+ inline void Assembler::add(Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(add_op3) | rs1(s1) | rs2(s2) ); }
+ inline void Assembler::add(Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(add_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+
+-inline void Assembler::bpr( RCondition c, bool a, Predict p, Register s1, address d, relocInfo::relocType rt ) { v9_only(); cti(); emit_data( op(branch_op) | annul(a) | cond(c) | op2(bpr_op2) | wdisp16(intptr_t(d), intptr_t(pc())) | predict(p) | rs1(s1), rt); has_delay_slot(); }
+-inline void Assembler::bpr( RCondition c, bool a, Predict p, Register s1, Label& L) { bpr( c, a, p, s1, target(L)); }
++inline void Assembler::bpr( RCondition c, bool a, Predict p, Register s1, address d, relocInfo::relocType rt ) { v9_only(); insert_nop_after_cbcond(); cti(); emit_data( op(branch_op) | annul(a) | cond(c) | op2(bpr_op2) | wdisp16(intptr_t(d), intptr_t(pc())) | predict(p) | rs1(s1), rt); has_delay_slot(); }
++inline void Assembler::bpr( RCondition c, bool a, Predict p, Register s1, Label& L) { insert_nop_after_cbcond(); bpr( c, a, p, s1, target(L)); }
+
+-inline void Assembler::fb( Condition c, bool a, address d, relocInfo::relocType rt ) { v9_dep(); cti(); emit_data( op(branch_op) | annul(a) | cond(c) | op2(fb_op2) | wdisp(intptr_t(d), intptr_t(pc()), 22), rt); has_delay_slot(); }
+-inline void Assembler::fb( Condition c, bool a, Label& L ) { fb(c, a, target(L)); }
++inline void Assembler::fb( Condition c, bool a, address d, relocInfo::relocType rt ) { v9_dep(); insert_nop_after_cbcond(); cti(); emit_data( op(branch_op) | annul(a) | cond(c) | op2(fb_op2) | wdisp(intptr_t(d), intptr_t(pc()), 22), rt); has_delay_slot(); }
++inline void Assembler::fb( Condition c, bool a, Label& L ) { insert_nop_after_cbcond(); fb(c, a, target(L)); }
+
+-inline void Assembler::fbp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt ) { v9_only(); cti(); emit_data( op(branch_op) | annul(a) | cond(c) | op2(fbp_op2) | branchcc(cc) | predict(p) | wdisp(intptr_t(d), intptr_t(pc()), 19), rt); has_delay_slot(); }
+-inline void Assembler::fbp( Condition c, bool a, CC cc, Predict p, Label& L ) { fbp(c, a, cc, p, target(L)); }
++inline void Assembler::fbp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt ) { v9_only(); insert_nop_after_cbcond(); cti(); emit_data( op(branch_op) | annul(a) | cond(c) | op2(fbp_op2) | branchcc(cc) | predict(p) | wdisp(intptr_t(d), intptr_t(pc()), 19), rt); has_delay_slot(); }
++inline void Assembler::fbp( Condition c, bool a, CC cc, Predict p, Label& L ) { insert_nop_after_cbcond(); fbp(c, a, cc, p, target(L)); }
+
+-inline void Assembler::br( Condition c, bool a, address d, relocInfo::relocType rt ) { v9_dep(); cti(); emit_data( op(branch_op) | annul(a) | cond(c) | op2(br_op2) | wdisp(intptr_t(d), intptr_t(pc()), 22), rt); has_delay_slot(); }
+-inline void Assembler::br( Condition c, bool a, Label& L ) { br(c, a, target(L)); }
++inline void Assembler::br( Condition c, bool a, address d, relocInfo::relocType rt ) { v9_dep(); insert_nop_after_cbcond(); cti(); emit_data( op(branch_op) | annul(a) | cond(c) | op2(br_op2) | wdisp(intptr_t(d), intptr_t(pc()), 22), rt); has_delay_slot(); }
++inline void Assembler::br( Condition c, bool a, Label& L ) { insert_nop_after_cbcond(); br(c, a, target(L)); }
+
+-inline void Assembler::bp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt ) { v9_only(); cti(); emit_data( op(branch_op) | annul(a) | cond(c) | op2(bp_op2) | branchcc(cc) | predict(p) | wdisp(intptr_t(d), intptr_t(pc()), 19), rt); has_delay_slot(); }
+-inline void Assembler::bp( Condition c, bool a, CC cc, Predict p, Label& L ) { bp(c, a, cc, p, target(L)); }
++inline void Assembler::bp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt ) { v9_only(); insert_nop_after_cbcond(); cti(); emit_data( op(branch_op) | annul(a) | cond(c) | op2(bp_op2) | branchcc(cc) | predict(p) | wdisp(intptr_t(d), intptr_t(pc()), 19), rt); has_delay_slot(); }
++inline void Assembler::bp( Condition c, bool a, CC cc, Predict p, Label& L ) { insert_nop_after_cbcond(); bp(c, a, cc, p, target(L)); }
+
+ // compare and branch
+ inline void Assembler::cbcond(Condition c, CC cc, Register s1, Register s2, Label& L) { cti(); no_cbcond_before(); emit_data(op(branch_op) | cond_cbcond(c) | op2(bpr_op2) | branchcc(cc) | wdisp10(intptr_t(target(L)), intptr_t(pc())) | rs1(s1) | rs2(s2)); }
+ inline void Assembler::cbcond(Condition c, CC cc, Register s1, int simm5, Label& L) { cti(); no_cbcond_before(); emit_data(op(branch_op) | cond_cbcond(c) | op2(bpr_op2) | branchcc(cc) | wdisp10(intptr_t(target(L)), intptr_t(pc())) | rs1(s1) | immed(true) | simm(simm5, 5)); }
+
+-inline void Assembler::call( address d, relocInfo::relocType rt ) { cti(); emit_data( op(call_op) | wdisp(intptr_t(d), intptr_t(pc()), 30), rt); has_delay_slot(); assert(rt != relocInfo::virtual_call_type, "must use virtual_call_Relocation::spec"); }
+-inline void Assembler::call( Label& L, relocInfo::relocType rt ) { call( target(L), rt); }
++inline void Assembler::call( address d, relocInfo::relocType rt ) { insert_nop_after_cbcond(); cti(); emit_data( op(call_op) | wdisp(intptr_t(d), intptr_t(pc()), 30), rt); has_delay_slot(); assert(rt != relocInfo::virtual_call_type, "must use virtual_call_Relocation::spec"); }
++inline void Assembler::call( Label& L, relocInfo::relocType rt ) { insert_nop_after_cbcond(); call( target(L), rt); }
+
+ inline void Assembler::flush( Register s1, Register s2) { emit_int32( op(arith_op) | op3(flush_op3) | rs1(s1) | rs2(s2)); }
+ inline void Assembler::flush( Register s1, int simm13a) { emit_data( op(arith_op) | op3(flush_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
+
+-inline void Assembler::jmpl( Register s1, Register s2, Register d ) { cti(); emit_int32( op(arith_op) | rd(d) | op3(jmpl_op3) | rs1(s1) | rs2(s2)); has_delay_slot(); }
+-inline void Assembler::jmpl( Register s1, int simm13a, Register d, RelocationHolder const& rspec ) { cti(); emit_data( op(arith_op) | rd(d) | op3(jmpl_op3) | rs1(s1) | immed(true) | simm(simm13a, 13), rspec); has_delay_slot(); }
++inline void Assembler::jmpl( Register s1, Register s2, Register d ) { insert_nop_after_cbcond(); cti(); emit_int32( op(arith_op) | rd(d) | op3(jmpl_op3) | rs1(s1) | rs2(s2)); has_delay_slot(); }
++inline void Assembler::jmpl( Register s1, int simm13a, Register d, RelocationHolder const& rspec ) { insert_nop_after_cbcond(); cti(); emit_data( op(arith_op) | rd(d) | op3(jmpl_op3) | rs1(s1) | immed(true) | simm(simm13a, 13), rspec); has_delay_slot(); }
+
+ inline void Assembler::ldf(FloatRegisterImpl::Width w, Register s1, Register s2, FloatRegister d) { emit_int32( op(ldst_op) | fd(d, w) | alt_op3(ldf_op3, w) | rs1(s1) | rs2(s2) ); }
+ inline void Assembler::ldf(FloatRegisterImpl::Width w, Register s1, int simm13a, FloatRegister d, RelocationHolder const& rspec) { emit_data( op(ldst_op) | fd(d, w) | alt_op3(ldf_op3, w) | rs1(s1) | immed(true) | simm(simm13a, 13), rspec); }
+--- ./hotspot/src/cpu/sparc/vm/bytecodeInterpreter_sparc.hpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/sparc/vm/bytecodeInterpreter_sparc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -83,7 +83,7 @@
+ #define LOCALS_ADDR(offset) ((address)locals[-(offset)])
+ #define LOCALS_INT(offset) (*((jint*)&locals[-(offset)]))
+ #define LOCALS_FLOAT(offset) (*((jfloat*)&locals[-(offset)]))
+-#define LOCALS_OBJECT(offset) ((oop)locals[-(offset)])
++#define LOCALS_OBJECT(offset) (cast_to_oop(locals[-(offset)]))
+ #define LOCALS_DOUBLE(offset) (((VMJavaVal64*)&locals[-((offset) + 1)])->d)
+ #define LOCALS_LONG(offset) (((VMJavaVal64*)&locals[-((offset) + 1)])->l)
+ #define LOCALS_LONG_AT(offset) (((address)&locals[-((offset) + 1)]))
+--- ./hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -152,7 +152,7 @@
+ }
+
+
+-int LIR_Assembler::initial_frame_size_in_bytes() {
++int LIR_Assembler::initial_frame_size_in_bytes() const {
+ return in_bytes(frame_map()->framesize_in_bytes());
+ }
+
+@@ -182,7 +182,7 @@
+ int number_of_locks = entry_state->locks_size();
+
+ // Create a frame for the compiled activation.
+- __ build_frame(initial_frame_size_in_bytes());
++ __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes());
+
+ // OSR buffer is
+ //
+@@ -1315,7 +1315,7 @@
+ }
+
+ Address LIR_Assembler::as_Address(LIR_Address* addr) {
+- Register reg = addr->base()->as_register();
++ Register reg = addr->base()->as_pointer_register();
+ LIR_Opr index = addr->index();
+ if (index->is_illegal()) {
+ return Address(reg, addr->disp());
+@@ -3101,7 +3101,145 @@
+ }
+
+ void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) {
+- fatal("Type profiling not implemented on this platform");
++ Register obj = op->obj()->as_register();
++ Register tmp1 = op->tmp()->as_pointer_register();
++ Register tmp2 = G1;
++ Address mdo_addr = as_Address(op->mdp()->as_address_ptr());
++ ciKlass* exact_klass = op->exact_klass();
++ intptr_t current_klass = op->current_klass();
++ bool not_null = op->not_null();
++ bool no_conflict = op->no_conflict();
++
++ Label update, next, none;
++
++ bool do_null = !not_null;
++ bool exact_klass_set = exact_klass != NULL && ciTypeEntries::valid_ciklass(current_klass) == exact_klass;
++ bool do_update = !TypeEntries::is_type_unknown(current_klass) && !exact_klass_set;
++
++ assert(do_null || do_update, "why are we here?");
++ assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?");
++
++ __ verify_oop(obj);
++
++ if (tmp1 != obj) {
++ __ mov(obj, tmp1);
++ }
++ if (do_null) {
++ __ br_notnull_short(tmp1, Assembler::pt, update);
++ if (!TypeEntries::was_null_seen(current_klass)) {
++ __ ld_ptr(mdo_addr, tmp1);
++ __ or3(tmp1, TypeEntries::null_seen, tmp1);
++ __ st_ptr(tmp1, mdo_addr);
++ }
++ if (do_update) {
++ __ ba(next);
++ __ delayed()->nop();
++ }
++#ifdef ASSERT
++ } else {
++ __ br_notnull_short(tmp1, Assembler::pt, update);
++ __ stop("unexpect null obj");
++#endif
++ }
++
++ __ bind(update);
++
++ if (do_update) {
++#ifdef ASSERT
++ if (exact_klass != NULL) {
++ Label ok;
++ __ load_klass(tmp1, tmp1);
++ metadata2reg(exact_klass->constant_encoding(), tmp2);
++ __ cmp_and_br_short(tmp1, tmp2, Assembler::equal, Assembler::pt, ok);
++ __ stop("exact klass and actual klass differ");
++ __ bind(ok);
++ }
++#endif
++
++ Label do_update;
++ __ ld_ptr(mdo_addr, tmp2);
++
++ if (!no_conflict) {
++ if (exact_klass == NULL || TypeEntries::is_type_none(current_klass)) {
++ if (exact_klass != NULL) {
++ metadata2reg(exact_klass->constant_encoding(), tmp1);
++ } else {
++ __ load_klass(tmp1, tmp1);
++ }
++
++ __ xor3(tmp1, tmp2, tmp1);
++ __ btst(TypeEntries::type_klass_mask, tmp1);
++ // klass seen before, nothing to do. The unknown bit may have been
++ // set already but no need to check.
++ __ brx(Assembler::zero, false, Assembler::pt, next);
++ __ delayed()->btst(TypeEntries::type_unknown, tmp1);
++ // already unknown. Nothing to do anymore.
++ __ brx(Assembler::notZero, false, Assembler::pt, next);
++
++ if (TypeEntries::is_type_none(current_klass)) {
++ __ delayed()->btst(TypeEntries::type_mask, tmp2);
++ __ brx(Assembler::zero, true, Assembler::pt, do_update);
++ // first time here. Set profile type.
++ __ delayed()->or3(tmp2, tmp1, tmp2);
++ } else {
++ __ delayed()->nop();
++ }
++ } else {
++ assert(ciTypeEntries::valid_ciklass(current_klass) != NULL &&
++ ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "conflict only");
++
++ __ btst(TypeEntries::type_unknown, tmp2);
++ // already unknown. Nothing to do anymore.
++ __ brx(Assembler::notZero, false, Assembler::pt, next);
++ __ delayed()->nop();
++ }
++
++ // different than before. Cannot keep accurate profile.
++ __ or3(tmp2, TypeEntries::type_unknown, tmp2);
++ } else {
++ // There's a single possible klass at this profile point
++ assert(exact_klass != NULL, "should be");
++ if (TypeEntries::is_type_none(current_klass)) {
++ metadata2reg(exact_klass->constant_encoding(), tmp1);
++ __ xor3(tmp1, tmp2, tmp1);
++ __ btst(TypeEntries::type_klass_mask, tmp1);
++ __ brx(Assembler::zero, false, Assembler::pt, next);
++#ifdef ASSERT
++
++ {
++ Label ok;
++ __ delayed()->btst(TypeEntries::type_mask, tmp2);
++ __ brx(Assembler::zero, true, Assembler::pt, ok);
++ __ delayed()->nop();
++
++ __ stop("unexpected profiling mismatch");
++ __ bind(ok);
++ }
++ // first time here. Set profile type.
++ __ or3(tmp2, tmp1, tmp2);
++#else
++ // first time here. Set profile type.
++ __ delayed()->or3(tmp2, tmp1, tmp2);
++#endif
++
++ } else {
++ assert(ciTypeEntries::valid_ciklass(current_klass) != NULL &&
++ ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "inconsistent");
++
++ // already unknown. Nothing to do anymore.
++ __ btst(TypeEntries::type_unknown, tmp2);
++ __ brx(Assembler::notZero, false, Assembler::pt, next);
++ __ delayed()->or3(tmp2, TypeEntries::type_unknown, tmp2);
++ }
++ }
++
++ __ bind(do_update);
++ __ st_ptr(tmp2, mdo_addr);
++
++ __ bind(next);
++ }
+ }
+
+ void LIR_Assembler::align_backward_branch_target() {
+@@ -3321,9 +3459,14 @@
+
+ void LIR_Assembler::leal(LIR_Opr addr_opr, LIR_Opr dest) {
+ LIR_Address* addr = addr_opr->as_address_ptr();
+- assert(addr->index()->is_illegal() && addr->scale() == LIR_Address::times_1 && Assembler::is_simm13(addr->disp()), "can't handle complex addresses yet");
+-
+- __ add(addr->base()->as_pointer_register(), addr->disp(), dest->as_pointer_register());
++ assert(addr->index()->is_illegal() && addr->scale() == LIR_Address::times_1, "can't handle complex addresses yet");
++
++ if (Assembler::is_simm13(addr->disp())) {
++ __ add(addr->base()->as_pointer_register(), addr->disp(), dest->as_pointer_register());
++ } else {
++ __ set(addr->disp(), G3_scratch);
++ __ add(addr->base()->as_pointer_register(), G3_scratch, dest->as_pointer_register());
++ }
+ }
+
+
+--- ./hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -1221,10 +1221,8 @@
+ bool is_obj = (type == T_ARRAY || type == T_OBJECT);
+ LIR_Opr offset = off.result();
+
+- if (data != dst) {
+- __ move(data, dst);
+- data = dst;
+- }
++ // Because we want a 2-arg form of xchg
++ __ move(data, dst);
+
+ assert (!x->is_add() && (type == T_INT || (is_obj LP64_ONLY(&& UseCompressedOops))), "unexpected type");
+ LIR_Address* addr;
+@@ -1254,7 +1252,7 @@
+ pre_barrier(ptr, LIR_OprFact::illegalOpr /* pre_val */,
+ true /* do_load */, false /* patch */, NULL);
+ }
+- __ xchg(LIR_OprFact::address(addr), data, dst, tmp);
++ __ xchg(LIR_OprFact::address(addr), dst, dst, tmp);
+ if (is_obj) {
+ // Seems to be a precise address
+ post_barrier(ptr, data);
+--- ./hotspot/src/cpu/sparc/vm/c1_MacroAssembler_sparc.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/sparc/vm/c1_MacroAssembler_sparc.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -55,9 +55,9 @@
+ }
+
+
+-void C1_MacroAssembler::build_frame(int frame_size_in_bytes) {
+-
+- generate_stack_overflow_check(frame_size_in_bytes);
++void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_bytes) {
++ assert(bang_size_in_bytes >= frame_size_in_bytes, "stack bang size incorrect");
++ generate_stack_overflow_check(bang_size_in_bytes);
+ // Create the frame.
+ save_frame_c1(frame_size_in_bytes);
+ }
+--- ./hotspot/src/cpu/sparc/vm/c2_globals_sparc.hpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/sparc/vm/c2_globals_sparc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -90,6 +90,8 @@
+ define_pd_global(uintx, CodeCacheMinBlockLength, 4);
+ define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K);
+
++define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed on sparc.
++
+ // Heap related flags
+ define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M));
+
+--- ./hotspot/src/cpu/sparc/vm/copy_sparc.hpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/sparc/vm/copy_sparc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -184,7 +184,7 @@
+ assert(MinObjAlignmentInBytes >= BytesPerLong, "need alternate implementation");
+
+ if (value == 0 && UseBlockZeroing &&
+- (count > (BlockZeroingLowLimit >> LogHeapWordSize))) {
++ (count > (size_t)(BlockZeroingLowLimit >> LogHeapWordSize))) {
+ // Call it only when block zeroing is used
+ ((_zero_Fn)StubRoutines::zero_aligned_words())(tohw, count);
+ } else {
+--- ./hotspot/src/cpu/sparc/vm/cppInterpreter_sparc.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/sparc/vm/cppInterpreter_sparc.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -960,7 +960,7 @@
+
+ // reset handle block
+ __ ld_ptr(G2_thread, in_bytes(JavaThread::active_handles_offset()), G3_scratch);
+- __ st_ptr(G0, G3_scratch, JNIHandleBlock::top_offset_in_bytes());
++ __ st(G0, G3_scratch, JNIHandleBlock::top_offset_in_bytes());
+
+
+ // handle exceptions (exception handling will handle unlocking!)
+@@ -2101,7 +2101,7 @@
+ int monitor_size = method->is_synchronized() ?
+ 1*frame::interpreter_frame_monitor_size() : 0;
+ return size_activation_helper(method->max_locals(), method->max_stack(),
+- monitor_size) + call_stub_size;
++ monitor_size) + call_stub_size;
+ }
+
+ void BytecodeInterpreter::layout_interpreterState(interpreterState to_fill,
+@@ -2185,31 +2185,31 @@
+ istate->_last_Java_pc = (intptr_t*) last_Java_pc;
+ }
+
++static int frame_size_helper(int max_stack,
++ int moncount,
++ int callee_param_size,
++ int callee_locals_size,
++ bool is_top_frame,
++ int& monitor_size,
++ int& full_frame_words) {
++ int extra_locals_size = callee_locals_size - callee_param_size;
++ monitor_size = (sizeof(BasicObjectLock) * moncount) / wordSize;
++ full_frame_words = size_activation_helper(extra_locals_size, max_stack, monitor_size);
++ int short_frame_words = size_activation_helper(extra_locals_size, max_stack, monitor_size);
++ int frame_words = is_top_frame ? full_frame_words : short_frame_words;
+
+-int AbstractInterpreter::layout_activation(Method* method,
+- int tempcount, // Number of slots on java expression stack in use
+- int popframe_extra_args,
+- int moncount, // Number of active monitors
+- int caller_actual_parameters,
+- int callee_param_size,
+- int callee_locals_size,
+- frame* caller,
+- frame* interpreter_frame,
+- bool is_top_frame,
+- bool is_bottom_frame) {
++ return frame_words;
++}
+
+- assert(popframe_extra_args == 0, "NEED TO FIX");
+- // NOTE this code must exactly mimic what InterpreterGenerator::generate_compute_interpreter_state()
+- // does as far as allocating an interpreter frame.
+- // If interpreter_frame!=NULL, set up the method, locals, and monitors.
+- // The frame interpreter_frame, if not NULL, is guaranteed to be the right size,
+- // as determined by a previous call to this method.
+- // It is also guaranteed to be walkable even though it is in a skeletal state
++int AbstractInterpreter::size_activation(int max_stack,
++ int tempcount,
++ int extra_args,
++ int moncount,
++ int callee_param_size,
++ int callee_locals_size,
++ bool is_top_frame) {
++ assert(extra_args == 0, "NEED TO FIX");
+ // NOTE: return size is in words not bytes
+- // NOTE: tempcount is the current size of the java expression stack. For top most
+- // frames we will allocate a full sized expression stack and not the curback
+- // version that non-top frames have.
+-
+ // Calculate the amount our frame will be adjust by the callee. For top frame
+ // this is zero.
+
+@@ -2218,87 +2218,108 @@
+ // to it. So it ignores last_frame_adjust value. Seems suspicious as far
+ // as getting sender_sp correct.
+
+- int extra_locals_size = callee_locals_size - callee_param_size;
+- int monitor_size = (sizeof(BasicObjectLock) * moncount) / wordSize;
+- int full_frame_words = size_activation_helper(extra_locals_size, method->max_stack(), monitor_size);
+- int short_frame_words = size_activation_helper(extra_locals_size, method->max_stack(), monitor_size);
+- int frame_words = is_top_frame ? full_frame_words : short_frame_words;
++ int unused_monitor_size = 0;
++ int unused_full_frame_words = 0;
++ return frame_size_helper(max_stack, moncount, callee_param_size, callee_locals_size, is_top_frame,
++ unused_monitor_size, unused_full_frame_words);
++}
++void AbstractInterpreter::layout_activation(Method* method,
++ int tempcount, // Number of slots on java expression stack in use
++ int popframe_extra_args,
++ int moncount, // Number of active monitors
++ int caller_actual_parameters,
++ int callee_param_size,
++ int callee_locals_size,
++ frame* caller,
++ frame* interpreter_frame,
++ bool is_top_frame,
++ bool is_bottom_frame) {
++ assert(popframe_extra_args == 0, "NEED TO FIX");
++ // NOTE this code must exactly mimic what InterpreterGenerator::generate_compute_interpreter_state()
++ // does as far as allocating an interpreter frame.
++ // Set up the method, locals, and monitors.
++ // The frame interpreter_frame is guaranteed to be the right size,
++ // as determined by a previous call to the size_activation() method.
++ // It is also guaranteed to be walkable even though it is in a skeletal state
++  // NOTE: tempcount is the current size of the java expression stack. For the topmost
++  // frames we will allocate a full sized expression stack and not the cutback
++  // version that non-top frames have.
+
++ int monitor_size = 0;
++ int full_frame_words = 0;
++ int frame_words = frame_size_helper(method->max_stack(), moncount, callee_param_size, callee_locals_size,
++ is_top_frame, monitor_size, full_frame_words);
+
+ /*
+- if we actually have a frame to layout we must now fill in all the pieces. This means both
++ We must now fill in all the pieces of the frame. This means both
+ the interpreterState and the registers.
+ */
+- if (interpreter_frame != NULL) {
+
+- // MUCHO HACK
++ // MUCHO HACK
+
+- intptr_t* frame_bottom = interpreter_frame->sp() - (full_frame_words - frame_words);
+- // 'interpreter_frame->sp()' is unbiased while 'frame_bottom' must be a biased value in 64bit mode.
+- assert(((intptr_t)frame_bottom & 0xf) == 0, "SP biased in layout_activation");
+- frame_bottom = (intptr_t*)((intptr_t)frame_bottom - STACK_BIAS);
++ intptr_t* frame_bottom = interpreter_frame->sp() - (full_frame_words - frame_words);
++ // 'interpreter_frame->sp()' is unbiased while 'frame_bottom' must be a biased value in 64bit mode.
++ assert(((intptr_t)frame_bottom & 0xf) == 0, "SP biased in layout_activation");
++ frame_bottom = (intptr_t*)((intptr_t)frame_bottom - STACK_BIAS);
+
+- /* Now fillin the interpreterState object */
++  /* Now fill in the interpreterState object */
+
+- interpreterState cur_state = (interpreterState) ((intptr_t)interpreter_frame->fp() - sizeof(BytecodeInterpreter));
++ interpreterState cur_state = (interpreterState) ((intptr_t)interpreter_frame->fp() - sizeof(BytecodeInterpreter));
+
+
+- intptr_t* locals;
++ intptr_t* locals;
+
+- // Calculate the postion of locals[0]. This is painful because of
+- // stack alignment (same as ia64). The problem is that we can
+- // not compute the location of locals from fp(). fp() will account
+- // for the extra locals but it also accounts for aligning the stack
+- // and we can't determine if the locals[0] was misaligned but max_locals
+- // was enough to have the
+- // calculate postion of locals. fp already accounts for extra locals.
+- // +2 for the static long no_params() issue.
++  // Calculate the position of locals[0]. This is painful because of
++  // stack alignment (same as ia64). The problem is that we cannot
++  // compute the location of locals from fp(). fp() will account
++  // for the extra locals but it also accounts for aligning the stack
++  // and we can't determine if the locals[0] was misaligned but max_locals
++  // was enough to have the
++  // calculate position of locals. fp already accounts for extra locals.
++  // +2 for the static long no_params() issue.
+
+- if (caller->is_interpreted_frame()) {
+- // locals must agree with the caller because it will be used to set the
+- // caller's tos when we return.
+- interpreterState prev = caller->get_interpreterState();
+- // stack() is prepushed.
+- locals = prev->stack() + method->size_of_parameters();
++ if (caller->is_interpreted_frame()) {
++ // locals must agree with the caller because it will be used to set the
++ // caller's tos when we return.
++ interpreterState prev = caller->get_interpreterState();
++ // stack() is prepushed.
++ locals = prev->stack() + method->size_of_parameters();
++ } else {
++ // Lay out locals block in the caller adjacent to the register window save area.
++ //
++ // Compiled frames do not allocate a varargs area which is why this if
++ // statement is needed.
++ //
++ intptr_t* fp = interpreter_frame->fp();
++ int local_words = method->max_locals() * Interpreter::stackElementWords;
++
++ if (caller->is_compiled_frame()) {
++ locals = fp + frame::register_save_words + local_words - 1;
+ } else {
+- // Lay out locals block in the caller adjacent to the register window save area.
+- //
+- // Compiled frames do not allocate a varargs area which is why this if
+- // statement is needed.
+- //
+- intptr_t* fp = interpreter_frame->fp();
+- int local_words = method->max_locals() * Interpreter::stackElementWords;
+-
+- if (caller->is_compiled_frame()) {
+- locals = fp + frame::register_save_words + local_words - 1;
+- } else {
+- locals = fp + frame::memory_parameter_word_sp_offset + local_words - 1;
+- }
+-
++ locals = fp + frame::memory_parameter_word_sp_offset + local_words - 1;
+ }
+- // END MUCHO HACK
+-
+- intptr_t* monitor_base = (intptr_t*) cur_state;
+- intptr_t* stack_base = monitor_base - monitor_size;
+- /* +1 because stack is always prepushed */
+- intptr_t* stack = stack_base - (tempcount + 1);
+-
+-
+- BytecodeInterpreter::layout_interpreterState(cur_state,
+- caller,
+- interpreter_frame,
+- method,
+- locals,
+- stack,
+- stack_base,
+- monitor_base,
+- frame_bottom,
+- is_top_frame);
+-
+- BytecodeInterpreter::pd_layout_interpreterState(cur_state, interpreter_return_address, interpreter_frame->fp());
+
+ }
+- return frame_words;
++ // END MUCHO HACK
++
++ intptr_t* monitor_base = (intptr_t*) cur_state;
++ intptr_t* stack_base = monitor_base - monitor_size;
++ /* +1 because stack is always prepushed */
++ intptr_t* stack = stack_base - (tempcount + 1);
++
++
++ BytecodeInterpreter::layout_interpreterState(cur_state,
++ caller,
++ interpreter_frame,
++ method,
++ locals,
++ stack,
++ stack_base,
++ monitor_base,
++ frame_bottom,
++ is_top_frame);
++
++ BytecodeInterpreter::pd_layout_interpreterState(cur_state, interpreter_return_address, interpreter_frame->fp());
+ }
+
+ #endif // CC_INTERP
+--- ./hotspot/src/cpu/sparc/vm/frame_sparc.hpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/sparc/vm/frame_sparc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -259,8 +259,8 @@
+
+ // next two fns read and write Lmonitors value,
+ private:
+- BasicObjectLock* interpreter_frame_monitors() const { return *interpreter_frame_monitors_addr(); }
+- void interpreter_frame_set_monitors(BasicObjectLock* monitors) { *interpreter_frame_monitors_addr() = monitors; }
++ BasicObjectLock* interpreter_frame_monitors() const;
++ void interpreter_frame_set_monitors(BasicObjectLock* monitors);
+ #else
+ public:
+ inline interpreterState get_interpreterState() const {
+--- ./hotspot/src/cpu/sparc/vm/frame_sparc.inline.hpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/sparc/vm/frame_sparc.inline.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -226,6 +226,13 @@
+ return (Method**)sp_addr_at( Lmethod->sp_offset_in_saved_window());
+ }
+
++inline BasicObjectLock* frame::interpreter_frame_monitors() const {
++ return *interpreter_frame_monitors_addr();
++}
++
++inline void frame::interpreter_frame_set_monitors(BasicObjectLock* monitors) {
++ *interpreter_frame_monitors_addr() = monitors;
++}
+
+ // Constant pool cache
+
+@@ -237,6 +244,10 @@
+ inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const {
+ return (ConstantPoolCache**)sp_addr_at( LcpoolCache->sp_offset_in_saved_window());
+ }
++
++inline oop* frame::interpreter_frame_temp_oop_addr() const {
++ return (oop *)(fp() + interpreter_frame_oop_temp_offset);
++}
+ #endif // CC_INTERP
+
+
+--- ./hotspot/src/cpu/sparc/vm/globalDefinitions_sparc.hpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/sparc/vm/globalDefinitions_sparc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -30,6 +30,12 @@
+
+ const int StackAlignmentInBytes = (2*wordSize);
+
++// Indicates whether the C calling conventions require that
++// 32-bit integer argument values are properly extended to 64 bits.
++// If set, SharedRuntime::c_calling_convention() must adapt
++// signatures accordingly.
++const bool CCallingConventionRequiresIntsAsLongs = false;
++
+ #define SUPPORTS_NATIVE_CX8
+
+ #endif // CPU_SPARC_VM_GLOBALDEFINITIONS_SPARC_HPP
+--- ./hotspot/src/cpu/sparc/vm/globals_sparc.hpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/sparc/vm/globals_sparc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -43,7 +43,8 @@
+ define_pd_global(bool, NeedsDeoptSuspend, true); // register window machines need this
+
+ define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks
+-define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs past to check cast
++define_pd_global(bool, TrapBasedNullChecks, false); // Not needed on sparc.
++define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast
+
+ define_pd_global(intx, CodeEntryAlignment, 32);
+ // The default setting 16/16 seems to work best.
+--- ./hotspot/src/cpu/sparc/vm/interp_masm_sparc.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/sparc/vm/interp_masm_sparc.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -1892,6 +1892,220 @@
+ }
+ }
+
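++// Profile the type of 'obj' into the pointer-sized MDO cell at mdo_addr.
++// The cell holds the last seen klass pointer, with the TypeEntries flag
++// bits (null_seen, type_unknown) folded into its low bits.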
++void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr, Register tmp) {
++ Label not_null, do_nothing, do_update;
++
++ assert_different_registers(obj, mdo_addr.base(), tmp);
++
++ verify_oop(obj);
++
++ ld_ptr(mdo_addr, tmp);
++
++ br_notnull_short(obj, pt, not_null);
++ or3(tmp, TypeEntries::null_seen, tmp);
++ ba_short(do_update);
++
++ bind(not_null);
++ load_klass(obj, obj);
++
++ xor3(obj, tmp, obj);
++ btst(TypeEntries::type_klass_mask, obj);
++ // klass seen before, nothing to do. The unknown bit may have been
++ // set already but no need to check.
++ brx(zero, false, pt, do_nothing);
++ delayed()->
++
++ btst(TypeEntries::type_unknown, obj);
++ // already unknown. Nothing to do anymore.
++ brx(notZero, false, pt, do_nothing);
++ delayed()->
++
++ btst(TypeEntries::type_mask, tmp);
++ brx(zero, true, pt, do_update);
++ // first time here. Set profile type.
++ delayed()->or3(tmp, obj, tmp);
++
++ // different than before. Cannot keep accurate profile.
++ or3(tmp, TypeEntries::type_unknown, tmp);
++
++ bind(do_update);
++ // update profile
++ st_ptr(tmp, mdo_addr);
++
++ bind(do_nothing);
++}
++
++void InterpreterMacroAssembler::profile_arguments_type(Register callee, Register tmp1, Register tmp2, bool is_virtual) {
++ if (!ProfileInterpreter) {
++ return;
++ }
++
++ assert_different_registers(callee, tmp1, tmp2, ImethodDataPtr);
++
++ if (MethodData::profile_arguments() || MethodData::profile_return()) {
++ Label profile_continue;
++
++ test_method_data_pointer(profile_continue);
++
++ int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size());
++
++ ldub(ImethodDataPtr, in_bytes(DataLayout::tag_offset()) - off_to_start, tmp1);
++ cmp_and_br_short(tmp1, is_virtual ? DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag, notEqual, pn, profile_continue);
++
++ if (MethodData::profile_arguments()) {
++ Label done;
++ int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset());
++ add(ImethodDataPtr, off_to_args, ImethodDataPtr);
++
++ for (int i = 0; i < TypeProfileArgsLimit; i++) {
++ if (i > 0 || MethodData::profile_return()) {
++ // If return value type is profiled we may have no argument to profile
++ ld_ptr(ImethodDataPtr, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args, tmp1);
++ sub(tmp1, i*TypeStackSlotEntries::per_arg_count(), tmp1);
++ cmp_and_br_short(tmp1, TypeStackSlotEntries::per_arg_count(), less, pn, done);
++ }
++ ld_ptr(Address(callee, Method::const_offset()), tmp1);
++ lduh(Address(tmp1, ConstMethod::size_of_parameters_offset()), tmp1);
++ // stack offset o (zero based) from the start of the argument
++ // list, for n arguments translates into offset n - o - 1 from
++ // the end of the argument list. But there's an extra slot at
++      // the top of the stack. So the offset is n - o from Lesp.
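++      // For example, with n = 2 arguments the first argument (o = 0) is
++      // read from Lesp[2] and the second (o = 1) from Lesp[1].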
++ ld_ptr(ImethodDataPtr, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args, tmp2);
++ sub(tmp1, tmp2, tmp1);
++
++ // Can't use MacroAssembler::argument_address() which needs Gargs to be set up
++ sll(tmp1, Interpreter::logStackElementSize, tmp1);
++ ld_ptr(Lesp, tmp1, tmp1);
++
++ Address mdo_arg_addr(ImethodDataPtr, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args);
++ profile_obj_type(tmp1, mdo_arg_addr, tmp2);
++
++ int to_add = in_bytes(TypeStackSlotEntries::per_arg_size());
++ add(ImethodDataPtr, to_add, ImethodDataPtr);
++ off_to_args += to_add;
++ }
++
++ if (MethodData::profile_return()) {
++ ld_ptr(ImethodDataPtr, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args, tmp1);
++ sub(tmp1, TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count(), tmp1);
++ }
++
++ bind(done);
++
++ if (MethodData::profile_return()) {
++ // We're right after the type profile for the last
++ // argument. tmp1 is the number of cells left in the
++ // CallTypeData/VirtualCallTypeData to reach its end. Non null
++ // if there's a return to profile.
++ assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type");
++ sll(tmp1, exact_log2(DataLayout::cell_size), tmp1);
++ add(ImethodDataPtr, tmp1, ImethodDataPtr);
++ }
++ } else {
++ assert(MethodData::profile_return(), "either profile call args or call ret");
++ update_mdp_by_constant(in_bytes(TypeEntriesAtCall::return_only_size()));
++ }
++
++ // mdp points right after the end of the
++ // CallTypeData/VirtualCallTypeData, right after the cells for the
++ // return value type if there's one.
++
++ bind(profile_continue);
++ }
++}
++
++void InterpreterMacroAssembler::profile_return_type(Register ret, Register tmp1, Register tmp2) {
++ assert_different_registers(ret, tmp1, tmp2);
++ if (ProfileInterpreter && MethodData::profile_return()) {
++ Label profile_continue, done;
++
++ test_method_data_pointer(profile_continue);
++
++ if (MethodData::profile_return_jsr292_only()) {
++ // If we don't profile all invoke bytecodes we must make sure
++ // it's a bytecode we indeed profile. We can't go back to the
++      // beginning of the ProfileData we intend to update to check its
++      // type because we're right after it and we don't know its
++ // length.
++ Label do_profile;
++ ldub(Lbcp, 0, tmp1);
++ cmp_and_br_short(tmp1, Bytecodes::_invokedynamic, equal, pn, do_profile);
++ cmp(tmp1, Bytecodes::_invokehandle);
++ br(equal, false, pn, do_profile);
++ delayed()->ldub(Lmethod, Method::intrinsic_id_offset_in_bytes(), tmp1);
++ cmp_and_br_short(tmp1, vmIntrinsics::_compiledLambdaForm, notEqual, pt, profile_continue);
++
++ bind(do_profile);
++ }
++
++ Address mdo_ret_addr(ImethodDataPtr, -in_bytes(ReturnTypeEntry::size()));
++ mov(ret, tmp1);
++ profile_obj_type(tmp1, mdo_ret_addr, tmp2);
++
++ bind(profile_continue);
++ }
++}
++
++void InterpreterMacroAssembler::profile_parameters_type(Register tmp1, Register tmp2, Register tmp3, Register tmp4) {
++ if (ProfileInterpreter && MethodData::profile_parameters()) {
++ Label profile_continue, done;
++
++ test_method_data_pointer(profile_continue);
++
++ // Load the offset of the area within the MDO used for
++ // parameters. If it's negative we're not profiling any parameters.
++ lduw(ImethodDataPtr, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset()), tmp1);
++ cmp_and_br_short(tmp1, 0, less, pn, profile_continue);
++
++ // Compute a pointer to the area for parameters from the offset
++ // and move the pointer to the slot for the last
++    // parameter. Collect profiling from the last parameter down.
++ // mdo start + parameters offset + array length - 1
++
++ // Pointer to the parameter area in the MDO
++ Register mdp = tmp1;
++ add(ImethodDataPtr, tmp1, mdp);
++
++ // offset of the current profile entry to update
++ Register entry_offset = tmp2;
++ // entry_offset = array len in number of cells
++ ld_ptr(mdp, ArrayData::array_len_offset(), entry_offset);
++
++ int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0));
++ assert(off_base % DataLayout::cell_size == 0, "should be a number of cells");
++
++ // entry_offset (number of cells) = array len - size of 1 entry + offset of the stack slot field
++ sub(entry_offset, TypeStackSlotEntries::per_arg_count() - (off_base / DataLayout::cell_size), entry_offset);
++ // entry_offset in bytes
++ sll(entry_offset, exact_log2(DataLayout::cell_size), entry_offset);
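++    // Illustration (each entry is 2 cells, a stack slot plus a type): with
++    // 3 profiled parameters the array is 6 cells long, so entry_offset
++    // starts at the stack-slot cell of the last parameter and the loop
++    // below steps back one 2-cell entry per iteration until off_base.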
++
++ Label loop;
++ bind(loop);
++
++ // load offset on the stack from the slot for this parameter
++ ld_ptr(mdp, entry_offset, tmp3);
++    sll(tmp3, Interpreter::logStackElementSize, tmp3);
++ neg(tmp3);
++ // read the parameter from the local area
++ ld_ptr(Llocals, tmp3, tmp3);
++
++ // make entry_offset now point to the type field for this parameter
++ int type_base = in_bytes(ParametersTypeData::type_offset(0));
++ assert(type_base > off_base, "unexpected");
++ add(entry_offset, type_base - off_base, entry_offset);
++
++ // profile the parameter
++ Address arg_type(mdp, entry_offset);
++ profile_obj_type(tmp3, arg_type, tmp4);
++
++ // go to next parameter
++ sub(entry_offset, TypeStackSlotEntries::per_arg_count() * DataLayout::cell_size + (type_base - off_base), entry_offset);
++ cmp_and_br_short(entry_offset, off_base, greaterEqual, pt, loop);
++
++ bind(profile_continue);
++ }
++}
++
+ // add a InterpMonitorElem to stack (see frame_sparc.hpp)
+
+ void InterpreterMacroAssembler::add_monitor_to_stack( bool stack_is_empty,
+--- ./hotspot/src/cpu/sparc/vm/interp_masm_sparc.hpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/sparc/vm/interp_masm_sparc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -323,6 +323,11 @@
+ Register scratch2,
+ Register scratch3);
+
++ void profile_obj_type(Register obj, const Address& mdo_addr, Register tmp);
++ void profile_arguments_type(Register callee, Register tmp1, Register tmp2, bool is_virtual);
++ void profile_return_type(Register ret, Register tmp1, Register tmp2);
++ void profile_parameters_type(Register tmp1, Register tmp2, Register tmp3, Register tmp4);
++
+ // Debugging
+ void interp_verify_oop(Register reg, TosState state, const char * file, int line); // only if +VerifyOops && state == atos
+ void verify_oop_or_return_address(Register reg, Register rtmp); // for astore
+--- ./hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -3531,7 +3531,7 @@
+ // was post-decremented.) Skip this address by starting at i=1, and
+ // touch a few more pages below. N.B. It is important to touch all
+ // the way down to and including i=StackShadowPages.
+- for (int i = 1; i <= StackShadowPages; i++) {
++ for (int i = 1; i < StackShadowPages; i++) {
+ set((-i*offset)+STACK_BIAS, Rscratch);
+ st(G0, Rtsp, Rscratch);
+ }
+--- ./hotspot/src/cpu/sparc/vm/macroAssembler_sparc.inline.hpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/sparc/vm/macroAssembler_sparc.inline.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -233,6 +233,7 @@
+ }
+
+ inline void MacroAssembler::br( Condition c, bool a, Predict p, Label& L ) {
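++  // A cbcond must not be immediately followed by another CTI instruction,
++  // so insert_nop_after_cbcond() pads with a nop when the previously
++  // emitted instruction was a cbcond.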
++ insert_nop_after_cbcond();
+ br(c, a, p, target(L));
+ }
+
+@@ -248,6 +249,7 @@
+ }
+
+ inline void MacroAssembler::brx( Condition c, bool a, Predict p, Label& L ) {
++ insert_nop_after_cbcond();
+ brx(c, a, p, target(L));
+ }
+
+@@ -269,6 +271,7 @@
+ }
+
+ inline void MacroAssembler::fb( Condition c, bool a, Predict p, Label& L ) {
++ insert_nop_after_cbcond();
+ fb(c, a, p, target(L));
+ }
+
+@@ -318,6 +321,7 @@
+ }
+
+ inline void MacroAssembler::call( Label& L, relocInfo::relocType rt ) {
++ insert_nop_after_cbcond();
+ MacroAssembler::call( target(L), rt);
+ }
+
+--- ./hotspot/src/cpu/sparc/vm/sharedRuntime_sparc.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/sparc/vm/sharedRuntime_sparc.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -1106,7 +1106,9 @@
+
+ int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
+ VMRegPair *regs,
++ VMRegPair *regs2,
+ int total_args_passed) {
++ assert(regs2 == NULL, "not needed on sparc");
+
+ // Return the number of VMReg stack_slots needed for the args.
+ // This value does not include an abi space (like register window
+@@ -2084,7 +2086,7 @@
+ // the 1st six register arguments). It's weird see int_stk_helper.
+ //
+ int out_arg_slots;
+- out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
++ out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
+
+ if (is_critical_native) {
+ // Critical natives may have to call out so they need a save area
+@@ -2685,7 +2687,7 @@
+ if (!is_critical_native) {
+ // reset handle block
+ __ ld_ptr(G2_thread, in_bytes(JavaThread::active_handles_offset()), L5);
+- __ st_ptr(G0, L5, JNIHandleBlock::top_offset_in_bytes());
++ __ st(G0, L5, JNIHandleBlock::top_offset_in_bytes());
+
+ __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), G3_scratch);
+ check_forward_pending_exception(masm, G3_scratch);
+@@ -2831,7 +2833,7 @@
+ // the 1st six register arguments). It's weird see int_stk_helper.
+ //
+ int out_arg_slots;
+- out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
++ out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
+
+ // Calculate the total number of stack slots we will need.
+
+@@ -3353,13 +3355,16 @@
+ Register O4array_size = O4;
+ Label loop;
+
+- // Before we make new frames, check to see if stack is available.
+- // Do this after the caller's return address is on top of stack
++#ifdef ASSERT
++  // Compilers generate code that bangs the stack by as much as the
++  // interpreter would need. So this stack banging should never
++  // trigger a fault. Verify that it does not on non-product builds.
+ if (UseStackBanging) {
+ // Get total frame size for interpreted frames
+ __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes(), O4);
+ __ bang_stack_size(O4, O3, G3_scratch);
+ }
++#endif
+
+ __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes(), O4array_size);
+ __ ld_ptr(O2UnrollBlock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes(), G3pcs);
+@@ -3407,9 +3412,11 @@
+ ResourceMark rm;
+ // setup code generation tools
+ int pad = VerifyThread ? 512 : 0;// Extra slop space for more verify code
++#ifdef ASSERT
+ if (UseStackBanging) {
+ pad += StackShadowPages*16 + 32;
+ }
++#endif
+ #ifdef _LP64
+ CodeBuffer buffer("deopt_blob", 2100+pad, 512);
+ #else
+@@ -3630,9 +3637,11 @@
+ ResourceMark rm;
+ // setup code generation tools
+ int pad = VerifyThread ? 512 : 0;
++#ifdef ASSERT
+ if (UseStackBanging) {
+ pad += StackShadowPages*16 + 32;
+ }
++#endif
+ #ifdef _LP64
+ CodeBuffer buffer("uncommon_trap_blob", 2700+pad, 512);
+ #else
+--- ./hotspot/src/cpu/sparc/vm/sparc.ad Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/sparc/vm/sparc.ad Wed Jul 30 03:51:43 2014 -0700
+@@ -457,6 +457,13 @@
+ // This is a block of C++ code which provides values, functions, and
+ // definitions necessary in the rest of the architecture description
+ source_hpp %{
++// Header information of the source block.
++// Method declarations/definitions which are used outside
++// the ad-scope can conveniently be defined here.
++//
++// To keep related declarations/definitions/uses close together,
++// we switch between source %{ }% and source_hpp %{ }% freely as needed.
++
+ // Must be visible to the DFA in dfa_sparc.cpp
+ extern bool can_branch_register( Node *bol, Node *cmp );
+
+@@ -468,6 +475,46 @@
+ #define LONG_HI_REG(x) (x)
+ #define LONG_LO_REG(x) (x)
+
++class CallStubImpl {
++
++ //--------------------------------------------------------------
++ //---< Used for optimization in Compile::Shorten_branches >---
++ //--------------------------------------------------------------
++
++ public:
++ // Size of call trampoline stub.
++ static uint size_call_trampoline() {
++ return 0; // no call trampolines on this platform
++ }
++
++ // number of relocations needed by a call trampoline stub
++ static uint reloc_call_trampoline() {
++ return 0; // no call trampolines on this platform
++ }
++};
++
++class HandlerImpl {
++
++ public:
++
++ static int emit_exception_handler(CodeBuffer &cbuf);
++ static int emit_deopt_handler(CodeBuffer& cbuf);
++
++ static uint size_exception_handler() {
++ if (TraceJumps) {
++ return (400); // just a guess
++ }
++ return ( NativeJump::instruction_size ); // sethi;jmp;nop
++ }
++
++ static uint size_deopt_handler() {
++ if (TraceJumps) {
++ return (400); // just a guess
++ }
++ return ( 4+ NativeJump::instruction_size ); // save;sethi;jmp;restore
++ }
++};
++
+ %}
+
+ source %{
+@@ -1040,6 +1087,11 @@
+ }
+ }
+
++bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
++void MachConstantBaseNode::postalloc_expand(GrowableArray *nodes, PhaseRegAlloc *ra_) {
++ ShouldNotReachHere();
++}
++
+ void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
+ Compile* C = ra_->C;
+ Compile::ConstantTable& constant_table = C->constant_table();
+@@ -1141,15 +1193,16 @@
+ st->print_cr("Verify_Thread"); st->print("\t");
+ }
+
+- size_t framesize = C->frame_slots() << LogBytesPerInt;
++ size_t framesize = C->frame_size_in_bytes();
++ int bangsize = C->bang_size_in_bytes();
+
+ // Calls to C2R adapters often do not accept exceptional returns.
+ // We require that their callers must bang for them. But be careful, because
+ // some VM calls (such as call site linkage) can use several kilobytes of
+ // stack. But the stack safety zone should account for that.
+ // See bugs 4446381, 4468289, 4497237.
+- if (C->need_stack_bang(framesize)) {
+- st->print_cr("! stack bang"); st->print("\t");
++ if (C->need_stack_bang(bangsize)) {
++ st->print_cr("! stack bang (%d bytes)", bangsize); st->print("\t");
+ }
+
+ if (Assembler::is_simm13(-framesize)) {
+@@ -1173,17 +1226,18 @@
+
+ __ verify_thread();
+
+- size_t framesize = C->frame_slots() << LogBytesPerInt;
++ size_t framesize = C->frame_size_in_bytes();
+ assert(framesize >= 16*wordSize, "must have room for reg. save area");
+ assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");
++ int bangsize = C->bang_size_in_bytes();
+
+ // Calls to C2R adapters often do not accept exceptional returns.
+ // We require that their callers must bang for them. But be careful, because
+ // some VM calls (such as call site linkage) can use several kilobytes of
+ // stack. But the stack safety zone should account for that.
+ // See bugs 4446381, 4468289, 4497237.
+- if (C->need_stack_bang(framesize)) {
+- __ generate_stack_overflow_check(framesize);
++ if (C->need_stack_bang(bangsize)) {
++ __ generate_stack_overflow_check(bangsize);
+ }
+
+ if (Assembler::is_simm13(-framesize)) {
+@@ -1216,7 +1270,7 @@
+ void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
+ Compile* C = ra_->C;
+
+- if( do_polling() && ra_->C->is_method_compilation() ) {
++ if(do_polling() && ra_->C->is_method_compilation()) {
+ st->print("SETHI #PollAddr,L0\t! Load Polling address\n\t");
+ #ifdef _LP64
+ st->print("LDX [L0],G0\t!Poll for Safepointing\n\t");
+@@ -1225,8 +1279,12 @@
+ #endif
+ }
+
+- if( do_polling() )
++ if(do_polling()) {
++ if (UseCBCond && !ra_->C->is_method_compilation()) {
++ st->print("NOP\n\t");
++ }
+ st->print("RET\n\t");
++ }
+
+ st->print("RESTORE");
+ }
+@@ -1239,15 +1297,20 @@
+ __ verify_thread();
+
+ // If this does safepoint polling, then do it here
+- if( do_polling() && ra_->C->is_method_compilation() ) {
++ if(do_polling() && ra_->C->is_method_compilation()) {
+ AddressLiteral polling_page(os::get_polling_page());
+ __ sethi(polling_page, L0);
+ __ relocate(relocInfo::poll_return_type);
+- __ ld_ptr( L0, 0, G0 );
++ __ ld_ptr(L0, 0, G0);
+ }
+
+ // If this is a return, then stuff the restore in the delay slot
+- if( do_polling() ) {
++ if(do_polling()) {
++ if (UseCBCond && !ra_->C->is_method_compilation()) {
++ // Insert extra padding for the case when the epilogue is preceded by
++ // a cbcond jump, which can't be followed by a CTI instruction
++ __ nop();
++ }
+ __ ret();
+ __ delayed()->restore();
+ } else {
+@@ -1705,22 +1768,9 @@
+
+ //=============================================================================
+
+-uint size_exception_handler() {
+- if (TraceJumps) {
+- return (400); // just a guess
+- }
+- return ( NativeJump::instruction_size ); // sethi;jmp;nop
+-}
+-
+-uint size_deopt_handler() {
+- if (TraceJumps) {
+- return (400); // just a guess
+- }
+- return ( 4+ NativeJump::instruction_size ); // save;sethi;jmp;restore
+-}
+
+ // Emit exception handler code.
+-int emit_exception_handler(CodeBuffer& cbuf) {
++int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
+ Register temp_reg = G3;
+ AddressLiteral exception_blob(OptoRuntime::exception_blob()->entry_point());
+ MacroAssembler _masm(&cbuf);
+@@ -1741,7 +1791,7 @@
+ return offset;
+ }
+
+-int emit_deopt_handler(CodeBuffer& cbuf) {
++int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
+ // Can't use any of the current frame's registers as we may have deopted
+ // at a poll and everything (including G3) can be live.
+ Register temp_reg = L0;
+@@ -1848,6 +1898,12 @@
+ return false;
+ }
+
++// Current (2013) SPARC platforms need to read original key
++// to construct decryption expanded key
++const bool Matcher::pass_original_key_for_aes() {
++ return true;
++}
++
+ // USII supports fxtof through the whole range of number, USIII doesn't
+ const bool Matcher::convL2FSupported(void) {
+ return VM_Version::has_fast_fxtof();
+@@ -1885,6 +1941,9 @@
+ return (VM_Version::is_T4() || VM_Version::is_sparc64()) ? ConditionalMoveLimit : 0;
+ }
+
++// Does the CPU require late expand (see block.cpp for description of late expand)?
++const bool Matcher::require_postalloc_expand = false;
++
+ // Should the Matcher clone shifts on addressing modes, expecting them to
+ // be subsumed into complex addressing expressions or compute them into
+ // registers? True for Intel but false for most RISCs
+@@ -2023,19 +2082,6 @@
+ return L7_REGP_mask();
+ }
+
+-const RegMask Matcher::mathExactI_result_proj_mask() {
+- return G1_REGI_mask();
+-}
+-
+-const RegMask Matcher::mathExactL_result_proj_mask() {
+- return G1_REGL_mask();
+-}
+-
+-const RegMask Matcher::mathExactI_flags_proj_mask() {
+- return INT_FLAGS_mask();
+-}
+-
+-
+ %}
+
+
+@@ -2503,7 +2549,7 @@
+ enc_class call_epilog %{
+ if( VerifyStackAtCalls ) {
+ MacroAssembler _masm(&cbuf);
+- int framesize = ra_->C->frame_slots() << LogBytesPerInt;
++ int framesize = ra_->C->frame_size_in_bytes();
+ Register temp_reg = G3;
+ __ add(SP, framesize, temp_reg);
+ __ cmp(temp_reg, FP);
+@@ -3242,7 +3288,7 @@
+ // C.
+ c_calling_convention %{
+ // This is obviously always outgoing
+- (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
++ (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
+ %}
+
+ // Location of native (C/C++) and interpreter return values. This is specified to
+@@ -3295,7 +3341,18 @@
+ //----------Instruction Attributes---------------------------------------------
+ ins_attrib ins_cost(DEFAULT_COST); // Required cost attribute
+ ins_attrib ins_size(32); // Required size attribute (in bits)
+-ins_attrib ins_avoid_back_to_back(0); // instruction should not be generated back to back
++
++// avoid_back_to_back attribute is an expression that must return
++// one of the following values defined in MachNode:
++// AVOID_NONE - instruction can be placed anywhere
++// AVOID_BEFORE - instruction cannot be placed after an
++// instruction with MachNode::AVOID_AFTER
++// AVOID_AFTER - the next instruction cannot be the one
++// with MachNode::AVOID_BEFORE
++// AVOID_BEFORE_AND_AFTER - BEFORE and AFTER attributes at
++// the same time
++ins_attrib ins_avoid_back_to_back(MachNode::AVOID_NONE);
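++// For example, the short-branch cbcond variants below use
++// AVOID_BEFORE_AND_AFTER, since a cbcond must not be placed back to
++// back with another CTI instruction.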
++
+ ins_attrib ins_short_branch(0); // Required flag: is this instruction a
+ // non-matching short branch variant of some
+ // long branch?
+@@ -6595,6 +6652,7 @@
+ ins_encode %{
+ __ encode_heap_oop($src$$Register, $dst$$Register);
+ %}
++ ins_avoid_back_to_back(Universe::narrow_oop_base() == NULL ? AVOID_NONE : AVOID_BEFORE);
+ ins_pipe(ialu_reg);
+ %}
+
+@@ -6653,6 +6711,7 @@
+
+ instruct membar_acquire() %{
+ match(MemBarAcquire);
++ match(LoadFence);
+ ins_cost(4*MEMORY_REF_COST);
+
+ size(0);
+@@ -6673,6 +6732,7 @@
+
+ instruct membar_release() %{
+ match(MemBarRelease);
++ match(StoreFence);
+ ins_cost(4*MEMORY_REF_COST);
+
+ size(0);
+@@ -9162,6 +9222,7 @@
+ __ ba(*L);
+ __ delayed()->nop();
+ %}
++ ins_avoid_back_to_back(AVOID_BEFORE);
+ ins_pipe(br);
+ %}
+
+@@ -9174,13 +9235,13 @@
+ size(4);
+ ins_cost(BRANCH_COST);
+ format %{ "BA $labl\t! short branch" %}
+- ins_encode %{
++ ins_encode %{
+ Label* L = $labl$$label;
+ assert(__ use_cbcond(*L), "back to back cbcond");
+ __ ba_short(*L);
+ %}
+ ins_short_branch(1);
+- ins_avoid_back_to_back(1);
++ ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER);
+ ins_pipe(cbcond_reg_imm);
+ %}
+
+@@ -9194,6 +9255,7 @@
+ format %{ "BP$cmp $icc,$labl" %}
+ // Prim = bits 24-22, Secnd = bits 31-30
+ ins_encode( enc_bp( labl, cmp, icc ) );
++ ins_avoid_back_to_back(AVOID_BEFORE);
+ ins_pipe(br_cc);
+ %}
+
+@@ -9205,6 +9267,7 @@
+ format %{ "BP$cmp $icc,$labl" %}
+ // Prim = bits 24-22, Secnd = bits 31-30
+ ins_encode( enc_bp( labl, cmp, icc ) );
++ ins_avoid_back_to_back(AVOID_BEFORE);
+ ins_pipe(br_cc);
+ %}
+
+@@ -9223,6 +9286,7 @@
+ __ bp( (Assembler::Condition)($cmp$$cmpcode), false, Assembler::ptr_cc, predict_taken, *L);
+ __ delayed()->nop();
+ %}
++ ins_avoid_back_to_back(AVOID_BEFORE);
+ ins_pipe(br_cc);
+ %}
+
+@@ -9241,6 +9305,7 @@
+ __ fbp( (Assembler::Condition)($cmp$$cmpcode), false, (Assembler::CC)($fcc$$reg), predict_taken, *L);
+ __ delayed()->nop();
+ %}
++ ins_avoid_back_to_back(AVOID_BEFORE);
+ ins_pipe(br_fcc);
+ %}
+
+@@ -9253,6 +9318,7 @@
+ format %{ "BP$cmp $icc,$labl\t! Loop end" %}
+ // Prim = bits 24-22, Secnd = bits 31-30
+ ins_encode( enc_bp( labl, cmp, icc ) );
++ ins_avoid_back_to_back(AVOID_BEFORE);
+ ins_pipe(br_cc);
+ %}
+
+@@ -9265,6 +9331,7 @@
+ format %{ "BP$cmp $icc,$labl\t! Loop end" %}
+ // Prim = bits 24-22, Secnd = bits 31-30
+ ins_encode( enc_bp( labl, cmp, icc ) );
++ ins_avoid_back_to_back(AVOID_BEFORE);
+ ins_pipe(br_cc);
+ %}
+
+@@ -9515,7 +9582,7 @@
+ __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$Register, *L);
+ %}
+ ins_short_branch(1);
+- ins_avoid_back_to_back(1);
++ ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER);
+ ins_pipe(cbcond_reg_reg);
+ %}
+
+@@ -9533,7 +9600,7 @@
+ __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$constant, *L);
+ %}
+ ins_short_branch(1);
+- ins_avoid_back_to_back(1);
++ ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER);
+ ins_pipe(cbcond_reg_imm);
+ %}
+
+@@ -9551,7 +9618,7 @@
+ __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$Register, *L);
+ %}
+ ins_short_branch(1);
+- ins_avoid_back_to_back(1);
++ ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER);
+ ins_pipe(cbcond_reg_reg);
+ %}
+
+@@ -9569,7 +9636,7 @@
+ __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$constant, *L);
+ %}
+ ins_short_branch(1);
+- ins_avoid_back_to_back(1);
++ ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER);
+ ins_pipe(cbcond_reg_imm);
+ %}
+
+@@ -9587,7 +9654,7 @@
+ __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::xcc, $op1$$Register, $op2$$Register, *L);
+ %}
+ ins_short_branch(1);
+- ins_avoid_back_to_back(1);
++ ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER);
+ ins_pipe(cbcond_reg_reg);
+ %}
+
+@@ -9605,7 +9672,7 @@
+ __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::xcc, $op1$$Register, $op2$$constant, *L);
+ %}
+ ins_short_branch(1);
+- ins_avoid_back_to_back(1);
++ ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER);
+ ins_pipe(cbcond_reg_imm);
+ %}
+
+@@ -9628,7 +9695,7 @@
+ __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::ptr_cc, $op1$$Register, $op2$$Register, *L);
+ %}
+ ins_short_branch(1);
+- ins_avoid_back_to_back(1);
++ ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER);
+ ins_pipe(cbcond_reg_reg);
+ %}
+
+@@ -9650,7 +9717,7 @@
+ __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::ptr_cc, $op1$$Register, G0, *L);
+ %}
+ ins_short_branch(1);
+- ins_avoid_back_to_back(1);
++ ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER);
+ ins_pipe(cbcond_reg_reg);
+ %}
+
+@@ -9668,7 +9735,7 @@
+ __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$Register, *L);
+ %}
+ ins_short_branch(1);
+- ins_avoid_back_to_back(1);
++ ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER);
+ ins_pipe(cbcond_reg_reg);
+ %}
+
+@@ -9686,7 +9753,7 @@
+ __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, G0, *L);
+ %}
+ ins_short_branch(1);
+- ins_avoid_back_to_back(1);
++ ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER);
+ ins_pipe(cbcond_reg_reg);
+ %}
+
+@@ -9705,7 +9772,7 @@
+ __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$Register, *L);
+ %}
+ ins_short_branch(1);
+- ins_avoid_back_to_back(1);
++ ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER);
+ ins_pipe(cbcond_reg_reg);
+ %}
+
+@@ -9723,7 +9790,7 @@
+ __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$constant, *L);
+ %}
+ ins_short_branch(1);
+- ins_avoid_back_to_back(1);
++ ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER);
+ ins_pipe(cbcond_reg_imm);
+ %}
+
+@@ -9740,6 +9807,7 @@
+ ins_cost(BRANCH_COST);
+ format %{ "BR$cmp $op1,$labl" %}
+ ins_encode( enc_bpr( labl, cmp, op1 ) );
++ ins_avoid_back_to_back(AVOID_BEFORE);
+ ins_pipe(br_reg);
+ %}
+
+@@ -9752,6 +9820,7 @@
+ ins_cost(BRANCH_COST);
+ format %{ "BR$cmp $op1,$labl" %}
+ ins_encode( enc_bpr( labl, cmp, op1 ) );
++ ins_avoid_back_to_back(AVOID_BEFORE);
+ ins_pipe(br_reg);
+ %}
+
+@@ -9764,6 +9833,7 @@
+ ins_cost(BRANCH_COST);
+ format %{ "BR$cmp $op1,$labl" %}
+ ins_encode( enc_bpr( labl, cmp, op1 ) );
++ ins_avoid_back_to_back(AVOID_BEFORE);
+ ins_pipe(br_reg);
+ %}
+
+@@ -9804,6 +9874,7 @@
+ __ bp( (Assembler::Condition)($cmp$$cmpcode), false, Assembler::xcc, predict_taken, *L);
+ __ delayed()->nop();
+ %}
++ ins_avoid_back_to_back(AVOID_BEFORE);
+ ins_pipe(br_cc);
+ %}
+
+@@ -9931,6 +10002,7 @@
+ ins_cost(CALL_COST);
+ format %{ "CALL,static ; NOP ==> " %}
+ ins_encode( Java_Static_Call( meth ), call_epilog );
++ ins_avoid_back_to_back(AVOID_BEFORE);
+ ins_pipe(simple_call);
+ %}
+
+@@ -9967,6 +10039,7 @@
+ format %{ "CALL,runtime" %}
+ ins_encode( Java_To_Runtime( meth ),
+ call_epilog, adjust_long_from_native_call );
++ ins_avoid_back_to_back(AVOID_BEFORE);
+ ins_pipe(simple_call);
+ %}
+
+@@ -9979,6 +10052,7 @@
+ ins_encode( Java_To_Runtime( meth ),
+ call_epilog,
+ adjust_long_from_native_call );
++ ins_avoid_back_to_back(AVOID_BEFORE);
+ ins_pipe(simple_call);
+ %}
+
+@@ -9991,6 +10065,7 @@
+ ins_encode( Java_To_Runtime( meth ),
+ call_epilog,
+ adjust_long_from_native_call );
++ ins_avoid_back_to_back(AVOID_BEFORE);
+ ins_pipe(simple_call);
+ %}
+
+@@ -10004,6 +10079,7 @@
+ ins_cost(CALL_COST);
+ format %{ "Jmp $jump_target ; NOP \t! $method_oop holds method oop" %}
+ ins_encode(form_jmpl(jump_target));
++ ins_avoid_back_to_back(AVOID_BEFORE);
+ ins_pipe(tail_call);
+ %}
+
+@@ -10035,6 +10111,7 @@
+ // opcode(Assembler::jmpl_op3, Assembler::arith_op);
+ // The hack duplicates the exception oop into G3, so that CreateEx can use it there.
+ // ins_encode( form3_rs1_simm13_rd( jump_target, 0x00, R_G0 ), move_return_pc_to_o1() );
++ ins_avoid_back_to_back(AVOID_BEFORE);
+ ins_pipe(tail_call);
+ %}
+
+@@ -10065,6 +10142,7 @@
+ // use the following format syntax
+ format %{ "Jmp rethrow_stub" %}
+ ins_encode(enc_rethrow);
++ ins_avoid_back_to_back(AVOID_BEFORE);
+ ins_pipe(tail_call);
+ %}
+
+@@ -10093,6 +10171,7 @@
+ ins_cost(DEFAULT_COST*10);
+ format %{ "CALL PartialSubtypeCheck\n\tNOP" %}
+ ins_encode( enc_PartialSubtypeCheck() );
++ ins_avoid_back_to_back(AVOID_BEFORE);
+ ins_pipe(partial_subtype_check_pipe);
+ %}
+
+@@ -10102,6 +10181,7 @@
+ ins_cost(DEFAULT_COST*10);
+ format %{ "CALL PartialSubtypeCheck\n\tNOP\t# (sets condition codes)" %}
+ ins_encode( enc_PartialSubtypeCheck() );
++ ins_avoid_back_to_back(AVOID_BEFORE);
+ ins_pipe(partial_subtype_check_pipe);
+ %}
+
+--- ./hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+@@ -83,7 +83,7 @@
+ private:
+
+ #ifdef PRODUCT
+-#define inc_counter_np(a,b,c) (0)
++#define inc_counter_np(a,b,c)
+ #else
+ #define inc_counter_np(counter, t1, t2) \
+ BLOCK_COMMENT("inc_counter " #counter); \
+@@ -1055,7 +1055,7 @@
+ Label& L_loop, bool use_prefetch, bool use_bis);
+
+ void disjoint_copy_core(Register from, Register to, Register count, int log2_elem_size,
+- int iter_size, CopyLoopFunc copy_loop_func) {
++ int iter_size, StubGenerator::CopyLoopFunc copy_loop_func) {
+ Label L_copy;
+
+ assert(log2_elem_size <= 3, "the following code should be changed");
+@@ -1206,7 +1206,7 @@
+ __ inc(from, 8);
+ __ sllx(O3, left_shift, O3);
+
+- disjoint_copy_core(from, to, count, log2_elem_size, 16, copy_16_bytes_shift_loop);
++ disjoint_copy_core(from, to, count, log2_elem_size, 16, &StubGenerator::copy_16_bytes_shift_loop);
+
+ __ inccc(count, count_dec>>1 ); // + 8 bytes
+ __ brx(Assembler::negative, true, Assembler::pn, L_copy_last_bytes);
+@@ -2085,7 +2085,7 @@
+ __ dec(count, 4); // The cmp at the beginning guaranty count >= 4
+ __ sllx(O3, 32, O3);
+
+- disjoint_copy_core(from, to, count, 2, 16, copy_16_bytes_loop);
++ disjoint_copy_core(from, to, count, 2, 16, &StubGenerator::copy_16_bytes_loop);
+
+ __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes);
+ __ delayed()->inc(count, 4); // restore 'count'
+@@ -2366,7 +2366,7 @@
+ // count >= 0 (original count - 8)
+ __ mov(from, from64);
+
+- disjoint_copy_core(from64, to64, count, 3, 64, copy_64_bytes_loop);
++ disjoint_copy_core(from64, to64, count, 3, 64, &StubGenerator::copy_64_bytes_loop);
+
+ // Restore O4(offset0), O5(offset8)
+ __ sub(from64, from, offset0);
+@@ -3304,6 +3304,1277 @@
+ }
+ }
+
++ address generate_aescrypt_encryptBlock() {
++ // required since we read expanded key 'int' array starting first element without alignment considerations
++ assert((arrayOopDesc::base_offset_in_bytes(T_INT) & 7) == 0,
++ "the following code assumes that first element of an int array is aligned to 8 bytes");
++ __ align(CodeEntryAlignment);
++ StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
++ Label L_load_misaligned_input, L_load_expanded_key, L_doLast128bit, L_storeOutput, L_store_misaligned_output;
++ address start = __ pc();
++ Register from = O0; // source byte array
++ Register to = O1; // destination byte array
++ Register key = O2; // expanded key array
++  const Register keylen = O4; // reg for storing expanded key array length
++
++ // read expanded key length
++ __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0);
++
++ // Method to address arbitrary alignment for load instructions:
++ // Check last 3 bits of 'from' address to see if it is aligned to 8-byte boundary
++ // If zero/aligned then continue with double FP load instructions
++ // If not zero/mis-aligned then alignaddr will set GSR.align with number of bytes to skip during faligndata
++ // alignaddr will also convert arbitrary aligned 'from' address to nearest 8-byte aligned address
++ // load 3 * 8-byte components (to read 16 bytes input) in 3 different FP regs starting at this aligned address
++ // faligndata will then extract (based on GSR.align value) the appropriate 8 bytes from the 2 source regs
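++  // For example, for from = 0x03 alignaddr sets GSR.align = 3 and rounds
++  // 'from' down to 0x00; the three 8-byte loads then cover bytes 0x00-0x17
++  // and faligndata extracts the 16 input bytes starting at offset 3.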
++
++ // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero
++ __ andcc(from, 7, G0);
++ __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_input);
++ __ delayed()->alignaddr(from, G0, from);
++
++ // aligned case: load input into F54-F56
++ __ ldf(FloatRegisterImpl::D, from, 0, F54);
++ __ ldf(FloatRegisterImpl::D, from, 8, F56);
++ __ ba_short(L_load_expanded_key);
++
++ __ BIND(L_load_misaligned_input);
++ __ ldf(FloatRegisterImpl::D, from, 0, F54);
++ __ ldf(FloatRegisterImpl::D, from, 8, F56);
++ __ ldf(FloatRegisterImpl::D, from, 16, F58);
++ __ faligndata(F54, F56, F54);
++ __ faligndata(F56, F58, F56);
++
++ __ BIND(L_load_expanded_key);
++ // Since we load expanded key buffers starting first element, 8-byte alignment is guaranteed
++ for ( int i = 0; i <= 38; i += 2 ) {
++ __ ldf(FloatRegisterImpl::D, key, i*4, as_FloatRegister(i));
++ }
++
++ // perform cipher transformation
++ __ fxor(FloatRegisterImpl::D, F0, F54, F54);
++ __ fxor(FloatRegisterImpl::D, F2, F56, F56);
++ // rounds 1 through 8
++ for ( int i = 4; i <= 28; i += 8 ) {
++ __ aes_eround01(as_FloatRegister(i), F54, F56, F58);
++ __ aes_eround23(as_FloatRegister(i+2), F54, F56, F60);
++ __ aes_eround01(as_FloatRegister(i+4), F58, F60, F54);
++ __ aes_eround23(as_FloatRegister(i+6), F58, F60, F56);
++ }
++ __ aes_eround01(F36, F54, F56, F58); //round 9
++ __ aes_eround23(F38, F54, F56, F60);
++
++ // 128-bit original key size
++ __ cmp_and_brx_short(keylen, 44, Assembler::equal, Assembler::pt, L_doLast128bit);
++
++ for ( int i = 40; i <= 50; i += 2 ) {
++ __ ldf(FloatRegisterImpl::D, key, i*4, as_FloatRegister(i) );
++ }
++ __ aes_eround01(F40, F58, F60, F54); //round 10
++ __ aes_eround23(F42, F58, F60, F56);
++ __ aes_eround01(F44, F54, F56, F58); //round 11
++ __ aes_eround23(F46, F54, F56, F60);
++
++ // 192-bit original key size
++ __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pt, L_storeOutput);
++
++ __ ldf(FloatRegisterImpl::D, key, 208, F52);
++ __ aes_eround01(F48, F58, F60, F54); //round 12
++ __ aes_eround23(F50, F58, F60, F56);
++ __ ldf(FloatRegisterImpl::D, key, 216, F46);
++ __ ldf(FloatRegisterImpl::D, key, 224, F48);
++ __ ldf(FloatRegisterImpl::D, key, 232, F50);
++ __ aes_eround01(F52, F54, F56, F58); //round 13
++ __ aes_eround23(F46, F54, F56, F60);
++ __ ba_short(L_storeOutput);
++
++ __ BIND(L_doLast128bit);
++ __ ldf(FloatRegisterImpl::D, key, 160, F48);
++ __ ldf(FloatRegisterImpl::D, key, 168, F50);
++
++ __ BIND(L_storeOutput);
++ // perform last round of encryption common for all key sizes
++ __ aes_eround01_l(F48, F58, F60, F54); //last round
++ __ aes_eround23_l(F50, F58, F60, F56);
++
++ // Method to address arbitrary alignment for store instructions:
++ // Check last 3 bits of 'dest' address to see if it is aligned to 8-byte boundary
++ // If zero/aligned then continue with double FP store instructions
++ // If not zero/mis-aligned then edge8n will generate edge mask in result reg (O3 in below case)
++ // Example: If dest address is 0x07 and nearest 8-byte aligned address is 0x00 then edge mask will be 00000001
++ // Compute (8-n) where n is # of bytes skipped by partial store(stpartialf) inst from edge mask, n=7 in this case
++ // We get the value of n from the andcc that checks 'dest' alignment. n is available in O5 in below case.
++ // Set GSR.align to (8-n) using alignaddr
++ // Circular byte shift store values by n places so that the original bytes are at correct position for stpartialf
++ // Set the arbitrarily aligned 'dest' address to nearest 8-byte aligned address
++ // Store (partial) the original first (8-n) bytes starting at the original 'dest' address
++ // Negate the edge mask so that the subsequent stpartialf can store the original (8-n-1)th through 8th bytes at appropriate address
++ // We need to execute this process for both the 8-byte result values
++
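++  // For example, for dest = 0x07: n = 7, the edge mask is 00000001 and
++  // GSR.align = 8 - n = 1; the first pair of stpartialf stores writes the
++  // single leading byte of each value at its original unaligned address,
++  // and the negated mask 11111110 lets the second pair store the remaining
++  // seven bytes at the following aligned addresses.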
++ // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero
++ __ andcc(to, 7, O5);
++ __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output);
++ __ delayed()->edge8n(to, G0, O3);
++
++ // aligned case: store output into the destination array
++ __ stf(FloatRegisterImpl::D, F54, to, 0);
++ __ retl();
++ __ delayed()->stf(FloatRegisterImpl::D, F56, to, 8);
++
++ __ BIND(L_store_misaligned_output);
++ __ add(to, 8, O4);
++ __ mov(8, O2);
++ __ sub(O2, O5, O2);
++ __ alignaddr(O2, G0, O2);
++ __ faligndata(F54, F54, F54);
++ __ faligndata(F56, F56, F56);
++ __ and3(to, -8, to);
++ __ and3(O4, -8, O4);
++ __ stpartialf(to, O3, F54, Assembler::ASI_PST8_PRIMARY);
++ __ stpartialf(O4, O3, F56, Assembler::ASI_PST8_PRIMARY);
++ __ add(to, 8, to);
++ __ add(O4, 8, O4);
++ __ orn(G0, O3, O3);
++ __ stpartialf(to, O3, F54, Assembler::ASI_PST8_PRIMARY);
++ __ retl();
++ __ delayed()->stpartialf(O4, O3, F56, Assembler::ASI_PST8_PRIMARY);
++
++ return start;
++ }
++
++ address generate_aescrypt_decryptBlock() {
++ assert((arrayOopDesc::base_offset_in_bytes(T_INT) & 7) == 0,
++ "the following code assumes that first element of an int array is aligned to 8 bytes");
++ // required since we read original key 'byte' array as well in the decryption stubs
++ assert((arrayOopDesc::base_offset_in_bytes(T_BYTE) & 7) == 0,
++ "the following code assumes that first element of a byte array is aligned to 8 bytes");
++ __ align(CodeEntryAlignment);
++ StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
++ address start = __ pc();
++ Label L_load_misaligned_input, L_load_original_key, L_expand192bit, L_expand256bit, L_reload_misaligned_input;
++ Label L_256bit_transform, L_common_transform, L_store_misaligned_output;
++ Register from = O0; // source byte array
++ Register to = O1; // destination byte array
++ Register key = O2; // expanded key array
++ Register original_key = O3; // original key array only required during decryption
++ const Register keylen = O4; // reg for storing expanded key array length
++
++ // read expanded key array length
++ __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0);
++
++ // save 'from' since we may need to recheck alignment in case of 256-bit decryption
++ __ mov(from, G1);
++
++ // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero
++ __ andcc(from, 7, G0);
++ __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_input);
++ __ delayed()->alignaddr(from, G0, from);
++
++ // aligned case: load input into F52-F54
++ __ ldf(FloatRegisterImpl::D, from, 0, F52);
++ __ ldf(FloatRegisterImpl::D, from, 8, F54);
++ __ ba_short(L_load_original_key);
++
++ __ BIND(L_load_misaligned_input);
++ __ ldf(FloatRegisterImpl::D, from, 0, F52);
++ __ ldf(FloatRegisterImpl::D, from, 8, F54);
++ __ ldf(FloatRegisterImpl::D, from, 16, F56);
++ __ faligndata(F52, F54, F52);
++ __ faligndata(F54, F56, F54);
++
++ __ BIND(L_load_original_key);
++ // load original key from SunJCE expanded decryption key
++ // Since we load original key buffer starting first element, 8-byte alignment is guaranteed
++ for ( int i = 0; i <= 3; i++ ) {
++ __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i));
++ }
++
++ // 256-bit original key size
++ __ cmp_and_brx_short(keylen, 60, Assembler::equal, Assembler::pn, L_expand256bit);
++
++ // 192-bit original key size
++ __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_expand192bit);
++
++ // 128-bit original key size
++ // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
++ for ( int i = 0; i <= 36; i += 4 ) {
++ __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+2), i/4, as_FloatRegister(i+4));
++ __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+4), as_FloatRegister(i+6));
++ }
++
++ // perform 128-bit key specific inverse cipher transformation
++ __ fxor(FloatRegisterImpl::D, F42, F54, F54);
++ __ fxor(FloatRegisterImpl::D, F40, F52, F52);
++ __ ba_short(L_common_transform);
++
++ __ BIND(L_expand192bit);
++
++ // start loading rest of the 192-bit key
++ __ ldf(FloatRegisterImpl::S, original_key, 16, F4);
++ __ ldf(FloatRegisterImpl::S, original_key, 20, F5);
++
++ // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
++ for ( int i = 0; i <= 36; i += 6 ) {
++ __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+4), i/6, as_FloatRegister(i+6));
++ __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+6), as_FloatRegister(i+8));
++ __ aes_kexpand2(as_FloatRegister(i+4), as_FloatRegister(i+8), as_FloatRegister(i+10));
++ }
++ __ aes_kexpand1(F42, F46, 7, F48);
++ __ aes_kexpand2(F44, F48, F50);
++
++ // perform 192-bit key specific inverse cipher transformation
++ __ fxor(FloatRegisterImpl::D, F50, F54, F54);
++ __ fxor(FloatRegisterImpl::D, F48, F52, F52);
++ __ aes_dround23(F46, F52, F54, F58);
++ __ aes_dround01(F44, F52, F54, F56);
++ __ aes_dround23(F42, F56, F58, F54);
++ __ aes_dround01(F40, F56, F58, F52);
++ __ ba_short(L_common_transform);
++
++ __ BIND(L_expand256bit);
++
++ // load rest of the 256-bit key
++ for ( int i = 4; i <= 7; i++ ) {
++ __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i));
++ }
++
++ // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
++ for ( int i = 0; i <= 40; i += 8 ) {
++ __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+6), i/8, as_FloatRegister(i+8));
++ __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+8), as_FloatRegister(i+10));
++ __ aes_kexpand0(as_FloatRegister(i+4), as_FloatRegister(i+10), as_FloatRegister(i+12));
++ __ aes_kexpand2(as_FloatRegister(i+6), as_FloatRegister(i+12), as_FloatRegister(i+14));
++ }
++ __ aes_kexpand1(F48, F54, 6, F56);
++ __ aes_kexpand2(F50, F56, F58);
++
++ for ( int i = 0; i <= 6; i += 2 ) {
++ __ fsrc2(FloatRegisterImpl::D, as_FloatRegister(58-i), as_FloatRegister(i));
++ }
++
++ // reload original 'from' address
++ __ mov(G1, from);
++
++ // re-check 8-byte alignment
++ __ andcc(from, 7, G0);
++ __ br(Assembler::notZero, true, Assembler::pn, L_reload_misaligned_input);
++ __ delayed()->alignaddr(from, G0, from);
++
++ // aligned case: load input into F52-F54
++ __ ldf(FloatRegisterImpl::D, from, 0, F52);
++ __ ldf(FloatRegisterImpl::D, from, 8, F54);
++ __ ba_short(L_256bit_transform);
++
++ __ BIND(L_reload_misaligned_input);
++ __ ldf(FloatRegisterImpl::D, from, 0, F52);
++ __ ldf(FloatRegisterImpl::D, from, 8, F54);
++ __ ldf(FloatRegisterImpl::D, from, 16, F56);
++ __ faligndata(F52, F54, F52);
++ __ faligndata(F54, F56, F54);
++
++ // perform 256-bit key specific inverse cipher transformation
++ __ BIND(L_256bit_transform);
++ __ fxor(FloatRegisterImpl::D, F0, F54, F54);
++ __ fxor(FloatRegisterImpl::D, F2, F52, F52);
++ __ aes_dround23(F4, F52, F54, F58);
++ __ aes_dround01(F6, F52, F54, F56);
++ __ aes_dround23(F50, F56, F58, F54);
++ __ aes_dround01(F48, F56, F58, F52);
++ __ aes_dround23(F46, F52, F54, F58);
++ __ aes_dround01(F44, F52, F54, F56);
++ __ aes_dround23(F42, F56, F58, F54);
++ __ aes_dround01(F40, F56, F58, F52);
++
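++ // the 256-bit expansion overwrote F0..F7, so reload the first 32 bytes of
++ // the original key for the common round loop below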
++ for ( int i = 0; i <= 7; i++ ) {
++ __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i));
++ }
++
++ // perform inverse cipher transformations common for all key sizes
++ __ BIND(L_common_transform);
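++ // walk the expanded key in reverse (F38 down to F0), two rounds per
++ // iteration; the final iteration uses the *_l last-round forms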
++ for ( int i = 38; i >= 6; i -= 8 ) {
++ __ aes_dround23(as_FloatRegister(i), F52, F54, F58);
++ __ aes_dround01(as_FloatRegister(i-2), F52, F54, F56);
++ if ( i != 6) {
++ __ aes_dround23(as_FloatRegister(i-4), F56, F58, F54);
++ __ aes_dround01(as_FloatRegister(i-6), F56, F58, F52);
++ } else {
++ __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F54);
++ __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F52);
++ }
++ }
++
++ // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero
++ __ andcc(to, 7, O5);
++ __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output);
++ __ delayed()->edge8n(to, G0, O3);
++
++ // aligned case: store output into the destination array
++ __ stf(FloatRegisterImpl::D, F52, to, 0);
++ __ retl();
++ __ delayed()->stf(FloatRegisterImpl::D, F54, to, 8);
++
++ __ BIND(L_store_misaligned_output);
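++ // O3 holds the byte mask computed by edge8n; the data is rotated to the
++ // store alignment, then each doubleword is written with two partial stores:
++ // one under the mask and one, a doubleword later, under its complement (orn)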
++ __ add(to, 8, O4);
++ __ mov(8, O2);
++ __ sub(O2, O5, O2);
++ __ alignaddr(O2, G0, O2);
++ __ faligndata(F52, F52, F52);
++ __ faligndata(F54, F54, F54);
++ __ and3(to, -8, to);
++ __ and3(O4, -8, O4);
++ __ stpartialf(to, O3, F52, Assembler::ASI_PST8_PRIMARY);
++ __ stpartialf(O4, O3, F54, Assembler::ASI_PST8_PRIMARY);
++ __ add(to, 8, to);
++ __ add(O4, 8, O4);
++ __ orn(G0, O3, O3);
++ __ stpartialf(to, O3, F52, Assembler::ASI_PST8_PRIMARY);
++ __ retl();
++ __ delayed()->stpartialf(O4, O3, F54, Assembler::ASI_PST8_PRIMARY);
++
++ return start;
++ }
++
++ address generate_cipherBlockChaining_encryptAESCrypt() {
++ assert((arrayOopDesc::base_offset_in_bytes(T_INT) & 7) == 0,
++ "the following code assumes that first element of an int array is aligned to 8 bytes");
++ assert((arrayOopDesc::base_offset_in_bytes(T_BYTE) & 7) == 0,
++ "the following code assumes that first element of a byte array is aligned to 8 bytes");
++ __ align(CodeEntryAlignment);
++ StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
++ Label L_cbcenc128, L_load_misaligned_input_128bit, L_128bit_transform, L_store_misaligned_output_128bit;
++ Label L_check_loop_end_128bit, L_cbcenc192, L_load_misaligned_input_192bit, L_192bit_transform;
++ Label L_store_misaligned_output_192bit, L_check_loop_end_192bit, L_cbcenc256, L_load_misaligned_input_256bit;
++ Label L_256bit_transform, L_store_misaligned_output_256bit, L_check_loop_end_256bit;
++ address start = __ pc();
++ Register from = I0; // source byte array
++ Register to = I1; // destination byte array
++ Register key = I2; // expanded key array
++ Register rvec = I3; // init vector
++ const Register len_reg = I4; // cipher length
++ const Register keylen = I5; // reg for storing expanded key array length
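++ // Three unrolled variants follow, one per key size. Each encrypts one
++ // 16-byte block per iteration and chains the cipher text through F60:F62.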
++
++ __ save_frame(0);
++ // save cipher len to return at the end
++ __ mov(len_reg, L0);
++
++ // read expanded key length
++ __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0);
++
++ // load initial vector, 8-byte alignment is guaranteed
++ __ ldf(FloatRegisterImpl::D, rvec, 0, F60);
++ __ ldf(FloatRegisterImpl::D, rvec, 8, F62);
++ // load key, 8-byte alignment is guaranteed
++ __ ldx(key,0,G1);
++ __ ldx(key,8,G5);
++
++ // start loading expanded key, 8-byte alignment is guaranteed
++ for ( int i = 0, j = 16; i <= 38; i += 2, j += 8 ) {
++ __ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i));
++ }
++
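++ // keylen holds the expanded key length in 32-bit words:
++ // 44, 52 or 60 for 128-, 192- or 256-bit original keys (4 * (rounds + 1))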
++ // 128-bit original key size
++ __ cmp_and_brx_short(keylen, 44, Assembler::equal, Assembler::pt, L_cbcenc128);
++
++ for ( int i = 40, j = 176; i <= 46; i += 2, j += 8 ) {
++ __ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i));
++ }
++
++ // 192-bit original key size
++ __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pt, L_cbcenc192);
++
++ for ( int i = 48, j = 208; i <= 54; i += 2, j += 8 ) {
++ __ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i));
++ }
++
++ // 256-bit original key size
++ __ ba_short(L_cbcenc256);
++
++ __ align(OptoLoopAlignment);
++ __ BIND(L_cbcenc128);
++ // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero
++ __ andcc(from, 7, G0);
++ __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_input_128bit);
++ __ delayed()->mov(from, L1); // save original 'from' address before alignaddr
++
++ // aligned case: load input into G3 and G4
++ __ ldx(from,0,G3);
++ __ ldx(from,8,G4);
++ __ ba_short(L_128bit_transform);
++
++ __ BIND(L_load_misaligned_input_128bit);
++ // can clobber F48, F50 and F52 as they are not used in 128- and 192-bit key encryption
++ __ alignaddr(from, G0, from);
++ __ ldf(FloatRegisterImpl::D, from, 0, F48);
++ __ ldf(FloatRegisterImpl::D, from, 8, F50);
++ __ ldf(FloatRegisterImpl::D, from, 16, F52);
++ __ faligndata(F48, F50, F48);
++ __ faligndata(F50, F52, F50);
++ __ movdtox(F48, G3);
++ __ movdtox(F50, G4);
++ __ mov(L1, from);
++
++ __ BIND(L_128bit_transform);
++ __ xor3(G1,G3,G3);
++ __ xor3(G5,G4,G4);
++ __ movxtod(G3,F56);
++ __ movxtod(G4,F58);
++ __ fxor(FloatRegisterImpl::D, F60, F56, F60);
++ __ fxor(FloatRegisterImpl::D, F62, F58, F62);
++
++ // TEN_EROUNDS
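++ // (ten encryption rounds: two per loop iteration using round keys F0..F38;
++ // the last pair uses the *_l final-round forms)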
++ for ( int i = 0; i <= 32; i += 8 ) {
++ __ aes_eround01(as_FloatRegister(i), F60, F62, F56);
++ __ aes_eround23(as_FloatRegister(i+2), F60, F62, F58);
++ if (i != 32 ) {
++ __ aes_eround01(as_FloatRegister(i+4), F56, F58, F60);
++ __ aes_eround23(as_FloatRegister(i+6), F56, F58, F62);
++ } else {
++ __ aes_eround01_l(as_FloatRegister(i+4), F56, F58, F60);
++ __ aes_eround23_l(as_FloatRegister(i+6), F56, F58, F62);
++ }
++ }
++
++ // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero
++ __ andcc(to, 7, L1);
++ __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_128bit);
++ __ delayed()->edge8n(to, G0, L2);
++
++ // aligned case: store output into the destination array
++ __ stf(FloatRegisterImpl::D, F60, to, 0);
++ __ stf(FloatRegisterImpl::D, F62, to, 8);
++ __ ba_short(L_check_loop_end_128bit);
++
++ __ BIND(L_store_misaligned_output_128bit);
++ __ add(to, 8, L3);
++ __ mov(8, L4);
++ __ sub(L4, L1, L4);
++ __ alignaddr(L4, G0, L4);
++ // save cipher text before circular right shift
++ // as it needs to be stored as iv for next block (see code before next retl)
++ __ movdtox(F60, L6);
++ __ movdtox(F62, L7);
++ __ faligndata(F60, F60, F60);
++ __ faligndata(F62, F62, F62);
++ __ mov(to, L5);
++ __ and3(to, -8, to);
++ __ and3(L3, -8, L3);
++ __ stpartialf(to, L2, F60, Assembler::ASI_PST8_PRIMARY);
++ __ stpartialf(L3, L2, F62, Assembler::ASI_PST8_PRIMARY);
++ __ add(to, 8, to);
++ __ add(L3, 8, L3);
++ __ orn(G0, L2, L2);
++ __ stpartialf(to, L2, F60, Assembler::ASI_PST8_PRIMARY);
++ __ stpartialf(L3, L2, F62, Assembler::ASI_PST8_PRIMARY);
++ __ mov(L5, to);
++ __ movxtod(L6, F60);
++ __ movxtod(L7, F62);
++
++ __ BIND(L_check_loop_end_128bit);
++ __ add(from, 16, from);
++ __ add(to, 16, to);
++ __ subcc(len_reg, 16, len_reg);
++ __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc128);
++ __ delayed()->nop();
++ // re-init initial vector for next block, 8-byte alignment is guaranteed
++ __ stf(FloatRegisterImpl::D, F60, rvec, 0);
++ __ stf(FloatRegisterImpl::D, F62, rvec, 8);
++ __ mov(L0, I0);
++ __ ret();
++ __ delayed()->restore();
++
++ __ align(OptoLoopAlignment);
++ __ BIND(L_cbcenc192);
++ // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero
++ __ andcc(from, 7, G0);
++ __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_input_192bit);
++ __ delayed()->mov(from, L1); // save original 'from' address before alignaddr
++
++ // aligned case: load input into G3 and G4
++ __ ldx(from,0,G3);
++ __ ldx(from,8,G4);
++ __ ba_short(L_192bit_transform);
++
++ __ BIND(L_load_misaligned_input_192bit);
++ // can clobber F48, F50 and F52 as they are not used in 128- and 192-bit key encryption
++ __ alignaddr(from, G0, from);
++ __ ldf(FloatRegisterImpl::D, from, 0, F48);
++ __ ldf(FloatRegisterImpl::D, from, 8, F50);
++ __ ldf(FloatRegisterImpl::D, from, 16, F52);
++ __ faligndata(F48, F50, F48);
++ __ faligndata(F50, F52, F50);
++ __ movdtox(F48, G3);
++ __ movdtox(F50, G4);
++ __ mov(L1, from);
++
++ __ BIND(L_192bit_transform);
++ __ xor3(G1,G3,G3);
++ __ xor3(G5,G4,G4);
++ __ movxtod(G3,F56);
++ __ movxtod(G4,F58);
++ __ fxor(FloatRegisterImpl::D, F60, F56, F60);
++ __ fxor(FloatRegisterImpl::D, F62, F58, F62);
++
++ // TWELVE_EROUNDS
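++ // (twelve rounds using round keys F0..F46; the last pair uses the *_l forms)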
++ for ( int i = 0; i <= 40; i += 8 ) {
++ __ aes_eround01(as_FloatRegister(i), F60, F62, F56);
++ __ aes_eround23(as_FloatRegister(i+2), F60, F62, F58);
++ if (i != 40 ) {
++ __ aes_eround01(as_FloatRegister(i+4), F56, F58, F60);
++ __ aes_eround23(as_FloatRegister(i+6), F56, F58, F62);
++ } else {
++ __ aes_eround01_l(as_FloatRegister(i+4), F56, F58, F60);
++ __ aes_eround23_l(as_FloatRegister(i+6), F56, F58, F62);
++ }
++ }
++
++ // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero
++ __ andcc(to, 7, L1);
++ __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_192bit);
++ __ delayed()->edge8n(to, G0, L2);
++
++ // aligned case: store output into the destination array
++ __ stf(FloatRegisterImpl::D, F60, to, 0);
++ __ stf(FloatRegisterImpl::D, F62, to, 8);
++ __ ba_short(L_check_loop_end_192bit);
++
++ __ BIND(L_store_misaligned_output_192bit);
++ __ add(to, 8, L3);
++ __ mov(8, L4);
++ __ sub(L4, L1, L4);
++ __ alignaddr(L4, G0, L4);
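++ // save cipher text before the circular shift; it is the iv for the next block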
++ __ movdtox(F60, L6);
++ __ movdtox(F62, L7);
++ __ faligndata(F60, F60, F60);
++ __ faligndata(F62, F62, F62);
++ __ mov(to, L5);
++ __ and3(to, -8, to);
++ __ and3(L3, -8, L3);
++ __ stpartialf(to, L2, F60, Assembler::ASI_PST8_PRIMARY);
++ __ stpartialf(L3, L2, F62, Assembler::ASI_PST8_PRIMARY);
++ __ add(to, 8, to);
++ __ add(L3, 8, L3);
++ __ orn(G0, L2, L2);
++ __ stpartialf(to, L2, F60, Assembler::ASI_PST8_PRIMARY);
++ __ stpartialf(L3, L2, F62, Assembler::ASI_PST8_PRIMARY);
++ __ mov(L5, to);
++ __ movxtod(L6, F60);
++ __ movxtod(L7, F62);
++
++ __ BIND(L_check_loop_end_192bit);
++ __ add(from, 16, from);
++ __ subcc(len_reg, 16, len_reg);
++ __ add(to, 16, to);
++ __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc192);
++ __ delayed()->nop();
++ // re-init initial vector for next block, 8-byte alignment is guaranteed
++ __ stf(FloatRegisterImpl::D, F60, rvec, 0);
++ __ stf(FloatRegisterImpl::D, F62, rvec, 8);
++ __ mov(L0, I0);
++ __ ret();
++ __ delayed()->restore();
++
++ __ align(OptoLoopAlignment);
++ __ BIND(L_cbcenc256);
++ // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero
++ __ andcc(from, 7, G0);
++ __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_input_256bit);
++ __ delayed()->mov(from, L1); // save original 'from' address before alignaddr
++
++ // aligned case: load input into G3 and G4
++ __ ldx(from,0,G3);
++ __ ldx(from,8,G4);
++ __ ba_short(L_256bit_transform);
++
++ __ BIND(L_load_misaligned_input_256bit);
++ // cannot clobber F48, F50 and F52; F56 and F58 can be used, though
++ __ alignaddr(from, G0, from);
++ __ movdtox(F60, L2); // save F60 before overwriting
++ __ ldf(FloatRegisterImpl::D, from, 0, F56);
++ __ ldf(FloatRegisterImpl::D, from, 8, F58);
++ __ ldf(FloatRegisterImpl::D, from, 16, F60);
++ __ faligndata(F56, F58, F56);
++ __ faligndata(F58, F60, F58);
++ __ movdtox(F56, G3);
++ __ movdtox(F58, G4);
++ __ mov(L1, from);
++ __ movxtod(L2, F60);
++
++ __ BIND(L_256bit_transform);
++ __ xor3(G1,G3,G3);
++ __ xor3(G5,G4,G4);
++ __ movxtod(G3,F56);
++ __ movxtod(G4,F58);
++ __ fxor(FloatRegisterImpl::D, F60, F56, F60);
++ __ fxor(FloatRegisterImpl::D, F62, F58, F62);
++
++ // FOURTEEN_EROUNDS
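++ // (fourteen rounds using round keys F0..F54; the last pair uses the *_l forms)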
++ for ( int i = 0; i <= 48; i += 8 ) {
++ __ aes_eround01(as_FloatRegister(i), F60, F62, F56);
++ __ aes_eround23(as_FloatRegister(i+2), F60, F62, F58);
++ if (i != 48 ) {
++ __ aes_eround01(as_FloatRegister(i+4), F56, F58, F60);
++ __ aes_eround23(as_FloatRegister(i+6), F56, F58, F62);
++ } else {
++ __ aes_eround01_l(as_FloatRegister(i+4), F56, F58, F60);
++ __ aes_eround23_l(as_FloatRegister(i+6), F56, F58, F62);
++ }
++ }
++
++ // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero
++ __ andcc(to, 7, L1);
++ __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_256bit);
++ __ delayed()->edge8n(to, G0, L2);
++
++ // aligned case: store output into the destination array
++ __ stf(FloatRegisterImpl::D, F60, to, 0);
++ __ stf(FloatRegisterImpl::D, F62, to, 8);
++ __ ba_short(L_check_loop_end_256bit);
++
++ __ BIND(L_store_misaligned_output_256bit);
++ __ add(to, 8, L3);
++ __ mov(8, L4);
++ __ sub(L4, L1, L4);
++ __ alignaddr(L4, G0, L4);
++ __ movdtox(F60, L6);
++ __ movdtox(F62, L7);
++ __ faligndata(F60, F60, F60);
++ __ faligndata(F62, F62, F62);
++ __ mov(to, L5);
++ __ and3(to, -8, to);
++ __ and3(L3, -8, L3);
++ __ stpartialf(to, L2, F60, Assembler::ASI_PST8_PRIMARY);
++ __ stpartialf(L3, L2, F62, Assembler::ASI_PST8_PRIMARY);
++ __ add(to, 8, to);
++ __ add(L3, 8, L3);
++ __ orn(G0, L2, L2);
++ __ stpartialf(to, L2, F60, Assembler::ASI_PST8_PRIMARY);
++ __ stpartialf(L3, L2, F62, Assembler::ASI_PST8_PRIMARY);
++ __ mov(L5, to);
++ __ movxtod(L6, F60);
++ __ movxtod(L7, F62);
++
++ __ BIND(L_check_loop_end_256bit);
++ __ add(from, 16, from);
++ __ subcc(len_reg, 16, len_reg);
++ __ add(to, 16, to);
++ __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc256);
++ __ delayed()->nop();
++ // re-init initial vector for next block, 8-byte alignment is guaranteed
++ __ stf(FloatRegisterImpl::D, F60, rvec, 0);
++ __ stf(FloatRegisterImpl::D, F62, rvec, 8);
++ __ mov(L0, I0);
++ __ ret();
++ __ delayed()->restore();
++
++ return start;
++ }
++
++ address generate_cipherBlockChaining_decryptAESCrypt_Parallel() {
++ assert((arrayOopDesc::base_offset_in_bytes(T_INT) & 7) == 0,
++ "the following code assumes that first element of an int array is aligned to 8 bytes");
++ assert((arrayOopDesc::base_offset_in_bytes(T_BYTE) & 7) == 0,
++ "the following code assumes that first element of a byte array is aligned to 8 bytes");
++ __ align(CodeEntryAlignment);
++ StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
++ Label L_cbcdec_end, L_expand192bit, L_expand256bit, L_dec_first_block_start;
++ Label L_dec_first_block128, L_dec_first_block192, L_dec_next2_blocks128, L_dec_next2_blocks192, L_dec_next2_blocks256;
++ Label L_load_misaligned_input_first_block, L_transform_first_block, L_load_misaligned_next2_blocks128, L_transform_next2_blocks128;
++ Label L_load_misaligned_next2_blocks192, L_transform_next2_blocks192, L_load_misaligned_next2_blocks256, L_transform_next2_blocks256;
++ Label L_store_misaligned_output_first_block, L_check_decrypt_end, L_store_misaligned_output_next2_blocks128;
++ Label L_check_decrypt_loop_end128, L_store_misaligned_output_next2_blocks192, L_check_decrypt_loop_end192;
++ Label L_store_misaligned_output_next2_blocks256, L_check_decrypt_loop_end256;
++ address start = __ pc();
++ Register from = I0; // source byte array
++ Register to = I1; // destination byte array
++ Register key = I2; // expanded key array
++ Register rvec = I3; // init vector
++ const Register len_reg = I4; // cipher length
++ const Register original_key = I5; // original key array only required during decryption
++ const Register keylen = L6; // reg for storing expanded key array length
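++ // CBC decryption has no inter-block dependence on the cipher text, so the
++ // main loops below decrypt two 16-byte blocks per iteration; a single
++ // leading block is peeled off first when the length is an odd multiple of 16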
++
++ __ save_frame(0); // args are read from the I* registers since we save the frame at the beginning
++ // save cipher len to return at the end
++ __ mov(len_reg, L7);
++
++ // load original key from SunJCE expanded decryption key
++ // Since we load the original key buffer starting at its first element, 8-byte alignment is guaranteed
++ for ( int i = 0; i <= 3; i++ ) {
++ __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i));
++ }
++
++ // load initial vector, 8-byte alignment is guaranteed
++ __ ldx(rvec,0,L0);
++ __ ldx(rvec,8,L1);
++
++ // read expanded key array length
++ __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0);
++
++ // 256-bit original key size
++ __ cmp_and_brx_short(keylen, 60, Assembler::equal, Assembler::pn, L_expand256bit);
++
++ // 192-bit original key size
++ __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_expand192bit);
++
++ // 128-bit original key size
++ // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
++ for ( int i = 0; i <= 36; i += 4 ) {
++ __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+2), i/4, as_FloatRegister(i+4));
++ __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+4), as_FloatRegister(i+6));
++ }
++
++ // load expanded key[last-1] and key[last] elements
++ __ movdtox(F40,L2);
++ __ movdtox(F42,L3);
++
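++ // peel off a single block when the length is an odd multiple of 16 bytes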
++ __ and3(len_reg, 16, L4);
++ __ br_null_short(L4, Assembler::pt, L_dec_next2_blocks128);
++ __ nop();
++
++ __ ba_short(L_dec_first_block_start);
++
++ __ BIND(L_expand192bit);
++ // load rest of the 192-bit key
++ __ ldf(FloatRegisterImpl::S, original_key, 16, F4);
++ __ ldf(FloatRegisterImpl::S, original_key, 20, F5);
++
++ // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
++ for ( int i = 0; i <= 36; i += 6 ) {
++ __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+4), i/6, as_FloatRegister(i+6));
++ __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+6), as_FloatRegister(i+8));
++ __ aes_kexpand2(as_FloatRegister(i+4), as_FloatRegister(i+8), as_FloatRegister(i+10));
++ }
++ __ aes_kexpand1(F42, F46, 7, F48);
++ __ aes_kexpand2(F44, F48, F50);
++
++ // load expanded key[last-1] and key[last] elements
++ __ movdtox(F48,L2);
++ __ movdtox(F50,L3);
++
++ __ and3(len_reg, 16, L4);
++ __ br_null_short(L4, Assembler::pt, L_dec_next2_blocks192);
++ __ nop();
++
++ __ ba_short(L_dec_first_block_start);
++
++ __ BIND(L_expand256bit);
++ // load rest of the 256-bit key
++ for ( int i = 4; i <= 7; i++ ) {
++ __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i));
++ }
++
++ // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
++ for ( int i = 0; i <= 40; i += 8 ) {
++ __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+6), i/8, as_FloatRegister(i+8));
++ __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+8), as_FloatRegister(i+10));
++ __ aes_kexpand0(as_FloatRegister(i+4), as_FloatRegister(i+10), as_FloatRegister(i+12));
++ __ aes_kexpand2(as_FloatRegister(i+6), as_FloatRegister(i+12), as_FloatRegister(i+14));
++ }
++ __ aes_kexpand1(F48, F54, 6, F56);
++ __ aes_kexpand2(F50, F56, F58);
++
++ // load expanded key[last-1] and key[last] elements
++ __ movdtox(F56,L2);
++ __ movdtox(F58,L3);
++
++ __ and3(len_reg, 16, L4);
++ __ br_null_short(L4, Assembler::pt, L_dec_next2_blocks256);
++
++ __ BIND(L_dec_first_block_start);
++ // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero
++ __ andcc(from, 7, G0);
++ __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_input_first_block);
++ __ delayed()->mov(from, G1); // save original 'from' address before alignaddr
++
++ // aligned case: load input into L4 and L5
++ __ ldx(from,0,L4);
++ __ ldx(from,8,L5);
++ __ ba_short(L_transform_first_block);
++
++ __ BIND(L_load_misaligned_input_first_block);
++ __ alignaddr(from, G0, from);
++ // F58, F60, F62 can be clobbered
++ __ ldf(FloatRegisterImpl::D, from, 0, F58);
++ __ ldf(FloatRegisterImpl::D, from, 8, F60);
++ __ ldf(FloatRegisterImpl::D, from, 16, F62);
++ __ faligndata(F58, F60, F58);
++ __ faligndata(F60, F62, F60);
++ __ movdtox(F58, L4);
++ __ movdtox(F60, L5);
++ __ mov(G1, from);
++
++ __ BIND(L_transform_first_block);
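++ // xor the cipher text with the last round key (saved in L2:L3)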
++ __ xor3(L2,L4,G1);
++ __ movxtod(G1,F60);
++ __ xor3(L3,L5,G1);
++ __ movxtod(G1,F62);
++
++ // 128-bit original key size
++ __ cmp_and_brx_short(keylen, 44, Assembler::equal, Assembler::pn, L_dec_first_block128);
++
++ // 192-bit original key size
++ __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_dec_first_block192);
++
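++ // 256-bit keys run the four extra rounds below, then fall through the
++ // 192-bit rounds into the common 128-bit round loop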
++ __ aes_dround23(F54, F60, F62, F58);
++ __ aes_dround01(F52, F60, F62, F56);
++ __ aes_dround23(F50, F56, F58, F62);
++ __ aes_dround01(F48, F56, F58, F60);
++
++ __ BIND(L_dec_first_block192);
++ __ aes_dround23(F46, F60, F62, F58);
++ __ aes_dround01(F44, F60, F62, F56);
++ __ aes_dround23(F42, F56, F58, F62);
++ __ aes_dround01(F40, F56, F58, F60);
++
++ __ BIND(L_dec_first_block128);
++ for ( int i = 38; i >= 6; i -= 8 ) {
++ __ aes_dround23(as_FloatRegister(i), F60, F62, F58);
++ __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56);
++ if ( i != 6) {
++ __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62);
++ __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60);
++ } else {
++ __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F62);
++ __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F60);
++ }
++ }
++
++ __ movxtod(L0,F56);
++ __ movxtod(L1,F58);
++ __ mov(L4,L0);
++ __ mov(L5,L1);
++ __ fxor(FloatRegisterImpl::D, F56, F60, F60);
++ __ fxor(FloatRegisterImpl::D, F58, F62, F62);
++
++ // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero
++ __ andcc(to, 7, G1);
++ __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_first_block);
++ __ delayed()->edge8n(to, G0, G2);
++
++ // aligned case: store output into the destination array
++ __ stf(FloatRegisterImpl::D, F60, to, 0);
++ __ stf(FloatRegisterImpl::D, F62, to, 8);
++ __ ba_short(L_check_decrypt_end);
++
++ __ BIND(L_store_misaligned_output_first_block);
++ __ add(to, 8, G3);
++ __ mov(8, G4);
++ __ sub(G4, G1, G4);
++ __ alignaddr(G4, G0, G4);
++ __ faligndata(F60, F60, F60);
++ __ faligndata(F62, F62, F62);
++ __ mov(to, G1);
++ __ and3(to, -8, to);
++ __ and3(G3, -8, G3);
++ __ stpartialf(to, G2, F60, Assembler::ASI_PST8_PRIMARY);
++ __ stpartialf(G3, G2, F62, Assembler::ASI_PST8_PRIMARY);
++ __ add(to, 8, to);
++ __ add(G3, 8, G3);
++ __ orn(G0, G2, G2);
++ __ stpartialf(to, G2, F60, Assembler::ASI_PST8_PRIMARY);
++ __ stpartialf(G3, G2, F62, Assembler::ASI_PST8_PRIMARY);
++ __ mov(G1, to);
++
++ __ BIND(L_check_decrypt_end);
++ __ add(from, 16, from);
++ __ add(to, 16, to);
++ __ subcc(len_reg, 16, len_reg);
++ __ br(Assembler::equal, false, Assembler::pt, L_cbcdec_end);
++ __ delayed()->nop();
++
++ // 256-bit original key size
++ __ cmp_and_brx_short(keylen, 60, Assembler::equal, Assembler::pn, L_dec_next2_blocks256);
++
++ // 192-bit original key size
++ __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_dec_next2_blocks192);
++
++ __ align(OptoLoopAlignment);
++ __ BIND(L_dec_next2_blocks128);
++ __ nop();
++
++ // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero
++ __ andcc(from, 7, G0);
++ __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_next2_blocks128);
++ __ delayed()->mov(from, G1); // save original 'from' address before alignaddr
++
++ // aligned case: load input into G4, G5, L4 and L5
++ __ ldx(from,0,G4);
++ __ ldx(from,8,G5);
++ __ ldx(from,16,L4);
++ __ ldx(from,24,L5);
++ __ ba_short(L_transform_next2_blocks128);
++
++ __ BIND(L_load_misaligned_next2_blocks128);
++ __ alignaddr(from, G0, from);
++ // F40, F42, F58, F60, F62 can be clobbered
++ __ ldf(FloatRegisterImpl::D, from, 0, F40);
++ __ ldf(FloatRegisterImpl::D, from, 8, F42);
++ __ ldf(FloatRegisterImpl::D, from, 16, F60);
++ __ ldf(FloatRegisterImpl::D, from, 24, F62);
++ __ ldf(FloatRegisterImpl::D, from, 32, F58);
++ __ faligndata(F40, F42, F40);
++ __ faligndata(F42, F60, F42);
++ __ faligndata(F60, F62, F60);
++ __ faligndata(F62, F58, F62);
++ __ movdtox(F40, G4);
++ __ movdtox(F42, G5);
++ __ movdtox(F60, L4);
++ __ movdtox(F62, L5);
++ __ mov(G1, from);
++
++ __ BIND(L_transform_next2_blocks128);
++ // F40:F42 used for the first 16 bytes
++ __ xor3(L2,G4,G1);
++ __ movxtod(G1,F40);
++ __ xor3(L3,G5,G1);
++ __ movxtod(G1,F42);
++
++ // F60:F62 used for the next 16 bytes
++ __ xor3(L2,L4,G1);
++ __ movxtod(G1,F60);
++ __ xor3(L3,L5,G1);
++ __ movxtod(G1,F62);
++
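++ // round computations of the two blocks are interleaved to hide instruction latency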
++ for ( int i = 38; i >= 6; i -= 8 ) {
++ __ aes_dround23(as_FloatRegister(i), F40, F42, F44);
++ __ aes_dround01(as_FloatRegister(i-2), F40, F42, F46);
++ __ aes_dround23(as_FloatRegister(i), F60, F62, F58);
++ __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56);
++ if (i != 6 ) {
++ __ aes_dround23(as_FloatRegister(i-4), F46, F44, F42);
++ __ aes_dround01(as_FloatRegister(i-6), F46, F44, F40);
++ __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62);
++ __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60);
++ } else {
++ __ aes_dround23_l(as_FloatRegister(i-4), F46, F44, F42);
++ __ aes_dround01_l(as_FloatRegister(i-6), F46, F44, F40);
++ __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F62);
++ __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F60);
++ }
++ }
++
++ __ movxtod(L0,F46);
++ __ movxtod(L1,F44);
++ __ fxor(FloatRegisterImpl::D, F46, F40, F40);
++ __ fxor(FloatRegisterImpl::D, F44, F42, F42);
++
++ __ movxtod(G4,F56);
++ __ movxtod(G5,F58);
++ __ mov(L4,L0);
++ __ mov(L5,L1);
++ __ fxor(FloatRegisterImpl::D, F56, F60, F60);
++ __ fxor(FloatRegisterImpl::D, F58, F62, F62);
++
++ // For a misaligned store of the 32 bytes of result we can:
++ // circularly right-shift all 4 FP registers so that the 'head' and 'tail'
++ // parts that need to be stored starting at the misaligned address land in
++ // one FP reg; the other 3 FP regs can then be stored with regular stores,
++ // and the edge + partial-store mechanism handles the 'head' and 'tail' parts
++
++ // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero
++ __ andcc(to, 7, G1);
++ __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_next2_blocks128);
++ __ delayed()->edge8n(to, G0, G2);
++
++ // aligned case: store output into the destination array
++ __ stf(FloatRegisterImpl::D, F40, to, 0);
++ __ stf(FloatRegisterImpl::D, F42, to, 8);
++ __ stf(FloatRegisterImpl::D, F60, to, 16);
++ __ stf(FloatRegisterImpl::D, F62, to, 24);
++ __ ba_short(L_check_decrypt_loop_end128);
++
++ __ BIND(L_store_misaligned_output_next2_blocks128);
++ __ mov(8, G4);
++ __ sub(G4, G1, G4);
++ __ alignaddr(G4, G0, G4);
++ __ faligndata(F40, F42, F56); // F56 can be clobbered
++ __ faligndata(F42, F60, F42);
++ __ faligndata(F60, F62, F60);
++ __ faligndata(F62, F40, F40);
++ __ mov(to, G1);
++ __ and3(to, -8, to);
++ __ stpartialf(to, G2, F40, Assembler::ASI_PST8_PRIMARY);
++ __ stf(FloatRegisterImpl::D, F56, to, 8);
++ __ stf(FloatRegisterImpl::D, F42, to, 16);
++ __ stf(FloatRegisterImpl::D, F60, to, 24);
++ __ add(to, 32, to);
++ __ orn(G0, G2, G2);
++ __ stpartialf(to, G2, F40, Assembler::ASI_PST8_PRIMARY);
++ __ mov(G1, to);
++
++ __ BIND(L_check_decrypt_loop_end128);
++ __ add(from, 32, from);
++ __ add(to, 32, to);
++ __ subcc(len_reg, 32, len_reg);
++ __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks128);
++ __ delayed()->nop();
++ __ ba_short(L_cbcdec_end);
++
++ __ align(OptoLoopAlignment);
++ __ BIND(L_dec_next2_blocks192);
++ __ nop();
++
++ // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero
++ __ andcc(from, 7, G0);
++ __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_next2_blocks192);
++ __ delayed()->mov(from, G1); // save original 'from' address before alignaddr
++
++ // aligned case: load input into G4, G5, L4 and L5
++ __ ldx(from,0,G4);
++ __ ldx(from,8,G5);
++ __ ldx(from,16,L4);
++ __ ldx(from,24,L5);
++ __ ba_short(L_transform_next2_blocks192);
++
++ __ BIND(L_load_misaligned_next2_blocks192);
++ __ alignaddr(from, G0, from);
++ // F48, F50, F52, F60, F62 can be clobbered
++ __ ldf(FloatRegisterImpl::D, from, 0, F48);
++ __ ldf(FloatRegisterImpl::D, from, 8, F50);
++ __ ldf(FloatRegisterImpl::D, from, 16, F60);
++ __ ldf(FloatRegisterImpl::D, from, 24, F62);
++ __ ldf(FloatRegisterImpl::D, from, 32, F52);
++ __ faligndata(F48, F50, F48);
++ __ faligndata(F50, F60, F50);
++ __ faligndata(F60, F62, F60);
++ __ faligndata(F62, F52, F62);
++ __ movdtox(F48, G4);
++ __ movdtox(F50, G5);
++ __ movdtox(F60, L4);
++ __ movdtox(F62, L5);
++ __ mov(G1, from);
++
++ __ BIND(L_transform_next2_blocks192);
++ // F48:F50 used for the first 16 bytes
++ __ xor3(L2,G4,G1);
++ __ movxtod(G1,F48);
++ __ xor3(L3,G5,G1);
++ __ movxtod(G1,F50);
++
++ // F60:F62 used for the next 16 bytes
++ __ xor3(L2,L4,G1);
++ __ movxtod(G1,F60);
++ __ xor3(L3,L5,G1);
++ __ movxtod(G1,F62);
++
++ for ( int i = 46; i >= 6; i -= 8 ) {
++ __ aes_dround23(as_FloatRegister(i), F48, F50, F52);
++ __ aes_dround01(as_FloatRegister(i-2), F48, F50, F54);
++ __ aes_dround23(as_FloatRegister(i), F60, F62, F58);
++ __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56);
++ if (i != 6 ) {
++ __ aes_dround23(as_FloatRegister(i-4), F54, F52, F50);
++ __ aes_dround01(as_FloatRegister(i-6), F54, F52, F48);
++ __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62);
++ __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60);
++ } else {
++ __ aes_dround23_l(as_FloatRegister(i-4), F54, F52, F50);
++ __ aes_dround01_l(as_FloatRegister(i-6), F54, F52, F48);
++ __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F62);
++ __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F60);
++ }
++ }
++
++ __ movxtod(L0,F54);
++ __ movxtod(L1,F52);
++ __ fxor(FloatRegisterImpl::D, F54, F48, F48);
++ __ fxor(FloatRegisterImpl::D, F52, F50, F50);
++
++ __ movxtod(G4,F56);
++ __ movxtod(G5,F58);
++ __ mov(L4,L0);
++ __ mov(L5,L1);
++ __ fxor(FloatRegisterImpl::D, F56, F60, F60);
++ __ fxor(FloatRegisterImpl::D, F58, F62, F62);
++
++ // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero
++ __ andcc(to, 7, G1);
++ __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_next2_blocks192);
++ __ delayed()->edge8n(to, G0, G2);
++
++ // aligned case: store output into the destination array
++ __ stf(FloatRegisterImpl::D, F48, to, 0);
++ __ stf(FloatRegisterImpl::D, F50, to, 8);
++ __ stf(FloatRegisterImpl::D, F60, to, 16);
++ __ stf(FloatRegisterImpl::D, F62, to, 24);
++ __ ba_short(L_check_decrypt_loop_end192);
++
++ __ BIND(L_store_misaligned_output_next2_blocks192);
++ __ mov(8, G4);
++ __ sub(G4, G1, G4);
++ __ alignaddr(G4, G0, G4);
++ __ faligndata(F48, F50, F56); // F56 can be clobbered
++ __ faligndata(F50, F60, F50);
++ __ faligndata(F60, F62, F60);
++ __ faligndata(F62, F48, F48);
++ __ mov(to, G1);
++ __ and3(to, -8, to);
++ __ stpartialf(to, G2, F48, Assembler::ASI_PST8_PRIMARY);
++ __ stf(FloatRegisterImpl::D, F56, to, 8);
++ __ stf(FloatRegisterImpl::D, F50, to, 16);
++ __ stf(FloatRegisterImpl::D, F60, to, 24);
++ __ add(to, 32, to);
++ __ orn(G0, G2, G2);
++ __ stpartialf(to, G2, F48, Assembler::ASI_PST8_PRIMARY);
++ __ mov(G1, to);
++
++ __ BIND(L_check_decrypt_loop_end192);
++ __ add(from, 32, from);
++ __ add(to, 32, to);
++ __ subcc(len_reg, 32, len_reg);
++ __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks192);
++ __ delayed()->nop();
++ __ ba_short(L_cbcdec_end);
++
++ __ align(OptoLoopAlignment);
++ __ BIND(L_dec_next2_blocks256);
++ __ nop();
++
++ // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero
++ __ andcc(from, 7, G0);
++ __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_next2_blocks256);
++ __ delayed()->mov(from, G1); // save original 'from' address before alignaddr
++
++ // aligned case: load input into G4, G5, L4 and L5
++ __ ldx(from,0,G4);
++ __ ldx(from,8,G5);
++ __ ldx(from,16,L4);
++ __ ldx(from,24,L5);
++ __ ba_short(L_transform_next2_blocks256);
++
++ __ BIND(L_load_misaligned_next2_blocks256);
++ __ alignaddr(from, G0, from);
++ // F0, F2, F4, F60, F62 can be clobbered
++ __ ldf(FloatRegisterImpl::D, from, 0, F0);
++ __ ldf(FloatRegisterImpl::D, from, 8, F2);
++ __ ldf(FloatRegisterImpl::D, from, 16, F60);
++ __ ldf(FloatRegisterImpl::D, from, 24, F62);
++ __ ldf(FloatRegisterImpl::D, from, 32, F4);
++ __ faligndata(F0, F2, F0);
++ __ faligndata(F2, F60, F2);
++ __ faligndata(F60, F62, F60);
++ __ faligndata(F62, F4, F62);
++ __ movdtox(F0, G4);
++ __ movdtox(F2, G5);
++ __ movdtox(F60, L4);
++ __ movdtox(F62, L5);
++ __ mov(G1, from);
++
++ __ BIND(L_transform_next2_blocks256);
++ // F0:F2 used for the first 16 bytes
++ __ xor3(L2,G4,G1);
++ __ movxtod(G1,F0);
++ __ xor3(L3,G5,G1);
++ __ movxtod(G1,F2);
++
++ // F60:F62 used for the next 16 bytes
++ __ xor3(L2,L4,G1);
++ __ movxtod(G1,F60);
++ __ xor3(L3,L5,G1);
++ __ movxtod(G1,F62);
++
++ __ aes_dround23(F54, F0, F2, F4);
++ __ aes_dround01(F52, F0, F2, F6);
++ __ aes_dround23(F54, F60, F62, F58);
++ __ aes_dround01(F52, F60, F62, F56);
++ __ aes_dround23(F50, F6, F4, F2);
++ __ aes_dround01(F48, F6, F4, F0);
++ __ aes_dround23(F50, F56, F58, F62);
++ __ aes_dround01(F48, F56, F58, F60);
++ // save F48:F54 in temp registers
++ __ movdtox(F54,G2);
++ __ movdtox(F52,G3);
++ __ movdtox(F50,G6);
++ __ movdtox(F48,G1);
++ for ( int i = 46; i >= 14; i -= 8 ) {
++ __ aes_dround23(as_FloatRegister(i), F0, F2, F4);
++ __ aes_dround01(as_FloatRegister(i-2), F0, F2, F6);
++ __ aes_dround23(as_FloatRegister(i), F60, F62, F58);
++ __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56);
++ __ aes_dround23(as_FloatRegister(i-4), F6, F4, F2);
++ __ aes_dround01(as_FloatRegister(i-6), F6, F4, F0);
++ __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62);
++ __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60);
++ }
++ // init F48:F54 with F0:F6 values (original key)
++ __ ldf(FloatRegisterImpl::D, original_key, 0, F48);
++ __ ldf(FloatRegisterImpl::D, original_key, 8, F50);
++ __ ldf(FloatRegisterImpl::D, original_key, 16, F52);
++ __ ldf(FloatRegisterImpl::D, original_key, 24, F54);
++ __ aes_dround23(F54, F0, F2, F4);
++ __ aes_dround01(F52, F0, F2, F6);
++ __ aes_dround23(F54, F60, F62, F58);
++ __ aes_dround01(F52, F60, F62, F56);
++ __ aes_dround23_l(F50, F6, F4, F2);
++ __ aes_dround01_l(F48, F6, F4, F0);
++ __ aes_dround23_l(F50, F56, F58, F62);
++ __ aes_dround01_l(F48, F56, F58, F60);
++ // re-init F48:F54 with their original values
++ __ movxtod(G2,F54);
++ __ movxtod(G3,F52);
++ __ movxtod(G6,F50);
++ __ movxtod(G1,F48);
++
++ __ movxtod(L0,F6);
++ __ movxtod(L1,F4);
++ __ fxor(FloatRegisterImpl::D, F6, F0, F0);
++ __ fxor(FloatRegisterImpl::D, F4, F2, F2);
++
++ __ movxtod(G4,F56);
++ __ movxtod(G5,F58);
++ __ mov(L4,L0);
++ __ mov(L5,L1);
++ __ fxor(FloatRegisterImpl::D, F56, F60, F60);
++ __ fxor(FloatRegisterImpl::D, F58, F62, F62);
++
++ // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero
++ __ andcc(to, 7, G1);
++ __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_next2_blocks256);
++ __ delayed()->edge8n(to, G0, G2);
++
++ // aligned case: store output into the destination array
++ __ stf(FloatRegisterImpl::D, F0, to, 0);
++ __ stf(FloatRegisterImpl::D, F2, to, 8);
++ __ stf(FloatRegisterImpl::D, F60, to, 16);
++ __ stf(FloatRegisterImpl::D, F62, to, 24);
++ __ ba_short(L_check_decrypt_loop_end256);
++
++ __ BIND(L_store_misaligned_output_next2_blocks256);
++ __ mov(8, G4);
++ __ sub(G4, G1, G4);
++ __ alignaddr(G4, G0, G4);
++ __ faligndata(F0, F2, F56); // F56 can be clobbered
++ __ faligndata(F2, F60, F2);
++ __ faligndata(F60, F62, F60);
++ __ faligndata(F62, F0, F0);
++ __ mov(to, G1);
++ __ and3(to, -8, to);
++ __ stpartialf(to, G2, F0, Assembler::ASI_PST8_PRIMARY);
++ __ stf(FloatRegisterImpl::D, F56, to, 8);
++ __ stf(FloatRegisterImpl::D, F2, to, 16);
++ __ stf(FloatRegisterImpl::D, F60, to, 24);
++ __ add(to, 32, to);
++ __ orn(G0, G2, G2);
++ __ stpartialf(to, G2, F0, Assembler::ASI_PST8_PRIMARY);
++ __ mov(G1, to);
++
++ __ BIND(L_check_decrypt_loop_end256);
++ __ add(from, 32, from);
++ __ add(to, 32, to);
++ __ subcc(len_reg, 32, len_reg);
++ __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks256);
++ __ delayed()->nop();
++
++ __ BIND(L_cbcdec_end);
++ // re-init initial vector for next block, 8-byte alignment is guaranteed
++ __ stx(L0, rvec, 0);
++ __ stx(L1, rvec, 8);
++ __ mov(L7, I0);
++ __ ret();
++ __ delayed()->restore();
++
++ return start;
++ }
++
+ void generate_initial() {
+ // Generates all stubs and initializes the entry points
+
+@@ -3368,6 +4639,14 @@
+ generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
+ &StubRoutines::_safefetchN_fault_pc,
+ &StubRoutines::_safefetchN_continuation_pc);
++
++ // generate AES intrinsics code
++ if (UseAESIntrinsics) {
++ StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
++ StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
++ StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
++ StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
++ }
+ }
+
+
+--- ./hotspot/src/cpu/sparc/vm/stubRoutines_sparc.hpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/sparc/vm/stubRoutines_sparc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -41,7 +41,7 @@
+ enum /* platform_dependent_constants */ {
+ // %%%%%%%% May be able to shrink this a lot
+ code_size1 = 20000, // simply increase if too small (assembler will crash if too small)
+- code_size2 = 20000 // simply increase if too small (assembler will crash if too small)
++ code_size2 = 22000 // simply increase if too small (assembler will crash if too small)
+ };
+
+ class Sparc {
+--- ./hotspot/src/cpu/sparc/vm/templateInterpreter_sparc.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/sparc/vm/templateInterpreter_sparc.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -156,6 +156,10 @@
+ address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) {
+ address entry = __ pc();
+
++ if (state == atos) {
++ __ profile_return_type(O0, G3_scratch, G1_scratch);
++ }
++
+ #if !defined(_LP64) && defined(COMPILER2)
+ // All return values are where we want them, except for Longs. C2 returns
+ // longs in G1 in the 32-bit build whereas the interpreter wants them in O0/O1.
+@@ -1143,7 +1147,7 @@
+
+ // reset handle block
+ __ ld_ptr(G2_thread, JavaThread::active_handles_offset(), G3_scratch);
+- __ st_ptr(G0, G3_scratch, JNIHandleBlock::top_offset_in_bytes());
++ __ st(G0, G3_scratch, JNIHandleBlock::top_offset_in_bytes());
+
+ // If we have an oop result store it where it will be safe for any further gc
+ // until we return now that we've released the handle it might be protected by
+@@ -1333,6 +1337,7 @@
+ __ movbool(true, G3_scratch);
+ __ stbool(G3_scratch, do_not_unlock_if_synchronized);
+
++ __ profile_parameters_type(G1_scratch, G3_scratch, G4_scratch, Lscratch);
+ // increment invocation counter and check for overflow
+ //
+ // Note: checking for negative value instead of overflow
+@@ -1559,37 +1564,23 @@
+ int monitor_size = method->is_synchronized() ?
+ 1*frame::interpreter_frame_monitor_size() : 0;
+ return size_activation_helper(method->max_locals(), method->max_stack(),
+- monitor_size) + call_stub_size;
++ monitor_size) + call_stub_size;
+ }
+
+-int AbstractInterpreter::layout_activation(Method* method,
+- int tempcount,
+- int popframe_extra_args,
+- int moncount,
+- int caller_actual_parameters,
+- int callee_param_count,
+- int callee_local_count,
+- frame* caller,
+- frame* interpreter_frame,
+- bool is_top_frame,
+- bool is_bottom_frame) {
++int AbstractInterpreter::size_activation(int max_stack,
++ int temps,
++ int extra_args,
++ int monitors,
++ int callee_params,
++ int callee_locals,
++ bool is_top_frame) {
+ // Note: This calculation must exactly parallel the frame setup
+ // in InterpreterGenerator::generate_fixed_frame.
+- // If f!=NULL, set up the following variables:
+- // - Lmethod
+- // - Llocals
+- // - Lmonitors (to the indicated number of monitors)
+- // - Lesp (to the indicated number of temps)
+- // The frame f (if not NULL) on entry is a description of the caller of the frame
+- // we are about to layout. We are guaranteed that we will be able to fill in a
+- // new interpreter frame as its callee (i.e. the stack space is allocated and
+- // the amount was determined by an earlier call to this method with f == NULL).
+- // On return f (if not NULL) while describe the interpreter frame we just layed out.
+
+- int monitor_size = moncount * frame::interpreter_frame_monitor_size();
+- int rounded_vm_local_words = round_to(frame::interpreter_frame_vm_local_words,WordsPerLong);
++ int monitor_size = monitors * frame::interpreter_frame_monitor_size();
+
+ assert(monitor_size == round_to(monitor_size, WordsPerLong), "must align");
++
+ //
+ // Note: if you look closely this appears to be doing something much different
+ // than generate_fixed_frame. What is happening is this. On sparc we have to do
+@@ -1614,146 +1605,171 @@
+ // there is no sense in messing working code.
+ //
+
+- int rounded_cls = round_to((callee_local_count - callee_param_count), WordsPerLong);
++ int rounded_cls = round_to((callee_locals - callee_params), WordsPerLong);
+ assert(rounded_cls == round_to(rounded_cls, WordsPerLong), "must align");
+
+- int raw_frame_size = size_activation_helper(rounded_cls, method->max_stack(),
+- monitor_size);
++ int raw_frame_size = size_activation_helper(rounded_cls, max_stack, monitor_size);
+
+- if (interpreter_frame != NULL) {
+- // The skeleton frame must already look like an interpreter frame
+- // even if not fully filled out.
+- assert(interpreter_frame->is_interpreted_frame(), "Must be interpreted frame");
++ return raw_frame_size;
++}
+
+- intptr_t* fp = interpreter_frame->fp();
++void AbstractInterpreter::layout_activation(Method* method,
++ int tempcount,
++ int popframe_extra_args,
++ int moncount,
++ int caller_actual_parameters,
++ int callee_param_count,
++ int callee_local_count,
++ frame* caller,
++ frame* interpreter_frame,
++ bool is_top_frame,
++ bool is_bottom_frame) {
++ // Set up the following variables:
++ // - Lmethod
++ // - Llocals
++ // - Lmonitors (to the indicated number of monitors)
++ // - Lesp (to the indicated number of temps)
++ // The frame caller on entry is a description of the caller of the
++ // frame we are about to lay out. We are guaranteed that we will be
++ // able to fill in a new interpreter frame as its callee (i.e. the
++ // stack space is allocated and the amount was determined by an
++ // earlier call to the size_activation() method). On return, caller
++ // will describe the interpreter frame we just laid out.
+
+- JavaThread* thread = JavaThread::current();
+- RegisterMap map(thread, false);
+- // More verification that skeleton frame is properly walkable
+- assert(fp == caller->sp(), "fp must match");
++ // The skeleton frame must already look like an interpreter frame
++ // even if not fully filled out.
++ assert(interpreter_frame->is_interpreted_frame(), "Must be interpreted frame");
+
+- intptr_t* montop = fp - rounded_vm_local_words;
++ int rounded_vm_local_words = round_to(frame::interpreter_frame_vm_local_words,WordsPerLong);
++ int monitor_size = moncount * frame::interpreter_frame_monitor_size();
++ assert(monitor_size == round_to(monitor_size, WordsPerLong), "must align");
+
+- // preallocate monitors (cf. __ add_monitor_to_stack)
+- intptr_t* monitors = montop - monitor_size;
++ intptr_t* fp = interpreter_frame->fp();
+
+- // preallocate stack space
+- intptr_t* esp = monitors - 1 -
+- (tempcount * Interpreter::stackElementWords) -
+- popframe_extra_args;
++ JavaThread* thread = JavaThread::current();
++ RegisterMap map(thread, false);
++ // More verification that skeleton frame is properly walkable
++ assert(fp == caller->sp(), "fp must match");
+
+- int local_words = method->max_locals() * Interpreter::stackElementWords;
+- NEEDS_CLEANUP;
+- intptr_t* locals;
+- if (caller->is_interpreted_frame()) {
+- // Can force the locals area to end up properly overlapping the top of the expression stack.
+- intptr_t* Lesp_ptr = caller->interpreter_frame_tos_address() - 1;
+- // Note that this computation means we replace size_of_parameters() values from the caller
+- // interpreter frame's expression stack with our argument locals
+- int parm_words = caller_actual_parameters * Interpreter::stackElementWords;
+- locals = Lesp_ptr + parm_words;
+- int delta = local_words - parm_words;
+- int computed_sp_adjustment = (delta > 0) ? round_to(delta, WordsPerLong) : 0;
+- *interpreter_frame->register_addr(I5_savedSP) = (intptr_t) (fp + computed_sp_adjustment) - STACK_BIAS;
+- if (!is_bottom_frame) {
+- // Llast_SP is set below for the current frame to SP (with the
+- // extra space for the callee's locals). Here we adjust
+- // Llast_SP for the caller's frame, removing the extra space
+- // for the current method's locals.
+- *caller->register_addr(Llast_SP) = *interpreter_frame->register_addr(I5_savedSP);
+- } else {
+- assert(*caller->register_addr(Llast_SP) >= *interpreter_frame->register_addr(I5_savedSP), "strange Llast_SP");
+- }
++ intptr_t* montop = fp - rounded_vm_local_words;
++
++ // preallocate monitors (cf. __ add_monitor_to_stack)
++ intptr_t* monitors = montop - monitor_size;
++
++ // preallocate stack space
++ intptr_t* esp = monitors - 1 -
++ (tempcount * Interpreter::stackElementWords) -
++ popframe_extra_args;
++
++ int local_words = method->max_locals() * Interpreter::stackElementWords;
++ NEEDS_CLEANUP;
++ intptr_t* locals;
++ if (caller->is_interpreted_frame()) {
++ // Can force the locals area to end up properly overlapping the top of the expression stack.
++ intptr_t* Lesp_ptr = caller->interpreter_frame_tos_address() - 1;
++ // Note that this computation means we replace size_of_parameters() values from the caller
++ // interpreter frame's expression stack with our argument locals
++ int parm_words = caller_actual_parameters * Interpreter::stackElementWords;
++ locals = Lesp_ptr + parm_words;
++ int delta = local_words - parm_words;
++ int computed_sp_adjustment = (delta > 0) ? round_to(delta, WordsPerLong) : 0;
++ *interpreter_frame->register_addr(I5_savedSP) = (intptr_t) (fp + computed_sp_adjustment) - STACK_BIAS;
++ if (!is_bottom_frame) {
++ // Llast_SP is set below for the current frame to SP (with the
++ // extra space for the callee's locals). Here we adjust
++ // Llast_SP for the caller's frame, removing the extra space
++ // for the current method's locals.
++ *caller->register_addr(Llast_SP) = *interpreter_frame->register_addr(I5_savedSP);
+ } else {
+- assert(caller->is_compiled_frame() || caller->is_entry_frame(), "only possible cases");
+- // Don't have Lesp available; lay out locals block in the caller
+- // adjacent to the register window save area.
+- //
+- // Compiled frames do not allocate a varargs area which is why this if
+- // statement is needed.
+- //
+- if (caller->is_compiled_frame()) {
+- locals = fp + frame::register_save_words + local_words - 1;
+- } else {
+- locals = fp + frame::memory_parameter_word_sp_offset + local_words - 1;
+- }
+- if (!caller->is_entry_frame()) {
+- // Caller wants his own SP back
+- int caller_frame_size = caller->cb()->frame_size();
+- *interpreter_frame->register_addr(I5_savedSP) = (intptr_t)(caller->fp() - caller_frame_size) - STACK_BIAS;
++ assert(*caller->register_addr(Llast_SP) >= *interpreter_frame->register_addr(I5_savedSP), "strange Llast_SP");
++ }
++ } else {
++ assert(caller->is_compiled_frame() || caller->is_entry_frame(), "only possible cases");
++ // Don't have Lesp available; lay out locals block in the caller
++ // adjacent to the register window save area.
++ //
++ // Compiled frames do not allocate a varargs area which is why this if
++ // statement is needed.
++ //
++ if (caller->is_compiled_frame()) {
++ locals = fp + frame::register_save_words + local_words - 1;
++ } else {
++ locals = fp + frame::memory_parameter_word_sp_offset + local_words - 1;
++ }
++ if (!caller->is_entry_frame()) {
++ // Caller wants his own SP back
++ int caller_frame_size = caller->cb()->frame_size();
++ *interpreter_frame->register_addr(I5_savedSP) = (intptr_t)(caller->fp() - caller_frame_size) - STACK_BIAS;
++ }
++ }
++ if (TraceDeoptimization) {
++ if (caller->is_entry_frame()) {
++ // make sure I5_savedSP and the entry frame's notion of saved SP
++ // agree. This assertion duplicates a check in entry frame code
++ // but catches the failure earlier.
++ assert(*caller->register_addr(Lscratch) == *interpreter_frame->register_addr(I5_savedSP),
++ "would change callers SP");
++ }
++ if (caller->is_entry_frame()) {
++ tty->print("entry ");
++ }
++ if (caller->is_compiled_frame()) {
++ tty->print("compiled ");
++ if (caller->is_deoptimized_frame()) {
++ tty->print("(deopt) ");
+ }
+ }
+- if (TraceDeoptimization) {
+- if (caller->is_entry_frame()) {
+- // make sure I5_savedSP and the entry frames notion of saved SP
+- // agree. This assertion duplicate a check in entry frame code
+- // but catches the failure earlier.
+- assert(*caller->register_addr(Lscratch) == *interpreter_frame->register_addr(I5_savedSP),
+- "would change callers SP");
+- }
+- if (caller->is_entry_frame()) {
+- tty->print("entry ");
+- }
+- if (caller->is_compiled_frame()) {
+- tty->print("compiled ");
+- if (caller->is_deoptimized_frame()) {
+- tty->print("(deopt) ");
+- }
+- }
+- if (caller->is_interpreted_frame()) {
+- tty->print("interpreted ");
+- }
+- tty->print_cr("caller fp=0x%x sp=0x%x", caller->fp(), caller->sp());
+- tty->print_cr("save area = 0x%x, 0x%x", caller->sp(), caller->sp() + 16);
+- tty->print_cr("save area = 0x%x, 0x%x", caller->fp(), caller->fp() + 16);
+- tty->print_cr("interpreter fp=0x%x sp=0x%x", interpreter_frame->fp(), interpreter_frame->sp());
+- tty->print_cr("save area = 0x%x, 0x%x", interpreter_frame->sp(), interpreter_frame->sp() + 16);
+- tty->print_cr("save area = 0x%x, 0x%x", interpreter_frame->fp(), interpreter_frame->fp() + 16);
+- tty->print_cr("Llocals = 0x%x", locals);
+- tty->print_cr("Lesp = 0x%x", esp);
+- tty->print_cr("Lmonitors = 0x%x", monitors);
++ if (caller->is_interpreted_frame()) {
++ tty->print("interpreted ");
+ }
++ tty->print_cr("caller fp=0x%x sp=0x%x", caller->fp(), caller->sp());
++ tty->print_cr("save area = 0x%x, 0x%x", caller->sp(), caller->sp() + 16);
++ tty->print_cr("save area = 0x%x, 0x%x", caller->fp(), caller->fp() + 16);
++ tty->print_cr("interpreter fp=0x%x sp=0x%x", interpreter_frame->fp(), interpreter_frame->sp());
++ tty->print_cr("save area = 0x%x, 0x%x", interpreter_frame->sp(), interpreter_frame->sp() + 16);
++ tty->print_cr("save area = 0x%x, 0x%x", interpreter_frame->fp(), interpreter_frame->fp() + 16);
++ tty->print_cr("Llocals = 0x%x", locals);
++ tty->print_cr("Lesp = 0x%x", esp);
++ tty->print_cr("Lmonitors = 0x%x", monitors);
++ }
+
+- if (method->max_locals() > 0) {
+- assert(locals < caller->sp() || locals >= (caller->sp() + 16), "locals in save area");
+- assert(locals < caller->fp() || locals > (caller->fp() + 16), "locals in save area");
+- assert(locals < interpreter_frame->sp() || locals > (interpreter_frame->sp() + 16), "locals in save area");
+- assert(locals < interpreter_frame->fp() || locals >= (interpreter_frame->fp() + 16), "locals in save area");
+- }
++ if (method->max_locals() > 0) {
++ assert(locals < caller->sp() || locals >= (caller->sp() + 16), "locals in save area");
++ assert(locals < caller->fp() || locals > (caller->fp() + 16), "locals in save area");
++ assert(locals < interpreter_frame->sp() || locals > (interpreter_frame->sp() + 16), "locals in save area");
++ assert(locals < interpreter_frame->fp() || locals >= (interpreter_frame->fp() + 16), "locals in save area");
++ }
+ #ifdef _LP64
+- assert(*interpreter_frame->register_addr(I5_savedSP) & 1, "must be odd");
++ assert(*interpreter_frame->register_addr(I5_savedSP) & 1, "must be odd");
+ #endif
+
+- *interpreter_frame->register_addr(Lmethod) = (intptr_t) method;
+- *interpreter_frame->register_addr(Llocals) = (intptr_t) locals;
+- *interpreter_frame->register_addr(Lmonitors) = (intptr_t) monitors;
+- *interpreter_frame->register_addr(Lesp) = (intptr_t) esp;
+- // Llast_SP will be same as SP as there is no adapter space
+- *interpreter_frame->register_addr(Llast_SP) = (intptr_t) interpreter_frame->sp() - STACK_BIAS;
+- *interpreter_frame->register_addr(LcpoolCache) = (intptr_t) method->constants()->cache();
++ *interpreter_frame->register_addr(Lmethod) = (intptr_t) method;
++ *interpreter_frame->register_addr(Llocals) = (intptr_t) locals;
++ *interpreter_frame->register_addr(Lmonitors) = (intptr_t) monitors;
++ *interpreter_frame->register_addr(Lesp) = (intptr_t) esp;
++ // Llast_SP will be the same as SP as there is no adapter space
++ *interpreter_frame->register_addr(Llast_SP) = (intptr_t) interpreter_frame->sp() - STACK_BIAS;
++ *interpreter_frame->register_addr(LcpoolCache) = (intptr_t) method->constants()->cache();
+ #ifdef FAST_DISPATCH
+- *interpreter_frame->register_addr(IdispatchTables) = (intptr_t) Interpreter::dispatch_table();
++ *interpreter_frame->register_addr(IdispatchTables) = (intptr_t) Interpreter::dispatch_table();
+ #endif
+
+
+ #ifdef ASSERT
+- BasicObjectLock* mp = (BasicObjectLock*)monitors;
++ BasicObjectLock* mp = (BasicObjectLock*)monitors;
+
+- assert(interpreter_frame->interpreter_frame_method() == method, "method matches");
+- assert(interpreter_frame->interpreter_frame_local_at(9) == (intptr_t *)((intptr_t)locals - (9 * Interpreter::stackElementSize)), "locals match");
+- assert(interpreter_frame->interpreter_frame_monitor_end() == mp, "monitor_end matches");
+- assert(((intptr_t *)interpreter_frame->interpreter_frame_monitor_begin()) == ((intptr_t *)mp)+monitor_size, "monitor_begin matches");
+- assert(interpreter_frame->interpreter_frame_tos_address()-1 == esp, "esp matches");
++ assert(interpreter_frame->interpreter_frame_method() == method, "method matches");
++ assert(interpreter_frame->interpreter_frame_local_at(9) == (intptr_t *)((intptr_t)locals - (9 * Interpreter::stackElementSize)), "locals match");
++ assert(interpreter_frame->interpreter_frame_monitor_end() == mp, "monitor_end matches");
++ assert(((intptr_t *)interpreter_frame->interpreter_frame_monitor_begin()) == ((intptr_t *)mp)+monitor_size, "monitor_begin matches");
++ assert(interpreter_frame->interpreter_frame_tos_address()-1 == esp, "esp matches");
+
+- // check bounds
+- intptr_t* lo = interpreter_frame->sp() + (frame::memory_parameter_word_sp_offset - 1);
+- intptr_t* hi = interpreter_frame->fp() - rounded_vm_local_words;
+- assert(lo < monitors && montop <= hi, "monitors in bounds");
+- assert(lo <= esp && esp < monitors, "esp in bounds");
++ // check bounds
++ intptr_t* lo = interpreter_frame->sp() + (frame::memory_parameter_word_sp_offset - 1);
++ intptr_t* hi = interpreter_frame->fp() - rounded_vm_local_words;
++ assert(lo < monitors && montop <= hi, "monitors in bounds");
++ assert(lo <= esp && esp < monitors, "esp in bounds");
+ #endif // ASSERT
+- }
+-
+- return raw_frame_size;
+ }
+
+ //----------------------------------------------------------------------------------------------------
+--- ./hotspot/src/cpu/sparc/vm/templateTable_sparc.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/sparc/vm/templateTable_sparc.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -2942,12 +2942,12 @@
+
+
+ void TemplateTable::generate_vtable_call(Register Rrecv, Register Rindex, Register Rret) {
+- Register Rtemp = G4_scratch;
+ Register Rcall = Rindex;
+ assert_different_registers(Rcall, G5_method, Gargs, Rret);
+
+ // get target Method* & entry point
+ __ lookup_virtual_method(Rrecv, Rindex, G5_method);
++ __ profile_arguments_type(G5_method, Rcall, Gargs, true);
+ __ call_from_interpreter(Rcall, Gargs, Rret);
+ }
+
+@@ -3022,6 +3022,7 @@
+ __ null_check(O0);
+
+ __ profile_final_call(O4);
++ __ profile_arguments_type(G5_method, Rscratch, Gargs, true);
+
+ // get return address
+ AddressLiteral table(Interpreter::invoke_return_entry_table());
+@@ -3051,6 +3052,7 @@
+
+ // do the call
+ __ profile_call(O4);
++ __ profile_arguments_type(G5_method, Rscratch, Gargs, false);
+ __ call_from_interpreter(Rscratch, Gargs, Rret);
+ }
+
+@@ -3066,6 +3068,7 @@
+
+ // do the call
+ __ profile_call(O4);
++ __ profile_arguments_type(G5_method, Rscratch, Gargs, false);
+ __ call_from_interpreter(Rscratch, Gargs, Rret);
+ }
+
+@@ -3091,6 +3094,7 @@
+ // do the call - the index (f2) contains the Method*
+ assert_different_registers(G5_method, Gargs, Rcall);
+ __ mov(Rindex, G5_method);
++ __ profile_arguments_type(G5_method, Rcall, Gargs, true);
+ __ call_from_interpreter(Rcall, Gargs, Rret);
+ __ bind(notFinal);
+
+@@ -3197,6 +3201,7 @@
+ Register Rcall = Rinterface;
+ assert_different_registers(Rcall, G5_method, Gargs, Rret);
+
++ __ profile_arguments_type(G5_method, Rcall, Gargs, true);
+ __ call_from_interpreter(Rcall, Gargs, Rret);
+ }
+
+@@ -3226,6 +3231,7 @@
+ // do the call
+ __ verify_oop(G4_mtype);
+ __ profile_final_call(O4); // FIXME: profile the LambdaForm also
++ __ profile_arguments_type(G5_method, Rscratch, Gargs, true);
+ __ call_from_interpreter(Rscratch, Gargs, Rret);
+ }
+
+@@ -3262,6 +3268,7 @@
+
+ // do the call
+ __ verify_oop(G4_callsite);
++ __ profile_arguments_type(G5_method, Rscratch, Gargs, false);
+ __ call_from_interpreter(Rscratch, Gargs, Rret);
+ }
+
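
The SPARC template-interpreter hunks above all follow one pattern: a `__ profile_arguments_type(G5_method, ...)` is emitted immediately before each `call_from_interpreter` dispatch, so the interpreter records outgoing argument types into the method's profile before the call; the JIT later reads those CallTypeData/VirtualCallTypeData entries for speculative typing. A rough standalone illustration of call-site type profiling (not HotSpot's MethodData layout; all names here are invented):

#include <cstdio>
#include <typeinfo>

// Invented per-call-site profile: remembers the single argument type seen
// so far and degrades to "polymorphic" once two calls disagree.
struct CallSiteProfile {
    const std::type_info* seen = nullptr;
    bool polymorphic = false;

    void record(const std::type_info& t) {
        if (seen == nullptr)   seen = &t;         // monomorphic so far
        else if (*seen != t)   polymorphic = true;
    }
};

struct Base { virtual ~Base() {} };
struct A : Base {};
struct B : Base {};

int main() {
    CallSiteProfile p;
    A a; B b;
    Base* args[] = { &a, &a, &b };
    for (Base* o : args) p.record(typeid(*o));
    std::printf("polymorphic: %s\n", p.polymorphic ? "yes" : "no");
}
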
+--- ./hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+@@ -234,7 +234,7 @@
+ assert((OptoLoopAlignment % relocInfo::addr_unit()) == 0, "alignment is not a multiple of NOP size");
+
+ char buf[512];
+- jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
++ jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+ (has_v9() ? ", v9" : (has_v8() ? ", v8" : "")),
+ (has_hardware_popc() ? ", popc" : ""),
+ (has_vis1() ? ", vis1" : ""),
+@@ -242,6 +242,7 @@
+ (has_vis3() ? ", vis3" : ""),
+ (has_blk_init() ? ", blk_init" : ""),
+ (has_cbcond() ? ", cbcond" : ""),
++ (has_aes() ? ", aes" : ""),
+ (is_ultra3() ? ", ultra3" : ""),
+ (is_sun4v() ? ", sun4v" : ""),
+ (is_niagara_plus() ? ", niagara_plus" : (is_niagara() ? ", niagara" : "")),
+@@ -265,6 +266,41 @@
+ if (!has_vis1()) // Drop to 0 if no VIS1 support
+ UseVIS = 0;
+
++ // SPARC T4 and above should have support for AES instructions
++ if (has_aes()) {
++ if (UseVIS > 2) { // AES intrinsics use MOVxTOd/MOVdTOx which are VIS3
++ if (FLAG_IS_DEFAULT(UseAES)) {
++ FLAG_SET_DEFAULT(UseAES, true);
++ }
++ if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
++ FLAG_SET_DEFAULT(UseAESIntrinsics, true);
++ }
++ // disable both AES flags if either of them is disabled on the command line
++ if (!UseAES || !UseAESIntrinsics) {
++ FLAG_SET_DEFAULT(UseAES, false);
++ FLAG_SET_DEFAULT(UseAESIntrinsics, false);
++ }
++ } else {
++ if (UseAES || UseAESIntrinsics) {
++ warning("SPARC AES intrinsics require VIS3 instruction support. Intrinsics will be disabled.");
++ if (UseAES) {
++ FLAG_SET_DEFAULT(UseAES, false);
++ }
++ if (UseAESIntrinsics) {
++ FLAG_SET_DEFAULT(UseAESIntrinsics, false);
++ }
++ }
++ }
++ } else if (UseAES || UseAESIntrinsics) {
++ warning("AES instructions are not available on this CPU");
++ if (UseAES) {
++ FLAG_SET_DEFAULT(UseAES, false);
++ }
++ if (UseAESIntrinsics) {
++ FLAG_SET_DEFAULT(UseAESIntrinsics, false);
++ }
++ }
++
+ if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
+ (cache_line_size > ContendedPaddingWidth))
+ ContendedPaddingWidth = cache_line_size;
+@@ -282,22 +318,22 @@
+ tty->print("BIS");
+ }
+ if (AllocatePrefetchLines > 1) {
+- tty->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
++ tty->print_cr(" at distance %d, %d lines of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchLines, (int) AllocatePrefetchStepSize);
+ } else {
+- tty->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
++ tty->print_cr(" at distance %d, one line of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchStepSize);
+ }
+ }
+ if (PrefetchCopyIntervalInBytes > 0) {
+- tty->print_cr("PrefetchCopyIntervalInBytes %d", PrefetchCopyIntervalInBytes);
++ tty->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
+ }
+ if (PrefetchScanIntervalInBytes > 0) {
+- tty->print_cr("PrefetchScanIntervalInBytes %d", PrefetchScanIntervalInBytes);
++ tty->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
+ }
+ if (PrefetchFieldsAhead > 0) {
+- tty->print_cr("PrefetchFieldsAhead %d", PrefetchFieldsAhead);
++ tty->print_cr("PrefetchFieldsAhead %d", (int) PrefetchFieldsAhead);
+ }
+ if (ContendedPaddingWidth > 0) {
+- tty->print_cr("ContendedPaddingWidth %d", ContendedPaddingWidth);
++ tty->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
+ }
+ }
+ #endif // PRODUCT
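
The new vm_version_sparc.cpp block is the stock HotSpot flag-gating idiom: a flag is defaulted on only when the hardware supports it and the user has not set it (FLAG_IS_DEFAULT/FLAG_SET_DEFAULT), explicit settings win, and UseAES and UseAESIntrinsics are kept consistent by turning both off when either is off. A minimal standalone sketch of the same defaulting logic (Flag and set_default are stand-ins, not VM APIs):

#include <cstdio>

// Illustrative stand-ins for HotSpot's flag machinery.
struct Flag { bool value; bool set_by_user; };

static void set_default(Flag& f, bool v) { if (!f.set_by_user) f.value = v; }

int main() {
    bool cpu_has_aes = true;   // assumption: result of CPU feature probing
    Flag use_aes        = { false, false };
    Flag use_intrinsics = { false, false };

    if (cpu_has_aes) {
        set_default(use_aes, true);
        set_default(use_intrinsics, true);
        // Keep the pair consistent: either flag turned off disables both.
        if (!use_aes.value || !use_intrinsics.value)
            use_aes.value = use_intrinsics.value = false;
    } else if (use_aes.value || use_intrinsics.value) {
        std::puts("warning: AES instructions are not available on this CPU");
        use_aes.value = use_intrinsics.value = false;
    }
    std::printf("UseAES=%d UseAESIntrinsics=%d\n", use_aes.value, use_intrinsics.value);
}
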
+--- ./hotspot/src/cpu/sparc/vm/vm_version_sparc.hpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/sparc/vm/vm_version_sparc.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -48,7 +48,9 @@
+ sparc64_family = 14,
+ M_family = 15,
+ T_family = 16,
+- T1_model = 17
++ T1_model = 17,
++ sparc5_instructions = 18,
++ aes_instructions = 19
+ };
+
+ enum Feature_Flag_Set {
+@@ -73,6 +75,8 @@
+ M_family_m = 1 << M_family,
+ T_family_m = 1 << T_family,
+ T1_model_m = 1 << T1_model,
++ sparc5_instructions_m = 1 << sparc5_instructions,
++ aes_instructions_m = 1 << aes_instructions,
+
+ generic_v8_m = v8_instructions_m | hardware_mul32_m | hardware_div32_m | hardware_fsmuld_m,
+ generic_v9_m = generic_v8_m | v9_instructions_m,
+@@ -123,6 +127,8 @@
+ static bool has_vis3() { return (_features & vis3_instructions_m) != 0; }
+ static bool has_blk_init() { return (_features & blk_init_instructions_m) != 0; }
+ static bool has_cbcond() { return (_features & cbcond_instructions_m) != 0; }
++ static bool has_sparc5_instr() { return (_features & sparc5_instructions_m) != 0; }
++ static bool has_aes() { return (_features & aes_instructions_m) != 0; }
+
+ static bool supports_compare_and_exchange()
+ { return has_v9(); }
+@@ -133,6 +139,7 @@
+
+ static bool is_M_series() { return is_M_family(_features); }
+ static bool is_T4() { return is_T_family(_features) && has_cbcond(); }
++ static bool is_T7() { return is_T_family(_features) && has_sparc5_instr(); }
+
+ // Fujitsu SPARC64
+ static bool is_sparc64() { return (_features & sparc64_family_m) != 0; }
+@@ -152,7 +159,7 @@
+ static const char* cpu_features() { return _features_str; }
+
+ static intx prefetch_data_size() {
+- return is_T4() ? 32 : 64; // default prefetch block size on sparc
++ return is_T4() && !is_T7() ? 32 : 64; // default prefetch block size on sparc
+ }
+
+ // Prefetch
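
The header change extends the usual feature-bit scheme: each Feature_Flag value is a bit index, each `*_m` mask is `1 << index`, so queries such as has_aes() compile to a single mask test, and is_T7() is a conjunction of such tests. The pattern in isolation (the feature set below is an assumed probe result, not real detection):

#include <cstdint>
#include <cstdio>

enum Feature_Flag { v9 = 0, cbcond = 7, sparc5_instr = 18, aes = 19 };

enum Feature_Mask : uint32_t {
    v9_m           = 1u << v9,
    cbcond_m       = 1u << cbcond,
    sparc5_instr_m = 1u << sparc5_instr,
    aes_m          = 1u << aes,
};

static uint32_t features = v9_m | cbcond_m | aes_m;  // assumed probe result

static bool has_aes()    { return (features & aes_m) != 0; }
static bool has_sparc5() { return (features & sparc5_instr_m) != 0; }

int main() {
    std::printf("aes=%d sparc5=%d\n", has_aes(), has_sparc5());
}
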
+--- ./hotspot/src/cpu/x86/vm/assembler_x86.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/x86/vm/assembler_x86.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+@@ -522,11 +522,11 @@
+ // these asserts are somewhat nonsensical
+ #ifndef _LP64
+ assert(which == imm_operand || which == disp32_operand,
+- err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, ip));
++ err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip)));
+ #else
+ assert((which == call32_operand || which == imm_operand) && is_64bit ||
+ which == narrow_oop_operand && !is_64bit,
+- err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, ip));
++ err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip)));
+ #endif // _LP64
+ return ip;
+
+@@ -1089,6 +1089,21 @@
+ emit_arith(0x23, 0xC0, dst, src);
+ }
+
++void Assembler::andnl(Register dst, Register src1, Register src2) {
++ assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
++ int encode = vex_prefix_0F38_and_encode(dst, src1, src2);
++ emit_int8((unsigned char)0xF2);
++ emit_int8((unsigned char)(0xC0 | encode));
++}
++
++void Assembler::andnl(Register dst, Register src1, Address src2) {
++ InstructionMark im(this);
++ assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
++ vex_prefix_0F38(dst, src1, src2);
++ emit_int8((unsigned char)0xF2);
++ emit_operand(dst, src2);
++}
++
+ void Assembler::bsfl(Register dst, Register src) {
+ int encode = prefix_and_encode(dst->encoding(), src->encoding());
+ emit_int8(0x0F);
+@@ -1097,7 +1112,6 @@
+ }
+
+ void Assembler::bsrl(Register dst, Register src) {
+- assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
+ int encode = prefix_and_encode(dst->encoding(), src->encoding());
+ emit_int8(0x0F);
+ emit_int8((unsigned char)0xBD);
+@@ -1110,6 +1124,51 @@
+ emit_int8((unsigned char)(0xC8 | encode));
+ }
+
++void Assembler::blsil(Register dst, Register src) {
++ assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
++ int encode = vex_prefix_0F38_and_encode(rbx, dst, src);
++ emit_int8((unsigned char)0xF3);
++ emit_int8((unsigned char)(0xC0 | encode));
++}
++
++void Assembler::blsil(Register dst, Address src) {
++ InstructionMark im(this);
++ assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
++ vex_prefix_0F38(rbx, dst, src);
++ emit_int8((unsigned char)0xF3);
++ emit_operand(rbx, src);
++}
++
++void Assembler::blsmskl(Register dst, Register src) {
++ assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
++ int encode = vex_prefix_0F38_and_encode(rdx, dst, src);
++ emit_int8((unsigned char)0xF3);
++ emit_int8((unsigned char)(0xC0 | encode));
++}
++
++void Assembler::blsmskl(Register dst, Address src) {
++ InstructionMark im(this);
++ assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
++ vex_prefix_0F38(rdx, dst, src);
++ emit_int8((unsigned char)0xF3);
++ emit_operand(rdx, src);
++}
++
++void Assembler::blsrl(Register dst, Register src) {
++ assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
++ int encode = vex_prefix_0F38_and_encode(rcx, dst, src);
++ emit_int8((unsigned char)0xF3);
++ emit_int8((unsigned char)(0xC0 | encode));
++}
++
++void Assembler::blsrl(Register dst, Address src) {
++ InstructionMark im(this);
++ assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
++ vex_prefix_0F38(rcx, dst, src);
++ emit_int8((unsigned char)0xF3);
++ emit_operand(rcx, src);
++}
++
+ void Assembler::call(Label& L, relocInfo::relocType rtype) {
+ // suspect disp32 is always good
+ int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);
+@@ -1707,7 +1766,7 @@
+
+ // Move Unaligned 256bit Vector
+ void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
+- assert(UseAVX, "");
++ assert(UseAVX > 0, "");
+ bool vector256 = true;
+ int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector256);
+ emit_int8(0x6F);
+@@ -1715,7 +1774,7 @@
+ }
+
+ void Assembler::vmovdqu(XMMRegister dst, Address src) {
+- assert(UseAVX, "");
++ assert(UseAVX > 0, "");
+ InstructionMark im(this);
+ bool vector256 = true;
+ vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector256);
+@@ -1724,7 +1783,7 @@
+ }
+
+ void Assembler::vmovdqu(Address dst, XMMRegister src) {
+- assert(UseAVX, "");
++ assert(UseAVX > 0, "");
+ InstructionMark im(this);
+ bool vector256 = true;
+ // swap src<->dst for encoding
+@@ -2283,6 +2342,11 @@
+ emit_int8(imm8);
+ }
+
++void Assembler::pause() {
++ emit_int8((unsigned char)0xF3);
++ emit_int8((unsigned char)0x90);
++}
++
+ void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
+ assert(VM_Version::supports_sse4_2(), "");
+ InstructionMark im(this);
+@@ -2607,6 +2671,11 @@
+ }
+ }
+
++void Assembler::rdtsc() {
++ emit_int8((unsigned char)0x0F);
++ emit_int8((unsigned char)0x31);
++}
++
+ // copies data from [esi] to [edi] using rcx pointer sized words
+ // generic
+ void Assembler::rep_mov() {
+@@ -2878,6 +2947,24 @@
+ emit_operand(dst, src);
+ }
+
++void Assembler::tzcntl(Register dst, Register src) {
++ assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
++ emit_int8((unsigned char)0xF3);
++ int encode = prefix_and_encode(dst->encoding(), src->encoding());
++ emit_int8(0x0F);
++ emit_int8((unsigned char)0xBC);
++ emit_int8((unsigned char)0xC0 | encode);
++}
++
++void Assembler::tzcntq(Register dst, Register src) {
++ assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
++ emit_int8((unsigned char)0xF3);
++ int encode = prefixq_and_encode(dst->encoding(), src->encoding());
++ emit_int8(0x0F);
++ emit_int8((unsigned char)0xBC);
++ emit_int8((unsigned char)(0xC0 | encode));
++}
++
+ void Assembler::ucomisd(XMMRegister dst, Address src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
+@@ -2898,6 +2985,11 @@
+ emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE);
+ }
+
++void Assembler::xabort(int8_t imm8) {
++ emit_int8((unsigned char)0xC6);
++ emit_int8((unsigned char)0xF8);
++ emit_int8((unsigned char)(imm8 & 0xFF));
++}
+
+ void Assembler::xaddl(Address dst, Register src) {
+ InstructionMark im(this);
+@@ -2907,6 +2999,24 @@
+ emit_operand(src, dst);
+ }
+
++void Assembler::xbegin(Label& abort, relocInfo::relocType rtype) {
++ InstructionMark im(this);
++ relocate(rtype);
++ if (abort.is_bound()) {
++ address entry = target(abort);
++ assert(entry != NULL, "abort entry NULL");
++ intptr_t offset = entry - pc();
++ emit_int8((unsigned char)0xC7);
++ emit_int8((unsigned char)0xF8);
++ emit_int32(offset - 6); // 2 opcode + 4 address
++ } else {
++ abort.add_patch_at(code(), locator());
++ emit_int8((unsigned char)0xC7);
++ emit_int8((unsigned char)0xF8);
++ emit_int32(0);
++ }
++}
++
+ void Assembler::xchgl(Register dst, Address src) { // xchg
+ InstructionMark im(this);
+ prefix(src, dst);
+@@ -2920,6 +3030,12 @@
+ emit_int8((unsigned char)(0xC0 | encode));
+ }
+
++void Assembler::xend() {
++ emit_int8((unsigned char)0x0F);
++ emit_int8((unsigned char)0x01);
++ emit_int8((unsigned char)0xD5);
++}
++
+ void Assembler::xgetbv() {
+ emit_int8(0x0F);
+ emit_int8(0x01);
+@@ -4837,6 +4953,21 @@
+ emit_arith(0x23, 0xC0, dst, src);
+ }
+
++void Assembler::andnq(Register dst, Register src1, Register src2) {
++ assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
++ int encode = vex_prefix_0F38_and_encode_q(dst, src1, src2);
++ emit_int8((unsigned char)0xF2);
++ emit_int8((unsigned char)(0xC0 | encode));
++}
++
++void Assembler::andnq(Register dst, Register src1, Address src2) {
++ InstructionMark im(this);
++ assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
++ vex_prefix_0F38_q(dst, src1, src2);
++ emit_int8((unsigned char)0xF2);
++ emit_operand(dst, src2);
++}
++
+ void Assembler::bsfq(Register dst, Register src) {
+ int encode = prefixq_and_encode(dst->encoding(), src->encoding());
+ emit_int8(0x0F);
+@@ -4845,7 +4976,6 @@
+ }
+
+ void Assembler::bsrq(Register dst, Register src) {
+- assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
+ int encode = prefixq_and_encode(dst->encoding(), src->encoding());
+ emit_int8(0x0F);
+ emit_int8((unsigned char)0xBD);
+@@ -4858,6 +4988,51 @@
+ emit_int8((unsigned char)(0xC8 | encode));
+ }
+
++void Assembler::blsiq(Register dst, Register src) {
++ assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
++ int encode = vex_prefix_0F38_and_encode_q(rbx, dst, src);
++ emit_int8((unsigned char)0xF3);
++ emit_int8((unsigned char)(0xC0 | encode));
++}
++
++void Assembler::blsiq(Register dst, Address src) {
++ InstructionMark im(this);
++ assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
++ vex_prefix_0F38_q(rbx, dst, src);
++ emit_int8((unsigned char)0xF3);
++ emit_operand(rbx, src);
++}
++
++void Assembler::blsmskq(Register dst, Register src) {
++ assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
++ int encode = vex_prefix_0F38_and_encode_q(rdx, dst, src);
++ emit_int8((unsigned char)0xF3);
++ emit_int8((unsigned char)(0xC0 | encode));
++}
++
++void Assembler::blsmskq(Register dst, Address src) {
++ InstructionMark im(this);
++ assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
++ vex_prefix_0F38_q(rdx, dst, src);
++ emit_int8((unsigned char)0xF3);
++ emit_operand(rdx, src);
++}
++
++void Assembler::blsrq(Register dst, Register src) {
++ assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
++ int encode = vex_prefix_0F38_and_encode_q(rcx, dst, src);
++ emit_int8((unsigned char)0xF3);
++ emit_int8((unsigned char)(0xC0 | encode));
++}
++
++void Assembler::blsrq(Register dst, Address src) {
++ InstructionMark im(this);
++ assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
++ vex_prefix_0F38_q(rcx, dst, src);
++ emit_int8((unsigned char)0xF3);
++ emit_operand(rcx, src);
++}
++
+ void Assembler::cdqq() {
+ prefix(REX_W);
+ emit_int8((unsigned char)0x99);
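
The assembler_x86.cpp additions encode the BMI1 group (andn, blsi, blsmsk, blsr, tzcnt), each guarded by VM_Version::supports_bmi1(). Their semantics are simple bit identities, which is also why the compiler can fall back to plain AND/NEG/SUB sequences when BMI1 is absent; a self-contained check of those identities (tzcnt shown via the GCC/Clang builtin):

#include <cassert>
#include <cstdint>

// BMI1 operations as their plain-integer identities (what the new
// andn/blsi/blsmsk/blsr encodings compute, per the Intel SDM).
static uint64_t andn  (uint64_t a, uint64_t b) { return ~a & b; }  // ANDN
static uint64_t blsi  (uint64_t x) { return x & (0 - x); }         // isolate lowest set bit
static uint64_t blsmsk(uint64_t x) { return x ^ (x - 1); }         // mask up to lowest set bit
static uint64_t blsr  (uint64_t x) { return x & (x - 1); }         // reset lowest set bit
static int      tzcnt (uint64_t x) { return x ? __builtin_ctzll(x) : 64; }  // GCC/Clang builtin

int main() {
    uint64_t x = 0x28;                  // binary 101000
    assert(blsi(x)   == 0x08);
    assert(blsmsk(x) == 0x0f);
    assert(blsr(x)   == 0x20);
    assert(tzcnt(x)  == 3);
    assert(andn(0x0c, 0x0a) == 0x02);
    return 0;
}
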
+--- ./hotspot/src/cpu/x86/vm/assembler_x86.hpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/x86/vm/assembler_x86.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -590,10 +590,35 @@
+ vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, false, vector256);
+ }
+
++ void vex_prefix_0F38(Register dst, Register nds, Address src) {
++ bool vex_w = false;
++ bool vector256 = false;
++ vex_prefix(src, nds->encoding(), dst->encoding(),
++ VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256);
++ }
++
++ void vex_prefix_0F38_q(Register dst, Register nds, Address src) {
++ bool vex_w = true;
++ bool vector256 = false;
++ vex_prefix(src, nds->encoding(), dst->encoding(),
++ VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256);
++ }
+ int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
+ VexSimdPrefix pre, VexOpcode opc,
+ bool vex_w, bool vector256);
+
++ int vex_prefix_0F38_and_encode(Register dst, Register nds, Register src) {
++ bool vex_w = false;
++ bool vector256 = false;
++ return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
++ VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256);
++ }
++ int vex_prefix_0F38_and_encode_q(Register dst, Register nds, Register src) {
++ bool vex_w = true;
++ bool vector256 = false;
++ return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
++ VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256);
++ }
+ int vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
+ VexSimdPrefix pre, bool vector256 = false,
+ VexOpcode opc = VEX_OPCODE_0F) {
+@@ -897,6 +922,27 @@
+ void andq(Register dst, Address src);
+ void andq(Register dst, Register src);
+
++ // BMI instructions
++ void andnl(Register dst, Register src1, Register src2);
++ void andnl(Register dst, Register src1, Address src2);
++ void andnq(Register dst, Register src1, Register src2);
++ void andnq(Register dst, Register src1, Address src2);
++
++ void blsil(Register dst, Register src);
++ void blsil(Register dst, Address src);
++ void blsiq(Register dst, Register src);
++ void blsiq(Register dst, Address src);
++
++ void blsmskl(Register dst, Register src);
++ void blsmskl(Register dst, Address src);
++ void blsmskq(Register dst, Register src);
++ void blsmskq(Register dst, Address src);
++
++ void blsrl(Register dst, Register src);
++ void blsrl(Register dst, Address src);
++ void blsrq(Register dst, Register src);
++ void blsrq(Register dst, Address src);
++
+ void bsfl(Register dst, Register src);
+ void bsrl(Register dst, Register src);
+
+@@ -1405,6 +1451,8 @@
+ // Pemutation of 64bit words
+ void vpermq(XMMRegister dst, XMMRegister src, int imm8, bool vector256);
+
++ void pause();
++
+ // SSE4.2 string instructions
+ void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
+ void pcmpestri(XMMRegister xmm1, Address src, int imm8);
+@@ -1489,6 +1537,8 @@
+
+ void rclq(Register dst, int imm8);
+
++ void rdtsc();
++
+ void ret(int imm16);
+
+ void sahf();
+@@ -1574,6 +1624,9 @@
+ void testq(Register dst, int32_t imm32);
+ void testq(Register dst, Register src);
+
++ // BMI - count trailing zeros
++ void tzcntl(Register dst, Register src);
++ void tzcntq(Register dst, Register src);
+
+ // Unordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
+ void ucomisd(XMMRegister dst, Address src);
+@@ -1583,16 +1636,22 @@
+ void ucomiss(XMMRegister dst, Address src);
+ void ucomiss(XMMRegister dst, XMMRegister src);
+
++ void xabort(int8_t imm8);
++
+ void xaddl(Address dst, Register src);
+
+ void xaddq(Address dst, Register src);
+
++ void xbegin(Label& abort, relocInfo::relocType rtype = relocInfo::none);
++
+ void xchgl(Register reg, Address adr);
+ void xchgl(Register dst, Register src);
+
+ void xchgq(Register reg, Address adr);
+ void xchgq(Register dst, Register src);
+
++ void xend();
++
+ // Get Value of Extended Control Register
+ void xgetbv();
+
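
The header also gains xbegin/xend/xabort, the RTM primitives behind the UseRTMLocking machinery added further down: xbegin starts a transaction and doubles as the abort branch target (the abort status arrives in EAX), xend commits, and xabort aborts with an 8-bit code. Outside the VM the same shape is reachable through compiler intrinsics; a hedged sketch, assuming an RTM-capable CPU and compilation with -mrtm:

#include <immintrin.h>
#include <cstdio>

// Minimal RTM retry loop: try the transaction a few times, then fall
// back to a conventional path. _xbegin() returns _XBEGIN_STARTED on
// entry; any other value is the abort status delivered at the target.
static int counter = 0;

static void increment_transactionally() {
    for (int retry = 0; retry < 5; ++retry) {
        unsigned status = _xbegin();
        if (status == _XBEGIN_STARTED) {
            ++counter;          // speculative region
            _xend();            // commit
            return;
        }
        // aborted: status says why (conflict, capacity, explicit xabort, ...)
    }
    ++counter;                  // fallback path (a real lock in practice)
}

int main() {
    increment_transactionally();
    std::printf("counter=%d\n", counter);
}
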
+--- ./hotspot/src/cpu/x86/vm/bytecodeInterpreter_x86.hpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/x86/vm/bytecodeInterpreter_x86.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -94,7 +94,7 @@
+ #define LOCALS_ADDR(offset) ((address)locals[-(offset)])
+ #define LOCALS_INT(offset) ((jint)(locals[-(offset)]))
+ #define LOCALS_FLOAT(offset) (*((jfloat*)&locals[-(offset)]))
+-#define LOCALS_OBJECT(offset) ((oop)locals[-(offset)])
++#define LOCALS_OBJECT(offset) (cast_to_oop(locals[-(offset)]))
+ #define LOCALS_DOUBLE(offset) (((VMJavaVal64*)&locals[-((offset) + 1)])->d)
+ #define LOCALS_LONG(offset) (((VMJavaVal64*)&locals[-((offset) + 1)])->l)
+ #define LOCALS_LONG_AT(offset) (((address)&locals[-((offset) + 1)]))
+--- ./hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -288,7 +288,7 @@
+
+ // build frame
+ ciMethod* m = compilation()->method();
+- __ build_frame(initial_frame_size_in_bytes());
++ __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes());
+
+ // OSR buffer is
+ //
+@@ -376,7 +376,7 @@
+ }
+
+ // This specifies the rsp decrement needed to build the frame
+-int LIR_Assembler::initial_frame_size_in_bytes() {
++int LIR_Assembler::initial_frame_size_in_bytes() const {
+ // if rounding, must let FrameMap know!
+
+ // The frame_map records size in slots (32bit word)
+@@ -801,7 +801,13 @@
+ if (UseCompressedOops && !wide) {
+ __ movl(as_Address(addr), (int32_t)NULL_WORD);
+ } else {
++#ifdef _LP64
++ __ xorptr(rscratch1, rscratch1);
++ null_check_here = code_offset();
++ __ movptr(as_Address(addr), rscratch1);
++#else
+ __ movptr(as_Address(addr), NULL_WORD);
++#endif
+ }
+ } else {
+ if (is_literal_address(addr)) {
+--- ./hotspot/src/cpu/x86/vm/c1_LinearScan_x86.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/x86/vm/c1_LinearScan_x86.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2005, 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+@@ -95,7 +95,7 @@
+
+ #ifndef PRODUCT
+ if (TraceFPURegisterUsage) {
+- tty->print("FPU regs for block %d, LIR instr %d): ", b->block_id(), id); regs.print_on(tty); tty->print_cr("");
++ tty->print("FPU regs for block %d, LIR instr %d): ", b->block_id(), id); regs.print_on(tty); tty->cr();
+ }
+ #endif
+ }
+--- ./hotspot/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -349,13 +349,14 @@
+ }
+
+
+-void C1_MacroAssembler::build_frame(int frame_size_in_bytes) {
++void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_bytes) {
++ assert(bang_size_in_bytes >= frame_size_in_bytes, "stack bang size incorrect");
+ // Make sure there is enough stack space for this method's activation.
+ // Note that we do this before doing an enter(). This matches the
+ // ordering of C2's stack overflow check / rsp decrement and allows
+ // the SharedRuntime stack overflow handling to be consistent
+ // between the two compilers.
+- generate_stack_overflow_check(frame_size_in_bytes);
++ generate_stack_overflow_check(bang_size_in_bytes);
+
+ push(rbp);
+ #ifdef TIERED
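
build_frame() now receives a separate bang_size_in_bytes, asserted to be at least the frame size, so generate_stack_overflow_check() probes far enough to cover the (possibly larger) interpreter frame the method could be deoptimized into. The probing idea itself, stripped of VM context (a sketch only; real banging writes below the live stack pointer, and the page size comes from the OS):

#include <cstddef>

// Illustrative stack bang: touch one byte per page across the region a
// new frame will occupy, so any guard-page fault happens up front at a
// known pc rather than in the middle of the method.
static const size_t kPageSize = 4096;

static void bang_stack(volatile char* sp, size_t bang_size) {
    for (size_t off = kPageSize; off <= bang_size; off += kPageSize)
        sp[-static_cast<ptrdiff_t>(off)] = 0;
}

int main() {
    static char fake_stack[4 * 4096];   // stand-in for the thread stack
    bang_stack(fake_stack + sizeof(fake_stack), sizeof(fake_stack));
    return 0;
}
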
+--- ./hotspot/src/cpu/x86/vm/c2_globals_x86.hpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/x86/vm/c2_globals_x86.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -88,6 +88,8 @@
+ define_pd_global(uintx, CodeCacheMinBlockLength, 4);
+ define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K);
+
++define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed on x86.
++
+ // Heap related flags
+ define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M));
+
+--- ./hotspot/src/cpu/x86/vm/compiledIC_x86.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/x86/vm/compiledIC_x86.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+@@ -122,7 +122,7 @@
+ if (TraceICs) {
+ ResourceMark rm;
+ tty->print_cr("CompiledStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s",
+- instruction_address(),
++ p2i(instruction_address()),
+ callee->name_and_sig_as_C_string());
+ }
+
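
Several hunks in this patch (here and in frame_x86.cpp, interpreter_x86_64.cpp, and others) wrap pointer arguments in p2i() so they match INTPTR_FORMAT on every platform; the companion PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC lines silence the remaining -Wformat noise. The shim amounts to the following (HotSpot's p2i() returns intptr_t; an unsigned variant is shown here to keep the format macro exact):

#include <cinttypes>
#include <cstdint>
#include <cstdio>

// p2i-style shim: route pointer arguments through an integer type that
// matches the format macro, so an INTPTR_FORMAT-style string is valid
// on both 32- and 64-bit builds.
static uintptr_t p2i(const void* p) { return (uintptr_t)p; }

int main() {
    int x = 0;
    std::printf("ip 0x%016" PRIxPTR "\n", p2i(&x));
}
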
+--- ./hotspot/src/cpu/x86/vm/cppInterpreter_x86.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/x86/vm/cppInterpreter_x86.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -1354,7 +1354,7 @@
+
+ // reset handle block
+ __ movptr(t, Address(thread, JavaThread::active_handles_offset()));
+- __ movptr(Address(t, JNIHandleBlock::top_offset_in_bytes()), (int32_t)NULL_WORD);
++ __ movl(Address(t, JNIHandleBlock::top_offset_in_bytes()), (int32_t)NULL_WORD);
+
+ // If result was an oop then unbox and save it in the frame
+ { Label L;
+@@ -2336,29 +2336,42 @@
+ "Stack top out of range");
+ }
+
+-int AbstractInterpreter::layout_activation(Method* method,
+- int tempcount, //
+- int popframe_extra_args,
+- int moncount,
+- int caller_actual_parameters,
+- int callee_param_count,
+- int callee_locals,
+- frame* caller,
+- frame* interpreter_frame,
+- bool is_top_frame,
+- bool is_bottom_frame) {
+-
+- assert(popframe_extra_args == 0, "FIX ME");
+- // NOTE this code must exactly mimic what InterpreterGenerator::generate_compute_interpreter_state()
+- // does as far as allocating an interpreter frame.
+- // If interpreter_frame!=NULL, set up the method, locals, and monitors.
+- // The frame interpreter_frame, if not NULL, is guaranteed to be the right size,
+- // as determined by a previous call to this method.
+- // It is also guaranteed to be walkable even though it is in a skeletal state
++
++static int frame_size_helper(int max_stack,
++ int tempcount,
++ int moncount,
++ int callee_param_count,
++ int callee_locals,
++ bool is_top_frame,
++ int& monitor_size,
++ int& full_frame_size) {
++ int extra_locals_size = (callee_locals - callee_param_count) * BytesPerWord;
++ monitor_size = sizeof(BasicObjectLock) * moncount;
++
++ // First calculate the frame size without any java expression stack
++ int short_frame_size = size_activation_helper(extra_locals_size,
++ monitor_size);
++
++ // Now with full size expression stack
++ full_frame_size = short_frame_size + max_stack * BytesPerWord;
++
++ // and now with only the live portion of the expression stack
++ short_frame_size = short_frame_size + tempcount * BytesPerWord;
++
++ // the size of the activation right now; only the top frame is full size
++ int frame_size = (is_top_frame ? full_frame_size : short_frame_size);
++ return frame_size;
++}
++
++int AbstractInterpreter::size_activation(int max_stack,
++ int tempcount,
++ int extra_args,
++ int moncount,
++ int callee_param_count,
++ int callee_locals,
++ bool is_top_frame) {
++ assert(extra_args == 0, "FIX ME");
+ // NOTE: return size is in words not bytes
+- // NOTE: tempcount is the current size of the java expression stack. For top most
+- // frames we will allocate a full sized expression stack and not the curback
+- // version that non-top frames have.
+
+ // Calculate the amount our frame will be adjust by the callee. For top frame
+ // this is zero.
+@@ -2368,87 +2381,102 @@
+ // to it. So it ignores last_frame_adjust value. Seems suspicious as far
+ // as getting sender_sp correct.
+
+- int extra_locals_size = (callee_locals - callee_param_count) * BytesPerWord;
+- int monitor_size = sizeof(BasicObjectLock) * moncount;
+-
+- // First calculate the frame size without any java expression stack
+- int short_frame_size = size_activation_helper(extra_locals_size,
+- monitor_size);
+-
+- // Now with full size expression stack
+- int full_frame_size = short_frame_size + method->max_stack() * BytesPerWord;
+-
+- // and now with only live portion of the expression stack
+- short_frame_size = short_frame_size + tempcount * BytesPerWord;
+-
+- // the size the activation is right now. Only top frame is full size
+- int frame_size = (is_top_frame ? full_frame_size : short_frame_size);
+-
+- if (interpreter_frame != NULL) {
++ int unused_monitor_size = 0;
++ int unused_full_frame_size = 0;
++ return frame_size_helper(max_stack, tempcount, moncount, callee_param_count, callee_locals,
++ is_top_frame, unused_monitor_size, unused_full_frame_size)/BytesPerWord;
++}
++
++void AbstractInterpreter::layout_activation(Method* method,
++ int tempcount, //
++ int popframe_extra_args,
++ int moncount,
++ int caller_actual_parameters,
++ int callee_param_count,
++ int callee_locals,
++ frame* caller,
++ frame* interpreter_frame,
++ bool is_top_frame,
++ bool is_bottom_frame) {
++
++ assert(popframe_extra_args == 0, "FIX ME");
++ // NOTE this code must exactly mimic what InterpreterGenerator::generate_compute_interpreter_state()
++ // does as far as allocating an interpreter frame.
++ // Set up the method, locals, and monitors.
++ // The frame interpreter_frame is guaranteed to be the right size,
++ // as determined by a previous call to the size_activation() method.
++ // It is also guaranteed to be walkable even though it is in a skeletal state
++ // NOTE: tempcount is the current size of the java expression stack. For the
++ // topmost frame we allocate a full-sized expression stack, not the cut-back
++ // version that non-top frames have.
++
++ int monitor_size = 0;
++ int full_frame_size = 0;
++ int frame_size = frame_size_helper(method->max_stack(), tempcount, moncount, callee_param_count, callee_locals,
++ is_top_frame, monitor_size, full_frame_size);
++
+ #ifdef ASSERT
+- assert(caller->unextended_sp() == interpreter_frame->interpreter_frame_sender_sp(), "Frame not properly walkable");
++ assert(caller->unextended_sp() == interpreter_frame->interpreter_frame_sender_sp(), "Frame not properly walkable");
+ #endif
+
+- // MUCHO HACK
+-
+- intptr_t* frame_bottom = (intptr_t*) ((intptr_t)interpreter_frame->sp() - (full_frame_size - frame_size));
+-
+- /* Now fillin the interpreterState object */
+-
+- // The state object is the first thing on the frame and easily located
+-
+- interpreterState cur_state = (interpreterState) ((intptr_t)interpreter_frame->fp() - sizeof(BytecodeInterpreter));
+-
+-
+- // Find the locals pointer. This is rather simple on x86 because there is no
+- // confusing rounding at the callee to account for. We can trivially locate
+- // our locals based on the current fp().
+- // Note: the + 2 is for handling the "static long no_params() method" issue.
+- // (too bad I don't really remember that issue well...)
+-
+- intptr_t* locals;
+- // If the caller is interpreted we need to make sure that locals points to the first
+- // argument that the caller passed and not in an area where the stack might have been extended.
+- // because the stack to stack to converter needs a proper locals value in order to remove the
+- // arguments from the caller and place the result in the proper location. Hmm maybe it'd be
+- // simpler if we simply stored the result in the BytecodeInterpreter object and let the c++ code
+- // adjust the stack?? HMMM QQQ
+- //
+- if (caller->is_interpreted_frame()) {
+- // locals must agree with the caller because it will be used to set the
+- // caller's tos when we return.
+- interpreterState prev = caller->get_interpreterState();
+- // stack() is prepushed.
+- locals = prev->stack() + method->size_of_parameters();
+- // locals = caller->unextended_sp() + (method->size_of_parameters() - 1);
+- if (locals != interpreter_frame->fp() + frame::sender_sp_offset + (method->max_locals() - 1) + 2) {
+- // os::breakpoint();
+- }
+- } else {
+- // this is where a c2i would have placed locals (except for the +2)
+- locals = interpreter_frame->fp() + frame::sender_sp_offset + (method->max_locals() - 1) + 2;
++ // MUCHO HACK
++
++ intptr_t* frame_bottom = (intptr_t*) ((intptr_t)interpreter_frame->sp() - (full_frame_size - frame_size));
++
++ /* Now fillin the interpreterState object */
++
++ // The state object is the first thing on the frame and easily located
++
++ interpreterState cur_state = (interpreterState) ((intptr_t)interpreter_frame->fp() - sizeof(BytecodeInterpreter));
++
++
++ // Find the locals pointer. This is rather simple on x86 because there is no
++ // confusing rounding at the callee to account for. We can trivially locate
++ // our locals based on the current fp().
++ // Note: the + 2 is for handling the "static long no_params() method" issue.
++ // (too bad I don't really remember that issue well...)
++
++ intptr_t* locals;
++ // If the caller is interpreted we need to make sure that locals points to the first
++ // argument that the caller passed and not in an area where the stack might have been extended.
++ // because the stack-to-stack converter needs a proper locals value in order to remove the
++ // arguments from the caller and place the result in the proper location. Hmm maybe it'd be
++ // simpler if we simply stored the result in the BytecodeInterpreter object and let the c++ code
++ // adjust the stack?? HMMM QQQ
++ //
++ if (caller->is_interpreted_frame()) {
++ // locals must agree with the caller because it will be used to set the
++ // caller's tos when we return.
++ interpreterState prev = caller->get_interpreterState();
++ // stack() is prepushed.
++ locals = prev->stack() + method->size_of_parameters();
++ // locals = caller->unextended_sp() + (method->size_of_parameters() - 1);
++ if (locals != interpreter_frame->fp() + frame::sender_sp_offset + (method->max_locals() - 1) + 2) {
++ // os::breakpoint();
+ }
+-
+- intptr_t* monitor_base = (intptr_t*) cur_state;
+- intptr_t* stack_base = (intptr_t*) ((intptr_t) monitor_base - monitor_size);
+- /* +1 because stack is always prepushed */
+- intptr_t* stack = (intptr_t*) ((intptr_t) stack_base - (tempcount + 1) * BytesPerWord);
+-
+-
+- BytecodeInterpreter::layout_interpreterState(cur_state,
+- caller,
+- interpreter_frame,
+- method,
+- locals,
+- stack,
+- stack_base,
+- monitor_base,
+- frame_bottom,
+- is_top_frame);
+-
+- // BytecodeInterpreter::pd_layout_interpreterState(cur_state, interpreter_return_address, interpreter_frame->fp());
++ } else {
++ // this is where a c2i would have placed locals (except for the +2)
++ locals = interpreter_frame->fp() + frame::sender_sp_offset + (method->max_locals() - 1) + 2;
+ }
+- return frame_size/BytesPerWord;
++
++ intptr_t* monitor_base = (intptr_t*) cur_state;
++ intptr_t* stack_base = (intptr_t*) ((intptr_t) monitor_base - monitor_size);
++ /* +1 because stack is always prepushed */
++ intptr_t* stack = (intptr_t*) ((intptr_t) stack_base - (tempcount + 1) * BytesPerWord);
++
++
++ BytecodeInterpreter::layout_interpreterState(cur_state,
++ caller,
++ interpreter_frame,
++ method,
++ locals,
++ stack,
++ stack_base,
++ monitor_base,
++ frame_bottom,
++ is_top_frame);
++
++ // BytecodeInterpreter::pd_layout_interpreterState(cur_state, interpreter_return_address, interpreter_frame->fp());
+ }
+
+ #endif // CC_INTERP (all)
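
The cppInterpreter_x86.cpp rewrite splits the old layout_activation() — which both sized a frame and, when handed one, filled it in — into size_activation() plus layout_activation(), with frame_size_helper() shared between them. That measure-then-fill shape, reduced to a generic two-phase API (all names invented; the real code sizes and lays out interpreter frames):

#include <cstdio>
#include <vector>

// Phase 1 reports the size; phase 2 fills a buffer the caller allocated
// to exactly that size.
static int size_activation(int locals, int stack_slots) {
    return 2 /* header words */ + locals + stack_slots;
}

static void layout_activation(int* frame, int locals, int stack_slots) {
    frame[0] = locals;                        // header word 0
    frame[1] = stack_slots;                   // header word 1
    for (int i = 0; i < locals + stack_slots; ++i)
        frame[2 + i] = 0;                     // zero locals and stack
}

int main() {
    int words = size_activation(3, 4);
    std::vector<int> frame(words);
    layout_activation(frame.data(), 3, 4);
    std::printf("frame is %d words\n", words);
}
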
+--- ./hotspot/src/cpu/x86/vm/frame_x86.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/x86/vm/frame_x86.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+@@ -48,6 +48,7 @@
+ }
+ #endif
+
++PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
+
+ // Profiling/safepoint support
+
+--- ./hotspot/src/cpu/x86/vm/frame_x86.inline.hpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/x86/vm/frame_x86.inline.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -247,6 +247,10 @@
+ }
+ }
+
++inline oop* frame::interpreter_frame_temp_oop_addr() const {
++ return (oop *)(fp() + interpreter_frame_oop_temp_offset);
++}
++
+ #endif /* CC_INTERP */
+
+ inline int frame::pd_oop_map_offset_adjustment() const {
+--- ./hotspot/src/cpu/x86/vm/globalDefinitions_x86.hpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/x86/vm/globalDefinitions_x86.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -27,6 +27,12 @@
+
+ const int StackAlignmentInBytes = 16;
+
++// Indicates whether the C calling conventions require that
++// 32-bit integer argument values are properly extended to 64 bits.
++// If set, SharedRuntime::c_calling_convention() must adapt
++// signatures accordingly.
++const bool CCallingConventionRequiresIntsAsLongs = false;
++
+ #define SUPPORTS_NATIVE_CX8
+
+ #endif // CPU_X86_VM_GLOBALDEFINITIONS_X86_HPP
+--- ./hotspot/src/cpu/x86/vm/globals_x86.hpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/x86/vm/globals_x86.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -37,7 +37,8 @@
+ define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this
+
+ define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks
+-define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs past to check cast
++define_pd_global(bool, TrapBasedNullChecks, false); // Not needed on x86.
++define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast
+
+ // See 4827828 for this change. There is no globals_core_i486.hpp. I can't
+ // assign a different value for C2 without touching a number of files. Use
+@@ -128,11 +129,53 @@
+ product(bool, UseFastStosb, false, \
+ "Use fast-string operation for zeroing: rep stosb") \
+ \
++ /* Use Restricted Transactional Memory for lock eliding */ \
++ experimental(bool, UseRTMLocking, false, \
++ "Enable RTM lock eliding for inflated locks in compiled code") \
++ \
++ experimental(bool, UseRTMForStackLocks, false, \
++ "Enable RTM lock eliding for stack locks in compiled code") \
++ \
++ experimental(bool, UseRTMDeopt, false, \
++ "Perform deopt and recompilation based on RTM abort ratio") \
++ \
++ experimental(uintx, RTMRetryCount, 5, \
++ "Number of RTM retries on lock abort or busy") \
++ \
++ experimental(intx, RTMSpinLoopCount, 100, \
++ "Spin count for lock to become free before RTM retry") \
++ \
++ experimental(intx, RTMAbortThreshold, 1000, \
++ "Calculate abort ratio after this number of aborts") \
++ \
++ experimental(intx, RTMLockingThreshold, 10000, \
++ "Lock count at which to do RTM lock eliding without " \
++ "abort ratio calculation") \
++ \
++ experimental(intx, RTMAbortRatio, 50, \
++ "Lock abort ratio at which to stop use RTM lock eliding") \
++ \
++ experimental(intx, RTMTotalCountIncrRate, 64, \
++ "Increment total RTM attempted lock count once every n times") \
++ \
++ experimental(intx, RTMLockingCalculationDelay, 0, \
++ "Number of milliseconds to wait before start calculating aborts " \
++ "for RTM locking") \
++ \
++ experimental(bool, UseRTMXendForLockBusy, true, \
++ "Use RTM Xend instead of Xabort when lock busy") \
++ \
+ /* assembler */ \
+ product(bool, Use486InstrsOnly, false, \
+ "Use 80486 Compliant instruction subset") \
+ \
+ product(bool, UseCountLeadingZerosInstruction, false, \
+ "Use count leading zeros instruction") \
++ \
++ product(bool, UseCountTrailingZerosInstruction, false, \
++ "Use count trailing zeros instruction") \
++ \
++ product(bool, UseBMI1Instructions, false, \
++ "Use BMI instructions")
+
+ #endif // CPU_X86_VM_GLOBALS_X86_HPP
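
The new experimental(...) entries parameterize RTM lock eliding; in particular RTMAbortThreshold and RTMAbortRatio feed a percentage check deciding when eliding has stopped paying off (consumed by the UseRTMDeopt path). A hedged sketch of that bookkeeping, with the RTMTotalCountIncrRate counter scaling omitted for clarity:

#include <cstdio>

// Illustrative abort-ratio check in the spirit of UseRTMDeopt: once
// enough aborts are seen, compare aborts/total against RTMAbortRatio
// (a percentage) and stop eliding when it is exceeded.
struct RTMCounters {
    long total  = 0;
    long aborts = 0;
};

static bool keep_eliding(const RTMCounters& c,
                         long abort_threshold /* RTMAbortThreshold */,
                         long abort_ratio     /* RTMAbortRatio, percent */) {
    if (c.aborts < abort_threshold) return true;   // not enough data yet
    return c.aborts * 100 < c.total * abort_ratio;
}

int main() {
    RTMCounters c { 10000, 6000 };
    std::printf("keep eliding: %s\n",
                keep_eliding(c, 1000, 50) ? "yes" : "no");  // 60% > 50% -> no
}
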
+--- ./hotspot/src/cpu/x86/vm/interp_masm_x86.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/x86/vm/interp_masm_x86.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -127,7 +127,7 @@
+
+ if (MethodData::profile_return()) {
+ // We're right after the type profile for the last
+- // argument. tmp is the number of cell left in the
++ // argument. tmp is the number of cells left in the
+ // CallTypeData/VirtualCallTypeData to reach its end. Non null
+ // if there's a return to profile.
+ assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type");
+@@ -137,7 +137,7 @@
+ movptr(Address(rbp, frame::interpreter_frame_mdx_offset * wordSize), mdp);
+ } else {
+ assert(MethodData::profile_return(), "either profile call args or call ret");
+- update_mdp_by_constant(mdp, in_bytes(ReturnTypeEntry::size()));
++ update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size()));
+ }
+
+ // mdp points right after the end of the
+@@ -198,7 +198,7 @@
+ // parameters. Collect profiling from last parameter down.
+ // mdo start + parameters offset + array length - 1
+ addptr(mdp, tmp1);
+- movptr(tmp1, Address(mdp, in_bytes(ArrayData::array_len_offset())));
++ movptr(tmp1, Address(mdp, ArrayData::array_len_offset()));
+ decrement(tmp1, TypeStackSlotEntries::per_arg_count());
+
+ Label loop;
+--- ./hotspot/src/cpu/x86/vm/interpreter_x86_64.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/x86/vm/interpreter_x86_64.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+@@ -51,6 +51,7 @@
+
+ #define __ _masm->
+
++PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
+
+ #ifdef _WIN64
+ address AbstractInterpreterGenerator::generate_slow_signature_handler() {
+--- ./hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+@@ -55,6 +55,7 @@
+
+ #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
+
++PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
+
+ #ifdef ASSERT
+ bool AbstractAssembler::pd_check_instruction_mark() { return true; }
+@@ -98,217 +99,6 @@
+ return Address::make_array(adr);
+ }
+
+-int MacroAssembler::biased_locking_enter(Register lock_reg,
+- Register obj_reg,
+- Register swap_reg,
+- Register tmp_reg,
+- bool swap_reg_contains_mark,
+- Label& done,
+- Label* slow_case,
+- BiasedLockingCounters* counters) {
+- assert(UseBiasedLocking, "why call this otherwise?");
+- assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg");
+- assert_different_registers(lock_reg, obj_reg, swap_reg);
+-
+- if (PrintBiasedLockingStatistics && counters == NULL)
+- counters = BiasedLocking::counters();
+-
+- bool need_tmp_reg = false;
+- if (tmp_reg == noreg) {
+- need_tmp_reg = true;
+- tmp_reg = lock_reg;
+- } else {
+- assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
+- }
+- assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
+- Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes());
+- Address klass_addr (obj_reg, oopDesc::klass_offset_in_bytes());
+- Address saved_mark_addr(lock_reg, 0);
+-
+- // Biased locking
+- // See whether the lock is currently biased toward our thread and
+- // whether the epoch is still valid
+- // Note that the runtime guarantees sufficient alignment of JavaThread
+- // pointers to allow age to be placed into low bits
+- // First check to see whether biasing is even enabled for this object
+- Label cas_label;
+- int null_check_offset = -1;
+- if (!swap_reg_contains_mark) {
+- null_check_offset = offset();
+- movl(swap_reg, mark_addr);
+- }
+- if (need_tmp_reg) {
+- push(tmp_reg);
+- }
+- movl(tmp_reg, swap_reg);
+- andl(tmp_reg, markOopDesc::biased_lock_mask_in_place);
+- cmpl(tmp_reg, markOopDesc::biased_lock_pattern);
+- if (need_tmp_reg) {
+- pop(tmp_reg);
+- }
+- jcc(Assembler::notEqual, cas_label);
+- // The bias pattern is present in the object's header. Need to check
+- // whether the bias owner and the epoch are both still current.
+- // Note that because there is no current thread register on x86 we
+- // need to store off the mark word we read out of the object to
+- // avoid reloading it and needing to recheck invariants below. This
+- // store is unfortunate but it makes the overall code shorter and
+- // simpler.
+- movl(saved_mark_addr, swap_reg);
+- if (need_tmp_reg) {
+- push(tmp_reg);
+- }
+- get_thread(tmp_reg);
+- xorl(swap_reg, tmp_reg);
+- if (swap_reg_contains_mark) {
+- null_check_offset = offset();
+- }
+- movl(tmp_reg, klass_addr);
+- xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset()));
+- andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));
+- if (need_tmp_reg) {
+- pop(tmp_reg);
+- }
+- if (counters != NULL) {
+- cond_inc32(Assembler::zero,
+- ExternalAddress((address)counters->biased_lock_entry_count_addr()));
+- }
+- jcc(Assembler::equal, done);
+-
+- Label try_revoke_bias;
+- Label try_rebias;
+-
+- // At this point we know that the header has the bias pattern and
+- // that we are not the bias owner in the current epoch. We need to
+- // figure out more details about the state of the header in order to
+- // know what operations can be legally performed on the object's
+- // header.
+-
+- // If the low three bits in the xor result aren't clear, that means
+- // the prototype header is no longer biased and we have to revoke
+- // the bias on this object.
+- testl(swap_reg, markOopDesc::biased_lock_mask_in_place);
+- jcc(Assembler::notZero, try_revoke_bias);
+-
+- // Biasing is still enabled for this data type. See whether the
+- // epoch of the current bias is still valid, meaning that the epoch
+- // bits of the mark word are equal to the epoch bits of the
+- // prototype header. (Note that the prototype header's epoch bits
+- // only change at a safepoint.) If not, attempt to rebias the object
+- // toward the current thread. Note that we must be absolutely sure
+- // that the current epoch is invalid in order to do this because
+- // otherwise the manipulations it performs on the mark word are
+- // illegal.
+- testl(swap_reg, markOopDesc::epoch_mask_in_place);
+- jcc(Assembler::notZero, try_rebias);
+-
+- // The epoch of the current bias is still valid but we know nothing
+- // about the owner; it might be set or it might be clear. Try to
+- // acquire the bias of the object using an atomic operation. If this
+- // fails we will go in to the runtime to revoke the object's bias.
+- // Note that we first construct the presumed unbiased header so we
+- // don't accidentally blow away another thread's valid bias.
+- movl(swap_reg, saved_mark_addr);
+- andl(swap_reg,
+- markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
+- if (need_tmp_reg) {
+- push(tmp_reg);
+- }
+- get_thread(tmp_reg);
+- orl(tmp_reg, swap_reg);
+- if (os::is_MP()) {
+- lock();
+- }
+- cmpxchgptr(tmp_reg, Address(obj_reg, 0));
+- if (need_tmp_reg) {
+- pop(tmp_reg);
+- }
+- // If the biasing toward our thread failed, this means that
+- // another thread succeeded in biasing it toward itself and we
+- // need to revoke that bias. The revocation will occur in the
+- // interpreter runtime in the slow case.
+- if (counters != NULL) {
+- cond_inc32(Assembler::zero,
+- ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr()));
+- }
+- if (slow_case != NULL) {
+- jcc(Assembler::notZero, *slow_case);
+- }
+- jmp(done);
+-
+- bind(try_rebias);
+- // At this point we know the epoch has expired, meaning that the
+- // current "bias owner", if any, is actually invalid. Under these
+- // circumstances _only_, we are allowed to use the current header's
+- // value as the comparison value when doing the cas to acquire the
+- // bias in the current epoch. In other words, we allow transfer of
+- // the bias from one thread to another directly in this situation.
+- //
+- // FIXME: due to a lack of registers we currently blow away the age
+- // bits in this situation. Should attempt to preserve them.
+- if (need_tmp_reg) {
+- push(tmp_reg);
+- }
+- get_thread(tmp_reg);
+- movl(swap_reg, klass_addr);
+- orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset()));
+- movl(swap_reg, saved_mark_addr);
+- if (os::is_MP()) {
+- lock();
+- }
+- cmpxchgptr(tmp_reg, Address(obj_reg, 0));
+- if (need_tmp_reg) {
+- pop(tmp_reg);
+- }
+- // If the biasing toward our thread failed, then another thread
+- // succeeded in biasing it toward itself and we need to revoke that
+- // bias. The revocation will occur in the runtime in the slow case.
+- if (counters != NULL) {
+- cond_inc32(Assembler::zero,
+- ExternalAddress((address)counters->rebiased_lock_entry_count_addr()));
+- }
+- if (slow_case != NULL) {
+- jcc(Assembler::notZero, *slow_case);
+- }
+- jmp(done);
+-
+- bind(try_revoke_bias);
+- // The prototype mark in the klass doesn't have the bias bit set any
+- // more, indicating that objects of this data type are not supposed
+- // to be biased any more. We are going to try to reset the mark of
+- // this object to the prototype value and fall through to the
+- // CAS-based locking scheme. Note that if our CAS fails, it means
+- // that another thread raced us for the privilege of revoking the
+- // bias of this particular object, so it's okay to continue in the
+- // normal locking code.
+- //
+- // FIXME: due to a lack of registers we currently blow away the age
+- // bits in this situation. Should attempt to preserve them.
+- movl(swap_reg, saved_mark_addr);
+- if (need_tmp_reg) {
+- push(tmp_reg);
+- }
+- movl(tmp_reg, klass_addr);
+- movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));
+- if (os::is_MP()) {
+- lock();
+- }
+- cmpxchgptr(tmp_reg, Address(obj_reg, 0));
+- if (need_tmp_reg) {
+- pop(tmp_reg);
+- }
+- // Fall through to the normal CAS-based lock, because no matter what
+- // the result of the above CAS, some thread must have succeeded in
+- // removing the bias bit from the object's header.
+- if (counters != NULL) {
+- cond_inc32(Assembler::zero,
+- ExternalAddress((address)counters->revoked_lock_entry_count_addr()));
+- }
+-
+- bind(cas_label);
+-
+- return null_check_offset;
+-}
+ void MacroAssembler::call_VM_leaf_base(address entry_point,
+ int number_of_arguments) {
+ call(RuntimeAddress(entry_point));
+@@ -512,7 +302,9 @@
+ mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate());
+ }
+
+-void MacroAssembler::movptr(Register dst, AddressLiteral src) {
++void MacroAssembler::movptr(Register dst, AddressLiteral src, Register scratch) {
++ // The scratch register is not used here; it exists only to match the
++ // parameter list of the 64-bit version of this method.
+ if (src.is_lval()) {
+ mov_literal32(dst, (intptr_t)src.target(), src.rspec());
+ } else {
+@@ -726,165 +518,6 @@
+ return array;
+ }
+
+-int MacroAssembler::biased_locking_enter(Register lock_reg,
+- Register obj_reg,
+- Register swap_reg,
+- Register tmp_reg,
+- bool swap_reg_contains_mark,
+- Label& done,
+- Label* slow_case,
+- BiasedLockingCounters* counters) {
+- assert(UseBiasedLocking, "why call this otherwise?");
+- assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
+- assert(tmp_reg != noreg, "tmp_reg must be supplied");
+- assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
+- assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
+- Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes());
+- Address saved_mark_addr(lock_reg, 0);
+-
+- if (PrintBiasedLockingStatistics && counters == NULL)
+- counters = BiasedLocking::counters();
+-
+- // Biased locking
+- // See whether the lock is currently biased toward our thread and
+- // whether the epoch is still valid
+- // Note that the runtime guarantees sufficient alignment of JavaThread
+- // pointers to allow age to be placed into low bits
+- // First check to see whether biasing is even enabled for this object
+- Label cas_label;
+- int null_check_offset = -1;
+- if (!swap_reg_contains_mark) {
+- null_check_offset = offset();
+- movq(swap_reg, mark_addr);
+- }
+- movq(tmp_reg, swap_reg);
+- andq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
+- cmpq(tmp_reg, markOopDesc::biased_lock_pattern);
+- jcc(Assembler::notEqual, cas_label);
+- // The bias pattern is present in the object's header. Need to check
+- // whether the bias owner and the epoch are both still current.
+- load_prototype_header(tmp_reg, obj_reg);
+- orq(tmp_reg, r15_thread);
+- xorq(tmp_reg, swap_reg);
+- andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place));
+- if (counters != NULL) {
+- cond_inc32(Assembler::zero,
+- ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
+- }
+- jcc(Assembler::equal, done);
+-
+- Label try_revoke_bias;
+- Label try_rebias;
+-
+- // At this point we know that the header has the bias pattern and
+- // that we are not the bias owner in the current epoch. We need to
+- // figure out more details about the state of the header in order to
+- // know what operations can be legally performed on the object's
+- // header.
+-
+- // If the low three bits in the xor result aren't clear, that means
+- // the prototype header is no longer biased and we have to revoke
+- // the bias on this object.
+- testq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
+- jcc(Assembler::notZero, try_revoke_bias);
+-
+- // Biasing is still enabled for this data type. See whether the
+- // epoch of the current bias is still valid, meaning that the epoch
+- // bits of the mark word are equal to the epoch bits of the
+- // prototype header. (Note that the prototype header's epoch bits
+- // only change at a safepoint.) If not, attempt to rebias the object
+- // toward the current thread. Note that we must be absolutely sure
+- // that the current epoch is invalid in order to do this because
+- // otherwise the manipulations it performs on the mark word are
+- // illegal.
+- testq(tmp_reg, markOopDesc::epoch_mask_in_place);
+- jcc(Assembler::notZero, try_rebias);
+-
+- // The epoch of the current bias is still valid but we know nothing
+- // about the owner; it might be set or it might be clear. Try to
+- // acquire the bias of the object using an atomic operation. If this
+- // fails we will go in to the runtime to revoke the object's bias.
+- // Note that we first construct the presumed unbiased header so we
+- // don't accidentally blow away another thread's valid bias.
+- andq(swap_reg,
+- markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
+- movq(tmp_reg, swap_reg);
+- orq(tmp_reg, r15_thread);
+- if (os::is_MP()) {
+- lock();
+- }
+- cmpxchgq(tmp_reg, Address(obj_reg, 0));
+- // If the biasing toward our thread failed, this means that
+- // another thread succeeded in biasing it toward itself and we
+- // need to revoke that bias. The revocation will occur in the
+- // interpreter runtime in the slow case.
+- if (counters != NULL) {
+- cond_inc32(Assembler::zero,
+- ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
+- }
+- if (slow_case != NULL) {
+- jcc(Assembler::notZero, *slow_case);
+- }
+- jmp(done);
+-
+- bind(try_rebias);
+- // At this point we know the epoch has expired, meaning that the
+- // current "bias owner", if any, is actually invalid. Under these
+- // circumstances _only_, we are allowed to use the current header's
+- // value as the comparison value when doing the cas to acquire the
+- // bias in the current epoch. In other words, we allow transfer of
+- // the bias from one thread to another directly in this situation.
+- //
+- // FIXME: due to a lack of registers we currently blow away the age
+- // bits in this situation. Should attempt to preserve them.
+- load_prototype_header(tmp_reg, obj_reg);
+- orq(tmp_reg, r15_thread);
+- if (os::is_MP()) {
+- lock();
+- }
+- cmpxchgq(tmp_reg, Address(obj_reg, 0));
+- // If the biasing toward our thread failed, then another thread
+- // succeeded in biasing it toward itself and we need to revoke that
+- // bias. The revocation will occur in the runtime in the slow case.
+- if (counters != NULL) {
+- cond_inc32(Assembler::zero,
+- ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
+- }
+- if (slow_case != NULL) {
+- jcc(Assembler::notZero, *slow_case);
+- }
+- jmp(done);
+-
+- bind(try_revoke_bias);
+- // The prototype mark in the klass doesn't have the bias bit set any
+- // more, indicating that objects of this data type are not supposed
+- // to be biased any more. We are going to try to reset the mark of
+- // this object to the prototype value and fall through to the
+- // CAS-based locking scheme. Note that if our CAS fails, it means
+- // that another thread raced us for the privilege of revoking the
+- // bias of this particular object, so it's okay to continue in the
+- // normal locking code.
+- //
+- // FIXME: due to a lack of registers we currently blow away the age
+- // bits in this situation. Should attempt to preserve them.
+- load_prototype_header(tmp_reg, obj_reg);
+- if (os::is_MP()) {
+- lock();
+- }
+- cmpxchgq(tmp_reg, Address(obj_reg, 0));
+- // Fall through to the normal CAS-based lock, because no matter what
+- // the result of the above CAS, some thread must have succeeded in
+- // removing the bias bit from the object's header.
+- if (counters != NULL) {
+- cond_inc32(Assembler::zero,
+- ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
+- }
+-
+- bind(cas_label);
+-
+- return null_check_offset;
+-}
+-
+ void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) {
+ Label L, E;
+
+@@ -983,6 +616,15 @@
+ /* else */ { subq(dst, value) ; return; }
+ }
+
++void MacroAssembler::incrementq(AddressLiteral dst) {
++ if (reachable(dst)) {
++ incrementq(as_Address(dst));
++ } else {
++ lea(rscratch1, dst);
++ incrementq(Address(rscratch1, 0));
++ }
++}
++
+ void MacroAssembler::incrementq(Register reg, int value) {
+ if (value == min_jint) { addq(reg, value); return; }
+ if (value < 0) { decrementq(reg, -value); return; }
+@@ -1051,15 +693,15 @@
+ movq(dst, rscratch1);
+ }
+
+-void MacroAssembler::movptr(Register dst, AddressLiteral src) {
++void MacroAssembler::movptr(Register dst, AddressLiteral src, Register scratch) {
+ if (src.is_lval()) {
+ mov_literal64(dst, (intptr_t)src.target(), src.rspec());
+ } else {
+ if (reachable(src)) {
+ movq(dst, as_Address(src));
+ } else {
+- lea(rscratch1, src);
+- movq(dst, Address(rscratch1,0));
++ lea(scratch, src);
++ movq(dst, Address(scratch, 0));
+ }
+ }
+ }
+@@ -1358,13 +1000,37 @@
+ LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
+ }
+
+-void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
+- pushf();
++void MacroAssembler::atomic_incl(Address counter_addr) {
+ if (os::is_MP())
+ lock();
+ incrementl(counter_addr);
+- popf();
+-}
++}
++
++void MacroAssembler::atomic_incl(AddressLiteral counter_addr, Register scr) {
++ if (reachable(counter_addr)) {
++ atomic_incl(as_Address(counter_addr));
++ } else {
++ lea(scr, counter_addr);
++ atomic_incl(Address(scr, 0));
++ }
++}
++
++#ifdef _LP64
++void MacroAssembler::atomic_incq(Address counter_addr) {
++ if (os::is_MP())
++ lock();
++ incrementq(counter_addr);
++}
++
++void MacroAssembler::atomic_incq(AddressLiteral counter_addr, Register scr) {
++ if (reachable(counter_addr)) {
++ atomic_incq(as_Address(counter_addr));
++ } else {
++ lea(scr, counter_addr);
++ atomic_incq(Address(scr, 0));
++ }
++}
++#endif
+
+ // Writes to stack successive pages until offset reached to check for
+ // stack overflow + shadow pages. This clobbers tmp.
+@@ -1386,13 +1052,241 @@
+ // was post-decremented.) Skip this address by starting at i=1, and
+ // touch a few more pages below. N.B. It is important to touch all
+ // the way down to and including i=StackShadowPages.
+- for (int i = 1; i <= StackShadowPages; i++) {
++ for (int i = 1; i < StackShadowPages; i++) {
+ // this could be any sized move but this can be a debugging crumb
+ // so the bigger the better.
+ movptr(Address(tmp, (-i*os::vm_page_size())), size );
+ }
+ }
+
++int MacroAssembler::biased_locking_enter(Register lock_reg,
++ Register obj_reg,
++ Register swap_reg,
++ Register tmp_reg,
++ bool swap_reg_contains_mark,
++ Label& done,
++ Label* slow_case,
++ BiasedLockingCounters* counters) {
++ assert(UseBiasedLocking, "why call this otherwise?");
++ assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
++ LP64_ONLY( assert(tmp_reg != noreg, "tmp_reg must be supplied"); )
++ bool need_tmp_reg = false;
++ if (tmp_reg == noreg) {
++ need_tmp_reg = true;
++ tmp_reg = lock_reg;
++ assert_different_registers(lock_reg, obj_reg, swap_reg);
++ } else {
++ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
++ }
++ assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
++ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes());
++ Address saved_mark_addr(lock_reg, 0);
++
++ if (PrintBiasedLockingStatistics && counters == NULL) {
++ counters = BiasedLocking::counters();
++ }
++ // Biased locking
++ // See whether the lock is currently biased toward our thread and
++ // whether the epoch is still valid
++ // Note that the runtime guarantees sufficient alignment of JavaThread
++ // pointers to allow age to be placed into low bits
++ // First check to see whether biasing is even enabled for this object
++ Label cas_label;
++ int null_check_offset = -1;
++ if (!swap_reg_contains_mark) {
++ null_check_offset = offset();
++ movptr(swap_reg, mark_addr);
++ }
++ if (need_tmp_reg) {
++ push(tmp_reg);
++ }
++ movptr(tmp_reg, swap_reg);
++ andptr(tmp_reg, markOopDesc::biased_lock_mask_in_place);
++ cmpptr(tmp_reg, markOopDesc::biased_lock_pattern);
++ if (need_tmp_reg) {
++ pop(tmp_reg);
++ }
++ jcc(Assembler::notEqual, cas_label);
++ // The bias pattern is present in the object's header. Need to check
++ // whether the bias owner and the epoch are both still current.
++#ifndef _LP64
++ // Note that because there is no current thread register on x86_32 we
++ // need to store off the mark word we read out of the object to
++ // avoid reloading it and needing to recheck invariants below. This
++ // store is unfortunate but it makes the overall code shorter and
++ // simpler.
++ movptr(saved_mark_addr, swap_reg);
++#endif
++ if (need_tmp_reg) {
++ push(tmp_reg);
++ }
++ if (swap_reg_contains_mark) {
++ null_check_offset = offset();
++ }
++ load_prototype_header(tmp_reg, obj_reg);
++#ifdef _LP64
++ orptr(tmp_reg, r15_thread);
++ xorptr(tmp_reg, swap_reg);
++ Register header_reg = tmp_reg;
++#else
++ xorptr(tmp_reg, swap_reg);
++ get_thread(swap_reg);
++ xorptr(swap_reg, tmp_reg);
++ Register header_reg = swap_reg;
++#endif
++ andptr(header_reg, ~((int) markOopDesc::age_mask_in_place));
++ if (need_tmp_reg) {
++ pop(tmp_reg);
++ }
++ if (counters != NULL) {
++ cond_inc32(Assembler::zero,
++ ExternalAddress((address) counters->biased_lock_entry_count_addr()));
++ }
++ jcc(Assembler::equal, done);
++
++ Label try_revoke_bias;
++ Label try_rebias;
++
++ // At this point we know that the header has the bias pattern and
++ // that we are not the bias owner in the current epoch. We need to
++ // figure out more details about the state of the header in order to
++ // know what operations can be legally performed on the object's
++ // header.
++
++ // If the low three bits in the xor result aren't clear, that means
++ // the prototype header is no longer biased and we have to revoke
++ // the bias on this object.
++ testptr(header_reg, markOopDesc::biased_lock_mask_in_place);
++ jccb(Assembler::notZero, try_revoke_bias);
++
++ // Biasing is still enabled for this data type. See whether the
++ // epoch of the current bias is still valid, meaning that the epoch
++ // bits of the mark word are equal to the epoch bits of the
++ // prototype header. (Note that the prototype header's epoch bits
++ // only change at a safepoint.) If not, attempt to rebias the object
++ // toward the current thread. Note that we must be absolutely sure
++ // that the current epoch is invalid in order to do this because
++ // otherwise the manipulations it performs on the mark word are
++ // illegal.
++ testptr(header_reg, markOopDesc::epoch_mask_in_place);
++ jccb(Assembler::notZero, try_rebias);
++
++ // The epoch of the current bias is still valid but we know nothing
++ // about the owner; it might be set or it might be clear. Try to
++ // acquire the bias of the object using an atomic operation. If this
++ // fails we will go in to the runtime to revoke the object's bias.
++ // Note that we first construct the presumed unbiased header so we
++ // don't accidentally blow away another thread's valid bias.
++ NOT_LP64( movptr(swap_reg, saved_mark_addr); )
++ andptr(swap_reg,
++ markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
++ if (need_tmp_reg) {
++ push(tmp_reg);
++ }
++#ifdef _LP64
++ movptr(tmp_reg, swap_reg);
++ orptr(tmp_reg, r15_thread);
++#else
++ get_thread(tmp_reg);
++ orptr(tmp_reg, swap_reg);
++#endif
++ if (os::is_MP()) {
++ lock();
++ }
++ cmpxchgptr(tmp_reg, mark_addr); // compare tmp_reg and swap_reg
++ if (need_tmp_reg) {
++ pop(tmp_reg);
++ }
++ // If the biasing toward our thread failed, this means that
++ // another thread succeeded in biasing it toward itself and we
++ // need to revoke that bias. The revocation will occur in the
++ // interpreter runtime in the slow case.
++ if (counters != NULL) {
++ cond_inc32(Assembler::zero,
++ ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
++ }
++ if (slow_case != NULL) {
++ jcc(Assembler::notZero, *slow_case);
++ }
++ jmp(done);
++
++ bind(try_rebias);
++ // At this point we know the epoch has expired, meaning that the
++ // current "bias owner", if any, is actually invalid. Under these
++ // circumstances _only_, we are allowed to use the current header's
++ // value as the comparison value when doing the cas to acquire the
++ // bias in the current epoch. In other words, we allow transfer of
++ // the bias from one thread to another directly in this situation.
++ //
++ // FIXME: due to a lack of registers we currently blow away the age
++ // bits in this situation. Should attempt to preserve them.
++ if (need_tmp_reg) {
++ push(tmp_reg);
++ }
++ load_prototype_header(tmp_reg, obj_reg);
++#ifdef _LP64
++ orptr(tmp_reg, r15_thread);
++#else
++ get_thread(swap_reg);
++ orptr(tmp_reg, swap_reg);
++ movptr(swap_reg, saved_mark_addr);
++#endif
++ if (os::is_MP()) {
++ lock();
++ }
++ cmpxchgptr(tmp_reg, mark_addr); // compare tmp_reg and swap_reg
++ if (need_tmp_reg) {
++ pop(tmp_reg);
++ }
++ // If the biasing toward our thread failed, then another thread
++ // succeeded in biasing it toward itself and we need to revoke that
++ // bias. The revocation will occur in the runtime in the slow case.
++ if (counters != NULL) {
++ cond_inc32(Assembler::zero,
++ ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
++ }
++ if (slow_case != NULL) {
++ jcc(Assembler::notZero, *slow_case);
++ }
++ jmp(done);
++
++ bind(try_revoke_bias);
++ // The prototype mark in the klass doesn't have the bias bit set any
++ // more, indicating that objects of this data type are not supposed
++ // to be biased any more. We are going to try to reset the mark of
++ // this object to the prototype value and fall through to the
++ // CAS-based locking scheme. Note that if our CAS fails, it means
++ // that another thread raced us for the privilege of revoking the
++ // bias of this particular object, so it's okay to continue in the
++ // normal locking code.
++ //
++ // FIXME: due to a lack of registers we currently blow away the age
++ // bits in this situation. Should attempt to preserve them.
++ NOT_LP64( movptr(swap_reg, saved_mark_addr); )
++ if (need_tmp_reg) {
++ push(tmp_reg);
++ }
++ load_prototype_header(tmp_reg, obj_reg);
++ if (os::is_MP()) {
++ lock();
++ }
++ cmpxchgptr(tmp_reg, mark_addr); // compare tmp_reg and swap_reg
++ if (need_tmp_reg) {
++ pop(tmp_reg);
++ }
++ // Fall through to the normal CAS-based lock, because no matter what
++ // the result of the above CAS, some thread must have succeeded in
++ // removing the bias bit from the object's header.
++ if (counters != NULL) {
++ cond_inc32(Assembler::zero,
++ ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
++ }
++
++ bind(cas_label);
++
++ return null_check_offset;
++}
++
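The bit-level triage that biased_locking_enter performs is easier to follow as ordinary C++. Below is a minimal sketch, not VM code, assuming the mark-word layout from markOop.hpp (low bits [epoch:2 | age:4 | biased_lock:1 | lock:2], biased pattern 101b); the enum and function names are illustrative:

```cpp
#include <cstdint>

// Masks assumed from markOop.hpp's bit layout.
const uintptr_t biased_lock_mask = 0x7;   // low 3 bits: biased_lock | lock
const uintptr_t biased_pattern   = 0x5;   // 101b
const uintptr_t age_mask         = 0x78;  // bits 3..6
const uintptr_t epoch_mask       = 0x180; // bits 7..8

enum BiasOutcome { use_cas, done, acquire_bias, rebias, revoke_bias };

BiasOutcome classify(uintptr_t mark, uintptr_t proto, uintptr_t self) {
  if ((mark & biased_lock_mask) != biased_pattern)
    return use_cas;                       // biasing not in effect for this object
  uintptr_t x = (proto | self) ^ mark;    // owner, epoch and pattern in one xor
  x &= ~age_mask;                         // age bits may legitimately differ
  if (x == 0)               return done;        // biased to us, epoch current
  if (x & biased_lock_mask) return revoke_bias; // klass no longer biasable
  if (x & epoch_mask)       return rebias;      // epoch expired: re-CAS the bias
  return acquire_bias;                    // anonymously biased: CAS self in
}
```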
+ void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
+ assert(UseBiasedLocking, "why call this otherwise?");
+
+@@ -1408,6 +1302,992 @@
+ jcc(Assembler::equal, done);
+ }
+
++#ifdef COMPILER2
++
++#if INCLUDE_RTM_OPT
++
++// Update rtm_counters based on abort status
++// input: abort_status
++// rtm_counters (RTMLockingCounters*)
++// flags are killed
++void MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters) {
++
++ atomic_incptr(Address(rtm_counters, RTMLockingCounters::abort_count_offset()));
++ if (PrintPreciseRTMLockingStatistics) {
++ for (int i = 0; i < RTMLockingCounters::ABORT_STATUS_LIMIT; i++) {
++ Label check_abort;
++ testl(abort_status, (1<<i));
++ jccb(Assembler::equal, check_abort);
++ atomic_incptr(Address(rtm_counters, RTMLockingCounters::abortX_count_offset() + (i * sizeof(uintx))));
++ bind(check_abort);
++ }
++ }
++}
++
++// Branch if (random & (count-1) != 0), count is 2^n
++// tmp, scr and flags are killed
++void MacroAssembler::branch_on_random_using_rdtsc(Register tmp, Register scr, int count, Label& brLabel) {
++ assert(tmp == rax, "");
++ assert(scr == rdx, "");
++ rdtsc(); // modifies EDX:EAX
++ andptr(tmp, count-1);
++ jccb(Assembler::notZero, brLabel);
++}
++
++// Perform abort ratio calculation, set no_rtm bit if high ratio
++// input: rtm_counters_Reg (RTMLockingCounters* address)
++// tmpReg, rtm_counters_Reg and flags are killed
++void MacroAssembler::rtm_abort_ratio_calculation(Register tmpReg,
++ Register rtm_counters_Reg,
++ RTMLockingCounters* rtm_counters,
++ Metadata* method_data) {
++ Label L_done, L_check_always_rtm1, L_check_always_rtm2;
++
++ if (RTMLockingCalculationDelay > 0) {
++ // Delay calculation
++ movptr(tmpReg, ExternalAddress((address) RTMLockingCounters::rtm_calculation_flag_addr()), tmpReg);
++ testptr(tmpReg, tmpReg);
++ jccb(Assembler::equal, L_done);
++ }
++ // Abort ratio calculation only if abort_count > RTMAbortThreshold
++ // Aborted transactions = abort_count * 100
++ // All transactions = total_count * RTMTotalCountIncrRate
++ // Set no_rtm bit if (Aborted transactions >= All transactions * RTMAbortRatio)
++
++ movptr(tmpReg, Address(rtm_counters_Reg, RTMLockingCounters::abort_count_offset()));
++ cmpptr(tmpReg, RTMAbortThreshold);
++ jccb(Assembler::below, L_check_always_rtm2);
++ imulptr(tmpReg, tmpReg, 100);
++
++ Register scrReg = rtm_counters_Reg;
++ movptr(scrReg, Address(rtm_counters_Reg, RTMLockingCounters::total_count_offset()));
++ imulptr(scrReg, scrReg, RTMTotalCountIncrRate);
++ imulptr(scrReg, scrReg, RTMAbortRatio);
++ cmpptr(tmpReg, scrReg);
++ jccb(Assembler::below, L_check_always_rtm1);
++ if (method_data != NULL) {
++ // set rtm_state to "no rtm" in MDO
++ mov_metadata(tmpReg, method_data);
++ if (os::is_MP()) {
++ lock();
++ }
++ orl(Address(tmpReg, MethodData::rtm_state_offset_in_bytes()), NoRTM);
++ }
++ jmpb(L_done);
++ bind(L_check_always_rtm1);
++ // Reload RTMLockingCounters* address
++ lea(rtm_counters_Reg, ExternalAddress((address)rtm_counters));
++ bind(L_check_always_rtm2);
++ movptr(tmpReg, Address(rtm_counters_Reg, RTMLockingCounters::total_count_offset()));
++ cmpptr(tmpReg, RTMLockingThreshold / RTMTotalCountIncrRate);
++ jccb(Assembler::below, L_done);
++ if (method_data != NULL) {
++ // set rtm_state to "always rtm" in MDO
++ mov_metadata(tmpReg, method_data);
++ if (os::is_MP()) {
++ lock();
++ }
++ orl(Address(tmpReg, MethodData::rtm_state_offset_in_bytes()), UseRTM);
++ }
++ bind(L_done);
++}
++
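Spelled out in plain C++, the predicate the assembly above evaluates is simple integer arithmetic; scaling the abort count by 100 lets it compare against the RTMAbortRatio percentage without a division. The flag defaults below are assumptions taken from the usual globals.hpp values, not part of this patch:

```cpp
#include <cstdint>

// Assumed defaults for the VM flags referenced above.
const uint64_t RTMAbortThreshold     = 1000;
const uint64_t RTMTotalCountIncrRate = 64;
const uint64_t RTMAbortRatio         = 50;    // percent
const uint64_t RTMLockingThreshold   = 10000;

// Set no_rtm when abort_count*100 >= total*RTMTotalCountIncrRate*RTMAbortRatio.
bool should_disable_rtm(uint64_t abort_count, uint64_t total_count) {
  if (abort_count < RTMAbortThreshold) return false;  // too few aborts to judge
  return abort_count * 100 >= total_count * RTMTotalCountIncrRate * RTMAbortRatio;
}

// Set always_rtm once enough transactions have been observed.
bool should_always_rtm(uint64_t total_count) {
  return total_count >= RTMLockingThreshold / RTMTotalCountIncrRate;
}
```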
++// Update counters and perform abort ratio calculation
++// input: abort_status_Reg
++// rtm_counters_Reg, flags are killed
++void MacroAssembler::rtm_profiling(Register abort_status_Reg,
++ Register rtm_counters_Reg,
++ RTMLockingCounters* rtm_counters,
++ Metadata* method_data,
++ bool profile_rtm) {
++
++ assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
++ // update rtm counters based on rax value at abort
++ // reads abort_status_Reg, updates flags
++ lea(rtm_counters_Reg, ExternalAddress((address)rtm_counters));
++ rtm_counters_update(abort_status_Reg, rtm_counters_Reg);
++ if (profile_rtm) {
++ // Save abort status because abort_status_Reg is used by following code.
++ if (RTMRetryCount > 0) {
++ push(abort_status_Reg);
++ }
++ assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
++ rtm_abort_ratio_calculation(abort_status_Reg, rtm_counters_Reg, rtm_counters, method_data);
++ // restore abort status
++ if (RTMRetryCount > 0) {
++ pop(abort_status_Reg);
++ }
++ }
++}
++
++// Retry on abort if abort's status is 0x6: can retry (0x2) | memory conflict (0x4)
++// inputs: retry_count_Reg
++// : abort_status_Reg
++// output: retry_count_Reg decremented by 1
++// flags are killed
++void MacroAssembler::rtm_retry_lock_on_abort(Register retry_count_Reg, Register abort_status_Reg, Label& retryLabel) {
++ Label doneRetry;
++ assert(abort_status_Reg == rax, "");
++ // The abort reason bits are in eax (see all states in rtmLocking.hpp)
++ // 0x6 = conflict on which we can retry (0x2) | memory conflict (0x4)
++ // if reason is in 0x6 and retry count != 0 then retry
++ andptr(abort_status_Reg, 0x6);
++ jccb(Assembler::zero, doneRetry);
++ testl(retry_count_Reg, retry_count_Reg);
++ jccb(Assembler::zero, doneRetry);
++ pause();
++ decrementl(retry_count_Reg);
++ jmp(retryLabel);
++ bind(doneRetry);
++}
++
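The same retry test in plain C++, following the abort-status bit meanings described in rtmLocking.hpp (sketch only):

```cpp
// abort_status is the value RTM leaves in EAX after an abort; only the
// "can retry" and "memory conflict" bits make another attempt worthwhile.
bool should_retry_on_abort(unsigned abort_status, int retry_count) {
  const unsigned can_retry       = 0x2;  // transient abort, retry may succeed
  const unsigned memory_conflict = 0x4;  // another thread touched our cache lines
  return (abort_status & (can_retry | memory_conflict)) != 0 && retry_count != 0;
}
```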
++// Spin and retry if lock is busy,
++// inputs: box_Reg (monitor address)
++// : retry_count_Reg
++// output: retry_count_Reg decremented by 1
++// : clear z flag if retry count exceeded
++// tmp_Reg, scr_Reg, flags are killed
++void MacroAssembler::rtm_retry_lock_on_busy(Register retry_count_Reg, Register box_Reg,
++ Register tmp_Reg, Register scr_Reg, Label& retryLabel) {
++ Label SpinLoop, SpinExit, doneRetry;
++ // Clean monitor_value bit to get valid pointer
++ int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value;
++
++ testl(retry_count_Reg, retry_count_Reg);
++ jccb(Assembler::zero, doneRetry);
++ decrementl(retry_count_Reg);
++ movptr(scr_Reg, RTMSpinLoopCount);
++
++ bind(SpinLoop);
++ pause();
++ decrementl(scr_Reg);
++ jccb(Assembler::lessEqual, SpinExit);
++ movptr(tmp_Reg, Address(box_Reg, owner_offset));
++ testptr(tmp_Reg, tmp_Reg);
++ jccb(Assembler::notZero, SpinLoop);
++
++ bind(SpinExit);
++ jmp(retryLabel);
++ bind(doneRetry);
++ incrementl(retry_count_Reg); // clear z flag
++}
++
++// Use RTM for normal stack locks
++// Input: objReg (object to lock)
++void MacroAssembler::rtm_stack_locking(Register objReg, Register tmpReg, Register scrReg,
++ Register retry_on_abort_count_Reg,
++ RTMLockingCounters* stack_rtm_counters,
++ Metadata* method_data, bool profile_rtm,
++ Label& DONE_LABEL, Label& IsInflated) {
++ assert(UseRTMForStackLocks, "why call this otherwise?");
++ assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
++ assert(tmpReg == rax, "");
++ assert(scrReg == rdx, "");
++ Label L_rtm_retry, L_decrement_retry, L_on_abort;
++
++ if (RTMRetryCount > 0) {
++ movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
++ bind(L_rtm_retry);
++ }
++ movptr(tmpReg, Address(objReg, 0));
++ testptr(tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased
++ jcc(Assembler::notZero, IsInflated);
++
++ if (PrintPreciseRTMLockingStatistics || profile_rtm) {
++ Label L_noincrement;
++ if (RTMTotalCountIncrRate > 1) {
++ // tmpReg, scrReg and flags are killed
++ branch_on_random_using_rdtsc(tmpReg, scrReg, (int)RTMTotalCountIncrRate, L_noincrement);
++ }
++ assert(stack_rtm_counters != NULL, "should not be NULL when profiling RTM");
++ atomic_incptr(ExternalAddress((address)stack_rtm_counters->total_count_addr()), scrReg);
++ bind(L_noincrement);
++ }
++ xbegin(L_on_abort);
++ movptr(tmpReg, Address(objReg, 0)); // fetch markword
++ andptr(tmpReg, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
++ cmpptr(tmpReg, markOopDesc::unlocked_value); // bits = 001 unlocked
++ jcc(Assembler::equal, DONE_LABEL); // all done if unlocked
++
++ Register abort_status_Reg = tmpReg; // status of abort is stored in RAX
++ if (UseRTMXendForLockBusy) {
++ xend();
++ movptr(abort_status_Reg, 0x2); // Set the abort status to 2 (so we can retry)
++ jmp(L_decrement_retry);
++ }
++ else {
++ xabort(0);
++ }
++ bind(L_on_abort);
++ if (PrintPreciseRTMLockingStatistics || profile_rtm) {
++ rtm_profiling(abort_status_Reg, scrReg, stack_rtm_counters, method_data, profile_rtm);
++ }
++ bind(L_decrement_retry);
++ if (RTMRetryCount > 0) {
++ // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4)
++ rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
++ }
++}
++
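The elision protocol above can be mimicked with compiler intrinsics rather than generated code. A minimal sketch, assuming an RTM-capable CPU and -mrtm; the 0x7/0x1 constants stand in for markOopDesc::biased_lock_mask_in_place and markOopDesc::unlocked_value:

```cpp
#include <cstdint>
#include <immintrin.h>

// Attempt to elide a stack lock (sketch, not the VM's actual path).
bool try_elided_lock(const volatile uintptr_t* mark_word) {
  unsigned status = _xbegin();          // on abort, control resumes here
  if (status == _XBEGIN_STARTED) {
    if ((*mark_word & 0x7) == 0x1)      // low bits 001: neutral/unlocked
      return true;                      // stay transactional; _xend() at unlock
    _xabort(0);                         // locked by someone: abort the txn
  }
  // status now carries the abort cause (_XABORT_RETRY, _XABORT_CONFLICT, ...);
  // the generated code feeds it to rtm_retry_lock_on_abort before falling
  // back to the ordinary stack-locking path.
  return false;
}
```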
++// Use RTM for inflating locks
++// inputs: objReg (object to lock)
++// boxReg (on-stack box address (displaced header location) - KILLED)
++// tmpReg (ObjectMonitor address + 2(monitor_value))
++void MacroAssembler::rtm_inflated_locking(Register objReg, Register boxReg, Register tmpReg,
++ Register scrReg, Register retry_on_busy_count_Reg,
++ Register retry_on_abort_count_Reg,
++ RTMLockingCounters* rtm_counters,
++ Metadata* method_data, bool profile_rtm,
++ Label& DONE_LABEL) {
++ assert(UseRTMLocking, "why call this otherwise?");
++ assert(tmpReg == rax, "");
++ assert(scrReg == rdx, "");
++ Label L_rtm_retry, L_decrement_retry, L_on_abort;
++ // Clean monitor_value bit to get valid pointer
++ int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value;
++
++ // Without cast to int32_t a movptr will destroy r10 which is typically obj
++ movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
++ movptr(boxReg, tmpReg); // Save ObjectMonitor address
++
++ if (RTMRetryCount > 0) {
++ movl(retry_on_busy_count_Reg, RTMRetryCount); // Retry on lock busy
++ movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
++ bind(L_rtm_retry);
++ }
++ if (PrintPreciseRTMLockingStatistics || profile_rtm) {
++ Label L_noincrement;
++ if (RTMTotalCountIncrRate > 1) {
++ // tmpReg, scrReg and flags are killed
++ branch_on_random_using_rdtsc(tmpReg, scrReg, (int)RTMTotalCountIncrRate, L_noincrement);
++ }
++ assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
++ atomic_incptr(ExternalAddress((address)rtm_counters->total_count_addr()), scrReg);
++ bind(L_noincrement);
++ }
++ xbegin(L_on_abort);
++ movptr(tmpReg, Address(objReg, 0));
++ movptr(tmpReg, Address(tmpReg, owner_offset));
++ testptr(tmpReg, tmpReg);
++ jcc(Assembler::zero, DONE_LABEL);
++ if (UseRTMXendForLockBusy) {
++ xend();
++ jmp(L_decrement_retry);
++ }
++ else {
++ xabort(0);
++ }
++ bind(L_on_abort);
++ Register abort_status_Reg = tmpReg; // status of abort is stored in RAX
++ if (PrintPreciseRTMLockingStatistics || profile_rtm) {
++ rtm_profiling(abort_status_Reg, scrReg, rtm_counters, method_data, profile_rtm);
++ }
++ if (RTMRetryCount > 0) {
++ // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4)
++ rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
++ }
++
++ movptr(tmpReg, Address(boxReg, owner_offset)) ;
++ testptr(tmpReg, tmpReg) ;
++ jccb(Assembler::notZero, L_decrement_retry) ;
++
++ // Appears unlocked - try to swing _owner from null to non-null.
++ // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
++#ifdef _LP64
++ Register threadReg = r15_thread;
++#else
++ get_thread(scrReg);
++ Register threadReg = scrReg;
++#endif
++ if (os::is_MP()) {
++ lock();
++ }
++ cmpxchgptr(threadReg, Address(boxReg, owner_offset)); // Updates tmpReg
++
++ if (RTMRetryCount > 0) {
++ // success done else retry
++ jccb(Assembler::equal, DONE_LABEL) ;
++ bind(L_decrement_retry);
++ // Spin and retry if lock is busy.
++ rtm_retry_lock_on_busy(retry_on_busy_count_Reg, boxReg, tmpReg, scrReg, L_rtm_retry);
++ }
++ else {
++ bind(L_decrement_retry);
++ }
++}
++
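Condensed to plain C++, the control flow of rtm_inflated_locking is: elide while the monitor is unowned, retry bounded times on abort, then fall back to a CAS on _owner. In this sketch MonitorSketch, cas_owner and the literal retry count are simplified stand-ins, not VM APIs; should_retry_on_abort is the helper sketched earlier:

```cpp
#include <cstddef>
#include <immintrin.h>

struct MonitorSketch { void* volatile owner; };             // stand-in for ObjectMonitor
bool cas_owner(MonitorSketch* m, void* expect, void* self); // hypothetical CAS helper
bool should_retry_on_abort(unsigned status, int retries);   // from the earlier sketch

bool inflated_lock(MonitorSketch* m, void* self) {
  for (int retries = 5 /* RTMRetryCount default */; ; retries--) {
    unsigned status = _xbegin();
    if (status == _XBEGIN_STARTED) {
      if (m->owner == NULL) return true;  // elided: owner stays null inside txn
      _xabort(0);                         // already owned: abort, maybe retry
    }
    if (!should_retry_on_abort(status, retries)) break;
  }
  // Fallback: swing _owner from null to self with a CAS; the generated code
  // additionally spins (rtm_retry_lock_on_busy) while the owner is non-null.
  return cas_owner(m, NULL, self);
}
```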
++#endif // INCLUDE_RTM_OPT
++
++// Fast_Lock and Fast_Unlock used by C2
++
++// Because the transitions from emitted code to the runtime
++// monitorenter/exit helper stubs are so slow it's critical that
++// we inline both the stack-locking fast-path and the inflated fast path.
++//
++// See also: cmpFastLock and cmpFastUnlock.
++//
++// What follows is a specialized inline transliteration of the code
++// in slow_enter() and slow_exit(). If we're concerned about I$ bloat
++// another option would be to emit TrySlowEnter and TrySlowExit methods
++// at startup-time. These methods would accept arguments as
++// (rax=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
++// indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply
++// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
++// In practice, however, the # of lock sites is bounded and is usually small.
++// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
++// if the processor uses simple bimodal branch predictors keyed by EIP,
++// since the helper routines would be called from multiple synchronization
++// sites.
++//
++// An even better approach would be write "MonitorEnter()" and "MonitorExit()"
++// in java - using j.u.c and unsafe - and just bind the lock and unlock sites
++// to those specialized methods. That'd give us a mostly platform-independent
++// implementation that the JITs could optimize and inline at their pleasure.
++// Done correctly, the only time we'd need to cross to native code would be
++// to park() or unpark() threads. We'd also need a few more unsafe operators
++// to (a) prevent compiler-JIT reordering of non-volatile accesses, and
++// (b) explicit barriers or fence operations.
++//
++// TODO:
++//
++// * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
++// This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
++// Given TLAB allocation, Self is usually manifested in a register, so passing it into
++// the lock operators would typically be faster than reifying Self.
++//
++// * Ideally I'd define the primitives as:
++// fast_lock (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
++// fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
++// Unfortunately ADLC bugs prevent us from expressing the ideal form.
++// Instead, we're stuck with the rather awkward and brittle register assignments below.
++// Furthermore the register assignments are overconstrained, possibly resulting in
++// sub-optimal code near the synchronization site.
++//
++// * Eliminate the sp-proximity tests and just use "== Self" tests instead.
++// Alternately, use a better sp-proximity test.
++//
++// * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
++// Either one is sufficient to uniquely identify a thread.
++// TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
++//
++// * Intrinsify notify() and notifyAll() for the common cases where the
++// object is locked by the calling thread but the waitlist is empty.
++// Avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
++//
++// * use jccb and jmpb instead of jcc and jmp to improve code density.
++// But beware of excessive branch density on AMD Opterons.
++//
++// * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
++// or failure of the fast-path. If the fast-path fails then we pass
++// control to the slow-path, typically in C. In Fast_Lock and
++// Fast_Unlock we often branch to DONE_LABEL, just to find that C2
++// will emit a conditional branch immediately after the node.
++// So we have branches to branches and lots of ICC.ZF games.
++// Instead, it might be better to have C2 pass a "FailureLabel"
++// into Fast_Lock and Fast_Unlock. In the case of success, control
++// will drop through the node. ICC.ZF is undefined at exit.
++// In the case of failure, the node will branch directly to the
++// FailureLabel
++
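For reference, a cmpFastLock-style use site consumes that ZF protocol roughly as follows. This is an illustrative emission sketch, not the actual .ad expansion:

```cpp
// fast_lock leaves ZF=1 on success, ZF=0 on failure (see protocol above).
void emit_lock_site(MacroAssembler* masm, Register obj, Register box,
                    Register scr, Label& slow_path,
                    BiasedLockingCounters* counters) {
  masm->fast_lock(obj, box, rax, scr, noreg, noreg,
                  counters, NULL, NULL, NULL,
                  /*use_rtm=*/false, /*profile_rtm=*/false);
  masm->jcc(Assembler::notZero, slow_path); // ZF==0: call runtime monitorenter
  // fall through: lock acquired on the fast path
}
```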
++
++// obj: object to lock
++// box: on-stack box address (displaced header location) - KILLED
++// rax: tmp -- KILLED
++// scr: tmp -- KILLED
++void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg,
++ Register scrReg, Register cx1Reg, Register cx2Reg,
++ BiasedLockingCounters* counters,
++ RTMLockingCounters* rtm_counters,
++ RTMLockingCounters* stack_rtm_counters,
++ Metadata* method_data,
++ bool use_rtm, bool profile_rtm) {
++ // Ensure the register assignments are disjoint
++ assert(tmpReg == rax, "");
++
++ if (use_rtm) {
++ assert_different_registers(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg);
++ } else {
++ assert(cx1Reg == noreg, "");
++ assert(cx2Reg == noreg, "");
++ assert_different_registers(objReg, boxReg, tmpReg, scrReg);
++ }
++
++ if (counters != NULL) {
++ atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()), scrReg);
++ }
++ if (EmitSync & 1) {
++ // set box->dhw = unused_mark (3)
++ // Force all sync thru slow-path: slow_enter() and slow_exit()
++ movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
++ cmpptr (rsp, (int32_t)NULL_WORD);
++ } else
++ if (EmitSync & 2) {
++ Label DONE_LABEL ;
++ if (UseBiasedLocking) {
++ // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
++ biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, counters);
++ }
++
++ movptr(tmpReg, Address(objReg, 0)); // fetch markword
++ orptr (tmpReg, 0x1);
++ movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS
++ if (os::is_MP()) {
++ lock();
++ }
++ cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
++ jccb(Assembler::equal, DONE_LABEL);
++ // Recursive locking
++ subptr(tmpReg, rsp);
++ andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
++ movptr(Address(boxReg, 0), tmpReg);
++ bind(DONE_LABEL);
++ } else {
++ // Possible cases that we'll encounter in fast_lock
++ // ------------------------------------------------
++ // * Inflated
++ // -- unlocked
++ // -- Locked
++ // = by self
++ // = by other
++ // * biased
++ // -- by Self
++ // -- by other
++ // * neutral
++ // * stack-locked
++ // -- by self
++ // = sp-proximity test hits
++ // = sp-proximity test generates false-negative
++ // -- by other
++ //
++
++ Label IsInflated, DONE_LABEL;
++
++ // it's stack-locked, biased or neutral
++ // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
++ // order to reduce the number of conditional branches in the most common cases.
++ // Beware -- there's a subtle invariant that fetch of the markword
++ // at [FETCH], below, will never observe a biased encoding (*101b).
++ // If this invariant is not held we risk exclusion (safety) failure.
++ if (UseBiasedLocking && !UseOptoBiasInlining) {
++ biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, counters);
++ }
++
++#if INCLUDE_RTM_OPT
++ if (UseRTMForStackLocks && use_rtm) {
++ rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
++ stack_rtm_counters, method_data, profile_rtm,
++ DONE_LABEL, IsInflated);
++ }
++#endif // INCLUDE_RTM_OPT
++
++ movptr(tmpReg, Address(objReg, 0)); // [FETCH]
++ testptr(tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased
++ jccb(Assembler::notZero, IsInflated);
++
++ // Attempt stack-locking ...
++ orptr (tmpReg, markOopDesc::unlocked_value);
++ movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS
++ if (os::is_MP()) {
++ lock();
++ }
++ cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
++ if (counters != NULL) {
++ cond_inc32(Assembler::equal,
++ ExternalAddress((address)counters->fast_path_entry_count_addr()));
++ }
++ jcc(Assembler::equal, DONE_LABEL); // Success
++
++ // Recursive locking.
++ // The object is stack-locked: markword contains stack pointer to BasicLock.
++ // Locked by current thread if difference with current SP is less than one page.
++ subptr(tmpReg, rsp);
++ // Next instruction sets ZFlag == 1 (Success) if the difference is less than one page.
++ andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
++ movptr(Address(boxReg, 0), tmpReg);
++ if (counters != NULL) {
++ cond_inc32(Assembler::equal,
++ ExternalAddress((address)counters->fast_path_entry_count_addr()));
++ }
++ jmp(DONE_LABEL);
++
++ bind(IsInflated);
++ // The object is inflated. tmpReg contains pointer to ObjectMonitor* + 2(monitor_value)
++
++#if INCLUDE_RTM_OPT
++ // Use the same RTM locking code in 32- and 64-bit VM.
++ if (use_rtm) {
++ rtm_inflated_locking(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg,
++ rtm_counters, method_data, profile_rtm, DONE_LABEL);
++ } else {
++#endif // INCLUDE_RTM_OPT
++
++#ifndef _LP64
++ // The object is inflated.
++ //
++ // TODO-FIXME: eliminate the ugly use of manifest constants:
++ // Use markOopDesc::monitor_value instead of "2".
++ // use markOop::unused_mark() instead of "3".
++ // The tmpReg value is an objectMonitor reference ORed with
++ // markOopDesc::monitor_value (2). We can either convert tmpReg to an
++ // objectmonitor pointer by masking off the "2" bit or we can just
++ // use tmpReg as an objectmonitor pointer but bias the objectmonitor
++ // field offsets with "-2" to compensate for and annul the low-order tag bit.
++ //
++ // I use the latter as it avoids AGI stalls.
++ // As such, we write "mov r, [tmpReg+OFFSETOF(Owner)-2]"
++ // instead of "mov r, [tmpReg+OFFSETOF(Owner)]".
++ //
++ #define OFFSET_SKEWED(f) ((ObjectMonitor::f ## _offset_in_bytes())-2)
++
++ // boxReg refers to the on-stack BasicLock in the current frame.
++ // We'd like to write:
++ // set box->_displaced_header = markOop::unused_mark(). Any non-0 value suffices.
++ // This is convenient but results in a ST-before-CAS penalty. The following CAS suffers
++ // additional latency as we have another ST in the store buffer that must drain.
++
++ if (EmitSync & 8192) {
++ movptr(Address(boxReg, 0), 3); // results in ST-before-CAS penalty
++ get_thread (scrReg);
++ movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2]
++ movptr(tmpReg, NULL_WORD); // consider: xor vs mov
++ if (os::is_MP()) {
++ lock();
++ }
++ cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
++ } else
++ if ((EmitSync & 128) == 0) { // avoid ST-before-CAS
++ movptr(scrReg, boxReg);
++ movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2]
++
++ // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
++ if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
++ // prefetchw [eax + Offset(_owner)-2]
++ prefetchw(Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
++ }
++
++ if ((EmitSync & 64) == 0) {
++ // Optimistic form: consider XORL tmpReg,tmpReg
++ movptr(tmpReg, NULL_WORD);
++ } else {
++ // Can suffer RTS->RTO upgrades on shared or cold $ lines
++ // Test-And-CAS instead of CAS
++ movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)); // rax, = m->_owner
++ testptr(tmpReg, tmpReg); // Locked ?
++ jccb (Assembler::notZero, DONE_LABEL);
++ }
++
++ // Appears unlocked - try to swing _owner from null to non-null.
++ // Ideally, I'd manifest "Self" with get_thread and then attempt
++ // to CAS the register containing Self into m->Owner.
++ // But we don't have enough registers, so instead we can either try to CAS
++ // rsp or the address of the box (in scr) into &m->owner. If the CAS succeeds
++ // we later store "Self" into m->Owner. Transiently storing a stack address
++ // (rsp or the address of the box) into m->owner is harmless.
++ // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
++ if (os::is_MP()) {
++ lock();
++ }
++ cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
++ movptr(Address(scrReg, 0), 3); // box->_displaced_header = 3
++ jccb (Assembler::notZero, DONE_LABEL);
++ get_thread (scrReg); // beware: clobbers ICCs
++ movptr(Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), scrReg);
++ xorptr(boxReg, boxReg); // set icc.ZFlag = 1 to indicate success
++
++ // If the CAS fails we can either retry or pass control to the slow-path.
++ // We use the latter tactic.
++ // Pass the CAS result in the icc.ZFlag into DONE_LABEL
++ // If the CAS was successful ...
++ // Self has acquired the lock
++ // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
++ // Intentional fall-through into DONE_LABEL ...
++ } else {
++ movptr(Address(boxReg, 0), intptr_t(markOopDesc::unused_mark())); // results in ST-before-CAS penalty
++ movptr(boxReg, tmpReg);
++
++ // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
++ if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
++ // prefetchw [eax + Offset(_owner)-2]
++ prefetchw(Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
++ }
++
++ if ((EmitSync & 64) == 0) {
++ // Optimistic form
++ xorptr (tmpReg, tmpReg);
++ } else {
++ // Can suffer RTS->RTO upgrades on shared or cold $ lines
++ movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)); // rax, = m->_owner
++ testptr(tmpReg, tmpReg); // Locked ?
++ jccb (Assembler::notZero, DONE_LABEL);
++ }
++
++ // Appears unlocked - try to swing _owner from null to non-null.
++ // Use either "Self" (in scr) or rsp as thread identity in _owner.
++ // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
++ get_thread (scrReg);
++ if (os::is_MP()) {
++ lock();
++ }
++ cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
++
++ // If the CAS fails we can either retry or pass control to the slow-path.
++ // We use the latter tactic.
++ // Pass the CAS result in the icc.ZFlag into DONE_LABEL
++ // If the CAS was successful ...
++ // Self has acquired the lock
++ // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
++ // Intentional fall-through into DONE_LABEL ...
++ }
++#else // _LP64
++ // It's inflated
++
++ // TODO: someday avoid the ST-before-CAS penalty by
++ // relocating (deferring) the following ST.
++ // We should also think about trying a CAS without having
++ // fetched _owner. If the CAS is successful we may
++ // avoid an RTO->RTS upgrade on the $line.
++
++ // Without cast to int32_t a movptr will destroy r10 which is typically obj
++ movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
++
++ movptr (boxReg, tmpReg);
++ movptr (tmpReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
++ testptr(tmpReg, tmpReg);
++ jccb (Assembler::notZero, DONE_LABEL);
++
++ // It's inflated and appears unlocked
++ if (os::is_MP()) {
++ lock();
++ }
++ cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
++ // Intentional fall-through into DONE_LABEL ...
++#endif // _LP64
++
++#if INCLUDE_RTM_OPT
++ } // use_rtm()
++#endif
++ // DONE_LABEL is a hot target - we'd really like to place it at the
++ // start of cache line by padding with NOPs.
++ // See the AMD and Intel software optimization manuals for the
++ // most efficient "long" NOP encodings.
++ // Unfortunately none of our alignment mechanisms suffice.
++ bind(DONE_LABEL);
++
++ // At DONE_LABEL the icc ZFlag is set as follows ...
++ // Fast_Unlock uses the same protocol.
++ // ZFlag == 1 -> Success
++ // ZFlag == 0 -> Failure - force control through the slow-path
++ }
++}
++
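The stack-locking fast path above reads more easily restated in plain C++. A sketch under the following assumptions: cmpxchg_ptr is a hypothetical stand-in for lock-cmpxchg that returns the value found at the address, and the 0x2/0x1 constants stand in for markOopDesc::monitor_value and unlocked_value:

```cpp
#include <cstddef>
#include <cstdint>

uintptr_t cmpxchg_ptr(uintptr_t* addr, uintptr_t expect, uintptr_t desired);

bool stack_lock_sketch(uintptr_t* obj_mark, uintptr_t* box,
                       uintptr_t sp, size_t page_size) {
  uintptr_t mark = *obj_mark;
  if (mark & 0x2) return false;            // monitor_value set: inflated path
  uintptr_t unlocked = mark | 0x1;         // build the presumed-unlocked header
  *box = unlocked;                         // anticipate a successful CAS
  uintptr_t found = cmpxchg_ptr(obj_mark, unlocked, (uintptr_t)box);
  if (found == unlocked) return true;      // stack-locked: mark now points at box
  // CAS failed: it is a recursive lock iff the mark is a stack address within
  // one page of SP and 8-byte aligned -- the andptr(tmp, 7 - os::vm_page_size())
  // test above computes exactly this mask.
  uintptr_t delta = (found - sp) & ~(uintptr_t)(page_size - 8);
  *box = delta;                            // 0 flags a recursive stack-lock
  return delta == 0;                       // non-zero: caller takes the slow path
}
```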
++// obj: object to unlock
++// box: box address (displaced header location), killed. Must be EAX.
++// tmp: killed, cannot be obj nor box.
++//
++// Some commentary on balanced locking:
++//
++// Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
++// Methods that don't have provably balanced locking are forced to run in the
++// interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
++// The interpreter provides two properties:
++// I1: At return-time the interpreter automatically and quietly unlocks any
++// objects acquired by the current activation (frame). Recall that the
++// interpreter maintains an on-stack list of locks currently held by
++// a frame.
++// I2: If a method attempts to unlock an object that is not held by
++// the frame, the interpreter throws IMSX.
++//
++// Let's say A(), which has provably balanced locking, acquires O and then calls B().
++// B() doesn't have provably balanced locking so it runs in the interpreter.
++// Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O
++// is still locked by A().
++//
++// The only other source of unbalanced locking would be JNI. The "Java Native Interface:
++// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
++// should not be unlocked by "normal" java-level locking and vice-versa. The specification
++// doesn't specify what will occur if a program engages in such mixed-mode locking, however.
++
++void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) {
++ assert(boxReg == rax, "");
++ assert_different_registers(objReg, boxReg, tmpReg);
++
++ if (EmitSync & 4) {
++ // Disable - inhibit all inlining. Force control through the slow-path
++ cmpptr (rsp, 0);
++ } else
++ if (EmitSync & 8) {
++ Label DONE_LABEL;
++ if (UseBiasedLocking) {
++ biased_locking_exit(objReg, tmpReg, DONE_LABEL);
++ }
++ // Classic stack-locking code ...
++ // Check whether the displaced header is 0
++ //(=> recursive unlock)
++ movptr(tmpReg, Address(boxReg, 0));
++ testptr(tmpReg, tmpReg);
++ jccb(Assembler::zero, DONE_LABEL);
++ // If not recursive lock, reset the header to displaced header
++ if (os::is_MP()) {
++ lock();
++ }
++ cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
++ bind(DONE_LABEL);
++ } else {
++ Label DONE_LABEL, Stacked, CheckSucc;
++
++ // Critically, the biased locking test must have precedence over
++ // and appear before the (box->dhw == 0) recursive stack-lock test.
++ if (UseBiasedLocking && !UseOptoBiasInlining) {
++ biased_locking_exit(objReg, tmpReg, DONE_LABEL);
++ }
++
++#if INCLUDE_RTM_OPT
++ if (UseRTMForStackLocks && use_rtm) {
++ assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
++ Label L_regular_unlock;
++ movptr(tmpReg, Address(objReg, 0)); // fetch markword
++ andptr(tmpReg, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
++ cmpptr(tmpReg, markOopDesc::unlocked_value); // bits = 001 unlocked
++ jccb(Assembler::notEqual, L_regular_unlock); // if !HLE RegularLock
++ xend(); // otherwise end...
++ jmp(DONE_LABEL); // ... and we're done
++ bind(L_regular_unlock);
++ }
++#endif
++
++ cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header
++ jcc (Assembler::zero, DONE_LABEL); // 0 indicates recursive stack-lock
++ movptr(tmpReg, Address(objReg, 0)); // Examine the object's markword
++ testptr(tmpReg, markOopDesc::monitor_value); // Inflated?
++ jccb (Assembler::zero, Stacked);
++
++ // It's inflated.
++#if INCLUDE_RTM_OPT
++ if (use_rtm) {
++ Label L_regular_inflated_unlock;
++ // Clean monitor_value bit to get valid pointer
++ int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value;
++ movptr(boxReg, Address(tmpReg, owner_offset));
++ testptr(boxReg, boxReg);
++ jccb(Assembler::notZero, L_regular_inflated_unlock);
++ xend();
++ jmpb(DONE_LABEL);
++ bind(L_regular_inflated_unlock);
++ }
++#endif
++
++ // Despite our balanced locking property we still check that m->_owner == Self
++ // as java routines or native JNI code called by this thread might
++ // have released the lock.
++ // Refer to the comments in synchronizer.cpp for how we might encode extra
++ // state in _succ so we can avoid fetching EntryList|cxq.
++ //
++ // I'd like to add more cases in fast_lock() and fast_unlock() --
++ // such as recursive enter and exit -- but we have to be wary of
++ // I$ bloat, T$ effects and BP$ effects.
++ //
++ // If there's no contention try a 1-0 exit. That is, exit without
++ // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
++ // we detect and recover from the race that the 1-0 exit admits.
++ //
++ // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
++ // before it STs null into _owner, releasing the lock. Updates
++ // to data protected by the critical section must be visible before
++ // we drop the lock (and thus before any other thread could acquire
++ // the lock and observe the fields protected by the lock).
++ // IA32's memory-model is SPO, so STs are ordered with respect to
++ // each other and there's no need for an explicit barrier (fence).
++ // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
++#ifndef _LP64
++ get_thread (boxReg);
++ if ((EmitSync & 4096) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
++ // prefetchw [ebx + Offset(_owner)-2]
++ prefetchw(Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
++ }
++
++ // Note that we could employ various encoding schemes to reduce
++ // the number of loads below (currently 4) to just 2 or 3.
++ // Refer to the comments in synchronizer.cpp.
++ // In practice the chain of fetches doesn't seem to impact performance, however.
++ if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
++ // Attempt to reduce branch density - AMD's branch predictor.
++ xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
++ orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2));
++ orptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2));
++ orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2));
++ jccb (Assembler::notZero, DONE_LABEL);
++ movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD);
++ jmpb (DONE_LABEL);
++ } else {
++ xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
++ orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2));
++ jccb (Assembler::notZero, DONE_LABEL);
++ movptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2));
++ orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2));
++ jccb (Assembler::notZero, CheckSucc);
++ movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD);
++ jmpb (DONE_LABEL);
++ }
++
++ // The following code fragment (EmitSync & 65536) improves the performance of
++ // contended applications and contended synchronization microbenchmarks.
++ // Unfortunately the emission of the code - even though not executed - causes regressions
++ // in scimark and jetstream, evidently because of $ effects. Replacing the code
++ // with an equal number of never-executed NOPs results in the same regression.
++ // We leave it off by default.
++
++ if ((EmitSync & 65536) != 0) {
++ Label LSuccess, LGoSlowPath ;
++
++ bind (CheckSucc);
++
++ // Optional pre-test ... it's safe to elide this
++ if ((EmitSync & 16) == 0) {
++ cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD);
++ jccb (Assembler::zero, LGoSlowPath);
++ }
++
++ // We have a classic Dekker-style idiom:
++ // ST m->_owner = 0 ; MEMBAR; LD m->_succ
++ // There are a number of ways to implement the barrier:
++ // (1) lock:andl &m->_owner, 0
++ // is fast, but masm doesn't currently support the "ANDL M,IMM32" form.
++ // LOCK: ANDL [ebx+Offset(_Owner)-2], 0
++ // Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8
++ // (2) If supported, an explicit MFENCE is appealing.
++ // In older IA32 processors MFENCE is slower than lock:add or xchg
++ // particularly if the write-buffer is full, as might be the case
++ // if stores closely precede the fence or fence-equivalent instruction.
++ // In more modern implementations MFENCE appears faster, however.
++ // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack
++ // The $lines underlying the top-of-stack should be in M-state.
++ // The locked add instruction is serializing, of course.
++ // (4) Use xchg, which is serializing
++ // mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works
++ // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0.
++ // The integer condition codes will tell us if succ was 0.
++ // Since _succ and _owner should reside in the same $line and
++ // we just stored into _owner, it's likely that the $line
++ // remains in M-state for the lock:orl.
++ //
++ // We currently use (3), although it's likely that switching to (2)
++ // is correct for the future.
++
++ movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD);
++ if (os::is_MP()) {
++ if (VM_Version::supports_sse2() && 1 == FenceInstruction) {
++ mfence();
++ } else {
++ lock (); addptr(Address(rsp, 0), 0);
++ }
++ }
++ // Ratify _succ remains non-null
++ cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0);
++ jccb (Assembler::notZero, LSuccess);
++
++ xorptr(boxReg, boxReg); // box is really EAX
++ if (os::is_MP()) { lock(); }
++ cmpxchgptr(rsp, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
++ jccb (Assembler::notEqual, LSuccess);
++ // Since we're low on registers we installed rsp as a placeholder in _owner.
++ // Now install Self over rsp. This is safe as we're transitioning from
++ // non-null to non-null.
++ get_thread (boxReg);
++ movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg);
++ // Intentional fall-through into LGoSlowPath ...
++
++ bind (LGoSlowPath);
++ orptr(boxReg, 1); // set ICC.ZF=0 to indicate failure
++ jmpb (DONE_LABEL);
++
++ bind (LSuccess);
++ xorptr(boxReg, boxReg); // set ICC.ZF=1 to indicate success
++ jmpb (DONE_LABEL);
++ }
++
++ bind (Stacked);
++ // It's not inflated and it's not recursively stack-locked and it's not biased.
++ // It must be stack-locked.
++ // Try to reset the header to displaced header.
++ // The "box" value on the stack is stable, so we can reload
++ // and be assured we observe the same value as above.
++ movptr(tmpReg, Address(boxReg, 0));
++ if (os::is_MP()) {
++ lock();
++ }
++ cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
++ // Intentional fall-through into DONE_LABEL
++
++ // DONE_LABEL is a hot target - we'd really like to place it at the
++ // start of cache line by padding with NOPs.
++ // See the AMD and Intel software optimization manuals for the
++ // most efficient "long" NOP encodings.
++ // Unfortunately none of our alignment mechanisms suffice.
++ if ((EmitSync & 65536) == 0) {
++ bind (CheckSucc);
++ }
++#else // _LP64
++ // It's inflated
++ movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
++ xorptr(boxReg, r15_thread);
++ orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2));
++ jccb (Assembler::notZero, DONE_LABEL);
++ movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2));
++ orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2));
++ jccb (Assembler::notZero, CheckSucc);
++ movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD);
++ jmpb (DONE_LABEL);
++
++ if ((EmitSync & 65536) == 0) {
++ Label LSuccess, LGoSlowPath ;
++ bind (CheckSucc);
++ cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD);
++ jccb (Assembler::zero, LGoSlowPath);
++
++ // I'd much rather use lock:andl m->_owner, 0 as it's faster than
++ // the explicit ST;MEMBAR combination, but masm doesn't currently support
++ // "ANDQ M,IMM". Don't use MFENCE here. lock:add to TOS, xchg, etc
++ // are all faster when the write buffer is populated.
++ movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD);
++ if (os::is_MP()) {
++ lock (); addl (Address(rsp, 0), 0);
++ }
++ cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD);
++ jccb (Assembler::notZero, LSuccess);
++
++ movptr (boxReg, (int32_t)NULL_WORD); // box is really EAX
++ if (os::is_MP()) { lock(); }
++ cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
++ jccb (Assembler::notEqual, LSuccess);
++ // Intentional fall-through into slow-path
++
++ bind (LGoSlowPath);
++ orl (boxReg, 1); // set ICC.ZF=0 to indicate failure
++ jmpb (DONE_LABEL);
++
++ bind (LSuccess);
++ testl (boxReg, 0); // set ICC.ZF=1 to indicate success
++ jmpb (DONE_LABEL);
++ }
++
++ bind (Stacked);
++ movptr(tmpReg, Address (boxReg, 0)); // re-fetch
++ if (os::is_MP()) { lock(); }
++ cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
++
++ if (EmitSync & 65536) {
++ bind (CheckSucc);
++ }
++#endif
++ bind(DONE_LABEL);
++ // Avoid branch to branch on AMD processors
++ if (EmitSync & 32768) {
++ nop();
++ }
++ }
++}
++#endif // COMPILER2
++
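The Dekker-style 1-0 exit discussed in the fast_unlock comments above can be sketched with C++11 atomics. MonitorSketch and one_zero_exit are simplified stand-ins, not ObjectMonitor's API; the full fence corresponds to options (2)/(3) in the comment (mfence or lock:add to top-of-stack):

```cpp
#include <atomic>

struct MonitorSketch {                    // simplified stand-in for ObjectMonitor
  std::atomic<void*> owner;
  std::atomic<void*> succ;                // presumed heir, set by contending threads
};

// Returns true if the exit is complete; false means take the slow path
// (we retook the lock and must hand it off / wake a successor there).
bool one_zero_exit(MonitorSketch* m, void* self) {
  m->owner.store(nullptr, std::memory_order_release);   // ST m->_owner = 0
  std::atomic_thread_fence(std::memory_order_seq_cst);  // MEMBAR
  if (m->succ.load(std::memory_order_relaxed) != nullptr)
    return true;                          // a successor exists: no wakeup needed
  // No successor visible: try to retake the lock. If the CAS fails, some
  // other thread already owns the monitor, so our exit stands as-is.
  void* expected = nullptr;
  return !m->owner.compare_exchange_strong(expected, self);
}
```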
+ void MacroAssembler::c2bool(Register x) {
+ // implements x == 0 ? 0 : 1
+ // note: must only look at least-significant byte of x
+@@ -1969,7 +2849,9 @@
+ Condition negated_cond = negate_condition(cond);
+ Label L;
+ jcc(negated_cond, L);
++ pushf(); // Preserve flags
+ atomic_incl(counter_addr);
++ popf();
+ bind(L);
+ }
+
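The pushf/popf pair added here matters because call sites such as biased_locking_enter re-test the very flags the lock'd increment rewrites (incl sets OF/SF/ZF/AF/PF). An emission sketch of the fixed sequence, using only the MacroAssembler calls declared in this patch:

```cpp
void cond_inc32_sketch(MacroAssembler* masm, Assembler::Condition cond,
                       AddressLiteral counter_addr) {
  Label L;
  masm->jcc(masm->negate_condition(cond), L); // skip unless cond holds
  masm->pushf();                              // save flags: incl rewrites ZF etc.
  masm->atomic_incl(counter_addr);            // lock'd increment of the counter
  masm->popf();                               // restore for the caller's re-test
  masm->bind(L);
}
```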
+@@ -2271,10 +3153,12 @@
+ // if fast computation is not possible, result is NaN. Requires
+ // fallback from user of this macro.
+ // increase precision for intermediate steps of the computation
++ BLOCK_COMMENT("fast_pow {");
+ increase_precision();
+ fyl2x(); // Stack: (Y*log2(X)) ...
+ pow_exp_core_encoding(); // Stack: exp(X) ...
+ restore_precision();
++ BLOCK_COMMENT("} fast_pow");
+ }
+
+ void MacroAssembler::fast_exp() {
+@@ -5212,7 +6096,7 @@
+
+
+ // C2 compiled method's prolog code.
+-void MacroAssembler::verified_entry(int framesize, bool stack_bang, bool fp_mode_24b) {
++void MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b) {
+
+ // WARNING: Initial instruction MUST be 5 bytes or longer so that
+ // NativeJump::patch_verified_entry will be able to patch out the entry
+@@ -5220,18 +6104,20 @@
+ // the frame allocation can be either 3 or 6 bytes. So if we don't do
+ // stack bang then we must use the 6 byte frame allocation even if
+ // we have no frame. :-(
++ assert(stack_bang_size >= framesize || stack_bang_size <= 0, "stack bang size incorrect");
+
+ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
+ // Remove word for return addr
+ framesize -= wordSize;
++ stack_bang_size -= wordSize;
+
+ // Calls to C2R adapters often do not accept exceptional returns.
+ // We require that their callers must bang for them. But be careful, because
+ // some VM calls (such as call site linkage) can use several kilobytes of
+ // stack. But the stack safety zone should account for that.
+ // See bugs 4446381, 4468289, 4497237.
+- if (stack_bang) {
+- generate_stack_overflow_check(framesize);
++ if (stack_bang_size > 0) {
++ generate_stack_overflow_check(stack_bang_size);
+
+ // We always push rbp, so that on return to interpreter rbp, will be
+ // restored correctly and we can correct the stack.

+--- ./hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -27,6 +27,7 @@
+
+ #include "asm/assembler.hpp"
+ #include "utilities/macros.hpp"
++#include "runtime/rtmLocking.hpp"
+
+
+ // MacroAssembler extends Assembler by frequently used macros.
+@@ -111,7 +112,8 @@
+ op == 0xE9 /* jmp */ ||
+ op == 0xEB /* short jmp */ ||
+ (op & 0xF0) == 0x70 /* short jcc */ ||
+- op == 0x0F && (branch[1] & 0xF0) == 0x80 /* jcc */,
++ op == 0x0F && (branch[1] & 0xF0) == 0x80 /* jcc */ ||
++ op == 0xC7 && branch[1] == 0xF8 /* xbegin */,
+ "Invalid opcode at patch point");
+
+ if (op == 0xEB || (op & 0xF0) == 0x70) {
+@@ -121,7 +123,7 @@
+ guarantee(this->is8bit(imm8), "Short forward jump exceeds 8-bit offset");
+ *disp = imm8;
+ } else {
+- int* disp = (int*) &branch[(op == 0x0F)? 2: 1];
++ int* disp = (int*) &branch[(op == 0x0F || op == 0xC7)? 2: 1];
+ int imm32 = target - (address) &disp[1];
+ *disp = imm32;
+ }
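The new 0xC7 0xF8 case handles xbegin, whose rel32 displacement is laid out like a long jcc: it starts two bytes in and is relative to the next instruction. A worked sketch mirroring the patching logic above (function name is illustrative):

```cpp
// branch points at an xbegin instruction (C7 F8 imm32); target is where it
// should land after relocation.
void patch_xbegin(unsigned char* branch, unsigned char* target) {
  // The displacement field begins after the two opcode bytes...
  int* disp = reinterpret_cast<int*>(&branch[2]);
  // ...and is measured from the address of the *next* instruction (&disp[1]).
  *disp = static_cast<int>(target - reinterpret_cast<unsigned char*>(&disp[1]));
}
```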
+@@ -161,7 +163,6 @@
+ void incrementq(Register reg, int value = 1);
+ void incrementq(Address dst, int value = 1);
+
+-
+ // Support optimal SSE move instructions.
+ void movflt(XMMRegister dst, XMMRegister src) {
+ if (UseXmmRegToRegMoveAll) { movaps(dst, src); return; }
+@@ -187,6 +188,8 @@
+ void incrementl(AddressLiteral dst);
+ void incrementl(ArrayAddress dst);
+
++ void incrementq(AddressLiteral dst);
++
+ // Alignment
+ void align(int modulus);
+
+@@ -651,7 +654,40 @@
+ Label& done, Label* slow_case = NULL,
+ BiasedLockingCounters* counters = NULL);
+ void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done);
+-
++#ifdef COMPILER2
++ // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
++ // See full description in macroAssembler_x86.cpp.
++ void fast_lock(Register obj, Register box, Register tmp,
++ Register scr, Register cx1, Register cx2,
++ BiasedLockingCounters* counters,
++ RTMLockingCounters* rtm_counters,
++ RTMLockingCounters* stack_rtm_counters,
++ Metadata* method_data,
++ bool use_rtm, bool profile_rtm);
++ void fast_unlock(Register obj, Register box, Register tmp, bool use_rtm);
++#if INCLUDE_RTM_OPT
++ void rtm_counters_update(Register abort_status, Register rtm_counters);
++ void branch_on_random_using_rdtsc(Register tmp, Register scr, int count, Label& brLabel);
++ void rtm_abort_ratio_calculation(Register tmp, Register rtm_counters_reg,
++ RTMLockingCounters* rtm_counters,
++ Metadata* method_data);
++ void rtm_profiling(Register abort_status_Reg, Register rtm_counters_Reg,
++ RTMLockingCounters* rtm_counters, Metadata* method_data, bool profile_rtm);
++ void rtm_retry_lock_on_abort(Register retry_count, Register abort_status, Label& retryLabel);
++ void rtm_retry_lock_on_busy(Register retry_count, Register box, Register tmp, Register scr, Label& retryLabel);
++ void rtm_stack_locking(Register obj, Register tmp, Register scr,
++ Register retry_on_abort_count,
++ RTMLockingCounters* stack_rtm_counters,
++ Metadata* method_data, bool profile_rtm,
++ Label& DONE_LABEL, Label& IsInflated);
++ void rtm_inflated_locking(Register obj, Register box, Register tmp,
++ Register scr, Register retry_on_busy_count,
++ Register retry_on_abort_count,
++ RTMLockingCounters* rtm_counters,
++ Metadata* method_data, bool profile_rtm,
++ Label& DONE_LABEL);
++#endif
++#endif
+
+ Condition negate_condition(Condition cond);
+
+@@ -716,6 +752,7 @@
+
+
+ void imulptr(Register dst, Register src) { LP64_ONLY(imulq(dst, src)) NOT_LP64(imull(dst, src)); }
++ void imulptr(Register dst, Register src, int imm32) { LP64_ONLY(imulq(dst, src, imm32)) NOT_LP64(imull(dst, src, imm32)); }
+
+
+ void negptr(Register dst) { LP64_ONLY(negq(dst)) NOT_LP64(negl(dst)); }
+@@ -757,7 +794,14 @@
+ // Conditionally (atomically, on MPs) increments passed counter address, preserving condition codes.
+ void cond_inc32(Condition cond, AddressLiteral counter_addr);
+ // Unconditional atomic increment.
+- void atomic_incl(AddressLiteral counter_addr);
++ void atomic_incl(Address counter_addr);
++ void atomic_incl(AddressLiteral counter_addr, Register scr = rscratch1);
++#ifdef _LP64
++ void atomic_incq(Address counter_addr);
++ void atomic_incq(AddressLiteral counter_addr, Register scr = rscratch1);
++#endif
++ void atomic_incptr(AddressLiteral counter_addr, Register scr = rscratch1) { LP64_ONLY(atomic_incq(counter_addr, scr)) NOT_LP64(atomic_incl(counter_addr, scr)) ; }
++ void atomic_incptr(Address counter_addr) { LP64_ONLY(atomic_incq(counter_addr)) NOT_LP64(atomic_incl(counter_addr)) ; }
+
+ void lea(Register dst, AddressLiteral adr);
+ void lea(Address dst, AddressLiteral adr);
+@@ -1069,7 +1113,11 @@
+
+ void movptr(Register dst, Address src);
+
+- void movptr(Register dst, AddressLiteral src);
++#ifdef _LP64
++ void movptr(Register dst, AddressLiteral src, Register scratch=rscratch1);
++#else
++ void movptr(Register dst, AddressLiteral src, Register scratch=noreg); // Scratch reg is ignored in 32-bit
++#endif
+
+ void movptr(Register dst, intptr_t src);
+ void movptr(Register dst, Register src);
+@@ -1122,7 +1170,7 @@
+ void movl2ptr(Register dst, Register src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(if (dst != src) movl(dst, src)); }
+
+ // C2 compiled method's prolog code.
+- void verified_entry(int framesize, bool stack_bang, bool fp_mode_24b);
++ void verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b);
+
+ // clear memory of size 'cnt' qwords, starting at 'base'.
+ void clear_mem(Register base, Register cnt, Register rtmp);
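
The opcode check above now also accepts xbegin, which is encoded as C7 F8 followed by a rel32, so like two-byte jcc (0F 8x) its displacement starts two bytes into the instruction. A sketch of the displacement math the patch generalizes (hypothetical helper, not the HotSpot code):

    #include <cstdint>

    // Patch a rel32 branch to reach 'target'. x86 relative branches are
    // encoded relative to the first byte after the displacement field.
    static void patch_rel32(uint8_t* branch, uint8_t* target,
                            int opcode_bytes) {
        int32_t* disp = reinterpret_cast<int32_t*>(branch + opcode_bytes);
        *disp = static_cast<int32_t>(
            target - reinterpret_cast<uint8_t*>(disp + 1));
    }

    // jmp/call rel32: opcode_bytes == 1 (E9 / E8)
    // jcc rel32:      opcode_bytes == 2 (0F 8x)
    // xbegin rel32:   opcode_bytes == 2 (C7 F8)
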
+--- ./hotspot/src/cpu/x86/vm/methodHandles_x86.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/x86/vm/methodHandles_x86.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+@@ -29,6 +29,8 @@
+ #include "memory/allocation.inline.hpp"
+ #include "prims/methodHandles.hpp"
+
++PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
++
+ #define __ _masm->
+
+ #ifdef PRODUCT
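
PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC, added here and in several files below, presumably expands to file-scope diagnostic suppressions along these lines (an assumption for orientation; the real macro lives in the shared macros header):

    #pragma GCC diagnostic ignored "-Wformat"
    #pragma GCC diagnostic ignored "-Wformat-security"
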
+--- ./hotspot/src/cpu/x86/vm/nativeInst_x86.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/x86/vm/nativeInst_x86.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+@@ -35,6 +35,8 @@
+ #include "c1/c1_Runtime1.hpp"
+ #endif
+
++PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
++
+ void NativeInstruction::wrote(int offset) {
+ ICache::invalidate_word(addr_at(offset));
+ }
+--- ./hotspot/src/cpu/x86/vm/rtmLocking.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/x86/vm/rtmLocking.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,60 @@
++/*
++ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "memory/allocation.inline.hpp"
++#include "runtime/task.hpp"
++#include "runtime/rtmLocking.hpp"
++
++uintx RTMLockingCounters::_calculation_flag = 0;
++
++// One-shot PeriodicTask subclass for enabling RTM locking
++class RTMLockingCalculationTask : public PeriodicTask {
++ public:
++ RTMLockingCalculationTask(size_t interval_time) : PeriodicTask(interval_time){ }
++
++ virtual void task() {
++ RTMLockingCounters::_calculation_flag = 1;
++ // Reclaim our storage and disenroll ourself
++ delete this;
++ }
++};
++
++void RTMLockingCounters::init() {
++ if (UseRTMLocking && RTMLockingCalculationDelay > 0) {
++ RTMLockingCalculationTask* task = new RTMLockingCalculationTask(RTMLockingCalculationDelay);
++ task->enroll();
++ } else {
++ _calculation_flag = 1;
++ }
++}
++
++//------------------------------print_on-------------------------------
++void RTMLockingCounters::print_on(outputStream* st) {
++ st->print_cr("# rtm locks total (estimated): " UINTX_FORMAT, _total_count * RTMTotalCountIncrRate);
++ st->print_cr("# rtm lock aborts : " UINTX_FORMAT, _abort_count);
++ for (int i = 0; i < ABORT_STATUS_LIMIT; i++) {
++ st->print_cr("# rtm lock aborts %d: " UINTX_FORMAT, i, _abortX_count[i]);
++ }
++}
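
rtmLocking.cpp only manages the counters and the delayed abort-ratio calculation; the transactions themselves are emitted by the new fast_lock/fast_unlock code. For orientation, the hardware primitive involved looks like this in ordinary user code (a lock-elision sketch using the compiler's TSX intrinsics, built with -mrtm; lock_word and try_elided_section are hypothetical names, and this is not HotSpot's code path):

    #include <immintrin.h>   // _xbegin / _xend / _xabort
    #include <atomic>

    std::atomic<int> lock_word{0};   // 0 == unlocked

    bool try_elided_section() {
        unsigned status = _xbegin();
        if (status == _XBEGIN_STARTED) {
            // Reading the lock word puts it in the transaction's read
            // set: if another thread takes the real lock, we abort.
            if (lock_word.load(std::memory_order_relaxed) != 0)
                _xabort(0xff);   // lock already held, bail out
            // ... critical section executes transactionally ...
            _xend();             // commit
            return true;
        }
        return false;            // aborted: caller takes the real lock
    }
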
+--- ./hotspot/src/cpu/x86/vm/sharedRuntime_x86_32.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/x86/vm/sharedRuntime_x86_32.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -977,7 +977,9 @@
+
+ int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
+ VMRegPair *regs,
++ VMRegPair *regs2,
+ int total_args_passed) {
++ assert(regs2 == NULL, "not needed on x86");
+ // We return the amount of VMRegImpl stack slots we need to reserve for all
+ // the arguments NOT counting out_preserve_stack_slots.
+
+@@ -1624,7 +1626,7 @@
+ // Now figure out where the args must be stored and how much stack space
+ // they require.
+ int out_arg_slots;
+- out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
++ out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
+
+ // Compute framesize for the wrapper. We need to handlize all oops in
+ // registers a max of 2 on x86.
+@@ -1815,6 +1817,13 @@
+ // Frame is now completed as far as size and linkage.
+ int frame_complete = ((intptr_t)__ pc()) - start;
+
++ if (UseRTMLocking) {
++ // Abort RTM transaction before calling JNI
++ // because critical section will be large and will be
++ // aborted anyway. Also nmethod could be deoptimized.
++ __ xabort(0);
++ }
++
+ // Calculate the difference between rsp and rbp,. We need to know it
+ // after the native call because on windows Java Natives will pop
+ // the arguments and it is painful to do rsp relative addressing
+@@ -2257,7 +2266,7 @@
+ if (!is_critical_native) {
+ // reset handle block
+ __ movptr(rcx, Address(thread, JavaThread::active_handles_offset()));
+- __ movptr(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), NULL_WORD);
++ __ movl(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), NULL_WORD);
+
+ // Any exception pending?
+ __ cmpptr(Address(thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
+@@ -2495,7 +2504,7 @@
+ // they require (neglecting out_preserve_stack_slots).
+
+ int out_arg_slots;
+- out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
++ out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
+
+ // Calculate the total number of stack slots we will need.
+
+@@ -3005,11 +3014,15 @@
+ // restore rbp before stack bang because if stack overflow is thrown it needs to be pushed (and preserved)
+ __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
+
+- // Stack bang to make sure there's enough room for these interpreter frames.
++#ifdef ASSERT
++ // Compilers generate code that bangs the stack by as much as the
++ // interpreter would need. So this stack banging should never
++ // trigger a fault. Verify that it does not on non-product builds.
+ if (UseStackBanging) {
+ __ movl(rbx, Address(rdi ,Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
+ __ bang_stack_size(rbx, rcx);
+ }
++#endif
+
+ // Load array of frame pcs into ECX
+ __ movptr(rcx,Address(rdi,Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
+@@ -3168,6 +3181,12 @@
+ };
+
+ address start = __ pc();
++
++ if (UseRTMLocking) {
++ // Abort RTM transaction before possible nmethod deoptimization.
++ __ xabort(0);
++ }
++
+ // Push self-frame.
+ __ subptr(rsp, return_off*wordSize); // Epilog!
+
+@@ -3225,12 +3244,15 @@
+ // restore rbp before stack bang because if stack overflow is thrown it needs to be pushed (and preserved)
+ __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
+
+- // Stack bang to make sure there's enough room for these interpreter frames.
++#ifdef ASSERT
++ // Compilers generate code that bangs the stack by as much as the
++ // interpreter would need. So this stack banging should never
++ // trigger a fault. Verify that it does not on non-product builds.
+ if (UseStackBanging) {
+ __ movl(rbx, Address(rdi ,Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
+ __ bang_stack_size(rbx, rcx);
+ }
+-
++#endif
+
+ // Load array of frame pcs into ECX
+ __ movl(rcx,Address(rdi,Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
+@@ -3353,6 +3375,14 @@
+ address call_pc = NULL;
+ bool cause_return = (poll_type == POLL_AT_RETURN);
+ bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP);
++
++ if (UseRTMLocking) {
++ // Abort RTM transaction before calling runtime
++ // because critical section will be large and will be
++ // aborted anyway. Also nmethod could be deoptimized.
++ __ xabort(0);
++ }
++
+ // If cause_return is true we are at a poll_return and there is
+ // the return address on the stack to the caller on the nmethod
+ // that is safepoint. We can leave this return on the stack and
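
The movptr -> movl change when resetting the JNI handle block recurs in several files below. The point: the field at top_offset_in_bytes() is a 32-bit int, so a pointer-width store on LP64 would also clobber whatever is laid out after it. A minimal illustration with a hypothetical layout (not the real JNIHandleBlock):

    #include <cstdint>

    struct HandleBlockish {
        int32_t top;        // the field being reset
        int32_t neighbour;  // an unrelated adjacent field
    };

    void reset_top(HandleBlockish* b) {
        b->top = 0;  // 32-bit store (movl): 'neighbour' stays intact
        // A 64-bit store through &b->top (movptr on LP64) would zero
        // 'neighbour' as well -- the bug the patch avoids.
    }
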
+--- ./hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -889,7 +889,9 @@
+
+ int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
+ VMRegPair *regs,
++ VMRegPair *regs2,
+ int total_args_passed) {
++ assert(regs2 == NULL, "not needed on x86");
+ // We return the amount of VMRegImpl stack slots we need to reserve for all
+ // the arguments NOT counting out_preserve_stack_slots.
+
+@@ -1857,7 +1859,7 @@
+ // Now figure out where the args must be stored and how much stack space
+ // they require.
+ int out_arg_slots;
+- out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
++ out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
+
+ // Compute framesize for the wrapper. We need to handlize all oops in
+ // incoming registers
+@@ -2010,6 +2012,13 @@
+ // Frame is now completed as far as size and linkage.
+ int frame_complete = ((intptr_t)__ pc()) - start;
+
++ if (UseRTMLocking) {
++ // Abort RTM transaction before calling JNI
++ // because critical section will be large and will be
++ // aborted anyway. Also nmethod could be deoptimized.
++ __ xabort(0);
++ }
++
+ #ifdef ASSERT
+ {
+ Label L;
+@@ -2500,7 +2509,7 @@
+ if (!is_critical_native) {
+ // reset handle block
+ __ movptr(rcx, Address(r15_thread, JavaThread::active_handles_offset()));
+- __ movptr(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), (int32_t)NULL_WORD);
++ __ movl(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), (int32_t)NULL_WORD);
+ }
+
+ // pop our frame
+@@ -2761,7 +2770,7 @@
+ // the 1st six register arguments). It's weird see int_stk_helper.
+
+ int out_arg_slots;
+- out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
++ out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
+
+ // Calculate the total number of stack slots we will need.
+
+@@ -3475,11 +3484,15 @@
+ // restore rbp before stack bang because if stack overflow is thrown it needs to be pushed (and preserved)
+ __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
+
+- // Stack bang to make sure there's enough room for these interpreter frames.
++#ifdef ASSERT
++ // Compilers generate code that bangs the stack by as much as the
++ // interpreter would need. So this stack banging should never
++ // trigger a fault. Verify that it does not on non-product builds.
+ if (UseStackBanging) {
+ __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
+ __ bang_stack_size(rbx, rcx);
+ }
++#endif
+
+ // Load address of array of frame pcs into rcx
+ __ movptr(rcx, Address(rdi, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
+@@ -3610,6 +3623,11 @@
+
+ address start = __ pc();
+
++ if (UseRTMLocking) {
++ // Abort RTM transaction before possible nmethod deoptimization.
++ __ xabort(0);
++ }
++
+ // Push self-frame. We get here with a return address on the
+ // stack, so rsp is 8-byte aligned until we allocate our frame.
+ __ subptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Epilog!
+@@ -3668,11 +3686,15 @@
+ // restore rbp before stack bang because if stack overflow is thrown it needs to be pushed (and preserved)
+ __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
+
+- // Stack bang to make sure there's enough room for these interpreter frames.
++#ifdef ASSERT
++ // Compilers generate code that bangs the stack by as much as the
++ // interpreter would need. So this stack banging should never
++ // trigger a fault. Verify that it does not on non-product builds.
+ if (UseStackBanging) {
+ __ movl(rbx, Address(rdi ,Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
+ __ bang_stack_size(rbx, rcx);
+ }
++#endif
+
+ // Load address of array of frame pcs into rcx (address*)
+ __ movptr(rcx, Address(rdi, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
+@@ -3790,6 +3812,13 @@
+ bool cause_return = (poll_type == POLL_AT_RETURN);
+ bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP);
+
++ if (UseRTMLocking) {
++ // Abort RTM transaction before calling runtime
++ // because critical section will be large and will be
++ // aborted anyway. Also nmethod could be deoptimized.
++ __ xabort(0);
++ }
++
+ // Make room for return address (or push it again)
+ if (!cause_return) {
+ __ push(rbx);
+--- ./hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -2403,6 +2403,9 @@
+ // c_rarg3 - r vector byte array address
+ // c_rarg4 - input length
+ //
++ // Output:
++ // rax - input length
++ //
+ address generate_cipherBlockChaining_encryptAESCrypt() {
+ assert(UseAES, "need AES instructions and misaligned SSE support");
+ __ align(CodeEntryAlignment);
+@@ -2483,7 +2486,7 @@
+ __ movdqu(Address(rvec, 0), xmm_result); // final value of r stored in rvec of CipherBlockChaining object
+
+ handleSOERegisters(false /*restoring*/);
+- __ movl(rax, 0); // return 0 (why?)
++ __ movptr(rax, len_param); // return length
+ __ leave(); // required for proper stackwalking of RuntimeStub frame
+ __ ret(0);
+
+@@ -2557,6 +2560,9 @@
+ // c_rarg3 - r vector byte array address
+ // c_rarg4 - input length
+ //
++ // Output:
++ // rax - input length
++ //
+
+ address generate_cipherBlockChaining_decryptAESCrypt() {
+ assert(UseAES, "need AES instructions and misaligned SSE support");
+@@ -2650,7 +2656,7 @@
+ __ movptr(rvec , rvec_param); // restore this since used in loop
+ __ movdqu(Address(rvec, 0), xmm_temp); // final value of r stored in rvec of CipherBlockChaining object
+ handleSOERegisters(false /*restoring*/);
+- __ movl(rax, 0); // return 0 (why?)
++ __ movptr(rax, len_param); // return length
+ __ leave(); // required for proper stackwalking of RuntimeStub frame
+ __ ret(0);
+
+--- ./hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -3217,6 +3217,9 @@
+ // c_rarg3 - r vector byte array address
+ // c_rarg4 - input length
+ //
++ // Output:
++ // rax - input length
++ //
+ address generate_cipherBlockChaining_encryptAESCrypt() {
+ assert(UseAES, "need AES instructions and misaligned SSE support");
+ __ align(CodeEntryAlignment);
+@@ -3232,7 +3235,7 @@
+ #ifndef _WIN64
+ const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16)
+ #else
+- const Address len_mem(rsp, 6 * wordSize); // length is on stack on Win64
++ const Address len_mem(rbp, 6 * wordSize); // length is on stack on Win64
+ const Register len_reg = r10; // pick the first volatile windows register
+ #endif
+ const Register pos = rax;
+@@ -3259,6 +3262,8 @@
+ for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
+ __ movdqu(xmm_save(i), as_XMMRegister(i));
+ }
++#else
++ __ push(len_reg); // Save
+ #endif
+
+ const XMMRegister xmm_key_shuf_mask = xmm_temp; // used temporarily to swap key bytes up front
+@@ -3301,8 +3306,10 @@
+ for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
+ __ movdqu(as_XMMRegister(i), xmm_save(i));
+ }
++ __ movl(rax, len_mem);
++#else
++ __ pop(rax); // return length
+ #endif
+- __ movl(rax, 0); // return 0 (why?)
+ __ leave(); // required for proper stackwalking of RuntimeStub frame
+ __ ret(0);
+
+@@ -3409,6 +3416,9 @@
+ // c_rarg3 - r vector byte array address
+ // c_rarg4 - input length
+ //
++ // Output:
++ // rax - input length
++ //
+
+ address generate_cipherBlockChaining_decryptAESCrypt_Parallel() {
+ assert(UseAES, "need AES instructions and misaligned SSE support");
+@@ -3427,7 +3437,7 @@
+ #ifndef _WIN64
+ const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16)
+ #else
+- const Address len_mem(rsp, 6 * wordSize); // length is on stack on Win64
++ const Address len_mem(rbp, 6 * wordSize); // length is on stack on Win64
+ const Register len_reg = r10; // pick the first volatile windows register
+ #endif
+ const Register pos = rax;
+@@ -3448,7 +3458,10 @@
+ for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
+ __ movdqu(xmm_save(i), as_XMMRegister(i));
+ }
++#else
++ __ push(len_reg); // Save
+ #endif
++
+ // the java expanded key ordering is rotated one position from what we want
+ // so we start from 0x10 here and hit 0x00 last
+ const XMMRegister xmm_key_shuf_mask = xmm1; // used temporarily to swap key bytes up front
+@@ -3554,8 +3567,10 @@
+ for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
+ __ movdqu(as_XMMRegister(i), xmm_save(i));
+ }
++ __ movl(rax, len_mem);
++#else
++ __ pop(rax); // return length
+ #endif
+- __ movl(rax, 0); // return 0 (why?)
+ __ leave(); // required for proper stackwalking of RuntimeStub frame
+ __ ret(0);
+
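
The len_mem change from rsp- to rbp-relative addressing matters because the stub keeps moving rsp (pushes, frame setup) after its prolog, while rbp stays fixed. Assuming the standard Win64 convention of four register arguments plus 32 bytes of shadow space, the fifth argument (the input length) sits at a constant offset from rbp:

    // Offsets in 8-byte words after 'enter' (push rbp; mov rbp, rsp):
    //
    //   [rbp + 0*8]  saved rbp
    //   [rbp + 1*8]  return address
    //   [rbp + 2*8]  shadow slot for rcx (1st arg)
    //   [rbp + 3*8]  shadow slot for rdx (2nd arg)
    //   [rbp + 4*8]  shadow slot for r8  (3rd arg)
    //   [rbp + 5*8]  shadow slot for r9  (4th arg)
    //   [rbp + 6*8]  5th argument  ==>  Address(rbp, 6 * wordSize)
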
+--- ./hotspot/src/cpu/x86/vm/templateInterpreter_x86.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/cpu/x86/vm/templateInterpreter_x86.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,124 @@
++/*
++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "ci/ciMethod.hpp"
++#include "interpreter/interpreter.hpp"
++#include "runtime/frame.inline.hpp"
++
++#ifndef CC_INTERP
++
++// asm based interpreter deoptimization helpers
++int AbstractInterpreter::size_activation(int max_stack,
++ int temps,
++ int extra_args,
++ int monitors,
++ int callee_params,
++ int callee_locals,
++ bool is_top_frame) {
++ // Note: This calculation must exactly parallel the frame setup
++ // in AbstractInterpreterGenerator::generate_method_entry.
++
++ // fixed size of an interpreter frame:
++ int overhead = frame::sender_sp_offset -
++ frame::interpreter_frame_initial_sp_offset;
++ // Our locals were accounted for by the caller (or last_frame_adjust
++ // on the transition). Since the callee parameters already account
++ // for the callee's params we only need to account for the extra
++ // locals.
++ int size = overhead +
++ (callee_locals - callee_params)*Interpreter::stackElementWords +
++ monitors * frame::interpreter_frame_monitor_size() +
++ temps* Interpreter::stackElementWords + extra_args;
++
++ return size;
++}
++
++void AbstractInterpreter::layout_activation(Method* method,
++ int tempcount,
++ int popframe_extra_args,
++ int moncount,
++ int caller_actual_parameters,
++ int callee_param_count,
++ int callee_locals,
++ frame* caller,
++ frame* interpreter_frame,
++ bool is_top_frame,
++ bool is_bottom_frame) {
++ // The frame interpreter_frame is guaranteed to be the right size,
++ // as determined by a previous call to the size_activation() method.
++ // It is also guaranteed to be walkable even though it is in a
++ // skeletal state
++
++ int max_locals = method->max_locals() * Interpreter::stackElementWords;
++ int extra_locals = (method->max_locals() - method->size_of_parameters()) *
++ Interpreter::stackElementWords;
++
++#ifdef ASSERT
++ if (!EnableInvokeDynamic) {
++ // @@@ FIXME: Should we correct interpreter_frame_sender_sp in the calling sequences?
++ // Probably, since deoptimization doesn't work yet.
++ assert(caller->unextended_sp() == interpreter_frame->interpreter_frame_sender_sp(), "Frame not properly walkable");
++ }
++ assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable(2)");
++#endif
++
++ interpreter_frame->interpreter_frame_set_method(method);
++ // NOTE the difference in using sender_sp and
++ // interpreter_frame_sender_sp interpreter_frame_sender_sp is
++ // the original sp of the caller (the unextended_sp) and
++ // sender_sp is fp+8/16 (32bit/64bit) XXX
++ intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1;
++
++#ifdef ASSERT
++ if (caller->is_interpreted_frame()) {
++ assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement");
++ }
++#endif
++
++ interpreter_frame->interpreter_frame_set_locals(locals);
++ BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin();
++ BasicObjectLock* monbot = montop - moncount;
++ interpreter_frame->interpreter_frame_set_monitor_end(monbot);
++
++ // Set last_sp
++ intptr_t* esp = (intptr_t*) monbot -
++ tempcount*Interpreter::stackElementWords -
++ popframe_extra_args;
++ interpreter_frame->interpreter_frame_set_last_sp(esp);
++
++ // All frames but the initial (oldest) interpreter frame we fill in have
++ // a value for sender_sp that allows walking the stack but isn't
++ // truly correct. Correct the value here.
++ if (extra_locals != 0 &&
++ interpreter_frame->sender_sp() ==
++ interpreter_frame->interpreter_frame_sender_sp()) {
++ interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() +
++ extra_locals);
++ }
++ *interpreter_frame->interpreter_frame_cache_addr() =
++ method->constants()->cache();
++}
++
++#endif // CC_INTERP
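
A worked instance of size_activation() above, assuming stackElementWords == 1 and a 2-word BasicObjectLock (both hold on x86_64), with the overhead constant left symbolic and the inputs chosen arbitrarily:

    // temps = 3, extra_args = 0, monitors = 1,
    // callee_params = 2, callee_locals = 5:
    //
    //   size = overhead
    //        + (5 - 2) * 1   // extra (non-parameter) locals
    //        + 1 * 2         // one monitor
    //        + 3 * 1 + 0     // expression-stack temps
    //        = overhead + 8  // words
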
+--- ./hotspot/src/cpu/x86/vm/templateInterpreter_x86_32.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/x86/vm/templateInterpreter_x86_32.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -1287,7 +1287,7 @@
+
+ // reset handle block
+ __ movptr(t, Address(thread, JavaThread::active_handles_offset()));
+- __ movptr(Address(t, JNIHandleBlock::top_offset_in_bytes()), NULL_WORD);
++ __ movl(Address(t, JNIHandleBlock::top_offset_in_bytes()), NULL_WORD);
+
+ // If result was an oop then unbox and save it in the frame
+ { Label L;
+@@ -1686,91 +1686,6 @@
+ return overhead_size + method_stack + stub_code;
+ }
+
+-// asm based interpreter deoptimization helpers
+-
+-int AbstractInterpreter::layout_activation(Method* method,
+- int tempcount,
+- int popframe_extra_args,
+- int moncount,
+- int caller_actual_parameters,
+- int callee_param_count,
+- int callee_locals,
+- frame* caller,
+- frame* interpreter_frame,
+- bool is_top_frame,
+- bool is_bottom_frame) {
+- // Note: This calculation must exactly parallel the frame setup
+- // in AbstractInterpreterGenerator::generate_method_entry.
+- // If interpreter_frame!=NULL, set up the method, locals, and monitors.
+- // The frame interpreter_frame, if not NULL, is guaranteed to be the right size,
+- // as determined by a previous call to this method.
+- // It is also guaranteed to be walkable even though it is in a skeletal state
+- // NOTE: return size is in words not bytes
+-
+- // fixed size of an interpreter frame:
+- int max_locals = method->max_locals() * Interpreter::stackElementWords;
+- int extra_locals = (method->max_locals() - method->size_of_parameters()) *
+- Interpreter::stackElementWords;
+-
+- int overhead = frame::sender_sp_offset - frame::interpreter_frame_initial_sp_offset;
+-
+- // Our locals were accounted for by the caller (or last_frame_adjust on the transistion)
+- // Since the callee parameters already account for the callee's params we only need to account for
+- // the extra locals.
+-
+-
+- int size = overhead +
+- ((callee_locals - callee_param_count)*Interpreter::stackElementWords) +
+- (moncount*frame::interpreter_frame_monitor_size()) +
+- tempcount*Interpreter::stackElementWords + popframe_extra_args;
+-
+- if (interpreter_frame != NULL) {
+-#ifdef ASSERT
+- if (!EnableInvokeDynamic)
+- // @@@ FIXME: Should we correct interpreter_frame_sender_sp in the calling sequences?
+- // Probably, since deoptimization doesn't work yet.
+- assert(caller->unextended_sp() == interpreter_frame->interpreter_frame_sender_sp(), "Frame not properly walkable");
+- assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable(2)");
+-#endif
+-
+- interpreter_frame->interpreter_frame_set_method(method);
+- // NOTE the difference in using sender_sp and interpreter_frame_sender_sp
+- // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp)
+- // and sender_sp is fp+8
+- intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1;
+-
+-#ifdef ASSERT
+- if (caller->is_interpreted_frame()) {
+- assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement");
+- }
+-#endif
+-
+- interpreter_frame->interpreter_frame_set_locals(locals);
+- BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin();
+- BasicObjectLock* monbot = montop - moncount;
+- interpreter_frame->interpreter_frame_set_monitor_end(monbot);
+-
+- // Set last_sp
+- intptr_t* rsp = (intptr_t*) monbot -
+- tempcount*Interpreter::stackElementWords -
+- popframe_extra_args;
+- interpreter_frame->interpreter_frame_set_last_sp(rsp);
+-
+- // All frames but the initial (oldest) interpreter frame we fill in have a
+- // value for sender_sp that allows walking the stack but isn't
+- // truly correct. Correct the value here.
+-
+- if (extra_locals != 0 &&
+- interpreter_frame->sender_sp() == interpreter_frame->interpreter_frame_sender_sp() ) {
+- interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + extra_locals);
+- }
+- *interpreter_frame->interpreter_frame_cache_addr() =
+- method->constants()->cache();
+- }
+- return size;
+-}
+-
+-
+ //------------------------------------------------------------------------------------------------------------------------
+ // Exceptions
+
+--- ./hotspot/src/cpu/x86/vm/templateInterpreter_x86_64.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/x86/vm/templateInterpreter_x86_64.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -1259,7 +1259,7 @@
+
+ // reset handle block
+ __ movptr(t, Address(r15_thread, JavaThread::active_handles_offset()));
+- __ movptr(Address(t, JNIHandleBlock::top_offset_in_bytes()), (int32_t)NULL_WORD);
++ __ movl(Address(t, JNIHandleBlock::top_offset_in_bytes()), (int32_t)NULL_WORD);
+
+ // If result is an oop unbox and store it in frame where gc will see it
+ // and result handler will pick it up
+@@ -1695,87 +1695,6 @@
+ return (overhead_size + method_stack + stub_code);
+ }
+
+-int AbstractInterpreter::layout_activation(Method* method,
+- int tempcount,
+- int popframe_extra_args,
+- int moncount,
+- int caller_actual_parameters,
+- int callee_param_count,
+- int callee_locals,
+- frame* caller,
+- frame* interpreter_frame,
+- bool is_top_frame,
+- bool is_bottom_frame) {
+- // Note: This calculation must exactly parallel the frame setup
+- // in AbstractInterpreterGenerator::generate_method_entry.
+- // If interpreter_frame!=NULL, set up the method, locals, and monitors.
+- // The frame interpreter_frame, if not NULL, is guaranteed to be the
+- // right size, as determined by a previous call to this method.
+- // It is also guaranteed to be walkable even though it is in a skeletal state
+-
+- // fixed size of an interpreter frame:
+- int max_locals = method->max_locals() * Interpreter::stackElementWords;
+- int extra_locals = (method->max_locals() - method->size_of_parameters()) *
+- Interpreter::stackElementWords;
+-
+- int overhead = frame::sender_sp_offset -
+- frame::interpreter_frame_initial_sp_offset;
+- // Our locals were accounted for by the caller (or last_frame_adjust
+- // on the transistion) Since the callee parameters already account
+- // for the callee's params we only need to account for the extra
+- // locals.
+- int size = overhead +
+- (callee_locals - callee_param_count)*Interpreter::stackElementWords +
+- moncount * frame::interpreter_frame_monitor_size() +
+- tempcount* Interpreter::stackElementWords + popframe_extra_args;
+- if (interpreter_frame != NULL) {
+-#ifdef ASSERT
+- if (!EnableInvokeDynamic)
+- // @@@ FIXME: Should we correct interpreter_frame_sender_sp in the calling sequences?
+- // Probably, since deoptimization doesn't work yet.
+- assert(caller->unextended_sp() == interpreter_frame->interpreter_frame_sender_sp(), "Frame not properly walkable");
+- assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable(2)");
+-#endif
+-
+- interpreter_frame->interpreter_frame_set_method(method);
+- // NOTE the difference in using sender_sp and
+- // interpreter_frame_sender_sp interpreter_frame_sender_sp is
+- // the original sp of the caller (the unextended_sp) and
+- // sender_sp is fp+16 XXX
+- intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1;
+-
+-#ifdef ASSERT
+- if (caller->is_interpreted_frame()) {
+- assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement");
+- }
+-#endif
+-
+- interpreter_frame->interpreter_frame_set_locals(locals);
+- BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin();
+- BasicObjectLock* monbot = montop - moncount;
+- interpreter_frame->interpreter_frame_set_monitor_end(monbot);
+-
+- // Set last_sp
+- intptr_t* esp = (intptr_t*) monbot -
+- tempcount*Interpreter::stackElementWords -
+- popframe_extra_args;
+- interpreter_frame->interpreter_frame_set_last_sp(esp);
+-
+- // All frames but the initial (oldest) interpreter frame we fill in have
+- // a value for sender_sp that allows walking the stack but isn't
+- // truly correct. Correct the value here.
+- if (extra_locals != 0 &&
+- interpreter_frame->sender_sp() ==
+- interpreter_frame->interpreter_frame_sender_sp()) {
+- interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() +
+- extra_locals);
+- }
+- *interpreter_frame->interpreter_frame_cache_addr() =
+- method->constants()->cache();
+- }
+- return size;
+-}
+-
+ //-----------------------------------------------------------------------------
+ // Exceptions
+
+--- ./hotspot/src/cpu/x86/vm/vm_version_x86.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/x86/vm/vm_version_x86.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+@@ -50,13 +50,18 @@
+ const char* VM_Version::_features_str = "";
+ VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
+
++// Address of instruction which causes SEGV
++address VM_Version::_cpuinfo_segv_addr = 0;
++// Address of instruction after the one which causes SEGV
++address VM_Version::_cpuinfo_cont_addr = 0;
++
+ static BufferBlob* stub_blob;
+-static const int stub_size = 550;
++static const int stub_size = 600;
+
+ extern "C" {
+- typedef void (*getPsrInfo_stub_t)(void*);
++ typedef void (*get_cpu_info_stub_t)(void*);
+ }
+-static getPsrInfo_stub_t getPsrInfo_stub = NULL;
++static get_cpu_info_stub_t get_cpu_info_stub = NULL;
+
+
+ class VM_Version_StubGenerator: public StubCodeGenerator {
+@@ -64,7 +69,7 @@
+
+ VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
+
+- address generate_getPsrInfo() {
++ address generate_get_cpu_info() {
+ // Flags to test CPU type.
+ const uint32_t HS_EFL_AC = 0x40000;
+ const uint32_t HS_EFL_ID = 0x200000;
+@@ -76,13 +81,13 @@
+ Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
+ Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, done;
+
+- StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub");
++ StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
+ # define __ _masm->
+
+ address start = __ pc();
+
+ //
+- // void getPsrInfo(VM_Version::CpuidInfo* cpuid_info);
++ // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
+ //
+ // LP64: rcx and rdx are first and second argument registers on windows
+
+@@ -234,9 +239,9 @@
+ // Check if OS has enabled XGETBV instruction to access XCR0
+ // (OSXSAVE feature flag) and CPU supports AVX
+ //
+- __ andl(rcx, 0x18000000);
++ __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
+ __ cmpl(rcx, 0x18000000);
+- __ jccb(Assembler::notEqual, sef_cpuid);
++ __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported
+
+ //
+ // XCR0, XFEATURE_ENABLED_MASK register
+@@ -247,6 +252,53 @@
+ __ movl(Address(rsi, 0), rax);
+ __ movl(Address(rsi, 4), rdx);
+
++ __ andl(rax, 0x6); // xcr0 bits sse | ymm
++ __ cmpl(rax, 0x6);
++ __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported
++
++ //
++ // Some OSs have a bug when upper 128bits of YMM
++ // registers are not restored after a signal processing.
++ // Generate SEGV here (reference through NULL)
++ // and check upper YMM bits after it.
++ //
++ VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
++ intx saved_useavx = UseAVX;
++ intx saved_usesse = UseSSE;
++ UseAVX = 1;
++ UseSSE = 2;
++
++ // load value into all 32 bytes of ymm7 register
++ __ movl(rcx, VM_Version::ymm_test_value());
++
++ __ movdl(xmm0, rcx);
++ __ pshufd(xmm0, xmm0, 0x00);
++ __ vinsertf128h(xmm0, xmm0, xmm0);
++ __ vmovdqu(xmm7, xmm0);
++#ifdef _LP64
++ __ vmovdqu(xmm8, xmm0);
++ __ vmovdqu(xmm15, xmm0);
++#endif
++
++ __ xorl(rsi, rsi);
++ VM_Version::set_cpuinfo_segv_addr( __ pc() );
++ // Generate SEGV
++ __ movl(rax, Address(rsi, 0));
++
++ VM_Version::set_cpuinfo_cont_addr( __ pc() );
++ // Returns here after signal. Save xmm0 to check it later.
++ __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
++ __ vmovdqu(Address(rsi, 0), xmm0);
++ __ vmovdqu(Address(rsi, 32), xmm7);
++#ifdef _LP64
++ __ vmovdqu(Address(rsi, 64), xmm8);
++ __ vmovdqu(Address(rsi, 96), xmm15);
++#endif
++
++ VM_Version::clean_cpuFeatures();
++ UseAVX = saved_useavx;
++ UseSSE = saved_usesse;
++
+ //
+ // cpuid(0x7) Structured Extended Features
+ //
+@@ -339,6 +391,14 @@
+ };
+
+
++void VM_Version::get_cpu_info_wrapper() {
++ get_cpu_info_stub(&_cpuid_info);
++}
++
++#ifndef CALL_TEST_FUNC_WITH_WRAPPER_IF_NEEDED
++ #define CALL_TEST_FUNC_WITH_WRAPPER_IF_NEEDED(f) f()
++#endif
++
+ void VM_Version::get_processor_features() {
+
+ _cpu = 4; // 486 by default
+@@ -349,7 +409,11 @@
+
+ if (!Use486InstrsOnly) {
+ // Get raw processor info
+- getPsrInfo_stub(&_cpuid_info);
++
++ // Some platforms (like Win*) need a wrapper around here
++ // in order to properly handle SEGV for YMM registers test.
++ CALL_TEST_FUNC_WITH_WRAPPER_IF_NEEDED(get_cpu_info_wrapper);
++
+ assert_is_initialized();
+ _cpu = extended_cpu_family();
+ _model = extended_cpu_model();
+@@ -429,7 +493,7 @@
+ }
+
+ char buf[256];
+- jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
++ jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+ cores_per_cpu(), threads_per_core(),
+ cpu_family(), _model, _stepping,
+ (supports_cmov() ? ", cmov" : ""),
+@@ -446,8 +510,9 @@
+ (supports_avx() ? ", avx" : ""),
+ (supports_avx2() ? ", avx2" : ""),
+ (supports_aes() ? ", aes" : ""),
+- (supports_clmul() ? ", clmul" : ""),
++ (supports_clmul() ? ", clmul" : ""),
+ (supports_erms() ? ", erms" : ""),
++ (supports_rtm() ? ", rtm" : ""),
+ (supports_mmx_ext() ? ", mmxext" : ""),
+ (supports_3dnow_prefetch() ? ", 3dnowpref" : ""),
+ (supports_lzcnt() ? ", lzcnt": ""),
+@@ -455,7 +520,9 @@
+ (supports_ht() ? ", ht": ""),
+ (supports_tsc() ? ", tsc": ""),
+ (supports_tscinv_bit() ? ", tscinvbit": ""),
+- (supports_tscinv() ? ", tscinv": ""));
++ (supports_tscinv() ? ", tscinv": ""),
++ (supports_bmi1() ? ", bmi1" : ""),
++ (supports_bmi2() ? ", bmi2" : ""));
+ _features_str = strdup(buf);
+
+ // UseSSE is set to the smaller of what hardware supports and what
+@@ -486,7 +553,7 @@
+ }
+ } else if (UseAES) {
+ if (!FLAG_IS_DEFAULT(UseAES))
+- warning("AES instructions not available on this CPU");
++ warning("AES instructions are not available on this CPU");
+ FLAG_SET_DEFAULT(UseAES, false);
+ }
+
+@@ -519,10 +586,57 @@
+ }
+ } else if (UseAESIntrinsics) {
+ if (!FLAG_IS_DEFAULT(UseAESIntrinsics))
+- warning("AES intrinsics not available on this CPU");
++ warning("AES intrinsics are not available on this CPU");
+ FLAG_SET_DEFAULT(UseAESIntrinsics, false);
+ }
+
++ // Adjust RTM (Restricted Transactional Memory) flags
++ if (!supports_rtm() && UseRTMLocking) {
++ // Can't continue because UseRTMLocking affects UseBiasedLocking flag
++ // setting during arguments processing. See use_biased_locking().
++ // VM_Version_init() is executed after UseBiasedLocking is used
++ // in Thread::allocate().
++ vm_exit_during_initialization("RTM instructions are not available on this CPU");
++ }
++
++#if INCLUDE_RTM_OPT
++ if (UseRTMLocking) {
++ if (!FLAG_IS_CMDLINE(UseRTMLocking)) {
++ // RTM locking should be used only for applications with
++ // high lock contention. For now we do not use it by default.
++ vm_exit_during_initialization("UseRTMLocking flag should only be set on the command line");
++ }
++ if (!is_power_of_2(RTMTotalCountIncrRate)) {
++ warning("RTMTotalCountIncrRate must be a power of 2, resetting it to 64");
++ FLAG_SET_DEFAULT(RTMTotalCountIncrRate, 64);
++ }
++ if (RTMAbortRatio < 0 || RTMAbortRatio > 100) {
++ warning("RTMAbortRatio must be in the range 0 to 100, resetting it to 50");
++ FLAG_SET_DEFAULT(RTMAbortRatio, 50);
++ }
++ } else { // !UseRTMLocking
++ if (UseRTMForStackLocks) {
++ if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) {
++ warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off");
++ }
++ FLAG_SET_DEFAULT(UseRTMForStackLocks, false);
++ }
++ if (UseRTMDeopt) {
++ FLAG_SET_DEFAULT(UseRTMDeopt, false);
++ }
++ if (PrintPreciseRTMLockingStatistics) {
++ FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false);
++ }
++ }
++#else
++ if (UseRTMLocking) {
++ // Only C2 does RTM locking optimization.
++ // Can't continue because UseRTMLocking affects UseBiasedLocking flag
++ // setting during arguments processing. See use_biased_locking().
++ vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
++ }
++#endif
++
+ #ifdef COMPILER2
+ if (UseFPUForSpilling) {
+ if (UseSSE < 2) {
+@@ -538,14 +652,28 @@
+ if (MaxVectorSize > 32) {
+ FLAG_SET_DEFAULT(MaxVectorSize, 32);
+ }
+- if (MaxVectorSize > 16 && UseAVX == 0) {
+- // Only supported with AVX+
++ if (MaxVectorSize > 16 && (UseAVX == 0 || !os_supports_avx_vectors())) {
++ // 32-byte vectors (in YMM) are only supported with AVX+
+ FLAG_SET_DEFAULT(MaxVectorSize, 16);
+ }
+ if (UseSSE < 2) {
+- // Only supported with SSE2+
++ // Vectors (in XMM) are only supported with SSE2+
+ FLAG_SET_DEFAULT(MaxVectorSize, 0);
+ }
++#ifdef ASSERT
++ if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
++ tty->print_cr("State of YMM registers after signal handle:");
++ int nreg = 2 LP64_ONLY(+2);
++ const char* ymm_name[4] = {"0", "7", "8", "15"};
++ for (int i = 0; i < nreg; i++) {
++ tty->print("YMM%s:", ymm_name[i]);
++ for (int j = 7; j >=0; j--) {
++ tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
++ }
++ tty->cr();
++ }
++ }
++#endif
+ }
+ #endif
+
+@@ -600,13 +728,6 @@
+ }
+ }
+
+- // Use count leading zeros count instruction if available.
+- if (supports_lzcnt()) {
+- if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
+- UseCountLeadingZerosInstruction = true;
+- }
+- }
+-
+ // some defaults for AMD family 15h
+ if ( cpu_family() == 0x15 ) {
+ // On family 15h processors default is no sw prefetch
+@@ -683,14 +804,35 @@
+ }
+ }
+ }
+-#if defined(COMPILER2) && defined(_ALLBSD_SOURCE)
+- if (MaxVectorSize > 16) {
+- // Limit vectors size to 16 bytes on BSD until it fixes
+- // restoring upper 128bit of YMM registers on return
+- // from signal handler.
+- FLAG_SET_DEFAULT(MaxVectorSize, 16);
++
++ // Use count leading zeros count instruction if available.
++ if (supports_lzcnt()) {
++ if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
++ UseCountLeadingZerosInstruction = true;
+ }
+-#endif // COMPILER2
++ } else if (UseCountLeadingZerosInstruction) {
++ warning("lzcnt instruction is not available on this CPU");
++ FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
++ }
++
++ if (supports_bmi1()) {
++ if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
++ UseBMI1Instructions = true;
++ }
++ } else if (UseBMI1Instructions) {
++ warning("BMI1 instructions are not available on this CPU");
++ FLAG_SET_DEFAULT(UseBMI1Instructions, false);
++ }
++
++ // Use count trailing zeros instruction if available
++ if (supports_bmi1()) {
++ if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
++ UseCountTrailingZerosInstruction = UseBMI1Instructions;
++ }
++ } else if (UseCountTrailingZerosInstruction) {
++ warning("tzcnt instruction is not available on this CPU");
++ FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
++ }
+
+ // Use population count instruction if available.
+ if (supports_popcnt()) {
+@@ -783,13 +925,18 @@
+ if (PrintMiscellaneous && Verbose) {
+ tty->print_cr("Logical CPUs per core: %u",
+ logical_processors_per_package());
+- tty->print("UseSSE=%d",UseSSE);
++ tty->print("UseSSE=%d", (int) UseSSE);
+ if (UseAVX > 0) {
+- tty->print(" UseAVX=%d",UseAVX);
++ tty->print(" UseAVX=%d", (int) UseAVX);
+ }
+ if (UseAES) {
+ tty->print(" UseAES=1");
+ }
++#ifdef COMPILER2
++ if (MaxVectorSize > 0) {
++ tty->print(" MaxVectorSize=%d", (int) MaxVectorSize);
++ }
++#endif
+ tty->cr();
+ tty->print("Allocation");
+ if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) {
+@@ -810,40 +957,61 @@
+ }
+ }
+ if (AllocatePrefetchLines > 1) {
+- tty->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
++ tty->print_cr(" at distance %d, %d lines of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchLines, (int) AllocatePrefetchStepSize);
+ } else {
+- tty->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
++ tty->print_cr(" at distance %d, one line of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchStepSize);
+ }
+ }
+
+ if (PrefetchCopyIntervalInBytes > 0) {
+- tty->print_cr("PrefetchCopyIntervalInBytes %d", PrefetchCopyIntervalInBytes);
++ tty->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
+ }
+ if (PrefetchScanIntervalInBytes > 0) {
+- tty->print_cr("PrefetchScanIntervalInBytes %d", PrefetchScanIntervalInBytes);
++ tty->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
+ }
+ if (PrefetchFieldsAhead > 0) {
+- tty->print_cr("PrefetchFieldsAhead %d", PrefetchFieldsAhead);
++ tty->print_cr("PrefetchFieldsAhead %d", (int) PrefetchFieldsAhead);
+ }
+ if (ContendedPaddingWidth > 0) {
+- tty->print_cr("ContendedPaddingWidth %d", ContendedPaddingWidth);
++ tty->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
+ }
+ }
+ #endif // !PRODUCT
+ }
+
++bool VM_Version::use_biased_locking() {
++#if INCLUDE_RTM_OPT
++ // RTM locking is most useful when there is high lock contention and
++ // low data contention. With high lock contention the lock is usually
++ // inflated and biased locking is not suitable for that case.
++ // RTM locking code requires that biased locking is off.
++ // Note: we can't switch off UseBiasedLocking in get_processor_features()
++ // because it is used by Thread::allocate() which is called before
++ // VM_Version::initialize().
++ if (UseRTMLocking && UseBiasedLocking) {
++ if (FLAG_IS_DEFAULT(UseBiasedLocking)) {
++ FLAG_SET_DEFAULT(UseBiasedLocking, false);
++ } else {
++ warning("Biased locking is not supported with RTM locking; ignoring UseBiasedLocking flag." );
++ UseBiasedLocking = false;
++ }
++ }
++#endif
++ return UseBiasedLocking;
++}
++
+ void VM_Version::initialize() {
+ ResourceMark rm;
+ // Making this stub must be FIRST use of assembler
+
+- stub_blob = BufferBlob::create("getPsrInfo_stub", stub_size);
++ stub_blob = BufferBlob::create("get_cpu_info_stub", stub_size);
+ if (stub_blob == NULL) {
+- vm_exit_during_initialization("Unable to allocate getPsrInfo_stub");
++ vm_exit_during_initialization("Unable to allocate get_cpu_info_stub");
+ }
+ CodeBuffer c(stub_blob);
+ VM_Version_StubGenerator g(&c);
+- getPsrInfo_stub = CAST_TO_FN_PTR(getPsrInfo_stub_t,
+- g.generate_getPsrInfo());
++ get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
++ g.generate_get_cpu_info());
+
+ get_processor_features();
+ }
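
The stub's 0x18000000 mask is cpuid leaf 1 ECX bits 27 (OSXSAVE) and 28 (AVX), and the new xcr0 & 0x6 test verifies that the OS actually enabled XMM and YMM state. The same probe written as ordinary user code (GCC/Clang sketch; _xgetbv needs -mxsave, and os_supports_avx is a hypothetical name):

    #include <cpuid.h>
    #include <immintrin.h>   // _xgetbv

    bool os_supports_avx() {
        unsigned eax, ebx, ecx, edx;
        if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
            return false;
        const unsigned osxsave_avx = (1u << 27) | (1u << 28);  // 0x18000000
        if ((ecx & osxsave_avx) != osxsave_avx)
            return false;                      // AVX off or OS opted out
        unsigned long long xcr0 = _xgetbv(0);  // XCR0 register
        return (xcr0 & 0x6) == 0x6;            // bit 1: XMM, bit 2: YMM
    }
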
+--- ./hotspot/src/cpu/x86/vm/vm_version_x86.hpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/x86/vm/vm_version_x86.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -141,7 +141,8 @@
+ struct {
+ uint32_t LahfSahf : 1,
+ CmpLegacy : 1,
+- : 4,
++ : 3,
++ lzcnt_intel : 1,
+ lzcnt : 1,
+ sse4a : 1,
+ misalignsse : 1,
+@@ -206,7 +207,9 @@
+ : 2,
+ bmi2 : 1,
+ erms : 1,
+- : 22;
++ : 1,
++ rtm : 1,
++ : 20;
+ } bits;
+ };
+
+@@ -228,6 +231,9 @@
+ // 0 if this instruction is not available
+ static const char* _features_str;
+
++ static address _cpuinfo_segv_addr; // address of instruction which causes SEGV
++ static address _cpuinfo_cont_addr; // address of instruction after the one which causes SEGV
++
+ enum {
+ CPU_CX8 = (1 << 0), // next bits are from cpuid 1 (EDX)
+ CPU_CMOV = (1 << 1),
+@@ -251,7 +257,10 @@
+ CPU_AVX2 = (1 << 18),
+ CPU_AES = (1 << 19),
+ CPU_ERMS = (1 << 20), // enhanced 'rep movsb/stosb' instructions
+- CPU_CLMUL = (1 << 21) // carryless multiply for CRC
++ CPU_CLMUL = (1 << 21), // carryless multiply for CRC
++ CPU_BMI1 = (1 << 22),
++ CPU_BMI2 = (1 << 23),
++ CPU_RTM = (1 << 24) // Restricted Transactional Memory instructions
+ } cpuFeatureFlags;
+
+ enum {
+@@ -358,6 +367,9 @@
+ // extended control register XCR0 (the XFEATURE_ENABLED_MASK register)
+ XemXcr0Eax xem_xcr0_eax;
+ uint32_t xem_xcr0_edx; // reserved
++
++ // Space to save ymm registers after signal handling
++ int ymm_save[8*4]; // Save ymm0, ymm7, ymm8, ymm15
+ };
+
+ // The actual cpuid info block
+@@ -423,6 +435,8 @@
+ if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0)
+ result |= CPU_AVX2;
+ }
++ if (_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0)
++ result |= CPU_BMI1;
+ if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0)
+ result |= CPU_TSC;
+ if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0)
+@@ -433,6 +447,8 @@
+ result |= CPU_ERMS;
+ if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0)
+ result |= CPU_CLMUL;
++ if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
++ result |= CPU_RTM;
+
+ // AMD features.
+ if (is_amd()) {
+@@ -444,10 +460,32 @@
+ if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
+ result |= CPU_SSE4A;
+ }
++ // Intel features.
++ if (is_intel()) {
++ if (_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
++ result |= CPU_BMI2;
++ if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
++ result |= CPU_LZCNT;
++ }
+
+ return result;
+ }
+
++ static bool os_supports_avx_vectors() {
++ if (!supports_avx()) {
++ return false;
++ }
++ // Verify that OS save/restore all bits of AVX registers
++ // during signal processing.
++ int nreg = 2 LP64_ONLY(+2);
++ for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
++ if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
++ return false;
++ }
++ }
++ return true;
++ }
++
+ static void get_processor_features();
+
+ public:
+@@ -464,10 +502,27 @@
+ static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
+ static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
+ static ByteSize xem_xcr0_offset() { return byte_offset_of(CpuidInfo, xem_xcr0_eax); }
++ static ByteSize ymm_save_offset() { return byte_offset_of(CpuidInfo, ymm_save); }
++
++ // The value used to check ymm registers after signal handling
++ static int ymm_test_value() { return 0xCAFEBABE; }
++
++ static void get_cpu_info_wrapper();
++ static void set_cpuinfo_segv_addr(address pc) { _cpuinfo_segv_addr = pc; }
++ static bool is_cpuinfo_segv_addr(address pc) { return _cpuinfo_segv_addr == pc; }
++ static void set_cpuinfo_cont_addr(address pc) { _cpuinfo_cont_addr = pc; }
++ static address cpuinfo_cont_addr() { return _cpuinfo_cont_addr; }
++
++ static void clean_cpuFeatures() { _cpuFeatures = 0; }
++ static void set_avx_cpuFeatures() { _cpuFeatures = (CPU_SSE | CPU_SSE2 | CPU_AVX); }
++
+
+ // Initialization
+ static void initialize();
+
++ // Override Abstract_VM_Version implementation
++ static bool use_biased_locking();
++
+ // Asserts
+ static void assert_is_initialized() {
+ assert(_cpuid_info.std_cpuid1_eax.bits.family != 0, "VM_Version not initialized");
+@@ -560,7 +615,9 @@
+ static bool supports_aes() { return (_cpuFeatures & CPU_AES) != 0; }
+ static bool supports_erms() { return (_cpuFeatures & CPU_ERMS) != 0; }
+ static bool supports_clmul() { return (_cpuFeatures & CPU_CLMUL) != 0; }
+-
++ static bool supports_rtm() { return (_cpuFeatures & CPU_RTM) != 0; }
++ static bool supports_bmi1() { return (_cpuFeatures & CPU_BMI1) != 0; }
++ static bool supports_bmi2() { return (_cpuFeatures & CPU_BMI2) != 0; }
+ // Intel features
+ static bool is_intel_family_core() { return is_intel() &&
+ extended_cpu_family() == CPU_FAMILY_INTEL_CORE; }
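
The new supports_bmi1/bmi2/rtm accessors are backed by cpuid leaf 7 (subleaf 0) EBX bits; per the Intel SDM these are BMI1=3, AVX2=5, BMI2=8, ERMS=9, RTM=11. A user-land sketch of the same decoding (read_leaf7 is a hypothetical helper):

    #include <cpuid.h>

    struct Leaf7 { bool bmi1, avx2, bmi2, erms, rtm; };

    Leaf7 read_leaf7() {
        unsigned eax, ebx, ecx, edx;
        Leaf7 f = {};
        if (__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx)) {
            f.bmi1 = (ebx >> 3)  & 1;
            f.avx2 = (ebx >> 5)  & 1;
            f.bmi2 = (ebx >> 8)  & 1;
            f.erms = (ebx >> 9)  & 1;
            f.rtm  = (ebx >> 11) & 1;
        }
        return f;
    }
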
+--- ./hotspot/src/cpu/x86/vm/vtableStubs_x86_32.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/x86/vm/vtableStubs_x86_32.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+@@ -118,7 +118,7 @@
+
+ if (PrintMiscellaneous && (WizardMode || Verbose)) {
+ tty->print_cr("vtable #%d at "PTR_FORMAT"[%d] left over: %d",
+- vtable_index, s->entry_point(),
++ vtable_index, p2i(s->entry_point()),
+ (int)(s->code_end() - s->entry_point()),
+ (int)(s->code_end() - __ pc()));
+ }
+@@ -199,7 +199,7 @@
+
+ if (PrintMiscellaneous && (WizardMode || Verbose)) {
+ tty->print_cr("itable #%d at "PTR_FORMAT"[%d] left over: %d",
+- itable_index, s->entry_point(),
++ itable_index, p2i(s->entry_point()),
+ (int)(s->code_end() - s->entry_point()),
+ (int)(s->code_end() - __ pc()));
+ }
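
The p2i() conversions above fix format warnings: PTR_FORMAT expects a pointer-sized integer, not a raw pointer. A self-contained sketch of the same idiom (HotSpot's own p2i lives in its shared headers; this stand-in uses uintptr_t):

    #include <cinttypes>
    #include <cstdio>

    static inline uintptr_t p2i(const void* p) {
        return reinterpret_cast<uintptr_t>(p);
    }

    void print_entry(const void* entry_point) {
        std::printf("entry at 0x%" PRIxPTR "\n", p2i(entry_point));
    }
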
+--- ./hotspot/src/cpu/x86/vm/vtableStubs_x86_64.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/x86/vm/vtableStubs_x86_64.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+@@ -35,6 +35,8 @@
+ #include "opto/runtime.hpp"
+ #endif
+
++PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
++
+ // machine-dependent part of VtableStubs: create VtableStub of correct size and
+ // initialize its code
+
+--- ./hotspot/src/cpu/x86/vm/x86.ad Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/x86/vm/x86.ad Wed Jul 30 03:51:43 2014 -0700
+@@ -474,7 +474,125 @@
+
+ %}
+
++
++//----------SOURCE BLOCK-------------------------------------------------------
++// This is a block of C++ code which provides values, functions, and
++// definitions necessary in the rest of the architecture description
++
++source_hpp %{
++// Header information of the source block.
++// Method declarations/definitions which are used outside
++// the ad-scope can conveniently be defined here.
++//
++// To keep related declarations/definitions/uses close together,
++// we switch between source %{ }% and source_hpp %{ }% freely as needed.
++
++class CallStubImpl {
++
++ //--------------------------------------------------------------
++ //---< Used for optimization in Compile::shorten_branches >---
++ //--------------------------------------------------------------
++
++ public:
++ // Size of call trampoline stub.
++ static uint size_call_trampoline() {
++ return 0; // no call trampolines on this platform
++ }
++
++ // number of relocations needed by a call trampoline stub
++ static uint reloc_call_trampoline() {
++ return 0; // no call trampolines on this platform
++ }
++};
++
++class HandlerImpl {
++
++ public:
++
++ static int emit_exception_handler(CodeBuffer &cbuf);
++ static int emit_deopt_handler(CodeBuffer& cbuf);
++
++ static uint size_exception_handler() {
++ // NativeCall instruction size is the same as NativeJump.
++ // exception handler starts out as jump and can be patched to
++    // a call by deoptimization. (4932387)
++ // Note that this value is also credited (in output.cpp) to
++ // the size of the code section.
++ return NativeJump::instruction_size;
++ }
++
++#ifdef _LP64
++ static uint size_deopt_handler() {
++ // three 5 byte instructions
++ return 15;
++ }
++#else
++ static uint size_deopt_handler() {
++ // NativeCall instruction size is the same as NativeJump.
++ // exception handler starts out as jump and can be patched to
++    // a call by deoptimization. (4932387)
++ // Note that this value is also credited (in output.cpp) to
++ // the size of the code section.
++ return 5 + NativeJump::instruction_size; // pushl(); jmp;
++ }
++#endif
++};
++
++%} // end source_hpp
++
+ source %{
++
++// Emit exception handler code.
++// Stuff framesize into a register and call a VM stub routine.
++int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
++
++ // Note that the code buffer's insts_mark is always relative to insts.
++ // That's why we must use the macroassembler to generate a handler.
++ MacroAssembler _masm(&cbuf);
++ address base = __ start_a_stub(size_exception_handler());
++ if (base == NULL) return 0; // CodeBuffer::expand failed
++ int offset = __ offset();
++ __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
++ assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
++ __ end_a_stub();
++ return offset;
++}
++
++// Emit deopt handler code.
++int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
++
++ // Note that the code buffer's insts_mark is always relative to insts.
++ // That's why we must use the macroassembler to generate a handler.
++ MacroAssembler _masm(&cbuf);
++ address base = __ start_a_stub(size_deopt_handler());
++ if (base == NULL) return 0; // CodeBuffer::expand failed
++ int offset = __ offset();
++
++#ifdef _LP64
++ address the_pc = (address) __ pc();
++ Label next;
++ // push a "the_pc" on the stack without destroying any registers
++ // as they all may be live.
++
++ // push address of "next"
++ __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
++ __ bind(next);
++ // adjust it so it matches "the_pc"
++ __ subptr(Address(rsp, 0), __ offset() - offset);
++#else
++ InternalAddress here(__ pc());
++ __ pushptr(here.addr());
++#endif
++
++ __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
++ assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
++ __ end_a_stub();
++ return offset;
++}
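
Both emitters are now HandlerImpl statics declared in the source_hpp block above, replacing the free functions deleted further down in this file. A sketch of how shared compiler code might call them under that assumption — hypothetical caller, not code from the patch:

// CodeBuffer and HandlerImpl as declared above; each emitter returns
// the offset of its stub, or 0 when CodeBuffer::expand failed.
static bool emit_platform_handlers(CodeBuffer& cbuf) {
  int exception_offset = HandlerImpl::emit_exception_handler(cbuf);
  int deopt_offset     = HandlerImpl::emit_deopt_handler(cbuf);
  return exception_offset != 0 && deopt_offset != 0;
}
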
++
++
++//=============================================================================
++
+ // Float masks come from different places depending on platform.
+ #ifdef _LP64
+ static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
+@@ -581,6 +699,12 @@
+ return !AlignVector; // can be changed by flag
+ }
+
++// x86 AES instructions are compatible with SunJCE expanded
++// keys, hence we do not need to pass the original key to stubs
++const bool Matcher::pass_original_key_for_aes() {
++ return false;
++}
++
+ // Helper methods for MachSpillCopyNode::implementation().
+ static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
+ int src_hi, int dst_hi, uint ireg, outputStream* st) {
+--- ./hotspot/src/cpu/x86/vm/x86_32.ad Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/x86/vm/x86_32.ad Wed Jul 30 03:51:43 2014 -0700
+@@ -487,6 +487,11 @@
+ return 0; // absolute addressing, no offset
+ }
+
++bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
++void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
++ ShouldNotReachHere();
++}
++
+ void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
+ // Empty encoding
+ }
+@@ -507,14 +512,15 @@
+ void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
+ Compile* C = ra_->C;
+
+- int framesize = C->frame_slots() << LogBytesPerInt;
++ int framesize = C->frame_size_in_bytes();
++ int bangsize = C->bang_size_in_bytes();
+ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
+ // Remove wordSize for return addr which is already pushed.
+ framesize -= wordSize;
+
+- if (C->need_stack_bang(framesize)) {
++ if (C->need_stack_bang(bangsize)) {
+ framesize -= wordSize;
+- st->print("# stack bang");
++ st->print("# stack bang (%d bytes)", bangsize);
+ st->print("\n\t");
+ st->print("PUSH EBP\t# Save EBP");
+ if (framesize) {
+@@ -558,9 +564,10 @@
+ Compile* C = ra_->C;
+ MacroAssembler _masm(&cbuf);
+
+- int framesize = C->frame_slots() << LogBytesPerInt;
+-
+- __ verified_entry(framesize, C->need_stack_bang(framesize), C->in_24_bit_fp_mode());
++ int framesize = C->frame_size_in_bytes();
++ int bangsize = C->bang_size_in_bytes();
++
++ __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());
+
+ C->set_frame_complete(cbuf.insts_size());
+
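
The prolog hunks above split the old single framesize into separate frame and bang sizes. A hedged sketch of the invariant this depends on (names illustrative, not from the patch):

#include <cassert>

// The guard-page probe must reach at least as far as the frame the
// compiled method will actually use; deriving both numbers from
// frame_slots could under-bang large frames.
static void check_bang_covers_frame(int frame_size_in_bytes,
                                    int bang_size_in_bytes) {
  assert(bang_size_in_bytes >= frame_size_in_bytes);
}
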
+@@ -584,7 +591,7 @@
+ #ifndef PRODUCT
+ void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
+ Compile *C = ra_->C;
+- int framesize = C->frame_slots() << LogBytesPerInt;
++ int framesize = C->frame_size_in_bytes();
+ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
+ // Remove two words for return addr and rbp,
+ framesize -= 2*wordSize;
+@@ -624,7 +631,7 @@
+ masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
+ }
+
+- int framesize = C->frame_slots() << LogBytesPerInt;
++ int framesize = C->frame_size_in_bytes();
+ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
+ // Remove two words for return addr and rbp,
+ framesize -= 2*wordSize;
+@@ -658,7 +665,7 @@
+ if (C->max_vector_size() > 16) size += 3; // vzeroupper
+ if (do_polling() && C->is_method_compilation()) size += 6;
+
+- int framesize = C->frame_slots() << LogBytesPerInt;
++ int framesize = C->frame_size_in_bytes();
+ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
+ // Remove two words for return addr and rbp,
+ framesize -= 2*wordSize;
+@@ -1292,59 +1299,6 @@
+
+
+ //=============================================================================
+-uint size_exception_handler() {
+- // NativeCall instruction size is the same as NativeJump.
+- // exception handler starts out as jump and can be patched to
+- // a call be deoptimization. (4932387)
+- // Note that this value is also credited (in output.cpp) to
+- // the size of the code section.
+- return NativeJump::instruction_size;
+-}
+-
+-// Emit exception handler code. Stuff framesize into a register
+-// and call a VM stub routine.
+-int emit_exception_handler(CodeBuffer& cbuf) {
+-
+- // Note that the code buffer's insts_mark is always relative to insts.
+- // That's why we must use the macroassembler to generate a handler.
+- MacroAssembler _masm(&cbuf);
+- address base =
+- __ start_a_stub(size_exception_handler());
+- if (base == NULL) return 0; // CodeBuffer::expand failed
+- int offset = __ offset();
+- __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
+- assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
+- __ end_a_stub();
+- return offset;
+-}
+-
+-uint size_deopt_handler() {
+- // NativeCall instruction size is the same as NativeJump.
+- // exception handler starts out as jump and can be patched to
+- // a call be deoptimization. (4932387)
+- // Note that this value is also credited (in output.cpp) to
+- // the size of the code section.
+- return 5 + NativeJump::instruction_size; // pushl(); jmp;
+-}
+-
+-// Emit deopt handler code.
+-int emit_deopt_handler(CodeBuffer& cbuf) {
+-
+- // Note that the code buffer's insts_mark is always relative to insts.
+- // That's why we must use the macroassembler to generate a handler.
+- MacroAssembler _masm(&cbuf);
+- address base =
+- __ start_a_stub(size_exception_handler());
+- if (base == NULL) return 0; // CodeBuffer::expand failed
+- int offset = __ offset();
+- InternalAddress here(__ pc());
+- __ pushptr(here.addr());
+-
+- __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
+- assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
+- __ end_a_stub();
+- return offset;
+-}
+
+ int Matcher::regnum_to_fpu_offset(int regnum) {
+ return regnum - 32; // The FP registers are in the second chunk
+@@ -1389,6 +1343,9 @@
+ // No CMOVF/CMOVD with SSE/SSE2
+ const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
+
++// Does the CPU require late expand (see block.cpp for description of late expand)?
++const bool Matcher::require_postalloc_expand = false;
++
+ // Should the Matcher clone shifts on addressing modes, expecting them to
+ // be subsumed into complex addressing expressions or compute them into
+ // registers? True for Intel but false for most RISCs
+@@ -1534,19 +1491,6 @@
+ return EBP_REG_mask();
+ }
+
+-const RegMask Matcher::mathExactI_result_proj_mask() {
+- return EAX_REG_mask();
+-}
+-
+-const RegMask Matcher::mathExactL_result_proj_mask() {
+- ShouldNotReachHere();
+- return RegMask();
+-}
+-
+-const RegMask Matcher::mathExactI_flags_proj_mask() {
+- return INT_FLAGS_mask();
+-}
+-
+ // Returns true if the high 32 bits of the value is known to be zero.
+ bool is_operand_hi32_zero(Node* n) {
+ int opc = n->Opcode();
+@@ -2910,542 +2854,6 @@
+ emit_d8 (cbuf,0 );
+ %}
+
+-
+- // Because the transitions from emitted code to the runtime
+- // monitorenter/exit helper stubs are so slow it's critical that
+- // we inline both the stack-locking fast-path and the inflated fast path.
+- //
+- // See also: cmpFastLock and cmpFastUnlock.
+- //
+- // What follows is a specialized inline transliteration of the code
+- // in slow_enter() and slow_exit(). If we're concerned about I$ bloat
+- // another option would be to emit TrySlowEnter and TrySlowExit methods
+- // at startup-time. These methods would accept arguments as
+- // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
+- // indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply
+- // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
+- // In practice, however, the # of lock sites is bounded and is usually small.
+- // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
+- // if the processor uses simple bimodal branch predictors keyed by EIP
+- // Since the helper routines would be called from multiple synchronization
+- // sites.
+- //
+- // An even better approach would be write "MonitorEnter()" and "MonitorExit()"
+- // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
+- // to those specialized methods. That'd give us a mostly platform-independent
+- // implementation that the JITs could optimize and inline at their pleasure.
+- // Done correctly, the only time we'd need to cross to native could would be
+- // to park() or unpark() threads. We'd also need a few more unsafe operators
+- // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
+- // (b) explicit barriers or fence operations.
+- //
+- // TODO:
+- //
+- // * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
+- // This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
+- // Given TLAB allocation, Self is usually manifested in a register, so passing it into
+- // the lock operators would typically be faster than reifying Self.
+- //
+- // * Ideally I'd define the primitives as:
+- // fast_lock (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
+- // fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
+- // Unfortunately ADLC bugs prevent us from expressing the ideal form.
+- // Instead, we're stuck with a rather awkward and brittle register assignments below.
+- // Furthermore the register assignments are overconstrained, possibly resulting in
+- // sub-optimal code near the synchronization site.
+- //
+- // * Eliminate the sp-proximity tests and just use "== Self" tests instead.
+- // Alternately, use a better sp-proximity test.
+- //
+- // * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
+- // Either one is sufficient to uniquely identify a thread.
+- // TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
+- //
+- // * Intrinsify notify() and notifyAll() for the common cases where the
+- // object is locked by the calling thread but the waitlist is empty.
+- // avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
+- //
+- // * use jccb and jmpb instead of jcc and jmp to improve code density.
+- // But beware of excessive branch density on AMD Opterons.
+- //
+- // * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
+- // or failure of the fast-path. If the fast-path fails then we pass
+- // control to the slow-path, typically in C. In Fast_Lock and
+- // Fast_Unlock we often branch to DONE_LABEL, just to find that C2
+- // will emit a conditional branch immediately after the node.
+- // So we have branches to branches and lots of ICC.ZF games.
+- // Instead, it might be better to have C2 pass a "FailureLabel"
+- // into Fast_Lock and Fast_Unlock. In the case of success, control
+- // will drop through the node. ICC.ZF is undefined at exit.
+- // In the case of failure, the node will branch directly to the
+- // FailureLabel
+-
+-
+- // obj: object to lock
+- // box: on-stack box address (displaced header location) - KILLED
+- // rax,: tmp -- KILLED
+- // scr: tmp -- KILLED
+- enc_class Fast_Lock( eRegP obj, eRegP box, eAXRegI tmp, eRegP scr ) %{
+-
+- Register objReg = as_Register($obj$$reg);
+- Register boxReg = as_Register($box$$reg);
+- Register tmpReg = as_Register($tmp$$reg);
+- Register scrReg = as_Register($scr$$reg);
+-
+- // Ensure the register assignents are disjoint
+- guarantee (objReg != boxReg, "") ;
+- guarantee (objReg != tmpReg, "") ;
+- guarantee (objReg != scrReg, "") ;
+- guarantee (boxReg != tmpReg, "") ;
+- guarantee (boxReg != scrReg, "") ;
+- guarantee (tmpReg == as_Register(EAX_enc), "") ;
+-
+- MacroAssembler masm(&cbuf);
+-
+- if (_counters != NULL) {
+- masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
+- }
+- if (EmitSync & 1) {
+- // set box->dhw = unused_mark (3)
+- // Force all sync thru slow-path: slow_enter() and slow_exit()
+- masm.movptr (Address(boxReg, 0), int32_t(markOopDesc::unused_mark())) ;
+- masm.cmpptr (rsp, (int32_t)0) ;
+- } else
+- if (EmitSync & 2) {
+- Label DONE_LABEL ;
+- if (UseBiasedLocking) {
+- // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
+- masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
+- }
+-
+- masm.movptr(tmpReg, Address(objReg, 0)) ; // fetch markword
+- masm.orptr (tmpReg, 0x1);
+- masm.movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS
+- if (os::is_MP()) { masm.lock(); }
+- masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
+- masm.jcc(Assembler::equal, DONE_LABEL);
+- // Recursive locking
+- masm.subptr(tmpReg, rsp);
+- masm.andptr(tmpReg, (int32_t) 0xFFFFF003 );
+- masm.movptr(Address(boxReg, 0), tmpReg);
+- masm.bind(DONE_LABEL) ;
+- } else {
+- // Possible cases that we'll encounter in fast_lock
+- // ------------------------------------------------
+- // * Inflated
+- // -- unlocked
+- // -- Locked
+- // = by self
+- // = by other
+- // * biased
+- // -- by Self
+- // -- by other
+- // * neutral
+- // * stack-locked
+- // -- by self
+- // = sp-proximity test hits
+- // = sp-proximity test generates false-negative
+- // -- by other
+- //
+-
+- Label IsInflated, DONE_LABEL, PopDone ;
+-
+- // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
+- // order to reduce the number of conditional branches in the most common cases.
+- // Beware -- there's a subtle invariant that fetch of the markword
+- // at [FETCH], below, will never observe a biased encoding (*101b).
+- // If this invariant is not held we risk exclusion (safety) failure.
+- if (UseBiasedLocking && !UseOptoBiasInlining) {
+- masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
+- }
+-
+- masm.movptr(tmpReg, Address(objReg, 0)) ; // [FETCH]
+- masm.testptr(tmpReg, 0x02) ; // Inflated v (Stack-locked or neutral)
+- masm.jccb (Assembler::notZero, IsInflated) ;
+-
+- // Attempt stack-locking ...
+- masm.orptr (tmpReg, 0x1);
+- masm.movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS
+- if (os::is_MP()) { masm.lock(); }
+- masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
+- if (_counters != NULL) {
+- masm.cond_inc32(Assembler::equal,
+- ExternalAddress((address)_counters->fast_path_entry_count_addr()));
+- }
+- masm.jccb (Assembler::equal, DONE_LABEL);
+-
+- // Recursive locking
+- masm.subptr(tmpReg, rsp);
+- masm.andptr(tmpReg, 0xFFFFF003 );
+- masm.movptr(Address(boxReg, 0), tmpReg);
+- if (_counters != NULL) {
+- masm.cond_inc32(Assembler::equal,
+- ExternalAddress((address)_counters->fast_path_entry_count_addr()));
+- }
+- masm.jmp (DONE_LABEL) ;
+-
+- masm.bind (IsInflated) ;
+-
+- // The object is inflated.
+- //
+- // TODO-FIXME: eliminate the ugly use of manifest constants:
+- // Use markOopDesc::monitor_value instead of "2".
+- // use markOop::unused_mark() instead of "3".
+- // The tmpReg value is an objectMonitor reference ORed with
+- // markOopDesc::monitor_value (2). We can either convert tmpReg to an
+- // objectmonitor pointer by masking off the "2" bit or we can just
+- // use tmpReg as an objectmonitor pointer but bias the objectmonitor
+- // field offsets with "-2" to compensate for and annul the low-order tag bit.
+- //
+- // I use the latter as it avoids AGI stalls.
+- // As such, we write "mov r, [tmpReg+OFFSETOF(Owner)-2]"
+- // instead of "mov r, [tmpReg+OFFSETOF(Owner)]".
+- //
+- #define OFFSET_SKEWED(f) ((ObjectMonitor::f ## _offset_in_bytes())-2)
+-
+- // boxReg refers to the on-stack BasicLock in the current frame.
+- // We'd like to write:
+- // set box->_displaced_header = markOop::unused_mark(). Any non-0 value suffices.
+- // This is convenient but results a ST-before-CAS penalty. The following CAS suffers
+- // additional latency as we have another ST in the store buffer that must drain.
+-
+- if (EmitSync & 8192) {
+- masm.movptr(Address(boxReg, 0), 3) ; // results in ST-before-CAS penalty
+- masm.get_thread (scrReg) ;
+- masm.movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2]
+- masm.movptr(tmpReg, NULL_WORD); // consider: xor vs mov
+- if (os::is_MP()) { masm.lock(); }
+- masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
+- } else
+- if ((EmitSync & 128) == 0) { // avoid ST-before-CAS
+- masm.movptr(scrReg, boxReg) ;
+- masm.movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2]
+-
+- // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
+- if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
+- // prefetchw [eax + Offset(_owner)-2]
+- masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
+- }
+-
+- if ((EmitSync & 64) == 0) {
+- // Optimistic form: consider XORL tmpReg,tmpReg
+- masm.movptr(tmpReg, NULL_WORD) ;
+- } else {
+- // Can suffer RTS->RTO upgrades on shared or cold $ lines
+- // Test-And-CAS instead of CAS
+- masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; // rax, = m->_owner
+- masm.testptr(tmpReg, tmpReg) ; // Locked ?
+- masm.jccb (Assembler::notZero, DONE_LABEL) ;
+- }
+-
+- // Appears unlocked - try to swing _owner from null to non-null.
+- // Ideally, I'd manifest "Self" with get_thread and then attempt
+- // to CAS the register containing Self into m->Owner.
+- // But we don't have enough registers, so instead we can either try to CAS
+- // rsp or the address of the box (in scr) into &m->owner. If the CAS succeeds
+- // we later store "Self" into m->Owner. Transiently storing a stack address
+- // (rsp or the address of the box) into m->owner is harmless.
+- // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
+- if (os::is_MP()) { masm.lock(); }
+- masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
+- masm.movptr(Address(scrReg, 0), 3) ; // box->_displaced_header = 3
+- masm.jccb (Assembler::notZero, DONE_LABEL) ;
+- masm.get_thread (scrReg) ; // beware: clobbers ICCs
+- masm.movptr(Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), scrReg) ;
+- masm.xorptr(boxReg, boxReg) ; // set icc.ZFlag = 1 to indicate success
+-
+- // If the CAS fails we can either retry or pass control to the slow-path.
+- // We use the latter tactic.
+- // Pass the CAS result in the icc.ZFlag into DONE_LABEL
+- // If the CAS was successful ...
+- // Self has acquired the lock
+- // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
+- // Intentional fall-through into DONE_LABEL ...
+- } else {
+- masm.movptr(Address(boxReg, 0), 3) ; // results in ST-before-CAS penalty
+- masm.movptr(boxReg, tmpReg) ;
+-
+- // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
+- if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
+- // prefetchw [eax + Offset(_owner)-2]
+- masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
+- }
+-
+- if ((EmitSync & 64) == 0) {
+- // Optimistic form
+- masm.xorptr (tmpReg, tmpReg) ;
+- } else {
+- // Can suffer RTS->RTO upgrades on shared or cold $ lines
+- masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; // rax, = m->_owner
+- masm.testptr(tmpReg, tmpReg) ; // Locked ?
+- masm.jccb (Assembler::notZero, DONE_LABEL) ;
+- }
+-
+- // Appears unlocked - try to swing _owner from null to non-null.
+- // Use either "Self" (in scr) or rsp as thread identity in _owner.
+- // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
+- masm.get_thread (scrReg) ;
+- if (os::is_MP()) { masm.lock(); }
+- masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
+-
+- // If the CAS fails we can either retry or pass control to the slow-path.
+- // We use the latter tactic.
+- // Pass the CAS result in the icc.ZFlag into DONE_LABEL
+- // If the CAS was successful ...
+- // Self has acquired the lock
+- // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
+- // Intentional fall-through into DONE_LABEL ...
+- }
+-
+- // DONE_LABEL is a hot target - we'd really like to place it at the
+- // start of cache line by padding with NOPs.
+- // See the AMD and Intel software optimization manuals for the
+- // most efficient "long" NOP encodings.
+- // Unfortunately none of our alignment mechanisms suffice.
+- masm.bind(DONE_LABEL);
+-
+- // Avoid branch-to-branch on AMD processors
+- // This appears to be superstition.
+- if (EmitSync & 32) masm.nop() ;
+-
+-
+- // At DONE_LABEL the icc ZFlag is set as follows ...
+- // Fast_Unlock uses the same protocol.
+- // ZFlag == 1 -> Success
+- // ZFlag == 0 -> Failure - force control through the slow-path
+- }
+- %}
+-
+- // obj: object to unlock
+- // box: box address (displaced header location), killed. Must be EAX.
+- // rbx,: killed tmp; cannot be obj nor box.
+- //
+- // Some commentary on balanced locking:
+- //
+- // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
+- // Methods that don't have provably balanced locking are forced to run in the
+- // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
+- // The interpreter provides two properties:
+- // I1: At return-time the interpreter automatically and quietly unlocks any
+- // objects acquired the current activation (frame). Recall that the
+- // interpreter maintains an on-stack list of locks currently held by
+- // a frame.
+- // I2: If a method attempts to unlock an object that is not held by the
+- // the frame the interpreter throws IMSX.
+- //
+- // Lets say A(), which has provably balanced locking, acquires O and then calls B().
+- // B() doesn't have provably balanced locking so it runs in the interpreter.
+- // Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O
+- // is still locked by A().
+- //
+- // The only other source of unbalanced locking would be JNI. The "Java Native Interface:
+- // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
+- // should not be unlocked by "normal" java-level locking and vice-versa. The specification
+- // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
+-
+- enc_class Fast_Unlock( nabxRegP obj, eAXRegP box, eRegP tmp) %{
+-
+- Register objReg = as_Register($obj$$reg);
+- Register boxReg = as_Register($box$$reg);
+- Register tmpReg = as_Register($tmp$$reg);
+-
+- guarantee (objReg != boxReg, "") ;
+- guarantee (objReg != tmpReg, "") ;
+- guarantee (boxReg != tmpReg, "") ;
+- guarantee (boxReg == as_Register(EAX_enc), "") ;
+- MacroAssembler masm(&cbuf);
+-
+- if (EmitSync & 4) {
+- // Disable - inhibit all inlining. Force control through the slow-path
+- masm.cmpptr (rsp, 0) ;
+- } else
+- if (EmitSync & 8) {
+- Label DONE_LABEL ;
+- if (UseBiasedLocking) {
+- masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
+- }
+- // classic stack-locking code ...
+- masm.movptr(tmpReg, Address(boxReg, 0)) ;
+- masm.testptr(tmpReg, tmpReg) ;
+- masm.jcc (Assembler::zero, DONE_LABEL) ;
+- if (os::is_MP()) { masm.lock(); }
+- masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses EAX which is box
+- masm.bind(DONE_LABEL);
+- } else {
+- Label DONE_LABEL, Stacked, CheckSucc, Inflated ;
+-
+- // Critically, the biased locking test must have precedence over
+- // and appear before the (box->dhw == 0) recursive stack-lock test.
+- if (UseBiasedLocking && !UseOptoBiasInlining) {
+- masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
+- }
+-
+- masm.cmpptr(Address(boxReg, 0), 0) ; // Examine the displaced header
+- masm.movptr(tmpReg, Address(objReg, 0)) ; // Examine the object's markword
+- masm.jccb (Assembler::zero, DONE_LABEL) ; // 0 indicates recursive stack-lock
+-
+- masm.testptr(tmpReg, 0x02) ; // Inflated?
+- masm.jccb (Assembler::zero, Stacked) ;
+-
+- masm.bind (Inflated) ;
+- // It's inflated.
+- // Despite our balanced locking property we still check that m->_owner == Self
+- // as java routines or native JNI code called by this thread might
+- // have released the lock.
+- // Refer to the comments in synchronizer.cpp for how we might encode extra
+- // state in _succ so we can avoid fetching EntryList|cxq.
+- //
+- // I'd like to add more cases in fast_lock() and fast_unlock() --
+- // such as recursive enter and exit -- but we have to be wary of
+- // I$ bloat, T$ effects and BP$ effects.
+- //
+- // If there's no contention try a 1-0 exit. That is, exit without
+- // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
+- // we detect and recover from the race that the 1-0 exit admits.
+- //
+- // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
+- // before it STs null into _owner, releasing the lock. Updates
+- // to data protected by the critical section must be visible before
+- // we drop the lock (and thus before any other thread could acquire
+- // the lock and observe the fields protected by the lock).
+- // IA32's memory-model is SPO, so STs are ordered with respect to
+- // each other and there's no need for an explicit barrier (fence).
+- // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
+-
+- masm.get_thread (boxReg) ;
+- if ((EmitSync & 4096) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
+- // prefetchw [ebx + Offset(_owner)-2]
+- masm.prefetchw(Address(rbx, ObjectMonitor::owner_offset_in_bytes()-2));
+- }
+-
+- // Note that we could employ various encoding schemes to reduce
+- // the number of loads below (currently 4) to just 2 or 3.
+- // Refer to the comments in synchronizer.cpp.
+- // In practice the chain of fetches doesn't seem to impact performance, however.
+- if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
+- // Attempt to reduce branch density - AMD's branch predictor.
+- masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
+- masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
+- masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
+- masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
+- masm.jccb (Assembler::notZero, DONE_LABEL) ;
+- masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
+- masm.jmpb (DONE_LABEL) ;
+- } else {
+- masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
+- masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
+- masm.jccb (Assembler::notZero, DONE_LABEL) ;
+- masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
+- masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
+- masm.jccb (Assembler::notZero, CheckSucc) ;
+- masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
+- masm.jmpb (DONE_LABEL) ;
+- }
+-
+- // The Following code fragment (EmitSync & 65536) improves the performance of
+- // contended applications and contended synchronization microbenchmarks.
+- // Unfortunately the emission of the code - even though not executed - causes regressions
+- // in scimark and jetstream, evidently because of $ effects. Replacing the code
+- // with an equal number of never-executed NOPs results in the same regression.
+- // We leave it off by default.
+-
+- if ((EmitSync & 65536) != 0) {
+- Label LSuccess, LGoSlowPath ;
+-
+- masm.bind (CheckSucc) ;
+-
+- // Optional pre-test ... it's safe to elide this
+- if ((EmitSync & 16) == 0) {
+- masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ;
+- masm.jccb (Assembler::zero, LGoSlowPath) ;
+- }
+-
+- // We have a classic Dekker-style idiom:
+- // ST m->_owner = 0 ; MEMBAR; LD m->_succ
+- // There are a number of ways to implement the barrier:
+- // (1) lock:andl &m->_owner, 0
+- // is fast, but mask doesn't currently support the "ANDL M,IMM32" form.
+- // LOCK: ANDL [ebx+Offset(_Owner)-2], 0
+- // Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8
+- // (2) If supported, an explicit MFENCE is appealing.
+- // In older IA32 processors MFENCE is slower than lock:add or xchg
+- // particularly if the write-buffer is full as might be the case if
+- // if stores closely precede the fence or fence-equivalent instruction.
+- // In more modern implementations MFENCE appears faster, however.
+- // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack
+- // The $lines underlying the top-of-stack should be in M-state.
+- // The locked add instruction is serializing, of course.
+- // (4) Use xchg, which is serializing
+- // mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works
+- // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0.
+- // The integer condition codes will tell us if succ was 0.
+- // Since _succ and _owner should reside in the same $line and
+- // we just stored into _owner, it's likely that the $line
+- // remains in M-state for the lock:orl.
+- //
+- // We currently use (3), although it's likely that switching to (2)
+- // is correct for the future.
+-
+- masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
+- if (os::is_MP()) {
+- if (VM_Version::supports_sse2() && 1 == FenceInstruction) {
+- masm.mfence();
+- } else {
+- masm.lock () ; masm.addptr(Address(rsp, 0), 0) ;
+- }
+- }
+- // Ratify _succ remains non-null
+- masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ;
+- masm.jccb (Assembler::notZero, LSuccess) ;
+-
+- masm.xorptr(boxReg, boxReg) ; // box is really EAX
+- if (os::is_MP()) { masm.lock(); }
+- masm.cmpxchgptr(rsp, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+- masm.jccb (Assembler::notEqual, LSuccess) ;
+- // Since we're low on registers we installed rsp as a placeholding in _owner.
+- // Now install Self over rsp. This is safe as we're transitioning from
+- // non-null to non=null
+- masm.get_thread (boxReg) ;
+- masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg) ;
+- // Intentional fall-through into LGoSlowPath ...
+-
+- masm.bind (LGoSlowPath) ;
+- masm.orptr(boxReg, 1) ; // set ICC.ZF=0 to indicate failure
+- masm.jmpb (DONE_LABEL) ;
+-
+- masm.bind (LSuccess) ;
+- masm.xorptr(boxReg, boxReg) ; // set ICC.ZF=1 to indicate success
+- masm.jmpb (DONE_LABEL) ;
+- }
+-
+- masm.bind (Stacked) ;
+- // It's not inflated and it's not recursively stack-locked and it's not biased.
+- // It must be stack-locked.
+- // Try to reset the header to displaced header.
+- // The "box" value on the stack is stable, so we can reload
+- // and be assured we observe the same value as above.
+- masm.movptr(tmpReg, Address(boxReg, 0)) ;
+- if (os::is_MP()) { masm.lock(); }
+- masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses EAX which is box
+- // Intention fall-thru into DONE_LABEL
+-
+-
+- // DONE_LABEL is a hot target - we'd really like to place it at the
+- // start of cache line by padding with NOPs.
+- // See the AMD and Intel software optimization manuals for the
+- // most efficient "long" NOP encodings.
+- // Unfortunately none of our alignment mechanisms suffice.
+- if ((EmitSync & 65536) == 0) {
+- masm.bind (CheckSucc) ;
+- }
+- masm.bind(DONE_LABEL);
+-
+- // Avoid branch to branch on AMD processors
+- if (EmitSync & 32768) { masm.nop() ; }
+- }
+- %}
+-
+-
+ enc_class enc_pop_rdx() %{
+ emit_opcode(cbuf,0x5A);
+ %}
+@@ -3768,7 +3176,7 @@
+ // automatically biased by the preserve_stack_slots field above.
+ c_calling_convention %{
+ // This is obviously always outgoing
+- (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
++ (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
+ %}
+
+ // Location of C & interpreter return values
+@@ -5704,6 +5112,19 @@
+ %}
+
+ instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
++ predicate(UseCountTrailingZerosInstruction);
++ match(Set dst (CountTrailingZerosI src));
++ effect(KILL cr);
++
++ format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %}
++ ins_encode %{
++ __ tzcntl($dst$$Register, $src$$Register);
++ %}
++ ins_pipe(ialu_reg);
++%}
++
++instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
++ predicate(!UseCountTrailingZerosInstruction);
+ match(Set dst (CountTrailingZerosI src));
+ effect(KILL cr);
+
+@@ -5723,6 +5144,30 @@
+ %}
+
+ instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
++ predicate(UseCountTrailingZerosInstruction);
++ match(Set dst (CountTrailingZerosL src));
++ effect(TEMP dst, KILL cr);
++
++ format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t"
++ "JNC done\n\t"
++ "TZCNT $dst, $src.hi\n\t"
++ "ADD $dst, 32\n"
++ "done:" %}
++ ins_encode %{
++ Register Rdst = $dst$$Register;
++ Register Rsrc = $src$$Register;
++ Label done;
++ __ tzcntl(Rdst, Rsrc);
++ __ jccb(Assembler::carryClear, done);
++ __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
++ __ addl(Rdst, BitsPerInt);
++ __ bind(done);
++ %}
++ ins_pipe(ialu_reg);
++%}
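
The long variant above relies on TZCNT setting the carry flag when its source is zero (after counting all 32 bits). The same computation in portable C++, with __builtin_ctz standing in for TZCNT — illustration only:

#include <stdint.h>

// 64-bit trailing-zero count from two 32-bit halves, mirroring the
// TZCNT / JNC / TZCNT / ADD 32 sequence in the rule above.
static inline unsigned tzcnt64_via_32(uint32_t lo, uint32_t hi) {
  if (lo != 0)
    return __builtin_ctz(lo);       // carry clear: low word had a set bit
  if (hi != 0)
    return 32 + __builtin_ctz(hi);  // carry set: scan the high word
  return 64;                        // both zero: 32 + 32
}
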
++
++instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
++ predicate(!UseCountTrailingZerosInstruction);
+ match(Set dst (CountTrailingZerosL src));
+ effect(TEMP dst, KILL cr);
+
+@@ -7099,6 +6544,7 @@
+
+ instruct membar_acquire() %{
+ match(MemBarAcquire);
++ match(LoadFence);
+ ins_cost(400);
+
+ size(0);
+@@ -7119,6 +6565,7 @@
+
+ instruct membar_release() %{
+ match(MemBarRelease);
++ match(StoreFence);
+ ins_cost(400);
+
+ size(0);
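
The new LoadFence/StoreFence matches reuse the zero-size membar encodings: on x86's TSO memory model, acquire and release fences need no instruction. The same effect is observable from C++ (illustration, not part of the patch):

#include <atomic>

void fence_demo() {
  // Both compile to zero instructions on x86 (compiler barrier only),
  // matching the size(0) encodings above.
  std::atomic_thread_fence(std::memory_order_acquire);
  std::atomic_thread_fence(std::memory_order_release);
  // A sequentially consistent fence, by contrast, emits MFENCE or a
  // lock-prefixed RMW.
  std::atomic_thread_fence(std::memory_order_seq_cst);
}
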
+@@ -7535,44 +6982,6 @@
+ //----------Arithmetic Instructions--------------------------------------------
+ //----------Addition Instructions----------------------------------------------
+
+-instruct addExactI_eReg(eAXRegI dst, rRegI src, eFlagsReg cr)
+-%{
+- match(AddExactI dst src);
+- effect(DEF cr);
+-
+- format %{ "ADD $dst, $src\t# addExact int" %}
+- ins_encode %{
+- __ addl($dst$$Register, $src$$Register);
+- %}
+- ins_pipe(ialu_reg_reg);
+-%}
+-
+-instruct addExactI_eReg_imm(eAXRegI dst, immI src, eFlagsReg cr)
+-%{
+- match(AddExactI dst src);
+- effect(DEF cr);
+-
+- format %{ "ADD $dst, $src\t# addExact int" %}
+- ins_encode %{
+- __ addl($dst$$Register, $src$$constant);
+- %}
+- ins_pipe(ialu_reg_reg);
+-%}
+-
+-instruct addExactI_eReg_mem(eAXRegI dst, memory src, eFlagsReg cr)
+-%{
+- match(AddExactI dst (LoadI src));
+- effect(DEF cr);
+-
+- ins_cost(125);
+- format %{ "ADD $dst,$src\t# addExact int" %}
+- ins_encode %{
+- __ addl($dst$$Register, $src$$Address);
+- %}
+- ins_pipe( ialu_reg_mem );
+-%}
+-
+-
+ // Integer Addition Instructions
+ instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
+ match(Set dst (AddI dst src));
+@@ -7882,43 +7291,6 @@
+
+ //----------Subtraction Instructions-------------------------------------------
+
+-instruct subExactI_eReg(eAXRegI dst, rRegI src, eFlagsReg cr)
+-%{
+- match(SubExactI dst src);
+- effect(DEF cr);
+-
+- format %{ "SUB $dst, $src\t# subExact int" %}
+- ins_encode %{
+- __ subl($dst$$Register, $src$$Register);
+- %}
+- ins_pipe(ialu_reg_reg);
+-%}
+-
+-instruct subExactI_eReg_imm(eAXRegI dst, immI src, eFlagsReg cr)
+-%{
+- match(SubExactI dst src);
+- effect(DEF cr);
+-
+- format %{ "SUB $dst, $src\t# subExact int" %}
+- ins_encode %{
+- __ subl($dst$$Register, $src$$constant);
+- %}
+- ins_pipe(ialu_reg_reg);
+-%}
+-
+-instruct subExactI_eReg_mem(eAXRegI dst, memory src, eFlagsReg cr)
+-%{
+- match(SubExactI dst (LoadI src));
+- effect(DEF cr);
+-
+- ins_cost(125);
+- format %{ "SUB $dst,$src\t# subExact int" %}
+- ins_encode %{
+- __ subl($dst$$Register, $src$$Address);
+- %}
+- ins_pipe( ialu_reg_mem );
+-%}
+-
+ // Integer Subtraction Instructions
+ instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
+ match(Set dst (SubI dst src));
+@@ -7987,17 +7359,6 @@
+ ins_pipe( ialu_reg );
+ %}
+
+-instruct negExactI_eReg(eAXRegI dst, eFlagsReg cr) %{
+- match(NegExactI dst);
+- effect(DEF cr);
+-
+- format %{ "NEG $dst\t# negExact int"%}
+- ins_encode %{
+- __ negl($dst$$Register);
+- %}
+- ins_pipe(ialu_reg);
+-%}
+-
+ //----------Multiplication/Division Instructions-------------------------------
+ // Integer Multiplication Instructions
+ // Multiply Register
+@@ -8209,46 +7570,6 @@
+ ins_pipe( pipe_slow );
+ %}
+
+-instruct mulExactI_eReg(eAXRegI dst, rRegI src, eFlagsReg cr)
+-%{
+- match(MulExactI dst src);
+- effect(DEF cr);
+-
+- ins_cost(300);
+- format %{ "IMUL $dst, $src\t# mulExact int" %}
+- ins_encode %{
+- __ imull($dst$$Register, $src$$Register);
+- %}
+- ins_pipe(ialu_reg_reg_alu0);
+-%}
+-
+-instruct mulExactI_eReg_imm(eAXRegI dst, rRegI src, immI imm, eFlagsReg cr)
+-%{
+- match(MulExactI src imm);
+- effect(DEF cr);
+-
+- ins_cost(300);
+- format %{ "IMUL $dst, $src, $imm\t# mulExact int" %}
+- ins_encode %{
+- __ imull($dst$$Register, $src$$Register, $imm$$constant);
+- %}
+- ins_pipe(ialu_reg_reg_alu0);
+-%}
+-
+-instruct mulExactI_eReg_mem(eAXRegI dst, memory src, eFlagsReg cr)
+-%{
+- match(MulExactI dst (LoadI src));
+- effect(DEF cr);
+-
+- ins_cost(350);
+- format %{ "IMUL $dst, $src\t# mulExact int" %}
+- ins_encode %{
+- __ imull($dst$$Register, $src$$Address);
+- %}
+- ins_pipe(ialu_reg_mem_alu0);
+-%}
+-
+-
+ // Integer DIV with Register
+ instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
+ match(Set rax (DivI rax div));
+@@ -8692,6 +8013,123 @@
+ ins_pipe( ialu_mem_imm );
+ %}
+
++// BMI1 instructions
++instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
++ match(Set dst (AndI (XorI src1 minus_1) src2));
++ predicate(UseBMI1Instructions);
++ effect(KILL cr);
++
++ format %{ "ANDNL $dst, $src1, $src2" %}
++
++ ins_encode %{
++ __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
++ %}
++ ins_pipe(ialu_reg);
++%}
++
++instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
++ match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
++ predicate(UseBMI1Instructions);
++ effect(KILL cr);
++
++ ins_cost(125);
++ format %{ "ANDNL $dst, $src1, $src2" %}
++
++ ins_encode %{
++ __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
++ %}
++ ins_pipe(ialu_reg_mem);
++%}
++
++instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
++ match(Set dst (AndI (SubI imm_zero src) src));
++ predicate(UseBMI1Instructions);
++ effect(KILL cr);
++
++ format %{ "BLSIL $dst, $src" %}
++
++ ins_encode %{
++ __ blsil($dst$$Register, $src$$Register);
++ %}
++ ins_pipe(ialu_reg);
++%}
++
++instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
++ match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
++ predicate(UseBMI1Instructions);
++ effect(KILL cr);
++
++ ins_cost(125);
++ format %{ "BLSIL $dst, $src" %}
++
++ ins_encode %{
++ __ blsil($dst$$Register, $src$$Address);
++ %}
++ ins_pipe(ialu_reg_mem);
++%}
++
++instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
++%{
++ match(Set dst (XorI (AddI src minus_1) src));
++ predicate(UseBMI1Instructions);
++ effect(KILL cr);
++
++ format %{ "BLSMSKL $dst, $src" %}
++
++ ins_encode %{
++ __ blsmskl($dst$$Register, $src$$Register);
++ %}
++
++ ins_pipe(ialu_reg);
++%}
++
++instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
++%{
++ match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
++ predicate(UseBMI1Instructions);
++ effect(KILL cr);
++
++ ins_cost(125);
++ format %{ "BLSMSKL $dst, $src" %}
++
++ ins_encode %{
++ __ blsmskl($dst$$Register, $src$$Address);
++ %}
++
++ ins_pipe(ialu_reg_mem);
++%}
++
++instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
++%{
++ match(Set dst (AndI (AddI src minus_1) src) );
++ predicate(UseBMI1Instructions);
++ effect(KILL cr);
++
++ format %{ "BLSRL $dst, $src" %}
++
++ ins_encode %{
++ __ blsrl($dst$$Register, $src$$Register);
++ %}
++
++ ins_pipe(ialu_reg);
++%}
++
++instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
++%{
++ match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
++ predicate(UseBMI1Instructions);
++ effect(KILL cr);
++
++ ins_cost(125);
++ format %{ "BLSRL $dst, $src" %}
++
++ ins_encode %{
++ __ blsrl($dst$$Register, $src$$Address);
++ %}
++
++ ins_pipe(ialu_reg_mem);
++%}
++
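
The match() trees in the BMI1 rules above encode standard two's-complement bit identities; in plain C++ they read as follows (ordinary bit math, shown for reference):

#include <stdint.h>

static inline uint32_t andn(uint32_t a, uint32_t b) { return ~a & b; }       // ANDN
static inline uint32_t blsi(uint32_t x)   { return (0u - x) & x; }           // isolate lowest set bit
static inline uint32_t blsmsk(uint32_t x) { return (x - 1u) ^ x; }           // mask up through lowest set bit
static inline uint32_t blsr(uint32_t x)   { return (x - 1u) & x; }           // clear lowest set bit
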
+ // Or Instructions
+ // Or Register with Register
+ instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
+@@ -9114,6 +8552,91 @@
+ instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
+ match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
+ */
++//----------Overflow Math Instructions-----------------------------------------
++
++instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
++%{
++ match(Set cr (OverflowAddI op1 op2));
++ effect(DEF cr, USE_KILL op1, USE op2);
++
++ format %{ "ADD $op1, $op2\t# overflow check int" %}
++
++ ins_encode %{
++ __ addl($op1$$Register, $op2$$Register);
++ %}
++ ins_pipe(ialu_reg_reg);
++%}
++
++instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
++%{
++ match(Set cr (OverflowAddI op1 op2));
++ effect(DEF cr, USE_KILL op1, USE op2);
++
++ format %{ "ADD $op1, $op2\t# overflow check int" %}
++
++ ins_encode %{
++ __ addl($op1$$Register, $op2$$constant);
++ %}
++ ins_pipe(ialu_reg_reg);
++%}
++
++instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
++%{
++ match(Set cr (OverflowSubI op1 op2));
++
++ format %{ "CMP $op1, $op2\t# overflow check int" %}
++ ins_encode %{
++ __ cmpl($op1$$Register, $op2$$Register);
++ %}
++ ins_pipe(ialu_reg_reg);
++%}
++
++instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
++%{
++ match(Set cr (OverflowSubI op1 op2));
++
++ format %{ "CMP $op1, $op2\t# overflow check int" %}
++ ins_encode %{
++ __ cmpl($op1$$Register, $op2$$constant);
++ %}
++ ins_pipe(ialu_reg_reg);
++%}
++
++instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
++%{
++ match(Set cr (OverflowSubI zero op2));
++ effect(DEF cr, USE_KILL op2);
++
++ format %{ "NEG $op2\t# overflow check int" %}
++ ins_encode %{
++ __ negl($op2$$Register);
++ %}
++ ins_pipe(ialu_reg_reg);
++%}
++
++instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
++%{
++ match(Set cr (OverflowMulI op1 op2));
++ effect(DEF cr, USE_KILL op1, USE op2);
++
++ format %{ "IMUL $op1, $op2\t# overflow check int" %}
++ ins_encode %{
++ __ imull($op1$$Register, $op2$$Register);
++ %}
++ ins_pipe(ialu_reg_reg_alu0);
++%}
++
++instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
++%{
++ match(Set cr (OverflowMulI op1 op2));
++ effect(DEF cr, TEMP tmp, USE op1, USE op2);
++
++ format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %}
++ ins_encode %{
++ __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
++ %}
++ ins_pipe(ialu_reg_reg_alu0);
++%}
+
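
These Overflow* nodes replace the removed AddExactI/SubExactI/MulExactI/NegExactI forms: the Math.*Exact intrinsics now consume only the flags result rather than a fused value-plus-flags pair. The equivalent check in hosted C++, using GCC/Clang builtins as stand-ins — illustration only:

#include <stdint.h>

// OverflowAddI, morally: perform the add and report whether OF is set.
static inline bool overflow_add_int(int32_t a, int32_t b, int32_t* out) {
  return __builtin_add_overflow(a, b, out);
}
// __builtin_sub_overflow and __builtin_mul_overflow mirror OverflowSubI
// and OverflowMulI; OverflowSubI with a zero left operand is the NEG case.
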
+ //----------Long Instructions------------------------------------------------
+ // Add Long Register with Register
+@@ -9229,6 +8752,210 @@
+ ins_pipe( ialu_reg_long_mem );
+ %}
+
++// BMI1 instructions
++instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
++ match(Set dst (AndL (XorL src1 minus_1) src2));
++ predicate(UseBMI1Instructions);
++ effect(KILL cr, TEMP dst);
++
++ format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t"
++ "ANDNL $dst.hi, $src1.hi, $src2.hi"
++ %}
++
++ ins_encode %{
++ Register Rdst = $dst$$Register;
++ Register Rsrc1 = $src1$$Register;
++ Register Rsrc2 = $src2$$Register;
++ __ andnl(Rdst, Rsrc1, Rsrc2);
++ __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
++ %}
++ ins_pipe(ialu_reg_reg_long);
++%}
++
++instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
++ match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
++ predicate(UseBMI1Instructions);
++ effect(KILL cr, TEMP dst);
++
++ ins_cost(125);
++ format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t"
++ "ANDNL $dst.hi, $src1.hi, $src2+4"
++ %}
++
++ ins_encode %{
++ Register Rdst = $dst$$Register;
++ Register Rsrc1 = $src1$$Register;
++ Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
++
++ __ andnl(Rdst, Rsrc1, $src2$$Address);
++ __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
++ %}
++ ins_pipe(ialu_reg_mem);
++%}
++
++instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
++ match(Set dst (AndL (SubL imm_zero src) src));
++ predicate(UseBMI1Instructions);
++ effect(KILL cr, TEMP dst);
++
++ format %{ "MOVL $dst.hi, 0\n\t"
++ "BLSIL $dst.lo, $src.lo\n\t"
++ "JNZ done\n\t"
++ "BLSIL $dst.hi, $src.hi\n"
++ "done:"
++ %}
++
++ ins_encode %{
++ Label done;
++ Register Rdst = $dst$$Register;
++ Register Rsrc = $src$$Register;
++ __ movl(HIGH_FROM_LOW(Rdst), 0);
++ __ blsil(Rdst, Rsrc);
++ __ jccb(Assembler::notZero, done);
++ __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
++ __ bind(done);
++ %}
++ ins_pipe(ialu_reg);
++%}
++
++instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
++ match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
++ predicate(UseBMI1Instructions);
++ effect(KILL cr, TEMP dst);
++
++ ins_cost(125);
++ format %{ "MOVL $dst.hi, 0\n\t"
++ "BLSIL $dst.lo, $src\n\t"
++ "JNZ done\n\t"
++ "BLSIL $dst.hi, $src+4\n"
++ "done:"
++ %}
++
++ ins_encode %{
++ Label done;
++ Register Rdst = $dst$$Register;
++ Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
++
++ __ movl(HIGH_FROM_LOW(Rdst), 0);
++ __ blsil(Rdst, $src$$Address);
++ __ jccb(Assembler::notZero, done);
++ __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
++ __ bind(done);
++ %}
++ ins_pipe(ialu_reg_mem);
++%}
++
++instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
++%{
++ match(Set dst (XorL (AddL src minus_1) src));
++ predicate(UseBMI1Instructions);
++ effect(KILL cr, TEMP dst);
++
++ format %{ "MOVL $dst.hi, 0\n\t"
++ "BLSMSKL $dst.lo, $src.lo\n\t"
++ "JNC done\n\t"
++ "BLSMSKL $dst.hi, $src.hi\n"
++ "done:"
++ %}
++
++ ins_encode %{
++ Label done;
++ Register Rdst = $dst$$Register;
++ Register Rsrc = $src$$Register;
++ __ movl(HIGH_FROM_LOW(Rdst), 0);
++ __ blsmskl(Rdst, Rsrc);
++ __ jccb(Assembler::carryClear, done);
++ __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
++ __ bind(done);
++ %}
++
++ ins_pipe(ialu_reg);
++%}
++
++instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
++%{
++ match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
++ predicate(UseBMI1Instructions);
++ effect(KILL cr, TEMP dst);
++
++ ins_cost(125);
++ format %{ "MOVL $dst.hi, 0\n\t"
++ "BLSMSKL $dst.lo, $src\n\t"
++ "JNC done\n\t"
++ "BLSMSKL $dst.hi, $src+4\n"
++ "done:"
++ %}
++
++ ins_encode %{
++ Label done;
++ Register Rdst = $dst$$Register;
++ Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
++
++ __ movl(HIGH_FROM_LOW(Rdst), 0);
++ __ blsmskl(Rdst, $src$$Address);
++ __ jccb(Assembler::carryClear, done);
++ __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
++ __ bind(done);
++ %}
++
++ ins_pipe(ialu_reg_mem);
++%}
++
++instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
++%{
++ match(Set dst (AndL (AddL src minus_1) src) );
++ predicate(UseBMI1Instructions);
++ effect(KILL cr, TEMP dst);
++
++ format %{ "MOVL $dst.hi, $src.hi\n\t"
++ "BLSRL $dst.lo, $src.lo\n\t"
++ "JNC done\n\t"
++ "BLSRL $dst.hi, $src.hi\n"
++ "done:"
++ %}
++
++ ins_encode %{
++ Label done;
++ Register Rdst = $dst$$Register;
++ Register Rsrc = $src$$Register;
++ __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
++ __ blsrl(Rdst, Rsrc);
++ __ jccb(Assembler::carryClear, done);
++ __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
++ __ bind(done);
++ %}
++
++ ins_pipe(ialu_reg);
++%}
++
++instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
++%{
++ match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
++ predicate(UseBMI1Instructions);
++ effect(KILL cr, TEMP dst);
++
++ ins_cost(125);
++ format %{ "MOVL $dst.hi, $src+4\n\t"
++ "BLSRL $dst.lo, $src\n\t"
++ "JNC done\n\t"
++ "BLSRL $dst.hi, $src+4\n"
++ "done:"
++ %}
++
++ ins_encode %{
++ Label done;
++ Register Rdst = $dst$$Register;
++ Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
++ __ movl(HIGH_FROM_LOW(Rdst), src_hi);
++ __ blsrl(Rdst, $src$$Address);
++ __ jccb(Assembler::carryClear, done);
++ __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
++ __ bind(done);
++ %}
++
++ ins_pipe(ialu_reg_mem);
++%}
++
+ // Or Long Register with Register
+ instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
+ match(Set dst (OrL dst src));
+@@ -13147,23 +12874,44 @@
+
+ // inlined locking and unlocking
+
+-
+-instruct cmpFastLock( eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
+- match( Set cr (FastLock object box) );
+- effect( TEMP tmp, TEMP scr, USE_KILL box );
++instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
++ predicate(Compile::current()->use_rtm());
++ match(Set cr (FastLock object box));
++ effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
++ ins_cost(300);
++ format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
++ ins_encode %{
++ __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
++ $scr$$Register, $cx1$$Register, $cx2$$Register,
++ _counters, _rtm_counters, _stack_rtm_counters,
++ ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
++ true, ra_->C->profile_rtm());
++ %}
++ ins_pipe(pipe_slow);
++%}
++
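
cmpFastLockRTM is selected when the compilation uses Restricted Transactional Memory; the non-RTM rule below keeps the old register shape. As background, a hedged sketch of RTM lock elision with the compiler intrinsics (requires -mrtm; not HotSpot's actual code path):

#include <immintrin.h>

// Attempt the critical section as a hardware transaction; on abort the
// caller falls back to the ordinary FastLock slow path.
template <typename F>
bool try_transactional(F critical_section) {
  unsigned status = _xbegin();
  if (status == _XBEGIN_STARTED) {
    critical_section();
    _xend();        // commit
    return true;
  }
  return false;     // aborted (status holds the abort reason)
}
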
++instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
++ predicate(!Compile::current()->use_rtm());
++ match(Set cr (FastLock object box));
++ effect(TEMP tmp, TEMP scr, USE_KILL box);
+ ins_cost(300);
+ format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
+- ins_encode( Fast_Lock(object,box,tmp,scr) );
+- ins_pipe( pipe_slow );
+-%}
+-
+-instruct cmpFastUnlock( eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
+- match( Set cr (FastUnlock object box) );
+- effect( TEMP tmp, USE_KILL box );
++ ins_encode %{
++ __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
++ $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
++ %}
++ ins_pipe(pipe_slow);
++%}
++
++instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
++ match(Set cr (FastUnlock object box));
++ effect(TEMP tmp, USE_KILL box);
+ ins_cost(300);
+ format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
+- ins_encode( Fast_Unlock(object,box,tmp) );
+- ins_pipe( pipe_slow );
++ ins_encode %{
++ __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
++ %}
++ ins_pipe(pipe_slow);
+ %}
+
+
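
With the enc_class bodies gone, the locking logic lives in MacroAssembler. The member shapes below are reconstructed from the call sites in the new instructs above — a best-effort reading of the patch, not quoted from it:

// Illustration of the shape only — all types stubbed here; the real
// declarations live in macroAssembler_x86.hpp in the full patch.
struct Register {};
struct BiasedLockingCounters; struct RTMLockingCounters; struct Metadata;
struct MacroAssemblerSketch {
  void fast_lock(Register obj, Register box, Register tmp, Register scr,
                 Register cx1, Register cx2,
                 BiasedLockingCounters* counters,
                 RTMLockingCounters* rtm_counters,
                 RTMLockingCounters* stack_rtm_counters,
                 Metadata* method_data, bool use_rtm, bool profile_rtm);
  void fast_unlock(Register obj, Register box, Register tmp, bool use_rtm);
};
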
+--- ./hotspot/src/cpu/x86/vm/x86_64.ad Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/x86/vm/x86_64.ad Wed Jul 30 03:51:43 2014 -0700
+@@ -688,6 +688,11 @@
+ return 0; // absolute addressing, no offset
+ }
+
++bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
++void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
++ ShouldNotReachHere();
++}
++
+ void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
+ // Empty encoding
+ }
+@@ -708,14 +713,15 @@
+ void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
+ Compile* C = ra_->C;
+
+- int framesize = C->frame_slots() << LogBytesPerInt;
++ int framesize = C->frame_size_in_bytes();
++ int bangsize = C->bang_size_in_bytes();
+ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
+ // Remove wordSize for return addr which is already pushed.
+ framesize -= wordSize;
+
+- if (C->need_stack_bang(framesize)) {
++ if (C->need_stack_bang(bangsize)) {
+ framesize -= wordSize;
+- st->print("# stack bang");
++ st->print("# stack bang (%d bytes)", bangsize);
+ st->print("\n\t");
+ st->print("pushq rbp\t# Save rbp");
+ if (framesize) {
+@@ -746,9 +752,10 @@
+ Compile* C = ra_->C;
+ MacroAssembler _masm(&cbuf);
+
+- int framesize = C->frame_slots() << LogBytesPerInt;
+-
+- __ verified_entry(framesize, C->need_stack_bang(framesize), false);
++ int framesize = C->frame_size_in_bytes();
++ int bangsize = C->bang_size_in_bytes();
++
++ __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, false);
+
+ C->set_frame_complete(cbuf.insts_size());
+
+@@ -781,7 +788,7 @@
+ st->cr(); st->print("\t");
+ }
+
+- int framesize = C->frame_slots() << LogBytesPerInt;
++ int framesize = C->frame_size_in_bytes();
+ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
+ // Remove word for return adr already pushed
+ // and RBP
+@@ -817,7 +824,7 @@
+ __ vzeroupper();
+ }
+
+- int framesize = C->frame_slots() << LogBytesPerInt;
++ int framesize = C->frame_size_in_bytes();
+ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
+ // Remove word for return adr already pushed
+ // and RBP
+@@ -1434,66 +1441,9 @@
+ return MachNode::size(ra_); // too many variables; just compute it
+ // the hard way
+ }
+-
++
+
+ //=============================================================================
+-uint size_exception_handler()
+-{
+- // NativeCall instruction size is the same as NativeJump.
+- // Note that this value is also credited (in output.cpp) to
+- // the size of the code section.
+- return NativeJump::instruction_size;
+-}
+-
+-// Emit exception handler code.
+-int emit_exception_handler(CodeBuffer& cbuf)
+-{
+-
+- // Note that the code buffer's insts_mark is always relative to insts.
+- // That's why we must use the macroassembler to generate a handler.
+- MacroAssembler _masm(&cbuf);
+- address base =
+- __ start_a_stub(size_exception_handler());
+- if (base == NULL) return 0; // CodeBuffer::expand failed
+- int offset = __ offset();
+- __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
+- assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
+- __ end_a_stub();
+- return offset;
+-}
+-
+-uint size_deopt_handler()
+-{
+- // three 5 byte instructions
+- return 15;
+-}
+-
+-// Emit deopt handler code.
+-int emit_deopt_handler(CodeBuffer& cbuf)
+-{
+-
+- // Note that the code buffer's insts_mark is always relative to insts.
+- // That's why we must use the macroassembler to generate a handler.
+- MacroAssembler _masm(&cbuf);
+- address base =
+- __ start_a_stub(size_deopt_handler());
+- if (base == NULL) return 0; // CodeBuffer::expand failed
+- int offset = __ offset();
+- address the_pc = (address) __ pc();
+- Label next;
+- // push a "the_pc" on the stack without destroying any registers
+- // as they all may be live.
+-
+- // push address of "next"
+- __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
+- __ bind(next);
+- // adjust it so it matches "the_pc"
+- __ subptr(Address(rsp, 0), __ offset() - offset);
+- __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
+- assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
+- __ end_a_stub();
+- return offset;
+-}
+
+ int Matcher::regnum_to_fpu_offset(int regnum)
+ {
+@@ -1542,6 +1492,9 @@
+ // No CMOVF/CMOVD with SSE2
+ const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; }
+
++// Does the CPU require late expand (see block.cpp for description of late expand)?
++const bool Matcher::require_postalloc_expand = false;
++
+ // Should the Matcher clone shifts on addressing modes, expecting them
+ // to be subsumed into complex addressing expressions or compute them
+ // into registers? True for Intel but false for most RISCs
+@@ -1649,18 +1602,6 @@
+ return PTR_RBP_REG_mask();
+ }
+
+-const RegMask Matcher::mathExactI_result_proj_mask() {
+- return INT_RAX_REG_mask();
+-}
+-
+-const RegMask Matcher::mathExactL_result_proj_mask() {
+- return LONG_RAX_REG_mask();
+-}
+-
+-const RegMask Matcher::mathExactI_flags_proj_mask() {
+- return INT_FLAGS_mask();
+-}
+-
+ %}
+
+ //----------ENCODING BLOCK-----------------------------------------------------
+@@ -2591,231 +2532,6 @@
+ %}
+
+
+- // obj: object to lock
+- // box: box address (header location) -- killed
+- // tmp: rax -- killed
+- // scr: rbx -- killed
+- //
+- // What follows is a direct transliteration of fast_lock() and fast_unlock()
+- // from i486.ad. See that file for comments.
+- // TODO: where possible switch from movq (r, 0) to movl(r,0) and
+- // use the shorter encoding. (Movl clears the high-order 32-bits).
+-
+-
+- enc_class Fast_Lock(rRegP obj, rRegP box, rax_RegI tmp, rRegP scr)
+- %{
+- Register objReg = as_Register((int)$obj$$reg);
+- Register boxReg = as_Register((int)$box$$reg);
+- Register tmpReg = as_Register($tmp$$reg);
+- Register scrReg = as_Register($scr$$reg);
+- MacroAssembler masm(&cbuf);
+-
+- // Verify uniqueness of register assignments -- necessary but not sufficient
+- assert (objReg != boxReg && objReg != tmpReg &&
+- objReg != scrReg && tmpReg != scrReg, "invariant") ;
+-
+- if (_counters != NULL) {
+- masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
+- }
+- if (EmitSync & 1) {
+- // Without cast to int32_t a movptr will destroy r10 which is typically obj
+- masm.movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ;
+- masm.cmpptr(rsp, (int32_t)NULL_WORD) ;
+- } else
+- if (EmitSync & 2) {
+- Label DONE_LABEL;
+- if (UseBiasedLocking) {
+- // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
+- masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
+- }
+- // QQQ was movl...
+- masm.movptr(tmpReg, 0x1);
+- masm.orptr(tmpReg, Address(objReg, 0));
+- masm.movptr(Address(boxReg, 0), tmpReg);
+- if (os::is_MP()) {
+- masm.lock();
+- }
+- masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
+- masm.jcc(Assembler::equal, DONE_LABEL);
+-
+- // Recursive locking
+- masm.subptr(tmpReg, rsp);
+- masm.andptr(tmpReg, 7 - os::vm_page_size());
+- masm.movptr(Address(boxReg, 0), tmpReg);
+-
+- masm.bind(DONE_LABEL);
+- masm.nop(); // avoid branch to branch
+- } else {
+- Label DONE_LABEL, IsInflated, Egress;
+-
+- masm.movptr(tmpReg, Address(objReg, 0)) ;
+- masm.testl (tmpReg, 0x02) ; // inflated vs stack-locked|neutral|biased
+- masm.jcc (Assembler::notZero, IsInflated) ;
+-
+- // it's stack-locked, biased or neutral
+- // TODO: optimize markword triage order to reduce the number of
+- // conditional branches in the most common cases.
+- // Beware -- there's a subtle invariant that fetch of the markword
+- // at [FETCH], below, will never observe a biased encoding (*101b).
+- // If this invariant is not held we'll suffer exclusion (safety) failure.
+-
+- if (UseBiasedLocking && !UseOptoBiasInlining) {
+- masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, _counters);
+- masm.movptr(tmpReg, Address(objReg, 0)) ; // [FETCH]
+- }
+-
+- // was q will it destroy high?
+- masm.orl (tmpReg, 1) ;
+- masm.movptr(Address(boxReg, 0), tmpReg) ;
+- if (os::is_MP()) { masm.lock(); }
+- masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
+- if (_counters != NULL) {
+- masm.cond_inc32(Assembler::equal,
+- ExternalAddress((address) _counters->fast_path_entry_count_addr()));
+- }
+- masm.jcc (Assembler::equal, DONE_LABEL);
+-
+- // Recursive locking
+- masm.subptr(tmpReg, rsp);
+- masm.andptr(tmpReg, 7 - os::vm_page_size());
+- masm.movptr(Address(boxReg, 0), tmpReg);
+- if (_counters != NULL) {
+- masm.cond_inc32(Assembler::equal,
+- ExternalAddress((address) _counters->fast_path_entry_count_addr()));
+- }
+- masm.jmp (DONE_LABEL) ;
+-
+- masm.bind (IsInflated) ;
+- // It's inflated
+-
+- // TODO: someday avoid the ST-before-CAS penalty by
+- // relocating (deferring) the following ST.
+- // We should also think about trying a CAS without having
+- // fetched _owner. If the CAS is successful we may
+- // avoid an RTO->RTS upgrade on the $line.
+- // Without cast to int32_t a movptr will destroy r10 which is typically obj
+- masm.movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ;
+-
+- masm.mov (boxReg, tmpReg) ;
+- masm.movptr (tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
+- masm.testptr(tmpReg, tmpReg) ;
+- masm.jcc (Assembler::notZero, DONE_LABEL) ;
+-
+- // It's inflated and appears unlocked
+- if (os::is_MP()) { masm.lock(); }
+- masm.cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
+- // Intentional fall-through into DONE_LABEL ...
+-
+- masm.bind (DONE_LABEL) ;
+- masm.nop () ; // avoid jmp to jmp
+- }
+- %}
+-
+- // obj: object to unlock
+- // box: box address (displaced header location), killed
+- // RBX: killed tmp; cannot be obj nor box
+- enc_class Fast_Unlock(rRegP obj, rax_RegP box, rRegP tmp)
+- %{
+-
+- Register objReg = as_Register($obj$$reg);
+- Register boxReg = as_Register($box$$reg);
+- Register tmpReg = as_Register($tmp$$reg);
+- MacroAssembler masm(&cbuf);
+-
+- if (EmitSync & 4) {
+- masm.cmpptr(rsp, 0) ;
+- } else
+- if (EmitSync & 8) {
+- Label DONE_LABEL;
+- if (UseBiasedLocking) {
+- masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
+- }
+-
+- // Check whether the displaced header is 0
+- //(=> recursive unlock)
+- masm.movptr(tmpReg, Address(boxReg, 0));
+- masm.testptr(tmpReg, tmpReg);
+- masm.jcc(Assembler::zero, DONE_LABEL);
+-
+- // If not recursive lock, reset the header to displaced header
+- if (os::is_MP()) {
+- masm.lock();
+- }
+- masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
+- masm.bind(DONE_LABEL);
+- masm.nop(); // avoid branch to branch
+- } else {
+- Label DONE_LABEL, Stacked, CheckSucc ;
+-
+- if (UseBiasedLocking && !UseOptoBiasInlining) {
+- masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
+- }
+-
+- masm.movptr(tmpReg, Address(objReg, 0)) ;
+- masm.cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD) ;
+- masm.jcc (Assembler::zero, DONE_LABEL) ;
+- masm.testl (tmpReg, 0x02) ;
+- masm.jcc (Assembler::zero, Stacked) ;
+-
+- // It's inflated
+- masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
+- masm.xorptr(boxReg, r15_thread) ;
+- masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
+- masm.jcc (Assembler::notZero, DONE_LABEL) ;
+- masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
+- masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
+- masm.jcc (Assembler::notZero, CheckSucc) ;
+- masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
+- masm.jmp (DONE_LABEL) ;
+-
+- if ((EmitSync & 65536) == 0) {
+- Label LSuccess, LGoSlowPath ;
+- masm.bind (CheckSucc) ;
+- masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
+- masm.jcc (Assembler::zero, LGoSlowPath) ;
+-
+- // I'd much rather use lock:andl m->_owner, 0 as it's faster than the
+- // the explicit ST;MEMBAR combination, but masm doesn't currently support
+- // "ANDQ M,IMM". Don't use MFENCE here. lock:add to TOS, xchg, etc
+- // are all faster when the write buffer is populated.
+- masm.movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
+- if (os::is_MP()) {
+- masm.lock () ; masm.addl (Address(rsp, 0), 0) ;
+- }
+- masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
+- masm.jcc (Assembler::notZero, LSuccess) ;
+-
+- masm.movptr (boxReg, (int32_t)NULL_WORD) ; // box is really EAX
+- if (os::is_MP()) { masm.lock(); }
+- masm.cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+- masm.jcc (Assembler::notEqual, LSuccess) ;
+- // Intentional fall-through into slow-path
+-
+- masm.bind (LGoSlowPath) ;
+- masm.orl (boxReg, 1) ; // set ICC.ZF=0 to indicate failure
+- masm.jmp (DONE_LABEL) ;
+-
+- masm.bind (LSuccess) ;
+- masm.testl (boxReg, 0) ; // set ICC.ZF=1 to indicate success
+- masm.jmp (DONE_LABEL) ;
+- }
+-
+- masm.bind (Stacked) ;
+- masm.movptr(tmpReg, Address (boxReg, 0)) ; // re-fetch
+- if (os::is_MP()) { masm.lock(); }
+- masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
+-
+- if (EmitSync & 65536) {
+- masm.bind (CheckSucc) ;
+- }
+- masm.bind(DONE_LABEL);
+- if (EmitSync & 32768) {
+- masm.nop(); // avoid branch to branch
+- }
+- }
+- %}
+-
+-
+ enc_class enc_rethrow()
+ %{
+ cbuf.set_insts_mark();
+@@ -2953,7 +2669,7 @@
+ c_calling_convention
+ %{
+ // This is obviously always outgoing
+- (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
++ (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
+ %}
+
+ // Location of compiled Java return values. Same as C for now.
+@@ -6251,6 +5967,19 @@
+ %}
+
+ instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
++ predicate(UseCountTrailingZerosInstruction);
++ match(Set dst (CountTrailingZerosI src));
++ effect(KILL cr);
++
++ format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
++ ins_encode %{
++ __ tzcntl($dst$$Register, $src$$Register);
++ %}
++ ins_pipe(ialu_reg);
++%}
++
++instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
++ predicate(!UseCountTrailingZerosInstruction);
+ match(Set dst (CountTrailingZerosI src));
+ effect(KILL cr);
+
+@@ -6270,6 +5999,19 @@
+ %}
+
+ instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
++ predicate(UseCountTrailingZerosInstruction);
++ match(Set dst (CountTrailingZerosL src));
++ effect(KILL cr);
++
++ format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
++ ins_encode %{
++ __ tzcntq($dst$$Register, $src$$Register);
++ %}
++ ins_pipe(ialu_reg);
++%}
++
++instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
++ predicate(!UseCountTrailingZerosInstruction);
+ match(Set dst (CountTrailingZerosL src));
+ effect(KILL cr);
+
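
The split above exists because the two instructions disagree on zero input: TZCNT (BMI1) is fully defined and yields the operand width, while BSF leaves its destination undefined, so the BSF-based rule can only serve as the fallback when UseCountTrailingZerosInstruction is off. A minimal C++ sketch of the semantics the predicate separates (illustrative only; __builtin_ctz is a GCC/Clang builtin that is undefined for zero, like BSF):

    #include <cstdint>

    // tzcnt-like helper: defined for every input, returns 32 when x == 0.
    inline uint32_t tzcnt32(uint32_t x) {
        // The bsf path must special-case zero explicitly; tzcnt does not.
        return x ? static_cast<uint32_t>(__builtin_ctz(x)) : 32u;
    }
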
+@@ -6348,6 +6090,7 @@
+ instruct membar_acquire()
+ %{
+ match(MemBarAcquire);
++ match(LoadFence);
+ ins_cost(0);
+
+ size(0);
+@@ -6370,6 +6113,7 @@
+ instruct membar_release()
+ %{
+ match(MemBarRelease);
++ match(StoreFence);
+ ins_cost(0);
+
+ size(0);
+@@ -6953,82 +6697,6 @@
+ //----------Arithmetic Instructions--------------------------------------------
+ //----------Addition Instructions----------------------------------------------
+
+-instruct addExactI_rReg(rax_RegI dst, rRegI src, rFlagsReg cr)
+-%{
+- match(AddExactI dst src);
+- effect(DEF cr);
+-
+- format %{ "addl $dst, $src\t# addExact int" %}
+- ins_encode %{
+- __ addl($dst$$Register, $src$$Register);
+- %}
+- ins_pipe(ialu_reg_reg);
+-%}
+-
+-instruct addExactI_rReg_imm(rax_RegI dst, immI src, rFlagsReg cr)
+-%{
+- match(AddExactI dst src);
+- effect(DEF cr);
+-
+- format %{ "addl $dst, $src\t# addExact int" %}
+- ins_encode %{
+- __ addl($dst$$Register, $src$$constant);
+- %}
+- ins_pipe(ialu_reg_reg);
+-%}
+-
+-instruct addExactI_rReg_mem(rax_RegI dst, memory src, rFlagsReg cr)
+-%{
+- match(AddExactI dst (LoadI src));
+- effect(DEF cr);
+-
+- ins_cost(125); // XXX
+- format %{ "addl $dst, $src\t# addExact int" %}
+- ins_encode %{
+- __ addl($dst$$Register, $src$$Address);
+- %}
+-
+- ins_pipe(ialu_reg_mem);
+-%}
+-
+-instruct addExactL_rReg(rax_RegL dst, rRegL src, rFlagsReg cr)
+-%{
+- match(AddExactL dst src);
+- effect(DEF cr);
+-
+- format %{ "addq $dst, $src\t# addExact long" %}
+- ins_encode %{
+- __ addq($dst$$Register, $src$$Register);
+- %}
+- ins_pipe(ialu_reg_reg);
+-%}
+-
+-instruct addExactL_rReg_imm(rax_RegL dst, immL32 src, rFlagsReg cr)
+-%{
+- match(AddExactL dst src);
+- effect(DEF cr);
+-
+- format %{ "addq $dst, $src\t# addExact long" %}
+- ins_encode %{
+- __ addq($dst$$Register, $src$$constant);
+- %}
+- ins_pipe(ialu_reg_reg);
+-%}
+-
+-instruct addExactL_rReg_mem(rax_RegL dst, memory src, rFlagsReg cr)
+-%{
+- match(AddExactL dst (LoadL src));
+- effect(DEF cr);
+-
+- ins_cost(125); // XXX
+- format %{ "addq $dst, $src\t# addExact long" %}
+- ins_encode %{
+- __ addq($dst$$Register, $src$$Address);
+- %}
+-
+- ins_pipe(ialu_reg_mem);
+-%}
+-
+ instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
+ %{
+ match(Set dst (AddI dst src));
+@@ -7641,80 +7309,6 @@
+ ins_pipe(ialu_mem_imm);
+ %}
+
+-instruct subExactI_rReg(rax_RegI dst, rRegI src, rFlagsReg cr)
+-%{
+- match(SubExactI dst src);
+- effect(DEF cr);
+-
+- format %{ "subl $dst, $src\t# subExact int" %}
+- ins_encode %{
+- __ subl($dst$$Register, $src$$Register);
+- %}
+- ins_pipe(ialu_reg_reg);
+-%}
+-
+-instruct subExactI_rReg_imm(rax_RegI dst, immI src, rFlagsReg cr)
+-%{
+- match(SubExactI dst src);
+- effect(DEF cr);
+-
+- format %{ "subl $dst, $src\t# subExact int" %}
+- ins_encode %{
+- __ subl($dst$$Register, $src$$constant);
+- %}
+- ins_pipe(ialu_reg_reg);
+-%}
+-
+-instruct subExactI_rReg_mem(rax_RegI dst, memory src, rFlagsReg cr)
+-%{
+- match(SubExactI dst (LoadI src));
+- effect(DEF cr);
+-
+- ins_cost(125);
+- format %{ "subl $dst, $src\t# subExact int" %}
+- ins_encode %{
+- __ subl($dst$$Register, $src$$Address);
+- %}
+- ins_pipe(ialu_reg_mem);
+-%}
+-
+-instruct subExactL_rReg(rax_RegL dst, rRegL src, rFlagsReg cr)
+-%{
+- match(SubExactL dst src);
+- effect(DEF cr);
+-
+- format %{ "subq $dst, $src\t# subExact long" %}
+- ins_encode %{
+- __ subq($dst$$Register, $src$$Register);
+- %}
+- ins_pipe(ialu_reg_reg);
+-%}
+-
+-instruct subExactL_rReg_imm(rax_RegL dst, immL32 src, rFlagsReg cr)
+-%{
+- match(SubExactL dst (LoadL src));
+- effect(DEF cr);
+-
+- format %{ "subq $dst, $src\t# subExact long" %}
+- ins_encode %{
+- __ subq($dst$$Register, $src$$constant);
+- %}
+- ins_pipe(ialu_reg_reg);
+-%}
+-
+-instruct subExactL_rReg_mem(rax_RegI dst, memory src, rFlagsReg cr)
+-%{
+- match(SubExactI dst src);
+- effect(DEF cr);
+-
+- ins_cost(125);
+- format %{ "subq $dst, $src\t# subExact long" %}
+- ins_encode %{
+- __ subq($dst$$Register, $src$$Address);
+- %}
+- ins_pipe(ialu_reg_mem);
+-%}
+-
+ instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
+ %{
+ match(Set dst (SubL dst src));
+@@ -7831,31 +7425,6 @@
+ ins_pipe(ialu_reg);
+ %}
+
+-instruct negExactI_rReg(rax_RegI dst, rFlagsReg cr)
+-%{
+- match(NegExactI dst);
+- effect(KILL cr);
+-
+- format %{ "negl $dst\t# negExact int" %}
+- ins_encode %{
+- __ negl($dst$$Register);
+- %}
+- ins_pipe(ialu_reg);
+-%}
+-
+-instruct negExactL_rReg(rax_RegL dst, rFlagsReg cr)
+-%{
+- match(NegExactL dst);
+- effect(KILL cr);
+-
+- format %{ "negq $dst\t# negExact long" %}
+- ins_encode %{
+- __ negq($dst$$Register);
+- %}
+- ins_pipe(ialu_reg);
+-%}
+-
+-
+ //----------Multiplication/Division Instructions-------------------------------
+ // Integer Multiplication Instructions
+ // Multiply Register
+@@ -7972,86 +7541,6 @@
+ ins_pipe(ialu_reg_reg_alu0);
+ %}
+
+-
+-instruct mulExactI_rReg(rax_RegI dst, rRegI src, rFlagsReg cr)
+-%{
+- match(MulExactI dst src);
+- effect(DEF cr);
+-
+- ins_cost(300);
+- format %{ "imull $dst, $src\t# mulExact int" %}
+- ins_encode %{
+- __ imull($dst$$Register, $src$$Register);
+- %}
+- ins_pipe(ialu_reg_reg_alu0);
+-%}
+-
+-
+-instruct mulExactI_rReg_imm(rax_RegI dst, rRegI src, immI imm, rFlagsReg cr)
+-%{
+- match(MulExactI src imm);
+- effect(DEF cr);
+-
+- ins_cost(300);
+- format %{ "imull $dst, $src, $imm\t# mulExact int" %}
+- ins_encode %{
+- __ imull($dst$$Register, $src$$Register, $imm$$constant);
+- %}
+- ins_pipe(ialu_reg_reg_alu0);
+-%}
+-
+-instruct mulExactI_rReg_mem(rax_RegI dst, memory src, rFlagsReg cr)
+-%{
+- match(MulExactI dst (LoadI src));
+- effect(DEF cr);
+-
+- ins_cost(350);
+- format %{ "imull $dst, $src\t# mulExact int" %}
+- ins_encode %{
+- __ imull($dst$$Register, $src$$Address);
+- %}
+- ins_pipe(ialu_reg_mem_alu0);
+-%}
+-
+-instruct mulExactL_rReg(rax_RegL dst, rRegL src, rFlagsReg cr)
+-%{
+- match(MulExactL dst src);
+- effect(DEF cr);
+-
+- ins_cost(300);
+- format %{ "imulq $dst, $src\t# mulExact long" %}
+- ins_encode %{
+- __ imulq($dst$$Register, $src$$Register);
+- %}
+- ins_pipe(ialu_reg_reg_alu0);
+-%}
+-
+-instruct mulExactL_rReg_imm(rax_RegL dst, rRegL src, immL32 imm, rFlagsReg cr)
+-%{
+- match(MulExactL src imm);
+- effect(DEF cr);
+-
+- ins_cost(300);
+- format %{ "imulq $dst, $src, $imm\t# mulExact long" %}
+- ins_encode %{
+- __ imulq($dst$$Register, $src$$Register, $imm$$constant);
+- %}
+- ins_pipe(ialu_reg_reg_alu0);
+-%}
+-
+-instruct mulExactL_rReg_mem(rax_RegL dst, memory src, rFlagsReg cr)
+-%{
+- match(MulExactL dst (LoadL src));
+- effect(DEF cr);
+-
+- ins_cost(350);
+- format %{ "imulq $dst, $src\t# mulExact long" %}
+- ins_encode %{
+- __ imulq($dst$$Register, $src$$Address);
+- %}
+- ins_pipe(ialu_reg_mem_alu0);
+-%}
+-
+ instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
+ rFlagsReg cr)
+ %{
+@@ -9104,6 +8593,122 @@
+ ins_pipe(ialu_mem_imm);
+ %}
+
++// BMI1 instructions
++instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
++ match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
++ predicate(UseBMI1Instructions);
++ effect(KILL cr);
++
++ ins_cost(125);
++ format %{ "andnl $dst, $src1, $src2" %}
++
++ ins_encode %{
++ __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
++ %}
++ ins_pipe(ialu_reg_mem);
++%}
++
++instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
++ match(Set dst (AndI (XorI src1 minus_1) src2));
++ predicate(UseBMI1Instructions);
++ effect(KILL cr);
++
++ format %{ "andnl $dst, $src1, $src2" %}
++
++ ins_encode %{
++ __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
++ %}
++ ins_pipe(ialu_reg);
++%}
++
++instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, rFlagsReg cr) %{
++ match(Set dst (AndI (SubI imm_zero src) src));
++ predicate(UseBMI1Instructions);
++ effect(KILL cr);
++
++ format %{ "blsil $dst, $src" %}
++
++ ins_encode %{
++ __ blsil($dst$$Register, $src$$Register);
++ %}
++ ins_pipe(ialu_reg);
++%}
++
++instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, rFlagsReg cr) %{
++ match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
++ predicate(UseBMI1Instructions);
++ effect(KILL cr);
++
++ ins_cost(125);
++ format %{ "blsil $dst, $src" %}
++
++ ins_encode %{
++ __ blsil($dst$$Register, $src$$Address);
++ %}
++ ins_pipe(ialu_reg_mem);
++%}
++
++instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
++%{
++ match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
++ predicate(UseBMI1Instructions);
++ effect(KILL cr);
++
++ ins_cost(125);
++ format %{ "blsmskl $dst, $src" %}
++
++ ins_encode %{
++ __ blsmskl($dst$$Register, $src$$Address);
++ %}
++ ins_pipe(ialu_reg_mem);
++%}
++
++instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
++%{
++ match(Set dst (XorI (AddI src minus_1) src));
++ predicate(UseBMI1Instructions);
++ effect(KILL cr);
++
++ format %{ "blsmskl $dst, $src" %}
++
++ ins_encode %{
++ __ blsmskl($dst$$Register, $src$$Register);
++ %}
++
++ ins_pipe(ialu_reg);
++%}
++
++instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
++%{
++ match(Set dst (AndI (AddI src minus_1) src) );
++ predicate(UseBMI1Instructions);
++ effect(KILL cr);
++
++ format %{ "blsrl $dst, $src" %}
++
++ ins_encode %{
++ __ blsrl($dst$$Register, $src$$Register);
++ %}
++
++ ins_pipe(ialu_reg_mem);
++%}
++
++instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
++%{
++ match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
++ predicate(UseBMI1Instructions);
++ effect(KILL cr);
++
++ ins_cost(125);
++ format %{ "blsrl $dst, $src" %}
++
++ ins_encode %{
++ __ blsrl($dst$$Register, $src$$Address);
++ %}
++
++ ins_pipe(ialu_reg);
++%}
++
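Each BMI1 match rule above encodes the scalar identity computed by its instruction, which lets the matcher fuse a multi-node ideal subgraph into a single instruction. The identities, as a C++ sketch (these are the standard BMI1 definitions, not HotSpot code):

    #include <cstdint>

    uint32_t andn_  (uint32_t a, uint32_t b) { return ~a & b; }  // ANDN: (a ^ -1) & b
    uint32_t blsi_  (uint32_t x) { return x & (0u - x); }        // isolate lowest set bit
    uint32_t blsmsk_(uint32_t x) { return x ^ (x - 1u); }        // mask through lowest set bit
    uint32_t blsr_  (uint32_t x) { return x & (x - 1u); }        // clear lowest set bit

The q-suffixed rules added further down are the same identities at 64 bits.
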
+ // Or Instructions
+ // Or Register with Register
+ instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
+@@ -9335,6 +8940,122 @@
+ ins_pipe(ialu_mem_imm);
+ %}
+
++// BMI1 instructions
++instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
++ match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
++ predicate(UseBMI1Instructions);
++ effect(KILL cr);
++
++ ins_cost(125);
++ format %{ "andnq $dst, $src1, $src2" %}
++
++ ins_encode %{
++ __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
++ %}
++ ins_pipe(ialu_reg_mem);
++%}
++
++instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
++ match(Set dst (AndL (XorL src1 minus_1) src2));
++ predicate(UseBMI1Instructions);
++ effect(KILL cr);
++
++ format %{ "andnq $dst, $src1, $src2" %}
++
++ ins_encode %{
++ __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
++ %}
++ ins_pipe(ialu_reg_mem);
++%}
++
++instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
++ match(Set dst (AndL (SubL imm_zero src) src));
++ predicate(UseBMI1Instructions);
++ effect(KILL cr);
++
++ format %{ "blsiq $dst, $src" %}
++
++ ins_encode %{
++ __ blsiq($dst$$Register, $src$$Register);
++ %}
++ ins_pipe(ialu_reg);
++%}
++
++instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
++ match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
++ predicate(UseBMI1Instructions);
++ effect(KILL cr);
++
++ ins_cost(125);
++ format %{ "blsiq $dst, $src" %}
++
++ ins_encode %{
++ __ blsiq($dst$$Register, $src$$Address);
++ %}
++ ins_pipe(ialu_reg_mem);
++%}
++
++instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
++%{
++ match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
++ predicate(UseBMI1Instructions);
++ effect(KILL cr);
++
++ ins_cost(125);
++ format %{ "blsmskq $dst, $src" %}
++
++ ins_encode %{
++ __ blsmskq($dst$$Register, $src$$Address);
++ %}
++ ins_pipe(ialu_reg_mem);
++%}
++
++instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
++%{
++ match(Set dst (XorL (AddL src minus_1) src));
++ predicate(UseBMI1Instructions);
++ effect(KILL cr);
++
++ format %{ "blsmskq $dst, $src" %}
++
++ ins_encode %{
++ __ blsmskq($dst$$Register, $src$$Register);
++ %}
++
++ ins_pipe(ialu_reg);
++%}
++
++instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
++%{
++ match(Set dst (AndL (AddL src minus_1) src) );
++ predicate(UseBMI1Instructions);
++ effect(KILL cr);
++
++ format %{ "blsrq $dst, $src" %}
++
++ ins_encode %{
++ __ blsrq($dst$$Register, $src$$Register);
++ %}
++
++ ins_pipe(ialu_reg);
++%}
++
++instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
++%{
++ match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
++ predicate(UseBMI1Instructions);
++ effect(KILL cr);
++
++ ins_cost(125);
++ format %{ "blsrq $dst, $src" %}
++
++ ins_encode %{
++ __ blsrq($dst$$Register, $src$$Address);
++ %}
++
++ ins_pipe(ialu_reg);
++%}
++
+ // Or Instructions
+ // Or Register with Register
+ instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
+@@ -10660,6 +10381,174 @@
+ ins_pipe( pipe_slow );
+ %}
+
++//----------Overflow Math Instructions-----------------------------------------
++
++instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
++%{
++ match(Set cr (OverflowAddI op1 op2));
++ effect(DEF cr, USE_KILL op1, USE op2);
++
++ format %{ "addl $op1, $op2\t# overflow check int" %}
++
++ ins_encode %{
++ __ addl($op1$$Register, $op2$$Register);
++ %}
++ ins_pipe(ialu_reg_reg);
++%}
++
++instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
++%{
++ match(Set cr (OverflowAddI op1 op2));
++ effect(DEF cr, USE_KILL op1, USE op2);
++
++ format %{ "addl $op1, $op2\t# overflow check int" %}
++
++ ins_encode %{
++ __ addl($op1$$Register, $op2$$constant);
++ %}
++ ins_pipe(ialu_reg_reg);
++%}
++
++instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
++%{
++ match(Set cr (OverflowAddL op1 op2));
++ effect(DEF cr, USE_KILL op1, USE op2);
++
++ format %{ "addq $op1, $op2\t# overflow check long" %}
++ ins_encode %{
++ __ addq($op1$$Register, $op2$$Register);
++ %}
++ ins_pipe(ialu_reg_reg);
++%}
++
++instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
++%{
++ match(Set cr (OverflowAddL op1 op2));
++ effect(DEF cr, USE_KILL op1, USE op2);
++
++ format %{ "addq $op1, $op2\t# overflow check long" %}
++ ins_encode %{
++ __ addq($op1$$Register, $op2$$constant);
++ %}
++ ins_pipe(ialu_reg_reg);
++%}
++
++instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
++%{
++ match(Set cr (OverflowSubI op1 op2));
++
++ format %{ "cmpl $op1, $op2\t# overflow check int" %}
++ ins_encode %{
++ __ cmpl($op1$$Register, $op2$$Register);
++ %}
++ ins_pipe(ialu_reg_reg);
++%}
++
++instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
++%{
++ match(Set cr (OverflowSubI op1 op2));
++
++ format %{ "cmpl $op1, $op2\t# overflow check int" %}
++ ins_encode %{
++ __ cmpl($op1$$Register, $op2$$constant);
++ %}
++ ins_pipe(ialu_reg_reg);
++%}
++
++instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
++%{
++ match(Set cr (OverflowSubL op1 op2));
++
++ format %{ "cmpq $op1, $op2\t# overflow check long" %}
++ ins_encode %{
++ __ cmpq($op1$$Register, $op2$$Register);
++ %}
++ ins_pipe(ialu_reg_reg);
++%}
++
++instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
++%{
++ match(Set cr (OverflowSubL op1 op2));
++
++ format %{ "cmpq $op1, $op2\t# overflow check long" %}
++ ins_encode %{
++ __ cmpq($op1$$Register, $op2$$constant);
++ %}
++ ins_pipe(ialu_reg_reg);
++%}
++
++instruct overflowNegI_rReg(rFlagsReg cr, immI0 zero, rax_RegI op2)
++%{
++ match(Set cr (OverflowSubI zero op2));
++ effect(DEF cr, USE_KILL op2);
++
++ format %{ "negl $op2\t# overflow check int" %}
++ ins_encode %{
++ __ negl($op2$$Register);
++ %}
++ ins_pipe(ialu_reg_reg);
++%}
++
++instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
++%{
++ match(Set cr (OverflowSubL zero op2));
++ effect(DEF cr, USE_KILL op2);
++
++ format %{ "negq $op2\t# overflow check long" %}
++ ins_encode %{
++ __ negq($op2$$Register);
++ %}
++ ins_pipe(ialu_reg_reg);
++%}
++
++instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
++%{
++ match(Set cr (OverflowMulI op1 op2));
++ effect(DEF cr, USE_KILL op1, USE op2);
++
++ format %{ "imull $op1, $op2\t# overflow check int" %}
++ ins_encode %{
++ __ imull($op1$$Register, $op2$$Register);
++ %}
++ ins_pipe(ialu_reg_reg_alu0);
++%}
++
++instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
++%{
++ match(Set cr (OverflowMulI op1 op2));
++ effect(DEF cr, TEMP tmp, USE op1, USE op2);
++
++ format %{ "imull $tmp, $op1, $op2\t# overflow check int" %}
++ ins_encode %{
++ __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
++ %}
++ ins_pipe(ialu_reg_reg_alu0);
++%}
++
++instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
++%{
++ match(Set cr (OverflowMulL op1 op2));
++ effect(DEF cr, USE_KILL op1, USE op2);
++
++ format %{ "imulq $op1, $op2\t# overflow check long" %}
++ ins_encode %{
++ __ imulq($op1$$Register, $op2$$Register);
++ %}
++ ins_pipe(ialu_reg_reg_alu0);
++%}
++
++instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
++%{
++ match(Set cr (OverflowMulL op1 op2));
++ effect(DEF cr, TEMP tmp, USE op1, USE op2);
++
++ format %{ "imulq $tmp, $op1, $op2\t# overflow check long" %}
++ ins_encode %{
++ __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
++ %}
++ ins_pipe(ialu_reg_reg_alu0);
++%}
++
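
These Overflow* rules replace the removed mathExact rules: rather than dedicated AddExactI/SubExactL/... nodes producing a value-plus-flags pair, a Math.addExact-style intrinsic is now a plain arithmetic node plus an Overflow node that consumes the condition codes. A C++ sketch of the generated shape (assuming the GCC/Clang builtin __builtin_add_overflow, which compiles to the same add-then-jump-on-overflow pattern):

    #include <cstdint>
    #include <stdexcept>

    // Mirrors Math.addExact(int, int): do the add, then branch on overflow.
    int32_t add_exact(int32_t a, int32_t b) {
        int32_t r;
        if (__builtin_add_overflow(a, b, &r))               // addl; jo slow_path
            throw std::overflow_error("integer overflow");  // VM deoptimizes/throws here
        return r;
    }
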
+
+ //----------Control Flow Instructions------------------------------------------
+ // Signed compare Instructions
+@@ -11443,27 +11332,43 @@
+ // ============================================================================
+ // inlined locking and unlocking
+
+-instruct cmpFastLock(rFlagsReg cr,
+- rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr)
+-%{
++instruct cmpFastLockRTM(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rdx_RegI scr, rRegI cx1, rRegI cx2) %{
++ predicate(Compile::current()->use_rtm());
++ match(Set cr (FastLock object box));
++ effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
++ ins_cost(300);
++ format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
++ ins_encode %{
++ __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
++ $scr$$Register, $cx1$$Register, $cx2$$Register,
++ _counters, _rtm_counters, _stack_rtm_counters,
++ ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
++ true, ra_->C->profile_rtm());
++ %}
++ ins_pipe(pipe_slow);
++%}
++
++instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr) %{
++ predicate(!Compile::current()->use_rtm());
+ match(Set cr (FastLock object box));
+ effect(TEMP tmp, TEMP scr, USE_KILL box);
+-
+ ins_cost(300);
+ format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr" %}
+- ins_encode(Fast_Lock(object, box, tmp, scr));
++ ins_encode %{
++ __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
++ $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
++ %}
+ ins_pipe(pipe_slow);
+ %}
+
+-instruct cmpFastUnlock(rFlagsReg cr,
+- rRegP object, rax_RegP box, rRegP tmp)
+-%{
++instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP box, rRegP tmp) %{
+ match(Set cr (FastUnlock object box));
+ effect(TEMP tmp, USE_KILL box);
+-
+ ins_cost(300);
+ format %{ "fastunlock $object,$box\t! kills $box,$tmp" %}
+- ins_encode(Fast_Unlock(object, box, tmp));
++ ins_encode %{
++ __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
++ %}
+ ins_pipe(pipe_slow);
+ %}
+
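
cmpFastLockRTM is selected when the compilation uses Restricted Transactional Memory (use_rtm()): fast_lock then tries to elide the lock inside a hardware transaction, profiling aborts through the extra counters and method data, before falling back to the CAS-based path that the plain cmpFastLock rule emits. The idea, sketched with the RTM intrinsics from <immintrin.h> (assumes an RTM-capable CPU and -mrtm; an illustration of the concept, not HotSpot's code):

    #include <immintrin.h>

    // Try to run a critical section transactionally. If the transaction
    // aborts (or RTM is unavailable) the caller takes the CAS-based path.
    template <typename F>
    bool elide_lock(volatile int* lock_word, F critical_section) {
        if (_xbegin() == _XBEGIN_STARTED) {
            if (*lock_word != 0) _xabort(0xff); // lock really held: abort
            critical_section();                 // runs transactionally
            _xend();                            // commit; no CAS was needed
            return true;
        }
        return false;
    }
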
+--- ./hotspot/src/cpu/zero/vm/bytecodeInterpreter_zero.hpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/zero/vm/bytecodeInterpreter_zero.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -132,7 +132,7 @@
+ #define LOCALS_ADDR(offset) ((address)locals[-(offset)])
+ #define LOCALS_INT(offset) (*((jint*)&locals[-(offset)]))
+ #define LOCALS_FLOAT(offset) (*((jfloat*)&locals[-(offset)]))
+-#define LOCALS_OBJECT(offset) ((oop)locals[-(offset)])
++#define LOCALS_OBJECT(offset) (cast_to_oop(locals[-(offset)]))
+ #define LOCALS_DOUBLE(offset) (((VMJavaVal64*)&locals[-((offset) + 1)])->d)
+ #define LOCALS_LONG(offset) (((VMJavaVal64*)&locals[-((offset) + 1)])->l)
+ #define LOCALS_LONG_AT(offset) (((address)&locals[-((offset) + 1)]))
+--- ./hotspot/src/cpu/zero/vm/cppInterpreter_zero.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/zero/vm/cppInterpreter_zero.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -220,7 +220,7 @@
+ }
+ InvocationCounter *counter = mcs->invocation_counter();
+ counter->increment();
+- if (counter->reached_InvocationLimit()) {
++ if (counter->reached_InvocationLimit(mcs->backedge_counter())) {
+ CALL_VM_NOCHECK(
+ InterpreterRuntime::frequency_counter_overflow(thread, NULL));
+ if (HAS_PENDING_EXCEPTION)
+@@ -916,17 +916,32 @@
+ return (InterpreterFrame *) fp;
+ }
+
+-int AbstractInterpreter::layout_activation(Method* method,
+- int tempcount,
+- int popframe_extra_args,
+- int moncount,
+- int caller_actual_parameters,
+- int callee_param_count,
+- int callee_locals,
+- frame* caller,
+- frame* interpreter_frame,
+- bool is_top_frame,
+- bool is_bottom_frame) {
++int AbstractInterpreter::size_activation(int max_stack,
++ int tempcount,
++ int extra_args,
++ int moncount,
++ int callee_param_count,
++ int callee_locals,
++ bool is_top_frame) {
++ int header_words = InterpreterFrame::header_words;
++ int monitor_words = moncount * frame::interpreter_frame_monitor_size();
++ int stack_words = is_top_frame ? max_stack : tempcount;
++ int callee_extra_locals = callee_locals - callee_param_count;
++
++ return header_words + monitor_words + stack_words + callee_extra_locals;
++}
++
++void AbstractInterpreter::layout_activation(Method* method,
++ int tempcount,
++ int popframe_extra_args,
++ int moncount,
++ int caller_actual_parameters,
++ int callee_param_count,
++ int callee_locals,
++ frame* caller,
++ frame* interpreter_frame,
++ bool is_top_frame,
++ bool is_bottom_frame) {
+ assert(popframe_extra_args == 0, "what to do?");
+ assert(!is_top_frame || (!callee_locals && !callee_param_count),
+ "top frame should have no caller");
+@@ -935,39 +950,31 @@
+ // does (the full InterpreterFrame::build, that is, not the
+ // one that creates empty frames for the deoptimizer).
+ //
+- // If interpreter_frame is not NULL then it will be filled in.
+- // It's size is determined by a previous call to this method,
+- // so it should be correct.
++ // interpreter_frame will be filled in. Its size is determined by
++ // a previous call to the size_activation() method.
+ //
+ // Note that tempcount is the current size of the expression
+ // stack. For top most frames we will allocate a full sized
+ // expression stack and not the trimmed version that non-top
+ // frames have.
+
+- int header_words = InterpreterFrame::header_words;
+ int monitor_words = moncount * frame::interpreter_frame_monitor_size();
+- int stack_words = is_top_frame ? method->max_stack() : tempcount;
+- int callee_extra_locals = callee_locals - callee_param_count;
++ intptr_t *locals = interpreter_frame->fp() + method->max_locals();
++ interpreterState istate = interpreter_frame->get_interpreterState();
++ intptr_t *monitor_base = (intptr_t*) istate;
++ intptr_t *stack_base = monitor_base - monitor_words;
++ intptr_t *stack = stack_base - tempcount - 1;
+
+- if (interpreter_frame) {
+- intptr_t *locals = interpreter_frame->fp() + method->max_locals();
+- interpreterState istate = interpreter_frame->get_interpreterState();
+- intptr_t *monitor_base = (intptr_t*) istate;
+- intptr_t *stack_base = monitor_base - monitor_words;
+- intptr_t *stack = stack_base - tempcount - 1;
+-
+- BytecodeInterpreter::layout_interpreterState(istate,
+- caller,
+- NULL,
+- method,
+- locals,
+- stack,
+- stack_base,
+- monitor_base,
+- NULL,
+- is_top_frame);
+- }
+- return header_words + monitor_words + stack_words + callee_extra_locals;
++ BytecodeInterpreter::layout_interpreterState(istate,
++ caller,
++ NULL,
++ method,
++ locals,
++ stack,
++ stack_base,
++ monitor_base,
++ NULL,
++ is_top_frame);
+ }
+
+ void BytecodeInterpreter::layout_interpreterState(interpreterState istate,
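
The old layout_activation() did double duty: called with interpreter_frame == NULL it merely computed a size, otherwise it filled the frame in. The patch splits this into size_activation() (pure size computation) and a void layout_activation() (fill-in only), so a caller now works in two passes, roughly as follows (a sketch; allocate_interpreter_frame() is a hypothetical helper):

    // Pass 1: how many words does the activation need?
    int words = AbstractInterpreter::size_activation(method->max_stack(), tempcount,
                                                     popframe_extra_args, moncount,
                                                     callee_param_count, callee_locals,
                                                     is_top_frame);
    frame* f = allocate_interpreter_frame(words);   // hypothetical helper
    // Pass 2: lay the activation out into the allocated frame.
    AbstractInterpreter::layout_activation(method, tempcount, popframe_extra_args,
                                           moncount, caller_actual_parameters,
                                           callee_param_count, callee_locals,
                                           caller, f, is_top_frame, is_bottom_frame);
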
+--- ./hotspot/src/cpu/zero/vm/globalDefinitions_zero.hpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/zero/vm/globalDefinitions_zero.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -28,4 +28,10 @@
+
+ #include <ffi.h>
+
++// Indicates whether the C calling conventions require that
++// 32-bit integer argument values are properly extended to 64 bits.
++// If set, SharedRuntime::c_calling_convention() must adapt
++// signatures accordingly.
++const bool CCallingConventionRequiresIntsAsLongs = false;
++
+ #endif // CPU_ZERO_VM_GLOBALDEFINITIONS_ZERO_HPP
+--- ./hotspot/src/cpu/zero/vm/globals_zero.hpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/zero/vm/globals_zero.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -38,11 +38,13 @@
+ define_pd_global(bool, NeedsDeoptSuspend, false);
+
+ define_pd_global(bool, ImplicitNullChecks, true);
++define_pd_global(bool, TrapBasedNullChecks, false);
+ define_pd_global(bool, UncommonNullCast, true);
+
+ define_pd_global(intx, CodeEntryAlignment, 32);
+ define_pd_global(intx, OptoLoopAlignment, 16);
+ define_pd_global(intx, InlineFrequencyCount, 100);
++define_pd_global(intx, InlineSmallCode, 1000 );
+ define_pd_global(intx, PreInflateSpin, 10);
+
+ define_pd_global(intx, StackYellowPages, 2);
+--- ./hotspot/src/cpu/zero/vm/sharedRuntime_zero.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/zero/vm/sharedRuntime_zero.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -135,6 +135,7 @@
+
+ int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
+ VMRegPair *regs,
++ VMRegPair *regs2,
+ int total_args_passed) {
+ ShouldNotCallThis();
+ return 0;
+--- ./hotspot/src/cpu/zero/vm/shark_globals_zero.hpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/cpu/zero/vm/shark_globals_zero.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -50,7 +50,6 @@
+
+ define_pd_global(intx, OnStackReplacePercentage, 933 );
+ define_pd_global(intx, FreqInlineSize, 325 );
+-define_pd_global(intx, InlineSmallCode, 1000 );
+ define_pd_global(uintx, NewRatio, 12 );
+ define_pd_global(intx, NewSizeThreadIncrease, 4*K );
+ define_pd_global(intx, InitialCodeCacheSize, 160*K);
+--- ./hotspot/src/os/aix/vm/attachListener_aix.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/os/aix/vm/attachListener_aix.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,574 @@
++/*
++ * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "runtime/interfaceSupport.hpp"
++#include "runtime/os.hpp"
++#include "services/attachListener.hpp"
++#include "services/dtraceAttacher.hpp"
++
++#include <unistd.h>
++#include <signal.h>
++#include <sys/types.h>
++#include <sys/socket.h>
++#include <sys/un.h>
++#include <sys/stat.h>
++
++#ifndef UNIX_PATH_MAX
++#define UNIX_PATH_MAX sizeof(((struct sockaddr_un *)0)->sun_path)
++#endif
++
++// The attach mechanism on AIX, as on Linux, uses a UNIX domain socket. An attach listener
++// thread is created at startup or is created on-demand via a signal from
++// the client tool. The attach listener creates a socket and binds it to a file
++// in the filesystem. The attach listener then acts as a simple (single-
++// threaded) server - it waits for a client to connect, reads the request,
++// executes it, and returns the response to the client via the socket
++// connection.
++//
++// As the socket is a UNIX domain socket it means that only clients on the
++// local machine can connect. In addition there are two other aspects to
++// the security:
++// 1. The well known file that the socket is bound to has permission 400
++// 2. When a client connects, the SO_PEERID socket option is used to
++// obtain the credentials of the client. We check that the effective uid
++// of the client matches that of this process.
++
++// forward reference
++class AixAttachOperation;
++
++class AixAttachListener: AllStatic {
++ private:
++ // the path to which we bind the UNIX domain socket
++ static char _path[UNIX_PATH_MAX];
++ static bool _has_path;
++ // Shutdown marker to prevent accept blocking during clean-up.
++ static bool _shutdown;
++
++ // the file descriptor for the listening socket
++ static int _listener;
++
++ static void set_path(char* path) {
++ if (path == NULL) {
++ _has_path = false;
++ } else {
++ strncpy(_path, path, UNIX_PATH_MAX);
++ _path[UNIX_PATH_MAX-1] = '\0';
++ _has_path = true;
++ }
++ }
++
++ static void set_listener(int s) { _listener = s; }
++
++ // reads a request from the given connected socket
++ static AixAttachOperation* read_request(int s);
++
++ public:
++ enum {
++ ATTACH_PROTOCOL_VER = 1 // protocol version
++ };
++ enum {
++ ATTACH_ERROR_BADVERSION = 101 // error codes
++ };
++
++ // initialize the listener, returns 0 if okay
++ static int init();
++
++ static char* path() { return _path; }
++ static bool has_path() { return _has_path; }
++ static int listener() { return _listener; }
++ // Shutdown marker to prevent accept blocking during clean-up
++ static void set_shutdown(bool shutdown) { _shutdown = shutdown; }
++ static bool is_shutdown() { return _shutdown; }
++
++ // write the given buffer to a socket
++ static int write_fully(int s, char* buf, int len);
++
++ static AixAttachOperation* dequeue();
++};
++
++class AixAttachOperation: public AttachOperation {
++ private:
++ // the connection to the client
++ int _socket;
++
++ public:
++ void complete(jint res, bufferedStream* st);
++
++ void set_socket(int s) { _socket = s; }
++ int socket() const { return _socket; }
++
++ AixAttachOperation(char* name) : AttachOperation(name) {
++ set_socket(-1);
++ }
++};
++
++// statics
++char AixAttachListener::_path[UNIX_PATH_MAX];
++bool AixAttachListener::_has_path;
++int AixAttachListener::_listener = -1;
++// Shutdown marker to prevent accept blocking during clean-up
++bool AixAttachListener::_shutdown = false;
++
++// Supporting class to help split a buffer into individual components
++class ArgumentIterator : public StackObj {
++ private:
++ char* _pos;
++ char* _end;
++ public:
++ ArgumentIterator(char* arg_buffer, size_t arg_size) {
++ _pos = arg_buffer;
++ _end = _pos + arg_size - 1;
++ }
++ char* next() {
++ if (*_pos == '\0') {
++ return NULL;
++ }
++ char* res = _pos;
++ char* next_pos = strchr(_pos, '\0');
++ if (next_pos < _end) {
++ next_pos++;
++ }
++ _pos = next_pos;
++ return res;
++ }
++};
++
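++// Usage sketch for ArgumentIterator: constructed over the request buffer in
++// read_request() below, next() returns each NUL-terminated string in turn
++// ("1", "load", ...) and NULL once the buffer is exhausted.
++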
++// On AIX, sockets can block until all data has been transmitted
++// successfully; in some communication domains a socket "close" may
++// therefore never complete. We have to take care that after the socket
++// shutdown the listener never enters the accept state.
++
++// atexit hook to stop the listener and unlink the file that it is
++// bound to.
++
++// Some modifications to the listener logic to prevent deadlocks on exit.
++// 1. We shut down the socket here instead. AixAttachOperation::complete() is not the right place
++// since more than one agent in a sequence in JPLIS live tests wouldn't work (Listener thread
++// would be dead after the first operation completion).
++// 2. close(s) may never return if the listener thread is in socket accept(). Unlinking the file
++// should be sufficient for cleanup.
++extern "C" {
++ static void listener_cleanup() {
++ static int cleanup_done;
++ if (!cleanup_done) {
++ cleanup_done = 1;
++ AixAttachListener::set_shutdown(true);
++ int s = AixAttachListener::listener();
++ if (s != -1) {
++ ::shutdown(s, 2);
++ }
++ if (AixAttachListener::has_path()) {
++ ::unlink(AixAttachListener::path());
++ }
++ }
++ }
++}
++
++// Initialization - create a listener socket and bind it to a file
++
++int AixAttachListener::init() {
++ char path[UNIX_PATH_MAX]; // socket file
++ char initial_path[UNIX_PATH_MAX]; // socket file during setup
++ int listener; // listener socket (file descriptor)
++
++ // register function to cleanup
++ ::atexit(listener_cleanup);
++
++ int n = snprintf(path, UNIX_PATH_MAX, "%s/.java_pid%d",
++ os::get_temp_directory(), os::current_process_id());
++ if (n < (int)UNIX_PATH_MAX) {
++ n = snprintf(initial_path, UNIX_PATH_MAX, "%s.tmp", path);
++ }
++ if (n >= (int)UNIX_PATH_MAX) {
++ return -1;
++ }
++
++ // create the listener socket
++ listener = ::socket(PF_UNIX, SOCK_STREAM, 0);
++ if (listener == -1) {
++ return -1;
++ }
++
++ // bind socket
++ struct sockaddr_un addr;
++ addr.sun_family = AF_UNIX;
++ strcpy(addr.sun_path, initial_path);
++ ::unlink(initial_path);
++ // We must call bind with the actual sockaddr length. This is obligatory for AS400.
++ int res = ::bind(listener, (struct sockaddr*)&addr, SUN_LEN(&addr));
++ if (res == -1) {
++ RESTARTABLE(::close(listener), res);
++ return -1;
++ }
++
++ // put in listen mode, set permissions, and rename into place
++ res = ::listen(listener, 5);
++ if (res == 0) {
++ RESTARTABLE(::chmod(initial_path, (S_IREAD|S_IWRITE) & ~(S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)), res);
++ if (res == 0) {
++ res = ::rename(initial_path, path);
++ }
++ }
++ if (res == -1) {
++ RESTARTABLE(::close(listener), res);
++ ::unlink(initial_path);
++ return -1;
++ }
++ set_path(path);
++ set_listener(listener);
++ set_shutdown(false);
++
++ return 0;
++}
++
++// Given a socket that is connected to a peer we read the request and
++// create an AttachOperation. As the socket is blocking there is potential
++// for a denial-of-service if the peer does not respond. However this happens
++// after the peer credentials have been checked and in the worst case it just
++// means that the attach listener thread is blocked.
++//
++AixAttachOperation* AixAttachListener::read_request(int s) {
++ char ver_str[8];
++ sprintf(ver_str, "%d", ATTACH_PROTOCOL_VER);
++
++ // The request is a sequence of strings so we first figure out the
++ // expected count and the maximum possible length of the request.
++ // The request is:
++ // <ver>0<cmd>0<arg>0<arg>0<arg>0
++ // where <ver> is the protocol version (1), <cmd> is the command
++ // name ("load", "datadump", ...), and <arg> is an argument
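++ // For example, a "load" command with three arguments would arrive as the
++ // byte sequence "1\0load\0instrument\0false\0/tmp/agent.jar\0"
++ // (argument values here are illustrative only).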
++ int expected_str_count = 2 + AttachOperation::arg_count_max;
++ const int max_len = (sizeof(ver_str) + 1) + (AttachOperation::name_length_max + 1) +
++ AttachOperation::arg_count_max*(AttachOperation::arg_length_max + 1);
++
++ char buf[max_len];
++ int str_count = 0;
++
++ // Read until all (expected) strings have been read, the buffer is
++ // full, or EOF.
++
++ int off = 0;
++ int left = max_len;
++
++ do {
++ int n;
++ // Don't block on interrupts because this will
++ // hang in the clean-up when shutting down.
++ n = read(s, buf+off, left);
++ if (n == -1) {
++ return NULL; // reset by peer or other error
++ }
++ if (n == 0) { // end of file reached
++ break;
++ }
++ for (int i=0; i<n; i++) {
++ if (buf[off+i] == 0) {
++ // EOS found
++ str_count++;
++
++ // The first string is <ver> so check it now to
++ // check for protocol mis-match
++ if (str_count == 1) {
++ if ((strlen(buf) != strlen(ver_str)) ||
++ (atoi(buf) != ATTACH_PROTOCOL_VER)) {
++ char msg[32];
++ sprintf(msg, "%d\n", ATTACH_ERROR_BADVERSION);
++ write_fully(s, msg, strlen(msg));
++ return NULL;
++ }
++ }
++ }
++ }
++ off += n;
++ left -= n;
++ } while (left > 0 && str_count < expected_str_count);
++
++ if (str_count != expected_str_count) {
++ return NULL; // incomplete request
++ }
++
++ // parse request
++
++ ArgumentIterator args(buf, (max_len)-left);
++
++ // version already checked
++ char* v = args.next();
++
++ char* name = args.next();
++ if (name == NULL || strlen(name) > AttachOperation::name_length_max) {
++ return NULL;
++ }
++
++ AixAttachOperation* op = new AixAttachOperation(name);
++
++ for (int i=0; i<AttachOperation::arg_count_max; i++) {
++ char* arg = args.next();
++ if (arg == NULL) {
++ op->set_arg(i, NULL);
++ } else {
++ if (strlen(arg) > AttachOperation::arg_length_max) {
++ delete op;
++ return NULL;
++ }
++ op->set_arg(i, arg);
++ }
++ }
++
++ op->set_socket(s);
++ return op;
++}
++
++
++// Dequeue an operation
++//
++// As in the Linux implementation, there is only a single operation and clients
++// cannot queue commands (except at the socket level).
++//
++AixAttachOperation* AixAttachListener::dequeue() {
++ for (;;) {
++ int s;
++
++ // wait for client to connect
++ struct sockaddr addr;
++ socklen_t len = sizeof(addr);
++ memset(&addr, 0, len);
++ // We must prevent accept blocking on the socket if it has been shut down.
++ // Therefore we allow interrupts and check whether we have been shut down already.
++ if (AixAttachListener::is_shutdown()) {
++ return NULL;
++ }
++ s=::accept(listener(), &addr, &len);
++ if (s == -1) {
++ return NULL; // log a warning?
++ }
++
++ // Added timeouts for read and write. If we get no request within the
++ // next AttachListenerTimeout milliseconds we just finish the connection.
++ struct timeval tv;
++ tv.tv_sec = 0;
++ tv.tv_usec = AttachListenerTimeout * 1000;
++ ::setsockopt(s, SOL_SOCKET, SO_RCVTIMEO, (char*)&tv, sizeof(tv));
++ ::setsockopt(s, SOL_SOCKET, SO_SNDTIMEO, (char*)&tv, sizeof(tv));
++
++ // get the credentials of the peer and check the effective uid/gid
++ // - check with jeff on this.
++ struct peercred_struct cred_info;
++ socklen_t optlen = sizeof(cred_info);
++ if (::getsockopt(s, SOL_SOCKET, SO_PEERID, (void*)&cred_info, &optlen) == -1) {
++ int res;
++ RESTARTABLE(::close(s), res);
++ continue;
++ }
++ uid_t euid = geteuid();
++ gid_t egid = getegid();
++
++ if (cred_info.euid != euid || cred_info.egid != egid) {
++ int res;
++ RESTARTABLE(::close(s), res);
++ continue;
++ }
++
++ // peer credential look okay so we read the request
++ AixAttachOperation* op = read_request(s);
++ if (op == NULL) {
++ int res;
++ RESTARTABLE(::close(s), res);
++ continue;
++ } else {
++ return op;
++ }
++ }
++}
++
++// write the given buffer to the socket
++int AixAttachListener::write_fully(int s, char* buf, int len) {
++ do {
++ int n = ::write(s, buf, len);
++ if (n == -1) {
++ if (errno != EINTR) return -1;
++ } else {
++ buf += n;
++ len -= n;
++ }
++ }
++ while (len > 0);
++ return 0;
++}
++
++// Complete an operation by sending the operation result and any result
++// output to the client. At this time the socket is in blocking mode so
++// potentially we can block if there is a lot of data and the client is
++// non-responsive. For most operations this is a non-issue because the
++// default send buffer is sufficient to buffer everything. In the future
++// if there are operations that involves a very big reply then it the
++// socket could be made non-blocking and a timeout could be used.
++
++void AixAttachOperation::complete(jint result, bufferedStream* st) {
++ JavaThread* thread = JavaThread::current();
++ ThreadBlockInVM tbivm(thread);
++
++ thread->set_suspend_equivalent();
++ // cleared by handle_special_suspend_equivalent_condition() or
++ // java_suspend_self() via check_and_wait_while_suspended()
++
++ // write operation result
++ char msg[32];
++ sprintf(msg, "%d\n", result);
++ int rc = AixAttachListener::write_fully(this->socket(), msg, strlen(msg));
++
++ // write any result data
++ if (rc == 0) {
++ // Shutdown the socket in the cleanup function to enable more than
++ // one agent attach in a sequence (see comments to listener_cleanup()).
++ AixAttachListener::write_fully(this->socket(), (char*) st->base(), st->size());
++ }
++
++ // done
++ RESTARTABLE(::close(this->socket()), rc);
++
++ // were we externally suspended while we were waiting?
++ thread->check_and_wait_while_suspended();
++
++ delete this;
++}
++
++
++// AttachListener functions
++
++AttachOperation* AttachListener::dequeue() {
++ JavaThread* thread = JavaThread::current();
++ ThreadBlockInVM tbivm(thread);
++
++ thread->set_suspend_equivalent();
++ // cleared by handle_special_suspend_equivalent_condition() or
++ // java_suspend_self() via check_and_wait_while_suspended()
++
++ AttachOperation* op = AixAttachListener::dequeue();
++
++ // were we externally suspended while we were waiting?
++ thread->check_and_wait_while_suspended();
++
++ return op;
++}
++
++// Performs initialization at vm startup
++// For AIX we remove any stale .java_pid file which could cause
++// an attaching process to think we are ready to receive on the
++ // domain socket before we are properly initialized.
++
++void AttachListener::vm_start() {
++ char fn[UNIX_PATH_MAX];
++ struct stat64 st;
++ int ret;
++
++ int n = snprintf(fn, UNIX_PATH_MAX, "%s/.java_pid%d",
++ os::get_temp_directory(), os::current_process_id());
++ assert(n < (int)UNIX_PATH_MAX, "java_pid file name buffer overflow");
++
++ RESTARTABLE(::stat64(fn, &st), ret);
++ if (ret == 0) {
++ ret = ::unlink(fn);
++ if (ret == -1) {
++ debug_only(warning("failed to remove stale attach pid file at %s", fn));
++ }
++ }
++}
++
++int AttachListener::pd_init() {
++ JavaThread* thread = JavaThread::current();
++ ThreadBlockInVM tbivm(thread);
++
++ thread->set_suspend_equivalent();
++ // cleared by handle_special_suspend_equivalent_condition() or
++ // java_suspend_self() via check_and_wait_while_suspended()
++
++ int ret_code = AixAttachListener::init();
++
++ // were we externally suspended while we were waiting?
++ thread->check_and_wait_while_suspended();
++
++ return ret_code;
++}
++
++// Attach Listener is started lazily except in the case when
++ // +ReduceSignalUsage is used
++bool AttachListener::init_at_startup() {
++ if (ReduceSignalUsage) {
++ return true;
++ } else {
++ return false;
++ }
++}
++
++// If the file .attach_pid exists in the working directory
++// or /tmp then this is the trigger to start the attach mechanism
++bool AttachListener::is_init_trigger() {
++ if (init_at_startup() || is_initialized()) {
++ return false; // initialized at startup or already initialized
++ }
++ char fn[PATH_MAX+1];
++ sprintf(fn, ".attach_pid%d", os::current_process_id());
++ int ret;
++ struct stat64 st;
++ RESTARTABLE(::stat64(fn, &st), ret);
++ if (ret == -1) {
++ snprintf(fn, sizeof(fn), "%s/.attach_pid%d",
++ os::get_temp_directory(), os::current_process_id());
++ RESTARTABLE(::stat64(fn, &st), ret);
++ }
++ if (ret == 0) {
++ // simple check to avoid starting the attach mechanism when
++ // a bogus user creates the file
++ if (st.st_uid == geteuid()) {
++ init();
++ return true;
++ }
++ }
++ return false;
++}
++
++// if VM aborts then remove listener
++void AttachListener::abort() {
++ listener_cleanup();
++}
++
++void AttachListener::pd_data_dump() {
++ os::signal_notify(SIGQUIT);
++}
++
++AttachOperationFunctionInfo* AttachListener::pd_find_operation(const char* n) {
++ return NULL;
++}
++
++jint AttachListener::pd_set_flag(AttachOperation* op, outputStream* out) {
++ out->print_cr("flag '%s' cannot be changed", op->arg(0));
++ return JNI_ERR;
++}
++
++void AttachListener::pd_detachall() {
++ // Cleanup server socket to detach clients.
++ listener_cleanup();
++}
+--- ./hotspot/src/os/aix/vm/c2_globals_aix.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/os/aix/vm/c2_globals_aix.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,37 @@
++/*
++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef OS_AIX_VM_C2_GLOBALS_AIX_HPP
++#define OS_AIX_VM_C2_GLOBALS_AIX_HPP
++
++#include "utilities/globalDefinitions.hpp"
++#include "utilities/macros.hpp"
++
++//
++// Sets the default values for operating system dependent flags used by the
++// server compiler. (see c2_globals.hpp)
++//
++
++#endif // OS_AIX_VM_C2_GLOBALS_AIX_HPP
+--- ./hotspot/src/os/aix/vm/decoder_aix.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/os/aix/vm/decoder_aix.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,48 @@
++/*
++ * Copyright (c) 2011, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "utilities/decoder.hpp"
++#include "porting_aix.hpp"
++
++// Provide a simple AIXDecoder which enables decoding of C frames in the VM.
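++// decode() maps a code address to the enclosing function name via the
++// porting layer's getFuncName(); the (addr, base) overload is unused on
++// AIX and guarded by ShouldNotReachHere().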
++class AIXDecoder: public AbstractDecoder {
++ public:
++ AIXDecoder() {
++ _decoder_status = no_error;
++ }
++ ~AIXDecoder() {}
++
++ virtual bool can_decode_C_frame_in_vm() const { return true; }
++
++ virtual bool demangle(const char* symbol, char* buf, int buflen) { return false; } // demangled by getFuncName
++
++ virtual bool decode(address addr, char* buf, int buflen, int* offset, const char* modulepath) {
++ return (::getFuncName((codeptr_t)addr, buf, buflen, offset, 0, 0, 0) == 0);
++ }
++ virtual bool decode(address addr, char *buf, int buflen, int* offset, const void *base) {
++ ShouldNotReachHere();
++ return false;
++ }
++};
+--- ./hotspot/src/os/aix/vm/globals_aix.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/os/aix/vm/globals_aix.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,63 @@
++/*
++ * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef OS_AIX_VM_GLOBALS_AIX_HPP
++#define OS_AIX_VM_GLOBALS_AIX_HPP
++
++//
++// Defines Aix specific flags. They are not available on other platforms.
++//
++#define RUNTIME_OS_FLAGS(develop, develop_pd, product, product_pd, diagnostic, notproduct) \
++ \
++ /* If UseLargePages == true allow or deny usage of 16M pages. 16M pages are */ \
++ /* a scarce resource and there may be situations where we do not want the VM */ \
++ /* to run with 16M pages. (Will fall back to 64K pages). */ \
++ product_pd(bool, Use16MPages, \
++ "Use 16M pages if available.") \
++ \
++ /* use optimized addresses for the polling page, */ \
++ /* e.g. map it to a special 32-bit address. */ \
++ product_pd(bool, OptimizePollingPageLocation, \
++ "Optimize the location of the polling page used for Safepoints") \
++ \
++ product_pd(intx, AttachListenerTimeout, \
++ "Timeout in ms the attach listener waits for a request") \
++ \
++
++// By default, do not allow 16M pages. 16M pages have to be switched on explicitly.
++define_pd_global(bool, Use16MPages, false);
++define_pd_global(bool, OptimizePollingPageLocation, true);
++define_pd_global(intx, AttachListenerTimeout, 1000);
++
++//
++// Defines Aix-specific default values. The flags are available on all
++// platforms, but they may have different default values on other platforms.
++//
++define_pd_global(bool, UseLargePages, true);
++define_pd_global(bool, UseLargePagesIndividualAllocation, false);
++define_pd_global(bool, UseOSErrorReporting, false);
++define_pd_global(bool, UseThreadPriorities, true) ;
++
++#endif // OS_AIX_VM_GLOBALS_AIX_HPP
+--- ./hotspot/src/os/aix/vm/interfaceSupport_aix.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/os/aix/vm/interfaceSupport_aix.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,35 @@
++/*
++ * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef OS_AIX_VM_INTERFACESUPPORT_AIX_HPP
++#define OS_AIX_VM_INTERFACESUPPORT_AIX_HPP
++
++// Contains inlined functions for class InterfaceSupport
++
++static inline void serialize_memory(JavaThread *thread) {
++ os::write_memory_serialize_page(thread);
++}
++
++#endif // OS_AIX_VM_INTERFACESUPPORT_AIX_HPP
+--- ./hotspot/src/os/aix/vm/jsig.c Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/os/aix/vm/jsig.c Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,233 @@
++/*
++ * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++/* CopyrightVersion 1.2 */
++
++/* This is a special library that should be loaded before libc &
++ * libthread to interpose the signal handler installation functions:
++ * sigaction(), signal(), sigset().
++ * Used for signal-chaining. See RFE 4381843.
++ */
++
++#include <signal.h>
++#include <dlfcn.h>
++#include <pthread.h>
++#include <stdio.h>
++#include <stdlib.h>
++
++#define bool int
++#define true 1
++#define false 0
++
++// Highest so far on AIX 5.2 is SIGSAK (63)
++#define MAXSIGNUM 63
++#define MASK(sig) ((unsigned int)1 << sig)
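++// Example: SIGHUP is 1, so MASK(SIGHUP) == 0x2; 'jvmsigs' below is the
++// bitmask of signals whose handlers the JVM has claimed for itself.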
++
++static struct sigaction sact[MAXSIGNUM]; /* saved signal handlers */
++static unsigned int jvmsigs = 0; /* signals used by jvm */
++
++/* used to synchronize the installation of signal handlers */
++static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
++static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
++static pthread_t tid = 0;
++
++typedef void (*sa_handler_t)(int);
++typedef void (*sa_sigaction_t)(int, siginfo_t *, void *);
++// signal_t is already defined on AIX
++typedef sa_handler_t (*signal_like_function_t)(int, sa_handler_t);
++typedef int (*sigaction_t)(int, const struct sigaction *, struct sigaction *);
++
++static signal_like_function_t os_signal = 0; /* os's version of signal()/sigset() */
++static sigaction_t os_sigaction = 0; /* os's version of sigaction() */
++
++static bool jvm_signal_installing = false;
++static bool jvm_signal_installed = false;
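++
++/* Interposition protocol: the JVM brackets the installation of its own
++ * handlers with JVM_begin_signal_setting() / JVM_end_signal_setting().
++ * Once the JVM's handlers are installed, application handlers for
++ * JVM-claimed signals are only recorded in sact[] rather than handed to
++ * the OS, so the JVM handler stays active and can chain to the saved
++ * application handler. */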
++
++static void signal_lock() {
++ pthread_mutex_lock(&mutex);
++ /* When the jvm is installing its set of signal handlers, threads
++ * other than the jvm thread should wait */
++ if (jvm_signal_installing) {
++ if (tid != pthread_self()) {
++ pthread_cond_wait(&cond, &mutex);
++ }
++ }
++}
++
++static void signal_unlock() {
++ pthread_mutex_unlock(&mutex);
++}
++
++static sa_handler_t call_os_signal(int sig, sa_handler_t disp,
++ bool is_sigset) {
++ if (os_signal == NULL) {
++ if (!is_sigset) {
++ // Aix: call functions directly instead of dlsym'ing them
++ os_signal = signal;
++ } else {
++ // Aix: call functions directly instead of dlsym'ing them
++ os_signal = sigset;
++ }
++ if (os_signal == NULL) {
++ printf("%s\n", dlerror());
++ exit(0);
++ }
++ }
++ return (*os_signal)(sig, disp);
++}
++
++static void save_signal_handler(int sig, sa_handler_t disp) {
++ sigset_t set;
++ sact[sig].sa_handler = disp;
++ sigemptyset(&set);
++ sact[sig].sa_mask = set;
++ sact[sig].sa_flags = 0;
++}
++
++static sa_handler_t set_signal(int sig, sa_handler_t disp, bool is_sigset) {
++ sa_handler_t oldhandler;
++ bool sigused;
++
++ signal_lock();
++
++ sigused = (MASK(sig) & jvmsigs) != 0;
++ if (jvm_signal_installed && sigused) {
++ /* jvm has installed its signal handler for this signal. */
++ /* Save the handler. Don't really install it. */
++ oldhandler = sact[sig].sa_handler;
++ save_signal_handler(sig, disp);
++
++ signal_unlock();
++ return oldhandler;
++ } else if (jvm_signal_installing) {
++ /* jvm is installing its signal handlers. Install the new
++ * handlers and save the old ones. jvm uses sigaction().
++ * Leave the piece here just in case. */
++ oldhandler = call_os_signal(sig, disp, is_sigset);
++ save_signal_handler(sig, oldhandler);
++
++ /* Record the signals used by jvm */
++ jvmsigs |= MASK(sig);
++
++ signal_unlock();
++ return oldhandler;
++ } else {
++ /* jvm has no relation with this signal (yet). Install
++ * the handler. */
++ oldhandler = call_os_signal(sig, disp, is_sigset);
++
++ signal_unlock();
++ return oldhandler;
++ }
++}
++
++sa_handler_t signal(int sig, sa_handler_t disp) {
++ return set_signal(sig, disp, false);
++}
++
++sa_handler_t sigset(int sig, sa_handler_t disp) {
++ return set_signal(sig, disp, true);
++ }
++
++static int call_os_sigaction(int sig, const struct sigaction *act,
++ struct sigaction *oact) {
++ if (os_sigaction == NULL) {
++ // Aix: call functions directly instead of dlsym'ing them
++ os_sigaction = sigaction;
++ if (os_sigaction == NULL) {
++ printf("%s\n", dlerror());
++ exit(0);
++ }
++ }
++ return (*os_sigaction)(sig, act, oact);
++}
++
++int sigaction(int sig, const struct sigaction *act, struct sigaction *oact) {
++ int res;
++ bool sigused;
++ struct sigaction oldAct;
++
++ signal_lock();
++
++ sigused = (MASK(sig) & jvmsigs) != 0;
++ if (jvm_signal_installed && sigused) {
++ /* jvm has installed its signal handler for this signal. */
++ /* Save the handler. Don't really install it. */
++ if (oact != NULL) {
++ *oact = sact[sig];
++ }
++ if (act != NULL) {
++ sact[sig] = *act;
++ }
++
++ signal_unlock();
++ return 0;
++ } else if (jvm_signal_installing) {
++ /* jvm is installing its signal handlers. Install the new
++ * handlers and save the old ones. */
++ res = call_os_sigaction(sig, act, &oldAct);
++ sact[sig] = oldAct;
++ if (oact != NULL) {
++ *oact = oldAct;
++ }
++
++ /* Record the signals used by jvm */
++ jvmsigs |= MASK(sig);
++
++ signal_unlock();
++ return res;
++ } else {
++ /* jvm has no relation with this signal (yet). Install
++ * the handler. */
++ res = call_os_sigaction(sig, act, oact);
++
++ signal_unlock();
++ return res;
++ }
++}
++
++/* The three functions for the jvm to call into */
++void JVM_begin_signal_setting() {
++ signal_lock();
++ jvm_signal_installing = true;
++ tid = pthread_self();
++ signal_unlock();
++}
++
++void JVM_end_signal_setting() {
++ signal_lock();
++ jvm_signal_installed = true;
++ jvm_signal_installing = false;
++ pthread_cond_broadcast(&cond);
++ signal_unlock();
++}
++
++struct sigaction *JVM_get_signal_action(int sig) {
++ /* Does race condition make sense here? */
++ if ((MASK(sig) & jvmsigs) != 0) {
++ return &sact[sig];
++ }
++ return NULL;
++}
+--- ./hotspot/src/os/aix/vm/jvm_aix.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/os/aix/vm/jvm_aix.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,201 @@
++/*
++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "prims/jvm.h"
++#include "runtime/interfaceSupport.hpp"
++#include "runtime/osThread.hpp"
++
++#include <signal.h>
++
++
++// sun.misc.Signal ///////////////////////////////////////////////////////////
++// Signal code is mostly copied from classic vm, signals_md.c 1.4 98/08/23
++/*
++ * This function is included primarily as a debugging aid. If Java is
++ * running in a console window, then pressing <CTRL-\> will cause
++ * the current state of all active threads and monitors to be written
++ * to the console window.
++ */
++
++JVM_ENTRY_NO_ENV(void*, JVM_RegisterSignal(jint sig, void* handler))
++ // Copied from classic vm
++ // signals_md.c 1.4 98/08/23
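++ // A handler value of (void *)2 is the sentinel sun.misc.Signal passes
++ // to request the VM's Java-level user handler (os::user_handler()).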
++ void* newHandler = handler == (void *)2
++ ? os::user_handler()
++ : handler;
++ switch (sig) {
++ /* The following are already used by the VM. */
++ case INTERRUPT_SIGNAL:
++ case SIGFPE:
++ case SIGILL:
++ case SIGSEGV:
++
++ /* The following signal is used by the VM to dump thread stacks unless
++ ReduceSignalUsage is set, in which case the user is allowed to set
++ his own _native_ handler for this signal; thus, in either case,
++ we do not allow JVM_RegisterSignal to change the handler. */
++ case BREAK_SIGNAL:
++ return (void *)-1;
++
++ /* The following signals are used for Shutdown Hooks support. However, if
++ ReduceSignalUsage (-Xrs) is set, Shutdown Hooks must be invoked via
++ System.exit(), Java is not allowed to use these signals, and the
++ user is allowed to set his own _native_ handler for these signals and
++ invoke System.exit() as needed. Terminator.setup() avoids registering
++ these signals when -Xrs is present.
++ - If the HUP signal is ignored (e.g. from the nohup command), then Java
++ is not allowed to use this signal.
++ */
++
++ case SHUTDOWN1_SIGNAL:
++ case SHUTDOWN2_SIGNAL:
++ case SHUTDOWN3_SIGNAL:
++ if (ReduceSignalUsage) return (void*)-1;
++ if (os::Aix::is_sig_ignored(sig)) return (void*)1;
++ }
++
++ void* oldHandler = os::signal(sig, newHandler);
++ if (oldHandler == os::user_handler()) {
++ return (void *)2;
++ } else {
++ return oldHandler;
++ }
++JVM_END
++
++
++JVM_ENTRY_NO_ENV(jboolean, JVM_RaiseSignal(jint sig))
++ if (ReduceSignalUsage) {
++ // do not allow SHUTDOWN1_SIGNAL,SHUTDOWN2_SIGNAL,SHUTDOWN3_SIGNAL,
++ // BREAK_SIGNAL to be raised when ReduceSignalUsage is set, since
++ // no handler for them is actually registered in JVM or via
++ // JVM_RegisterSignal.
++ if (sig == SHUTDOWN1_SIGNAL || sig == SHUTDOWN2_SIGNAL ||
++ sig == SHUTDOWN3_SIGNAL || sig == BREAK_SIGNAL) {
++ return JNI_FALSE;
++ }
++ }
++ else if ((sig == SHUTDOWN1_SIGNAL || sig == SHUTDOWN2_SIGNAL ||
++ sig == SHUTDOWN3_SIGNAL) && os::Aix::is_sig_ignored(sig)) {
++ // do not allow SHUTDOWN1_SIGNAL to be raised when SHUTDOWN1_SIGNAL
++ // is ignored, since no handler for them is actually registered in JVM
++ // or via JVM_RegisterSignal.
++ // This also applies for SHUTDOWN2_SIGNAL and SHUTDOWN3_SIGNAL
++ return JNI_FALSE;
++ }
++
++ os::signal_raise(sig);
++ return JNI_TRUE;
++JVM_END
++
++/*
++ All the defined signal names for Linux.
++
++ NOTE that not all of these names are accepted by our Java implementation
++
++ Via an existing claim by the VM, sigaction restrictions, or
++ the "rules of Unix" some of these names will be rejected at runtime.
++ For example the VM sets up to handle USR1, sigaction returns EINVAL for
++ STOP, and Linux simply doesn't allow catching of KILL.
++
++ Here are the names currently accepted by a user of sun.misc.Signal with
++ 1.4.1 (ignoring potential interaction with use of chaining, etc):
++
++ HUP, INT, TRAP, ABRT, IOT, BUS, USR2, PIPE, ALRM, TERM, STKFLT,
++ CLD, CHLD, CONT, TSTP, TTIN, TTOU, URG, XCPU, XFSZ, VTALRM, PROF,
++ WINCH, POLL, IO, PWR, SYS
++
++*/
++
++struct siglabel {
++ const char *name;
++ int number;
++};
++
++struct siglabel siglabels[] = {
++ /* derived from /usr/include/bits/signum.h on RH7.2 */
++ "HUP", SIGHUP, /* Hangup (POSIX). */
++ "INT", SIGINT, /* Interrupt (ANSI). */
++ "QUIT", SIGQUIT, /* Quit (POSIX). */
++ "ILL", SIGILL, /* Illegal instruction (ANSI). */
++ "TRAP", SIGTRAP, /* Trace trap (POSIX). */
++ "ABRT", SIGABRT, /* Abort (ANSI). */
++ "IOT", SIGIOT, /* IOT trap (4.2 BSD). */
++ "BUS", SIGBUS, /* BUS error (4.2 BSD). */
++ "FPE", SIGFPE, /* Floating-point exception (ANSI). */
++ "KILL", SIGKILL, /* Kill, unblockable (POSIX). */
++ "USR1", SIGUSR1, /* User-defined signal 1 (POSIX). */
++ "SEGV", SIGSEGV, /* Segmentation violation (ANSI). */
++ "USR2", SIGUSR2, /* User-defined signal 2 (POSIX). */
++ "PIPE", SIGPIPE, /* Broken pipe (POSIX). */
++ "ALRM", SIGALRM, /* Alarm clock (POSIX). */
++ "TERM", SIGTERM, /* Termination (ANSI). */
++#ifdef SIGSTKFLT
++ "STKFLT", SIGSTKFLT, /* Stack fault. */
++#endif
++ "CLD", SIGCLD, /* Same as SIGCHLD (System V). */
++ "CHLD", SIGCHLD, /* Child status has changed (POSIX). */
++ "CONT", SIGCONT, /* Continue (POSIX). */
++ "STOP", SIGSTOP, /* Stop, unblockable (POSIX). */
++ "TSTP", SIGTSTP, /* Keyboard stop (POSIX). */
++ "TTIN", SIGTTIN, /* Background read from tty (POSIX). */
++ "TTOU", SIGTTOU, /* Background write to tty (POSIX). */
++ "URG", SIGURG, /* Urgent condition on socket (4.2 BSD). */
++ "XCPU", SIGXCPU, /* CPU limit exceeded (4.2 BSD). */
++ "XFSZ", SIGXFSZ, /* File size limit exceeded (4.2 BSD). */
++ "DANGER", SIGDANGER, /* System crash imminent; free up some page space (AIX). */
++ "VTALRM", SIGVTALRM, /* Virtual alarm clock (4.2 BSD). */
++ "PROF", SIGPROF, /* Profiling alarm clock (4.2 BSD). */
++ "WINCH", SIGWINCH, /* Window size change (4.3 BSD, Sun). */
++ "POLL", SIGPOLL, /* Pollable event occurred (System V). */
++ "IO", SIGIO, /* I/O now possible (4.2 BSD). */
++ "PWR", SIGPWR, /* Power failure restart (System V). */
++#ifdef SIGSYS
++ "SYS", SIGSYS /* Bad system call. Only on some Linuxen! */
++#endif
++ };
++
++JVM_ENTRY_NO_ENV(jint, JVM_FindSignal(const char *name))
++
++ /* find and return the named signal's number */
++
++ for(uint i=0; i<ARRAY_SIZE(siglabels); i++)
++ if(!strcmp(name, siglabels[i].name))
++ return siglabels[i].number;
++
++ return -1;
++JVM_END
+--- ./hotspot/src/os/aix/vm/jvm_aix.h Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/os/aix/vm/jvm_aix.h Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,97 @@
++/*
++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef OS_AIX_VM_JVM_AIX_H
++#define OS_AIX_VM_JVM_AIX_H
++
++#include <dirent.h> /* For DIR */
++
++// Must redefine NULL because the macro gets redefined to int 0
++// by dirent.h. This redefinition is included later than the standard definition in
++// globalDefinitions_<compiler>.hpp and leads to assertions in the VM initialization.
++// We definitely need NULL to have the same length as an address pointer.
++#ifdef _LP64
++#undef NULL
++#define NULL 0L
++#else
++#ifndef NULL
++#define NULL 0
++#endif
++#endif
++
++#include <sys/param.h> /* For MAXPATHLEN */
++#include <sys/socket.h> /* For socklen_t */
++#include <unistd.h> /* For F_OK, R_OK, W_OK */
++
++#define JNI_ONLOAD_SYMBOLS {"JNI_OnLoad"}
++#define JNI_ONUNLOAD_SYMBOLS {"JNI_OnUnload"}
++#define JVM_ONLOAD_SYMBOLS {"JVM_OnLoad"}
++#define AGENT_ONLOAD_SYMBOLS {"Agent_OnLoad"}
++#define AGENT_ONUNLOAD_SYMBOLS {"Agent_OnUnload"}
++#define AGENT_ONATTACH_SYMBOLS {"Agent_OnAttach"}
++
++#define JNI_LIB_PREFIX "lib"
++#define JNI_LIB_SUFFIX ".so"
++
++// Hack: MAXPATHLEN is 4095 on some Linux and 4096 on others. This may
++// cause problems if JVM and the rest of JDK are built on different
++// Linux releases. Here we define JVM_MAXPATHLEN to be MAXPATHLEN + 1,
++// so buffers declared in VM are always >= 4096.
++#define JVM_MAXPATHLEN MAXPATHLEN + 1
++
++#define JVM_R_OK R_OK
++#define JVM_W_OK W_OK
++#define JVM_X_OK X_OK
++#define JVM_F_OK F_OK
++
++/*
++ * File I/O
++ */
++
++#include <sys/types.h>
++#include <sys/stat.h>
++#include <fcntl.h>
++#include <errno.h>
++
++/* O Flags */
++
++#define JVM_O_RDONLY O_RDONLY
++#define JVM_O_WRONLY O_WRONLY
++#define JVM_O_RDWR O_RDWR
++#define JVM_O_O_APPEND O_APPEND
++#define JVM_O_EXCL O_EXCL
++#define JVM_O_CREAT O_CREAT
++
++/* Signal definitions */
++
++#define BREAK_SIGNAL SIGQUIT /* Thread dumping support. */
++#define INTERRUPT_SIGNAL SIGUSR1 /* Interruptible I/O support. */
++#define SHUTDOWN1_SIGNAL SIGHUP /* Shutdown Hooks support. */
++#define SHUTDOWN2_SIGNAL SIGINT
++#define SHUTDOWN3_SIGNAL SIGTERM
++
++#endif /* JVM_MD_H */
++
++#endif // OS_AIX_VM_JVM_AIX_H
+--- ./hotspot/src/os/aix/vm/libperfstat_aix.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/os/aix/vm/libperfstat_aix.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,124 @@
++/*
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "runtime/arguments.hpp"
++#include "libperfstat_aix.hpp"
++
++// For dlopen and friends
++#include <dlfcn.h>
++
++// handle to the libperfstat
++static void* g_libhandle = NULL;
++
++// whether initialization worked
++static bool g_initialized = false;
++
++
++typedef int (*fun_perfstat_cpu_total_t) (perfstat_id_t *name, perfstat_cpu_total_t* userbuff,
++ int sizeof_userbuff, int desired_number);
++
++typedef int (*fun_perfstat_memory_total_t) (perfstat_id_t *name, perfstat_memory_total_t* userbuff,
++ int sizeof_userbuff, int desired_number);
++
++typedef void (*fun_perfstat_reset_t) ();
++
++static fun_perfstat_cpu_total_t g_fun_perfstat_cpu_total = NULL;
++static fun_perfstat_memory_total_t g_fun_perfstat_memory_total = NULL;
++static fun_perfstat_reset_t g_fun_perfstat_reset = NULL;
++
++bool libperfstat::init() {
++
++ if (g_initialized) {
++ return true;
++ }
++
++ g_initialized = false;
++
++ // dynamically load the libperfstat porting library.
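++ // (RTLD_MEMBER is an AIX-specific dlopen flag: it loads the named archive
++ // member, here shr_64.o, from inside libperfstat.a.)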
++ g_libhandle = dlopen("/usr/lib/libperfstat.a(shr_64.o)", RTLD_MEMBER | RTLD_NOW);
++ if (!g_libhandle) {
++ if (Verbose) {
++ fprintf(stderr, "Cannot load libperfstat.a (dlerror: %s)", dlerror());
++ }
++ return false;
++ }
++
++ // resolve function pointers
++
++#define RESOLVE_FUN_NO_ERROR(name) \
++ g_fun_##name = (fun_##name##_t) dlsym(g_libhandle, #name);
++
++#define RESOLVE_FUN(name) \
++ RESOLVE_FUN_NO_ERROR(name) \
++ if (!g_fun_##name) { \
++ if (Verbose) { \
++ fprintf(stderr, "Cannot resolve " #name "() from libperfstat.a\n" \
++ " (dlerror: %s)", dlerror()); \
++ } \
++ return false; \
++ }
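++
++ // Example: RESOLVE_FUN(perfstat_cpu_total) dlsym's "perfstat_cpu_total"
++ // into g_fun_perfstat_cpu_total and returns false from init() if the
++ // symbol cannot be resolved.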
++
++ RESOLVE_FUN(perfstat_cpu_total);
++ RESOLVE_FUN(perfstat_memory_total);
++ RESOLVE_FUN(perfstat_reset);
++
++ g_initialized = true;
++
++ return true;
++}
++
++void libperfstat::cleanup() {
++
++ g_initialized = false;
++
++ if (g_libhandle) {
++ dlclose(g_libhandle);
++ g_libhandle = NULL;
++ }
++
++ g_fun_perfstat_cpu_total = NULL;
++ g_fun_perfstat_memory_total = NULL;
++ g_fun_perfstat_reset = NULL;
++}
++
++int libperfstat::perfstat_memory_total(perfstat_id_t *name,
++ perfstat_memory_total_t* userbuff,
++ int sizeof_userbuff, int desired_number) {
++ assert(g_initialized, "libperfstat not initialized");
++ assert(g_fun_perfstat_memory_total, "");
++ return g_fun_perfstat_memory_total(name, userbuff, sizeof_userbuff, desired_number);
++}
++
++int libperfstat::perfstat_cpu_total(perfstat_id_t *name, perfstat_cpu_total_t* userbuff,
++ int sizeof_userbuff, int desired_number) {
++ assert(g_initialized, "libperfstat not initialized");
++ assert(g_fun_perfstat_cpu_total, "");
++ return g_fun_perfstat_cpu_total(name, userbuff, sizeof_userbuff, desired_number);
++}
++
++void libperfstat::perfstat_reset() {
++ assert(g_initialized, "libperfstat not initialized");
++ assert(g_fun_perfstat_reset, "");
++ g_fun_perfstat_reset();
++}
+--- ./hotspot/src/os/aix/vm/libperfstat_aix.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/os/aix/vm/libperfstat_aix.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,59 @@
++/*
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++// Encapsulates the libperfstat library.
++//
++// The purpose of this code is to dynamically load the libperfstat library
++// instead of statically linking against it. libperfstat is an AIX-specific
++// library which exists on AIX only, not on PASE. To share binaries between
++// AIX and PASE, we cannot link directly against libperfstat.so.
++
++#ifndef OS_AIX_VM_LIBPERFSTAT_AIX_HPP
++#define OS_AIX_VM_LIBPERFSTAT_AIX_HPP
++
++#include <libperfstat.h>
++
++class libperfstat {
++
++public:
++
++ // Load the libperfstat library (must be in LIBPATH).
++ // Returns true if succeeded, false if error.
++ static bool init();
++
++ // Cleanup of the dynamically loaded libperfstat library.
++ static void cleanup();
++
++ // Direct wrappers for the libperfstat functionality. All they do is
++ // call the functions of the same name via the resolved function pointers.
++ static int perfstat_cpu_total(perfstat_id_t *name, perfstat_cpu_total_t* userbuff,
++ int sizeof_userbuff, int desired_number);
++
++ static int perfstat_memory_total(perfstat_id_t *name, perfstat_memory_total_t* userbuff,
++ int sizeof_userbuff, int desired_number);
++
++ static void perfstat_reset();
++};
++
++#endif // OS_AIX_VM_LIBPERFSTAT_AIX_HPP
+--- ./hotspot/src/os/aix/vm/loadlib_aix.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/os/aix/vm/loadlib_aix.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,185 @@
++/*
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++
++// Implementation of LoadedLibraries and friends
++
++// Ultimately this just uses loadquery()
++// See:
++// http://publib.boulder.ibm.com/infocenter/pseries/v5r3/index.jsp
++// ?topic=/com.ibm.aix.basetechref/doc/basetrf1/loadquery.htm
++
++#ifndef __STDC_FORMAT_MACROS
++#define __STDC_FORMAT_MACROS
++#endif
++// 'allocation.inline.hpp' triggers the inclusion of 'inttypes.h' which defines macros
++// required by the definitions in 'globalDefinitions.hpp'. But these macros in 'inttypes.h'
++// are only defined if '__STDC_FORMAT_MACROS' is defined!
++#include "memory/allocation.inline.hpp"
++#include "oops/oop.inline.hpp"
++#include "runtime/threadCritical.hpp"
++#include "utilities/debug.hpp"
++#include "utilities/ostream.hpp"
++#include "loadlib_aix.hpp"
++#include "porting_aix.hpp"
++
++// For loadquery()
++#include <sys/ldr.h>
++
++///////////////////////////////////////////////////////////////////////////////
++// Implementation for LoadedLibraryModule
++
++// output debug info
++void LoadedLibraryModule::print(outputStream* os) const {
++ os->print("%15.15s: text: " INTPTR_FORMAT " - " INTPTR_FORMAT
++ ", data: " INTPTR_FORMAT " - " INTPTR_FORMAT " ",
++ shortname, text_from, text_to, data_from, data_to);
++ os->print(" %s", fullpath);
++ if (strlen(membername) > 0) {
++ os->print("(%s)", membername);
++ }
++ os->cr();
++}
++
++
++///////////////////////////////////////////////////////////////////////////////
++// Implementation for LoadedLibraries
++
++// class variables
++LoadedLibraryModule LoadedLibraries::tab[MAX_MODULES];
++int LoadedLibraries::num_loaded = 0;
++
++// Checks whether the address p points to any of the loaded code segments.
++// If it does, returns the LoadedLibraryModule entry. If not, returns NULL.
++// static
++const LoadedLibraryModule* LoadedLibraries::find_for_text_address(const unsigned char* p) {
++
++ if (num_loaded == 0) {
++ reload();
++ }
++ for (int i = 0; i < num_loaded; i++) {
++ if (tab[i].is_in_text(p)) {
++ return &tab[i];
++ }
++ }
++ return NULL;
++}
++
++// Checks whether the address p points to any of the loaded data segments.
++// If it does, returns the LoadedLibraryModule entry. If not, returns NULL.
++// static
++const LoadedLibraryModule* LoadedLibraries::find_for_data_address(const unsigned char* p) {
++ if (num_loaded == 0) {
++ reload();
++ }
++ for (int i = 0; i < num_loaded; i++) {
++ if (tab[i].is_in_data(p)) {
++ return &tab[i];
++ }
++ }
++ return NULL;
++}
++
++// Rebuild the internal table of LoadedLibraryModule objects
++// static
++void LoadedLibraries::reload() {
++
++ ThreadCritical cs;
++
++ // discard old content
++ num_loaded = 0;
++
++ // Call loadquery(L_GETINFO..) to get a list of all loaded Dlls from AIX.
++ size_t buf_size = 4096;
++ char* loadquery_buf = AllocateHeap(buf_size, mtInternal);
++
++ while(loadquery(L_GETINFO, loadquery_buf, buf_size) == -1) {
++ if (errno == ENOMEM) {
++ buf_size *= 2;
++ loadquery_buf = ReallocateHeap(loadquery_buf, buf_size, mtInternal);
++ } else {
++ FreeHeap(loadquery_buf);
++ // Ensure that the uintptr_t pointer is valid
++ assert(errno != EFAULT, "loadquery: Invalid uintptr_t in info buffer.");
++ fprintf(stderr, "loadquery failed (%d %s)", errno, strerror(errno));
++ return;
++ }
++ }
++
++ // Iterate over the loadquery result. For details see sys/ldr.h on AIX.
++ const struct ld_info* p = (struct ld_info*) loadquery_buf;
++
++ // Ensure we have all loaded libs.
++ bool all_loaded = false;
++ while(num_loaded < MAX_MODULES) {
++ LoadedLibraryModule& mod = tab[num_loaded];
++ mod.text_from = (const unsigned char*) p->ldinfo_textorg;
++ mod.text_to = (const unsigned char*) (((char*)p->ldinfo_textorg) + p->ldinfo_textsize);
++ mod.data_from = (const unsigned char*) p->ldinfo_dataorg;
++ mod.data_to = (const unsigned char*) (((char*)p->ldinfo_dataorg) + p->ldinfo_datasize);
++ sprintf(mod.fullpath, "%.*s", sizeof(mod.fullpath), p->ldinfo_filename);
++ // do we have a member name as well (see ldr.h)?
++ const char* p_mbr_name = p->ldinfo_filename + strlen(p->ldinfo_filename) + 1;
++ if (*p_mbr_name) {
++ sprintf(mod.membername, "%.*s", sizeof(mod.membername), p_mbr_name);
++ } else {
++ mod.membername[0] = '\0';
++ }
++
++ // fill in the short name
++ const char* p_slash = strrchr(mod.fullpath, '/');
++ if (p_slash) {
++ sprintf(mod.shortname, "%.*s", sizeof(mod.shortname), p_slash + 1);
++ } else {
++ sprintf(mod.shortname, "%.*s", sizeof(mod.shortname), mod.fullpath);
++ }
++ num_loaded ++;
++
++ // next entry...
++ if (p->ldinfo_next) {
++ p = (struct ld_info*)(((char*)p) + p->ldinfo_next);
++ } else {
++ all_loaded = true;
++ break;
++ }
++ }
++
++ FreeHeap(loadquery_buf);
++
++ // Ensure we have all loaded libs
++ assert(all_loaded, "loadquery returned more entries than expected. Please increase MAX_MODULES");
++
++} // end LoadedLibraries::reload()
++
++
++// output loaded libraries table
++//static
++void LoadedLibraries::print(outputStream* os) {
++
++ for (int i = 0; i < num_loaded; i++) {
++ tab[i].print(os);
++ }
++
++}
++
+--- ./hotspot/src/os/aix/vm/loadlib_aix.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/os/aix/vm/loadlib_aix.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,128 @@
++/*
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++
++// Loadlib_aix.cpp contains support code for analysing the memory
++// layout of loaded binaries in one's own process space.
++//
++// It is needed, among other things, to provide a dladdr() emulation, because
++// that one is not provided by AIX
++
++#ifndef OS_AIX_VM_LOADLIB_AIX_HPP
++#define OS_AIX_VM_LOADLIB_AIX_HPP
++
++class outputStream;
++
++// This class holds information about a single loaded library module.
++// Note that on AIX, a single library can be spread over multiple
++// uintptr_t ranges on a module base, e.g.
++// libC.a(shr3_64.o) or libC.a(shrcore_64.o).
++class LoadedLibraryModule {
++
++ friend class LoadedLibraries;
++
++ char fullpath[512]; // eg /usr/lib/libC.a
++ char shortname[30]; // eg libC.a
++ char membername[30]; // eg shrcore_64.o
++ const unsigned char* text_from;
++ const unsigned char* text_to;
++ const unsigned char* data_from;
++ const unsigned char* data_to;
++
++ public:
++
++ const char* get_fullpath() const {
++ return fullpath;
++ }
++ const char* get_shortname() const {
++ return shortname;
++ }
++ const char* get_membername() const {
++ return membername;
++ }
++
++ // text_from, text_to: returns the range of the text (code)
++ // segment for that module
++ const unsigned char* get_text_from() const {
++ return text_from;
++ }
++ const unsigned char* get_text_to() const {
++ return text_to;
++ }
++
++ // data_from/data_to: returns the range of the data
++ // segment for that module
++ const unsigned char* get_data_from() const {
++ return data_from;
++ }
++ const unsigned char* get_data_to() const {
++ return data_to;
++ }
++
++ // returns true if the address p lies within this module's text segment
++ bool is_in_text(const unsigned char* p) const {
++ return p >= text_from && p < text_to ? true : false;
++ }
++
++ bool is_in_data(const unsigned char* p) const {
++ return p >= data_from && p < data_to ? true : false;
++ }
++
++ // output debug info
++ void print(outputStream* os) const;
++
++}; // end LoadedLibraryModule
++
++// This class is a singleton holding a map of all loaded binaries
++// in the AIX process space.
++class LoadedLibraries
++// : AllStatic (including allocation.hpp just for AllStatic is overkill.)
++{
++
++ private:
++
++ enum {MAX_MODULES = 100};
++ static LoadedLibraryModule tab[MAX_MODULES];
++ static int num_loaded;
++
++ public:
++
++ // rebuild the internal table of LoadedLibraryModule objects
++ static void reload();
++
++ // checks whether the address p points to any of the loaded code segments.
++ // If it does, returns the LoadedLibraryModule entry. If not, returns NULL.
++ static const LoadedLibraryModule* find_for_text_address(const unsigned char* p);
++
++ // checks whether the address p points to any of the loaded data segments.
++ // If it does, returns the LoadedLibraryModule entry. If not, returns NULL.
++ static const LoadedLibraryModule* find_for_data_address(const unsigned char* p);
++
++ // output debug info
++ static void print(outputStream* os);
++
++}; // end LoadedLibraries
++
++
++#endif // OS_AIX_VM_LOADLIB_AIX_HPP
+--- ./hotspot/src/os/aix/vm/mutex_aix.inline.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/os/aix/vm/mutex_aix.inline.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,33 @@
++/*
++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2014 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef OS_AIX_VM_MUTEX_AIX_INLINE_HPP
++#define OS_AIX_VM_MUTEX_AIX_INLINE_HPP
++
++#include "os_aix.inline.hpp"
++#include "runtime/interfaceSupport.hpp"
++#include "runtime/thread.inline.hpp"
++
++#endif // OS_AIX_VM_MUTEX_AIX_INLINE_HPP
+--- ./hotspot/src/os/aix/vm/osThread_aix.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/os/aix/vm/osThread_aix.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,58 @@
++/*
++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++// no precompiled headers
++#include "runtime/atomic.hpp"
++#include "runtime/handles.inline.hpp"
++#include "runtime/mutexLocker.hpp"
++#include "runtime/os.hpp"
++#include "runtime/osThread.hpp"
++#include "runtime/safepoint.hpp"
++#include "runtime/vmThread.hpp"
++#ifdef TARGET_ARCH_ppc
++# include "assembler_ppc.inline.hpp"
++#endif
++
++
++void OSThread::pd_initialize() {
++ assert(this != NULL, "check");
++ _thread_id = 0;
++ _pthread_id = 0;
++ _siginfo = NULL;
++ _ucontext = NULL;
++ _expanding_stack = 0;
++ _alt_sig_stack = NULL;
++
++ _last_cpu_times.sys = _last_cpu_times.user = 0L;
++
++ sigemptyset(&_caller_sigmask);
++
++ _startThread_lock = new Monitor(Mutex::event, "startThread_lock", true);
++ assert(_startThread_lock !=NULL, "check");
++}
++
++void OSThread::pd_destroy() {
++ delete _startThread_lock;
++}
+--- ./hotspot/src/os/aix/vm/osThread_aix.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/os/aix/vm/osThread_aix.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,144 @@
++/*
++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef OS_AIX_VM_OSTHREAD_AIX_HPP
++#define OS_AIX_VM_OSTHREAD_AIX_HPP
++
++ public:
++ typedef pid_t thread_id_t;
++
++ private:
++ int _thread_type;
++
++ public:
++
++ int thread_type() const {
++ return _thread_type;
++ }
++ void set_thread_type(int type) {
++ _thread_type = type;
++ }
++
++ private:
++
++ // _pthread_id is the pthread id, which is used by library calls
++ // (e.g. pthread_kill).
++ pthread_t _pthread_id;
++
++ sigset_t _caller_sigmask; // Caller's signal mask
++
++ public:
++
++ // Methods to save/restore caller's signal mask
++ sigset_t caller_sigmask() const { return _caller_sigmask; }
++ void set_caller_sigmask(sigset_t sigmask) { _caller_sigmask = sigmask; }
++
++#ifndef PRODUCT
++ // Used for debugging, return a unique integer for each thread.
++ int thread_identifier() const { return _thread_id; }
++#endif
++#ifdef ASSERT
++ // We expect no reposition failures so kill vm if we get one.
++ //
++ bool valid_reposition_failure() {
++ return false;
++ }
++#endif // ASSERT
++ pthread_t pthread_id() const {
++ return _pthread_id;
++ }
++ void set_pthread_id(pthread_t tid) {
++ _pthread_id = tid;
++ }
++
++ // ***************************************************************
++ // suspension support.
++ // ***************************************************************
++
++ public:
++ // flags that support signal based suspend/resume on AIX are in a
++ // separate class to avoid confusion with many flags in OSThread that
++ // are used by VM level suspend/resume.
++ os::SuspendResume sr;
++
++ // _ucontext and _siginfo are used by SR_handler() to save thread context,
++ // and they will later be used to walk the stack or reposition thread PC.
++ // If the thread is not suspended in SR_handler() (e.g. self suspend),
++ // the value in _ucontext is meaningless, so we must use the last Java
++ // frame information as the frame. This will mean that for threads
++ // that are parked on a mutex the profiler (and safepoint mechanism)
++ // will see the thread as if it were still in the Java frame. This is
++ // not a problem for the profiler since the Java frame is a close
++ // enough result. For the safepoint mechanism, when we give it the
++ // Java frame we are not at a point where the safepoint needs the
++ // frame to be that accurate (like for a compiled safepoint), since we
++ // should be in a place where we are native and will block ourselves
++ // if we transition.
++ private:
++ void* _siginfo;
++ ucontext_t* _ucontext;
++ int _expanding_stack; // non zero if manually expanding stack
++ address _alt_sig_stack; // address of base of alternate signal stack
++
++ public:
++ void* siginfo() const { return _siginfo; }
++ void set_siginfo(void* ptr) { _siginfo = ptr; }
++ ucontext_t* ucontext() const { return _ucontext; }
++ void set_ucontext(ucontext_t* ptr) { _ucontext = ptr; }
++ void set_expanding_stack(void) { _expanding_stack = 1; }
++ void clear_expanding_stack(void) { _expanding_stack = 0; }
++ int expanding_stack(void) { return _expanding_stack; }
++
++ void set_alt_sig_stack(address val) { _alt_sig_stack = val; }
++ address alt_sig_stack(void) { return _alt_sig_stack; }
++
++ private:
++ Monitor* _startThread_lock; // sync parent and child in thread creation
++
++ public:
++
++ Monitor* startThread_lock() const {
++ return _startThread_lock;
++ }
++
++ // ***************************************************************
++ // Platform dependent initialization and cleanup
++ // ***************************************************************
++
++ private:
++
++ void pd_initialize();
++ void pd_destroy();
++
++ public:
++
++ // The last measured values of cpu timing to prevent the "stale
++ // value return" bug in thread_cpu_time.
++ volatile struct {
++ jlong sys;
++ jlong user;
++ } _last_cpu_times;
++
++#endif // OS_AIX_VM_OSTHREAD_AIX_HPP
+--- ./hotspot/src/os/aix/vm/os_aix.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/os/aix/vm/os_aix.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,5256 @@
++/*
++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2014 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++// According to the AIX OS doc #pragma alloca must be used
++// with C++ compiler before referencing the function alloca()
++#pragma alloca
++
++// no precompiled headers
++#include "classfile/classLoader.hpp"
++#include "classfile/systemDictionary.hpp"
++#include "classfile/vmSymbols.hpp"
++#include "code/icBuffer.hpp"
++#include "code/vtableStubs.hpp"
++#include "compiler/compileBroker.hpp"
++#include "interpreter/interpreter.hpp"
++#include "jvm_aix.h"
++#include "libperfstat_aix.hpp"
++#include "loadlib_aix.hpp"
++#include "memory/allocation.inline.hpp"
++#include "memory/filemap.hpp"
++#include "mutex_aix.inline.hpp"
++#include "oops/oop.inline.hpp"
++#include "os_share_aix.hpp"
++#include "porting_aix.hpp"
++#include "prims/jniFastGetField.hpp"
++#include "prims/jvm.h"
++#include "prims/jvm_misc.hpp"
++#include "runtime/arguments.hpp"
++#include "runtime/extendedPC.hpp"
++#include "runtime/globals.hpp"
++#include "runtime/interfaceSupport.hpp"
++#include "runtime/java.hpp"
++#include "runtime/javaCalls.hpp"
++#include "runtime/mutexLocker.hpp"
++#include "runtime/objectMonitor.hpp"
++#include "runtime/osThread.hpp"
++#include "runtime/perfMemory.hpp"
++#include "runtime/sharedRuntime.hpp"
++#include "runtime/statSampler.hpp"
++#include "runtime/stubRoutines.hpp"
++#include "runtime/thread.inline.hpp"
++#include "runtime/threadCritical.hpp"
++#include "runtime/timer.hpp"
++#include "services/attachListener.hpp"
++#include "services/runtimeService.hpp"
++#include "utilities/decoder.hpp"
++#include "utilities/defaultStream.hpp"
++#include "utilities/events.hpp"
++#include "utilities/growableArray.hpp"
++#include "utilities/vmError.hpp"
++
++// put OS-includes here (sorted alphabetically)
++#include <errno.h>
++#include <fcntl.h>
++#include <inttypes.h>
++#include <poll.h>
++#include <procinfo.h>
++#include <pthread.h>
++#include <pwd.h>
++#include <semaphore.h>
++#include <signal.h>
++#include <stdint.h>
++#include <stdio.h>
++#include <string.h>
++#include <unistd.h>
++#include <sys/ioctl.h>
++#include <sys/ipc.h>
++#include <sys/mman.h>
++#include <sys/resource.h>
++#include <sys/select.h>
++#include <sys/shm.h>
++#include <sys/socket.h>
++#include <sys/stat.h>
++#include <sys/sysinfo.h>
++#include <sys/systemcfg.h>
++#include <sys/time.h>
++#include <sys/times.h>
++#include <sys/types.h>
++#include <sys/utsname.h>
++#include <sys/vminfo.h>
++#include <sys/wait.h>
++
++// Add missing declarations (should be in procinfo.h but isn't until AIX 6.1).
++#if !defined(_AIXVERSION_610)
++extern "C" {
++ int getthrds64(pid_t ProcessIdentifier,
++ struct thrdentry64* ThreadBuffer,
++ int ThreadSize,
++ tid64_t* IndexPointer,
++ int Count);
++}
++#endif
++
++// Excerpts from systemcfg.h definitions newer than AIX 5.3
++#ifndef PV_7
++# define PV_7 0x200000 // Power PC 7
++# define PV_7_Compat 0x208000 // Power PC 7
++#endif
++
++#define MAX_PATH (2 * K)
++
++// for timer info max values which include all bits
++#define ALL_64_BITS CONST64(0xFFFFFFFFFFFFFFFF)
++// for multipage initialization error analysis (in 'g_multipage_error')
++#define ERROR_MP_OS_TOO_OLD 100
++#define ERROR_MP_EXTSHM_ACTIVE 101
++#define ERROR_MP_VMGETINFO_FAILED 102
++#define ERROR_MP_VMGETINFO_CLAIMS_NO_SUPPORT_FOR_64K 103
++
++// the semantics in this file are thus that codeptr_t is a *real code ptr*
++// This means that any function taking codeptr_t as arguments will assume
++// a real codeptr and won't handle function descriptors (eg getFuncName),
++// whereas functions taking address as args will deal with function
++// descriptors (eg os::dll_address_to_library_name)
++typedef unsigned int* codeptr_t;
++
++// typedefs for stackslots, stack pointers, pointers to op codes
++typedef unsigned long stackslot_t;
++typedef stackslot_t* stackptr_t;
++
++// query dimensions of the stack of the calling thread
++static void query_stack_dimensions(address* p_stack_base, size_t* p_stack_size);
++
++// function to check a given stack pointer against given stack limits
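++// (valid means: 8-byte aligned and within [stack_base - stack_size, stack_base])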
++inline bool is_valid_stackpointer(stackptr_t sp, stackptr_t stack_base, size_t stack_size) {
++ if (((uintptr_t)sp) & 0x7) {
++ return false;
++ }
++ if (sp > stack_base) {
++ return false;
++ }
++ if (sp < (stackptr_t) ((address)stack_base - stack_size)) {
++ return false;
++ }
++ return true;
++}
++
++// returns true if function is a valid codepointer
++inline bool is_valid_codepointer(codeptr_t p) {
++ if (!p) {
++ return false;
++ }
++ if (((uintptr_t)p) & 0x3) {
++ return false;
++ }
++ if (LoadedLibraries::find_for_text_address((address)p) == NULL) {
++ return false;
++ }
++ return true;
++}
++
++// macro to check a given stack pointer against given stack limits and to die if test fails
++#define CHECK_STACK_PTR(sp, stack_base, stack_size) { \
++ guarantee(is_valid_stackpointer((stackptr_t)(sp), (stackptr_t)(stack_base), stack_size), "Stack Pointer Invalid"); \
++}
++
++// macro to check the current stack pointer against given stacklimits
++#define CHECK_CURRENT_STACK_PTR(stack_base, stack_size) { \
++ address sp; \
++ sp = os::current_stack_pointer(); \
++ CHECK_STACK_PTR(sp, stack_base, stack_size); \
++}
++
++////////////////////////////////////////////////////////////////////////////////
++// global variables (for a description see os_aix.hpp)
++
++julong os::Aix::_physical_memory = 0;
++pthread_t os::Aix::_main_thread = ((pthread_t)0);
++int os::Aix::_page_size = -1;
++int os::Aix::_on_pase = -1;
++int os::Aix::_os_version = -1;
++int os::Aix::_stack_page_size = -1;
++size_t os::Aix::_shm_default_page_size = -1;
++int os::Aix::_can_use_64K_pages = -1;
++int os::Aix::_can_use_16M_pages = -1;
++int os::Aix::_xpg_sus_mode = -1;
++int os::Aix::_extshm = -1;
++int os::Aix::_logical_cpus = -1;
++
++////////////////////////////////////////////////////////////////////////////////
++// local variables
++
++static int g_multipage_error = -1; // error analysis for multipage initialization
++static jlong initial_time_count = 0;
++static int clock_tics_per_sec = 100;
++static sigset_t check_signal_done; // For diagnostics to print a message once (see run_periodic_checks)
++static bool check_signals = true;
++static pid_t _initial_pid = 0;
++static int SR_signum = SIGUSR2; // Signal used to suspend/resume a thread (must be > SIGSEGV, see 4355769)
++static sigset_t SR_sigset;
++static pthread_mutex_t dl_mutex; // Used to protect dlsym() calls
++
++julong os::available_memory() {
++ return Aix::available_memory();
++}
++
++julong os::Aix::available_memory() {
++ os::Aix::meminfo_t mi;
++ if (os::Aix::get_meminfo(&mi)) {
++ return mi.real_free;
++ } else {
++ return 0xFFFFFFFFFFFFFFFFLL;
++ }
++}
++
++julong os::physical_memory() {
++ return Aix::physical_memory();
++}
++
++////////////////////////////////////////////////////////////////////////////////
++// environment support
++
++bool os::getenv(const char* name, char* buf, int len) {
++ const char* val = ::getenv(name);
++ if (val != NULL && strlen(val) < (size_t)len) {
++ strcpy(buf, val);
++ return true;
++ }
++ if (len > 0) buf[0] = 0; // return an empty string
++ return false;
++}
++
++
++// Return true if the process runs with extra privileges, i.e. its effective
++// user or group id differs from the real one (setuid/setgid binaries).
++
++bool os::have_special_privileges() {
++ static bool init = false;
++ static bool privileges = false;
++ if (!init) {
++ privileges = (getuid() != geteuid()) || (getgid() != getegid());
++ init = true;
++ }
++ return privileges;
++}
++
++// Helper function, emulates disclaim64 using multiple 32bit disclaims
++// because we cannot use disclaim64() on AS/400 and old AIX releases.
++static bool my_disclaim64(char* addr, size_t size) {
++
++ if (size == 0) {
++ return true;
++ }
++
++ // Maximum size 32bit disclaim() accepts. (Theoretically 4GB, but I just do not trust that.)
++ const unsigned int maxDisclaimSize = 0x80000000;
++
++ const unsigned int numFullDisclaimsNeeded = (size / maxDisclaimSize);
++ const unsigned int lastDisclaimSize = (size % maxDisclaimSize);
++
++ char* p = addr;
++
++ for (unsigned int i = 0; i < numFullDisclaimsNeeded; i ++) {
++ if (::disclaim(p, maxDisclaimSize, DISCLAIM_ZEROMEM) != 0) {
++ //if (Verbose)
++ fprintf(stderr, "Cannot disclaim %p - %p (errno %d)\n", p, p + maxDisclaimSize, errno);
++ return false;
++ }
++ p += maxDisclaimSize;
++ }
++
++ if (lastDisclaimSize > 0) {
++ if (::disclaim(p, lastDisclaimSize, DISCLAIM_ZEROMEM) != 0) {
++ //if (Verbose)
++ fprintf(stderr, "Cannot disclaim %p - %p (errno %d)\n", p, p + lastDisclaimSize, errno);
++ return false;
++ }
++ }
++
++ return true;
++}
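++
++// A worked example of the split above (for illustration only): disclaiming
++// size = 5 GB with maxDisclaimSize = 2 GB gives numFullDisclaimsNeeded = 2
++// and lastDisclaimSize = 1 GB - two full disclaim() calls plus one partial
++// call for the remainder.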
++
++// Cpu architecture string
++#if defined(PPC32)
++static char cpu_arch[] = "ppc";
++#elif defined(PPC64)
++static char cpu_arch[] = "ppc64";
++#else
++#error Add appropriate cpu_arch setting
++#endif
++
++
++// Given an address, returns the size of the page backing that address.
++size_t os::Aix::query_pagesize(void* addr) {
++
++ vm_page_info pi;
++ pi.addr = (uint64_t)addr;
++ if (::vmgetinfo(&pi, VM_PAGE_INFO, sizeof(pi)) == 0) {
++ return pi.pagesize;
++ } else {
++ fprintf(stderr, "vmgetinfo failed to retrieve page size for address %p (errno %d).\n", addr, errno);
++ assert(false, "vmgetinfo failed to retrieve page size");
++ return SIZE_4K;
++ }
++
++}
++
++// Returns the kernel thread id of the currently running thread.
++pid_t os::Aix::gettid() {
++ return (pid_t) thread_self();
++}
++
++void os::Aix::initialize_system_info() {
++
++ // Get the number of online (logical) cpus instead of the configured number.
++ os::_processor_count = sysconf(_SC_NPROCESSORS_ONLN);
++ assert(_processor_count > 0, "_processor_count must be > 0");
++
++ // retrieve total physical storage
++ os::Aix::meminfo_t mi;
++ if (!os::Aix::get_meminfo(&mi)) {
++ fprintf(stderr, "os::Aix::get_meminfo failed.\n"); fflush(stderr);
++ assert(false, "os::Aix::get_meminfo failed.");
++ }
++ _physical_memory = (julong) mi.real_total;
++}
++
++// Helper function for tracing page sizes.
++static const char* describe_pagesize(size_t pagesize) {
++ switch (pagesize) {
++ case SIZE_4K : return "4K";
++ case SIZE_64K: return "64K";
++ case SIZE_16M: return "16M";
++ case SIZE_16G: return "16G";
++ default:
++ assert(false, "surprise");
++ return "??";
++ }
++}
++
++// Retrieve information about multipage size support. Will initialize
++// Aix::_page_size, Aix::_stack_page_size, Aix::_can_use_64K_pages,
++// Aix::_can_use_16M_pages.
++// Must be called before calling os::large_page_init().
++void os::Aix::query_multipage_support() {
++
++ guarantee(_page_size == -1 &&
++ _stack_page_size == -1 &&
++ _can_use_64K_pages == -1 &&
++ _can_use_16M_pages == -1 &&
++ g_multipage_error == -1,
++ "do not call twice");
++
++ _page_size = ::sysconf(_SC_PAGESIZE);
++
++ // This really would surprise me.
++ assert(_page_size == SIZE_4K, "surprise!");
++
++
++ // Query default data page size (default page size for C-Heap, pthread stacks and .bss).
++ // Default data page size is influenced either by linker options (-bdatapsize)
++ // or by environment variable LDR_CNTRL (suboption DATAPSIZE). If none is given,
++ // default should be 4K.
++ size_t data_page_size = SIZE_4K;
++ {
++ void* p = ::malloc(SIZE_16M);
++ guarantee(p != NULL, "malloc failed");
++ data_page_size = os::Aix::query_pagesize(p);
++ ::free(p);
++ }
++
++ // query default shm page size (LDR_CNTRL SHMPSIZE)
++ {
++ const int shmid = ::shmget(IPC_PRIVATE, 1, IPC_CREAT | S_IRUSR | S_IWUSR);
++ guarantee(shmid != -1, "shmget failed");
++ void* p = ::shmat(shmid, NULL, 0);
++ ::shmctl(shmid, IPC_RMID, NULL);
++ guarantee(p != (void*) -1, "shmat failed");
++ _shm_default_page_size = os::Aix::query_pagesize(p);
++ ::shmdt(p);
++ }
++
++ // Before querying the stack page size, make sure we are not running as the
++ // primordial thread (because the primordial thread's stack may have a different
++ // page size than pthread thread stacks). Running a VM on the primordial thread
++ // won't work for a number of reasons, so we may just as well guarantee it here.
++ guarantee(!os::Aix::is_primordial_thread(), "Must not be called for primordial thread");
++
++ // query stack page size
++ {
++ int dummy = 0;
++ _stack_page_size = os::Aix::query_pagesize(&dummy);
++ // everything else would surprise me and should be looked into
++ guarantee(_stack_page_size == SIZE_4K || _stack_page_size == SIZE_64K, "Wrong page size");
++ // also, just for completeness: pthread stacks are allocated from C heap, so
++ // stack page size should be the same as data page size
++ guarantee(_stack_page_size == data_page_size, "stack page size should be the same as data page size");
++ }
++
++ // EXTSHM is bad: among other things, it prevents setting pagesize dynamically
++ // for System V shm.
++ if (Aix::extshm()) {
++ if (Verbose) {
++ fprintf(stderr, "EXTSHM is active - will disable large page support.\n"
++ "Please make sure EXTSHM is OFF for large page support.\n");
++ }
++ g_multipage_error = ERROR_MP_EXTSHM_ACTIVE;
++ _can_use_64K_pages = _can_use_16M_pages = 0;
++ goto query_multipage_support_end;
++ }
++
++ // now check which page sizes the OS claims it supports, and of those, which actually can be used.
++ {
++ const int MAX_PAGE_SIZES = 4;
++ psize_t sizes[MAX_PAGE_SIZES];
++ const int num_psizes = ::vmgetinfo(sizes, VMINFO_GETPSIZES, MAX_PAGE_SIZES);
++ if (num_psizes == -1) {
++ if (Verbose) {
++ fprintf(stderr, "vmgetinfo(VMINFO_GETPSIZES) failed (errno: %d)\n", errno);
++ fprintf(stderr, "disabling multipage support.\n");
++ }
++ g_multipage_error = ERROR_MP_VMGETINFO_FAILED;
++ _can_use_64K_pages = _can_use_16M_pages = 0;
++ goto query_multipage_support_end;
++ }
++ guarantee(num_psizes > 0, "vmgetinfo(.., VMINFO_GETPSIZES, ...) failed.");
++ assert(num_psizes <= MAX_PAGE_SIZES, "Surprise! more than 4 page sizes?");
++ if (Verbose) {
++ fprintf(stderr, "vmgetinfo(.., VMINFO_GETPSIZES, ...) returns %d supported page sizes: ", num_psizes);
++ for (int i = 0; i < num_psizes; i ++) {
++ fprintf(stderr, " %s ", describe_pagesize(sizes[i]));
++ }
++ fprintf(stderr, " .\n");
++ }
++
++ // Can we use 64K, 16M pages?
++ _can_use_64K_pages = 0;
++ _can_use_16M_pages = 0;
++ for (int i = 0; i < num_psizes; i ++) {
++ if (sizes[i] == SIZE_64K) {
++ _can_use_64K_pages = 1;
++ } else if (sizes[i] == SIZE_16M) {
++ _can_use_16M_pages = 1;
++ }
++ }
++
++ if (!_can_use_64K_pages) {
++ g_multipage_error = ERROR_MP_VMGETINFO_CLAIMS_NO_SUPPORT_FOR_64K;
++ }
++
++ // Double-check for 16M pages: Even if AIX claims to be able to use 16M pages,
++ // there must be an actual 16M page pool, and we must run with enough rights.
++ if (_can_use_16M_pages) {
++ const int shmid = ::shmget(IPC_PRIVATE, SIZE_16M, IPC_CREAT | S_IRUSR | S_IWUSR);
++ guarantee(shmid != -1, "shmget failed");
++ struct shmid_ds shm_buf = { 0 };
++ shm_buf.shm_pagesize = SIZE_16M;
++ const bool can_set_pagesize = ::shmctl(shmid, SHM_PAGESIZE, &shm_buf) == 0 ? true : false;
++ const int en = errno;
++ ::shmctl(shmid, IPC_RMID, NULL);
++ if (!can_set_pagesize) {
++ if (Verbose) {
++ fprintf(stderr, "Failed to allocate even one misely 16M page. shmctl failed with %d (%s).\n"
++ "Will deactivate 16M support.\n", en, strerror(en));
++ }
++ _can_use_16M_pages = 0;
++ }
++ }
++
++ } // end: check which pages can be used for shared memory
++
++query_multipage_support_end:
++
++ guarantee(_page_size != -1 &&
++ _stack_page_size != -1 &&
++ _can_use_64K_pages != -1 &&
++ _can_use_16M_pages != -1, "Page sizes not properly initialized");
++
++ if (_can_use_64K_pages) {
++ g_multipage_error = 0;
++ }
++
++ if (Verbose) {
++ fprintf(stderr, "Data page size (C-Heap, bss, etc): %s\n", describe_pagesize(data_page_size));
++ fprintf(stderr, "Thread stack page size (pthread): %s\n", describe_pagesize(_stack_page_size));
++ fprintf(stderr, "Default shared memory page size: %s\n", describe_pagesize(_shm_default_page_size));
++ fprintf(stderr, "Can use 64K pages dynamically with shared meory: %s\n", (_can_use_64K_pages ? "yes" :"no"));
++ fprintf(stderr, "Can use 16M pages dynamically with shared memory: %s\n", (_can_use_16M_pages ? "yes" :"no"));
++ fprintf(stderr, "Multipage error details: %d\n", g_multipage_error);
++ }
++
++} // end os::Aix::query_multipage_support()
++
++// The code for this method was initially derived from the version in os_linux.cpp.
++void os::init_system_properties_values() {
++
++#define DEFAULT_LIBPATH "/usr/lib:/lib"
++#define EXTENSIONS_DIR "/lib/ext"
++#define ENDORSED_DIR "/lib/endorsed"
++
++ // Buffer that fits several sprintfs.
++ // Note that the space for the trailing null is provided
++ // by the nulls included by the sizeof operator.
++ const size_t bufsize =
++ MAX3((size_t)MAXPATHLEN, // For dll_dir & friends.
++ (size_t)MAXPATHLEN + sizeof(EXTENSIONS_DIR), // extensions dir
++ (size_t)MAXPATHLEN + sizeof(ENDORSED_DIR)); // endorsed dir
++ char *buf = (char *)NEW_C_HEAP_ARRAY(char, bufsize, mtInternal);
++
++ // sysclasspath, java_home, dll_dir
++ {
++ char *pslash;
++ os::jvm_path(buf, bufsize);
++
++ // Found the full path to libjvm.so.
++ // Now cut the path to /jre if we can.
++ *(strrchr(buf, '/')) = '\0'; // Get rid of /libjvm.so.
++ pslash = strrchr(buf, '/');
++ if (pslash != NULL) {
++ *pslash = '\0'; // Get rid of /{client|server|hotspot}.
++ }
++ Arguments::set_dll_dir(buf);
++
++ if (pslash != NULL) {
++ pslash = strrchr(buf, '/');
++ if (pslash != NULL) {
++ *pslash = '\0'; // Get rid of /.
++ pslash = strrchr(buf, '/');
++ if (pslash != NULL) {
++ *pslash = '\0'; // Get rid of /lib.
++ }
++ }
++ }
++ Arguments::set_java_home(buf);
++ set_boot_path('/', ':');
++ }
++
++ // Where to look for native libraries.
++
++ // On AIX we get the user setting of LIBPATH.
++ // Eventually, all the library path setting will be done here.
++ const char *v = ::getenv("LIBPATH");
++ const char *v_colon = ":";
++ if (v == NULL) { v = ""; v_colon = ""; }
++
++ // Concatenate user and invariant part of ld_library_path.
++ // That's +1 for the colon and +1 for the trailing '\0'.
++ char *ld_library_path = (char *)NEW_C_HEAP_ARRAY(char, strlen(v) + 1 + sizeof(DEFAULT_LIBPATH) + 1, mtInternal);
++ sprintf(ld_library_path, "%s%s" DEFAULT_LIBPATH, v, v_colon);
++ Arguments::set_library_path(ld_library_path);
++ FREE_C_HEAP_ARRAY(char, ld_library_path, mtInternal);
++
++ // Extensions directories.
++ sprintf(buf, "%s" EXTENSIONS_DIR, Arguments::get_java_home());
++ Arguments::set_ext_dirs(buf);
++
++ // Endorsed standards default directory.
++ sprintf(buf, "%s" ENDORSED_DIR, Arguments::get_java_home());
++ Arguments::set_endorsed_dirs(buf);
++
++ FREE_C_HEAP_ARRAY(char, buf, mtInternal);
++
++#undef DEFAULT_LIBPATH
++#undef EXTENSIONS_DIR
++#undef ENDORSED_DIR
++}
++
++////////////////////////////////////////////////////////////////////////////////
++// breakpoint support
++
++void os::breakpoint() {
++ BREAKPOINT;
++}
++
++extern "C" void breakpoint() {
++ // use debugger to set breakpoint here
++}
++
++////////////////////////////////////////////////////////////////////////////////
++// signal support
++
++debug_only(static bool signal_sets_initialized = false);
++static sigset_t unblocked_sigs, vm_sigs, allowdebug_blocked_sigs;
++
++bool os::Aix::is_sig_ignored(int sig) {
++ struct sigaction oact;
++ sigaction(sig, (struct sigaction*)NULL, &oact);
++ void* ohlr = oact.sa_sigaction ? CAST_FROM_FN_PTR(void*, oact.sa_sigaction)
++ : CAST_FROM_FN_PTR(void*, oact.sa_handler);
++ if (ohlr == CAST_FROM_FN_PTR(void*, SIG_IGN))
++ return true;
++ else
++ return false;
++}
++
++void os::Aix::signal_sets_init() {
++ // Should also have an assertion stating we are still single-threaded.
++ assert(!signal_sets_initialized, "Already initialized");
++ // Fill in signals that are necessarily unblocked for all threads in
++ // the VM. Currently, we unblock the following signals:
++ // SHUTDOWN{1,2,3}_SIGNAL: for shutdown hooks support (unless overridden
++ // by -Xrs (=ReduceSignalUsage));
++ // BREAK_SIGNAL which is unblocked only by the VM thread and blocked by all
++ // other threads. The "ReduceSignalUsage" boolean tells us not to alter
++ // the dispositions or masks wrt these signals.
++ // Programs embedding the VM that want to use the above signals for their
++ // own purposes must, at this time, use the "-Xrs" option to prevent
++ // interference with shutdown hooks and BREAK_SIGNAL thread dumping.
++ // (See bug 4345157, and other related bugs).
++ // In reality, though, unblocking these signals is really a nop, since
++ // these signals are not blocked by default.
++ sigemptyset(&unblocked_sigs);
++ sigemptyset(&allowdebug_blocked_sigs);
++ sigaddset(&unblocked_sigs, SIGILL);
++ sigaddset(&unblocked_sigs, SIGSEGV);
++ sigaddset(&unblocked_sigs, SIGBUS);
++ sigaddset(&unblocked_sigs, SIGFPE);
++ sigaddset(&unblocked_sigs, SIGTRAP);
++ sigaddset(&unblocked_sigs, SIGDANGER);
++ sigaddset(&unblocked_sigs, SR_signum);
++
++ if (!ReduceSignalUsage) {
++ if (!os::Aix::is_sig_ignored(SHUTDOWN1_SIGNAL)) {
++ sigaddset(&unblocked_sigs, SHUTDOWN1_SIGNAL);
++ sigaddset(&allowdebug_blocked_sigs, SHUTDOWN1_SIGNAL);
++ }
++ if (!os::Aix::is_sig_ignored(SHUTDOWN2_SIGNAL)) {
++ sigaddset(&unblocked_sigs, SHUTDOWN2_SIGNAL);
++ sigaddset(&allowdebug_blocked_sigs, SHUTDOWN2_SIGNAL);
++ }
++ if (!os::Aix::is_sig_ignored(SHUTDOWN3_SIGNAL)) {
++ sigaddset(&unblocked_sigs, SHUTDOWN3_SIGNAL);
++ sigaddset(&allowdebug_blocked_sigs, SHUTDOWN3_SIGNAL);
++ }
++ }
++ // Fill in signals that are blocked by all but the VM thread.
++ sigemptyset(&vm_sigs);
++ if (!ReduceSignalUsage)
++ sigaddset(&vm_sigs, BREAK_SIGNAL);
++ debug_only(signal_sets_initialized = true);
++}
++
++// These are signals that are unblocked while a thread is running Java.
++// (For some reason, they get blocked by default.)
++sigset_t* os::Aix::unblocked_signals() {
++ assert(signal_sets_initialized, "Not initialized");
++ return &unblocked_sigs;
++}
++
++// These are the signals that are blocked while a (non-VM) thread is
++// running Java. Only the VM thread handles these signals.
++sigset_t* os::Aix::vm_signals() {
++ assert(signal_sets_initialized, "Not initialized");
++ return &vm_sigs;
++}
++
++// These are signals that are blocked during cond_wait to allow debugger in
++sigset_t* os::Aix::allowdebug_blocked_signals() {
++ assert(signal_sets_initialized, "Not initialized");
++ return &allowdebug_blocked_sigs;
++}
++
++void os::Aix::hotspot_sigmask(Thread* thread) {
++
++ // Save the caller's signal mask before setting the VM signal mask.
++ sigset_t caller_sigmask;
++ pthread_sigmask(SIG_BLOCK, NULL, &caller_sigmask);
++
++ OSThread* osthread = thread->osthread();
++ osthread->set_caller_sigmask(caller_sigmask);
++
++ pthread_sigmask(SIG_UNBLOCK, os::Aix::unblocked_signals(), NULL);
++
++ if (!ReduceSignalUsage) {
++ if (thread->is_VM_thread()) {
++ // Only the VM thread handles BREAK_SIGNAL ...
++ pthread_sigmask(SIG_UNBLOCK, vm_signals(), NULL);
++ } else {
++ // ... all other threads block BREAK_SIGNAL
++ pthread_sigmask(SIG_BLOCK, vm_signals(), NULL);
++ }
++ }
++}
++
++// Retrieve memory information.
++// Returns false if something went wrong;
++// the content of pmi is undefined in this case.
++bool os::Aix::get_meminfo(meminfo_t* pmi) {
++
++ assert(pmi, "get_meminfo: invalid parameter");
++
++ memset(pmi, 0, sizeof(meminfo_t));
++
++ if (os::Aix::on_pase()) {
++
++ Unimplemented();
++ return false;
++
++ } else {
++
++ // On AIX, I use the (dynamically loaded) perfstat library to retrieve memory statistics
++ // See:
++ // http://publib.boulder.ibm.com/infocenter/systems/index.jsp
++ // ?topic=/com.ibm.aix.basetechref/doc/basetrf1/perfstat_memtot.htm
++ // http://publib.boulder.ibm.com/infocenter/systems/index.jsp
++ // ?topic=/com.ibm.aix.files/doc/aixfiles/libperfstat.h.htm
++
++ perfstat_memory_total_t psmt;
++ memset (&psmt, '\0', sizeof(psmt));
++ const int rc = libperfstat::perfstat_memory_total(NULL, &psmt, sizeof(psmt), 1);
++ if (rc == -1) {
++ fprintf(stderr, "perfstat_memory_total() failed (errno=%d)\n", errno);
++ assert(0, "perfstat_memory_total() failed");
++ return false;
++ }
++
++ assert(rc == 1, "perfstat_memory_total() - weird return code");
++
++ // excerpt from
++ // http://publib.boulder.ibm.com/infocenter/systems/index.jsp
++ // ?topic=/com.ibm.aix.files/doc/aixfiles/libperfstat.h.htm
++ // The fields of perfstat_memory_total_t:
++ // u_longlong_t virt_total Total virtual memory (in 4 KB pages).
++ // u_longlong_t real_total Total real memory (in 4 KB pages).
++ // u_longlong_t real_free Free real memory (in 4 KB pages).
++ // u_longlong_t pgsp_total Total paging space (in 4 KB pages).
++ // u_longlong_t pgsp_free Free paging space (in 4 KB pages).
++
++ pmi->virt_total = psmt.virt_total * 4096;
++ pmi->real_total = psmt.real_total * 4096;
++ pmi->real_free = psmt.real_free * 4096;
++ pmi->pgsp_total = psmt.pgsp_total * 4096;
++ pmi->pgsp_free = psmt.pgsp_free * 4096;
++
++ return true;
++
++ }
++} // end os::Aix::get_meminfo
++
++// Retrieve global cpu information.
++// Returns false if something went wrong;
++// the content of pci is undefined in this case.
++bool os::Aix::get_cpuinfo(cpuinfo_t* pci) {
++ assert(pci, "get_cpuinfo: invalid parameter");
++ memset(pci, 0, sizeof(cpuinfo_t));
++
++ perfstat_cpu_total_t psct;
++ memset (&psct, '\0', sizeof(psct));
++
++ if (-1 == libperfstat::perfstat_cpu_total(NULL, &psct, sizeof(perfstat_cpu_total_t), 1)) {
++ fprintf(stderr, "perfstat_cpu_total() failed (errno=%d)\n", errno);
++ assert(0, "perfstat_cpu_total() failed");
++ return false;
++ }
++
++ // global cpu information
++ strcpy (pci->description, psct.description);
++ pci->processorHZ = psct.processorHZ;
++ pci->ncpus = psct.ncpus;
++ os::Aix::_logical_cpus = psct.ncpus;
++ for (int i = 0; i < 3; i++) {
++ pci->loadavg[i] = (double) psct.loadavg[i] / (1 << SBITS);
++ }
++
++ // get the processor version from _system_configuration
++ switch (_system_configuration.version) {
++ case PV_7:
++ strcpy(pci->version, "Power PC 7");
++ break;
++ case PV_6_1:
++ strcpy(pci->version, "Power PC 6 DD1.x");
++ break;
++ case PV_6:
++ strcpy(pci->version, "Power PC 6");
++ break;
++ case PV_5:
++ strcpy(pci->version, "Power PC 5");
++ break;
++ case PV_5_2:
++ strcpy(pci->version, "Power PC 5_2");
++ break;
++ case PV_5_3:
++ strcpy(pci->version, "Power PC 5_3");
++ break;
++ case PV_5_Compat:
++ strcpy(pci->version, "PV_5_Compat");
++ break;
++ case PV_6_Compat:
++ strcpy(pci->version, "PV_6_Compat");
++ break;
++ case PV_7_Compat:
++ strcpy(pci->version, "PV_7_Compat");
++ break;
++ default:
++ strcpy(pci->version, "unknown");
++ }
++
++ return true;
++
++} //end os::Aix::get_cpuinfo
++
++//////////////////////////////////////////////////////////////////////////////
++// detecting pthread library
++
++void os::Aix::libpthread_init() {
++ return;
++}
++
++//////////////////////////////////////////////////////////////////////////////
++// create new thread
++
++// Thread start routine for all newly created threads
++static void *java_start(Thread *thread) {
++
++ // find out my own stack dimensions
++ {
++ // actually, this should do exactly the same as thread->record_stack_base_and_size...
++ address base = 0;
++ size_t size = 0;
++ query_stack_dimensions(&base, &size);
++ thread->set_stack_base(base);
++ thread->set_stack_size(size);
++ }
++
++ // Do some sanity checks.
++ CHECK_CURRENT_STACK_PTR(thread->stack_base(), thread->stack_size());
++
++ // Try to randomize the cache line index of hot stack frames.
++ // This helps when threads with the same stack traces evict each other's
++ // cache lines. The threads can be either from the same JVM instance, or
++ // from different JVM instances. The benefit is especially pronounced for
++ // processors with hyperthreading technology.
++
++ static int counter = 0;
++ int pid = os::current_process_id();
++ alloca(((pid ^ counter++) & 7) * 128);
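++ // For illustration: (pid ^ counter) & 7 is a value in [0, 7], so the
++ // alloca() above staggers this thread's hot frames by 0 to 896 bytes
++ // in 128-byte steps across different cache line indices.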
++
++ ThreadLocalStorage::set_thread(thread);
++
++ OSThread* osthread = thread->osthread();
++
++ // thread_id is kernel thread id (similar to Solaris LWP id)
++ osthread->set_thread_id(os::Aix::gettid());
++
++ // initialize signal mask for this thread
++ os::Aix::hotspot_sigmask(thread);
++
++ // initialize floating point control register
++ os::Aix::init_thread_fpu_state();
++
++ assert(osthread->get_state() == RUNNABLE, "invalid os thread state");
++
++ // Call the next-level start routine.
++ thread->run();
++
++ return 0;
++}
++
++bool os::create_thread(Thread* thread, ThreadType thr_type, size_t stack_size) {
++
++ // We want the whole function to be synchronized.
++ ThreadCritical cs;
++
++ assert(thread->osthread() == NULL, "caller responsible");
++
++ // Allocate the OSThread object
++ OSThread* osthread = new OSThread(NULL, NULL);
++ if (osthread == NULL) {
++ return false;
++ }
++
++ // set the correct thread state
++ osthread->set_thread_type(thr_type);
++
++ // Initial state is ALLOCATED but not INITIALIZED
++ osthread->set_state(ALLOCATED);
++
++ thread->set_osthread(osthread);
++
++ // init thread attributes
++ pthread_attr_t attr;
++ pthread_attr_init(&attr);
++ guarantee(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) == 0, "???");
++
++ // Make sure we run in 1:1 kernel-user-thread mode.
++ if (os::Aix::on_aix()) {
++ guarantee(pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM) == 0, "???");
++ guarantee(pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED) == 0, "???");
++ } // end: aix
++
++ // Start in suspended state, and in os::thread_start, wake the thread up.
++ guarantee(pthread_attr_setsuspendstate_np(&attr, PTHREAD_CREATE_SUSPENDED_NP) == 0, "???");
++
++ // calculate stack size if it's not specified by caller
++ if (os::Aix::supports_variable_stack_size()) {
++ if (stack_size == 0) {
++ stack_size = os::Aix::default_stack_size(thr_type);
++
++ switch (thr_type) {
++ case os::java_thread:
++ // Java threads use ThreadStackSize whose default value can be changed with the flag -Xss.
++ assert(JavaThread::stack_size_at_create() > 0, "this should be set");
++ stack_size = JavaThread::stack_size_at_create();
++ break;
++ case os::compiler_thread:
++ if (CompilerThreadStackSize > 0) {
++ stack_size = (size_t)(CompilerThreadStackSize * K);
++ break;
++ } // else fall through:
++ // use VMThreadStackSize if CompilerThreadStackSize is not defined
++ case os::vm_thread:
++ case os::pgc_thread:
++ case os::cgc_thread:
++ case os::watcher_thread:
++ if (VMThreadStackSize > 0) stack_size = (size_t)(VMThreadStackSize * K);
++ break;
++ }
++ }
++
++ stack_size = MAX2(stack_size, os::Aix::min_stack_allowed);
++ pthread_attr_setstacksize(&attr, stack_size);
++ } // else let pthread_create() pick the default value (96 K on AIX)
++
++ pthread_t tid;
++ int ret = pthread_create(&tid, &attr, (void* (*)(void*)) java_start, thread);
++
++ pthread_attr_destroy(&attr);
++
++ if (ret != 0) {
++ if (PrintMiscellaneous && (Verbose || WizardMode)) {
++ perror("pthread_create()");
++ }
++ // Need to clean up stuff we've allocated so far
++ thread->set_osthread(NULL);
++ delete osthread;
++ return false;
++ }
++
++ // Store pthread info into the OSThread
++ osthread->set_pthread_id(tid);
++
++ return true;
++}
++
++/////////////////////////////////////////////////////////////////////////////
++// attach existing thread
++
++// bootstrap the main thread
++bool os::create_main_thread(JavaThread* thread) {
++ assert(os::Aix::_main_thread == pthread_self(), "should be called inside main thread");
++ return create_attached_thread(thread);
++}
++
++bool os::create_attached_thread(JavaThread* thread) {
++#ifdef ASSERT
++ thread->verify_not_published();
++#endif
++
++ // Allocate the OSThread object
++ OSThread* osthread = new OSThread(NULL, NULL);
++
++ if (osthread == NULL) {
++ return false;
++ }
++
++ // Store pthread info into the OSThread
++ osthread->set_thread_id(os::Aix::gettid());
++ osthread->set_pthread_id(::pthread_self());
++
++ // initialize floating point control register
++ os::Aix::init_thread_fpu_state();
++
++ // some sanity checks
++ CHECK_CURRENT_STACK_PTR(thread->stack_base(), thread->stack_size());
++
++ // Initial thread state is RUNNABLE
++ osthread->set_state(RUNNABLE);
++
++ thread->set_osthread(osthread);
++
++ if (UseNUMA) {
++ int lgrp_id = os::numa_get_group_id();
++ if (lgrp_id != -1) {
++ thread->set_lgrp_id(lgrp_id);
++ }
++ }
++
++ // initialize signal mask for this thread
++ // and save the caller's signal mask
++ os::Aix::hotspot_sigmask(thread);
++
++ return true;
++}
++
++void os::pd_start_thread(Thread* thread) {
++ int status = pthread_continue_np(thread->osthread()->pthread_id());
++ assert(status == 0, "thr_continue failed");
++}
++
++// Free OS resources related to the OSThread
++void os::free_thread(OSThread* osthread) {
++ assert(osthread != NULL, "osthread not set");
++
++ if (Thread::current()->osthread() == osthread) {
++ // Restore caller's signal mask
++ sigset_t sigmask = osthread->caller_sigmask();
++ pthread_sigmask(SIG_SETMASK, &sigmask, NULL);
++ }
++
++ delete osthread;
++}
++
++//////////////////////////////////////////////////////////////////////////////
++// thread local storage
++
++int os::allocate_thread_local_storage() {
++ pthread_key_t key;
++ int rslt = pthread_key_create(&key, NULL);
++ assert(rslt == 0, "cannot allocate thread local storage");
++ return (int)key;
++}
++
++// Note: This is currently not used by VM, as we don't destroy TLS key
++// on VM exit.
++void os::free_thread_local_storage(int index) {
++ int rslt = pthread_key_delete((pthread_key_t)index);
++ assert(rslt == 0, "invalid index");
++}
++
++void os::thread_local_storage_at_put(int index, void* value) {
++ int rslt = pthread_setspecific((pthread_key_t)index, value);
++ assert(rslt == 0, "pthread_setspecific failed");
++}
++
++extern "C" Thread* get_thread() {
++ return ThreadLocalStorage::thread();
++}
++
++////////////////////////////////////////////////////////////////////////////////
++// time support
++
++// Time since start-up in seconds to a fine granularity.
++// Used by VMSelfDestructTimer and the MemProfiler.
++double os::elapsedTime() {
++ return (double)(os::elapsed_counter()) * 0.000001;
++}
++
++jlong os::elapsed_counter() {
++ timeval time;
++ int status = gettimeofday(&time, NULL);
++ return jlong(time.tv_sec) * 1000 * 1000 + jlong(time.tv_usec) - initial_time_count;
++}
++
++jlong os::elapsed_frequency() {
++ return (1000 * 1000);
++}
++
++// For now, we say that AIX does not support vtime. I have no idea
++// whether it can actually be made to (DLD, 9/13/05).
++
++bool os::supports_vtime() { return false; }
++bool os::enable_vtime() { return false; }
++bool os::vtime_enabled() { return false; }
++double os::elapsedVTime() {
++ // better than nothing, but not much
++ return elapsedTime();
++}
++
++jlong os::javaTimeMillis() {
++ timeval time;
++ int status = gettimeofday(&time, NULL);
++ assert(status != -1, "aix error at gettimeofday()");
++ return jlong(time.tv_sec) * 1000 + jlong(time.tv_usec / 1000);
++}
++
++// We need to manually declare mread_real_time,
++// because IBM didn't provide a prototype in time.h.
++// (they probably only ever tested in C, not C++)
++extern "C"
++int mread_real_time(timebasestruct_t *t, size_t size_of_timebasestruct_t);
++
++jlong os::javaTimeNanos() {
++ if (os::Aix::on_pase()) {
++ Unimplemented();
++ return 0;
++ }
++ else {
++ // On AIX use the precision of the processor's real time clock
++ // or time base registers.
++ timebasestruct_t time;
++ int rc;
++
++ // If the CPU has a time register, it will be used and
++ // we have to convert to real time first. After the conversion we have the following data:
++ // time.tb_high [seconds since 00:00:00 UTC on 1.1.1970]
++ // time.tb_low [nanoseconds after the last full second above]
++ // We'd better use mread_real_time here instead of read_real_time
++ // to ensure that we will get a monotonically increasing time.
++ if (mread_real_time(&time, TIMEBASE_SZ) != RTC_POWER) {
++ rc = time_base_to_time(&time, TIMEBASE_SZ);
++ assert(rc != -1, "aix error at time_base_to_time()");
++ }
++ return jlong(time.tb_high) * (1000 * 1000 * 1000) + jlong(time.tb_low);
++ }
++}
++
++void os::javaTimeNanos_info(jvmtiTimerInfo *info_ptr) {
++ {
++ // gettimeofday is based on seconds since the Epoch and thus does not wrap
++ info_ptr->max_value = ALL_64_BITS;
++
++ // gettimeofday follows a real time clock, so it may skip
++ info_ptr->may_skip_backward = true;
++ info_ptr->may_skip_forward = true;
++ }
++
++ info_ptr->kind = JVMTI_TIMER_ELAPSED; // elapsed not CPU time
++}
++
++// Return the real, user, and system times in seconds from an
++// arbitrary fixed point in the past.
++bool os::getTimesSecs(double* process_real_time,
++ double* process_user_time,
++ double* process_system_time) {
++ struct tms ticks;
++ clock_t real_ticks = times(&ticks);
++
++ if (real_ticks == (clock_t) (-1)) {
++ return false;
++ } else {
++ double ticks_per_second = (double) clock_tics_per_sec;
++ *process_user_time = ((double) ticks.tms_utime) / ticks_per_second;
++ *process_system_time = ((double) ticks.tms_stime) / ticks_per_second;
++ *process_real_time = ((double) real_ticks) / ticks_per_second;
++
++ return true;
++ }
++}
++
++
++char * os::local_time_string(char *buf, size_t buflen) {
++ struct tm t;
++ time_t long_time;
++ time(&long_time);
++ localtime_r(&long_time, &t);
++ jio_snprintf(buf, buflen, "%d-%02d-%02d %02d:%02d:%02d",
++ t.tm_year + 1900, t.tm_mon + 1, t.tm_mday,
++ t.tm_hour, t.tm_min, t.tm_sec);
++ return buf;
++}
++
++struct tm* os::localtime_pd(const time_t* clock, struct tm* res) {
++ return localtime_r(clock, res);
++}
++
++////////////////////////////////////////////////////////////////////////////////
++// runtime exit support
++
++// Note: os::shutdown() might be called very early during initialization, or
++// called from a signal handler. Before adding something to os::shutdown(), make
++// sure it is async-safe and can handle a partially initialized VM.
++void os::shutdown() {
++
++ // allow PerfMemory to attempt cleanup of any persistent resources
++ perfMemory_exit();
++
++ // needs to remove the object in the file system
++ AttachListener::abort();
++
++ // flush buffered output, finish log files
++ ostream_abort();
++
++ // Check for abort hook
++ abort_hook_t abort_hook = Arguments::abort_hook();
++ if (abort_hook != NULL) {
++ abort_hook();
++ }
++
++}
++
++// Note: os::abort() might be called very early during initialization, or
++// called from a signal handler. Before adding something to os::abort(), make
++// sure it is async-safe and can handle a partially initialized VM.
++void os::abort(bool dump_core) {
++ os::shutdown();
++ if (dump_core) {
++#ifndef PRODUCT
++ fdStream out(defaultStream::output_fd());
++ out.print_raw("Current thread is ");
++ char buf[16];
++ jio_snprintf(buf, sizeof(buf), UINTX_FORMAT, os::current_thread_id());
++ out.print_raw_cr(buf);
++ out.print_raw_cr("Dumping core ...");
++#endif
++ ::abort(); // dump core
++ }
++
++ ::exit(1);
++}
++
++// Die immediately, no exit hook, no abort hook, no cleanup.
++void os::die() {
++ ::abort();
++}
++
++// This method is a copy of JDK's sysGetLastErrorString
++// from src/solaris/hpi/src/system_md.c
++
++size_t os::lasterror(char *buf, size_t len) {
++
++ if (errno == 0) return 0;
++
++ const char *s = ::strerror(errno);
++ size_t n = ::strlen(s);
++ if (n >= len) {
++ n = len - 1;
++ }
++ ::strncpy(buf, s, n);
++ buf[n] = '\0';
++ return n;
++}
++
++intx os::current_thread_id() { return (intx)pthread_self(); }
++int os::current_process_id() {
++
++ // This implementation returns a unique pid, the pid of the
++ // launcher thread that starts the vm 'process'.
++
++ // Under POSIX, getpid() returns the same pid as the
++ // launcher thread rather than a unique pid per thread.
++ // Use gettid() if you want the old pre-NPTL behaviour.
++
++ // If you are looking for the result of a call to getpid() that
++ // returns a unique pid for the calling thread, then look at the
++ // OSThread::thread_id() method in the osThread_aix.hpp file.
++
++ return (int)(_initial_pid ? _initial_pid : getpid());
++}
++
++// DLL functions
++
++const char* os::dll_file_extension() { return ".so"; }
++
++// This must be hard coded because it's the system's temporary
++// directory, not the java application's temp directory, a la java.io.tmpdir.
++const char* os::get_temp_directory() { return "/tmp"; }
++
++static bool file_exists(const char* filename) {
++ struct stat statbuf;
++ if (filename == NULL || strlen(filename) == 0) {
++ return false;
++ }
++ return os::stat(filename, &statbuf) == 0;
++}
++
++bool os::dll_build_name(char* buffer, size_t buflen,
++ const char* pname, const char* fname) {
++ bool retval = false;
++ // Copied from libhpi
++ const size_t pnamelen = pname ? strlen(pname) : 0;
++
++ // Return error on buffer overflow.
++ if (pnamelen + strlen(fname) + 10 > (size_t) buflen) {
++ *buffer = '\0';
++ return retval;
++ }
++
++ if (pnamelen == 0) {
++ snprintf(buffer, buflen, "lib%s.so", fname);
++ retval = true;
++ } else if (strchr(pname, *os::path_separator()) != NULL) {
++ int n;
++ char** pelements = split_path(pname, &n);
++ for (int i = 0; i < n; i++) {
++ // Really shouldn't be NULL, but check can't hurt
++ if (pelements[i] == NULL || strlen(pelements[i]) == 0) {
++ continue; // skip the empty path values
++ }
++ snprintf(buffer, buflen, "%s/lib%s.so", pelements[i], fname);
++ if (file_exists(buffer)) {
++ retval = true;
++ break;
++ }
++ }
++ // release the storage
++ for (int i = 0; i < n; i++) {
++ if (pelements[i] != NULL) {
++ FREE_C_HEAP_ARRAY(char, pelements[i], mtInternal);
++ }
++ }
++ if (pelements != NULL) {
++ FREE_C_HEAP_ARRAY(char*, pelements, mtInternal);
++ }
++ } else {
++ snprintf(buffer, buflen, "%s/lib%s.so", pname, fname);
++ retval = true;
++ }
++ return retval;
++}
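++
++// Usage sketch for dll_build_name() (buffer and paths hypothetical): given a
++// ':'-separated path list, the first path containing the library wins:
++//   char buf[MAXPATHLEN];
++//   if (os::dll_build_name(buf, sizeof(buf), "/usr/lib:/opt/lib", "verify")) {
++//     // buf now holds e.g. "/usr/lib/libverify.so", if that file exists.
++//   }
++// With an empty pname the bare name "libverify.so" is produced unconditionally.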
++
++// Check if addr is inside libjvm.so.
++bool os::address_is_in_vm(address addr) {
++
++ // Input could be a real pc or a function pointer literal. The latter
++ // would be a function descriptor residing in the data segment of a module.
++
++ const LoadedLibraryModule* lib = LoadedLibraries::find_for_text_address(addr);
++ if (lib) {
++ if (strcmp(lib->get_shortname(), "libjvm.so") == 0) {
++ return true;
++ } else {
++ return false;
++ }
++ } else {
++ lib = LoadedLibraries::find_for_data_address(addr);
++ if (lib) {
++ if (strcmp(lib->get_shortname(), "libjvm.so") == 0) {
++ return true;
++ } else {
++ return false;
++ }
++ } else {
++ return false;
++ }
++ }
++}
++
++// Resolve an AIX function descriptor literal to a code pointer.
++// If the input is a valid code pointer to a text segment of a loaded module,
++// it is returned unchanged.
++// If the input is a valid AIX function descriptor, it is resolved to the
++// code entry point.
++// If the input is neither a valid function descriptor nor a valid code pointer,
++// NULL is returned.
++static address resolve_function_descriptor_to_code_pointer(address p) {
++
++ const LoadedLibraryModule* lib = LoadedLibraries::find_for_text_address(p);
++ if (lib) {
++ // It's a real code pointer.
++ return p;
++ } else {
++ lib = LoadedLibraries::find_for_data_address(p);
++ if (lib) {
++ // pointer to data segment, potential function descriptor
++ address code_entry = (address)(((FunctionDescriptor*)p)->entry());
++ if (LoadedLibraries::find_for_text_address(code_entry)) {
++ // It's a function descriptor.
++ return code_entry;
++ }
++ }
++ }
++ return NULL;
++}
++
++bool os::dll_address_to_function_name(address addr, char *buf,
++ int buflen, int *offset) {
++ if (offset) {
++ *offset = -1;
++ }
++ if (buf) {
++ buf[0] = '\0';
++ }
++
++ // Resolve function ptr literals first.
++ addr = resolve_function_descriptor_to_code_pointer(addr);
++ if (!addr) {
++ return false;
++ }
++
++ // Go through Decoder::decode to call getFuncName which reads the name from the traceback table.
++ return Decoder::decode(addr, buf, buflen, offset);
++}
++
++static int getModuleName(codeptr_t pc, // [in] program counter
++ char* p_name, size_t namelen, // [out] optional: module name
++ char* p_errmsg, size_t errmsglen // [out] optional: user provided buffer for error messages
++ ) {
++
++ // initialize output parameters
++ if (p_name && namelen > 0) {
++ *p_name = '\0';
++ }
++ if (p_errmsg && errmsglen > 0) {
++ *p_errmsg = '\0';
++ }
++
++ const LoadedLibraryModule* const lib = LoadedLibraries::find_for_text_address((address)pc);
++ if (lib) {
++ if (p_name && namelen > 0) {
++ sprintf(p_name, "%.*s", (int) namelen, lib->get_shortname());
++ }
++ return 0;
++ }
++
++ if (Verbose) {
++ fprintf(stderr, "pc outside any module");
++ }
++
++ return -1;
++
++}
++
++bool os::dll_address_to_library_name(address addr, char* buf,
++ int buflen, int* offset) {
++ if (offset) {
++ *offset = -1;
++ }
++ if (buf) {
++ buf[0] = '\0';
++ }
++
++ // Resolve function ptr literals first.
++ addr = resolve_function_descriptor_to_code_pointer(addr);
++ if (!addr) {
++ return false;
++ }
++
++ if (::getModuleName((codeptr_t) addr, buf, buflen, 0, 0) == 0) {
++ return true;
++ }
++ return false;
++}
++
++// Loads a .dll/.so and, in case of error, checks whether the .dll/.so was
++// built for the same architecture as HotSpot is running on.
++void *os::dll_load(const char *filename, char *ebuf, int ebuflen) {
++
++ if (ebuf && ebuflen > 0) {
++ ebuf[0] = '\0';
++ ebuf[ebuflen - 1] = '\0';
++ }
++
++ if (!filename || strlen(filename) == 0) {
++ ::strncpy(ebuf, "dll_load: empty filename specified", ebuflen - 1);
++ return NULL;
++ }
++
++ // RTLD_LAZY is currently not implemented. The library is loaded immediately, with all its dependents.
++ void* result = ::dlopen(filename, RTLD_LAZY);
++ if (result != NULL) {
++ // Reload dll cache. Don't do this in signal handling.
++ LoadedLibraries::reload();
++ return result;
++ } else {
++ // error analysis when dlopen fails
++ const char* const error_report = ::dlerror();
++ if (error_report && ebuf && ebuflen > 0) {
++ snprintf(ebuf, ebuflen - 1, "%s, LIBPATH=%s, LD_LIBRARY_PATH=%s : %s",
++ filename, ::getenv("LIBPATH"), ::getenv("LD_LIBRARY_PATH"), error_report);
++ }
++ }
++ return NULL;
++}
++
++// This comment is inherited from the Linux code, where glibc-2.0 libdl is
++// not MT safe. We take no chances here either and serialize all dlsym()
++// calls through dl_mutex.
++void* os::dll_lookup(void* handle, const char* name) {
++ pthread_mutex_lock(&dl_mutex);
++ void* res = dlsym(handle, name);
++ pthread_mutex_unlock(&dl_mutex);
++ return res;
++}
++
++void* os::get_default_process_handle() {
++ return (void*)::dlopen(NULL, RTLD_LAZY);
++}
++
++void os::print_dll_info(outputStream *st) {
++ st->print_cr("Dynamic libraries:");
++ LoadedLibraries::print(st);
++}
++
++void os::print_os_info(outputStream* st) {
++ st->print("OS:");
++
++ st->print("uname:");
++ struct utsname name;
++ uname(&name);
++ st->print(name.sysname); st->print(" ");
++ st->print(name.nodename); st->print(" ");
++ st->print(name.release); st->print(" ");
++ st->print(name.version); st->print(" ");
++ st->print(name.machine);
++ st->cr();
++
++ // rlimit
++ st->print("rlimit:");
++ struct rlimit rlim;
++
++ st->print(" STACK ");
++ getrlimit(RLIMIT_STACK, &rlim);
++ if (rlim.rlim_cur == RLIM_INFINITY) st->print("infinity");
++ else st->print("%uk", rlim.rlim_cur >> 10);
++
++ st->print(", CORE ");
++ getrlimit(RLIMIT_CORE, &rlim);
++ if (rlim.rlim_cur == RLIM_INFINITY) st->print("infinity");
++ else st->print("%uk", rlim.rlim_cur >> 10);
++
++ st->print(", NPROC ");
++ st->print("%d", sysconf(_SC_CHILD_MAX));
++
++ st->print(", NOFILE ");
++ getrlimit(RLIMIT_NOFILE, &rlim);
++ if (rlim.rlim_cur == RLIM_INFINITY) st->print("infinity");
++ else st->print("%d", rlim.rlim_cur);
++
++ st->print(", AS ");
++ getrlimit(RLIMIT_AS, &rlim);
++ if (rlim.rlim_cur == RLIM_INFINITY) st->print("infinity");
++ else st->print("%uk", rlim.rlim_cur >> 10);
++
++ // Print limits on DATA, because it limits the C-heap.
++ st->print(", DATA ");
++ getrlimit(RLIMIT_DATA, &rlim);
++ if (rlim.rlim_cur == RLIM_INFINITY) st->print("infinity");
++ else st->print("%uk", rlim.rlim_cur >> 10);
++ st->cr();
++
++ // load average
++ st->print("load average:");
++ double loadavg[3] = {-1.L, -1.L, -1.L};
++ os::loadavg(loadavg, 3);
++ st->print("%0.02f %0.02f %0.02f", loadavg[0], loadavg[1], loadavg[2]);
++ st->cr();
++}
++
++void os::print_memory_info(outputStream* st) {
++
++ st->print_cr("Memory:");
++
++ st->print_cr(" default page size: %s", describe_pagesize(os::vm_page_size()));
++ st->print_cr(" default stack page size: %s", describe_pagesize(os::vm_page_size()));
++ st->print_cr(" default shm page size: %s", describe_pagesize(os::Aix::shm_default_page_size()));
++ st->print_cr(" can use 64K pages dynamically: %s", (os::Aix::can_use_64K_pages() ? "yes" :"no"));
++ st->print_cr(" can use 16M pages dynamically: %s", (os::Aix::can_use_16M_pages() ? "yes" :"no"));
++ if (g_multipage_error != 0) {
++ st->print_cr(" multipage error: %d", g_multipage_error);
++ }
++
++ // print out LDR_CNTRL because it affects the default page sizes
++ const char* const ldr_cntrl = ::getenv("LDR_CNTRL");
++ st->print_cr(" LDR_CNTRL=%s.", ldr_cntrl ? ldr_cntrl : "");
++
++ const char* const extshm = ::getenv("EXTSHM");
++ st->print_cr(" EXTSHM=%s.", extshm ? extshm : "");
++
++ // Call os::Aix::get_meminfo() to retrieve memory statistics.
++ os::Aix::meminfo_t mi;
++ if (os::Aix::get_meminfo(&mi)) {
++ char buffer[256];
++ if (os::Aix::on_aix()) {
++ jio_snprintf(buffer, sizeof(buffer),
++ " physical total : %llu\n"
++ " physical free : %llu\n"
++ " swap total : %llu\n"
++ " swap free : %llu\n",
++ mi.real_total,
++ mi.real_free,
++ mi.pgsp_total,
++ mi.pgsp_free);
++ } else {
++ Unimplemented();
++ }
++ st->print_raw(buffer);
++ } else {
++ st->print_cr(" (no more information available)");
++ }
++}
++
++void os::pd_print_cpu_info(outputStream* st) {
++ // cpu
++ st->print("CPU:");
++ st->print("total %d", os::processor_count());
++ // It's not safe to query the number of active processors after a crash.
++ // st->print("(active %d)", os::active_processor_count());
++ st->print(" %s", VM_Version::cpu_features());
++ st->cr();
++}
++
++void os::print_siginfo(outputStream* st, void* siginfo) {
++ // Use common posix version.
++ os::Posix::print_siginfo_brief(st, (const siginfo_t*) siginfo);
++ st->cr();
++}
++
++
++static void print_signal_handler(outputStream* st, int sig,
++ char* buf, size_t buflen);
++
++void os::print_signal_handlers(outputStream* st, char* buf, size_t buflen) {
++ st->print_cr("Signal Handlers:");
++ print_signal_handler(st, SIGSEGV, buf, buflen);
++ print_signal_handler(st, SIGBUS , buf, buflen);
++ print_signal_handler(st, SIGFPE , buf, buflen);
++ print_signal_handler(st, SIGPIPE, buf, buflen);
++ print_signal_handler(st, SIGXFSZ, buf, buflen);
++ print_signal_handler(st, SIGILL , buf, buflen);
++ print_signal_handler(st, INTERRUPT_SIGNAL, buf, buflen);
++ print_signal_handler(st, SR_signum, buf, buflen);
++ print_signal_handler(st, SHUTDOWN1_SIGNAL, buf, buflen);
++ print_signal_handler(st, SHUTDOWN2_SIGNAL , buf, buflen);
++ print_signal_handler(st, SHUTDOWN3_SIGNAL , buf, buflen);
++ print_signal_handler(st, BREAK_SIGNAL, buf, buflen);
++ print_signal_handler(st, SIGTRAP, buf, buflen);
++ print_signal_handler(st, SIGDANGER, buf, buflen);
++}
++
++static char saved_jvm_path[MAXPATHLEN] = {0};
++
++// Find the full path to the current module, libjvm.so or libjvm_g.so
++void os::jvm_path(char *buf, jint buflen) {
++ // Error checking.
++ if (buflen < MAXPATHLEN) {
++ assert(false, "must use a large-enough buffer");
++ buf[0] = '\0';
++ return;
++ }
++ // Lazily resolve the path to the current module.
++ if (saved_jvm_path[0] != 0) {
++ strcpy(buf, saved_jvm_path);
++ return;
++ }
++
++ Dl_info dlinfo;
++ int ret = dladdr(CAST_FROM_FN_PTR(void *, os::jvm_path), &dlinfo);
++ assert(ret != 0, "cannot locate libjvm");
++ char* rp = realpath((char *)dlinfo.dli_fname, buf);
++ assert(rp != NULL, "error in realpath(): maybe the 'path' argument is too long?");
++
++ strcpy(saved_jvm_path, buf);
++}
++
++void os::print_jni_name_prefix_on(outputStream* st, int args_size) {
++ // no prefix required, not even "_"
++}
++
++void os::print_jni_name_suffix_on(outputStream* st, int args_size) {
++ // no suffix required
++}
++
++////////////////////////////////////////////////////////////////////////////////
++// sun.misc.Signal support
++
++static volatile jint sigint_count = 0;
++
++static void
++UserHandler(int sig, void *siginfo, void *context) {
++ // 4511530 - sem_post is serialized and handled by the manager thread. When
++ // the program is interrupted by Ctrl-C, SIGINT is sent to every thread. We
++ // don't want to flood the manager thread with sem_post requests.
++ if (sig == SIGINT && Atomic::add(1, &sigint_count) > 1)
++ return;
++
++ // Ctrl-C was pressed during error reporting, likely because the error
++ // handler failed to abort. Let the VM die immediately.
++ if (sig == SIGINT && is_error_reported()) {
++ os::die();
++ }
++
++ os::signal_notify(sig);
++}
++
++void* os::user_handler() {
++ return CAST_FROM_FN_PTR(void*, UserHandler);
++}
++
++extern "C" {
++ typedef void (*sa_handler_t)(int);
++ typedef void (*sa_sigaction_t)(int, siginfo_t *, void *);
++}
++
++void* os::signal(int signal_number, void* handler) {
++ struct sigaction sigAct, oldSigAct;
++
++ sigfillset(&(sigAct.sa_mask));
++
++ // Do not block out synchronous signals in the signal handler.
++ // Blocking synchronous signals only makes sense if you can really
++ // be sure that those signals won't happen during signal handling,
++ // when the blocking applies. Normal signal handlers are lean and
++ // do not cause signals. But our signal handlers tend to be "risky"
++ // - secondary SIGSEGV, SIGILL or SIGBUS signals may and do happen.
++ // On AIX/PASE there was a case where a SIGSEGV happened, followed
++ // by a SIGILL, which was blocked due to the signal mask. The process
++ // just hung forever. Better to crash from a secondary signal than to hang.
++ sigdelset(&(sigAct.sa_mask), SIGSEGV);
++ sigdelset(&(sigAct.sa_mask), SIGBUS);
++ sigdelset(&(sigAct.sa_mask), SIGILL);
++ sigdelset(&(sigAct.sa_mask), SIGFPE);
++ sigdelset(&(sigAct.sa_mask), SIGTRAP);
++
++ sigAct.sa_flags = SA_RESTART|SA_SIGINFO;
++
++ sigAct.sa_handler = CAST_TO_FN_PTR(sa_handler_t, handler);
++
++ if (sigaction(signal_number, &sigAct, &oldSigAct)) {
++ // -1 means registration failed
++ return (void *)-1;
++ }
++
++ return CAST_FROM_FN_PTR(void*, oldSigAct.sa_handler);
++}
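++
++// A minimal usage sketch (my_handler is hypothetical): os::signal() returns
++// the previously installed handler, so a caller can restore it later:
++//   void* old = os::signal(SIGUSR1, CAST_FROM_FN_PTR(void*, my_handler));
++//   ...
++//   os::signal(SIGUSR1, old);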
++
++void os::signal_raise(int signal_number) {
++ ::raise(signal_number);
++}
++
++//
++// The following code was moved from os.cpp to make this
++// code platform-specific, which it is by its very nature.
++//
++
++// Will be modified when max signal is changed to be dynamic
++int os::sigexitnum_pd() {
++ return NSIG;
++}
++
++// a counter for each possible signal value
++static volatile jint pending_signals[NSIG+1] = { 0 };
++
++// POSIX-specific handshaking semaphore.
++static sem_t sig_sem;
++
++void os::signal_init_pd() {
++ // Initialize signal structures
++ ::memset((void*)pending_signals, 0, sizeof(pending_signals));
++
++ // Initialize signal semaphore
++ int rc = ::sem_init(&sig_sem, 0, 0);
++ guarantee(rc != -1, "sem_init failed");
++}
++
++void os::signal_notify(int sig) {
++ Atomic::inc(&pending_signals[sig]);
++ ::sem_post(&sig_sem);
++}
++
++static int check_pending_signals(bool wait) {
++ Atomic::store(0, &sigint_count);
++ for (;;) {
++ for (int i = 0; i < NSIG + 1; i++) {
++ jint n = pending_signals[i];
++ if (n > 0 && n == Atomic::cmpxchg(n - 1, &pending_signals[i], n)) {
++ return i;
++ }
++ }
++ if (!wait) {
++ return -1;
++ }
++ JavaThread *thread = JavaThread::current();
++ ThreadBlockInVM tbivm(thread);
++
++ bool threadIsSuspended;
++ do {
++ thread->set_suspend_equivalent();
++ // cleared by handle_special_suspend_equivalent_condition() or java_suspend_self()
++
++ ::sem_wait(&sig_sem);
++
++ // were we externally suspended while we were waiting?
++ threadIsSuspended = thread->handle_special_suspend_equivalent_condition();
++ if (threadIsSuspended) {
++ //
++ // The semaphore has been incremented, but while we were waiting
++ // another thread suspended us. We don't want to continue running
++ // while suspended because that would surprise the thread that
++ // suspended us.
++ //
++ ::sem_post(&sig_sem);
++
++ thread->java_suspend_self();
++ }
++ } while (threadIsSuspended);
++ }
++}
++
++int os::signal_lookup() {
++ return check_pending_signals(false);
++}
++
++int os::signal_wait() {
++ return check_pending_signals(true);
++}
++
++////////////////////////////////////////////////////////////////////////////////
++// Virtual Memory
++
++// AddrRange describes an immutable address range
++//
++// This is a helper class for the 'shared memory bookkeeping' below.
++class AddrRange {
++ friend class ShmBkBlock;
++
++ char* _start;
++ size_t _size;
++
++public:
++
++ AddrRange(char* start, size_t size)
++ : _start(start), _size(size)
++ {}
++
++ AddrRange(const AddrRange& r)
++ : _start(r.start()), _size(r.size())
++ {}
++
++ char* start() const { return _start; }
++ size_t size() const { return _size; }
++ char* end() const { return _start + _size; }
++ bool is_empty() const { return _size == 0 ? true : false; }
++
++ static AddrRange empty_range() { return AddrRange(NULL, 0); }
++
++ bool contains(const char* p) const {
++ return start() <= p && end() > p;
++ }
++
++ bool contains(const AddrRange& range) const {
++ return start() <= range.start() && end() >= range.end();
++ }
++
++ bool intersects(const AddrRange& range) const {
++ return (range.start() <= start() && range.end() > start()) ||
++ (range.start() < end() && range.end() >= end()) ||
++ contains(range);
++ }
++
++ bool is_same_range(const AddrRange& range) const {
++ return start() == range.start() && size() == range.size();
++ }
++
++ // return the closest inside range consisting of whole pages
++ AddrRange find_closest_aligned_range(size_t pagesize) const {
++ if (pagesize == 0 || is_empty()) {
++ return empty_range();
++ }
++ char* const from = (char*)align_size_up((intptr_t)_start, pagesize);
++ char* const to = (char*)align_size_down((intptr_t)end(), pagesize);
++ if (from > to) {
++ return empty_range();
++ }
++ return AddrRange(from, to - from);
++ }
++};
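++
++// A worked example for find_closest_aligned_range() (values hypothetical):
++// with SIZE_4K pages, the range [0x1003, 0x5001) shrinks to the largest
++// page-aligned range lying fully inside it, [0x2000, 0x5000):
++//   AddrRange r((char*)0x1003, 0x3FFE);
++//   AddrRange a = r.find_closest_aligned_range(SIZE_4K);
++//   // a.start() == (char*)0x2000, a.size() == 0x3000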
++
++////////////////////////////////////////////////////////////////////////////
++// shared memory bookkeeping
++//
++// The os::reserve_memory() API and friends hand out different kinds of memory, depending
++// on need and circumstances. Memory may be allocated with mmap() or with shmget/shmat.
++//
++// But these memory types have to be treated differently. For example, to uncommit
++// mmap-based memory, msync(MS_INVALIDATE) is needed, to uncommit shmat-based memory,
++// disclaim64() is needed.
++//
++// Therefore we need to keep track of the allocated memory segments and their
++// properties.
++
++// ShmBkBlock: base class for all blocks in the shared memory bookkeeping
++class ShmBkBlock {
++
++ ShmBkBlock* _next;
++
++protected:
++
++ AddrRange _range;
++ const size_t _pagesize;
++ const bool _pinned;
++
++public:
++
++ ShmBkBlock(AddrRange range, size_t pagesize, bool pinned)
++ : _range(range), _pagesize(pagesize), _pinned(pinned) , _next(NULL) {
++
++ assert(_pagesize == SIZE_4K || _pagesize == SIZE_64K || _pagesize == SIZE_16M, "invalid page size");
++ assert(!_range.is_empty(), "invalid range");
++ }
++
++ virtual void print(outputStream* st) const {
++ st->print("0x%p ... 0x%p (%llu) - %d %s pages - %s",
++ _range.start(), _range.end(), _range.size(),
++ _range.size() / _pagesize, describe_pagesize(_pagesize),
++ _pinned ? "pinned" : "");
++ }
++
++ enum Type { MMAP, SHMAT };
++ virtual Type getType() = 0;
++
++ char* base() const { return _range.start(); }
++ size_t size() const { return _range.size(); }
++
++ void setAddrRange(AddrRange range) {
++ _range = range;
++ }
++
++ bool containsAddress(const char* p) const {
++ return _range.contains(p);
++ }
++
++ bool containsRange(const char* p, size_t size) const {
++ return _range.contains(AddrRange((char*)p, size));
++ }
++
++ bool isSameRange(const char* p, size_t size) const {
++ return _range.is_same_range(AddrRange((char*)p, size));
++ }
++
++ virtual bool disclaim(char* p, size_t size) = 0;
++ virtual bool release() = 0;
++
++ // blocks live in a list.
++ ShmBkBlock* next() const { return _next; }
++ void set_next(ShmBkBlock* blk) { _next = blk; }
++
++}; // end: ShmBkBlock
++
++
++// ShmBkMappedBlock: describes a block allocated with mmap()
++class ShmBkMappedBlock : public ShmBkBlock {
++public:
++
++ ShmBkMappedBlock(AddrRange range)
++ : ShmBkBlock(range, SIZE_4K, false) {} // mmap: always 4K, never pinned
++
++ void print(outputStream* st) const {
++ ShmBkBlock::print(st);
++ st->print_cr(" - mmap'ed");
++ }
++
++ Type getType() {
++ return MMAP;
++ }
++
++ bool disclaim(char* p, size_t size) {
++
++ AddrRange r(p, size);
++
++ guarantee(_range.contains(r), "invalid disclaim");
++
++ // only disclaim whole ranges.
++ const AddrRange r2 = r.find_closest_aligned_range(_pagesize);
++ if (r2.is_empty()) {
++ return true;
++ }
++
++ const int rc = ::msync(r2.start(), r2.size(), MS_INVALIDATE);
++
++ if (rc != 0) {
++ warning("msync(0x%p, %llu, MS_INVALIDATE) failed (%d)\n", r2.start(), r2.size(), errno);
++ }
++
++ return rc == 0 ? true : false;
++ }
++
++ bool release() {
++ // mmap'ed blocks are released using munmap
++ if (::munmap(_range.start(), _range.size()) != 0) {
++ warning("munmap(0x%p, %llu) failed (%d)\n", _range.start(), _range.size(), errno);
++ return false;
++ }
++ return true;
++ }
++}; // end: ShmBkMappedBlock
++
++// ShmBkShmatedBlock: describes a block allocated with shmget()/shmat()
++class ShmBkShmatedBlock : public ShmBkBlock {
++public:
++
++ ShmBkShmatedBlock(AddrRange range, size_t pagesize, bool pinned)
++ : ShmBkBlock(range, pagesize, pinned) {}
++
++ void print(outputStream* st) const {
++ ShmBkBlock::print(st);
++ st->print_cr(" - shmat'ed");
++ }
++
++ Type getType() {
++ return SHMAT;
++ }
++
++ bool disclaim(char* p, size_t size) {
++
++ AddrRange r(p, size);
++
++ if (_pinned) {
++ return true;
++ }
++
++ // shmat'ed blocks are disclaimed using disclaim64
++ guarantee(_range.contains(r), "invalid disclaim");
++
++ // only disclaim whole ranges.
++ const AddrRange r2 = r.find_closest_aligned_range(_pagesize);
++ if (r2.is_empty()) {
++ return true;
++ }
++
++ const bool rc = my_disclaim64(r2.start(), r2.size());
++
++ if (Verbose && !rc) {
++ warning("failed to disclaim shm %p-%p\n", r2.start(), r2.end());
++ }
++
++ return rc;
++ }
++
++ bool release() {
++ bool rc = false;
++ if (::shmdt(_range.start()) != 0) {
++ warning("shmdt(0x%p) failed (%d)\n", _range.start(), errno);
++ } else {
++ rc = true;
++ }
++ return rc;
++ }
++
++}; // end: ShmBkShmatedBlock
++
++static ShmBkBlock* g_shmbk_list = NULL;
++static volatile jint g_shmbk_table_lock = 0;
++
++// keep some usage statistics
++static struct {
++ int nodes; // number of nodes in list
++ size_t bytes; // reserved - not committed - bytes.
++ int reserves; // how often reserve was called
++ int lookups; // how often a lookup was made
++} g_shmbk_stats = { 0, 0, 0, 0 };
++
++// add information about a shared memory segment to the bookkeeping
++static void shmbk_register(ShmBkBlock* p_block) {
++ guarantee(p_block, "logic error");
++ p_block->set_next(g_shmbk_list);
++ g_shmbk_list = p_block;
++ g_shmbk_stats.reserves ++;
++ g_shmbk_stats.bytes += p_block->size();
++ g_shmbk_stats.nodes ++;
++}
++
++// remove information about a shared memory segment by its starting address
++static void shmbk_unregister(ShmBkBlock* p_block) {
++ ShmBkBlock* p = g_shmbk_list;
++ ShmBkBlock* prev = NULL;
++ while (p) {
++ if (p == p_block) {
++ if (prev) {
++ prev->set_next(p->next());
++ } else {
++ g_shmbk_list = p->next();
++ }
++ g_shmbk_stats.nodes --;
++ g_shmbk_stats.bytes -= p->size();
++ return;
++ }
++ prev = p;
++ p = p->next();
++ }
++ assert(false, "should not happen");
++}
++
++// given a pointer, return shared memory bookkeeping record for the segment it points into
++// using the returned block info must happen under lock protection
++static ShmBkBlock* shmbk_find_by_containing_address(const char* addr) {
++ g_shmbk_stats.lookups ++;
++ ShmBkBlock* p = g_shmbk_list;
++ while (p) {
++ if (p->containsAddress(addr)) {
++ return p;
++ }
++ p = p->next();
++ }
++ return NULL;
++}
++
++// dump all information about all memory segments allocated with os::reserve_memory()
++void shmbk_dump_info() {
++ tty->print_cr("-- shared mem bookkeeping (alive: %d segments, %llu bytes, "
++ "total reserves: %d total lookups: %d)",
++ g_shmbk_stats.nodes, g_shmbk_stats.bytes, g_shmbk_stats.reserves, g_shmbk_stats.lookups);
++ const ShmBkBlock* p = g_shmbk_list;
++ int i = 0;
++ while (p) {
++ p->print(tty);
++ p = p->next();
++ i ++;
++ }
++}
++
++#define LOCK_SHMBK { ThreadCritical _LOCK_SHMBK;
++#define UNLOCK_SHMBK }
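++// LOCK_SHMBK/UNLOCK_SHMBK form a scoped critical section: LOCK_SHMBK opens a
++// block and constructs a ThreadCritical; UNLOCK_SHMBK closes the block, so the
++// lock is released when the ThreadCritical goes out of scope. Usage pattern:
++//
++// LOCK_SHMBK
++// ShmBkBlock* const block = shmbk_find_by_containing_address(addr);
++// // ... use block under lock protection ...
++// UNLOCK_SHMBK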
++
++// End: shared memory bookkeeping
++////////////////////////////////////////////////////////////////////////////////////////////////////
++
++int os::vm_page_size() {
++ // Seems redundant as all get out
++ assert(os::Aix::page_size() != -1, "must call os::init");
++ return os::Aix::page_size();
++}
++
++// Aix allocates memory by pages.
++int os::vm_allocation_granularity() {
++ assert(os::Aix::page_size() != -1, "must call os::init");
++ return os::Aix::page_size();
++}
++
++int os::Aix::commit_memory_impl(char* addr, size_t size, bool exec) {
++
++ // Commit is a noop. There is no explicit commit
++ // needed on AIX. Memory is committed when touched.
++ //
++ // Debug : check address range for validity
++#ifdef ASSERT
++ LOCK_SHMBK
++ ShmBkBlock* const block = shmbk_find_by_containing_address(addr);
++ if (!block) {
++ fprintf(stderr, "invalid pointer: " INTPTR_FORMAT "\n", addr);
++ shmbk_dump_info();
++ assert(false, "invalid pointer");
++ return false;
++ } else if (!block->containsRange(addr, size)) {
++ fprintf(stderr, "invalid range: " INTPTR_FORMAT " .. " INTPTR_FORMAT "\n", addr, addr + size);
++ shmbk_dump_info();
++ assert(false, "invalid range");
++ return false;
++ }
++ UNLOCK_SHMBK
++#endif // ASSERT
++
++ return 0;
++}
++
++bool os::pd_commit_memory(char* addr, size_t size, bool exec) {
++ return os::Aix::commit_memory_impl(addr, size, exec) == 0;
++}
++
++void os::pd_commit_memory_or_exit(char* addr, size_t size, bool exec,
++ const char* mesg) {
++ assert(mesg != NULL, "mesg must be specified");
++ os::Aix::commit_memory_impl(addr, size, exec);
++}
++
++int os::Aix::commit_memory_impl(char* addr, size_t size,
++ size_t alignment_hint, bool exec) {
++ return os::Aix::commit_memory_impl(addr, size, exec);
++}
++
++bool os::pd_commit_memory(char* addr, size_t size, size_t alignment_hint,
++ bool exec) {
++ return os::Aix::commit_memory_impl(addr, size, alignment_hint, exec) == 0;
++}
++
++void os::pd_commit_memory_or_exit(char* addr, size_t size,
++ size_t alignment_hint, bool exec,
++ const char* mesg) {
++ os::Aix::commit_memory_impl(addr, size, alignment_hint, exec);
++}
++
++bool os::pd_uncommit_memory(char* addr, size_t size) {
++
++ // Delegate to ShmBkBlock class which knows how to uncommit its memory.
++
++ bool rc = false;
++ LOCK_SHMBK
++ ShmBkBlock* const block = shmbk_find_by_containing_address(addr);
++ if (!block) {
++ fprintf(stderr, "invalid pointer: 0x%p.\n", addr);
++ shmbk_dump_info();
++ assert(false, "invalid pointer");
++ return false;
++ } else if (!block->containsRange(addr, size)) {
++ fprintf(stderr, "invalid range: 0x%p .. 0x%p.\n", addr, addr + size);
++ shmbk_dump_info();
++ assert(false, "invalid range");
++ return false;
++ }
++ rc = block->disclaim(addr, size);
++ UNLOCK_SHMBK
++
++ if (Verbose && !rc) {
++ warning("failed to disclaim 0x%p .. 0x%p (0x%llX bytes).", addr, addr + size, size);
++ }
++ return rc;
++}
++
++bool os::pd_create_stack_guard_pages(char* addr, size_t size) {
++ return os::guard_memory(addr, size);
++}
++
++bool os::remove_stack_guard_pages(char* addr, size_t size) {
++ return os::unguard_memory(addr, size);
++}
++
++void os::pd_realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
++}
++
++void os::pd_free_memory(char *addr, size_t bytes, size_t alignment_hint) {
++}
++
++void os::numa_make_global(char *addr, size_t bytes) {
++}
++
++void os::numa_make_local(char *addr, size_t bytes, int lgrp_hint) {
++}
++
++bool os::numa_topology_changed() {
++ return false;
++}
++
++size_t os::numa_get_groups_num() {
++ return 1;
++}
++
++int os::numa_get_group_id() {
++ return 0;
++}
++
++size_t os::numa_get_leaf_groups(int *ids, size_t size) {
++ if (size > 0) {
++ ids[0] = 0;
++ return 1;
++ }
++ return 0;
++}
++
++bool os::get_page_info(char *start, page_info* info) {
++ return false;
++}
++
++char *os::scan_pages(char *start, char* end, page_info* page_expected, page_info* page_found) {
++ return end;
++}
++
++// Flags for reserve_shmatted_memory:
++#define RESSHM_WISHADDR_OR_FAIL 1
++#define RESSHM_TRY_16M_PAGES 2
++#define RESSHM_16M_PAGES_OR_FAIL 4
++
++// Result of reserve_shmatted_memory:
++struct shmatted_memory_info_t {
++ char* addr;
++ size_t pagesize;
++ bool pinned;
++};
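++// Expected call pattern (see os::pd_attempt_reserve_memory_at() further down):
++//
++// shmatted_memory_info_t info;
++// if (reserve_shmatted_memory(bytes, requested_addr, RESSHM_WISHADDR_OR_FAIL, &info)) {
++// // use info.addr, info.pagesize, info.pinned
++// }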
++
++// Reserve a section of shmatted memory.
++// params:
++// bytes [in]: size of memory, in bytes
++// requested_addr [in]: wish address.
++// NULL = no wish.
++// If RESSHM_WISHADDR_OR_FAIL is set in flags and wish address cannot
++// be obtained, function will fail. Otherwise wish address is treated as hint and
++// another pointer is returned.
++// flags [in]: some flags. Valid flags are:
++// RESSHM_WISHADDR_OR_FAIL - fail if wish address is given and cannot be obtained.
++// RESSHM_TRY_16M_PAGES - try to allocate from 16M page pool
++// (requires UseLargePages and Use16MPages)
++// RESSHM_16M_PAGES_OR_FAIL - if you cannot allocate from 16M page pool, fail.
++// Otherwise any other page size will do.
++// p_info [out] : holds information about the created shared memory segment.
++static bool reserve_shmatted_memory(size_t bytes, char* requested_addr, int flags, shmatted_memory_info_t* p_info) {
++
++ assert(p_info, "parameter error");
++
++ // init output struct.
++ p_info->addr = NULL;
++
++ // neither should we be here for EXTSHM=ON.
++ if (os::Aix::extshm()) {
++ ShouldNotReachHere();
++ }
++
++ // extract flags. sanity checks.
++ const bool wishaddr_or_fail =
++ flags & RESSHM_WISHADDR_OR_FAIL;
++ const bool try_16M_pages =
++ flags & RESSHM_TRY_16M_PAGES;
++ const bool f16M_pages_or_fail =
++ flags & RESSHM_16M_PAGES_OR_FAIL;
++
++ // first check: if a wish address is given and it is mandatory, but not aligned to segment boundary,
++ // shmat will fail anyway, so save some cycles by failing right away
++ if (requested_addr && ((uintptr_t)requested_addr % SIZE_256M != 0)) {
++ if (wishaddr_or_fail) {
++ return false;
++ } else {
++ requested_addr = NULL;
++ }
++ }
++
++ char* addr = NULL;
++
++ // Align size of shm up to the largest possible page size, to avoid errors later on when we try to change
++ // pagesize dynamically.
++ const size_t size = align_size_up(bytes, SIZE_16M);
++
++ // reserve the shared segment
++ int shmid = shmget(IPC_PRIVATE, size, IPC_CREAT | S_IRUSR | S_IWUSR);
++ if (shmid == -1) {
++ warning("shmget(.., %lld, ..) failed (errno: %d).", size, errno);
++ return false;
++ }
++
++ // Important note:
++ // It is very important that we, upon leaving this function, do not leave a shm segment alive.
++ // We must right after attaching it remove it from the system. System V shm segments are global and
++ // survive the process.
++ // So, from here on: Do not assert. Do not return. Always do a "goto cleanup_shm".
++
++ // try forcing the page size
++ size_t pagesize = -1; // unknown so far
++
++ if (UseLargePages) {
++
++ struct shmid_ds shmbuf;
++ memset(&shmbuf, 0, sizeof(shmbuf));
++
++ // First, try to take from 16M page pool if...
++ if (os::Aix::can_use_16M_pages() // we can ...
++ && Use16MPages // we are not explicitly forbidden to do so (-XX:-Use16MPages)..
++ && try_16M_pages) { // caller wants us to.
++ shmbuf.shm_pagesize = SIZE_16M;
++ if (shmctl(shmid, SHM_PAGESIZE, &shmbuf) == 0) {
++ pagesize = SIZE_16M;
++ } else {
++ warning("Failed to allocate %d 16M pages. 16M page pool might be exhausted. (shmctl failed with %d)",
++ size / SIZE_16M, errno);
++ if (f16M_pages_or_fail) {
++ goto cleanup_shm;
++ }
++ }
++ }
++
++ // Nothing yet? Try setting 64K pages. Note that I never saw this fail, but in theory it might,
++ // because the 64K page pool may also be exhausted.
++ if (pagesize == -1) {
++ shmbuf.shm_pagesize = SIZE_64K;
++ if (shmctl(shmid, SHM_PAGESIZE, &shmbuf) == 0) {
++ pagesize = SIZE_64K;
++ } else {
++ warning("Failed to allocate %d 64K pages. (shmctl failed with %d)",
++ size / SIZE_64K, errno);
++ // here I give up. leave page_size -1 - later, after attaching, we will query the
++ // real page size of the attached memory. (in theory, it may be something different
++ // from 4K if LDR_CNTRL SHM_PSIZE is set)
++ }
++ }
++ }
++
++ // sanity point
++ assert(pagesize == -1 || pagesize == SIZE_16M || pagesize == SIZE_64K, "wrong page size");
++
++ // Now attach the shared segment.
++ addr = (char*) shmat(shmid, requested_addr, 0);
++ if (addr == (char*)-1) {
++ // How to handle attach failure:
++ // If it failed for a specific wish address, tolerate this: in that case, if wish address was
++ // mandatory, fail, if not, retry anywhere.
++ // If it failed for any other reason, treat that as fatal error.
++ addr = NULL;
++ if (requested_addr) {
++ if (wishaddr_or_fail) {
++ goto cleanup_shm;
++ } else {
++ addr = (char*) shmat(shmid, NULL, 0);
++ if (addr == (char*)-1) { // fatal
++ addr = NULL;
++ warning("shmat failed (errno: %d)", errno);
++ goto cleanup_shm;
++ }
++ }
++ } else { // fatal
++ addr = NULL;
++ warning("shmat failed (errno: %d)", errno);
++ goto cleanup_shm;
++ }
++ }
++
++ // sanity point
++ assert(addr && addr != (char*) -1, "wrong address");
++
++ // after successful Attach remove the segment - right away.
++ if (::shmctl(shmid, IPC_RMID, NULL) == -1) {
++ warning("shmctl(%u, IPC_RMID) failed (%d)\n", shmid, errno);
++ guarantee(false, "failed to remove shared memory segment!");
++ }
++ shmid = -1;
++
++ // query the real page size. In case setting the page size did not work (see above), the system
++ // may have given us something other than 4K (LDR_CNTRL)
++ {
++ const size_t real_pagesize = os::Aix::query_pagesize(addr);
++ if (pagesize != -1) {
++ assert(pagesize == real_pagesize, "unexpected pagesize after shmat");
++ } else {
++ pagesize = real_pagesize;
++ }
++ }
++
++ // Now register the reserved block with internal book keeping.
++ LOCK_SHMBK
++ const bool pinned = pagesize >= SIZE_16M ? true : false;
++ ShmBkShmatedBlock* const p_block = new ShmBkShmatedBlock(AddrRange(addr, size), pagesize, pinned);
++ assert(p_block, "");
++ shmbk_register(p_block);
++ UNLOCK_SHMBK
++
++cleanup_shm:
++
++ // if we have not done so yet, remove the shared memory segment. This is very important.
++ if (shmid != -1) {
++ if (::shmctl(shmid, IPC_RMID, NULL) == -1) {
++ warning("shmctl(%u, IPC_RMID) failed (%d)\n", shmid, errno);
++ guarantee(false, "failed to remove shared memory segment!");
++ }
++ shmid = -1;
++ }
++
++ // trace
++ if (Verbose && !addr) {
++ if (requested_addr != NULL) {
++ warning("failed to shm-allocate 0x%llX bytes at wish address 0x%p.", size, requested_addr);
++ } else {
++ warning("failed to shm-allocate 0x%llX bytes at any address.", size);
++ }
++ }
++
++ // hand info to caller
++ if (addr) {
++ p_info->addr = addr;
++ p_info->pagesize = pagesize;
++ p_info->pinned = pagesize == SIZE_16M ? true : false;
++ }
++
++ // sanity test:
++ if (requested_addr && addr && wishaddr_or_fail) {
++ guarantee(addr == requested_addr, "shmat error");
++ }
++
++ // just one more test to really make sure we have no dangling shm segments.
++ guarantee(shmid == -1, "dangling shm segments");
++
++ return addr ? true : false;
++
++} // end: reserve_shmatted_memory
++
++// Reserve memory using mmap. Behaves the same as reserve_shmatted_memory():
++// will return NULL in case of an error.
++static char* reserve_mmaped_memory(size_t bytes, char* requested_addr) {
++
++ // if a wish address is given, but not aligned to 4K page boundary, mmap will fail.
++ if (requested_addr && ((uintptr_t)requested_addr % os::vm_page_size() != 0)) {
++ warning("Wish address 0x%p not aligned to page boundary.", requested_addr);
++ return NULL;
++ }
++
++ const size_t size = align_size_up(bytes, SIZE_4K);
++
++ // Note: MAP_SHARED (instead of MAP_PRIVATE) needed to be able to
++ // msync(MS_INVALIDATE) (see os::uncommit_memory)
++ int flags = MAP_ANONYMOUS | MAP_SHARED;
++
++ // MAP_FIXED is needed to enforce requested_addr - manpage is vague about what
++ // it means if wishaddress is given but MAP_FIXED is not set.
++ //
++ // Note however that this changes semantics in SPEC1170 mode insofar as MAP_FIXED
++ // clobbers the address range, which is probably not what the caller wants. That's
++ // why I assert here (again) that the SPEC1170 compat mode is off.
++ // If we want to be able to run under SPEC1170, we have to do some porting and
++ // testing.
++ if (requested_addr != NULL) {
++ assert(!os::Aix::xpg_sus_mode(), "SPEC1170 mode not allowed.");
++ flags |= MAP_FIXED;
++ }
++
++ char* addr = (char*)::mmap(requested_addr, size, PROT_READ|PROT_WRITE|PROT_EXEC, flags, -1, 0);
++
++ if (addr == MAP_FAILED) {
++ // attach failed: tolerate for specific wish addresses. Not being able to attach
++ // anywhere is a fatal error.
++ if (requested_addr == NULL) {
++ // It's ok to fail here if the machine does not have enough memory.
++ warning("mmap(NULL, 0x%llX, ..) failed (%d)", size, errno);
++ }
++ addr = NULL;
++ goto cleanup_mmap;
++ }
++
++ // If we did request a specific address and that address was not available, fail.
++ if (addr && requested_addr) {
++ guarantee(addr == requested_addr, "unexpected");
++ }
++
++ // register this mmap'ed segment with book keeping
++ LOCK_SHMBK
++ ShmBkMappedBlock* const p_block = new ShmBkMappedBlock(AddrRange(addr, size));
++ assert(p_block, "");
++ shmbk_register(p_block);
++ UNLOCK_SHMBK
++
++cleanup_mmap:
++
++ // trace
++ if (Verbose) {
++ if (addr) {
++ fprintf(stderr, "mmap-allocated 0x%p .. 0x%p (0x%llX bytes)\n", addr, addr + bytes, bytes);
++ }
++ else {
++ if (requested_addr != NULL) {
++ warning("failed to mmap-allocate 0x%llX bytes at wish address 0x%p.", bytes, requested_addr);
++ } else {
++ warning("failed to mmap-allocate 0x%llX bytes at any address.", bytes);
++ }
++ }
++ }
++
++ return addr;
++
++} // end: reserve_mmaped_memory
++
++// Reserves and attaches a shared memory segment.
++// Will assert if a wish address is given and could not be obtained.
++char* os::pd_reserve_memory(size_t bytes, char* requested_addr, size_t alignment_hint) {
++ return os::attempt_reserve_memory_at(bytes, requested_addr);
++}
++
++bool os::pd_release_memory(char* addr, size_t size) {
++
++ // delegate to ShmBkBlock class which knows how to uncommit its memory.
++
++ bool rc = false;
++ LOCK_SHMBK
++ ShmBkBlock* const block = shmbk_find_by_containing_address(addr);
++ if (!block) {
++ fprintf(stderr, "invalid pointer: 0x%p.\n", addr);
++ shmbk_dump_info();
++ assert(false, "invalid pointer");
++ return false;
++ }
++ else if (!block->isSameRange(addr, size)) {
++ if (block->getType() == ShmBkBlock::MMAP) {
++ // Release only the same range, or the beginning or the end of a range.
++ if (block->base() == addr && size < block->size()) {
++ ShmBkMappedBlock* const b = new ShmBkMappedBlock(AddrRange(block->base() + size, block->size() - size));
++ assert(b, "");
++ shmbk_register(b);
++ block->setAddrRange(AddrRange(addr, size));
++ }
++ else if (addr > block->base() && addr + size == block->base() + block->size()) {
++ ShmBkMappedBlock* const b = new ShmBkMappedBlock(AddrRange(block->base(), block->size() - size));
++ assert(b, "");
++ shmbk_register(b);
++ block->setAddrRange(AddrRange(addr, size));
++ }
++ else {
++ fprintf(stderr, "invalid mmap range: 0x%p .. 0x%p.\n", addr, addr + size);
++ shmbk_dump_info();
++ assert(false, "invalid mmap range");
++ return false;
++ }
++ }
++ else {
++ // Release only the same range. No partial release allowed.
++ // Soften the requirement a bit, because the caller may think it owns a smaller
++ // size than the block actually has, due to alignment etc.
++ if (block->base() != addr || block->size() < size) {
++ fprintf(stderr, "invalid shmget range: 0x%p .. 0x%p.\n", addr, addr + size);
++ shmbk_dump_info();
++ assert(false, "invalid shmget range");
++ return false;
++ }
++ }
++ }
++ rc = block->release();
++ assert(rc, "release failed");
++ // remove block from bookkeeping
++ shmbk_unregister(block);
++ delete block;
++ UNLOCK_SHMBK
++
++ if (!rc) {
++ warning("failed to release %lu bytes at 0x%p", size, addr);
++ }
++
++ return rc;
++}
++
++static bool checked_mprotect(char* addr, size_t size, int prot) {
++
++ // Little problem here: if SPEC1170 behaviour is off, mprotect() on AIX will
++ // not tell me if protection failed when trying to protect an un-protectable range.
++ //
++ // This means if the memory was allocated using shmget/shmat, protection won't work
++ // but mprotect will still return 0:
++ //
++ // See http://publib.boulder.ibm.com/infocenter/pseries/v5r3/index.jsp?topic=/com.ibm.aix.basetechref/doc/basetrf1/mprotect.htm
++
++ bool rc = ::mprotect(addr, size, prot) == 0 ? true : false;
++
++ if (!rc) {
++ const char* const s_errno = strerror(errno);
++ warning("mprotect(" PTR_FORMAT "-" PTR_FORMAT ", 0x%X) failed (%s).", addr, addr + size, prot, s_errno);
++ return false;
++ }
++
++ // mprotect success check
++ //
++ // Mprotect said it changed the protection but can I believe it?
++ //
++ // To be sure I need to check the protection afterwards. Try to
++ // read from protected memory and check whether that causes a segfault.
++ //
++ if (!os::Aix::xpg_sus_mode()) {
++
++ if (StubRoutines::SafeFetch32_stub()) {
++
++ const bool read_protected =
++ (SafeFetch32((int*)addr, 0x12345678) == 0x12345678 &&
++ SafeFetch32((int*)addr, 0x76543210) == 0x76543210) ? true : false;
++
++ if (prot & PROT_READ) {
++ rc = !read_protected;
++ } else {
++ rc = read_protected;
++ }
++ }
++ }
++ if (!rc) {
++ assert(false, "mprotect failed.");
++ }
++ return rc;
++}
++
++// Set protections specified
++bool os::protect_memory(char* addr, size_t size, ProtType prot, bool is_committed) {
++ unsigned int p = 0;
++ switch (prot) {
++ case MEM_PROT_NONE: p = PROT_NONE; break;
++ case MEM_PROT_READ: p = PROT_READ; break;
++ case MEM_PROT_RW: p = PROT_READ|PROT_WRITE; break;
++ case MEM_PROT_RWX: p = PROT_READ|PROT_WRITE|PROT_EXEC; break;
++ default:
++ ShouldNotReachHere();
++ }
++ // is_committed is unused.
++ return checked_mprotect(addr, size, p);
++}
++
++bool os::guard_memory(char* addr, size_t size) {
++ return checked_mprotect(addr, size, PROT_NONE);
++}
++
++bool os::unguard_memory(char* addr, size_t size) {
++ return checked_mprotect(addr, size, PROT_READ|PROT_WRITE|PROT_EXEC);
++}
++
++// Large page support
++
++static size_t _large_page_size = 0;
++
++// Enable large page support if OS allows that.
++void os::large_page_init() {
++
++ // Note: os::Aix::query_multipage_support must run first.
++
++ if (!UseLargePages) {
++ return;
++ }
++
++ if (!Aix::can_use_64K_pages()) {
++ assert(!Aix::can_use_16M_pages(), "64K is a precondition for 16M.");
++ UseLargePages = false;
++ return;
++ }
++
++ if (!Aix::can_use_16M_pages() && Use16MPages) {
++ fprintf(stderr, "Cannot use 16M pages. Please ensure that there is a 16M page pool "
++ "and that the VM runs with CAP_BYPASS_RAC_VMM and CAP_PROPAGATE capabilities.\n");
++ }
++
++ // Do not report 16M page alignment as part of os::_page_sizes if we are
++ // explicitly forbidden from using 16M pages. Doing so would increase the
++ // alignment the garbage collector calculates with, slightly increasing
++ // heap usage. We should only pay for 16M alignment if we really want to
++ // use 16M pages.
++ if (Use16MPages && Aix::can_use_16M_pages()) {
++ _large_page_size = SIZE_16M;
++ _page_sizes[0] = SIZE_16M;
++ _page_sizes[1] = SIZE_64K;
++ _page_sizes[2] = SIZE_4K;
++ _page_sizes[3] = 0;
++ } else if (Aix::can_use_64K_pages()) {
++ _large_page_size = SIZE_64K;
++ _page_sizes[0] = SIZE_64K;
++ _page_sizes[1] = SIZE_4K;
++ _page_sizes[2] = 0;
++ }
++
++ if (Verbose) {
++ fprintf(stderr, "Default large page size is 0x%llX.\n", _large_page_size);
++ }
++} // end: os::large_page_init()
++
++char* os::reserve_memory_special(size_t bytes, size_t alignment, char* req_addr, bool exec) {
++ // "exec" is passed in but not used. Creating the shared image for
++ // the code cache doesn't have an SHM_X executable permission to check.
++ Unimplemented();
++ return 0;
++}
++
++bool os::release_memory_special(char* base, size_t bytes) {
++ // detaching the SHM segment will also delete it, see reserve_memory_special()
++ Unimplemented();
++ return false;
++}
++
++size_t os::large_page_size() {
++ return _large_page_size;
++}
++
++bool os::can_commit_large_page_memory() {
++ // Well, sadly we cannot commit anything at all (see comment in
++ // os::commit_memory) but we claim to, so we can make use of large pages
++ return true;
++}
++
++bool os::can_execute_large_page_memory() {
++ // We can do that
++ return true;
++}
++
++// Reserve memory at an arbitrary address, only if that area is
++// available (and not reserved for something else).
++char* os::pd_attempt_reserve_memory_at(size_t bytes, char* requested_addr) {
++
++ bool use_mmap = false;
++
++ // mmap: smaller graining, no large page support
++ // shm: large graining (256M), large page support, limited number of shm segments
++ //
++ // Prefer mmap wherever we either do not need large page support or have OS limits
++
++ if (!UseLargePages || bytes < SIZE_16M) {
++ use_mmap = true;
++ }
++
++ char* addr = NULL;
++ if (use_mmap) {
++ addr = reserve_mmaped_memory(bytes, requested_addr);
++ } else {
++ // shmat: wish address is mandatory, and do not try 16M pages here.
++ shmatted_memory_info_t info;
++ const int flags = RESSHM_WISHADDR_OR_FAIL;
++ if (reserve_shmatted_memory(bytes, requested_addr, flags, &info)) {
++ addr = info.addr;
++ }
++ }
++
++ return addr;
++}
++
++size_t os::read(int fd, void *buf, unsigned int nBytes) {
++ return ::read(fd, buf, nBytes);
++}
++
++#define NANOSECS_PER_MILLISEC 1000000
++
++int os::sleep(Thread* thread, jlong millis, bool interruptible) {
++ assert(thread == Thread::current(), "thread consistency check");
++
++ // Prevent nasty overflow in deadline calculation
++ // by handling long sleeps similar to solaris or windows.
++ const jlong limit = INT_MAX;
++ int result;
++ while (millis > limit) {
++ if ((result = os::sleep(thread, limit, interruptible)) != OS_OK) {
++ return result;
++ }
++ millis -= limit;
++ }
++
++ ParkEvent * const slp = thread->_SleepEvent;
++ slp->reset();
++ OrderAccess::fence();
++
++ if (interruptible) {
++ jlong prevtime = javaTimeNanos();
++
++ // Prevent precision loss and too long sleeps
++ jlong deadline = prevtime + millis * NANOSECS_PER_MILLISEC;
++
++ for (;;) {
++ if (os::is_interrupted(thread, true)) {
++ return OS_INTRPT;
++ }
++
++ jlong newtime = javaTimeNanos();
++
++ assert(newtime >= prevtime, "time moving backwards");
++ // Doing prevtime and newtime in microseconds doesn't help precision,
++ // and trying to round up to avoid lost milliseconds can result in a
++ // too-short delay.
++ millis -= (newtime - prevtime) / NANOSECS_PER_MILLISEC;
++
++ if (millis <= 0) {
++ return OS_OK;
++ }
++
++ // Stop sleeping if we passed the deadline
++ if (newtime >= deadline) {
++ return OS_OK;
++ }
++
++ prevtime = newtime;
++
++ {
++ assert(thread->is_Java_thread(), "sanity check");
++ JavaThread *jt = (JavaThread *) thread;
++ ThreadBlockInVM tbivm(jt);
++ OSThreadWaitState osts(jt->osthread(), false /* not Object.wait() */);
++
++ jt->set_suspend_equivalent();
++
++ slp->park(millis);
++
++ // were we externally suspended while we were waiting?
++ jt->check_and_wait_while_suspended();
++ }
++ }
++ } else {
++ OSThreadWaitState osts(thread->osthread(), false /* not Object.wait() */);
++ jlong prevtime = javaTimeNanos();
++
++ // Prevent precision loss and too long sleeps
++ jlong deadline = prevtime + millis * NANOSECS_PER_MILLISEC;
++
++ for (;;) {
++ // It'd be nice to avoid the back-to-back javaTimeNanos() calls on
++ // the 1st iteration ...
++ jlong newtime = javaTimeNanos();
++
++ if (newtime - prevtime < 0) {
++ // time moving backwards, should only happen if no monotonic clock
++ // not a guarantee() because JVM should not abort on kernel/glibc bugs
++ // - HS14 Commented out as not implemented.
++ // - TODO Maybe we should implement it?
++ //assert(!Aix::supports_monotonic_clock(), "time moving backwards");
++ } else {
++ millis -= (newtime - prevtime) / NANOSECS_PER_MILLISEC;
++ }
++
++ if (millis <= 0) break;
++
++ if (newtime >= deadline) {
++ break;
++ }
++
++ prevtime = newtime;
++ slp->park(millis);
++ }
++ return OS_OK;
++ }
++}
++
++void os::naked_short_sleep(jlong ms) {
++ struct timespec req;
++
++ assert(ms < 1000, "Un-interruptable sleep, short time use only");
++ req.tv_sec = 0;
++ if (ms > 0) {
++ req.tv_nsec = (ms % 1000) * 1000000;
++ }
++ else {
++ req.tv_nsec = 1;
++ }
++
++ nanosleep(&req, NULL);
++
++ return;
++}
++
++// Sleep forever; naked call to OS-specific sleep; use with CAUTION
++void os::infinite_sleep() {
++ while (true) { // sleep forever ...
++ ::sleep(100); // ... 100 seconds at a time
++ }
++}
++
++// Used to convert frequent JVM_Yield() to nops
++bool os::dont_yield() {
++ return DontYieldALot;
++}
++
++void os::yield() {
++ sched_yield();
++}
++
++os::YieldResult os::NakedYield() { sched_yield(); return os::YIELD_UNKNOWN; }
++
++void os::yield_all(int attempts) {
++ // Yields to all threads, including threads with lower priorities
++ // Threads on Linux all have the same priority. The Solaris style
++ // os::yield_all() with nanosleep(1ms) is not necessary.
++ sched_yield();
++}
++
++// Called from the tight loops to possibly influence time-sharing heuristics
++void os::loop_breaker(int attempts) {
++ os::yield_all(attempts);
++}
++
++////////////////////////////////////////////////////////////////////////////////
++// thread priority support
++
++// From AIX manpage to pthread_setschedparam
++// (see: http://publib.boulder.ibm.com/infocenter/pseries/v5r3/index.jsp?
++// topic=/com.ibm.aix.basetechref/doc/basetrf1/pthread_setschedparam.htm):
++//
++// "If schedpolicy is SCHED_OTHER, then sched_priority must be in the
++// range from 40 to 80, where 40 is the least favored priority and 80
++// is the most favored."
++//
++// (Actually, I doubt this even has an impact on AIX, as we do kernel
++// scheduling there; however, this still leaves iSeries.)
++//
++// We use the same values for AIX and PASE.
++int os::java_to_os_priority[CriticalPriority + 1] = {
++ 54, // 0 Entry should never be used
++
++ 55, // 1 MinPriority
++ 55, // 2
++ 56, // 3
++
++ 56, // 4
++ 57, // 5 NormPriority
++ 57, // 6
++
++ 58, // 7
++ 58, // 8
++ 59, // 9 NearMaxPriority
++
++ 60, // 10 MaxPriority
++
++ 60 // 11 CriticalPriority
++};
++
++OSReturn os::set_native_priority(Thread* thread, int newpri) {
++ if (!UseThreadPriorities) return OS_OK;
++ pthread_t thr = thread->osthread()->pthread_id();
++ int policy = SCHED_OTHER;
++ struct sched_param param;
++ param.sched_priority = newpri;
++ int ret = pthread_setschedparam(thr, policy, &param);
++
++ if (Verbose) {
++ if (ret == 0) {
++ fprintf(stderr, "changed priority of thread %d to %d\n", (int)thr, newpri);
++ } else {
++ fprintf(stderr, "Could not changed priority for thread %d to %d (error %d, %s)\n",
++ (int)thr, newpri, ret, strerror(ret));
++ }
++ }
++ return (ret == 0) ? OS_OK : OS_ERR;
++}
++
++OSReturn os::get_native_priority(const Thread* const thread, int *priority_ptr) {
++ if (!UseThreadPriorities) {
++ *priority_ptr = java_to_os_priority[NormPriority];
++ return OS_OK;
++ }
++ pthread_t thr = thread->osthread()->pthread_id();
++ int policy = SCHED_OTHER;
++ struct sched_param param;
++ int ret = pthread_getschedparam(thr, &policy, &param);
++ *priority_ptr = param.sched_priority;
++
++ return (ret == 0) ? OS_OK : OS_ERR;
++}
++
++// Hint to the underlying OS that a task switch would not be good.
++// Void return because it's a hint and can fail.
++void os::hint_no_preempt() {}
++
++////////////////////////////////////////////////////////////////////////////////
++// suspend/resume support
++
++// the low-level signal-based suspend/resume support is a remnant from the
++// old VM-suspension that used to be for java-suspension, safepoints etc,
++// within hotspot. Now there is a single use-case for this:
++// - calling get_thread_pc() on the VMThread by the flat-profiler task
++// that runs in the watcher thread.
++// The remaining code is greatly simplified from the more general suspension
++// code that used to be used.
++//
++// The protocol is quite simple:
++// - suspend:
++// - sends a signal to the target thread
++// - polls the suspend state of the osthread using a yield loop
++// - target thread signal handler (SR_handler) sets suspend state
++// and blocks in sigsuspend until continued
++// - resume:
++// - sets target osthread state to continue
++// - sends signal to end the sigsuspend loop in the SR_handler
++//
++// Note that the SR_lock plays no role in this suspend/resume protocol.
++//
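++// State transitions (cf. do_suspend()/do_resume() and SR_handler() below):
++//
++// suspend: SR_RUNNING --request_suspend()--> SR_SUSPEND_REQUEST
++// --SR_handler--> SR_SUSPENDED (thread parked in sigsuspend)
++// resume: SR_SUSPENDED --request_wakeup()--> SR_WAKEUP_REQUEST
++// --sigsuspend loop wakes--> SR_RUNNING
++//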
++
++static void resume_clear_context(OSThread *osthread) {
++ osthread->set_ucontext(NULL);
++ osthread->set_siginfo(NULL);
++}
++
++static void suspend_save_context(OSThread *osthread, siginfo_t* siginfo, ucontext_t* context) {
++ osthread->set_ucontext(context);
++ osthread->set_siginfo(siginfo);
++}
++
++//
++// Handler function invoked when a thread's execution is suspended or
++// resumed. We have to be careful that only async-safe functions are
++// called here (Note: most pthread functions are not async safe and
++// should be avoided.)
++//
++// Note: sigwait() is a more natural fit than sigsuspend() from an
++// interface point of view, but sigwait() prevents the signal handler
++// from being run. libpthread would get very confused by not having
++// its signal handlers run and prevents sigwait()'s use with the
++// mutex granting signal.
++//
++// Currently only ever called on the VMThread and JavaThreads (PC sampling).
++//
++static void SR_handler(int sig, siginfo_t* siginfo, ucontext_t* context) {
++ // Save and restore errno to avoid confusing native code with EINTR
++ // after sigsuspend.
++ int old_errno = errno;
++
++ Thread* thread = Thread::current();
++ OSThread* osthread = thread->osthread();
++ assert(thread->is_VM_thread() || thread->is_Java_thread(), "Must be VMThread or JavaThread");
++
++ os::SuspendResume::State current = osthread->sr.state();
++ if (current == os::SuspendResume::SR_SUSPEND_REQUEST) {
++ suspend_save_context(osthread, siginfo, context);
++
++ // attempt to switch the state, we assume we had a SUSPEND_REQUEST
++ os::SuspendResume::State state = osthread->sr.suspended();
++ if (state == os::SuspendResume::SR_SUSPENDED) {
++ sigset_t suspend_set; // signals for sigsuspend()
++
++ // get current set of blocked signals and unblock resume signal
++ pthread_sigmask(SIG_BLOCK, NULL, &suspend_set);
++ sigdelset(&suspend_set, SR_signum);
++
++ // wait here until we are resumed
++ while (1) {
++ sigsuspend(&suspend_set);
++
++ os::SuspendResume::State result = osthread->sr.running();
++ if (result == os::SuspendResume::SR_RUNNING) {
++ break;
++ }
++ }
++
++ } else if (state == os::SuspendResume::SR_RUNNING) {
++ // request was cancelled, continue
++ } else {
++ ShouldNotReachHere();
++ }
++
++ resume_clear_context(osthread);
++ } else if (current == os::SuspendResume::SR_RUNNING) {
++ // request was cancelled, continue
++ } else if (current == os::SuspendResume::SR_WAKEUP_REQUEST) {
++ // ignore
++ } else {
++ ShouldNotReachHere();
++ }
++
++ errno = old_errno;
++}
++
++
++static int SR_initialize() {
++ struct sigaction act;
++ char *s;
++ // Get signal number to use for suspend/resume
++ if ((s = ::getenv("_JAVA_SR_SIGNUM")) != 0) {
++ int sig = ::strtol(s, 0, 10);
++ if (sig > 0 && sig < NSIG) {
++ SR_signum = sig;
++ }
++ }
++
++ assert(SR_signum > SIGSEGV && SR_signum > SIGBUS,
++ "SR_signum must be greater than max(SIGSEGV, SIGBUS), see 4355769");
++
++ sigemptyset(&SR_sigset);
++ sigaddset(&SR_sigset, SR_signum);
++
++ // Set up signal handler for suspend/resume.
++ act.sa_flags = SA_RESTART|SA_SIGINFO;
++ act.sa_handler = (void (*)(int)) SR_handler;
++
++ // SR_signum is blocked by default.
++ // 4528190 - We also need to block pthread restart signal (32 on all
++ // supported Linux platforms). Note that LinuxThreads need to block
++ // this signal for all threads to work properly. So we don't have
++ // to use hard-coded signal number when setting up the mask.
++ pthread_sigmask(SIG_BLOCK, NULL, &act.sa_mask);
++
++ if (sigaction(SR_signum, &act, 0) == -1) {
++ return -1;
++ }
++
++ // Save signal flag
++ os::Aix::set_our_sigflags(SR_signum, act.sa_flags);
++ return 0;
++}
++
++static int SR_finalize() {
++ return 0;
++}
++
++static int sr_notify(OSThread* osthread) {
++ int status = pthread_kill(osthread->pthread_id(), SR_signum);
++ assert_status(status == 0, status, "pthread_kill");
++ return status;
++}
++
++// "Randomly" selected value for how long we want to spin
++// before bailing out on suspending a thread, also how often
++// we send a signal to a thread we want to resume
++static const int RANDOMLY_LARGE_INTEGER = 1000000;
++static const int RANDOMLY_LARGE_INTEGER2 = 100;
++
++// returns true on success and false on error - really an error is fatal
++// but this seems the normal response to library errors
++static bool do_suspend(OSThread* osthread) {
++ assert(osthread->sr.is_running(), "thread should be running");
++ // mark as suspended and send signal
++
++ if (osthread->sr.request_suspend() != os::SuspendResume::SR_SUSPEND_REQUEST) {
++ // failed to switch, state wasn't running?
++ ShouldNotReachHere();
++ return false;
++ }
++
++ if (sr_notify(osthread) != 0) {
++ // try to cancel, switch to running
++
++ os::SuspendResume::State result = osthread->sr.cancel_suspend();
++ if (result == os::SuspendResume::SR_RUNNING) {
++ // cancelled
++ return false;
++ } else if (result == os::SuspendResume::SR_SUSPENDED) {
++ // somehow managed to suspend
++ return true;
++ } else {
++ ShouldNotReachHere();
++ return false;
++ }
++ }
++
++ // managed to send the signal and switch to SUSPEND_REQUEST, now wait for SUSPENDED
++
++ for (int n = 0; !osthread->sr.is_suspended(); n++) {
++ for (int i = 0; i < RANDOMLY_LARGE_INTEGER2 && !osthread->sr.is_suspended(); i++) {
++ os::yield_all(i);
++ }
++
++ // timeout, try to cancel the request
++ if (n >= RANDOMLY_LARGE_INTEGER) {
++ os::SuspendResume::State cancelled = osthread->sr.cancel_suspend();
++ if (cancelled == os::SuspendResume::SR_RUNNING) {
++ return false;
++ } else if (cancelled == os::SuspendResume::SR_SUSPENDED) {
++ return true;
++ } else {
++ ShouldNotReachHere();
++ return false;
++ }
++ }
++ }
++
++ guarantee(osthread->sr.is_suspended(), "Must be suspended");
++ return true;
++}
++
++static void do_resume(OSThread* osthread) {
++ //assert(osthread->sr.is_suspended(), "thread should be suspended");
++
++ if (osthread->sr.request_wakeup() != os::SuspendResume::SR_WAKEUP_REQUEST) {
++ // failed to switch to WAKEUP_REQUEST
++ ShouldNotReachHere();
++ return;
++ }
++
++ while (!osthread->sr.is_running()) {
++ if (sr_notify(osthread) == 0) {
++ for (int n = 0; n < RANDOMLY_LARGE_INTEGER && !osthread->sr.is_running(); n++) {
++ for (int i = 0; i < 100 && !osthread->sr.is_running(); i++) {
++ os::yield_all(i);
++ }
++ }
++ } else {
++ ShouldNotReachHere();
++ }
++ }
++
++ guarantee(osthread->sr.is_running(), "Must be running!");
++}
++
++////////////////////////////////////////////////////////////////////////////////
++// interrupt support
++
++void os::interrupt(Thread* thread) {
++ assert(Thread::current() == thread || Threads_lock->owned_by_self(),
++ "possibility of dangling Thread pointer");
++
++ OSThread* osthread = thread->osthread();
++
++ if (!osthread->interrupted()) {
++ osthread->set_interrupted(true);
++ // More than one thread can get here with the same value of osthread,
++ // resulting in multiple notifications. We do, however, want the store
++ // to interrupted() to be visible to other threads before we execute unpark().
++ OrderAccess::fence();
++ ParkEvent * const slp = thread->_SleepEvent;
++ if (slp != NULL) slp->unpark();
++ }
++
++ // For JSR166. Unpark even if interrupt status already was set
++ if (thread->is_Java_thread())
++ ((JavaThread*)thread)->parker()->unpark();
++
++ ParkEvent * ev = thread->_ParkEvent;
++ if (ev != NULL) ev->unpark();
++
++}
++
++bool os::is_interrupted(Thread* thread, bool clear_interrupted) {
++ assert(Thread::current() == thread || Threads_lock->owned_by_self(),
++ "possibility of dangling Thread pointer");
++
++ OSThread* osthread = thread->osthread();
++
++ bool interrupted = osthread->interrupted();
++
++ if (interrupted && clear_interrupted) {
++ osthread->set_interrupted(false);
++ // consider thread->_SleepEvent->reset() ... optional optimization
++ }
++
++ return interrupted;
++}
++
++///////////////////////////////////////////////////////////////////////////////////
++// signal handling (except suspend/resume)
++
++// This routine may be used by user applications as a "hook" to catch signals.
++// The user-defined signal handler must pass unrecognized signals to this
++// routine, and if it returns true (non-zero), then the signal handler must
++// return immediately. If the flag "abort_if_unrecognized" is true, then this
++// routine will never return false (zero), but instead will execute a VM panic
++// routine that kills the process.
++//
++// If this routine returns false, it is OK to call it again. This allows
++// the user-defined signal handler to perform checks either before or after
++// the VM performs its own checks. Naturally, the user code would be making
++// a serious error if it tried to handle an exception (such as a null check
++// or breakpoint) that the VM was generating for its own correct operation.
++//
++// This routine may recognize any of the following kinds of signals:
++// SIGBUS, SIGSEGV, SIGILL, SIGFPE, SIGQUIT, SIGPIPE, SIGXFSZ, SIGUSR1.
++// It should be consulted by handlers for any of those signals.
++//
++// The caller of this routine must pass in the three arguments supplied
++// to the function referred to in the "sa_sigaction" (not the "sa_handler")
++// field of the structure passed to sigaction(). This routine assumes that
++// the sa_flags field passed to sigaction() includes SA_SIGINFO and SA_RESTART.
++//
++// Note that the VM will print warnings if it detects conflicting signal
++// handlers, unless invoked with the option "-XX:+AllowUserSignalHandlers".
++//
++extern "C" JNIEXPORT int
++JVM_handle_aix_signal(int signo, siginfo_t* siginfo, void* ucontext, int abort_if_unrecognized);
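++// A hypothetical user handler that forwards unrecognized signals to the VM, as
++// described above (sketch only; my_handler is an illustrative name, not part of
++// this port):
++//
++// void my_handler(int sig, siginfo_t* info, void* uc) {
++// if (JVM_handle_aix_signal(sig, info, uc, 0 /* abort_if_unrecognized */)) {
++// return; // the VM recognized and handled the signal
++// }
++// // ... application-specific handling ...
++// }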
++
++// Set thread signal mask (for some reason on AIX sigthreadmask() seems
++// to be the thing to call; documentation is not terribly clear about whether
++// pthread_sigmask also works, and if it does, whether it does the same.)
++bool set_thread_signal_mask(int how, const sigset_t* set, sigset_t* oset) {
++ const int rc = ::pthread_sigmask(how, set, oset);
++ // return value semantics differ slightly for error case:
++ // pthread_sigmask returns error number, sigthreadmask -1 and sets global errno
++ // (so, pthread_sigmask is more thread-safe for error handling)
++ // But success is always 0.
++ return rc == 0 ? true : false;
++}
++
++// Function to unblock all signals which are, according
++// to POSIX, typical program error signals. If they happen while being blocked,
++// they typically will bring down the process immediately.
++bool unblock_program_error_signals() {
++ sigset_t set;
++ ::sigemptyset(&set);
++ ::sigaddset(&set, SIGILL);
++ ::sigaddset(&set, SIGBUS);
++ ::sigaddset(&set, SIGFPE);
++ ::sigaddset(&set, SIGSEGV);
++ return set_thread_signal_mask(SIG_UNBLOCK, &set, NULL);
++}
++
++// Renamed from 'signalHandler' to avoid collision with other shared libs.
++void javaSignalHandler(int sig, siginfo_t* info, void* uc) {
++ assert(info != NULL && uc != NULL, "it must be old kernel");
++
++ // Never leave program error signals blocked;
++ // on all our platforms they would bring down the process immediately when
++ // getting raised while being blocked.
++ unblock_program_error_signals();
++
++ JVM_handle_aix_signal(sig, info, uc, true);
++}
++
++
++// This boolean allows users to forward their own non-matching signals
++// to JVM_handle_aix_signal, harmlessly.
++bool os::Aix::signal_handlers_are_installed = false;
++
++// For signal-chaining
++struct sigaction os::Aix::sigact[MAXSIGNUM];
++unsigned int os::Aix::sigs = 0;
++bool os::Aix::libjsig_is_loaded = false;
++typedef struct sigaction *(*get_signal_t)(int);
++get_signal_t os::Aix::get_signal_action = NULL;
++
++struct sigaction* os::Aix::get_chained_signal_action(int sig) {
++ struct sigaction *actp = NULL;
++
++ if (libjsig_is_loaded) {
++ // Retrieve the old signal handler from libjsig
++ actp = (*get_signal_action)(sig);
++ }
++ if (actp == NULL) {
++ // Retrieve the preinstalled signal handler from jvm
++ actp = get_preinstalled_handler(sig);
++ }
++
++ return actp;
++}
++
++static bool call_chained_handler(struct sigaction *actp, int sig,
++ siginfo_t *siginfo, void *context) {
++ // Call the old signal handler
++ if (actp->sa_handler == SIG_DFL) {
++ // It's more reasonable to let jvm treat it as an unexpected exception
++ // instead of taking the default action.
++ return false;
++ } else if (actp->sa_handler != SIG_IGN) {
++ if ((actp->sa_flags & SA_NODEFER) == 0) {
++ // automatically block the signal
++ sigaddset(&(actp->sa_mask), sig);
++ }
++
++ sa_handler_t hand = NULL;
++ sa_sigaction_t sa = NULL;
++ bool siginfo_flag_set = (actp->sa_flags & SA_SIGINFO) != 0;
++ // retrieve the chained handler
++ if (siginfo_flag_set) {
++ sa = actp->sa_sigaction;
++ } else {
++ hand = actp->sa_handler;
++ }
++
++ if ((actp->sa_flags & SA_RESETHAND) != 0) {
++ actp->sa_handler = SIG_DFL;
++ }
++
++ // try to honor the signal mask
++ sigset_t oset;
++ pthread_sigmask(SIG_SETMASK, &(actp->sa_mask), &oset);
++
++ // call into the chained handler
++ if (siginfo_flag_set) {
++ (*sa)(sig, siginfo, context);
++ } else {
++ (*hand)(sig);
++ }
++
++ // restore the signal mask
++ pthread_sigmask(SIG_SETMASK, &oset, 0);
++ }
++ // Tell jvm's signal handler the signal is taken care of.
++ return true;
++}
++
++bool os::Aix::chained_handler(int sig, siginfo_t* siginfo, void* context) {
++ bool chained = false;
++ // signal-chaining
++ if (UseSignalChaining) {
++ struct sigaction *actp = get_chained_signal_action(sig);
++ if (actp != NULL) {
++ chained = call_chained_handler(actp, sig, siginfo, context);
++ }
++ }
++ return chained;
++}
++
++struct sigaction* os::Aix::get_preinstalled_handler(int sig) {
++ if ((((unsigned int)1 << sig) & sigs) != 0) {
++ return &sigact[sig];
++ }
++ return NULL;
++}
++
++void os::Aix::save_preinstalled_handler(int sig, struct sigaction& oldAct) {
++ assert(sig > 0 && sig < MAXSIGNUM, "vm signal out of expected range");
++ sigact[sig] = oldAct;
++ sigs |= (unsigned int)1 << sig;
++}
++
++// for diagnostic
++int os::Aix::sigflags[MAXSIGNUM];
++
++int os::Aix::get_our_sigflags(int sig) {
++ assert(sig > 0 && sig < MAXSIGNUM, "vm signal out of expected range");
++ return sigflags[sig];
++}
++
++void os::Aix::set_our_sigflags(int sig, int flags) {
++ assert(sig > 0 && sig < MAXSIGNUM, "vm signal out of expected range");
++ sigflags[sig] = flags;
++}
++
++void os::Aix::set_signal_handler(int sig, bool set_installed) {
++ // Check for overwrite.
++ struct sigaction oldAct;
++ sigaction(sig, (struct sigaction*)NULL, &oldAct);
++
++ void* oldhand = oldAct.sa_sigaction
++ ? CAST_FROM_FN_PTR(void*, oldAct.sa_sigaction)
++ : CAST_FROM_FN_PTR(void*, oldAct.sa_handler);
++ // Renamed 'signalHandler' to avoid collision with other shared libs.
++ if (oldhand != CAST_FROM_FN_PTR(void*, SIG_DFL) &&
++ oldhand != CAST_FROM_FN_PTR(void*, SIG_IGN) &&
++ oldhand != CAST_FROM_FN_PTR(void*, (sa_sigaction_t)javaSignalHandler)) {
++ if (AllowUserSignalHandlers || !set_installed) {
++ // Do not overwrite; user takes responsibility to forward to us.
++ return;
++ } else if (UseSignalChaining) {
++ // save the old handler in jvm
++ save_preinstalled_handler(sig, oldAct);
++ // libjsig also interposes the sigaction() call below and saves the
++ // old sigaction on its own.
++ } else {
++ fatal(err_msg("Encountered unexpected pre-existing sigaction handler "
++ "%#lx for signal %d.", (long)oldhand, sig));
++ }
++ }
++
++ struct sigaction sigAct;
++ sigfillset(&(sigAct.sa_mask));
++ if (!set_installed) {
++ sigAct.sa_handler = SIG_DFL;
++ sigAct.sa_flags = SA_RESTART;
++ } else {
++ // Renamed 'signalHandler' to avoid collision with other shared libs.
++ sigAct.sa_sigaction = javaSignalHandler;
++ sigAct.sa_flags = SA_SIGINFO|SA_RESTART;
++ }
++ // Save flags, which are set by ours
++ assert(sig > 0 && sig < MAXSIGNUM, "vm signal out of expected range");
++ sigflags[sig] = sigAct.sa_flags;
++
++ int ret = sigaction(sig, &sigAct, &oldAct);
++ assert(ret == 0, "check");
++
++ void* oldhand2 = oldAct.sa_sigaction
++ ? CAST_FROM_FN_PTR(void*, oldAct.sa_sigaction)
++ : CAST_FROM_FN_PTR(void*, oldAct.sa_handler);
++ assert(oldhand2 == oldhand, "no concurrent signal handler installation");
++}
++
++// install signal handlers for signals that HotSpot needs to
++// handle in order to support Java-level exception handling.
++void os::Aix::install_signal_handlers() {
++ if (!signal_handlers_are_installed) {
++ signal_handlers_are_installed = true;
++
++ // signal-chaining
++ typedef void (*signal_setting_t)();
++ signal_setting_t begin_signal_setting = NULL;
++ signal_setting_t end_signal_setting = NULL;
++ begin_signal_setting = CAST_TO_FN_PTR(signal_setting_t,
++ dlsym(RTLD_DEFAULT, "JVM_begin_signal_setting"));
++ if (begin_signal_setting != NULL) {
++ end_signal_setting = CAST_TO_FN_PTR(signal_setting_t,
++ dlsym(RTLD_DEFAULT, "JVM_end_signal_setting"));
++ get_signal_action = CAST_TO_FN_PTR(get_signal_t,
++ dlsym(RTLD_DEFAULT, "JVM_get_signal_action"));
++ libjsig_is_loaded = true;
++ assert(UseSignalChaining, "should enable signal-chaining");
++ }
++ if (libjsig_is_loaded) {
++ // Tell libjsig jvm is setting signal handlers
++ (*begin_signal_setting)();
++ }
++
++ set_signal_handler(SIGSEGV, true);
++ set_signal_handler(SIGPIPE, true);
++ set_signal_handler(SIGBUS, true);
++ set_signal_handler(SIGILL, true);
++ set_signal_handler(SIGFPE, true);
++ set_signal_handler(SIGTRAP, true);
++ set_signal_handler(SIGXFSZ, true);
++ set_signal_handler(SIGDANGER, true);
++
++ if (libjsig_is_loaded) {
++ // Tell libjsig jvm finishes setting signal handlers
++ (*end_signal_setting)();
++ }
++
++ // We don't activate the signal checker if libjsig is in place; we trust ourselves,
++ // and if UserSignalHandler is installed all bets are off.
++ // Log that signal checking is off only if -verbose:jni is specified.
++ if (CheckJNICalls) {
++ if (libjsig_is_loaded) {
++ tty->print_cr("Info: libjsig is activated, all active signal checking is disabled");
++ check_signals = false;
++ }
++ if (AllowUserSignalHandlers) {
++ tty->print_cr("Info: AllowUserSignalHandlers is activated, all active signal checking is disabled");
++ check_signals = false;
++ }
++ // need to initialize check_signal_done
++ ::sigemptyset(&check_signal_done);
++ }
++ }
++}
++
++static const char* get_signal_handler_name(address handler,
++ char* buf, int buflen) {
++ int offset;
++ bool found = os::dll_address_to_library_name(handler, buf, buflen, &offset);
++ if (found) {
++ // skip directory names
++ const char *p1, *p2;
++ p1 = buf;
++ size_t len = strlen(os::file_separator());
++ while ((p2 = strstr(p1, os::file_separator())) != NULL) p1 = p2 + len;
++ // The way os::dll_address_to_library_name is implemented on Aix
++ // right now, it always returns -1 for the offset which is not
++ // terribly informative.
++ // Will fix that. For now, omit the offset.
++ jio_snprintf(buf, buflen, "%s", p1);
++ } else {
++ jio_snprintf(buf, buflen, PTR_FORMAT, handler);
++ }
++ return buf;
++}
++
++static void print_signal_handler(outputStream* st, int sig,
++ char* buf, size_t buflen) {
++ struct sigaction sa;
++ sigaction(sig, NULL, &sa);
++
++ st->print("%s: ", os::exception_name(sig, buf, buflen));
++
++ address handler = (sa.sa_flags & SA_SIGINFO)
++ ? CAST_FROM_FN_PTR(address, sa.sa_sigaction)
++ : CAST_FROM_FN_PTR(address, sa.sa_handler);
++
++ if (handler == CAST_FROM_FN_PTR(address, SIG_DFL)) {
++ st->print("SIG_DFL");
++ } else if (handler == CAST_FROM_FN_PTR(address, SIG_IGN)) {
++ st->print("SIG_IGN");
++ } else {
++ st->print("[%s]", get_signal_handler_name(handler, buf, buflen));
++ }
++
++ // Print readable mask.
++ st->print(", sa_mask[0]=");
++ os::Posix::print_signal_set_short(st, &sa.sa_mask);
++
++ address rh = VMError::get_resetted_sighandler(sig);
++ // Maybe the handler was reset by VMError?
++ if (rh != NULL) {
++ handler = rh;
++ sa.sa_flags = VMError::get_resetted_sigflags(sig);
++ }
++
++ // Print textual representation of sa_flags.
++ st->print(", sa_flags=");
++ os::Posix::print_sa_flags(st, sa.sa_flags);
++
++ // Check: is it our handler?
++ if (handler == CAST_FROM_FN_PTR(address, (sa_sigaction_t)javaSignalHandler) ||
++ handler == CAST_FROM_FN_PTR(address, (sa_sigaction_t)SR_handler)) {
++ // It is our signal handler.
++ // Check for flags, reset system-used one!
++ if ((int)sa.sa_flags != os::Aix::get_our_sigflags(sig)) {
++ st->print(", flags was changed from " PTR32_FORMAT ", consider using jsig library",
++ os::Aix::get_our_sigflags(sig));
++ }
++ }
++ st->cr();
++}
++
++
++#define DO_SIGNAL_CHECK(sig) \
++ if (!sigismember(&check_signal_done, sig)) \
++ os::Aix::check_signal_handler(sig)
++
++// This method is a periodic task to check for misbehaving JNI applications
++// under CheckJNI; we can add any other periodic checks here.
++
++void os::run_periodic_checks() {
++
++ if (check_signals == false) return;
++
++ // SEGV and BUS, if overridden, could potentially prevent
++ // generation of hs*.log in the event of a crash; debugging
++ // such a case can be very challenging, so we absolutely
++ // check the following for good measure:
++ DO_SIGNAL_CHECK(SIGSEGV);
++ DO_SIGNAL_CHECK(SIGILL);
++ DO_SIGNAL_CHECK(SIGFPE);
++ DO_SIGNAL_CHECK(SIGBUS);
++ DO_SIGNAL_CHECK(SIGPIPE);
++ DO_SIGNAL_CHECK(SIGXFSZ);
++ if (UseSIGTRAP) {
++ DO_SIGNAL_CHECK(SIGTRAP);
++ }
++ DO_SIGNAL_CHECK(SIGDANGER);
++
++ // ReduceSignalUsage allows the user to override these handlers
++ // see comments at the very top and jvm_solaris.h
++ if (!ReduceSignalUsage) {
++ DO_SIGNAL_CHECK(SHUTDOWN1_SIGNAL);
++ DO_SIGNAL_CHECK(SHUTDOWN2_SIGNAL);
++ DO_SIGNAL_CHECK(SHUTDOWN3_SIGNAL);
++ DO_SIGNAL_CHECK(BREAK_SIGNAL);
++ }
++
++ DO_SIGNAL_CHECK(SR_signum);
++ DO_SIGNAL_CHECK(INTERRUPT_SIGNAL);
++}
++
++typedef int (*os_sigaction_t)(int, const struct sigaction *, struct sigaction *);
++
++static os_sigaction_t os_sigaction = NULL;
++
++void os::Aix::check_signal_handler(int sig) {
++ char buf[O_BUFLEN];
++ address jvmHandler = NULL;
++
++ struct sigaction act;
++ if (os_sigaction == NULL) {
++ // only trust the default sigaction, in case it has been interposed
++ os_sigaction = (os_sigaction_t)dlsym(RTLD_DEFAULT, "sigaction");
++ if (os_sigaction == NULL) return;
++ }
++
++ os_sigaction(sig, (struct sigaction*)NULL, &act);
++
++ address thisHandler = (act.sa_flags & SA_SIGINFO)
++ ? CAST_FROM_FN_PTR(address, act.sa_sigaction)
++ : CAST_FROM_FN_PTR(address, act.sa_handler);
++
++
++ switch(sig) {
++ case SIGSEGV:
++ case SIGBUS:
++ case SIGFPE:
++ case SIGPIPE:
++ case SIGILL:
++ case SIGXFSZ:
++ // Renamed 'signalHandler' to avoid collision with other shared libs.
++ jvmHandler = CAST_FROM_FN_PTR(address, (sa_sigaction_t)javaSignalHandler);
++ break;
++
++ case SHUTDOWN1_SIGNAL:
++ case SHUTDOWN2_SIGNAL:
++ case SHUTDOWN3_SIGNAL:
++ case BREAK_SIGNAL:
++ jvmHandler = (address)user_handler();
++ break;
++
++ case INTERRUPT_SIGNAL:
++ jvmHandler = CAST_FROM_FN_PTR(address, SIG_DFL);
++ break;
++
++ default:
++ if (sig == SR_signum) {
++ jvmHandler = CAST_FROM_FN_PTR(address, (sa_sigaction_t)SR_handler);
++ } else {
++ return;
++ }
++ break;
++ }
++
++ if (thisHandler != jvmHandler) {
++ tty->print("Warning: %s handler ", exception_name(sig, buf, O_BUFLEN));
++ tty->print("expected:%s", get_signal_handler_name(jvmHandler, buf, O_BUFLEN));
++ tty->print_cr(" found:%s", get_signal_handler_name(thisHandler, buf, O_BUFLEN));
++ // No need to check this sig any longer
++ sigaddset(&check_signal_done, sig);
++ } else if (os::Aix::get_our_sigflags(sig) != 0 && (int)act.sa_flags != os::Aix::get_our_sigflags(sig)) {
++ tty->print("Warning: %s handler flags ", exception_name(sig, buf, O_BUFLEN));
++ tty->print("expected:" PTR32_FORMAT, os::Aix::get_our_sigflags(sig));
++ tty->print_cr(" found:" PTR32_FORMAT, act.sa_flags);
++ // No need to check this sig any longer
++ sigaddset(&check_signal_done, sig);
++ }
++
++ // Dump all the signal handlers
++ if (sigismember(&check_signal_done, sig)) {
++ print_signal_handlers(tty, buf, O_BUFLEN);
++ }
++}
++
++extern bool signal_name(int signo, char* buf, size_t len);
++
++const char* os::exception_name(int exception_code, char* buf, size_t size) {
++ if (0 < exception_code && exception_code <= SIGRTMAX) {
++ // signal
++ if (!signal_name(exception_code, buf, size)) {
++ jio_snprintf(buf, size, "SIG%d", exception_code);
++ }
++ return buf;
++ } else {
++ return NULL;
++ }
++}
++
++// To install functions for atexit system call
++extern "C" {
++ static void perfMemory_exit_helper() {
++ perfMemory_exit();
++ }
++}
++
++// This is called _before_ most of the global arguments have been parsed.
++void os::init(void) {
++ // This is basic, we want to know if that ever changes.
++ // (the shared memory boundary is supposed to be 256M aligned)
++ assert(SHMLBA == ((uint64_t)0x10000000ULL)/*256M*/, "unexpected");
++
++ // First off, we need to know whether we run on AIX or PASE, and
++ // the OS level we run on.
++ os::Aix::initialize_os_info();
++
++ // Scan environment (SPEC1170 behaviour, etc)
++ os::Aix::scan_environment();
++
++ // Check which pages are supported by AIX.
++ os::Aix::query_multipage_support();
++
++ // Next, we need to initialize libo4 and libperfstat libraries.
++ if (os::Aix::on_pase()) {
++ os::Aix::initialize_libo4();
++ } else {
++ os::Aix::initialize_libperfstat();
++ }
++
++ // Reset the perfstat information provided by ODM.
++ if (os::Aix::on_aix()) {
++ libperfstat::perfstat_reset();
++ }
++
++ // Now initialize basic system properties. Note that for some of the values we
++ // need libperfstat etc.
++ os::Aix::initialize_system_info();
++
++ // Initialize large page support.
++ if (UseLargePages) {
++ os::large_page_init();
++ if (!UseLargePages) {
++ // initialize os::_page_sizes
++ _page_sizes[0] = Aix::page_size();
++ _page_sizes[1] = 0;
++ if (Verbose) {
++ fprintf(stderr, "Large Page initialization failed: setting UseLargePages=0.\n");
++ }
++ }
++ } else {
++ // initialize os::_page_sizes
++ _page_sizes[0] = Aix::page_size();
++ _page_sizes[1] = 0;
++ }
++
++ // debug trace
++ if (Verbose) {
++ fprintf(stderr, "os::vm_page_size 0x%llX\n", os::vm_page_size());
++ fprintf(stderr, "os::large_page_size 0x%llX\n", os::large_page_size());
++ fprintf(stderr, "os::_page_sizes = ( ");
++ for (int i = 0; _page_sizes[i]; i ++) {
++ fprintf(stderr, " %s ", describe_pagesize(_page_sizes[i]));
++ }
++ fprintf(stderr, ")\n");
++ }
++
++ _initial_pid = getpid();
++
++ clock_tics_per_sec = sysconf(_SC_CLK_TCK);
++
++ init_random(1234567);
++
++ ThreadCritical::initialize();
++
++ // Main_thread points to the aboriginal thread.
++ Aix::_main_thread = pthread_self();
++
++ initial_time_count = os::elapsed_counter();
++ pthread_mutex_init(&dl_mutex, NULL);
++}
++
++// this is called _after_ the global arguments have been parsed
++jint os::init_2(void) {
++
++ if (Verbose) {
++ fprintf(stderr, "processor count: %d\n", os::_processor_count);
++ fprintf(stderr, "physical memory: %lu\n", Aix::_physical_memory);
++ }
++
++ // initially build up the loaded dll map
++ LoadedLibraries::reload();
++
++ const int page_size = Aix::page_size();
++ const int map_size = page_size;
++
++ address map_address = (address) MAP_FAILED;
++ const int prot = PROT_READ;
++ const int flags = MAP_PRIVATE|MAP_ANONYMOUS;
++
++ // use optimized addresses for the polling page,
++ // e.g. map it to a special 32-bit address.
++ if (OptimizePollingPageLocation) {
++ // architecture-specific list of address wishes:
++ address address_wishes[] = {
++ // AIX: addresses lower than 0x30000000 don't seem to work on AIX.
++ // PPC64: all address wishes are non-negative 32 bit values where
++ // the lower 16 bits are all zero. we can load these addresses
++ // with a single ppc_lis instruction.
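++      // (e.g. 0x31000000 can be loaded with "lis rT, 0x3100", because lis
++      // places its 16-bit immediate into the upper halfword of the register.)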
++ (address) 0x30000000, (address) 0x31000000,
++ (address) 0x32000000, (address) 0x33000000,
++ (address) 0x40000000, (address) 0x41000000,
++ (address) 0x42000000, (address) 0x43000000,
++ (address) 0x50000000, (address) 0x51000000,
++ (address) 0x52000000, (address) 0x53000000,
++ (address) 0x60000000, (address) 0x61000000,
++ (address) 0x62000000, (address) 0x63000000
++ };
++ int address_wishes_length = sizeof(address_wishes)/sizeof(address);
++
++ // iterate over the list of address wishes:
++    for (int i = 0; i < address_wishes_length; i++) {
++      // Try to map at the current address wish. AIX needs MAP_FIXED if we
++      // provide an address, and mmap will fail if that address is already
++      // mapped, so the probe is placed one page below the wish.
++      map_address = (address) ::mmap(address_wishes[i] - (ssize_t)page_size,
++                                     map_size, prot,
++                                     flags | MAP_FIXED,
++                                     -1, 0);
++      if (Verbose) {
++        fprintf(stderr, "SafePoint Polling Page address: %p (wish) => %p\n",
++                address_wishes[i], map_address + (ssize_t)page_size);
++ }
++
++ if (map_address + (ssize_t)page_size == address_wishes[i]) {
++ // map succeeded and map_address is at wished address, exit loop.
++ break;
++ }
++
++ if (map_address != (address) MAP_FAILED) {
++ // map succeeded, but polling_page is not at wished address, unmap and continue.
++ ::munmap(map_address, map_size);
++ map_address = (address) MAP_FAILED;
++ }
++ // map failed, continue loop.
++ }
++ } // end OptimizePollingPageLocation
++
++ if (map_address == (address) MAP_FAILED) {
++ map_address = (address) ::mmap(NULL, map_size, prot, flags, -1, 0);
++ }
++ guarantee(map_address != MAP_FAILED, "os::init_2: failed to allocate polling page");
++ os::set_polling_page(map_address);
++
++ if (!UseMembar) {
++ address mem_serialize_page = (address) ::mmap(NULL, Aix::page_size(), PROT_READ | PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
++    guarantee(mem_serialize_page != (address) MAP_FAILED, "mmap failed for memory serialize page");
++ os::set_memory_serialize_page(mem_serialize_page);
++
++#ifndef PRODUCT
++ if (Verbose && PrintMiscellaneous)
++ tty->print("[Memory Serialize Page address: " INTPTR_FORMAT "]\n", (intptr_t)mem_serialize_page);
++#endif
++ }
++
++ // initialize suspend/resume support - must do this before signal_sets_init()
++ if (SR_initialize() != 0) {
++ perror("SR_initialize failed");
++ return JNI_ERR;
++ }
++
++ Aix::signal_sets_init();
++ Aix::install_signal_handlers();
++
++ // Check minimum allowable stack size for thread creation and to initialize
++ // the java system classes, including StackOverflowError - depends on page
++ // size. Add a page for compiler2 recursion in main thread.
++ // Add in 2*BytesPerWord times page size to account for VM stack during
++ // class initialization depending on 32 or 64 bit VM.
++ os::Aix::min_stack_allowed = MAX2(os::Aix::min_stack_allowed,
++ (size_t)(StackYellowPages+StackRedPages+StackShadowPages +
++ 2*BytesPerWord COMPILER2_PRESENT(+1)) * Aix::page_size());
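++  // Illustrative arithmetic only (the page counts are VM flags and vary by
++  // platform): with 4K pages, 2 yellow + 1 red + 20 shadow pages,
++  // 2*BytesPerWord = 16 on a 64-bit VM and +1 for compiler2, this yields
++  // (2 + 1 + 20 + 16 + 1) * 4K = 160K.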
++
++ size_t threadStackSizeInBytes = ThreadStackSize * K;
++ if (threadStackSizeInBytes != 0 &&
++ threadStackSizeInBytes < os::Aix::min_stack_allowed) {
++ tty->print_cr("\nThe stack size specified is too small, "
++ "Specify at least %dk",
++ os::Aix::min_stack_allowed / K);
++ return JNI_ERR;
++ }
++
++ // Make the stack size a multiple of the page size so that
++ // the yellow/red zones can be guarded.
++ // note that this can be 0, if no default stacksize was set
++ JavaThread::set_stack_size_at_create(round_to(threadStackSizeInBytes, vm_page_size()));
++
++ Aix::libpthread_init();
++
++ if (MaxFDLimit) {
++    // Set the number of file descriptors to the maximum. Print out an error
++    // if getrlimit/setrlimit fails, but continue regardless.
++ struct rlimit nbr_files;
++ int status = getrlimit(RLIMIT_NOFILE, &nbr_files);
++ if (status != 0) {
++ if (PrintMiscellaneous && (Verbose || WizardMode))
++ perror("os::init_2 getrlimit failed");
++ } else {
++ nbr_files.rlim_cur = nbr_files.rlim_max;
++ status = setrlimit(RLIMIT_NOFILE, &nbr_files);
++ if (status != 0) {
++ if (PrintMiscellaneous && (Verbose || WizardMode))
++ perror("os::init_2 setrlimit failed");
++ }
++ }
++ }
++
++ if (PerfAllowAtExitRegistration) {
++ // only register atexit functions if PerfAllowAtExitRegistration is set.
++ // atexit functions can be delayed until process exit time, which
++ // can be problematic for embedded VM situations. Embedded VMs should
++ // call DestroyJavaVM() to assure that VM resources are released.
++
++ // note: perfMemory_exit_helper atexit function may be removed in
++ // the future if the appropriate cleanup code can be added to the
++ // VM_Exit VMOperation's doit method.
++ if (atexit(perfMemory_exit_helper) != 0) {
++ warning("os::init_2 atexit(perfMemory_exit_helper) failed");
++ }
++ }
++
++ return JNI_OK;
++}
++
++// this is called at the end of vm_initialization
++void os::init_3(void) {
++ return;
++}
++
++// Mark the polling page as unreadable
++void os::make_polling_page_unreadable(void) {
++ if (!guard_memory((char*)_polling_page, Aix::page_size())) {
++ fatal("Could not disable polling page");
++ }
++};
++
++// Mark the polling page as readable
++void os::make_polling_page_readable(void) {
++ // Changed according to os_linux.cpp.
++ if (!checked_mprotect((char *)_polling_page, Aix::page_size(), PROT_READ)) {
++ fatal(err_msg("Could not enable polling page at " PTR_FORMAT, _polling_page));
++ }
++};
++
++int os::active_processor_count() {
++ int online_cpus = ::sysconf(_SC_NPROCESSORS_ONLN);
++ assert(online_cpus > 0 && online_cpus <= processor_count(), "sanity check");
++ return online_cpus;
++}
++
++void os::set_native_thread_name(const char *name) {
++ // Not yet implemented.
++ return;
++}
++
++bool os::distribute_processes(uint length, uint* distribution) {
++ // Not yet implemented.
++ return false;
++}
++
++bool os::bind_to_processor(uint processor_id) {
++ // Not yet implemented.
++ return false;
++}
++
++void os::SuspendedThreadTask::internal_do_task() {
++ if (do_suspend(_thread->osthread())) {
++ SuspendedThreadTaskContext context(_thread, _thread->osthread()->ucontext());
++ do_task(context);
++ do_resume(_thread->osthread());
++ }
++}
++
++class PcFetcher : public os::SuspendedThreadTask {
++public:
++ PcFetcher(Thread* thread) : os::SuspendedThreadTask(thread) {}
++ ExtendedPC result();
++protected:
++ void do_task(const os::SuspendedThreadTaskContext& context);
++private:
++ ExtendedPC _epc;
++};
++
++ExtendedPC PcFetcher::result() {
++ guarantee(is_done(), "task is not done yet.");
++ return _epc;
++}
++
++void PcFetcher::do_task(const os::SuspendedThreadTaskContext& context) {
++ Thread* thread = context.thread();
++ OSThread* osthread = thread->osthread();
++ if (osthread->ucontext() != NULL) {
++ _epc = os::Aix::ucontext_get_pc((ucontext_t *) context.ucontext());
++ } else {
++ // NULL context is unexpected, double-check this is the VMThread.
++ guarantee(thread->is_VM_thread(), "can only be called for VMThread");
++ }
++}
++
++// Suspends the target using the signal mechanism and then grabs the PC before
++// resuming the target. Used by the flat-profiler only
++ExtendedPC os::get_thread_pc(Thread* thread) {
++ // Make sure that it is called by the watcher for the VMThread.
++ assert(Thread::current()->is_Watcher_thread(), "Must be watcher");
++ assert(thread->is_VM_thread(), "Can only be called for VMThread");
++
++ PcFetcher fetcher(thread);
++ fetcher.run();
++ return fetcher.result();
++}
++
++// Not needed on AIX.
++// int os::Aix::safe_cond_timedwait(pthread_cond_t *_cond, pthread_mutex_t *_mutex, const struct timespec *_abstime) {
++// }
++
++////////////////////////////////////////////////////////////////////////////////
++// debug support
++
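++// same_page(x, y) clamps x to the nearest boundary of y's page. Illustrative
++// with 4K pages: same_page((address)0x2234, (address)0x1FFF) yields 0x2000,
++// same_page((address)0x0234, (address)0x1FFF) yields 0x1000, and x is
++// returned unchanged when x and y already share a page.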
++static address same_page(address x, address y) {
++ intptr_t page_bits = -os::vm_page_size();
++ if ((intptr_t(x) & page_bits) == (intptr_t(y) & page_bits))
++ return x;
++ else if (x > y)
++ return (address)(intptr_t(y) | ~page_bits) + 1;
++ else
++ return (address)(intptr_t(y) & page_bits);
++}
++
++bool os::find(address addr, outputStream* st) {
++
++ st->print(PTR_FORMAT ": ", addr);
++
++ const LoadedLibraryModule* lib = LoadedLibraries::find_for_text_address(addr);
++ if (lib) {
++ lib->print(st);
++ return true;
++ } else {
++ lib = LoadedLibraries::find_for_data_address(addr);
++ if (lib) {
++ lib->print(st);
++ return true;
++ } else {
++ st->print_cr("(outside any module)");
++ }
++ }
++
++ return false;
++}
++
++////////////////////////////////////////////////////////////////////////////////
++// misc
++
++// This does not do anything on AIX. This is basically a hook for being
++// able to use structured exception handling (thread-local exception filters)
++// on, e.g., Win32.
++void
++os::os_exception_wrapper(java_call_t f, JavaValue* value, methodHandle* method,
++ JavaCallArguments* args, Thread* thread) {
++ f(value, method, args, thread);
++}
++
++void os::print_statistics() {
++}
++
++int os::message_box(const char* title, const char* message) {
++ int i;
++ fdStream err(defaultStream::error_fd());
++ for (i = 0; i < 78; i++) err.print_raw("=");
++ err.cr();
++ err.print_raw_cr(title);
++ for (i = 0; i < 78; i++) err.print_raw("-");
++ err.cr();
++ err.print_raw_cr(message);
++ for (i = 0; i < 78; i++) err.print_raw("=");
++ err.cr();
++
++ char buf[16];
++ // Prevent process from exiting upon "read error" without consuming all CPU
++ while (::read(0, buf, sizeof(buf)) <= 0) { ::sleep(100); }
++
++ return buf[0] == 'y' || buf[0] == 'Y';
++}
++
++int os::stat(const char *path, struct stat *sbuf) {
++ char pathbuf[MAX_PATH];
++ if (strlen(path) > MAX_PATH - 1) {
++ errno = ENAMETOOLONG;
++ return -1;
++ }
++ os::native_path(strcpy(pathbuf, path));
++ return ::stat(pathbuf, sbuf);
++}
++
++bool os::check_heap(bool force) {
++ return true;
++}
++
++// int local_vsnprintf(char* buf, size_t count, const char* format, va_list args) {
++// return ::vsnprintf(buf, count, format, args);
++// }
++
++// Is a (classpath) directory empty?
++bool os::dir_is_empty(const char* path) {
++ DIR *dir = NULL;
++ struct dirent *ptr;
++
++ dir = opendir(path);
++ if (dir == NULL) return true;
++
++ /* Scan the directory */
++ bool result = true;
++ char buf[sizeof(struct dirent) + MAX_PATH];
++ while (result && (ptr = ::readdir(dir)) != NULL) {
++ if (strcmp(ptr->d_name, ".") != 0 && strcmp(ptr->d_name, "..") != 0) {
++ result = false;
++ }
++ }
++ closedir(dir);
++ return result;
++}
++
++// This code originates from JDK's sysOpen and open64_w
++// from src/solaris/hpi/src/system_md.c
++
++#ifndef O_DELETE
++#define O_DELETE 0x10000
++#endif
++
++// Open a file. Unlink the file immediately after open returns
++// if the specified oflag has the O_DELETE flag set.
++// O_DELETE is used only in j2se/src/share/native/java/util/zip/ZipFile.c
++
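++// Illustrative use: os::open(name, O_RDONLY | O_DELETE, 0) opens the file
++// and unlinks it immediately, so it disappears once the descriptor is closed.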
++int os::open(const char *path, int oflag, int mode) {
++
++ if (strlen(path) > MAX_PATH - 1) {
++ errno = ENAMETOOLONG;
++ return -1;
++ }
++ int fd;
++ int o_delete = (oflag & O_DELETE);
++ oflag = oflag & ~O_DELETE;
++
++ fd = ::open64(path, oflag, mode);
++ if (fd == -1) return -1;
++
++ // If the open succeeded, the file might still be a directory.
++ {
++ struct stat64 buf64;
++ int ret = ::fstat64(fd, &buf64);
++ int st_mode = buf64.st_mode;
++
++ if (ret != -1) {
++ if ((st_mode & S_IFMT) == S_IFDIR) {
++ errno = EISDIR;
++ ::close(fd);
++ return -1;
++ }
++ } else {
++ ::close(fd);
++ return -1;
++ }
++ }
++
++ // All file descriptors that are opened in the JVM and not
++ // specifically destined for a subprocess should have the
++ // close-on-exec flag set. If we don't set it, then careless 3rd
++ // party native code might fork and exec without closing all
++ // appropriate file descriptors (e.g. as we do in closeDescriptors in
++ // UNIXProcess.c), and this in turn might:
++ //
++ // - cause end-of-file to fail to be detected on some file
++ // descriptors, resulting in mysterious hangs, or
++ //
++ // - might cause an fopen in the subprocess to fail on a system
++ // suffering from bug 1085341.
++ //
++ // (Yes, the default setting of the close-on-exec flag is a Unix
++ // design flaw.)
++ //
++ // See:
++ // 1085341: 32-bit stdio routines should support file descriptors >255
++ // 4843136: (process) pipe file descriptor from Runtime.exec not being closed
++ // 6339493: (process) Runtime.exec does not close all file descriptors on Solaris 9
++#ifdef FD_CLOEXEC
++ {
++ int flags = ::fcntl(fd, F_GETFD);
++ if (flags != -1)
++ ::fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
++ }
++#endif
++
++ if (o_delete != 0) {
++ ::unlink(path);
++ }
++ return fd;
++}
++
++
++// create binary file, rewriting existing file if required
++int os::create_binary_file(const char* path, bool rewrite_existing) {
++ int oflags = O_WRONLY | O_CREAT;
++ if (!rewrite_existing) {
++ oflags |= O_EXCL;
++ }
++ return ::open64(path, oflags, S_IREAD | S_IWRITE);
++}
++
++// return current position of file pointer
++jlong os::current_file_offset(int fd) {
++ return (jlong)::lseek64(fd, (off64_t)0, SEEK_CUR);
++}
++
++// move file pointer to the specified offset
++jlong os::seek_to_file_offset(int fd, jlong offset) {
++ return (jlong)::lseek64(fd, (off64_t)offset, SEEK_SET);
++}
++
++// This code originates from JDK's sysAvailable
++// from src/solaris/hpi/src/native_threads/src/sys_api_td.c
++
++int os::available(int fd, jlong *bytes) {
++ jlong cur, end;
++ int mode;
++ struct stat64 buf64;
++
++ if (::fstat64(fd, &buf64) >= 0) {
++ mode = buf64.st_mode;
++ if (S_ISCHR(mode) || S_ISFIFO(mode) || S_ISSOCK(mode)) {
++ // XXX: is the following call interruptible? If so, this might
++ // need to go through the INTERRUPT_IO() wrapper as for other
++ // blocking, interruptible calls in this file.
++ int n;
++ if (::ioctl(fd, FIONREAD, &n) >= 0) {
++ *bytes = n;
++ return 1;
++ }
++ }
++ }
++ if ((cur = ::lseek64(fd, 0L, SEEK_CUR)) == -1) {
++ return 0;
++ } else if ((end = ::lseek64(fd, 0L, SEEK_END)) == -1) {
++ return 0;
++ } else if (::lseek64(fd, cur, SEEK_SET) == -1) {
++ return 0;
++ }
++ *bytes = end - cur;
++ return 1;
++}
++
++int os::socket_available(int fd, jint *pbytes) {
++  // Linux doc says EINTR is not returned, unlike Solaris (comment inherited from the Linux port).
++ int ret = ::ioctl(fd, FIONREAD, pbytes);
++
++ //%% note ioctl can return 0 when successful, JVM_SocketAvailable
++ // is expected to return 0 on failure and 1 on success to the jdk.
++ return (ret < 0) ? 0 : 1;
++}
++
++// Map a block of memory.
++char* os::pd_map_memory(int fd, const char* file_name, size_t file_offset,
++ char *addr, size_t bytes, bool read_only,
++ bool allow_exec) {
++ Unimplemented();
++ return NULL;
++}
++
++
++// Remap a block of memory.
++char* os::pd_remap_memory(int fd, const char* file_name, size_t file_offset,
++ char *addr, size_t bytes, bool read_only,
++ bool allow_exec) {
++ // same as map_memory() on this OS
++ return os::map_memory(fd, file_name, file_offset, addr, bytes, read_only,
++ allow_exec);
++}
++
++// Unmap a block of memory.
++bool os::pd_unmap_memory(char* addr, size_t bytes) {
++ return munmap(addr, bytes) == 0;
++}
++
++// current_thread_cpu_time(bool) and thread_cpu_time(Thread*, bool)
++// are used by JVM M&M and JVMTI to get user+sys or user CPU time
++// of a thread.
++//
++// current_thread_cpu_time() and thread_cpu_time(Thread*) returns
++// the fast estimate available on the platform.
++
++jlong os::current_thread_cpu_time() {
++ // return user + sys since the cost is the same
++ const jlong n = os::thread_cpu_time(Thread::current(), true /* user + sys */);
++ assert(n >= 0, "negative CPU time");
++ return n;
++}
++
++jlong os::thread_cpu_time(Thread* thread) {
++ // consistent with what current_thread_cpu_time() returns
++ const jlong n = os::thread_cpu_time(thread, true /* user + sys */);
++ assert(n >= 0, "negative CPU time");
++ return n;
++}
++
++jlong os::current_thread_cpu_time(bool user_sys_cpu_time) {
++ const jlong n = os::thread_cpu_time(Thread::current(), user_sys_cpu_time);
++ assert(n >= 0, "negative CPU time");
++ return n;
++}
++
++static bool thread_cpu_time_unchecked(Thread* thread, jlong* p_sys_time, jlong* p_user_time) {
++ bool error = false;
++
++ jlong sys_time = 0;
++ jlong user_time = 0;
++
++ // reimplemented using getthrds64().
++ //
++ // goes like this:
++ // For the thread in question, get the kernel thread id. Then get the
++ // kernel thread statistics using that id.
++ //
++  // This only works, of course, when no m:n pthread scheduling is used,
++  // i.e. there is a 1:1 relationship between pthreads and kernel threads.
++  // On AIX, see the AIXTHREAD_SCOPE environment variable.
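++  // The kernel reports the thread times as timevals (seconds/microseconds);
++  // they are converted below to nanoseconds as sec * 1e9 + usec * 1e3.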
++
++ pthread_t pthtid = thread->osthread()->pthread_id();
++
++ // retrieve kernel thread id for the pthread:
++ tid64_t tid = 0;
++ struct __pthrdsinfo pinfo;
++  // I just love those otherworldly IBM APIs which force me to hand down
++  // dummy buffers for stuff I don't care about...
++ char dummy[1];
++ int dummy_size = sizeof(dummy);
++ if (pthread_getthrds_np(&pthtid, PTHRDSINFO_QUERY_TID, &pinfo, sizeof(pinfo),
++ dummy, &dummy_size) == 0) {
++ tid = pinfo.__pi_tid;
++ } else {
++ tty->print_cr("pthread_getthrds_np failed.");
++ error = true;
++ }
++
++ // retrieve kernel timing info for that kernel thread
++ if (!error) {
++ struct thrdentry64 thrdentry;
++ if (getthrds64(getpid(), &thrdentry, sizeof(thrdentry), &tid, 1) == 1) {
++ sys_time = thrdentry.ti_ru.ru_stime.tv_sec * 1000000000LL + thrdentry.ti_ru.ru_stime.tv_usec * 1000LL;
++ user_time = thrdentry.ti_ru.ru_utime.tv_sec * 1000000000LL + thrdentry.ti_ru.ru_utime.tv_usec * 1000LL;
++ } else {
++ tty->print_cr("pthread_getthrds_np failed.");
++ error = true;
++ }
++ }
++
++ if (p_sys_time) {
++ *p_sys_time = sys_time;
++ }
++
++ if (p_user_time) {
++ *p_user_time = user_time;
++ }
++
++ if (error) {
++ return false;
++ }
++
++ return true;
++}
++
++jlong os::thread_cpu_time(Thread *thread, bool user_sys_cpu_time) {
++ jlong sys_time;
++ jlong user_time;
++
++ if (!thread_cpu_time_unchecked(thread, &sys_time, &user_time)) {
++ return -1;
++ }
++
++ return user_sys_cpu_time ? sys_time + user_time : user_time;
++}
++
++void os::current_thread_cpu_time_info(jvmtiTimerInfo *info_ptr) {
++ info_ptr->max_value = ALL_64_BITS; // will not wrap in less than 64 bits
++ info_ptr->may_skip_backward = false; // elapsed time not wall time
++ info_ptr->may_skip_forward = false; // elapsed time not wall time
++ info_ptr->kind = JVMTI_TIMER_TOTAL_CPU; // user+system time is returned
++}
++
++void os::thread_cpu_time_info(jvmtiTimerInfo *info_ptr) {
++ info_ptr->max_value = ALL_64_BITS; // will not wrap in less than 64 bits
++ info_ptr->may_skip_backward = false; // elapsed time not wall time
++ info_ptr->may_skip_forward = false; // elapsed time not wall time
++ info_ptr->kind = JVMTI_TIMER_TOTAL_CPU; // user+system time is returned
++}
++
++bool os::is_thread_cpu_time_supported() {
++ return true;
++}
++
++// System loadavg support. Returns -1 if load average cannot be obtained.
++// For now just return the system wide load average (no processor sets).
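++//
++// Typical use (illustrative):
++//   double v[3];
++//   if (os::loadavg(v, 3) == 3) { /* v holds the 1/5/15 minute averages */ }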
++int os::loadavg(double values[], int nelem) {
++
++ // Implemented using libperfstat on AIX.
++
++ guarantee(nelem >= 0 && nelem <= 3, "argument error");
++ guarantee(values, "argument error");
++
++ if (os::Aix::on_pase()) {
++ Unimplemented();
++ return -1;
++ } else {
++ // AIX: use libperfstat
++ //
++ // See also:
++ // http://publib.boulder.ibm.com/infocenter/pseries/v5r3/index.jsp?topic=/com.ibm.aix.basetechref/doc/basetrf1/perfstat_cputot.htm
++ // /usr/include/libperfstat.h:
++
++    // Use get_cpuinfo(), which is already AIX-version independent.
++ os::Aix::cpuinfo_t ci;
++ if (os::Aix::get_cpuinfo(&ci)) {
++ for (int i = 0; i < nelem; i++) {
++ values[i] = ci.loadavg[i];
++ }
++ } else {
++ return -1;
++ }
++ return nelem;
++ }
++}
++
++void os::pause() {
++ char filename[MAX_PATH];
++ if (PauseAtStartupFile && PauseAtStartupFile[0]) {
++ jio_snprintf(filename, MAX_PATH, PauseAtStartupFile);
++ } else {
++ jio_snprintf(filename, MAX_PATH, "./vm.paused.%d", current_process_id());
++ }
++
++ int fd = ::open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0666);
++ if (fd != -1) {
++ struct stat buf;
++ ::close(fd);
++ while (::stat(filename, &buf) == 0) {
++ (void)::poll(NULL, 0, 100);
++ }
++ } else {
++ jio_fprintf(stderr,
++ "Could not open pause file '%s', continuing immediately.\n", filename);
++ }
++}
++
++bool os::Aix::is_primordial_thread() {
++ if (pthread_self() == (pthread_t)1) {
++ return true;
++ } else {
++ return false;
++ }
++}
++
++// OS recognition (PASE/AIX, OS level). Call this before calling any of the
++// static functions Aix::on_pase() or Aix::os_version().
++void os::Aix::initialize_os_info() {
++
++ assert(_on_pase == -1 && _os_version == -1, "already called.");
++
++ struct utsname uts;
++ memset(&uts, 0, sizeof(uts));
++ strcpy(uts.sysname, "?");
++ if (::uname(&uts) == -1) {
++ fprintf(stderr, "uname failed (%d)\n", errno);
++ guarantee(0, "Could not determine whether we run on AIX or PASE");
++ } else {
++ if (Verbose) {
++ fprintf(stderr,"uname says: sysname \"%s\" version \"%s\" release \"%s\" "
++ "node \"%s\" machine \"%s\"\n",
++ uts.sysname, uts.version, uts.release, uts.nodename, uts.machine);
++ }
++ const int major = atoi(uts.version);
++ assert(major > 0, "invalid OS version");
++ const int minor = atoi(uts.release);
++ assert(minor > 0, "invalid OS release");
++ _os_version = (major << 8) | minor;
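++    // e.g. AIX 6.1 reports version "6" and release "1", giving 0x0601.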
++ if (strcmp(uts.sysname, "OS400") == 0) {
++ Unimplemented();
++ } else if (strcmp(uts.sysname, "AIX") == 0) {
++ // We run on AIX. We do not support versions older than AIX 5.3.
++ _on_pase = 0;
++ if (_os_version < 0x0503) {
++ fprintf(stderr, "AIX release older than AIX 5.3 not supported.\n");
++ assert(false, "AIX release too old.");
++ } else {
++ if (Verbose) {
++ fprintf(stderr, "We run on AIX %d.%d\n", major, minor);
++ }
++ }
++ } else {
++ assert(false, "unknown OS");
++ }
++ }
++
++ guarantee(_on_pase != -1 && _os_version, "Could not determine AIX/OS400 release");
++
++} // end: os::Aix::initialize_os_info()
++
++// Scan environment for important settings which might affect the VM.
++// Trace out settings. Warn about invalid settings and/or correct them.
++//
++// Must run after os::Aix::initialize_os_info().
++void os::Aix::scan_environment() {
++
++ char* p;
++ int rc;
++
++  // Warn explicitly if EXTSHM=ON is used. That switch changes how
++  // System V shared memory behaves. One effect is that the page size of
++  // shared memory cannot be changed dynamically, effectively preventing
++  // large pages from working.
++  // This switch was needed on 32-bit AIX, but on 64-bit AIX the general
++  // recommendation is (in OSS notes) to switch it off.
++ p = ::getenv("EXTSHM");
++ if (Verbose) {
++ fprintf(stderr, "EXTSHM=%s.\n", p ? p : "");
++ }
++ if (p && strcmp(p, "ON") == 0) {
++ fprintf(stderr, "Unsupported setting: EXTSHM=ON. Large Page support will be disabled.\n");
++ _extshm = 1;
++ } else {
++ _extshm = 0;
++ }
++
++ // SPEC1170 behaviour: will change the behaviour of a number of POSIX APIs.
++ // Not tested, not supported.
++ //
++ // Note that it might be worth the trouble to test and to require it, if only to
++ // get useful return codes for mprotect.
++ //
++  // Note: Setting XPG_SUS_ENV from within the process is too late. It must be
++  // set earlier (before exec()? before loading libjvm? ...).
++ p = ::getenv("XPG_SUS_ENV");
++ if (Verbose) {
++ fprintf(stderr, "XPG_SUS_ENV=%s.\n", p ? p : "");
++ }
++ if (p && strcmp(p, "ON") == 0) {
++ _xpg_sus_mode = 1;
++ fprintf(stderr, "Unsupported setting: XPG_SUS_ENV=ON\n");
++ // This is not supported. Worst of all, it changes behaviour of mmap MAP_FIXED to
++ // clobber address ranges. If we ever want to support that, we have to do some
++ // testing first.
++ guarantee(false, "XPG_SUS_ENV=ON not supported");
++ } else {
++ _xpg_sus_mode = 0;
++ }
++
++ // Switch off AIX internal (pthread) guard pages. This has
++ // immediate effect for any pthread_create calls which follow.
++ p = ::getenv("AIXTHREAD_GUARDPAGES");
++ if (Verbose) {
++ fprintf(stderr, "AIXTHREAD_GUARDPAGES=%s.\n", p ? p : "");
++ fprintf(stderr, "setting AIXTHREAD_GUARDPAGES=0.\n");
++ }
++ rc = ::putenv("AIXTHREAD_GUARDPAGES=0");
++ guarantee(rc == 0, "");
++
++} // end: os::Aix::scan_environment()
++
++// PASE: initialize the libo4 library (AS400 PASE porting library).
++void os::Aix::initialize_libo4() {
++ Unimplemented();
++}
++
++// AIX: initialize the libperfstat library (we load this dynamically
++// because it is only available on AIX).
++void os::Aix::initialize_libperfstat() {
++
++ assert(os::Aix::on_aix(), "AIX only");
++
++ if (!libperfstat::init()) {
++ fprintf(stderr, "libperfstat initialization failed.\n");
++ assert(false, "libperfstat initialization failed");
++ } else {
++ if (Verbose) {
++ fprintf(stderr, "libperfstat initialized.\n");
++ }
++ }
++} // end: os::Aix::initialize_libperfstat
++
++/////////////////////////////////////////////////////////////////////////////
++// thread stack
++
++// function to query the current stack size using pthread_getthrds_np
++//
++// ! do not change anything here unless you know what you are doing !
++static void query_stack_dimensions(address* p_stack_base, size_t* p_stack_size) {
++
++  // This only works when invoked on a pthread. As we agreed not to use
++  // primordial threads anyway, that is asserted here.
++ guarantee(!os::Aix::is_primordial_thread(), "not allowed on the primordial thread");
++
++ // information about this api can be found (a) in the pthread.h header and
++ // (b) in http://publib.boulder.ibm.com/infocenter/pseries/v5r3/index.jsp?topic=/com.ibm.aix.basetechref/doc/basetrf1/pthread_getthrds_np.htm
++ //
++  // The use of this API to find out the current stack is kind of undefined.
++  // But after a lot of tries and asking IBM about it, I concluded that it is
++  // safe enough for cases where the pthread library creates the stacks itself.
++  // For cases where a caller-provided stack is handed to pthread_create, it
++  // seems not to work (the returned stack size in that case is 0).
++
++ pthread_t tid = pthread_self();
++ struct __pthrdsinfo pinfo;
++ char dummy[1]; // we only need this to satisfy the api and to not get E
++ int dummy_size = sizeof(dummy);
++
++ memset(&pinfo, 0, sizeof(pinfo));
++
++ const int rc = pthread_getthrds_np (&tid, PTHRDSINFO_QUERY_ALL, &pinfo,
++ sizeof(pinfo), dummy, &dummy_size);
++
++ if (rc != 0) {
++ fprintf(stderr, "pthread_getthrds_np failed (%d)\n", rc);
++ guarantee(0, "pthread_getthrds_np failed");
++ }
++
++ guarantee(pinfo.__pi_stackend, "returned stack base invalid");
++
++  // The following can happen when invoking pthread_getthrds_np on a pthread
++  // running on a user-provided stack (when handing down a stack to
++  // pthread_create, see pthread_attr_setstackaddr).
++  // Not sure what to do here - I feel inclined to forbid this use case completely.
++ guarantee(pinfo.__pi_stacksize, "returned stack size invalid");
++
++ // On AIX, stacks are not necessarily page aligned so round the base and size accordingly
++ if (p_stack_base) {
++ (*p_stack_base) = (address) align_size_up((intptr_t)pinfo.__pi_stackend, os::Aix::stack_page_size());
++ }
++
++ if (p_stack_size) {
++ (*p_stack_size) = pinfo.__pi_stacksize - os::Aix::stack_page_size();
++ }
++
++#ifndef PRODUCT
++ if (Verbose) {
++ fprintf(stderr,
++ "query_stack_dimensions() -> real stack_base=" INTPTR_FORMAT ", real stack_addr=" INTPTR_FORMAT
++ ", real stack_size=" INTPTR_FORMAT
++ ", stack_base=" INTPTR_FORMAT ", stack_size=" INTPTR_FORMAT "\n",
++ (intptr_t)pinfo.__pi_stackend, (intptr_t)pinfo.__pi_stackaddr, pinfo.__pi_stacksize,
++ (intptr_t)align_size_up((intptr_t)pinfo.__pi_stackend, os::Aix::stack_page_size()),
++ pinfo.__pi_stacksize - os::Aix::stack_page_size());
++ }
++#endif
++
++} // end query_stack_dimensions
++
++// get the current stack base from the OS (actually, the pthread library)
++address os::current_stack_base() {
++ address p;
++ query_stack_dimensions(&p, 0);
++ return p;
++}
++
++// get the current stack size from the OS (actually, the pthread library)
++size_t os::current_stack_size() {
++ size_t s;
++ query_stack_dimensions(0, &s);
++ return s;
++}
++
++// Refer to the comments in os_solaris.cpp park-unpark.
++//
++// Beware -- Some versions of NPTL embody a flaw where pthread_cond_timedwait() can
++// hang indefinitely. For instance NPTL 0.60 on 2.4.21-4ELsmp is vulnerable.
++// For specifics regarding the bug see GLIBC BUGID 261237 :
++// http://www.mail-archive.com/debian-glibc@lists.debian.org/msg10837.html.
++// Briefly, pthread_cond_timedwait() calls with an expiry time that's not in the future
++// will either hang or corrupt the condvar, resulting in subsequent hangs if the condvar
++// is used. (The simple C test-case provided in the GLIBC bug report manifests the
++// hang). The JVM is vulnerable via sleep(), Object.wait(timo), LockSupport.parkNanos()
++// and monitorenter when we're using 1-0 locking. All those operations may result in
++// calls to pthread_cond_timedwait(). Using LD_ASSUME_KERNEL to use an older version
++// of libpthread avoids the problem, but isn't practical.
++//
++// Possible remedies:
++//
++// 1. Establish a minimum relative wait time. 50 to 100 msecs seems to work.
++// This is palliative and probabilistic, however. If the thread is preempted
++// between the call to compute_abstime() and pthread_cond_timedwait(), more
++// than the minimum period may have passed, and the abstime may be stale (in the
++//    past) resulting in a hang. Using this technique reduces the odds of a hang
++// but the JVM is still vulnerable, particularly on heavily loaded systems.
++//
++// 2. Modify park-unpark to use per-thread (per ParkEvent) pipe-pairs instead
++// of the usual flag-condvar-mutex idiom. The write side of the pipe is set
++// NDELAY. unpark() reduces to write(), park() reduces to read() and park(timo)
++// reduces to poll()+read(). This works well, but consumes 2 FDs per extant
++// thread.
++//
++// 3. Embargo pthread_cond_timedwait() and implement a native "chron" thread
++// that manages timeouts. We'd emulate pthread_cond_timedwait() by enqueuing
++// a timeout request to the chron thread and then blocking via pthread_cond_wait().
++// This also works well. In fact it avoids kernel-level scalability impediments
++// on certain platforms that don't handle lots of active pthread_cond_timedwait()
++// timers in a graceful fashion.
++//
++// 4. When the abstime value is in the past it appears that control returns
++// correctly from pthread_cond_timedwait(), but the condvar is left corrupt.
++// Subsequent timedwait/wait calls may hang indefinitely. Given that, we
++// can avoid the problem by reinitializing the condvar -- by cond_destroy()
++// followed by cond_init() -- after all calls to pthread_cond_timedwait().
++// It may be possible to avoid reinitialization by checking the return
++// value from pthread_cond_timedwait(). In addition to reinitializing the
++// condvar we must establish the invariant that cond_signal() is only called
++// within critical sections protected by the adjunct mutex. This prevents
++// cond_signal() from "seeing" a condvar that's in the midst of being
++// reinitialized or that is corrupt. Sadly, this invariant obviates the
++// desirable signal-after-unlock optimization that avoids futile context switching.
++//
++// I'm also concerned that some versions of NPTL might allocate an auxiliary
++// structure when a condvar is used or initialized. cond_destroy() would
++// release the helper structure. Our reinitialize-after-timedwait fix would
++// put excessive stress on malloc/free and the locks protecting the C heap.
++//
++// We currently use (4). See the WorkAroundNPTLTimedWaitHang flag.
++// It may be possible to refine (4) by checking the kernel and NPTL versions
++// and only enabling the work-around for vulnerable environments.
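++//
++// Remedy (4) can be seen in Parker::park() below: when
++// WorkAroundNPTLTimedWaitHang is set and pthread_cond_timedwait() returns
++// non-zero, the condvar is destroyed and re-initialized before proceeding.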
++
++// utility to compute the abstime argument to timedwait:
++// millis is the relative timeout time
++// abstime will be the absolute timeout time
++// TODO: replace compute_abstime() with unpackTime()
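++//
++// Worked example: millis = 1500 with now = {tv_sec = 100, tv_usec = 600000}
++// gives seconds = 1 and leftover millis = 500; usec = 600000 + 500000 =
++// 1100000 overflows one second, so the result is {tv_sec = 102,
++// tv_nsec = 100000 * 1000}.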
++
++static struct timespec* compute_abstime(timespec* abstime, jlong millis) {
++ if (millis < 0) millis = 0;
++ struct timeval now;
++ int status = gettimeofday(&now, NULL);
++ assert(status == 0, "gettimeofday");
++ jlong seconds = millis / 1000;
++ millis %= 1000;
++ if (seconds > 50000000) { // see man cond_timedwait(3T)
++ seconds = 50000000;
++ }
++ abstime->tv_sec = now.tv_sec + seconds;
++ long usec = now.tv_usec + millis * 1000;
++ if (usec >= 1000000) {
++ abstime->tv_sec += 1;
++ usec -= 1000000;
++ }
++ abstime->tv_nsec = usec * 1000;
++ return abstime;
++}
++
++
++// Test-and-clear _Event, always leaves _Event set to 0, returns immediately.
++// Conceptually TryPark() should be equivalent to park(0).
++
++int os::PlatformEvent::TryPark() {
++ for (;;) {
++ const int v = _Event;
++ guarantee ((v == 0) || (v == 1), "invariant");
++ if (Atomic::cmpxchg (0, &_Event, v) == v) return v;
++ }
++}
++
++void os::PlatformEvent::park() { // AKA "down()"
++ // Invariant: Only the thread associated with the Event/PlatformEvent
++ // may call park().
++ // TODO: assert that _Assoc != NULL or _Assoc == Self
++ int v;
++ for (;;) {
++ v = _Event;
++ if (Atomic::cmpxchg (v-1, &_Event, v) == v) break;
++ }
++ guarantee (v >= 0, "invariant");
++ if (v == 0) {
++ // Do this the hard way by blocking ...
++ int status = pthread_mutex_lock(_mutex);
++ assert_status(status == 0, status, "mutex_lock");
++ guarantee (_nParked == 0, "invariant");
++ ++ _nParked;
++ while (_Event < 0) {
++ status = pthread_cond_wait(_cond, _mutex);
++ assert_status(status == 0 || status == ETIMEDOUT, status, "cond_timedwait");
++ }
++ -- _nParked;
++
++ // In theory we could move the ST of 0 into _Event past the unlock(),
++ // but then we'd need a MEMBAR after the ST.
++ _Event = 0;
++ status = pthread_mutex_unlock(_mutex);
++ assert_status(status == 0, status, "mutex_unlock");
++ }
++ guarantee (_Event >= 0, "invariant");
++}
++
++int os::PlatformEvent::park(jlong millis) {
++ guarantee (_nParked == 0, "invariant");
++
++ int v;
++ for (;;) {
++ v = _Event;
++ if (Atomic::cmpxchg (v-1, &_Event, v) == v) break;
++ }
++ guarantee (v >= 0, "invariant");
++ if (v != 0) return OS_OK;
++
++ // We do this the hard way, by blocking the thread.
++ // Consider enforcing a minimum timeout value.
++ struct timespec abst;
++ compute_abstime(&abst, millis);
++
++ int ret = OS_TIMEOUT;
++ int status = pthread_mutex_lock(_mutex);
++ assert_status(status == 0, status, "mutex_lock");
++ guarantee (_nParked == 0, "invariant");
++ ++_nParked;
++
++ // Object.wait(timo) will return because of
++ // (a) notification
++ // (b) timeout
++ // (c) thread.interrupt
++ //
++ // Thread.interrupt and object.notify{All} both call Event::set.
++ // That is, we treat thread.interrupt as a special case of notification.
++ // The underlying Solaris implementation, cond_timedwait, admits
++ // spurious/premature wakeups, but the JLS/JVM spec prevents the
++ // JVM from making those visible to Java code. As such, we must
++ // filter out spurious wakeups. We assume all ETIME returns are valid.
++ //
++ // TODO: properly differentiate simultaneous notify+interrupt.
++ // In that case, we should propagate the notify to another waiter.
++
++ while (_Event < 0) {
++ status = pthread_cond_timedwait(_cond, _mutex, &abst);
++ assert_status(status == 0 || status == ETIMEDOUT,
++ status, "cond_timedwait");
++ if (!FilterSpuriousWakeups) break; // previous semantics
++ if (status == ETIMEDOUT) break;
++ // We consume and ignore EINTR and spurious wakeups.
++ }
++ --_nParked;
++ if (_Event >= 0) {
++ ret = OS_OK;
++ }
++ _Event = 0;
++ status = pthread_mutex_unlock(_mutex);
++ assert_status(status == 0, status, "mutex_unlock");
++ assert (_nParked == 0, "invariant");
++ return ret;
++}
++
++void os::PlatformEvent::unpark() {
++ int v, AnyWaiters;
++ for (;;) {
++ v = _Event;
++ if (v > 0) {
++ // The LD of _Event could have reordered or be satisfied
++ // by a read-aside from this processor's write buffer.
++ // To avoid problems execute a barrier and then
++ // ratify the value.
++ OrderAccess::fence();
++ if (_Event == v) return;
++ continue;
++ }
++ if (Atomic::cmpxchg (v+1, &_Event, v) == v) break;
++ }
++ if (v < 0) {
++ // Wait for the thread associated with the event to vacate
++ int status = pthread_mutex_lock(_mutex);
++ assert_status(status == 0, status, "mutex_lock");
++ AnyWaiters = _nParked;
++
++    if (AnyWaiters != 0) {
++      // Signal while still holding the mutex -- on this platform the mutex
++      // should be locked for pthread_cond_signal(_cond).
++      status = pthread_cond_signal(_cond);
++      assert_status(status == 0, status, "cond_signal");
++    }
++ status = pthread_mutex_unlock(_mutex);
++ assert_status(status == 0, status, "mutex_unlock");
++ }
++
++  // Note that unlike the Linux port, we signal *before* dropping the lock
++  // for these "immortal" Events. In rare circumstances a thread may still
++  // return prematurely from cond_{timed}wait(), but the spurious wakeup is
++  // benign and the victim will simply re-test the condition and re-park itself.
++}
++
++
++// JSR166
++// -------------------------------------------------------
++
++//
++// The solaris and linux implementations of park/unpark are fairly
++// conservative for now, but can be improved. They currently use a
++// mutex/condvar pair, plus a count.
++// Park decrements count if > 0, else does a condvar wait. Unpark
++// sets count to 1 and signals condvar. Only one thread ever waits
++// on the condvar. Contention seen when trying to park implies that someone
++// is unparking you, so don't wait. And spurious returns are fine, so there
++// is no need to track notifications.
++//
++
++#define MAX_SECS 100000000
++//
++// This code is common to linux and solaris and will be moved to a
++// common place in dolphin.
++//
++// The passed in time value is either a relative time in nanoseconds
++// or an absolute time in milliseconds. Either way it has to be unpacked
++// into suitable seconds and nanoseconds components and stored in the
++// given timespec structure.
++// Given time is a 64-bit value and the time_t used in the timespec is only
++// a signed-32-bit value (except on 64-bit Linux) we have to watch for
++// overflow if times far in the future are given. Further, on Solaris versions
++// prior to 10 there is a restriction (see cond_timedwait) that the specified
++// number of seconds, in abstime, is less than current_time + 100,000,000.
++// As it will be 28 years before "now + 100000000" will overflow we can
++// ignore overflow and just impose a hard-limit on seconds using the value
++// of "now + 100,000,000". This places a limit on the timeout of about 3.17
++// years from "now".
++//
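++// Worked example (relative case): time = 2500000000 ns with now.tv_usec = 0
++// unpacks to absTime = { now.tv_sec + 2, 500000000 }.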
++
++static void unpackTime(timespec* absTime, bool isAbsolute, jlong time) {
++ assert (time > 0, "convertTime");
++
++ struct timeval now;
++ int status = gettimeofday(&now, NULL);
++ assert(status == 0, "gettimeofday");
++
++ time_t max_secs = now.tv_sec + MAX_SECS;
++
++ if (isAbsolute) {
++ jlong secs = time / 1000;
++ if (secs > max_secs) {
++ absTime->tv_sec = max_secs;
++ }
++ else {
++ absTime->tv_sec = secs;
++ }
++ absTime->tv_nsec = (time % 1000) * NANOSECS_PER_MILLISEC;
++ }
++ else {
++ jlong secs = time / NANOSECS_PER_SEC;
++ if (secs >= MAX_SECS) {
++ absTime->tv_sec = max_secs;
++ absTime->tv_nsec = 0;
++ }
++ else {
++ absTime->tv_sec = now.tv_sec + secs;
++ absTime->tv_nsec = (time % NANOSECS_PER_SEC) + now.tv_usec*1000;
++ if (absTime->tv_nsec >= NANOSECS_PER_SEC) {
++ absTime->tv_nsec -= NANOSECS_PER_SEC;
++ ++absTime->tv_sec; // note: this must be <= max_secs
++ }
++ }
++ }
++ assert(absTime->tv_sec >= 0, "tv_sec < 0");
++ assert(absTime->tv_sec <= max_secs, "tv_sec > max_secs");
++ assert(absTime->tv_nsec >= 0, "tv_nsec < 0");
++ assert(absTime->tv_nsec < NANOSECS_PER_SEC, "tv_nsec >= nanos_per_sec");
++}
++
++void Parker::park(bool isAbsolute, jlong time) {
++ // Optional fast-path check:
++ // Return immediately if a permit is available.
++ if (_counter > 0) {
++ _counter = 0;
++ OrderAccess::fence();
++ return;
++ }
++
++ Thread* thread = Thread::current();
++ assert(thread->is_Java_thread(), "Must be JavaThread");
++ JavaThread *jt = (JavaThread *)thread;
++
++ // Optional optimization -- avoid state transitions if there's an interrupt pending.
++ // Check interrupt before trying to wait
++ if (Thread::is_interrupted(thread, false)) {
++ return;
++ }
++
++ // Next, demultiplex/decode time arguments
++ timespec absTime;
++ if (time < 0 || (isAbsolute && time == 0)) { // don't wait at all
++ return;
++ }
++ if (time > 0) {
++ unpackTime(&absTime, isAbsolute, time);
++ }
++
++
++ // Enter safepoint region
++ // Beware of deadlocks such as 6317397.
++ // The per-thread Parker:: mutex is a classic leaf-lock.
++ // In particular a thread must never block on the Threads_lock while
++ // holding the Parker:: mutex. If safepoints are pending both the
++ // the ThreadBlockInVM() CTOR and DTOR may grab Threads_lock.
++ ThreadBlockInVM tbivm(jt);
++
++  // Don't wait if we cannot get the lock, since interference arises from
++  // unblocking. Also, check for a pending interrupt before trying to wait.
++ if (Thread::is_interrupted(thread, false) || pthread_mutex_trylock(_mutex) != 0) {
++ return;
++ }
++
++ int status;
++ if (_counter > 0) { // no wait needed
++ _counter = 0;
++ status = pthread_mutex_unlock(_mutex);
++ assert (status == 0, "invariant");
++ OrderAccess::fence();
++ return;
++ }
++
++#ifdef ASSERT
++ // Don't catch signals while blocked; let the running threads have the signals.
++ // (This allows a debugger to break into the running thread.)
++ sigset_t oldsigs;
++ sigset_t* allowdebug_blocked = os::Aix::allowdebug_blocked_signals();
++ pthread_sigmask(SIG_BLOCK, allowdebug_blocked, &oldsigs);
++#endif
++
++ OSThreadWaitState osts(thread->osthread(), false /* not Object.wait() */);
++ jt->set_suspend_equivalent();
++ // cleared by handle_special_suspend_equivalent_condition() or java_suspend_self()
++
++ if (time == 0) {
++ status = pthread_cond_wait (_cond, _mutex);
++ } else {
++ status = pthread_cond_timedwait (_cond, _mutex, &absTime);
++ if (status != 0 && WorkAroundNPTLTimedWaitHang) {
++ pthread_cond_destroy (_cond);
++ pthread_cond_init (_cond, NULL);
++ }
++ }
++ assert_status(status == 0 || status == EINTR ||
++ status == ETIME || status == ETIMEDOUT,
++ status, "cond_timedwait");
++
++#ifdef ASSERT
++ pthread_sigmask(SIG_SETMASK, &oldsigs, NULL);
++#endif
++
++ _counter = 0;
++ status = pthread_mutex_unlock(_mutex);
++ assert_status(status == 0, status, "invariant");
++ // If externally suspended while waiting, re-suspend
++ if (jt->handle_special_suspend_equivalent_condition()) {
++ jt->java_suspend_self();
++ }
++
++ OrderAccess::fence();
++}
++
++void Parker::unpark() {
++ int s, status;
++ status = pthread_mutex_lock(_mutex);
++ assert (status == 0, "invariant");
++ s = _counter;
++ _counter = 1;
++ if (s < 1) {
++ if (WorkAroundNPTLTimedWaitHang) {
++ status = pthread_cond_signal (_cond);
++ assert (status == 0, "invariant");
++ status = pthread_mutex_unlock(_mutex);
++ assert (status == 0, "invariant");
++ } else {
++ status = pthread_mutex_unlock(_mutex);
++ assert (status == 0, "invariant");
++ status = pthread_cond_signal (_cond);
++ assert (status == 0, "invariant");
++ }
++ } else {
++    status = pthread_mutex_unlock(_mutex);
++ assert (status == 0, "invariant");
++ }
++}
++
++
++extern char** environ;
++
++// Run the specified command in a separate process. Return its exit value,
++// or -1 on failure (e.g. can't fork a new process).
++// Unlike system(), this function can be called from a signal handler. It
++// doesn't block SIGINT et al.
++int os::fork_and_exec(char* cmd) {
++ char * argv[4] = {"sh", "-c", cmd, NULL};
++
++ pid_t pid = fork();
++
++ if (pid < 0) {
++ // fork failed
++ return -1;
++
++ } else if (pid == 0) {
++ // child process
++
++ // try to be consistent with system(), which uses "/usr/bin/sh" on AIX
++ execve("/usr/bin/sh", argv, environ);
++
++ // execve failed
++ _exit(-1);
++
++ } else {
++ // copied from J2SE ..._waitForProcessExit() in UNIXProcess_md.c; we don't
++ // care about the actual exit code, for now.
++
++ int status;
++
++ // Wait for the child process to exit. This returns immediately if
++    // the child has already exited.
++ while (waitpid(pid, &status, 0) < 0) {
++ switch (errno) {
++ case ECHILD: return 0;
++ case EINTR: break;
++ default: return -1;
++ }
++ }
++
++ if (WIFEXITED(status)) {
++ // The child exited normally; get its exit code.
++ return WEXITSTATUS(status);
++ } else if (WIFSIGNALED(status)) {
++ // The child exited because of a signal
++ // The best value to return is 0x80 + signal number,
++ // because that is what all Unix shells do, and because
++ // it allows callers to distinguish between process exit and
++ // process death by signal.
++ return 0x80 + WTERMSIG(status);
++ } else {
++ // Unknown exit code; pass it through
++ return status;
++ }
++ }
++ // Remove warning.
++ return -1;
++}
++
++// is_headless_jre()
++//
++// Test for the existence of xawt/libmawt.so or libawt_xawt.so
++// in order to report if we are running in a headless jre.
++//
++// Since JDK 8, xawt/libmawt.so has moved into the same directory
++// as libawt.so and been renamed libawt_xawt.so.
++bool os::is_headless_jre() {
++ struct stat statbuf;
++ char buf[MAXPATHLEN];
++ char libmawtpath[MAXPATHLEN];
++ const char *xawtstr = "/xawt/libmawt.so";
++ const char *new_xawtstr = "/libawt_xawt.so";
++
++ char *p;
++
++ // Get path to libjvm.so
++ os::jvm_path(buf, sizeof(buf));
++
++ // Get rid of libjvm.so
++ p = strrchr(buf, '/');
++ if (p == NULL) return false;
++ else *p = '\0';
++
++ // Get rid of client or server
++ p = strrchr(buf, '/');
++ if (p == NULL) return false;
++ else *p = '\0';
++
++ // check xawt/libmawt.so
++ strcpy(libmawtpath, buf);
++ strcat(libmawtpath, xawtstr);
++ if (::stat(libmawtpath, &statbuf) == 0) return false;
++
++ // check libawt_xawt.so
++ strcpy(libmawtpath, buf);
++ strcat(libmawtpath, new_xawtstr);
++ if (::stat(libmawtpath, &statbuf) == 0) return false;
++
++ return true;
++}
++
++// Get the default path to the core file
++// Returns the length of the string
++int os::get_core_path(char* buffer, size_t bufferSize) {
++ const char* p = get_current_directory(buffer, bufferSize);
++
++ if (p == NULL) {
++ assert(p != NULL, "failed to get current directory");
++ return 0;
++ }
++
++ return strlen(buffer);
++}
++
++#ifndef PRODUCT
++void TestReserveMemorySpecial_test() {
++ // No tests available for this platform
++}
++#endif
+--- ./hotspot/src/os/aix/vm/os_aix.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/os/aix/vm/os_aix.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,385 @@
++/*
++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef OS_AIX_VM_OS_AIX_HPP
++#define OS_AIX_VM_OS_AIX_HPP
++
++// Information about the protection of the page at address '0' on this os.
++static bool zero_page_read_protected() { return false; }
++
++// Class Aix defines the interface to the Aix operating systems.
++
++class Aix {
++ friend class os;
++
++ // For signal-chaining
++ // highest so far (AIX 5.2) is SIGSAK (63)
++#define MAXSIGNUM 63
++ // length of strings included in the libperfstat structures
++#define IDENTIFIER_LENGTH 64
++
++ static struct sigaction sigact[MAXSIGNUM]; // saved preinstalled sigactions
++ static unsigned int sigs; // mask of signals that have
++ // preinstalled signal handlers
++ static bool libjsig_is_loaded; // libjsig that interposes sigaction(),
++ // __sigaction(), signal() is loaded
++ static struct sigaction *(*get_signal_action)(int);
++ static struct sigaction *get_preinstalled_handler(int);
++ static void save_preinstalled_handler(int, struct sigaction&);
++
++ static void check_signal_handler(int sig);
++
++ // For signal flags diagnostics
++ static int sigflags[MAXSIGNUM];
++
++ protected:
++
++ static julong _physical_memory;
++ static pthread_t _main_thread;
++ static Mutex* _createThread_lock;
++ static int _page_size;
++ static int _logical_cpus;
++
++ // -1 = uninitialized, 0 = AIX, 1 = OS/400 (PASE)
++ static int _on_pase;
++
++ // -1 = uninitialized, otherwise 16 bit number:
++ // lower 8 bit - minor version
++ // higher 8 bit - major version
++ // For AIX, e.g. 0x0601 for AIX 6.1
++ // for OS/400 e.g. 0x0504 for OS/400 V5R4
++ static int _os_version;
++
++ // -1 = uninitialized,
++ // 0 - SPEC1170 not requested (XPG_SUS_ENV is OFF or not set)
++ // 1 - SPEC1170 requested (XPG_SUS_ENV is ON)
++ static int _xpg_sus_mode;
++
++ // -1 = uninitialized,
++ // 0 - EXTSHM=OFF or not set
++ // 1 - EXTSHM=ON
++ static int _extshm;
++
++ // page sizes on AIX.
++ //
++  // AIX supports four different page sizes - 4K, 64K, 16MB, 16GB. The latter
++  // two (16M "large" and 16G "huge" pages, respectively) require special setup
++  // and are normally not available.
++ //
++ // AIX supports multiple page sizes per process, for:
++ // - Stack (of the primordial thread, so not relevant for us)
++ // - Data - data, bss, heap, for us also pthread stacks
++ // - Text - text code
++ // - shared memory
++ //
++ // Default page sizes can be set via linker options (-bdatapsize, -bstacksize, ...)
++ // and via environment variable LDR_CNTRL (DATAPSIZE, STACKPSIZE, ...)
++ //
++ // For shared memory, page size can be set dynamically via shmctl(). Different shared memory
++ // regions can have different page sizes.
++ //
++  // More information can be found at the IBM info center:
++ // http://publib.boulder.ibm.com/infocenter/aix/v6r1/index.jsp?topic=/com.ibm.aix.prftungd/doc/prftungd/multiple_page_size_app_support.htm
++ //
++ // -----
++ // We want to support 4K and 64K and, if the machine is set up correctly, 16MB pages.
++ //
++
++ // page size of the stack of newly created pthreads
++ // (should be LDR_CNTRL DATAPSIZE because stack is allocated on heap by pthread lib)
++ static int _stack_page_size;
++
++ // Default shm page size. Read: what page size shared memory will be backed
++ // with if no page size was set explicitly using shmctl(SHM_PAGESIZE).
++ // Should be LDR_CNTRL SHMPSIZE.
++ static size_t _shm_default_page_size;
++
++ // True if sys V shm can be used with 64K pages dynamically.
++  // (via shmctl(.. SHM_PAGESIZE..). Should be true for AIX 5.3 and
++ // newer / PASE V6R1 and newer. (0 or 1, -1 if not initialized)
++ static int _can_use_64K_pages;
++
++ // True if sys V shm can be used with 16M pages dynamically.
++ // (via shmctl(.. SHM_PAGESIZE..). Only true on AIX 5.3 and
++ // newer, if the system was set up to use 16M pages and the
++ // jvm has enough user rights. (0 or 1, -1 if not initialized)
++ static int _can_use_16M_pages;
++
++ static julong available_memory();
++ static julong physical_memory() { return _physical_memory; }
++ static void initialize_system_info();
++
++  // OS recognition (PASE/AIX, OS level). Call this before calling any of
++  // the functions Aix::on_pase() or Aix::os_version().
++ static void initialize_os_info();
++
++ static int commit_memory_impl(char* addr, size_t bytes, bool exec);
++ static int commit_memory_impl(char* addr, size_t bytes,
++ size_t alignment_hint, bool exec);
++
++  // Scan environment for important settings which might affect the
++  // VM. Trace out settings. Warn about invalid settings and/or
++  // correct them.
++  //
++  // Must run after os::Aix::initialize_os_info().
++ static void scan_environment();
++
++ // Retrieve information about multipage size support. Will initialize
++ // _page_size, _stack_page_size, _can_use_64K_pages/_can_use_16M_pages
++ static void query_multipage_support();
++
++ // Initialize libo4 (on PASE) and libperfstat (on AIX). Call this
++ // before relying on functions from either lib, e.g. Aix::get_meminfo().
++ static void initialize_libo4();
++ static void initialize_libperfstat();
++
++ static bool supports_variable_stack_size();
++
++ public:
++ static void init_thread_fpu_state();
++ static pthread_t main_thread(void) { return _main_thread; }
++ // returns kernel thread id (similar to LWP id on Solaris), which can be
++ // used to access /proc
++ static pid_t gettid();
++ static void set_createThread_lock(Mutex* lk) { _createThread_lock = lk; }
++ static Mutex* createThread_lock(void) { return _createThread_lock; }
++ static void hotspot_sigmask(Thread* thread);
++
++ // Given an address, returns the size of the page backing that address
++ static size_t query_pagesize(void* p);
++
++ // Return `true' if the calling thread is the primordial thread. The
++ // primordial thread is the thread which contains the main function,
++ // *not* necessarily the thread which initialized the VM by calling
++ // JNI_CreateJavaVM.
++ static bool is_primordial_thread(void);
++
++ static int page_size(void) {
++ assert(_page_size != -1, "not initialized");
++ return _page_size;
++ }
++
++ // Accessor methods for stack page size which may be different from usual page size.
++ static int stack_page_size(void) {
++ assert(_stack_page_size != -1, "not initialized");
++ return _stack_page_size;
++ }
++
++ // default shm page size. Read: what page size shared memory
++ // will be backed with if no page size was set explicitly using shmctl(SHM_PAGESIZE).
++ // Should be LDR_CNTRL SHMPSIZE.
++ static int shm_default_page_size(void) {
++ assert(_shm_default_page_size != -1, "not initialized");
++ return _shm_default_page_size;
++ }
++
++ // Return true if sys V shm can be used with 64K pages dynamically
++ // (via shmctl(.. SHM_PAGESIZE..).
++ static bool can_use_64K_pages () {
++ assert(_can_use_64K_pages != -1, "not initialized");
++ return _can_use_64K_pages == 1 ? true : false;
++ }
++
++ // Return true if sys V shm can be used with 16M pages dynamically.
++ // (via shmctl(.. SHM_PAGESIZE..).
++ static bool can_use_16M_pages () {
++ assert(_can_use_16M_pages != -1, "not initialized");
++ return _can_use_16M_pages == 1 ? true : false;
++ }
++
++ static address ucontext_get_pc(ucontext_t* uc);
++ static intptr_t* ucontext_get_sp(ucontext_t* uc);
++ static intptr_t* ucontext_get_fp(ucontext_t* uc);
++ // Set PC into context. Needed for continuation after signal.
++ static void ucontext_set_pc(ucontext_t* uc, address pc);
++
++ // This boolean allows users to forward their own non-matching signals
++ // to JVM_handle_aix_signal, harmlessly.
++ static bool signal_handlers_are_installed;
++
++ static int get_our_sigflags(int);
++ static void set_our_sigflags(int, int);
++ static void signal_sets_init();
++ static void install_signal_handlers();
++ static void set_signal_handler(int, bool);
++ static bool is_sig_ignored(int sig);
++
++ static sigset_t* unblocked_signals();
++ static sigset_t* vm_signals();
++ static sigset_t* allowdebug_blocked_signals();
++
++ // For signal-chaining
++ static struct sigaction *get_chained_signal_action(int sig);
++ static bool chained_handler(int sig, siginfo_t* siginfo, void* context);
++
++ // libpthread version string
++ static void libpthread_init();
++
++ // Minimum stack size a thread can be created with (allowing
++ // the VM to completely create the thread and enter user code)
++ static size_t min_stack_allowed;
++
++ // Return default stack size or guard size for the specified thread type
++ static size_t default_stack_size(os::ThreadType thr_type);
++ static size_t default_guard_size(os::ThreadType thr_type);
++
++ // Function returns true if we run on OS/400 (pase), false if we run
++ // on AIX.
++ static bool on_pase() {
++ assert(_on_pase != -1, "not initialized");
++ return _on_pase ? true : false;
++ }
++
++ // Function returns true if we run on AIX, false if we run on OS/400
++ // (pase).
++ static bool on_aix() {
++ assert(_on_pase != -1, "not initialized");
++ return _on_pase ? false : true;
++ }
++
++ // -1 = uninitialized, otherwise 16 bit number:
++ // lower 8 bits - minor version
++ // higher 8 bits - major version
++ // For AIX, e.g. 0x0601 for AIX 6.1
++ // for OS/400 e.g. 0x0504 for OS/400 V5R4
++ static int os_version () {
++ assert(_os_version != -1, "not initialized");
++ return _os_version;
++ }
++
++ // Convenience method: returns true if running on AIX 5.3 or older.
++ static bool on_aix_53_or_older() {
++ return on_aix() && os_version() <= 0x0503;
++ }
++
++ // Returns true if we run in SPEC1170 compliant mode (XPG_SUS_ENV=ON).
++ static bool xpg_sus_mode() {
++ assert(_xpg_sus_mode != -1, "not initialized");
++ return _xpg_sus_mode;
++ }
++
++ // Returns true if EXTSHM=ON.
++ static bool extshm() {
++ assert(_extshm != -1, "not initialized");
++ return _extshm;
++ }
++
++ // result struct for get_meminfo()
++ struct meminfo_t {
++
++ // Amount of virtual memory (in units of 4 KB pages)
++ unsigned long long virt_total;
++
++ // Amount of real memory, in bytes
++ unsigned long long real_total;
++
++ // Amount of free real memory, in bytes
++ unsigned long long real_free;
++
++ // Total amount of paging space, in bytes
++ unsigned long long pgsp_total;
++
++ // Amount of free paging space, in bytes
++ unsigned long long pgsp_free;
++
++ };
++
++ // Result struct for get_cpuinfo().
++ struct cpuinfo_t {
++ char description[IDENTIFIER_LENGTH]; // processor description (type/official name)
++ u_longlong_t processorHZ; // processor speed in Hz
++ int ncpus; // number of active logical processors
++ double loadavg[3]; // (1<<SBITS) times the average number of runnable processes during the last 1, 5 and 15 minutes; to get the load average, divide by (1<<SBITS) (SBITS is defined in <sys/proc.h>)
++ char version[20]; // processor version from _system_configuration (sys/systemcfg.h)
++ };
++
++ // Functions to retrieve memory information on AIX, PASE.
++ // (on AIX, using libperfstat, on PASE with libo4.so).
++ // Returns true if ok, false if error.
++ static bool get_meminfo(meminfo_t* pmi);
++
++ // Function to retrieve cpu information on AIX
++ // (on AIX, using libperfstat)
++ // Returns true if ok, false if error.
++ static bool get_cpuinfo(cpuinfo_t* pci);
++
++}; // os::Aix class
++
++
++class PlatformEvent : public CHeapObj {
++ private:
++ double CachePad [4]; // increase odds that _mutex is sole occupant of cache line
++ volatile int _Event;
++ volatile int _nParked;
++ pthread_mutex_t _mutex [1];
++ pthread_cond_t _cond [1];
++ double PostPad [2];
++ Thread * _Assoc;
++
++ public: // TODO-FIXME: make dtor private
++ ~PlatformEvent() { guarantee (0, "invariant"); }
++
++ public:
++ PlatformEvent() {
++ int status;
++ status = pthread_cond_init (_cond, NULL);
++ assert_status(status == 0, status, "cond_init");
++ status = pthread_mutex_init (_mutex, NULL);
++ assert_status(status == 0, status, "mutex_init");
++ _Event = 0;
++ _nParked = 0;
++ _Assoc = NULL;
++ }
++
++ // Use caution with reset() and fired() -- they may require MEMBARs
++ void reset() { _Event = 0; }
++ int fired() { return _Event; }
++ void park ();
++ void unpark ();
++ int TryPark ();
++ int park (jlong millis);
++ void SetAssociation (Thread * a) { _Assoc = a; }
++};
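PlatformEvent above is the pthread-based binary event HotSpot layers thread parking on; note the comment that reset() and fired() may need memory barriers. A rough usage sketch, assuming heap allocation (the destructor is deliberately fatal, so instances are never destroyed):

    // Minimal sketch: one thread blocks in park() until another posts the
    // event with unpark(). PlatformEvent is a CHeapObj, so it is allocated
    // with new and intentionally never destructed.
    PlatformEvent* ev = new PlatformEvent();
    // waiting thread:   ev->park();     // blocks until the event is posted
    // posting thread:   ev->unpark();   // releases the waiter (or makes the
    //                                   // next park() return immediately)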
++
++class PlatformParker : public CHeapObj {
++ protected:
++ pthread_mutex_t _mutex [1];
++ pthread_cond_t _cond [1];
++
++ public: // TODO-FIXME: make dtor private
++ ~PlatformParker() { guarantee (0, "invariant"); }
++
++ public:
++ PlatformParker() {
++ int status;
++ status = pthread_cond_init (_cond, NULL);
++ assert_status(status == 0, status, "cond_init");
++ status = pthread_mutex_init (_mutex, NULL);
++ assert_status(status == 0, status, "mutex_init");
++ }
++};
++
++#endif // OS_AIX_VM_OS_AIX_HPP
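For orientation, the os_version() accessor above packs the OS level into a 16-bit value. A minimal sketch of decoding it, assuming only the encoding documented in the header comment (0x0601 for AIX 6.1, 0x0504 for OS/400 V5R4); decode_os_version is a hypothetical helper, not part of the patch:

    #include <stdio.h>

    // Major version sits in the higher 8 bits, minor in the lower 8 bits.
    static void decode_os_version(int v) {
      int major = (v >> 8) & 0xff;   // 6 for AIX 6.1 (0x0601)
      int minor = v & 0xff;          // 1 for AIX 6.1 (0x0601)
      printf("OS level %d.%d\n", major, minor);
    }

This is the same arithmetic on_aix_53_or_older() relies on when it compares os_version() against 0x0503.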
+--- ./hotspot/src/os/aix/vm/os_aix.inline.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/os/aix/vm/os_aix.inline.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,286 @@
++/*
++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef OS_AIX_VM_OS_AIX_INLINE_HPP
++#define OS_AIX_VM_OS_AIX_INLINE_HPP
++
++#include "runtime/atomic.hpp"
++#include "runtime/os.hpp"
++#ifdef TARGET_OS_ARCH_aix_ppc
++# include "atomic_aix_ppc.inline.hpp"
++# include "orderAccess_aix_ppc.inline.hpp"
++#endif
++
++// System includes
++
++#include <unistd.h>
++#include <sys/socket.h>
++#include <sys/poll.h>
++#include <sys/ioctl.h>
++#include <netdb.h>
++
++// Defined in the system headers included above.
++#undef rem_size
++
++inline void* os::thread_local_storage_at(int index) {
++ return pthread_getspecific((pthread_key_t)index);
++}
++
++inline const char* os::file_separator() {
++ return "/";
++}
++
++inline const char* os::line_separator() {
++ return "\n";
++}
++
++inline const char* os::path_separator() {
++ return ":";
++}
++
++// File names are case-sensitive on windows only
++inline int os::file_name_strcmp(const char* s1, const char* s2) {
++ return strcmp(s1, s2);
++}
++
++inline bool os::obsolete_option(const JavaVMOption *option) {
++ return false;
++}
++
++inline bool os::uses_stack_guard_pages() {
++ return true;
++}
++
++inline bool os::allocate_stack_guard_pages() {
++ assert(uses_stack_guard_pages(), "sanity check");
++ return true;
++}
++
++
++// On Aix, reservations are made on a page by page basis, nothing to do.
++inline void os::pd_split_reserved_memory(char *base, size_t size,
++ size_t split, bool realloc) {
++}
++
++
++// Bang the shadow pages if they need to be touched to be mapped.
++inline void os::bang_stack_shadow_pages() {
++}
++
++inline void os::dll_unload(void *lib) {
++ ::dlclose(lib);
++}
++
++inline const int os::default_file_open_flags() { return 0;}
++
++inline DIR* os::opendir(const char* dirname)
++{
++ assert(dirname != NULL, "just checking");
++ return ::opendir(dirname);
++}
++
++inline int os::readdir_buf_size(const char *path)
++{
++ // according to aix sys/limits, NAME_MAX must be retrieved at runtime.
++ const long my_NAME_MAX = pathconf(path, _PC_NAME_MAX);
++ return my_NAME_MAX + sizeof(dirent) + 1;
++}
++
++inline jlong os::lseek(int fd, jlong offset, int whence) {
++ return (jlong) ::lseek64(fd, offset, whence);
++}
++
++inline int os::fsync(int fd) {
++ return ::fsync(fd);
++}
++
++inline char* os::native_path(char *path) {
++ return path;
++}
++
++inline int os::ftruncate(int fd, jlong length) {
++ return ::ftruncate64(fd, length);
++}
++
++inline struct dirent* os::readdir(DIR* dirp, dirent *dbuf)
++{
++ dirent* p;
++ int status;
++ assert(dirp != NULL, "just checking");
++
++ // NOTE: Linux readdir_r (on RH 6.2 and 7.2 at least) is NOT like the POSIX
++ // version. Here is the doc for this function:
++ // http://www.gnu.org/manual/glibc-2.2.3/html_node/libc_262.html
++
++ if((status = ::readdir_r(dirp, dbuf, &p)) != 0) {
++ errno = status;
++ return NULL;
++ } else
++ return p;
++}
++
++inline int os::closedir(DIR *dirp) {
++ assert(dirp != NULL, "argument is NULL");
++ return ::closedir(dirp);
++}
++
++// macros for restartable system calls
++
++#define RESTARTABLE(_cmd, _result) do { \
++ _result = _cmd; \
++ } while(((int)_result == OS_ERR) && (errno == EINTR))
++
++#define RESTARTABLE_RETURN_INT(_cmd) do { \
++ int _result; \
++ RESTARTABLE(_cmd, _result); \
++ return _result; \
++} while(false)
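The two macros above implement the usual retry-on-EINTR idiom. Hand-expanding a call site such as RESTARTABLE(::close(fd), result) gives the equivalent loop below; the operation is simply reissued for as long as it fails because a signal interrupted it:

    int result;
    do {
      result = ::close(fd);                 // the wrapped system call
    } while (((int)result == OS_ERR) && (errno == EINTR));

RESTARTABLE_RETURN_INT wraps the same loop in a local variable and returns the final result, which is how the socket wrappers further down use it.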
++
++// We don't have NUMA support on Aix, but we need this for compilation.
++inline bool os::numa_has_static_binding() { ShouldNotReachHere(); return true; }
++inline bool os::numa_has_group_homing() { ShouldNotReachHere(); return false; }
++
++inline size_t os::restartable_read(int fd, void *buf, unsigned int nBytes) {
++ size_t res;
++ RESTARTABLE( (size_t) ::read(fd, buf, (size_t) nBytes), res);
++ return res;
++}
++
++inline size_t os::write(int fd, const void *buf, unsigned int nBytes) {
++ size_t res;
++ RESTARTABLE((size_t) ::write(fd, buf, (size_t) nBytes), res);
++ return res;
++}
++
++inline int os::close(int fd) {
++ return ::close(fd);
++}
++
++inline int os::socket_close(int fd) {
++ return ::close(fd);
++}
++
++inline int os::socket(int domain, int type, int protocol) {
++ return ::socket(domain, type, protocol);
++}
++
++inline int os::recv(int fd, char* buf, size_t nBytes, uint flags) {
++ RESTARTABLE_RETURN_INT(::recv(fd, buf, nBytes, flags));
++}
++
++inline int os::send(int fd, char* buf, size_t nBytes, uint flags) {
++ RESTARTABLE_RETURN_INT(::send(fd, buf, nBytes, flags));
++}
++
++inline int os::raw_send(int fd, char* buf, size_t nBytes, uint flags) {
++ return os::send(fd, buf, nBytes, flags);
++}
++
++inline int os::timeout(int fd, long timeout) {
++ julong prevtime,newtime;
++ struct timeval t;
++
++ gettimeofday(&t, NULL);
++ prevtime = ((julong)t.tv_sec * 1000) + t.tv_usec / 1000;
++
++ for(;;) {
++ struct pollfd pfd;
++
++ pfd.fd = fd;
++ pfd.events = POLLIN | POLLERR;
++
++ int res = ::poll(&pfd, 1, timeout);
++
++ if (res == OS_ERR && errno == EINTR) {
++
++ // On Linux any value < 0 means "forever"
++
++ if(timeout >= 0) {
++ gettimeofday(&t, NULL);
++ newtime = ((julong)t.tv_sec * 1000) + t.tv_usec / 1000;
++ timeout -= newtime - prevtime;
++ if(timeout <= 0)
++ return OS_OK;
++ prevtime = newtime;
++ }
++ } else
++ return res;
++ }
++}
++
++inline int os::listen(int fd, int count) {
++ return ::listen(fd, count);
++}
++
++inline int os::connect(int fd, struct sockaddr* him, socklen_t len) {
++ RESTARTABLE_RETURN_INT(::connect(fd, him, len));
++}
++
++inline int os::accept(int fd, struct sockaddr* him, socklen_t* len) {
++ // Linux doc says this can't return EINTR, unlike accept() on Solaris.
++ // But see attachListener_linux.cpp, LinuxAttachListener::dequeue().
++ return (int)::accept(fd, him, len);
++}
++
++inline int os::recvfrom(int fd, char* buf, size_t nBytes, uint flags,
++ sockaddr* from, socklen_t* fromlen) {
++ RESTARTABLE_RETURN_INT((int)::recvfrom(fd, buf, nBytes, flags, from, fromlen));
++}
++
++inline int os::sendto(int fd, char* buf, size_t len, uint flags,
++ struct sockaddr* to, socklen_t tolen) {
++ RESTARTABLE_RETURN_INT((int)::sendto(fd, buf, len, flags, to, tolen));
++}
++
++inline int os::socket_shutdown(int fd, int howto) {
++ return ::shutdown(fd, howto);
++}
++
++inline int os::bind(int fd, struct sockaddr* him, socklen_t len) {
++ return ::bind(fd, him, len);
++}
++
++inline int os::get_sock_name(int fd, struct sockaddr* him, socklen_t* len) {
++ return ::getsockname(fd, him, len);
++}
++
++inline int os::get_host_name(char* name, int namelen) {
++ return ::gethostname(name, namelen);
++}
++
++inline struct hostent* os::get_host_by_name(char* name) {
++ return ::gethostbyname(name);
++}
++
++inline int os::get_sock_opt(int fd, int level, int optname,
++ char* optval, socklen_t* optlen) {
++ return ::getsockopt(fd, level, optname, optval, optlen);
++}
++
++inline int os::set_sock_opt(int fd, int level, int optname,
++ const char* optval, socklen_t optlen) {
++ return ::setsockopt(fd, level, optname, optval, optlen);
++}
++#endif // OS_AIX_VM_OS_AIX_INLINE_HPP
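A usage note on the directory wrappers above: os::readdir() is a readdir_r-style interface that expects a caller-supplied buffer sized by os::readdir_buf_size(). A minimal sketch of the intended calling convention, using the HotSpot allocation macros this port uses elsewhere and omitting error handling:

    DIR* dirp = os::opendir("/tmp");
    char* buf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size("/tmp"), mtInternal);
    struct dirent* entry;
    while ((entry = os::readdir(dirp, (struct dirent*)buf)) != NULL) {
      // entry->d_name is valid here
    }
    FREE_C_HEAP_ARRAY(char, buf, mtInternal);
    os::closedir(dirp);

perfMemory_aix.cpp below follows exactly this pattern when it scans the hsperfdata directories.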
+--- ./hotspot/src/os/aix/vm/os_share_aix.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/os/aix/vm/os_share_aix.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,37 @@
++/*
++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef OS_AIX_VM_OS_SHARE_AIX_HPP
++#define OS_AIX_VM_OS_SHARE_AIX_HPP
++
++// misc
++void signalHandler(int, siginfo_t*, ucontext_t*);
++void handle_unexpected_exception(Thread* thread, int sig, siginfo_t* info, address pc, address adjusted_pc);
++#ifndef PRODUCT
++void continue_with_dump(void);
++#endif
++
++#define PROCFILE_LENGTH 128
++
++#endif // OS_AIX_VM_OS_SHARE_AIX_HPP
+--- ./hotspot/src/os/aix/vm/perfMemory_aix.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/os/aix/vm/perfMemory_aix.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,1026 @@
++/*
++ * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "classfile/vmSymbols.hpp"
++#include "memory/allocation.inline.hpp"
++#include "memory/resourceArea.hpp"
++#include "oops/oop.inline.hpp"
++#include "os_aix.inline.hpp"
++#include "runtime/handles.inline.hpp"
++#include "runtime/perfMemory.hpp"
++#include "utilities/exceptions.hpp"
++
++// put OS-includes here
++# include <sys/types.h>
++# include <sys/mman.h>
++# include <errno.h>
++# include <stdio.h>
++# include <unistd.h>
++# include <sys/stat.h>
++# include <signal.h>
++# include <pwd.h>
++
++static char* backing_store_file_name = NULL; // name of the backing store
++ // file, if successfully created.
++
++// Standard Memory Implementation Details
++
++// create the PerfData memory region in standard memory.
++//
++static char* create_standard_memory(size_t size) {
++
++ // allocate an aligned chunk of memory
++ char* mapAddress = os::reserve_memory(size);
++
++ if (mapAddress == NULL) {
++ return NULL;
++ }
++
++ // commit memory
++ if (!os::commit_memory(mapAddress, size, !ExecMem)) {
++ if (PrintMiscellaneous && Verbose) {
++ warning("Could not commit PerfData memory\n");
++ }
++ os::release_memory(mapAddress, size);
++ return NULL;
++ }
++
++ return mapAddress;
++}
++
++// delete the PerfData memory region
++//
++static void delete_standard_memory(char* addr, size_t size) {
++
++ // there are no persistent external resources to cleanup for standard
++ // memory. since DestroyJavaVM does not support unloading of the JVM,
++ // cleanup of the memory resource is not performed. The memory will be
++ // reclaimed by the OS upon termination of the process.
++ //
++ return;
++}
++
++// save the specified memory region to the given file
++//
++// Note: this function might be called from signal handler (by os::abort()),
++// don't allocate heap memory.
++//
++static void save_memory_to_file(char* addr, size_t size) {
++
++ const char* destfile = PerfMemory::get_perfdata_file_path();
++ assert(destfile[0] != '\0', "invalid PerfData file path");
++
++ int result;
++
++ RESTARTABLE(::open(destfile, O_CREAT|O_WRONLY|O_TRUNC, S_IREAD|S_IWRITE),
++ result);
++ if (result == OS_ERR) {
++ if (PrintMiscellaneous && Verbose) {
++ warning("Could not create Perfdata save file: %s: %s\n",
++ destfile, strerror(errno));
++ }
++ } else {
++ int fd = result;
++
++ for (size_t remaining = size; remaining > 0;) {
++
++ RESTARTABLE(::write(fd, addr, remaining), result);
++ if (result == OS_ERR) {
++ if (PrintMiscellaneous && Verbose) {
++ warning("Could not write Perfdata save file: %s: %s\n",
++ destfile, strerror(errno));
++ }
++ break;
++ }
++
++ remaining -= (size_t)result;
++ addr += result;
++ }
++
++ RESTARTABLE(::close(fd), result);
++ if (PrintMiscellaneous && Verbose) {
++ if (result == OS_ERR) {
++ warning("Could not close %s: %s\n", destfile, strerror(errno));
++ }
++ }
++ }
++ FREE_C_HEAP_ARRAY(char, destfile, mtInternal);
++}
++
++
++// Shared Memory Implementation Details
++
++// Note: the solaris and linux shared memory implementation uses the mmap
++// interface with a backing store file to implement named shared memory.
++// Using the file system as the name space for shared memory allows a
++// common name space to be supported across a variety of platforms. It
++// also provides a name space that Java applications can deal with through
++// simple file apis.
++//
++// The solaris and linux implementations store the backing store file in
++// a user specific temporary directory located in the /tmp file system,
++// which is always a local file system and is sometimes a RAM based file
++// system.
++
++// return the user specific temporary directory name.
++//
++// the caller is expected to free the allocated memory.
++//
++static char* get_user_tmp_dir(const char* user) {
++
++ const char* tmpdir = os::get_temp_directory();
++ const char* perfdir = PERFDATA_NAME;
++ size_t nbytes = strlen(tmpdir) + strlen(perfdir) + strlen(user) + 3;
++ char* dirname = NEW_C_HEAP_ARRAY(char, nbytes, mtInternal);
++
++ // construct the path name to user specific tmp directory
++ snprintf(dirname, nbytes, "%s/%s_%s", tmpdir, perfdir, user);
++
++ return dirname;
++}
++
++// convert the given file name into a process id. if the file
++// does not meet the file naming constraints, return 0.
++//
++static pid_t filename_to_pid(const char* filename) {
++
++ // a filename that doesn't begin with a digit is not a
++ // candidate for conversion.
++ //
++ if (!isdigit(*filename)) {
++ return 0;
++ }
++
++ // check if file name can be converted to an integer without
++ // any leftover characters.
++ //
++ char* remainder = NULL;
++ errno = 0;
++ pid_t pid = (pid_t)strtol(filename, &remainder, 10);
++
++ if (errno != 0) {
++ return 0;
++ }
++
++ // check for left over characters. If any, then the filename is
++ // not a candidate for conversion.
++ //
++ if (remainder != NULL && *remainder != '\0') {
++ return 0;
++ }
++
++ // successful conversion, return the pid
++ return pid;
++}
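filename_to_pid() accepts a name only when strtol() consumes every character; the remainder check rejects trailing non-digits. Hypothetical spot checks, using the two-argument HotSpot assert seen throughout this file:

    assert(filename_to_pid("1234") == 1234, "all-digit names convert");
    assert(filename_to_pid("1234.tmp") == 0, "leftover characters are rejected");
    assert(filename_to_pid("hsperfdata_x") == 0, "non-digit prefix is rejected");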
++
++
++// check if the given path is considered a secure directory for
++// the backing store files. Returns true if the directory exists
++// and is considered a secure location. Returns false if the path
++// is a symbolic link or if an error occurred.
++//
++static bool is_directory_secure(const char* path) {
++ struct stat statbuf;
++ int result = 0;
++
++ RESTARTABLE(::lstat(path, &statbuf), result);
++ if (result == OS_ERR) {
++ return false;
++ }
++
++ // the path exists, now check its mode
++ if (S_ISLNK(statbuf.st_mode) || !S_ISDIR(statbuf.st_mode)) {
++ // the path represents a link or some non-directory file type,
++ // which is not what we expected. declare it insecure.
++ //
++ return false;
++ }
++ else {
++ // we have an existing directory, check if the permissions are safe.
++ //
++ if ((statbuf.st_mode & (S_IWGRP|S_IWOTH)) != 0) {
++ // the directory is open for writing and could be subjected
++ // to a symlink attack. declare it insecure.
++ //
++ return false;
++ }
++ }
++ return true;
++}
++
++
++// return the user name for the given user id
++//
++// the caller is expected to free the allocated memory.
++//
++static char* get_user_name(uid_t uid) {
++
++ struct passwd pwent;
++
++ // determine the max pwbuf size from sysconf, and hardcode
++ // a default if this is not available through sysconf.
++ //
++ long bufsize = sysconf(_SC_GETPW_R_SIZE_MAX);
++ if (bufsize == -1)
++ bufsize = 1024;
++
++ char* pwbuf = NEW_C_HEAP_ARRAY(char, bufsize, mtInternal);
++
++ // POSIX interface to getpwuid_r is used on LINUX
++ struct passwd* p;
++ int result = getpwuid_r(uid, &pwent, pwbuf, (size_t)bufsize, &p);
++
++ if (result != 0 || p == NULL || p->pw_name == NULL || *(p->pw_name) == '\0') {
++ if (PrintMiscellaneous && Verbose) {
++ if (result != 0) {
++ warning("Could not retrieve passwd entry: %s\n",
++ strerror(result));
++ }
++ else if (p == NULL) {
++ // this check is added to protect against an observed problem
++ // with getpwuid_r() on RedHat 9 where getpwuid_r returns 0,
++ // indicating success, but has p == NULL. This was observed when
++ // inserting a file descriptor exhaustion fault prior to the
++ // getpwuid_r() call. In this case, errno is set to the appropriate
++ // error condition, but this is undocumented behavior. This check
++ // is safe under any condition, but the use of errno in the output
++ // message may result in an erroneous message.
++ // Bug Id 89052 was opened with RedHat.
++ //
++ warning("Could not retrieve passwd entry: %s\n",
++ strerror(errno));
++ }
++ else {
++ warning("Could not determine user name: %s\n",
++ p->pw_name == NULL ? "pw_name = NULL" :
++ "pw_name zero length");
++ }
++ }
++ FREE_C_HEAP_ARRAY(char, pwbuf, mtInternal);
++ return NULL;
++ }
++
++ char* user_name = NEW_C_HEAP_ARRAY(char, strlen(p->pw_name) + 1, mtInternal);
++ strcpy(user_name, p->pw_name);
++
++ FREE_C_HEAP_ARRAY(char, pwbuf, mtInternal);
++ return user_name;
++}
++
++// return the name of the user that owns the process identified by vmid.
++//
++// This method uses a slow directory search algorithm to find the backing
++// store file for the specified vmid and returns the user name, as determined
++// by the user name suffix of the hsperfdata_ directory name.
++//
++// the caller is expected to free the allocated memory.
++//
++static char* get_user_name_slow(int vmid, TRAPS) {
++
++ // short circuit the directory search if the process doesn't even exist.
++ if (kill(vmid, 0) == OS_ERR) {
++ if (errno == ESRCH) {
++ THROW_MSG_0(vmSymbols::java_lang_IllegalArgumentException(),
++ "Process not found");
++ }
++ else /* EPERM */ {
++ THROW_MSG_0(vmSymbols::java_io_IOException(), strerror(errno));
++ }
++ }
++
++ // directory search
++ char* oldest_user = NULL;
++ time_t oldest_ctime = 0;
++
++ const char* tmpdirname = os::get_temp_directory();
++
++ DIR* tmpdirp = os::opendir(tmpdirname);
++
++ if (tmpdirp == NULL) {
++ return NULL;
++ }
++
++ // for each entry in the directory that matches the pattern hsperfdata_*,
++ // open the directory and check if the file for the given vmid exists.
++ // The file with the expected name and the latest creation date is used
++ // to determine the user name for the process id.
++ //
++ struct dirent* dentry;
++ char* tdbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(tmpdirname), mtInternal);
++ errno = 0;
++ while ((dentry = os::readdir(tmpdirp, (struct dirent *)tdbuf)) != NULL) {
++
++ // check if the directory entry is a hsperfdata file
++ if (strncmp(dentry->d_name, PERFDATA_NAME, strlen(PERFDATA_NAME)) != 0) {
++ continue;
++ }
++
++ char* usrdir_name = NEW_C_HEAP_ARRAY(char,
++ strlen(tmpdirname) + strlen(dentry->d_name) + 2, mtInternal);
++ strcpy(usrdir_name, tmpdirname);
++ strcat(usrdir_name, "/");
++ strcat(usrdir_name, dentry->d_name);
++
++ DIR* subdirp = os::opendir(usrdir_name);
++
++ if (subdirp == NULL) {
++ FREE_C_HEAP_ARRAY(char, usrdir_name, mtInternal);
++ continue;
++ }
++
++ // Since we don't create the backing store files in directories
++ // pointed to by symbolic links, we also don't follow them when
++ // looking for the files. We check for a symbolic link after the
++ // call to opendir in order to eliminate a small window where the
++ // symlink can be exploited.
++ //
++ if (!is_directory_secure(usrdir_name)) {
++ FREE_C_HEAP_ARRAY(char, usrdir_name, mtInternal);
++ os::closedir(subdirp);
++ continue;
++ }
++
++ struct dirent* udentry;
++ char* udbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(usrdir_name), mtInternal);
++ errno = 0;
++ while ((udentry = os::readdir(subdirp, (struct dirent *)udbuf)) != NULL) {
++
++ if (filename_to_pid(udentry->d_name) == vmid) {
++ struct stat statbuf;
++ int result;
++
++ char* filename = NEW_C_HEAP_ARRAY(char,
++ strlen(usrdir_name) + strlen(udentry->d_name) + 2, mtInternal);
++
++ strcpy(filename, usrdir_name);
++ strcat(filename, "/");
++ strcat(filename, udentry->d_name);
++
++ // don't follow symbolic links for the file
++ RESTARTABLE(::lstat(filename, &statbuf), result);
++ if (result == OS_ERR) {
++ FREE_C_HEAP_ARRAY(char, filename, mtInternal);
++ continue;
++ }
++
++ // skip over files that are not regular files.
++ if (!S_ISREG(statbuf.st_mode)) {
++ FREE_C_HEAP_ARRAY(char, filename, mtInternal);
++ continue;
++ }
++
++ // compare and save filename with latest creation time
++ if (statbuf.st_size > 0 && statbuf.st_ctime > oldest_ctime) {
++ char* user = strchr(dentry->d_name, '_') + 1;
++
++ if (oldest_user != NULL) FREE_C_HEAP_ARRAY(char, oldest_user, mtInternal);
++ oldest_user = NEW_C_HEAP_ARRAY(char, strlen(user)+1, mtInternal);
++
++ strcpy(oldest_user, user);
++ oldest_ctime = statbuf.st_ctime;
++ }
++
++ FREE_C_HEAP_ARRAY(char, filename, mtInternal);
++ }
++ }
++ os::closedir(subdirp);
++ FREE_C_HEAP_ARRAY(char, udbuf, mtInternal);
++ FREE_C_HEAP_ARRAY(char, usrdir_name, mtInternal);
++ }
++ os::closedir(tmpdirp);
++ FREE_C_HEAP_ARRAY(char, tdbuf, mtInternal);
++
++ return(oldest_user);
++}
++
++// return the name of the user that owns the JVM indicated by the given vmid.
++//
++static char* get_user_name(int vmid, TRAPS) {
++ return get_user_name_slow(vmid, CHECK_NULL);
++}
++
++// return the file name of the backing store file for the named
++// shared memory region for the given user name and vmid.
++//
++// the caller is expected to free the allocated memory.
++//
++static char* get_sharedmem_filename(const char* dirname, int vmid) {
++
++ // add 2 for the file separator and a null terminator.
++ size_t nbytes = strlen(dirname) + UINT_CHARS + 2;
++
++ char* name = NEW_C_HEAP_ARRAY(char, nbytes, mtInternal);
++ snprintf(name, nbytes, "%s/%d", dirname, vmid);
++
++ return name;
++}
++
++
++// remove file
++//
++// this method removes the file specified by the given path
++//
++static void remove_file(const char* path) {
++
++ int result;
++
++ // if the file is a directory, the following unlink will fail. since
++ // we don't expect to find directories in the user temp directory, we
++ // won't try to handle this situation. even if accidentally or
++ // maliciously planted, the directory's presence won't hurt anything.
++ //
++ RESTARTABLE(::unlink(path), result);
++ if (PrintMiscellaneous && Verbose && result == OS_ERR) {
++ if (errno != ENOENT) {
++ warning("Could not unlink shared memory backing"
++ " store file %s : %s\n", path, strerror(errno));
++ }
++ }
++}
++
++
++// remove file
++//
++// this method removes the file with the given file name in the
++// named directory.
++//
++static void remove_file(const char* dirname, const char* filename) {
++
++ size_t nbytes = strlen(dirname) + strlen(filename) + 2;
++ char* path = NEW_C_HEAP_ARRAY(char, nbytes, mtInternal);
++
++ strcpy(path, dirname);
++ strcat(path, "/");
++ strcat(path, filename);
++
++ remove_file(path);
++
++ FREE_C_HEAP_ARRAY(char, path, mtInternal);
++}
++
++
++// cleanup stale shared memory resources
++//
++// This method attempts to remove all stale shared memory files in
++// the named user temporary directory. It scans the named directory
++ // for files matching the pattern ^[0-9]+$. For each file found, the
++// process id is extracted from the file name and a test is run to
++// determine if the process is alive. If the process is not alive,
++// any stale file resources are removed.
++//
++static void cleanup_sharedmem_resources(const char* dirname) {
++
++ // open the user temp directory
++ DIR* dirp = os::opendir(dirname);
++
++ if (dirp == NULL) {
++ // directory doesn't exist, so there is nothing to cleanup
++ return;
++ }
++
++ if (!is_directory_secure(dirname)) {
++ // the directory is not a secure directory
++ return;
++ }
++
++ // for each entry in the directory that matches the expected file
++ // name pattern, determine if the file resources are stale and if
++ // so, remove the file resources. Note, instrumented HotSpot processes
++ // for this user may start and/or terminate during this search and
++ // remove or create new files in this directory. The behavior of this
++ // loop under these conditions is dependent upon the implementation of
++ // opendir/readdir.
++ //
++ struct dirent* entry;
++ char* dbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(dirname), mtInternal);
++ errno = 0;
++ while ((entry = os::readdir(dirp, (struct dirent *)dbuf)) != NULL) {
++
++ pid_t pid = filename_to_pid(entry->d_name);
++
++ if (pid == 0) {
++
++ if (strcmp(entry->d_name, ".") != 0 && strcmp(entry->d_name, "..") != 0) {
++
++ // attempt to remove all unexpected files, except "." and ".."
++ remove_file(dirname, entry->d_name);
++ }
++
++ errno = 0;
++ continue;
++ }
++
++ // we now have a file name that converts to a valid integer
++ // that could represent a process id. if this process id
++ // matches the current process id or the process is not running,
++ // then remove the stale file resources.
++ //
++ // process liveness is detected by sending signal number 0 to
++ // the process id (see kill(2)). if kill determines that the
++ // process does not exist, then the file resources are removed.
++ // if kill determines that we don't have permission to
++ // signal the process, then the file resources are assumed to
++ // be stale and are removed because the resources for such a
++ // process should be in a different user specific directory.
++ //
++ if ((pid == os::current_process_id()) ||
++ (kill(pid, 0) == OS_ERR && (errno == ESRCH || errno == EPERM))) {
++
++ remove_file(dirname, entry->d_name);
++ }
++ errno = 0;
++ }
++ os::closedir(dirp);
++ FREE_C_HEAP_ARRAY(char, dbuf, mtInternal);
++}
++
++// make the user specific temporary directory. Returns true if
++// the directory exists and is secure upon return. Returns false
++// if the directory exists but is either a symlink, is otherwise
++// insecure, or if an error occurred.
++//
++static bool make_user_tmp_dir(const char* dirname) {
++
++ // create the directory with 0755 permissions. note that the directory
++ // will be owned by euid::egid, which may not be the same as uid::gid.
++ //
++ if (mkdir(dirname, S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH) == OS_ERR) {
++ if (errno == EEXIST) {
++ // The directory already exists and was probably created by another
++ // JVM instance. However, this could also be the result of a
++ // deliberate symlink. Verify that the existing directory is safe.
++ //
++ if (!is_directory_secure(dirname)) {
++ // directory is not secure
++ if (PrintMiscellaneous && Verbose) {
++ warning("%s directory is insecure\n", dirname);
++ }
++ return false;
++ }
++ }
++ else {
++ // we encountered some other failure while attempting
++ // to create the directory
++ //
++ if (PrintMiscellaneous && Verbose) {
++ warning("could not create directory %s: %s\n",
++ dirname, strerror(errno));
++ }
++ return false;
++ }
++ }
++ return true;
++}
++
++// create the shared memory file resources
++//
++ // This method creates the shared memory file with the given size.
++// This method also creates the user specific temporary directory, if
++// it does not yet exist.
++//
++static int create_sharedmem_resources(const char* dirname, const char* filename, size_t size) {
++
++ // make the user temporary directory
++ if (!make_user_tmp_dir(dirname)) {
++ // could not make/find the directory or the found directory
++ // was not secure
++ return -1;
++ }
++
++ int result;
++
++ RESTARTABLE(::open(filename, O_RDWR|O_CREAT|O_TRUNC, S_IREAD|S_IWRITE), result);
++ if (result == OS_ERR) {
++ if (PrintMiscellaneous && Verbose) {
++ warning("could not create file %s: %s\n", filename, strerror(errno));
++ }
++ return -1;
++ }
++
++ // save the file descriptor
++ int fd = result;
++
++ // set the file size
++ RESTARTABLE(::ftruncate(fd, (off_t)size), result);
++ if (result == OS_ERR) {
++ if (PrintMiscellaneous && Verbose) {
++ warning("could not set shared memory file size: %s\n", strerror(errno));
++ }
++ RESTARTABLE(::close(fd), result);
++ return -1;
++ }
++
++ return fd;
++}
++
++// open the shared memory file for the given user and vmid. returns
++// the file descriptor for the open file or -1 if the file could not
++// be opened.
++//
++static int open_sharedmem_file(const char* filename, int oflags, TRAPS) {
++
++ // open the file
++ int result;
++ RESTARTABLE(::open(filename, oflags), result);
++ if (result == OS_ERR) {
++ if (errno == ENOENT) {
++ THROW_MSG_0(vmSymbols::java_lang_IllegalArgumentException(),
++ "Process not found");
++ }
++ else if (errno == EACCES) {
++ THROW_MSG_0(vmSymbols::java_lang_IllegalArgumentException(),
++ "Permission denied");
++ }
++ else {
++ THROW_MSG_0(vmSymbols::java_io_IOException(), strerror(errno));
++ }
++ }
++
++ return result;
++}
++
++// create a named shared memory region. returns the address of the
++// memory region on success or NULL on failure. A return value of
++// NULL will ultimately disable the shared memory feature.
++//
++// On Solaris and Linux, the name space for shared memory objects
++// is the file system name space.
++//
++// A monitoring application attaching to a JVM does not need to know
++// the file system name of the shared memory object. However, it may
++// be convenient for applications to discover the existence of newly
++// created and terminating JVMs by watching the file system name space
++// for files being created or removed.
++//
++static char* mmap_create_shared(size_t size) {
++
++ int result;
++ int fd;
++ char* mapAddress;
++
++ int vmid = os::current_process_id();
++
++ char* user_name = get_user_name(geteuid());
++
++ if (user_name == NULL)
++ return NULL;
++
++ char* dirname = get_user_tmp_dir(user_name);
++ char* filename = get_sharedmem_filename(dirname, vmid);
++
++ // cleanup any stale shared memory files
++ cleanup_sharedmem_resources(dirname);
++
++ assert(((size > 0) && (size % os::vm_page_size() == 0)),
++ "unexpected PerfMemory region size");
++
++ fd = create_sharedmem_resources(dirname, filename, size);
++
++ FREE_C_HEAP_ARRAY(char, user_name, mtInternal);
++ FREE_C_HEAP_ARRAY(char, dirname, mtInternal);
++
++ if (fd == -1) {
++ FREE_C_HEAP_ARRAY(char, filename, mtInternal);
++ return NULL;
++ }
++
++ mapAddress = (char*)::mmap((char*)0, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
++
++ // attempt to close the file - restart it if it was interrupted,
++ // but ignore other failures
++ RESTARTABLE(::close(fd), result);
++ assert(result != OS_ERR, "could not close file");
++
++ if (mapAddress == MAP_FAILED) {
++ if (PrintMiscellaneous && Verbose) {
++ warning("mmap failed - %s\n", strerror(errno));
++ }
++ remove_file(filename);
++ FREE_C_HEAP_ARRAY(char, filename, mtInternal);
++ return NULL;
++ }
++
++ // save the file name for use in delete_shared_memory()
++ backing_store_file_name = filename;
++
++ // clear the shared memory region
++ (void)::memset((void*) mapAddress, 0, size);
++
++ return mapAddress;
++}
++
++// release a named shared memory region
++//
++static void unmap_shared(char* addr, size_t bytes) {
++ // Do not rely on os::reserve_memory/os::release_memory to use mmap.
++ // Use os::reserve_memory/os::release_memory for PerfDisableSharedMem=1, mmap/munmap for PerfDisableSharedMem=0
++ if (::munmap(addr, bytes) == -1) {
++ warning("perfmemory: munmap failed (%d)\n", errno);
++ }
++}
++
++// create the PerfData memory region in shared memory.
++//
++static char* create_shared_memory(size_t size) {
++
++ // create the shared memory region.
++ return mmap_create_shared(size);
++}
++
++// delete the shared PerfData memory region
++//
++static void delete_shared_memory(char* addr, size_t size) {
++
++ // cleanup the persistent shared memory resources. since DestroyJavaVM does
++ // not support unloading of the JVM, unmapping of the memory resource is
++ // not performed. The memory will be reclaimed by the OS upon termination of
++ // the process. The backing store file is deleted from the file system.
++
++ assert(!PerfDisableSharedMem, "shouldn't be here");
++
++ if (backing_store_file_name != NULL) {
++ remove_file(backing_store_file_name);
++ // Don't. Freeing heap memory could deadlock os::abort() if it is called
++ // from signal handler. OS will reclaim the heap memory.
++ // FREE_C_HEAP_ARRAY(char, backing_store_file_name, mtInternal);
++ backing_store_file_name = NULL;
++ }
++}
++
++// return the size of the file for the given file descriptor
++// or 0 if it is not a valid size for a shared memory file
++//
++static size_t sharedmem_filesize(int fd, TRAPS) {
++
++ struct stat statbuf;
++ int result;
++
++ RESTARTABLE(::fstat(fd, &statbuf), result);
++ if (result == OS_ERR) {
++ if (PrintMiscellaneous && Verbose) {
++ warning("fstat failed: %s\n", strerror(errno));
++ }
++ THROW_MSG_0(vmSymbols::java_io_IOException(),
++ "Could not determine PerfMemory size");
++ }
++
++ if ((statbuf.st_size == 0) ||
++ ((size_t)statbuf.st_size % os::vm_page_size() != 0)) {
++ THROW_MSG_0(vmSymbols::java_lang_Exception(),
++ "Invalid PerfMemory size");
++ }
++
++ return (size_t)statbuf.st_size;
++}
++
++// attach to a named shared memory region.
++//
++static void mmap_attach_shared(const char* user, int vmid, PerfMemory::PerfMemoryMode mode, char** addr, size_t* sizep, TRAPS) {
++
++ char* mapAddress;
++ int result;
++ int fd;
++ size_t size;
++ const char* luser = NULL;
++
++ int mmap_prot;
++ int file_flags;
++
++ ResourceMark rm;
++
++ // map the high level access mode to the appropriate permission
++ // constructs for the file and the shared memory mapping.
++ if (mode == PerfMemory::PERF_MODE_RO) {
++ mmap_prot = PROT_READ;
++ file_flags = O_RDONLY;
++ }
++ else if (mode == PerfMemory::PERF_MODE_RW) {
++#ifdef LATER
++ mmap_prot = PROT_READ | PROT_WRITE;
++ file_flags = O_RDWR;
++#else
++ THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(),
++ "Unsupported access mode");
++#endif
++ }
++ else {
++ THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(),
++ "Illegal access mode");
++ }
++
++ if (user == NULL || strlen(user) == 0) {
++ luser = get_user_name(vmid, CHECK);
++ }
++ else {
++ luser = user;
++ }
++
++ if (luser == NULL) {
++ THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(),
++ "Could not map vmid to user Name");
++ }
++
++ char* dirname = get_user_tmp_dir(luser);
++
++ // since we don't follow symbolic links when creating the backing
++ // store file, we don't follow them when attaching either.
++ //
++ if (!is_directory_secure(dirname)) {
++ FREE_C_HEAP_ARRAY(char, dirname, mtInternal);
++ THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(),
++ "Process not found");
++ }
++
++ char* filename = get_sharedmem_filename(dirname, vmid);
++
++ // copy heap memory to resource memory. the open_sharedmem_file
++ // method below needs to use the filename, but could throw an
++ // exception. using a resource array prevents the leak that
++ // would otherwise occur.
++ char* rfilename = NEW_RESOURCE_ARRAY(char, strlen(filename) + 1);
++ strcpy(rfilename, filename);
++
++ // free the c heap resources that are no longer needed
++ if (luser != user) FREE_C_HEAP_ARRAY(char, luser, mtInternal);
++ FREE_C_HEAP_ARRAY(char, dirname, mtInternal);
++ FREE_C_HEAP_ARRAY(char, filename, mtInternal);
++
++ // open the shared memory file for the given vmid
++ fd = open_sharedmem_file(rfilename, file_flags, CHECK);
++ assert(fd != OS_ERR, "unexpected value");
++
++ if (*sizep == 0) {
++ size = sharedmem_filesize(fd, CHECK);
++ assert(size != 0, "unexpected size");
++ } else {
++ size = *sizep;
++ }
++
++ mapAddress = (char*)::mmap((char*)0, size, mmap_prot, MAP_SHARED, fd, 0);
++
++ // attempt to close the file - restart if it gets interrupted,
++ // but ignore other failures
++ RESTARTABLE(::close(fd), result);
++ assert(result != OS_ERR, "could not close file");
++
++ if (mapAddress == MAP_FAILED) {
++ if (PrintMiscellaneous && Verbose) {
++ warning("mmap failed: %s\n", strerror(errno));
++ }
++ THROW_MSG(vmSymbols::java_lang_OutOfMemoryError(),
++ "Could not map PerfMemory");
++ }
++
++ *addr = mapAddress;
++ *sizep = size;
++
++ if (PerfTraceMemOps) {
++ tty->print("mapped " SIZE_FORMAT " bytes for vmid %d at "
++ INTPTR_FORMAT "\n", size, vmid, (void*)mapAddress);
++ }
++}
++
++
++
++
++// create the PerfData memory region
++//
++// This method creates the memory region used to store performance
++// data for the JVM. The memory may be created in standard or
++// shared memory.
++//
++void PerfMemory::create_memory_region(size_t size) {
++
++ if (PerfDisableSharedMem) {
++ // do not share the memory for the performance data.
++ _start = create_standard_memory(size);
++ }
++ else {
++ _start = create_shared_memory(size);
++ if (_start == NULL) {
++
++ // creation of the shared memory region failed, attempt
++ // to create a contiguous, non-shared memory region instead.
++ //
++ if (PrintMiscellaneous && Verbose) {
++ warning("Reverting to non-shared PerfMemory region.\n");
++ }
++ PerfDisableSharedMem = true;
++ _start = create_standard_memory(size);
++ }
++ }
++
++ if (_start != NULL) _capacity = size;
++
++}
++
++// delete the PerfData memory region
++//
++// This method deletes the memory region used to store performance
++ // data for the JVM. The memory region indicated by the <address, size>
++ // tuple will be inaccessible after a call to this method.
++//
++void PerfMemory::delete_memory_region() {
++
++ assert((start() != NULL && capacity() > 0), "verify proper state");
++
++ // If user specifies PerfDataSaveFile, it will save the performance data
++ // to the specified file name no matter whether PerfDataSaveToFile is specified
++ // or not. In other words, -XX:PerfDataSaveFile=.. overrides flag
++ // -XX:+PerfDataSaveToFile.
++ if (PerfDataSaveToFile || PerfDataSaveFile != NULL) {
++ save_memory_to_file(start(), capacity());
++ }
++
++ if (PerfDisableSharedMem) {
++ delete_standard_memory(start(), capacity());
++ }
++ else {
++ delete_shared_memory(start(), capacity());
++ }
++}
++
++// attach to the PerfData memory region for another JVM
++//
++ // This method returns an <address, size> tuple that points to
++// a memory buffer that is kept reasonably synchronized with
++// the PerfData memory region for the indicated JVM. This
++// buffer may be kept in synchronization via shared memory
++// or some other mechanism that keeps the buffer updated.
++//
++// If the JVM chooses not to support the attachability feature,
++// this method should throw an UnsupportedOperation exception.
++//
++// This implementation utilizes named shared memory to map
++// the indicated process's PerfData memory region into this JVMs
++// address space.
++//
++void PerfMemory::attach(const char* user, int vmid, PerfMemoryMode mode, char** addrp, size_t* sizep, TRAPS) {
++
++ if (vmid == 0 || vmid == os::current_process_id()) {
++ *addrp = start();
++ *sizep = capacity();
++ return;
++ }
++
++ mmap_attach_shared(user, vmid, mode, addrp, sizep, CHECK);
++}
++
++// detach from the PerfData memory region of another JVM
++//
++// This method detaches the PerfData memory region of another
++ // JVM, specified as an <address, size> tuple of a buffer
++// in this process's address space. This method may perform
++// arbitrary actions to accomplish the detachment. The memory
++ // region specified by <address, size> will be inaccessible after
++// a call to this method.
++//
++// If the JVM chooses not to support the attachability feature,
++// this method should throw an UnsupportedOperation exception.
++//
++// This implementation utilizes named shared memory to detach
++// the indicated process's PerfData memory region from this
++// process's address space.
++//
++void PerfMemory::detach(char* addr, size_t bytes, TRAPS) {
++
++ assert(addr != 0, "address sanity check");
++ assert(bytes > 0, "capacity sanity check");
++
++ if (PerfMemory::contains(addr) || PerfMemory::contains(addr + bytes - 1)) {
++ // prevent accidental detachment of this process's PerfMemory region
++ return;
++ }
++
++ unmap_shared(addr, bytes);
++}
++
++char* PerfMemory::backing_store_filename() {
++ return backing_store_file_name;
++}
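The naming scheme used throughout this file places the backing store at <tmpdir>/<PERFDATA_NAME>_<user>/<vmid>, where PERFDATA_NAME is defined elsewhere in HotSpot (perfMemory.hpp) as "hsperfdata". A condensed sketch of the path construction split across get_user_tmp_dir() and get_sharedmem_filename() above, with placeholder values:

    const char* user = "alice";   // placeholder user name
    int vmid = 12345;             // placeholder vmid (the target pid)
    char path[PATH_MAX];
    // yields e.g. /tmp/hsperfdata_alice/12345
    snprintf(path, sizeof(path), "%s/%s_%s/%d",
             os::get_temp_directory(), PERFDATA_NAME, user, vmid);

Keeping the name space in the file system is what lets monitoring tools discover JVMs simply by watching these directories, as the comment above mmap_create_shared() notes.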
+--- ./hotspot/src/os/aix/vm/porting_aix.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/os/aix/vm/porting_aix.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,367 @@
++/*
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "asm/assembler.hpp"
++#include "loadlib_aix.hpp"
++#include "porting_aix.hpp"
++#include "utilities/debug.hpp"
++
++#include <demangle.h>
++#include <sys/debug.h>
++
++//////////////////////////////////
++// Provide implementation for dladdr based on LoadedLibraries pool and
++// traceback table scan (see getFuncName).
++
++// Search traceback table in stack,
++// return procedure name from trace back table.
++#define MAX_FUNC_SEARCH_LEN 0x10000
++// Any PC below this value is considered toast.
++#define MINIMUM_VALUE_FOR_PC ((unsigned int*)0x1024)
++
++#define PTRDIFF_BYTES(p1,p2) (((ptrdiff_t)p1) - ((ptrdiff_t)p2))
++
++// Align a pointer without having to cast.
++inline char* align_ptr_up(char* ptr, intptr_t alignment) {
++ return (char*) align_size_up((intptr_t)ptr, alignment);
++}
++
++// Trace if verbose to tty.
++// I use these now instead of the Xtrace system because the latter is
++// not available at init time, hence worthless. Until we fix this, all
++// tracing here is done with -XX:+Verbose.
++#define trcVerbose(fmt, ...) { \
++ if (Verbose) { \
++ fprintf(stderr, fmt, ##__VA_ARGS__); \
++ fputc('\n', stderr); fflush(stderr); \
++ } \
++}
++#define ERRBYE(s) { trcVerbose(s); return -1; }
++
++ // Unfortunately, the interface of dladdr makes the implementor
++// responsible for maintaining memory for function name/library
++// name. I guess this is because most OS's keep those values as part
++// of the mapped executable image ready to use. On AIX, this doesn't
++// work, so I have to keep the returned strings. For now, I do this in
++// a primitive string map. Should this turn out to be a performance
++// problem, a better hashmap has to be used.
++class fixed_strings {
++ struct node {
++ char* v;
++ node* next;
++ };
++
++ node* first;
++
++ public:
++
++ fixed_strings() : first(0) {}
++ ~fixed_strings() {
++ node* n = first;
++ while (n) {
++ node* p = n;
++ n = n->next;
++ free(p->v);
++ delete p;
++ }
++ }
++
++ char* intern(const char* s) {
++ for (node* n = first; n; n = n->next) {
++ if (strcmp(n->v, s) == 0) {
++ return n->v;
++ }
++ }
++ node* p = new node;
++ p->v = strdup(s);
++ p->next = first;
++ first = p;
++ return p->v;
++ }
++};
++
++static fixed_strings dladdr_fixed_strings;
++
++// Given a code pointer, returns the function name and the displacement.
++// Function looks for the traceback table at the end of the function.
++extern "C" int getFuncName(
++ codeptr_t pc, // [in] program counter
++ char* p_name, size_t namelen, // [out] optional: function name ("" if not available)
++ int* p_displacement, // [out] optional: displacement (-1 if not available)
++ const struct tbtable** p_tb, // [out] optional: ptr to traceback table to get further
++ // information (NULL if not available)
++ char* p_errmsg, size_t errmsglen // [out] optional: user provided buffer for error messages
++ ) {
++ struct tbtable* tb = 0;
++ unsigned int searchcount = 0;
++
++ // initialize output parameters
++ if (p_name && namelen > 0) {
++ *p_name = '\0';
++ }
++ if (p_errmsg && errmsglen > 0) {
++ *p_errmsg = '\0';
++ }
++ if (p_displacement) {
++ *p_displacement = -1;
++ }
++ if (p_tb) {
++ *p_tb = NULL;
++ }
++
++ // weed out obvious bogus states
++ if (pc < MINIMUM_VALUE_FOR_PC) {
++ ERRBYE("invalid program counter");
++ }
++
++ codeptr_t pc2 = pc;
++
++ // make sure the pointer is word aligned.
++ pc2 = (codeptr_t) align_ptr_up((char*)pc2, 4);
++
++ // Find start of traceback table.
++ // (starts after code, is marked by word-aligned (32bit) zeros)
++ while ((*pc2 != NULL) && (searchcount++ < MAX_FUNC_SEARCH_LEN)) {
++ pc2++;
++ }
++ if (*pc2 != 0) {
++ ERRBYE("could not find traceback table within 5000 bytes of program counter");
++ }
++ //
++ // Set up addressability to the traceback table
++ //
++ tb = (struct tbtable*) (pc2 + 1);
++
++ // Is this really a traceback table? No way to be sure but
++ // some indicators we can check.
++ if (tb->tb.lang >= 0xf && tb->tb.lang <= 0xfb) {
++ // Language specifiers, go from 0 (C) to 14 (Objective C).
++ // According to spec, 0xf-0xfa reserved, 0xfb-0xff reserved for ibm.
++ ERRBYE("not a traceback table");
++ }
++
++ // Existence of fields in the tbtable extension are contingent upon
++ // specific fields in the base table. Check for their existence so
++ // that we can address the function name if it exists.
++ pc2 = (codeptr_t) tb +
++ sizeof(struct tbtable_short)/sizeof(int);
++ if (tb->tb.fixedparms != 0 || tb->tb.floatparms != 0)
++ pc2++;
++
++ if (tb->tb.has_tboff == TRUE) {
++
++ // I want to know the displacement
++ const unsigned int tb_offset = *pc2;
++ codeptr_t start_of_procedure =
++ (codeptr_t)(((char*)tb) - 4 - tb_offset); // (-4 to omit leading 0000)
++
++ // Weed out the cases where we did find the wrong traceback table.
++ if (pc < start_of_procedure) {
++ ERRBYE("could not find (the real) traceback table within 5000 bytes of program counter");
++ }
++
++ // return the displacement
++ if (p_displacement) {
++ (*p_displacement) = (int) PTRDIFF_BYTES(pc, start_of_procedure);
++ }
++
++ pc2++;
++ } else {
++ // return -1 for displacement
++ if (p_displacement) {
++ (*p_displacement) = -1;
++ }
++ }
++
++ if (tb->tb.int_hndl == TRUE)
++ pc2++;
++
++ if (tb->tb.has_ctl == TRUE)
++ pc2 += (*pc2) + 1; // don't care
++
++ //
++ // return function name if it exists.
++ //
++ if (p_name && namelen > 0) {
++ if (tb->tb.name_present) {
++ char buf[256];
++ const short l = MIN2(*((short*)pc2), sizeof(buf) - 1);
++ memcpy(buf, (char*)pc2 + sizeof(short), l);
++ buf[l] = '\0';
++
++ p_name[0] = '\0';
++
++ // If it is a C++ name, try and demangle it using the Demangle interface (see demangle.h).
++ char* rest;
++ Name* const name = Demangle(buf, rest);
++ if (name) {
++ const char* const demangled_name = name->Text();
++ if (demangled_name) {
++ strncpy(p_name, demangled_name, namelen-1);
++ p_name[namelen-1] = '\0';
++ }
++ delete name;
++ }
++
++ // Fallback: if demangling did not work, just provide the unmangled name.
++ if (p_name[0] == '\0') {
++ strncpy(p_name, buf, namelen-1);
++ p_name[namelen-1] = '\0';
++ }
++
++ } else {
++ strncpy(p_name, "", namelen-1);
++ p_name[namelen-1] = '\0';
++ }
++ }
++ // Return traceback table, if user wants it.
++ if (p_tb) {
++ (*p_tb) = tb;
++ }
++
++ return 0;
++}
++
++// Special implementation of dladdr for Aix based on LoadedLibraries
++// Note: dladdr returns non-zero for ok, 0 for error!
++// Note: dladdr is not POSIX, but a non-standard GNU extension. So this tries to
++// fulfill the contract of dladdr on Linux (see http://linux.die.net/man/3/dladdr)
++// Note: addr may be both an AIX function descriptor or a real code pointer
++// to the entry of a function.
++extern "C"
++int dladdr(void* addr, Dl_info* info) {
++
++ if (!addr) {
++ return 0;
++ }
++
++ assert(info, "");
++
++ int rc = 0;
++
++ const char* const ZEROSTRING = "";
++
++ // Always return a string, even if a "" one. Linux dladdr manpage
++ // does not say anything about returning NULL
++ info->dli_fname = ZEROSTRING;
++ info->dli_sname = ZEROSTRING;
++ info->dli_saddr = NULL;
++
++ address p = (address) addr;
++ const LoadedLibraryModule* lib = NULL;
++
++ enum { noclue, code, data } type = noclue;
++
++ trcVerbose("dladdr(%p)...", p);
++
++ // Note: input address may be a function. I accept both a pointer to
++ // the entry of a function and a pointer to the function descriptor.
++ // (see ppc64 ABI)
++ lib = LoadedLibraries::find_for_text_address(p);
++ if (lib) {
++ type = code;
++ }
++
++ if (!lib) {
++ // Not a pointer into any text segment. Is it a function descriptor?
++ const FunctionDescriptor* const pfd = (const FunctionDescriptor*) p;
++ p = pfd->entry();
++ if (p) {
++ lib = LoadedLibraries::find_for_text_address(p);
++ if (lib) {
++ type = code;
++ }
++ }
++ }
++
++ if (!lib) {
++ // Neither direct code pointer nor function descriptor. A data ptr?
++ p = (address)addr;
++ lib = LoadedLibraries::find_for_data_address(p);
++ if (lib) {
++ type = data;
++ }
++ }
++
++ // If we did find the shared library this address belongs to (either
++ // code or data segment) resolve library path and, if possible, the
++ // symbol name.
++ if (lib) {
++ const char* const interned_libpath =
++ dladdr_fixed_strings.intern(lib->get_fullpath());
++ if (interned_libpath) {
++ info->dli_fname = interned_libpath;
++ }
++
++ if (type == code) {
++
++ // For code symbols resolve function name and displacement. Use
++ // displacement to calc start of function.
++ char funcname[256] = "";
++ int displacement = 0;
++
++ if (getFuncName((codeptr_t) p, funcname, sizeof(funcname), &displacement,
++ NULL, NULL, 0) == 0) {
++ if (funcname[0] != '\0') {
++ const char* const interned = dladdr_fixed_strings.intern(funcname);
++ info->dli_sname = interned;
++ trcVerbose("... function name: %s ...", interned);
++ }
++
++ // From the displacement calculate the start of the function.
++ if (displacement != -1) {
++ info->dli_saddr = p - displacement;
++ } else {
++ info->dli_saddr = p;
++ }
++ } else {
++
++ // No traceback table found. Just assume the pointer is it.
++ info->dli_saddr = p;
++
++ }
++
++ } else if (type == data) {
++
++ // For data symbols.
++ info->dli_saddr = p;
++
++ } else {
++ ShouldNotReachHere();
++ }
++
++ rc = 1; // success: return 1 [sic]
++
++ }
++
++ // sanity checks.
++ if (rc) {
++ assert(info->dli_fname, "");
++ assert(info->dli_sname, "");
++ assert(info->dli_saddr, "");
++ }
++
++ return rc; // error: return 0 [sic]
++
++}
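
A minimal sketch of how callers typically consume the dladdr(3) contract that the
replacement above implements; this is standard <dlfcn.h> usage (on glibc it needs
_GNU_SOURCE), and describe_address is a hypothetical helper, not part of the patch:

#include <dlfcn.h>
#include <stdio.h>

// Resolve an address to its containing library and nearest symbol.
// Works against the AIX replacement above as well as the GNU dladdr.
static void describe_address(void* addr) {
  Dl_info info;
  if (dladdr(addr, &info) != 0) {              // non-zero means success
    printf("library: %s\n", info.dli_fname);   // "" if unknown
    printf("symbol : %s\n", info.dli_sname);   // "" for data symbols on AIX
    printf("entry  : %p\n", info.dli_saddr);   // function entry, not a descriptor
  } else {
    printf("no module found for %p\n", addr);
  }
}
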
+--- ./hotspot/src/os/aix/vm/porting_aix.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/os/aix/vm/porting_aix.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,81 @@
++/*
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include <stddef.h>
++
++// Header file to contain porting-relevant code which does not have a
++// home anywhere else and which cannot go into os_<platform>.h because
++// that header is included inside the os class definition, hence all
++// its content is part of the os class.
++
++// AIX's own version of dladdr().
++// This function tries to mimic dladdr(3) on Linux
++// (see http://linux.die.net/man/3/dladdr)
++// dladdr(3) is not POSIX but a GNU extension, and is not available on AIX.
++//
++// Differences between AIX dladdr and Linux dladdr:
++//
++// 1) Dl_info.dli_fbase: can never work, is disabled.
++// A loaded image on AIX is divided into multiple segments, at least two
++// (text and data) but potentially also far more. This is because the loader may
++// load each member into its own segment, as happens for instance with libC.a.
++// 2) Dl_info.dli_sname: This only works for code symbols (functions); for data, a
++// zero-length string is returned ("").
++// 3) Dl_info.dli_saddr: For code, this will return the entry point of the function,
++// not the function descriptor.
++
++typedef struct {
++ const char *dli_fname; // file path of loaded library
++ // void *dli_fbase;
++ const char *dli_sname; // symbol name; "" if not known
++ void *dli_saddr; // address of *entry* of function; not function descriptor;
++} Dl_info;
++
++// Note: we export this to use it inside J2SE too.
++#ifdef __cplusplus
++extern "C"
++#endif
++int dladdr(void *addr, Dl_info *info);
++
++
++// The semantics in this file are thus that codeptr_t is a *real code ptr*.
++// This means that any function taking codeptr_t as arguments will assume
++// a real codeptr and won't handle function descriptors (eg getFuncName),
++// whereas functions taking address as args will deal with function
++// descriptors (eg os::dll_address_to_library_name).
++typedef unsigned int* codeptr_t;
++
++// Helper function - given a program counter, tries to locate the traceback table and
++// returns info from it (most importantly the function name, the displacement of the
++// pc inside the function, and the traceback table itself).
++#ifdef __cplusplus
++extern "C"
++#endif
++int getFuncName(
++ codeptr_t pc, // [in] program counter
++ char* p_name, size_t namelen, // [out] optional: user provided buffer for the function name
++ int* p_displacement, // [out] optional: displacement
++ const struct tbtable** p_tb, // [out] optional: ptr to traceback table to get further information
++ char* p_errmsg, size_t errmsglen // [out] optional: user provided buffer for error messages
++ );
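
A hedged sketch of calling the getFuncName interface declared above; it assumes the
declarations from porting_aix.hpp are in scope, that pc is a real code pointer (not a
function descriptor, per the codeptr_t convention), and print_function_at is a
hypothetical helper:

#include <stdio.h>

static void print_function_at(codeptr_t pc) {
  char name[256] = "";
  int displacement = 0;
  char errmsg[128] = "";

  if (getFuncName(pc, name, sizeof(name), &displacement,
                  NULL,                    // traceback table not needed here
                  errmsg, sizeof(errmsg)) == 0) {
    // The start of the enclosing function is pc minus the displacement,
    // which is how the dladdr implementation above computes dli_saddr.
    printf("%s+0x%x\n", name, displacement);
  } else {
    printf("getFuncName failed: %s\n", errmsg);
  }
}
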
+--- ./hotspot/src/os/aix/vm/threadCritical_aix.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/os/aix/vm/threadCritical_aix.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,68 @@
++/*
++ * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2014 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "runtime/threadCritical.hpp"
++#include "runtime/thread.inline.hpp"
++
++// put OS-includes here
++# include <pthread.h>
++
++//
++// See threadCritical.hpp for details of this class.
++//
++
++static pthread_t tc_owner = 0;
++static pthread_mutex_t tc_mutex = PTHREAD_MUTEX_INITIALIZER;
++static int tc_count = 0;
++
++void ThreadCritical::initialize() {
++}
++
++void ThreadCritical::release() {
++}
++
++ThreadCritical::ThreadCritical() {
++ pthread_t self = pthread_self();
++ if (self != tc_owner) {
++ int ret = pthread_mutex_lock(&tc_mutex);
++ guarantee(ret == 0, "fatal error with pthread_mutex_lock()");
++ assert(tc_count == 0, "Lock acquired with illegal reentry count.");
++ tc_owner = self;
++ }
++ tc_count++;
++}
++
++ThreadCritical::~ThreadCritical() {
++ assert(tc_owner == pthread_self(), "must have correct owner");
++ assert(tc_count > 0, "must have correct count");
++
++ tc_count--;
++ if (tc_count == 0) {
++ tc_owner = 0;
++ int ret = pthread_mutex_unlock(&tc_mutex);
++ guarantee(ret == 0, "fatal error with pthread_mutex_unlock()");
++ }
++}
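
ThreadCritical above implements recursion with the classic owner-plus-count pattern
over a non-recursive pthread mutex: only a thread's first entry takes the lock, and
only its outermost exit releases it. A self-contained sketch of the same pattern,
independent of HotSpot types (the unlocked read of the owner field mirrors the
original's benign race, since a non-owner can never observe its own id there):

#include <assert.h>
#include <pthread.h>

static pthread_mutex_t g_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_t g_owner;   // valid only while g_count > 0
static int g_count = 0;

void critical_enter() {
  pthread_t self = pthread_self();
  if (g_count == 0 || !pthread_equal(g_owner, self)) {
    pthread_mutex_lock(&g_mutex);    // first entry by this thread
    g_owner = self;
  }
  g_count++;                         // reentry just bumps the count
}

void critical_exit() {
  assert(g_count > 0 && pthread_equal(g_owner, pthread_self()));
  if (--g_count == 0) {
    pthread_mutex_unlock(&g_mutex);  // outermost exit releases the lock
  }
}
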
+--- ./hotspot/src/os/aix/vm/thread_aix.inline.hpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/os/aix/vm/thread_aix.inline.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,42 @@
++/*
++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright 2012, 2013 SAP AG. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef OS_AIX_VM_THREAD_AIX_INLINE_HPP
++#define OS_AIX_VM_THREAD_AIX_INLINE_HPP
++
++#include "runtime/atomic.hpp"
++#include "runtime/prefetch.hpp"
++#include "runtime/thread.hpp"
++#include "runtime/threadLocalStorage.hpp"
++
++#include "atomic_aix_ppc.inline.hpp"
++#include "orderAccess_aix_ppc.inline.hpp"
++#include "prefetch_aix_ppc.inline.hpp"
++
++// Contains inlined functions for class Thread and ThreadLocalStorage
++
++inline void ThreadLocalStorage::pd_invalidate_all() {} // nothing to do
++
++#endif // OS_AIX_VM_THREAD_AIX_INLINE_HPP
+--- ./hotspot/src/os/aix/vm/vmError_aix.cpp Thu Jan 01 00:00:00 1970 +0000
++++ ./hotspot/src/os/aix/vm/vmError_aix.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -0,0 +1,122 @@
++/*
++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "runtime/arguments.hpp"
++#include "runtime/os.hpp"
++#include "runtime/thread.hpp"
++#include "utilities/vmError.hpp"
++
++#include <sys/types.h>
++#include <sys/wait.h>
++#include <unistd.h>
++#include <signal.h>
++
++void VMError::show_message_box(char *buf, int buflen) {
++ bool yes;
++ do {
++ error_string(buf, buflen);
++ int len = (int)strlen(buf);
++ char *p = &buf[len];
++
++ jio_snprintf(p, buflen - len,
++ "\n\n"
++ "Do you want to debug the problem?\n\n"
++ "To debug, run 'dbx -a %d'; then switch to thread tid " INTX_FORMAT ", k-tid " INTX_FORMAT "\n"
++ "Enter 'yes' to launch dbx automatically (PATH must include dbx)\n"
++ "Otherwise, press RETURN to abort...",
++ os::current_process_id(),
++ os::current_thread_id(), thread_self());
++
++ yes = os::message_box("Unexpected Error", buf);
++
++ if (yes) {
++ // yes, user asked VM to launch debugger
++ jio_snprintf(buf, buflen, "dbx -a %d", os::current_process_id());
++
++ os::fork_and_exec(buf);
++ yes = false;
++ }
++ } while (yes);
++}
++
++// Handle all synchronous signals which may happen during signal handling,
++// not just SIGSEGV and SIGBUS.
++static const int SIGNALS[] = { SIGSEGV, SIGBUS, SIGILL, SIGFPE, SIGTRAP }; // add more if needed
++static const int NUM_SIGNALS = sizeof(SIGNALS) / sizeof(int);
++
++// Space for our "saved" signal flags and handlers
++static int resettedSigflags[NUM_SIGNALS];
++static address resettedSighandler[NUM_SIGNALS];
++
++static void save_signal(int idx, int sig) {
++ struct sigaction sa;
++ sigaction(sig, NULL, &sa);
++ resettedSigflags[idx] = sa.sa_flags;
++ resettedSighandler[idx] = (sa.sa_flags & SA_SIGINFO)
++ ? CAST_FROM_FN_PTR(address, sa.sa_sigaction)
++ : CAST_FROM_FN_PTR(address, sa.sa_handler);
++}
++
++int VMError::get_resetted_sigflags(int sig) {
++ // Handle all program errors.
++ for (int i = 0; i < NUM_SIGNALS; i++) {
++ if (SIGNALS[i] == sig) {
++ return resettedSigflags[i];
++ }
++ }
++ return -1;
++}
++
++address VMError::get_resetted_sighandler(int sig) {
++ // Handle all program errors.
++ for (int i = 0; i < NUM_SIGNALS; i++) {
++ if (SIGNALS[i] == sig) {
++ return resettedSighandler[i];
++ }
++ }
++ return NULL;
++}
++
++static void crash_handler(int sig, siginfo_t* info, void* ucVoid) {
++ // Unmask current signal.
++ sigset_t newset;
++ sigemptyset(&newset);
++ sigaddset(&newset, sig);
++
++ Unimplemented();
++}
++
++void VMError::reset_signal_handlers() {
++ sigset_t newset;
++ sigemptyset(&newset);
++
++ for (int i = 0; i < NUM_SIGNALS; i++) {
++ save_signal(i, SIGNALS[i]);
++ os::signal(SIGNALS[i], CAST_FROM_FN_PTR(void *, crash_handler));
++ sigaddset(&newset, SIGNALS[i]);
++ }
++
++ sigthreadmask(SIG_UNBLOCK, &newset, NULL);
++}
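
save_signal above relies on the query form of sigaction(2): passing NULL as the new
action reads the current disposition without changing it, and SA_SIGINFO decides
whether sa_sigaction or sa_handler is the live field. A minimal portable sketch of
that idiom (show_disposition is a hypothetical helper):

#include <signal.h>
#include <stdio.h>

static void show_disposition(int sig) {
  struct sigaction sa;
  if (sigaction(sig, NULL, &sa) != 0) {   // NULL act means query only
    perror("sigaction");
    return;
  }
  if (sa.sa_flags & SA_SIGINFO) {
    printf("sig %d: sa_sigaction=%p\n", sig, (void*)sa.sa_sigaction);
  } else {
    printf("sig %d: sa_handler=%p\n", sig, (void*)sa.sa_handler);
  }
}
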
+--- ./hotspot/src/os/bsd/vm/decoder_machO.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/os/bsd/vm/decoder_machO.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+@@ -53,7 +53,7 @@
+ struct symtab_command * symt = (struct symtab_command *)
+ mach_find_command((struct mach_header_64 *)mach_base, LC_SYMTAB);
+ if (symt == NULL) {
+- DEBUG_ONLY(tty->print_cr("no symtab in mach file at 0x%lx", mach_base));
++ DEBUG_ONLY(tty->print_cr("no symtab in mach file at 0x%lx", p2i(mach_base)));
+ return false;
+ }
+ uint32_t off = symt->symoff; /* symbol table offset (within this mach file) */
+--- ./hotspot/src/os/bsd/vm/os_bsd.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/os/bsd/vm/os_bsd.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -123,12 +123,19 @@
+ #define ALL_64_BITS CONST64(0xFFFFFFFFFFFFFFFF)
+
+ #define LARGEPAGES_BIT (1 << 6)
++
++PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
++
+ ////////////////////////////////////////////////////////////////////////////////
+ // global variables
+ julong os::Bsd::_physical_memory = 0;
+
+-
++#ifdef __APPLE__
++mach_timebase_info_data_t os::Bsd::_timebase_info = {0, 0};
++volatile uint64_t os::Bsd::_max_abstime = 0;
++#else
+ int (*os::Bsd::_clock_gettime)(clockid_t, struct timespec *) = NULL;
++#endif
+ pthread_t os::Bsd::_main_thread;
+ int os::Bsd::_page_size = -1;
+
+@@ -219,7 +226,7 @@
+ static char cpu_arch[] = "amd64";
+ #elif defined(ARM)
+ static char cpu_arch[] = "arm";
+-#elif defined(PPC)
++#elif defined(PPC32)
+ static char cpu_arch[] = "ppc";
+ #elif defined(SPARC)
+ # ifdef _LP64
+@@ -306,9 +313,6 @@
+ #endif
+
+ void os::init_system_properties_values() {
+-// char arch[12];
+-// sysinfo(SI_ARCHITECTURE, arch, sizeof(arch));
+-
+ // The next steps are taken in the product version:
+ //
+ // Obtain the JAVA_HOME value from the location of libjvm.so.
+@@ -335,199 +339,205 @@
+ // Important note: if the location of libjvm.so changes this
+ // code needs to be changed accordingly.
+
+- // The next few definitions allow the code to be verbatim:
+-#define malloc(n) (char*)NEW_C_HEAP_ARRAY(char, (n), mtInternal)
+-#define getenv(n) ::getenv(n)
+-
+-/*
+- * See ld(1):
+- * The linker uses the following search paths to locate required
+- * shared libraries:
+- * 1: ...
+- * ...
+- * 7: The default directories, normally /lib and /usr/lib.
+- */
++// See ld(1):
++// The linker uses the following search paths to locate required
++// shared libraries:
++// 1: ...
++// ...
++// 7: The default directories, normally /lib and /usr/lib.
+ #ifndef DEFAULT_LIBPATH
+ #define DEFAULT_LIBPATH "/lib:/usr/lib"
+ #endif
+
++// Base path of extensions installed on the system.
++#define SYS_EXT_DIR "/usr/java/packages"
+ #define EXTENSIONS_DIR "/lib/ext"
+ #define ENDORSED_DIR "/lib/endorsed"
+-#define REG_DIR "/usr/java/packages"
+-
+-#ifdef __APPLE__
++
++#ifndef __APPLE__
++
++ // Buffer that fits several sprintfs.
++ // Note that the space for the colon and the trailing null are provided
++ // by the nulls included by the sizeof operator.
++ const size_t bufsize =
++ MAX3((size_t)MAXPATHLEN, // For dll_dir & friends.
++ (size_t)MAXPATHLEN + sizeof(EXTENSIONS_DIR) + sizeof(SYS_EXT_DIR) + sizeof(EXTENSIONS_DIR), // extensions dir
++ (size_t)MAXPATHLEN + sizeof(ENDORSED_DIR)); // endorsed dir
++ char *buf = (char *)NEW_C_HEAP_ARRAY(char, bufsize, mtInternal);
++
++ // sysclasspath, java_home, dll_dir
++ {
++ char *pslash;
++ os::jvm_path(buf, bufsize);
++
++ // Found the full path to libjvm.so.
++ // Now cut the path to /jre if we can.
++ *(strrchr(buf, '/')) = '\0'; // Get rid of /libjvm.so.
++ pslash = strrchr(buf, '/');
++ if (pslash != NULL) {
++ *pslash = '\0'; // Get rid of /{client|server|hotspot}.
++ }
++ Arguments::set_dll_dir(buf);
++
++ if (pslash != NULL) {
++ pslash = strrchr(buf, '/');
++ if (pslash != NULL) {
++ *pslash = '\0'; // Get rid of /<arch>.
++ pslash = strrchr(buf, '/');
++ if (pslash != NULL) {
++ *pslash = '\0'; // Get rid of /lib.
++ }
++ }
++ }
++ Arguments::set_java_home(buf);
++ set_boot_path('/', ':');
++ }
++
++ // Where to look for native libraries.
++ //
++ // Note: Due to a legacy implementation, most of the library path
++ // is set in the launcher. This was to accommodate linking restrictions
++ // on legacy Bsd implementations (which are no longer supported).
++ // Eventually, all the library path setting will be done here.
++ //
++ // However, to prevent the proliferation of improperly built native
++ // libraries, the new path component /usr/java/packages is added here.
++ // Eventually, all the library path setting will be done here.
++ {
++ // Get the user setting of LD_LIBRARY_PATH, and prepend it. It
++ // should always exist (until the legacy problem cited above is
++ // addressed).
++ const char *v = ::getenv("LD_LIBRARY_PATH");
++ const char *v_colon = ":";
++ if (v == NULL) { v = ""; v_colon = ""; }
++ // That's +1 for the colon and +1 for the trailing '\0'.
++ char *ld_library_path = (char *)NEW_C_HEAP_ARRAY(char,
++ strlen(v) + 1 +
++ sizeof(SYS_EXT_DIR) + sizeof("/lib/") + strlen(cpu_arch) + sizeof(DEFAULT_LIBPATH) + 1,
++ mtInternal);
++ sprintf(ld_library_path, "%s%s" SYS_EXT_DIR "/lib/%s:" DEFAULT_LIBPATH, v, v_colon, cpu_arch);
++ Arguments::set_library_path(ld_library_path);
++ FREE_C_HEAP_ARRAY(char, ld_library_path, mtInternal);
++ }
++
++ // Extensions directories.
++ sprintf(buf, "%s" EXTENSIONS_DIR ":" SYS_EXT_DIR EXTENSIONS_DIR, Arguments::get_java_home());
++ Arguments::set_ext_dirs(buf);
++
++ // Endorsed standards default directory.
++ sprintf(buf, "%s" ENDORSED_DIR, Arguments::get_java_home());
++ Arguments::set_endorsed_dirs(buf);
++
++ FREE_C_HEAP_ARRAY(char, buf, mtInternal);
++
++#else // __APPLE__
++
+ #define SYS_EXTENSIONS_DIR "/Library/Java/Extensions"
+ #define SYS_EXTENSIONS_DIRS SYS_EXTENSIONS_DIR ":/Network" SYS_EXTENSIONS_DIR ":/System" SYS_EXTENSIONS_DIR ":/usr/lib/java"
+- const char *user_home_dir = get_home();
+- // the null in SYS_EXTENSIONS_DIRS counts for the size of the colon after user_home_dir
+- int system_ext_size = strlen(user_home_dir) + sizeof(SYS_EXTENSIONS_DIR) +
+- sizeof(SYS_EXTENSIONS_DIRS);
+-#endif
+-
++
++ const char *user_home_dir = get_home();
++ // The null in SYS_EXTENSIONS_DIRS counts for the size of the colon after user_home_dir.
++ size_t system_ext_size = strlen(user_home_dir) + sizeof(SYS_EXTENSIONS_DIR) +
++ sizeof(SYS_EXTENSIONS_DIRS);
++
++ // Buffer that fits several sprintfs.
++ // Note that the space for the colon and the trailing null are provided
++ // by the nulls included by the sizeof operator.
++ const size_t bufsize =
++ MAX3((size_t)MAXPATHLEN, // for dll_dir & friends.
++ (size_t)MAXPATHLEN + sizeof(EXTENSIONS_DIR) + system_ext_size, // extensions dir
++ (size_t)MAXPATHLEN + sizeof(ENDORSED_DIR)); // endorsed dir
++ char *buf = (char *)NEW_C_HEAP_ARRAY(char, bufsize, mtInternal);
++
++ // sysclasspath, java_home, dll_dir
+ {
+- /* sysclasspath, java_home, dll_dir */
+- {
+- char *home_path;
+- char *dll_path;
+- char *pslash;
+- char buf[MAXPATHLEN];
+- os::jvm_path(buf, sizeof(buf));
+-
+- // Found the full path to libjvm.so.
+- // Now cut the path to /jre if we can.
+- *(strrchr(buf, '/')) = '\0'; /* get rid of /libjvm.so */
+- pslash = strrchr(buf, '/');
+- if (pslash != NULL)
+- *pslash = '\0'; /* get rid of /{client|server|hotspot} */
+- dll_path = malloc(strlen(buf) + 1);
+- if (dll_path == NULL)
+- return;
+- strcpy(dll_path, buf);
+- Arguments::set_dll_dir(dll_path);
+-
+- if (pslash != NULL) {
+- pslash = strrchr(buf, '/');
+- if (pslash != NULL) {
+- *pslash = '\0'; /* get rid of /<arch> (/lib on macosx) */
+-#ifndef __APPLE__
+- pslash = strrchr(buf, '/');
+- if (pslash != NULL)
+- *pslash = '\0'; /* get rid of /lib */
+-#endif
+- }
+- }
+-
+- home_path = malloc(strlen(buf) + 1);
+- if (home_path == NULL)
+- return;
+- strcpy(home_path, buf);
+- Arguments::set_java_home(home_path);
+-
+- if (!set_boot_path('/', ':'))
+- return;
++ char *pslash;
++ os::jvm_path(buf, bufsize);
++
++ // Found the full path to libjvm.so.
++ // Now cut the path to /jre if we can.
++ *(strrchr(buf, '/')) = '\0'; // Get rid of /libjvm.so.
++ pslash = strrchr(buf, '/');
++ if (pslash != NULL) {
++ *pslash = '\0'; // Get rid of /{client|server|hotspot}.
+ }
+-
+- /*
+- * Where to look for native libraries
+- *
+- * Note: Due to a legacy implementation, most of the library path
+- * is set in the launcher. This was to accomodate linking restrictions
+- * on legacy Bsd implementations (which are no longer supported).
+- * Eventually, all the library path setting will be done here.
+- *
+- * However, to prevent the proliferation of improperly built native
+- * libraries, the new path component /usr/java/packages is added here.
+- * Eventually, all the library path setting will be done here.
+- */
+- {
+- char *ld_library_path;
+-
+- /*
+- * Construct the invariant part of ld_library_path. Note that the
+- * space for the colon and the trailing null are provided by the
+- * nulls included by the sizeof operator (so actually we allocate
+- * a byte more than necessary).
+- */
+-#ifdef __APPLE__
+- ld_library_path = (char *) malloc(system_ext_size);
+- sprintf(ld_library_path, "%s" SYS_EXTENSIONS_DIR ":" SYS_EXTENSIONS_DIRS, user_home_dir);
+-#else
+- ld_library_path = (char *) malloc(sizeof(REG_DIR) + sizeof("/lib/") +
+- strlen(cpu_arch) + sizeof(DEFAULT_LIBPATH));
+- sprintf(ld_library_path, REG_DIR "/lib/%s:" DEFAULT_LIBPATH, cpu_arch);
+-#endif
+-
+- /*
+- * Get the user setting of LD_LIBRARY_PATH, and prepended it. It
+- * should always exist (until the legacy problem cited above is
+- * addressed).
+- */
+-#ifdef __APPLE__
+- // Prepend the default path with the JAVA_LIBRARY_PATH so that the app launcher code can specify a directory inside an app wrapper
+- char *l = getenv("JAVA_LIBRARY_PATH");
+- if (l != NULL) {
+- char *t = ld_library_path;
+- /* That's +1 for the colon and +1 for the trailing '\0' */
+- ld_library_path = (char *) malloc(strlen(l) + 1 + strlen(t) + 1);
+- sprintf(ld_library_path, "%s:%s", l, t);
+- free(t);
+- }
+-
+- char *v = getenv("DYLD_LIBRARY_PATH");
+-#else
+- char *v = getenv("LD_LIBRARY_PATH");
+-#endif
+- if (v != NULL) {
+- char *t = ld_library_path;
+- /* That's +1 for the colon and +1 for the trailing '\0' */
+- ld_library_path = (char *) malloc(strlen(v) + 1 + strlen(t) + 1);
+- sprintf(ld_library_path, "%s:%s", v, t);
+- free(t);
+- }
+-
+-#ifdef __APPLE__
+- // Apple's Java6 has "." at the beginning of java.library.path.
+- // OpenJDK on Windows has "." at the end of java.library.path.
+- // OpenJDK on Linux and Solaris don't have "." in java.library.path
+- // at all. To ease the transition from Apple's Java6 to OpenJDK7,
+- // "." is appended to the end of java.library.path. Yes, this
+- // could cause a change in behavior, but Apple's Java6 behavior
+- // can be achieved by putting "." at the beginning of the
+- // JAVA_LIBRARY_PATH environment variable.
+- {
+- char *t = ld_library_path;
+- // that's +3 for appending ":." and the trailing '\0'
+- ld_library_path = (char *) malloc(strlen(t) + 3);
+- sprintf(ld_library_path, "%s:%s", t, ".");
+- free(t);
+- }
+-#endif
+-
+- Arguments::set_library_path(ld_library_path);
++ Arguments::set_dll_dir(buf);
++
++ if (pslash != NULL) {
++ pslash = strrchr(buf, '/');
++ if (pslash != NULL) {
++ *pslash = '\0'; // Get rid of /lib.
++ }
+ }
+-
+- /*
+- * Extensions directories.
+- *
+- * Note that the space for the colon and the trailing null are provided
+- * by the nulls included by the sizeof operator (so actually one byte more
+- * than necessary is allocated).
+- */
+- {
+-#ifdef __APPLE__
+- char *buf = malloc(strlen(Arguments::get_java_home()) +
+- sizeof(EXTENSIONS_DIR) + system_ext_size);
+- sprintf(buf, "%s" SYS_EXTENSIONS_DIR ":%s" EXTENSIONS_DIR ":"
+- SYS_EXTENSIONS_DIRS, user_home_dir, Arguments::get_java_home());
+-#else
+- char *buf = malloc(strlen(Arguments::get_java_home()) +
+- sizeof(EXTENSIONS_DIR) + sizeof(REG_DIR) + sizeof(EXTENSIONS_DIR));
+- sprintf(buf, "%s" EXTENSIONS_DIR ":" REG_DIR EXTENSIONS_DIR,
+- Arguments::get_java_home());
+-#endif
+-
+- Arguments::set_ext_dirs(buf);
+- }
+-
+- /* Endorsed standards default directory. */
+- {
+- char * buf;
+- buf = malloc(strlen(Arguments::get_java_home()) + sizeof(ENDORSED_DIR));
+- sprintf(buf, "%s" ENDORSED_DIR, Arguments::get_java_home());
+- Arguments::set_endorsed_dirs(buf);
+- }
++ Arguments::set_java_home(buf);
++ set_boot_path('/', ':');
+ }
+
+-#ifdef __APPLE__
++ // Where to look for native libraries.
++ //
++ // Note: Due to a legacy implementation, most of the library path
++ // is set in the launcher. This was to accommodate linking restrictions
++ // on legacy Bsd implementations (which are no longer supported).
++ // Eventually, all the library path setting will be done here.
++ //
++ // However, to prevent the proliferation of improperly built native
++ // libraries, the new path component /usr/java/packages is added here.
++ // Eventually, all the library path setting will be done here.
++ {
++ // Get the user setting of LD_LIBRARY_PATH, and prepend it. It
++ // should always exist (until the legacy problem cited above is
++ // addressed).
++ // Prepend the default path with the JAVA_LIBRARY_PATH so that the app launcher code
++ // can specify a directory inside an app wrapper
++ const char *l = ::getenv("JAVA_LIBRARY_PATH");
++ const char *l_colon = ":";
++ if (l == NULL) { l = ""; l_colon = ""; }
++
++ const char *v = ::getenv("DYLD_LIBRARY_PATH");
++ const char *v_colon = ":";
++ if (v == NULL) { v = ""; v_colon = ""; }
++
++ // Apple's Java6 has "." at the beginning of java.library.path.
++ // OpenJDK on Windows has "." at the end of java.library.path.
++ // OpenJDK on Linux and Solaris don't have "." in java.library.path
++ // at all. To ease the transition from Apple's Java6 to OpenJDK7,
++ // "." is appended to the end of java.library.path. Yes, this
++ // could cause a change in behavior, but Apple's Java6 behavior
++ // can be achieved by putting "." at the beginning of the
++ // JAVA_LIBRARY_PATH environment variable.
++ char *ld_library_path = (char *)NEW_C_HEAP_ARRAY(char,
++ strlen(v) + 1 + strlen(l) + 1 +
++ system_ext_size + 3,
++ mtInternal);
++ sprintf(ld_library_path, "%s%s%s%s%s" SYS_EXTENSIONS_DIR ":" SYS_EXTENSIONS_DIRS ":.",
++ v, v_colon, l, l_colon, user_home_dir);
++ Arguments::set_library_path(ld_library_path);
++ FREE_C_HEAP_ARRAY(char, ld_library_path, mtInternal);
++ }
++
++ // Extensions directories.
++ //
++ // Note that the space for the colon and the trailing null are provided
++ // by the nulls included by the sizeof operator (so actually one byte more
++ // than necessary is allocated).
++ sprintf(buf, "%s" SYS_EXTENSIONS_DIR ":%s" EXTENSIONS_DIR ":" SYS_EXTENSIONS_DIRS,
++ user_home_dir, Arguments::get_java_home());
++ Arguments::set_ext_dirs(buf);
++
++ // Endorsed standards default directory.
++ sprintf(buf, "%s" ENDORSED_DIR, Arguments::get_java_home());
++ Arguments::set_endorsed_dirs(buf);
++
++ FREE_C_HEAP_ARRAY(char, buf, mtInternal);
++
+ #undef SYS_EXTENSIONS_DIR
+-#endif
+-#undef malloc
+-#undef getenv
++#undef SYS_EXTENSIONS_DIRS
++
++#endif // __APPLE__
++
++#undef SYS_EXT_DIR
+ #undef EXTENSIONS_DIR
+ #undef ENDORSED_DIR
+-
+- // Done
+- return;
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+@@ -914,9 +924,20 @@
+ //////////////////////////////////////////////////////////////////////////////
+ // thread local storage
+
++// Restore the thread pointer if the destructor is called. This is in case
++// someone from JNI code sets up a destructor with pthread_key_create to run
++// detachCurrentThread on thread death. Unless we restore the thread pointer we
++// will hang or crash. When detachCurrentThread is called the key will be set
++// to null and we will not be called again. If detachCurrentThread is never
++// called we could loop forever depending on the pthread implementation.
++static void restore_thread_pointer(void* p) {
++ Thread* thread = (Thread*) p;
++ os::thread_local_storage_at_put(ThreadLocalStorage::thread_index(), thread);
++}
++
+ int os::allocate_thread_local_storage() {
+ pthread_key_t key;
+- int rslt = pthread_key_create(&key, NULL);
++ int rslt = pthread_key_create(&key, restore_thread_pointer);
+ assert(rslt == 0, "cannot allocate thread local storage");
+ return (int)key;
+ }
+@@ -972,13 +993,15 @@
+ return jlong(time.tv_sec) * 1000 + jlong(time.tv_usec / 1000);
+ }
+
++#ifndef __APPLE__
+ #ifndef CLOCK_MONOTONIC
+ #define CLOCK_MONOTONIC (1)
+ #endif
++#endif
+
+ #ifdef __APPLE__
+ void os::Bsd::clock_init() {
+- // XXXDARWIN: Investigate replacement monotonic clock
++ mach_timebase_info(&_timebase_info);
+ }
+ #else
+ void os::Bsd::clock_init() {
+@@ -993,10 +1016,38 @@
+ #endif
+
+
++#ifdef __APPLE__
++
++jlong os::javaTimeNanos() {
++ const uint64_t tm = mach_absolute_time();
++ const uint64_t now = (tm * Bsd::_timebase_info.numer) / Bsd::_timebase_info.denom;
++ const uint64_t prev = Bsd::_max_abstime;
++ if (now <= prev) {
++ return prev; // same or retrograde time;
++ }
++ const uint64_t obsv = Atomic::cmpxchg(now, (volatile jlong*)&Bsd::_max_abstime, prev);
++ assert(obsv >= prev, "invariant"); // Monotonicity
++ // If the CAS succeeded then we're done and return "now".
++ // If the CAS failed and the observed value "obsv" is >= now then
++ // we should return "obsv". If the CAS failed and now > obsv > prev then
++ // some other thread raced this thread and installed a new value, in which case
++ // we could either (a) retry the entire operation, (b) retry trying to install now
++ // or (c) just return obsv. We use (c). No loop is required although in some cases
++ // we might discard a higher "now" value in deference to a slightly lower but freshly
++ // installed obsv value. That's entirely benign -- it admits no new orderings compared
++ // to (a) or (b) -- and greatly reduces coherence traffic.
++ // We might also condition (c) on the magnitude of the delta between obsv and now.
++ // Avoiding excessive CAS operations to hot RW locations is critical.
++ // See https://blogs.oracle.com/dave/entry/cas_and_cache_trivia_invalidate
++ return (prev == obsv) ? now : obsv;
++}
++
++#else // __APPLE__
++
+ jlong os::javaTimeNanos() {
+ if (Bsd::supports_monotonic_clock()) {
+ struct timespec tp;
+- int status = Bsd::clock_gettime(CLOCK_MONOTONIC, &tp);
++ int status = Bsd::_clock_gettime(CLOCK_MONOTONIC, &tp);
+ assert(status == 0, "gettime error");
+ jlong result = jlong(tp.tv_sec) * (1000 * 1000 * 1000) + jlong(tp.tv_nsec);
+ return result;
+@@ -1009,6 +1060,8 @@
+ }
+ }
+
++#endif // __APPLE__
++
+ void os::javaTimeNanos_info(jvmtiTimerInfo *info_ptr) {
+ if (Bsd::supports_monotonic_clock()) {
+ info_ptr->max_value = ALL_64_BITS;
+@@ -1553,6 +1606,17 @@
+ }
+ #endif /* !__APPLE__ */
+
++void* os::get_default_process_handle() {
++#ifdef __APPLE__
++ // MacOS X needs to use RTLD_FIRST instead of RTLD_LAZY
++ // to avoid finding unexpected symbols on second (or later)
++ // loads of a library.
++ return (void*)::dlopen(NULL, RTLD_FIRST);
++#else
++ return (void*)::dlopen(NULL, RTLD_LAZY);
++#endif
++}
++
+ // XXX: Do we need a lock around this as per Linux?
+ void* os::dll_lookup(void* handle, const char* name) {
+ return dlsym(handle, name);
+@@ -1662,58 +1726,12 @@
+ st->cr();
+ }
+
+-// Taken from /usr/include/bits/siginfo.h Supposed to be architecture specific
+-// but they're the same for all the bsd arch that we support
+-// and they're the same for solaris but there's no common place to put this.
+-const char *ill_names[] = { "ILL0", "ILL_ILLOPC", "ILL_ILLOPN", "ILL_ILLADR",
+- "ILL_ILLTRP", "ILL_PRVOPC", "ILL_PRVREG",
+- "ILL_COPROC", "ILL_BADSTK" };
+-
+-const char *fpe_names[] = { "FPE0", "FPE_INTDIV", "FPE_INTOVF", "FPE_FLTDIV",
+- "FPE_FLTOVF", "FPE_FLTUND", "FPE_FLTRES",
+- "FPE_FLTINV", "FPE_FLTSUB", "FPE_FLTDEN" };
+-
+-const char *segv_names[] = { "SEGV0", "SEGV_MAPERR", "SEGV_ACCERR" };
+-
+-const char *bus_names[] = { "BUS0", "BUS_ADRALN", "BUS_ADRERR", "BUS_OBJERR" };
+-
+ void os::print_siginfo(outputStream* st, void* siginfo) {
+- st->print("siginfo:");
+-
+- const int buflen = 100;
+- char buf[buflen];
+- siginfo_t *si = (siginfo_t*)siginfo;
+- st->print("si_signo=%s: ", os::exception_name(si->si_signo, buf, buflen));
+- if (si->si_errno != 0 && strerror_r(si->si_errno, buf, buflen) == 0) {
+- st->print("si_errno=%s", buf);
+- } else {
+- st->print("si_errno=%d", si->si_errno);
+- }
+- const int c = si->si_code;
+- assert(c > 0, "unexpected si_code");
+- switch (si->si_signo) {
+- case SIGILL:
+- st->print(", si_code=%d (%s)", c, c > 8 ? "" : ill_names[c]);
+- st->print(", si_addr=" PTR_FORMAT, si->si_addr);
+- break;
+- case SIGFPE:
+- st->print(", si_code=%d (%s)", c, c > 9 ? "" : fpe_names[c]);
+- st->print(", si_addr=" PTR_FORMAT, si->si_addr);
+- break;
+- case SIGSEGV:
+- st->print(", si_code=%d (%s)", c, c > 2 ? "" : segv_names[c]);
+- st->print(", si_addr=" PTR_FORMAT, si->si_addr);
+- break;
+- case SIGBUS:
+- st->print(", si_code=%d (%s)", c, c > 3 ? "" : bus_names[c]);
+- st->print(", si_addr=" PTR_FORMAT, si->si_addr);
+- break;
+- default:
+- st->print(", si_code=%d", si->si_code);
+- // no si_addr
+- }
+-
+- if ((si->si_signo == SIGBUS || si->si_signo == SIGSEGV) &&
++ const siginfo_t* si = (const siginfo_t*)siginfo;
++
++ os::Posix::print_siginfo_brief(st, si);
++
++ if (si && (si->si_signo == SIGBUS || si->si_signo == SIGSEGV) &&
+ UseSharedSpaces) {
+ FileMapInfo* mapinfo = FileMapInfo::current_info();
+ if (mapinfo->is_in_shared_space(si->si_addr)) {
+@@ -2372,7 +2390,6 @@
+ (!FLAG_IS_DEFAULT(UseLargePages) ||
+ !FLAG_IS_DEFAULT(LargePageSizeInBytes)
+ );
+- char msg[128];
+
+ // Create a large shared memory region to attach to based on size.
+ // Currently, size is the total size of the heap
+@@ -2393,8 +2410,7 @@
+ // coalesce into large pages. Try to reserve large pages when
+ // the system is still "fresh".
+ if (warn_on_failure) {
+- jio_snprintf(msg, sizeof(msg), "Failed to reserve shared memory (errno = %d).", errno);
+- warning(msg);
++ warning("Failed to reserve shared memory (errno = %d).", errno);
+ }
+ return NULL;
+ }
+@@ -2411,8 +2427,7 @@
+
+ if ((intptr_t)addr == -1) {
+ if (warn_on_failure) {
+- jio_snprintf(msg, sizeof(msg), "Failed to attach shared memory (errno = %d).", err);
+- warning(msg);
++ warning("Failed to attach shared memory (errno = %d).", err);
+ }
+ return NULL;
+ }
+@@ -2622,9 +2637,21 @@
+ }
+ }
+
+-int os::naked_sleep() {
+- // %% make the sleep time an integer flag. for now use 1 millisec.
+- return os::sleep(Thread::current(), 1, false);
++void os::naked_short_sleep(jlong ms) {
++ struct timespec req;
++
++ assert(ms < 1000, "Un-interruptable sleep, short time use only");
++ req.tv_sec = 0;
++ if (ms > 0) {
++ req.tv_nsec = (ms % 1000) * 1000000;
++ }
++ else {
++ req.tv_nsec = 1;
++ }
++
++ nanosleep(&req, NULL);
++
++ return;
+ }
+
+ // Sleep forever; naked call to OS-specific sleep; use with CAUTION
+@@ -3232,7 +3259,7 @@
+ sigAct.sa_sigaction = signalHandler;
+ sigAct.sa_flags = SA_SIGINFO|SA_RESTART;
+ }
+-#if __APPLE__
++#ifdef __APPLE__
+ // Needed for main thread as XNU (Mac OS X kernel) will only deliver SIGSEGV
+ // (which starts as SIGBUS) on main thread with faulting address inside "stack+guard pages"
+ // if the signal handler declares it will handle it on alternate stack.
+@@ -3389,7 +3416,8 @@
+ st->print("[%s]", get_signal_handler_name(handler, buf, buflen));
+ }
+
+- st->print(", sa_mask[0]=" PTR32_FORMAT, *(uint32_t*)&sa.sa_mask);
++ st->print(", sa_mask[0]=");
++ os::Posix::print_signal_set_short(st, &sa.sa_mask);
+
+ address rh = VMError::get_resetted_sighandler(sig);
+ // May be, handler was resetted by VMError?
+@@ -3398,7 +3426,8 @@
+ sa.sa_flags = VMError::get_resetted_sigflags(sig) & SIGNIFICANT_SIGNAL_MASK;
+ }
+
+- st->print(", sa_flags=" PTR32_FORMAT, sa.sa_flags);
++ st->print(", sa_flags=");
++ os::Posix::print_sa_flags(st, sa.sa_flags);
+
+ // Check: is it our handler?
+ if(handler == CAST_FROM_FN_PTR(address, (sa_sigaction_t)signalHandler) ||
+@@ -3897,6 +3926,7 @@
+ return true;
+ }
+
++ATTRIBUTE_PRINTF(3, 0)
+ int local_vsnprintf(char* buf, size_t count, const char* format, va_list args) {
+ return ::vsnprintf(buf, count, format, args);
+ }
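
The Apple javaTimeNanos above keeps time monotonic with a single compare-and-swap
against a published maximum, deliberately without a retry loop: as its comment
explains, losing the race and returning the other thread's value is benign. A
standalone sketch of the technique, with std::atomic standing in for HotSpot's
Atomic::cmpxchg and raw_time_now as a hypothetical, possibly retrograde time source:

#include <atomic>
#include <cstdint>

uint64_t raw_time_now();  // hypothetical: fast but not guaranteed monotonic

static std::atomic<uint64_t> g_max_time{0};

uint64_t monotonic_now() {
  const uint64_t now = raw_time_now();
  uint64_t prev = g_max_time.load(std::memory_order_relaxed);
  if (now <= prev) {
    return prev;                        // same or retrograde reading
  }
  // One CAS attempt, no loop: on failure, prev is updated to the value
  // another thread installed, and returning it preserves monotonicity.
  if (g_max_time.compare_exchange_strong(prev, now)) {
    return now;                         // we published the new maximum
  }
  return prev;
}
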
+--- ./hotspot/src/os/bsd/vm/os_bsd.hpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/os/bsd/vm/os_bsd.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -27,6 +27,9 @@
+
+ // Bsd_OS defines the interface to Bsd operating systems
+
++// Information about the protection of the page at address '0' on this os.
++static bool zero_page_read_protected() { return true; }
++
+ /* pthread_getattr_np comes with BsdThreads-0.9-7 on RedHat 7.1 */
+ typedef int (*pthread_getattr_func_type) (pthread_t, pthread_attr_t *);
+
+@@ -55,7 +58,13 @@
+ // For signal flags diagnostics
+ static int sigflags[MAXSIGNUM];
+
++#ifdef __APPLE__
++ // mach_absolute_time
++ static mach_timebase_info_data_t _timebase_info;
++ static volatile uint64_t _max_abstime;
++#else
+ static int (*_clock_gettime)(clockid_t, struct timespec *);
++#endif
+
+ static GrowableArray<int>* _cpu_to_node;
+
+@@ -132,11 +141,11 @@
+ static void clock_init(void);
+
+ static inline bool supports_monotonic_clock() {
++#ifdef __APPLE__
++ return true;
++#else
+ return _clock_gettime != NULL;
+- }
+-
+- static int clock_gettime(clockid_t clock_id, struct timespec *tp) {
+- return _clock_gettime ? _clock_gettime(clock_id, tp) : -1;
++#endif
+ }
+
+ // Stack repair handling
+--- ./hotspot/src/os/bsd/vm/perfMemory_bsd.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/os/bsd/vm/perfMemory_bsd.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+@@ -925,7 +925,7 @@
+
+ if (PerfTraceMemOps) {
+ tty->print("mapped " SIZE_FORMAT " bytes for vmid %d at "
+- INTPTR_FORMAT "\n", size, vmid, (void*)mapAddress);
++ INTPTR_FORMAT "\n", size, vmid, p2i((void*)mapAddress));
+ }
+ }
+
+--- ./hotspot/src/os/linux/vm/decoder_linux.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/os/linux/vm/decoder_linux.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -32,6 +32,12 @@
+ char* result;
+ size_t size = (size_t)buflen;
+
++#ifdef PPC64
++ // On PPC64 ElfDecoder::decode() may return a dot (.) prefixed name
++ // (see elfFuncDescTable.hpp for details)
++ if (symbol && *symbol == '.') symbol += 1;
++#endif
++
+ // Don't pass buf to __cxa_demangle. In case of the 'buf' is too small,
+ // __cxa_demangle will call system "realloc" for additional memory, which
+ // may use different malloc/realloc mechanism that allocates 'buf'.
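
The decoder change above strips the PPC64 dot prefix before demangling and lets
__cxa_demangle allocate its own result buffer, since handing in a fixed buffer risks
a realloc from a different allocator (the concern the comment cites). A hedged
sketch of that flow using the Itanium C++ ABI entry point from <cxxabi.h>:

#include <cxxabi.h>
#include <cstdio>
#include <cstdlib>

static void demangle_and_print(const char* symbol) {
  // On PPC64, ".foo" labels the code entry of foo; drop the dot first.
  if (symbol != NULL && *symbol == '.') symbol += 1;

  int status = 0;
  // NULL buffer/length: let __cxa_demangle malloc the result itself.
  char* result = abi::__cxa_demangle(symbol, NULL, NULL, &status);
  if (status == 0 && result != NULL) {
    printf("%s\n", result);
    free(result);                       // caller owns the returned buffer
  } else {
    printf("%s (not demangled)\n", symbol);
  }
}
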
+--- ./hotspot/src/os/linux/vm/os_linux.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/os/linux/vm/os_linux.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -101,6 +101,8 @@
+ # include <inttypes.h>
+ # include <sys/ioctl.h>
+
++PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
++
+ // if RUSAGE_THREAD for getrusage() has not been defined, do it here. The code calling
+ // getrusage() is prepared to handle the associated failure.
+ #ifndef RUSAGE_THREAD
+@@ -109,6 +111,8 @@
+
+ #define MAX_PATH (2 * K)
+
++#define MAX_SECS 100000000
++
+ // for timer info max values which include all bits
+ #define ALL_64_BITS CONST64(0xFFFFFFFFFFFFFFFF)
+
+@@ -139,7 +143,7 @@
+
+ // For diagnostics to print a message once. see run_periodic_checks
+ static sigset_t check_signal_done;
+-static bool check_signals = true;;
++static bool check_signals = true;
+
+ static pid_t _initial_pid = 0;
+
+@@ -257,8 +261,10 @@
+ static char cpu_arch[] = "amd64";
+ #elif defined(ARM)
+ static char cpu_arch[] = "arm";
+-#elif defined(PPC)
++#elif defined(PPC32)
+ static char cpu_arch[] = "ppc";
++#elif defined(PPC64)
++static char cpu_arch[] = "ppc64";
+ #elif defined(SPARC)
+ # ifdef _LP64
+ static char cpu_arch[] = "sparcv9";
+@@ -315,9 +321,6 @@
+ }
+
+ void os::init_system_properties_values() {
+-// char arch[12];
+-// sysinfo(SI_ARCHITECTURE, arch, sizeof(arch));
+-
+ // The next steps are taken in the product version:
+ //
+ // Obtain the JAVA_HOME value from the location of libjvm.so.
+@@ -344,140 +347,101 @@
+ // Important note: if the location of libjvm.so changes this
+ // code needs to be changed accordingly.
+
+- // The next few definitions allow the code to be verbatim:
+-#define malloc(n) (char*)NEW_C_HEAP_ARRAY(char, (n), mtInternal)
+-#define getenv(n) ::getenv(n)
+-
+-/*
+- * See ld(1):
+- * The linker uses the following search paths to locate required
+- * shared libraries:
+- * 1: ...
+- * ...
+- * 7: The default directories, normally /lib and /usr/lib.
+- */
++// See ld(1):
++// The linker uses the following search paths to locate required
++// shared libraries:
++// 1: ...
++// ...
++// 7: The default directories, normally /lib and /usr/lib.
+ #if defined(AMD64) || defined(_LP64) && (defined(SPARC) || defined(PPC) || defined(S390))
+ #define DEFAULT_LIBPATH "/usr/lib64:/lib64:/lib:/usr/lib"
+ #else
+ #define DEFAULT_LIBPATH "/lib:/usr/lib"
+ #endif
+
++// Base path of extensions installed on the system.
++#define SYS_EXT_DIR "/usr/java/packages"
+ #define EXTENSIONS_DIR "/lib/ext"
+ #define ENDORSED_DIR "/lib/endorsed"
+-#define REG_DIR "/usr/java/packages"
+-
++
++ // Buffer that fits several sprintfs.
++ // Note that the space for the colon and the trailing null are provided
++ // by the nulls included by the sizeof operator.
++ const size_t bufsize =
++ MAX3((size_t)MAXPATHLEN, // For dll_dir & friends.
++ (size_t)MAXPATHLEN + sizeof(EXTENSIONS_DIR) + sizeof(SYS_EXT_DIR) + sizeof(EXTENSIONS_DIR), // extensions dir
++ (size_t)MAXPATHLEN + sizeof(ENDORSED_DIR)); // endorsed dir
++ char *buf = (char *)NEW_C_HEAP_ARRAY(char, bufsize, mtInternal);
++
++ // sysclasspath, java_home, dll_dir
+ {
+- /* sysclasspath, java_home, dll_dir */
+- {
+- char *home_path;
+- char *dll_path;
+- char *pslash;
+- char buf[MAXPATHLEN];
+- os::jvm_path(buf, sizeof(buf));
+-
+- // Found the full path to libjvm.so.
+- // Now cut the path to /jre if we can.
+- *(strrchr(buf, '/')) = '\0'; /* get rid of /libjvm.so */
++ char *pslash;
++ os::jvm_path(buf, bufsize);
++
++ // Found the full path to libjvm.so.
++ // Now cut the path to /jre if we can.
++ *(strrchr(buf, '/')) = '\0'; // Get rid of /libjvm.so.
++ pslash = strrchr(buf, '/');
++ if (pslash != NULL) {
++ *pslash = '\0'; // Get rid of /{client|server|hotspot}.
++ }
++ Arguments::set_dll_dir(buf);
++
++ if (pslash != NULL) {
++ pslash = strrchr(buf, '/');
++ if (pslash != NULL) {
++ *pslash = '\0'; // Get rid of /<arch>.
+ pslash = strrchr(buf, '/');
+- if (pslash != NULL)
+- *pslash = '\0'; /* get rid of /{client|server|hotspot} */
+- dll_path = malloc(strlen(buf) + 1);
+- if (dll_path == NULL)
+- return;
+- strcpy(dll_path, buf);
+- Arguments::set_dll_dir(dll_path);
+-
+ if (pslash != NULL) {
+- pslash = strrchr(buf, '/');
+- if (pslash != NULL) {
+- *pslash = '\0'; /* get rid of /<arch> */
+- pslash = strrchr(buf, '/');
+- if (pslash != NULL)
+- *pslash = '\0'; /* get rid of /lib */
+- }
++ *pslash = '\0'; // Get rid of /lib.
+ }
+-
+- home_path = malloc(strlen(buf) + 1);
+- if (home_path == NULL)
+- return;
+- strcpy(home_path, buf);
+- Arguments::set_java_home(home_path);
+-
+- if (!set_boot_path('/', ':'))
+- return;
++ }
+ }
+-
+- /*
+- * Where to look for native libraries
+- *
+- * Note: Due to a legacy implementation, most of the library path
+- * is set in the launcher. This was to accomodate linking restrictions
+- * on legacy Linux implementations (which are no longer supported).
+- * Eventually, all the library path setting will be done here.
+- *
+- * However, to prevent the proliferation of improperly built native
+- * libraries, the new path component /usr/java/packages is added here.
+- * Eventually, all the library path setting will be done here.
+- */
+- {
+- char *ld_library_path;
+-
+- /*
+- * Construct the invariant part of ld_library_path. Note that the
+- * space for the colon and the trailing null are provided by the
+- * nulls included by the sizeof operator (so actually we allocate
+- * a byte more than necessary).
+- */
+- ld_library_path = (char *) malloc(sizeof(REG_DIR) + sizeof("/lib/") +
+- strlen(cpu_arch) + sizeof(DEFAULT_LIBPATH));
+- sprintf(ld_library_path, REG_DIR "/lib/%s:" DEFAULT_LIBPATH, cpu_arch);
+-
+- /*
+- * Get the user setting of LD_LIBRARY_PATH, and prepended it. It
+- * should always exist (until the legacy problem cited above is
+- * addressed).
+- */
+- char *v = getenv("LD_LIBRARY_PATH");
+- if (v != NULL) {
+- char *t = ld_library_path;
+- /* That's +1 for the colon and +1 for the trailing '\0' */
+- ld_library_path = (char *) malloc(strlen(v) + 1 + strlen(t) + 1);
+- sprintf(ld_library_path, "%s:%s", v, t);
+- }
+- Arguments::set_library_path(ld_library_path);
+- }
+-
+- /*
+- * Extensions directories.
+- *
+- * Note that the space for the colon and the trailing null are provided
+- * by the nulls included by the sizeof operator (so actually one byte more
+- * than necessary is allocated).
+- */
+- {
+- char *buf = malloc(strlen(Arguments::get_java_home()) +
+- sizeof(EXTENSIONS_DIR) + sizeof(REG_DIR) + sizeof(EXTENSIONS_DIR));
+- sprintf(buf, "%s" EXTENSIONS_DIR ":" REG_DIR EXTENSIONS_DIR,
+- Arguments::get_java_home());
+- Arguments::set_ext_dirs(buf);
+- }
+-
+- /* Endorsed standards default directory. */
+- {
+- char * buf;
+- buf = malloc(strlen(Arguments::get_java_home()) + sizeof(ENDORSED_DIR));
+- sprintf(buf, "%s" ENDORSED_DIR, Arguments::get_java_home());
+- Arguments::set_endorsed_dirs(buf);
+- }
+- }
+-
+-#undef malloc
+-#undef getenv
++ Arguments::set_java_home(buf);
++ set_boot_path('/', ':');
++ }
++
++ // Where to look for native libraries.
++ //
++ // Note: Due to a legacy implementation, most of the library path
++ // is set in the launcher. This was to accommodate linking restrictions
++ // on legacy Linux implementations (which are no longer supported).
++ // Eventually, all the library path setting will be done here.
++ //
++ // However, to prevent the proliferation of improperly built native
++ // libraries, the new path component /usr/java/packages is added here.
++ // Eventually, all the library path setting will be done here.
++ {
++ // Get the user setting of LD_LIBRARY_PATH, and prepend it. It
++ // should always exist (until the legacy problem cited above is
++ // addressed).
++ const char *v = ::getenv("LD_LIBRARY_PATH");
++ const char *v_colon = ":";
++ if (v == NULL) { v = ""; v_colon = ""; }
++ // That's +1 for the colon and +1 for the trailing '\0'.
++ char *ld_library_path = (char *)NEW_C_HEAP_ARRAY(char,
++ strlen(v) + 1 +
++ sizeof(SYS_EXT_DIR) + sizeof("/lib/") + strlen(cpu_arch) + sizeof(DEFAULT_LIBPATH) + 1,
++ mtInternal);
++ sprintf(ld_library_path, "%s%s" SYS_EXT_DIR "/lib/%s:" DEFAULT_LIBPATH, v, v_colon, cpu_arch);
++ Arguments::set_library_path(ld_library_path);
++ FREE_C_HEAP_ARRAY(char, ld_library_path, mtInternal);
++ }
++
++ // Extensions directories.
++ sprintf(buf, "%s" EXTENSIONS_DIR ":" SYS_EXT_DIR EXTENSIONS_DIR, Arguments::get_java_home());
++ Arguments::set_ext_dirs(buf);
++
++ // Endorsed standards default directory.
++ sprintf(buf, "%s" ENDORSED_DIR, Arguments::get_java_home());
++ Arguments::set_endorsed_dirs(buf);
++
++ FREE_C_HEAP_ARRAY(char, buf, mtInternal);
++
++#undef DEFAULT_LIBPATH
++#undef SYS_EXT_DIR
+ #undef EXTENSIONS_DIR
+ #undef ENDORSED_DIR
+-
+- // Done
+- return;
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+@@ -530,6 +494,9 @@
+ sigaddset(&unblocked_sigs, SIGSEGV);
+ sigaddset(&unblocked_sigs, SIGBUS);
+ sigaddset(&unblocked_sigs, SIGFPE);
++#if defined(PPC64)
++ sigaddset(&unblocked_sigs, SIGTRAP);
++#endif
+ sigaddset(&unblocked_sigs, SR_signum);
+
+ if (!ReduceSignalUsage) {
+@@ -1067,9 +1034,20 @@
+ //////////////////////////////////////////////////////////////////////////////
+ // thread local storage
+
++// Restore the thread pointer if the destructor is called. This is in case
++// someone from JNI code sets up a destructor with pthread_key_create to run
++// detachCurrentThread on thread death. Unless we restore the thread pointer we
++// will hang or crash. When detachCurrentThread is called the key will be set
++// to null and we will not be called again. If detachCurrentThread is never
++// called we could loop forever depending on the pthread implementation.
++static void restore_thread_pointer(void* p) {
++ Thread* thread = (Thread*) p;
++ os::thread_local_storage_at_put(ThreadLocalStorage::thread_index(), thread);
++}
++
+ int os::allocate_thread_local_storage() {
+ pthread_key_t key;
+- int rslt = pthread_key_create(&key, NULL);
++ int rslt = pthread_key_create(&key, restore_thread_pointer);
+ assert(rslt == 0, "cannot allocate thread local storage");
+ return (int)key;
+ }
+@@ -1953,7 +1931,11 @@
+ {EM_SPARC32PLUS, EM_SPARC, ELFCLASS32, ELFDATA2MSB, (char*)"Sparc 32"},
+ {EM_SPARCV9, EM_SPARCV9, ELFCLASS64, ELFDATA2MSB, (char*)"Sparc v9 64"},
+ {EM_PPC, EM_PPC, ELFCLASS32, ELFDATA2MSB, (char*)"Power PC 32"},
++#if defined(VM_LITTLE_ENDIAN)
++ {EM_PPC64, EM_PPC64, ELFCLASS64, ELFDATA2LSB, (char*)"Power PC 64"},
++#else
+ {EM_PPC64, EM_PPC64, ELFCLASS64, ELFDATA2MSB, (char*)"Power PC 64"},
++#endif
+ {EM_ARM, EM_ARM, ELFCLASS32, ELFDATA2LSB, (char*)"ARM"},
+ {EM_S390, EM_S390, ELFCLASSNONE, ELFDATA2MSB, (char*)"IBM System/390"},
+ {EM_ALPHA, EM_ALPHA, ELFCLASS64, ELFDATA2LSB, (char*)"Alpha"},
+@@ -2101,6 +2083,9 @@
+ return res;
+ }
+
++void* os::get_default_process_handle() {
++ return (void*)::dlopen(NULL, RTLD_LAZY);
++}
+
+ static bool _print_ascii_file(const char* filename, outputStream* st) {
+ int fd = ::open(filename, O_RDONLY);
+@@ -2151,7 +2136,7 @@
+ // Print warning if unsafe chroot environment detected
+ if (unsafe_chroot_detected) {
+ st->print("WARNING!! ");
+- st->print_cr(unstable_chroot_error);
++ st->print_cr("%s", unstable_chroot_error);
+ }
+
+ os::Linux::print_libversion_info(st);
+@@ -2212,8 +2197,8 @@
+ void os::Linux::print_libversion_info(outputStream* st) {
+ // libc, pthread
+ st->print("libc:");
+- st->print(os::Linux::glibc_version()); st->print(" ");
+- st->print(os::Linux::libpthread_version()); st->print(" ");
++ st->print("%s ", os::Linux::glibc_version());
++ st->print("%s ", os::Linux::libpthread_version());
+ if (os::Linux::is_LinuxThreads()) {
+ st->print("(%s stack)", os::Linux::is_floating_stack() ? "floating" : "fixed");
+ }
+@@ -2254,58 +2239,12 @@
+ st->cr();
+ }
+
+-// Taken from /usr/include/bits/siginfo.h Supposed to be architecture specific
+-// but they're the same for all the linux arch that we support
+-// and they're the same for solaris but there's no common place to put this.
+-const char *ill_names[] = { "ILL0", "ILL_ILLOPC", "ILL_ILLOPN", "ILL_ILLADR",
+- "ILL_ILLTRP", "ILL_PRVOPC", "ILL_PRVREG",
+- "ILL_COPROC", "ILL_BADSTK" };
+-
+-const char *fpe_names[] = { "FPE0", "FPE_INTDIV", "FPE_INTOVF", "FPE_FLTDIV",
+- "FPE_FLTOVF", "FPE_FLTUND", "FPE_FLTRES",
+- "FPE_FLTINV", "FPE_FLTSUB", "FPE_FLTDEN" };
+-
+-const char *segv_names[] = { "SEGV0", "SEGV_MAPERR", "SEGV_ACCERR" };
+-
+-const char *bus_names[] = { "BUS0", "BUS_ADRALN", "BUS_ADRERR", "BUS_OBJERR" };
+-
+ void os::print_siginfo(outputStream* st, void* siginfo) {
+- st->print("siginfo:");
+-
+- const int buflen = 100;
+- char buf[buflen];
+- siginfo_t *si = (siginfo_t*)siginfo;
+- st->print("si_signo=%s: ", os::exception_name(si->si_signo, buf, buflen));
+- if (si->si_errno != 0 && strerror_r(si->si_errno, buf, buflen) == 0) {
+- st->print("si_errno=%s", buf);
+- } else {
+- st->print("si_errno=%d", si->si_errno);
+- }
+- const int c = si->si_code;
+- assert(c > 0, "unexpected si_code");
+- switch (si->si_signo) {
+- case SIGILL:
+- st->print(", si_code=%d (%s)", c, c > 8 ? "" : ill_names[c]);
+- st->print(", si_addr=" PTR_FORMAT, si->si_addr);
+- break;
+- case SIGFPE:
+- st->print(", si_code=%d (%s)", c, c > 9 ? "" : fpe_names[c]);
+- st->print(", si_addr=" PTR_FORMAT, si->si_addr);
+- break;
+- case SIGSEGV:
+- st->print(", si_code=%d (%s)", c, c > 2 ? "" : segv_names[c]);
+- st->print(", si_addr=" PTR_FORMAT, si->si_addr);
+- break;
+- case SIGBUS:
+- st->print(", si_code=%d (%s)", c, c > 3 ? "" : bus_names[c]);
+- st->print(", si_addr=" PTR_FORMAT, si->si_addr);
+- break;
+- default:
+- st->print(", si_code=%d", si->si_code);
+- // no si_addr
+- }
+-
+- if ((si->si_signo == SIGBUS || si->si_signo == SIGSEGV) &&
++ const siginfo_t* si = (const siginfo_t*)siginfo;
++
++ os::Posix::print_siginfo_brief(st, si);
++
++ if (si && (si->si_signo == SIGBUS || si->si_signo == SIGSEGV) &&
+ UseSharedSpaces) {
+ FileMapInfo* mapinfo = FileMapInfo::current_info();
+ if (mapinfo->is_in_shared_space(si->si_addr)) {
+@@ -2335,6 +2274,9 @@
+ print_signal_handler(st, SHUTDOWN2_SIGNAL , buf, buflen);
+ print_signal_handler(st, SHUTDOWN3_SIGNAL , buf, buflen);
+ print_signal_handler(st, BREAK_SIGNAL, buf, buflen);
++#if defined(PPC64)
++ print_signal_handler(st, SIGTRAP, buf, buflen);
++#endif
+ }
+
+ static char saved_jvm_path[MAXPATHLEN] = {0};
+@@ -2466,7 +2408,6 @@
+ sem_t _semaphore;
+ };
+
+-
+ Semaphore::Semaphore() {
+ sem_init(&_semaphore, 0, 0);
+ }
+@@ -2488,8 +2429,22 @@
+ }
+
+ bool Semaphore::timedwait(unsigned int sec, int nsec) {
++
+ struct timespec ts;
+- unpackTime(&ts, false, (sec * NANOSECS_PER_SEC) + nsec);
++ // Semaphores are always associated with CLOCK_REALTIME
++ os::Linux::clock_gettime(CLOCK_REALTIME, &ts);
++ // see unpackTime for discussion on overflow checking
++ if (sec >= MAX_SECS) {
++ ts.tv_sec += MAX_SECS;
++ ts.tv_nsec = 0;
++ } else {
++ ts.tv_sec += sec;
++ ts.tv_nsec += nsec;
++ if (ts.tv_nsec >= NANOSECS_PER_SEC) {
++ ts.tv_nsec -= NANOSECS_PER_SEC;
++ ++ts.tv_sec; // note: this must be <= max_secs
++ }
++ }
+
+ while (1) {
+ int result = sem_timedwait(&_semaphore, &ts);
+@@ -2994,7 +2949,9 @@
+
+ unsigned char vec[1];
+ unsigned imin = 1, imax = pages + 1, imid;
+- int mincore_return_value;
++ int mincore_return_value = 0;
++
++ assert(imin <= imax, "Unexpected page size");
+
+ while (imin < imax) {
+ imid = (imax + imin) / 2;
+@@ -3458,7 +3415,7 @@
+ // the system is still "fresh".
+ if (warn_on_failure) {
+ jio_snprintf(msg, sizeof(msg), "Failed to reserve shared memory (errno = %d).", errno);
+- warning(msg);
++ warning("%s", msg);
+ }
+ return NULL;
+ }
+@@ -3476,7 +3433,7 @@
+ if ((intptr_t)addr == -1) {
+ if (warn_on_failure) {
+ jio_snprintf(msg, sizeof(msg), "Failed to attach shared memory (errno = %d).", err);
+- warning(msg);
++ warning("%s", msg);
+ }
+ return NULL;
+ }
+@@ -3496,7 +3453,7 @@
+ char msg[128];
+ jio_snprintf(msg, sizeof(msg), "Failed to reserve large pages memory req_addr: "
+ PTR_FORMAT " bytes: " SIZE_FORMAT " (errno = %d).", req_addr, bytes, error);
+- warning(msg);
++ warning("%s", msg);
+ }
+ }
+
+@@ -3866,9 +3823,33 @@
+ }
+ }
+
+-int os::naked_sleep() {
+- // %% make the sleep time an integer flag. for now use 1 millisec.
+- return os::sleep(Thread::current(), 1, false);
++//
++// Short sleep, direct OS call.
++//
++// Note: certain versions of Linux CFS scheduler (since 2.6.23) do not guarantee
++// sched_yield(2) will actually give up the CPU:
++//
++// * Alone on this particular CPU, keeps running.
++// * Before the introduction of "skip_buddy" with "compat_yield" disabled
++// (pre 2.6.39).
++//
++// So calling this with 0 is an alternative.
++//
++void os::naked_short_sleep(jlong ms) {
++ struct timespec req;
++
++ assert(ms < 1000, "Un-interruptable sleep, short time use only");
++ req.tv_sec = 0;
++ if (ms > 0) {
++ req.tv_nsec = (ms % 1000) * 1000000;
++ }
++ else {
++ req.tv_nsec = 1;
++ }
++
++ nanosleep(&req, NULL);
++
++ return;
+ }
+
+ // Sleep forever; naked call to OS-specific sleep; use with CAUTION
+@@ -4465,6 +4446,9 @@
+ set_signal_handler(SIGBUS, true);
+ set_signal_handler(SIGILL, true);
+ set_signal_handler(SIGFPE, true);
++#if defined(PPC64)
++ set_signal_handler(SIGTRAP, true);
++#endif
+ set_signal_handler(SIGXFSZ, true);
+
+ if (libjsig_is_loaded) {
+@@ -4558,7 +4542,8 @@
+ st->print("[%s]", get_signal_handler_name(handler, buf, buflen));
+ }
+
+- st->print(", sa_mask[0]=" PTR32_FORMAT, *(uint32_t*)&sa.sa_mask);
++ st->print(", sa_mask[0]=");
++ os::Posix::print_signal_set_short(st, &sa.sa_mask);
+
+ address rh = VMError::get_resetted_sighandler(sig);
+ // May be, handler was resetted by VMError?
+@@ -4567,7 +4552,8 @@
+ sa.sa_flags = VMError::get_resetted_sigflags(sig) & SIGNIFICANT_SIGNAL_MASK;
+ }
+
+- st->print(", sa_flags=" PTR32_FORMAT, sa.sa_flags);
++ st->print(", sa_flags=");
++ os::Posix::print_sa_flags(st, sa.sa_flags);
+
+ // Check: is it our handler?
+ if(handler == CAST_FROM_FN_PTR(address, (sa_sigaction_t)signalHandler) ||
+@@ -4605,7 +4591,9 @@
+ DO_SIGNAL_CHECK(SIGBUS);
+ DO_SIGNAL_CHECK(SIGPIPE);
+ DO_SIGNAL_CHECK(SIGXFSZ);
+-
++#if defined(PPC64)
++ DO_SIGNAL_CHECK(SIGTRAP);
++#endif
+
+ // ReduceSignalUsage allows the user to override these handlers
+ // see comments at the very top and jvm_solaris.h
+@@ -4927,7 +4915,7 @@
+ // the future if the appropriate cleanup code can be added to the
+ // VM_Exit VMOperation's doit method.
+ if (atexit(perfMemory_exit_helper) != 0) {
+- warning("os::init2 atexit(perfMemory_exit_helper) failed");
++ warning("os::init_2 atexit(perfMemory_exit_helper) failed");
+ }
+ }
+
+@@ -4938,8 +4926,7 @@
+ }
+
+ // this is called at the end of vm_initialization
+-void os::init_3(void)
+-{
++void os::init_3(void) {
+ #ifdef JAVASE_EMBEDDED
+ // Start the MemNotifyThread
+ if (LowMemoryProtection) {
+@@ -5405,6 +5392,8 @@
+ // -1 on error.
+ //
+
++PRAGMA_DIAG_PUSH
++PRAGMA_FORMAT_NONLITERAL_IGNORED
+ static jlong slow_thread_cpu_time(Thread *thread, bool user_sys_cpu_time) {
+ static bool proc_task_unchecked = true;
+ static const char *proc_stat_path = "/proc/%d/stat";
+@@ -5466,6 +5455,7 @@
+ return (jlong)user_time * (1000000000 / clock_tics_per_sec);
+ }
+ }
++PRAGMA_DIAG_POP
+
+ void os::current_thread_cpu_time_info(jvmtiTimerInfo *info_ptr) {
+ info_ptr->max_value = ALL_64_BITS; // will not wrap in less than 64 bits
+@@ -5779,7 +5769,6 @@
+ * is no need to track notifications.
+ */
+
+-#define MAX_SECS 100000000
+ /*
+ * This code is common to linux and solaris and will be moved to a
+ * common place in dolphin.
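
The Semaphore::timedwait() hunk above replaces unpackTime() with an inline
computation of an absolute deadline because sem_timedwait(3) interprets its
timespec argument against CLOCK_REALTIME. A minimal standalone sketch of the
same pattern, outside HotSpot (the MAX_SECS clamp mirrors the hunk; the
function and variable names here are illustrative, not the patched code):

  #include <semaphore.h>
  #include <time.h>
  #include <errno.h>

  #define MAX_SECS 100000000            /* same overflow clamp as the hunk */
  #define NANOSECS_PER_SEC 1000000000L

  /* Wait on 'sem' for at most sec seconds plus nsec nanoseconds. */
  static int bounded_wait(sem_t* sem, unsigned int sec, long nsec) {
    struct timespec ts;
    clock_gettime(CLOCK_REALTIME, &ts); /* sem_timedwait uses CLOCK_REALTIME */
    if (sec >= MAX_SECS) {              /* clamp so tv_sec cannot overflow */
      ts.tv_sec += MAX_SECS;
      ts.tv_nsec = 0;
    } else {
      ts.tv_sec += sec;
      ts.tv_nsec += nsec;
      if (ts.tv_nsec >= NANOSECS_PER_SEC) {
        ts.tv_nsec -= NANOSECS_PER_SEC;
        ++ts.tv_sec;
      }
    }
    int result;
    while ((result = sem_timedwait(sem, &ts)) == -1 && errno == EINTR)
      ;                                 /* retry when interrupted by a signal */
    return result;
  }
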
+--- ./hotspot/src/os/linux/vm/os_linux.hpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/os/linux/vm/os_linux.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -30,6 +30,9 @@
+ /* pthread_getattr_np comes with LinuxThreads-0.9-7 on RedHat 7.1 */
+ typedef int (*pthread_getattr_func_type) (pthread_t, pthread_attr_t *);
+
++// Information about the protection of the page at address '0' on this os.
++static bool zero_page_read_protected() { return true; }
++
+ class Linux {
+ friend class os;
+ friend class TestReserveMemorySpecial;
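
Earlier in the os_linux.cpp hunks, os::naked_sleep() becomes
os::naked_short_sleep(jlong ms), which calls nanosleep(2) directly instead of
going through os::sleep(); as the added comment notes, sched_yield(2) under
the CFS scheduler is not guaranteed to give up the CPU. A standalone sketch
of that pattern (illustrative only; the assert bound matches the hunk):

  #include <time.h>
  #include <assert.h>

  /* Uninterruptible short sleep; intended only for sub-second waits. */
  static void short_sleep(long ms) {
    struct timespec req;
    assert(ms < 1000);
    req.tv_sec = 0;
    /* A zero request still sleeps for 1 ns so the thread really blocks. */
    req.tv_nsec = (ms > 0) ? ms * 1000000L : 1;
    nanosleep(&req, NULL);
  }
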
+--- ./hotspot/src/os/linux/vm/perfMemory_linux.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/os/linux/vm/perfMemory_linux.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+@@ -891,8 +891,16 @@
+ FREE_C_HEAP_ARRAY(char, filename, mtInternal);
+
+ // open the shared memory file for the give vmid
+- fd = open_sharedmem_file(rfilename, file_flags, CHECK);
+- assert(fd != OS_ERR, "unexpected value");
++ fd = open_sharedmem_file(rfilename, file_flags, THREAD);
++
++ if (fd == OS_ERR) {
++ return;
++ }
++
++ if (HAS_PENDING_EXCEPTION) {
++ ::close(fd);
++ return;
++ }
+
+ if (*sizep == 0) {
+ size = sharedmem_filesize(fd, CHECK);
+@@ -923,7 +931,7 @@
+
+ if (PerfTraceMemOps) {
+ tty->print("mapped " SIZE_FORMAT " bytes for vmid %d at "
+- INTPTR_FORMAT "\n", size, vmid, (void*)mapAddress);
++ INTPTR_FORMAT "\n", size, vmid, p2i((void*)mapAddress));
+ }
+ }
+
+--- ./hotspot/src/os/posix/vm/os_posix.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/os/posix/vm/os_posix.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -1,38 +1,48 @@
+ /*
+-* Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
+-* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+-*
+-* This code is free software; you can redistribute it and/or modify it
+-* under the terms of the GNU General Public License version 2 only, as
+-* published by the Free Software Foundation.
+-*
+-* This code is distributed in the hope that it will be useful, but WITHOUT
+-* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+-* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+-* version 2 for more details (a copy is included in the LICENSE file that
+-* accompanied this code).
+-*
+-* You should have received a copy of the GNU General Public License version
+-* 2 along with this work; if not, write to the Free Software Foundation,
+-* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+-*
+-* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+-* or visit www.oracle.com if you need additional information or have any
+-* questions.
+-*
+-*/
++* Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
+
++#include "utilities/globalDefinitions.hpp"
+ #include "prims/jvm.h"
+ #include "runtime/frame.inline.hpp"
+ #include "runtime/os.hpp"
+ #include "utilities/vmError.hpp"
+
++#include <signal.h>
+ #include <unistd.h>
+ #include <sys/resource.h>
+ #include <sys/utsname.h>
+ #include <pthread.h>
+ #include <signal.h>
+
++PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
++
++// Todo: provide an os::get_max_process_id() or similar. The number of processes
++// may have been configured and can be read more accurately from the proc fs etc.
++#ifndef MAX_PID
++#define MAX_PID INT_MAX
++#endif
++#define IS_VALID_PID(p) (p > 0 && p < MAX_PID)
+
+ // Check core dump limit and report possible place where core can be found
+ void os::check_or_create_dump(void* exceptionRecord, void* contextRecord, char* buffer, size_t bufferSize) {
+@@ -158,8 +168,8 @@
+ if (rlim.rlim_cur == RLIM_INFINITY) st->print("infinity");
+ else st->print("%uk", rlim.rlim_cur >> 10);
+
+- //Isn't there on solaris
+-#ifndef TARGET_OS_FAMILY_solaris
++ // Isn't there on solaris
++#if !defined(TARGET_OS_FAMILY_solaris) && !defined(TARGET_OS_FAMILY_aix)
+ st->print(", NPROC ");
+ getrlimit(RLIMIT_NPROC, &rlim);
+ if (rlim.rlim_cur == RLIM_INFINITY) st->print("infinity");
+@@ -183,10 +193,10 @@
+ st->print("uname:");
+ struct utsname name;
+ uname(&name);
+- st->print(name.sysname); st->print(" ");
+- st->print(name.release); st->print(" ");
+- st->print(name.version); st->print(" ");
+- st->print(name.machine);
++ st->print("%s ", name.sysname);
++ st->print("%s ", name.release);
++ st->print("%s ", name.version);
++ st->print("%s", name.machine);
+ st->cr();
+ }
+
+@@ -262,10 +272,6 @@
+ return ::fdopen(fd, mode);
+ }
+
+-void* os::get_default_process_handle() {
+- return (void*)::dlopen(NULL, RTLD_LAZY);
+-}
+-
+ // Builds a platform dependent Agent_OnLoad_ function name
+ // which is used to find statically linked in agents.
+ // Parameters:
+@@ -311,6 +317,483 @@
+ return agent_entry_name;
+ }
+
++// Returned string is a constant. For unknown signals "UNKNOWN" is returned.
++const char* os::Posix::get_signal_name(int sig, char* out, size_t outlen) {
++
++ static const struct {
++ int sig; const char* name;
++ }
++ info[] =
++ {
++ { SIGABRT, "SIGABRT" },
++#ifdef SIGAIO
++ { SIGAIO, "SIGAIO" },
++#endif
++ { SIGALRM, "SIGALRM" },
++#ifdef SIGALRM1
++ { SIGALRM1, "SIGALRM1" },
++#endif
++ { SIGBUS, "SIGBUS" },
++#ifdef SIGCANCEL
++ { SIGCANCEL, "SIGCANCEL" },
++#endif
++ { SIGCHLD, "SIGCHLD" },
++#ifdef SIGCLD
++ { SIGCLD, "SIGCLD" },
++#endif
++ { SIGCONT, "SIGCONT" },
++#ifdef SIGCPUFAIL
++ { SIGCPUFAIL, "SIGCPUFAIL" },
++#endif
++#ifdef SIGDANGER
++ { SIGDANGER, "SIGDANGER" },
++#endif
++#ifdef SIGDIL
++ { SIGDIL, "SIGDIL" },
++#endif
++#ifdef SIGEMT
++ { SIGEMT, "SIGEMT" },
++#endif
++ { SIGFPE, "SIGFPE" },
++#ifdef SIGFREEZE
++ { SIGFREEZE, "SIGFREEZE" },
++#endif
++#ifdef SIGGFAULT
++ { SIGGFAULT, "SIGGFAULT" },
++#endif
++#ifdef SIGGRANT
++ { SIGGRANT, "SIGGRANT" },
++#endif
++ { SIGHUP, "SIGHUP" },
++ { SIGILL, "SIGILL" },
++ { SIGINT, "SIGINT" },
++#ifdef SIGIO
++ { SIGIO, "SIGIO" },
++#endif
++#ifdef SIGIOINT
++ { SIGIOINT, "SIGIOINT" },
++#endif
++#ifdef SIGIOT
++ // SIGIOT is there for BSD compatibility, but on most Unices just a
++ // synonym for SIGABRT. The result should be "SIGABRT", not
++ // "SIGIOT".
++  #if (SIGIOT != SIGABRT)
++ { SIGIOT, "SIGIOT" },
++ #endif
++#endif
++#ifdef SIGKAP
++ { SIGKAP, "SIGKAP" },
++#endif
++ { SIGKILL, "SIGKILL" },
++#ifdef SIGLOST
++ { SIGLOST, "SIGLOST" },
++#endif
++#ifdef SIGLWP
++ { SIGLWP, "SIGLWP" },
++#endif
++#ifdef SIGLWPTIMER
++ { SIGLWPTIMER, "SIGLWPTIMER" },
++#endif
++#ifdef SIGMIGRATE
++ { SIGMIGRATE, "SIGMIGRATE" },
++#endif
++#ifdef SIGMSG
++ { SIGMSG, "SIGMSG" },
++#endif
++ { SIGPIPE, "SIGPIPE" },
++#ifdef SIGPOLL
++ { SIGPOLL, "SIGPOLL" },
++#endif
++#ifdef SIGPRE
++ { SIGPRE, "SIGPRE" },
++#endif
++ { SIGPROF, "SIGPROF" },
++#ifdef SIGPTY
++ { SIGPTY, "SIGPTY" },
++#endif
++#ifdef SIGPWR
++ { SIGPWR, "SIGPWR" },
++#endif
++ { SIGQUIT, "SIGQUIT" },
++#ifdef SIGRECONFIG
++ { SIGRECONFIG, "SIGRECONFIG" },
++#endif
++#ifdef SIGRECOVERY
++ { SIGRECOVERY, "SIGRECOVERY" },
++#endif
++#ifdef SIGRESERVE
++ { SIGRESERVE, "SIGRESERVE" },
++#endif
++#ifdef SIGRETRACT
++ { SIGRETRACT, "SIGRETRACT" },
++#endif
++#ifdef SIGSAK
++ { SIGSAK, "SIGSAK" },
++#endif
++ { SIGSEGV, "SIGSEGV" },
++#ifdef SIGSOUND
++ { SIGSOUND, "SIGSOUND" },
++#endif
++ { SIGSTOP, "SIGSTOP" },
++ { SIGSYS, "SIGSYS" },
++#ifdef SIGSYSERROR
++ { SIGSYSERROR, "SIGSYSERROR" },
++#endif
++#ifdef SIGTALRM
++ { SIGTALRM, "SIGTALRM" },
++#endif
++ { SIGTERM, "SIGTERM" },
++#ifdef SIGTHAW
++ { SIGTHAW, "SIGTHAW" },
++#endif
++ { SIGTRAP, "SIGTRAP" },
++#ifdef SIGTSTP
++ { SIGTSTP, "SIGTSTP" },
++#endif
++ { SIGTTIN, "SIGTTIN" },
++ { SIGTTOU, "SIGTTOU" },
++#ifdef SIGURG
++ { SIGURG, "SIGURG" },
++#endif
++ { SIGUSR1, "SIGUSR1" },
++ { SIGUSR2, "SIGUSR2" },
++#ifdef SIGVIRT
++ { SIGVIRT, "SIGVIRT" },
++#endif
++ { SIGVTALRM, "SIGVTALRM" },
++#ifdef SIGWAITING
++ { SIGWAITING, "SIGWAITING" },
++#endif
++#ifdef SIGWINCH
++ { SIGWINCH, "SIGWINCH" },
++#endif
++#ifdef SIGWINDOW
++ { SIGWINDOW, "SIGWINDOW" },
++#endif
++ { SIGXCPU, "SIGXCPU" },
++ { SIGXFSZ, "SIGXFSZ" },
++#ifdef SIGXRES
++ { SIGXRES, "SIGXRES" },
++#endif
++ { -1, NULL }
++ };
++
++ const char* ret = NULL;
++
++#ifdef SIGRTMIN
++ if (sig >= SIGRTMIN && sig <= SIGRTMAX) {
++ if (sig == SIGRTMIN) {
++ ret = "SIGRTMIN";
++ } else if (sig == SIGRTMAX) {
++ ret = "SIGRTMAX";
++ } else {
++ jio_snprintf(out, outlen, "SIGRTMIN+%d", sig - SIGRTMIN);
++ return out;
++ }
++ }
++#endif
++
++ if (sig > 0) {
++ for (int idx = 0; info[idx].sig != -1; idx ++) {
++ if (info[idx].sig == sig) {
++ ret = info[idx].name;
++ break;
++ }
++ }
++ }
++
++ if (!ret) {
++ if (!is_valid_signal(sig)) {
++ ret = "INVALID";
++ } else {
++ ret = "UNKNOWN";
++ }
++ }
++
++  jio_snprintf(out, outlen, "%s", ret);
++ return out;
++}
++
++// Returns true if signal number is valid.
++bool os::Posix::is_valid_signal(int sig) {
++  // MacOS is not really POSIX compliant: sigaddset does not return
++ // an error for invalid signal numbers. However, MacOS does not
++ // support real time signals and simply seems to have just 33
++ // signals with no holes in the signal range.
++#ifdef __APPLE__
++ return sig >= 1 && sig < NSIG;
++#else
++ // Use sigaddset to check for signal validity.
++ sigset_t set;
++ if (sigaddset(&set, sig) == -1 && errno == EINVAL) {
++ return false;
++ }
++ return true;
++#endif
++}
++
++#define NUM_IMPORTANT_SIGS 32
++// Returns one-line short description of a signal set in a user provided buffer.
++const char* os::Posix::describe_signal_set_short(const sigset_t* set, char* buffer, size_t buf_size) {
++ assert(buf_size == (NUM_IMPORTANT_SIGS + 1), "wrong buffer size");
++ // Note: for shortness, just print out the first 32. That should
++ // cover most of the useful ones, apart from realtime signals.
++ for (int sig = 1; sig <= NUM_IMPORTANT_SIGS; sig++) {
++ const int rc = sigismember(set, sig);
++ if (rc == -1 && errno == EINVAL) {
++ buffer[sig-1] = '?';
++ } else {
++ buffer[sig-1] = rc == 0 ? '0' : '1';
++ }
++ }
++ buffer[NUM_IMPORTANT_SIGS] = 0;
++ return buffer;
++}
++
++// Prints one-line description of a signal set.
++void os::Posix::print_signal_set_short(outputStream* st, const sigset_t* set) {
++ char buf[NUM_IMPORTANT_SIGS + 1];
++ os::Posix::describe_signal_set_short(set, buf, sizeof(buf));
++ st->print("%s", buf);
++}
++
++// Writes one-line description of a combination of sigaction.sa_flags into a user
++// provided buffer. Returns that buffer.
++const char* os::Posix::describe_sa_flags(int flags, char* buffer, size_t size) {
++ char* p = buffer;
++ size_t remaining = size;
++ bool first = true;
++ int idx = 0;
++
++ assert(buffer, "invalid argument");
++
++ if (size == 0) {
++ return buffer;
++ }
++
++ strncpy(buffer, "none", size);
++
++ const struct {
++ int i;
++ const char* s;
++ } flaginfo [] = {
++ { SA_NOCLDSTOP, "SA_NOCLDSTOP" },
++ { SA_ONSTACK, "SA_ONSTACK" },
++ { SA_RESETHAND, "SA_RESETHAND" },
++ { SA_RESTART, "SA_RESTART" },
++ { SA_SIGINFO, "SA_SIGINFO" },
++ { SA_NOCLDWAIT, "SA_NOCLDWAIT" },
++ { SA_NODEFER, "SA_NODEFER" },
++#ifdef AIX
++ { SA_ONSTACK, "SA_ONSTACK" },
++ { SA_OLDSTYLE, "SA_OLDSTYLE" },
++#endif
++ { 0, NULL }
++ };
++
++ for (idx = 0; flaginfo[idx].s && remaining > 1; idx++) {
++ if (flags & flaginfo[idx].i) {
++ if (first) {
++ jio_snprintf(p, remaining, "%s", flaginfo[idx].s);
++ first = false;
++ } else {
++ jio_snprintf(p, remaining, "|%s", flaginfo[idx].s);
++ }
++ const size_t len = strlen(p);
++ p += len;
++ remaining -= len;
++ }
++ }
++
++ buffer[size - 1] = '\0';
++
++ return buffer;
++}
++
++// Prints one-line description of a combination of sigaction.sa_flags.
++void os::Posix::print_sa_flags(outputStream* st, int flags) {
++ char buffer[0x100];
++ os::Posix::describe_sa_flags(flags, buffer, sizeof(buffer));
++ st->print("%s", buffer);
++}
++
++// Helper function for os::Posix::print_siginfo_...():
++// return a textual description for signal code.
++struct enum_sigcode_desc_t {
++ const char* s_name;
++ const char* s_desc;
++};
++
++static bool get_signal_code_description(const siginfo_t* si, enum_sigcode_desc_t* out) {
++
++ const struct {
++ int sig; int code; const char* s_code; const char* s_desc;
++ } t1 [] = {
++ { SIGILL, ILL_ILLOPC, "ILL_ILLOPC", "Illegal opcode." },
++ { SIGILL, ILL_ILLOPN, "ILL_ILLOPN", "Illegal operand." },
++ { SIGILL, ILL_ILLADR, "ILL_ILLADR", "Illegal addressing mode." },
++ { SIGILL, ILL_ILLTRP, "ILL_ILLTRP", "Illegal trap." },
++ { SIGILL, ILL_PRVOPC, "ILL_PRVOPC", "Privileged opcode." },
++ { SIGILL, ILL_PRVREG, "ILL_PRVREG", "Privileged register." },
++ { SIGILL, ILL_COPROC, "ILL_COPROC", "Coprocessor error." },
++ { SIGILL, ILL_BADSTK, "ILL_BADSTK", "Internal stack error." },
++#if defined(IA64) && defined(LINUX)
++ { SIGILL, ILL_BADIADDR, "ILL_BADIADDR", "Unimplemented instruction address" },
++ { SIGILL, ILL_BREAK, "ILL_BREAK", "Application Break instruction" },
++#endif
++ { SIGFPE, FPE_INTDIV, "FPE_INTDIV", "Integer divide by zero." },
++ { SIGFPE, FPE_INTOVF, "FPE_INTOVF", "Integer overflow." },
++ { SIGFPE, FPE_FLTDIV, "FPE_FLTDIV", "Floating-point divide by zero." },
++ { SIGFPE, FPE_FLTOVF, "FPE_FLTOVF", "Floating-point overflow." },
++ { SIGFPE, FPE_FLTUND, "FPE_FLTUND", "Floating-point underflow." },
++ { SIGFPE, FPE_FLTRES, "FPE_FLTRES", "Floating-point inexact result." },
++ { SIGFPE, FPE_FLTINV, "FPE_FLTINV", "Invalid floating-point operation." },
++ { SIGFPE, FPE_FLTSUB, "FPE_FLTSUB", "Subscript out of range." },
++ { SIGSEGV, SEGV_MAPERR, "SEGV_MAPERR", "Address not mapped to object." },
++ { SIGSEGV, SEGV_ACCERR, "SEGV_ACCERR", "Invalid permissions for mapped object." },
++#ifdef AIX
++ // no explanation found what keyerr would be
++ { SIGSEGV, SEGV_KEYERR, "SEGV_KEYERR", "key error" },
++#endif
++#if defined(IA64) && !defined(AIX)
++ { SIGSEGV, SEGV_PSTKOVF, "SEGV_PSTKOVF", "Paragraph stack overflow" },
++#endif
++ { SIGBUS, BUS_ADRALN, "BUS_ADRALN", "Invalid address alignment." },
++ { SIGBUS, BUS_ADRERR, "BUS_ADRERR", "Nonexistent physical address." },
++ { SIGBUS, BUS_OBJERR, "BUS_OBJERR", "Object-specific hardware error." },
++ { SIGTRAP, TRAP_BRKPT, "TRAP_BRKPT", "Process breakpoint." },
++ { SIGTRAP, TRAP_TRACE, "TRAP_TRACE", "Process trace trap." },
++ { SIGCHLD, CLD_EXITED, "CLD_EXITED", "Child has exited." },
++ { SIGCHLD, CLD_KILLED, "CLD_KILLED", "Child has terminated abnormally and did not create a core file." },
++ { SIGCHLD, CLD_DUMPED, "CLD_DUMPED", "Child has terminated abnormally and created a core file." },
++ { SIGCHLD, CLD_TRAPPED, "CLD_TRAPPED", "Traced child has trapped." },
++ { SIGCHLD, CLD_STOPPED, "CLD_STOPPED", "Child has stopped." },
++ { SIGCHLD, CLD_CONTINUED,"CLD_CONTINUED","Stopped child has continued." },
++#ifdef SIGPOLL
++ { SIGPOLL, POLL_OUT, "POLL_OUT", "Output buffers available." },
++ { SIGPOLL, POLL_MSG, "POLL_MSG", "Input message available." },
++ { SIGPOLL, POLL_ERR, "POLL_ERR", "I/O error." },
++ { SIGPOLL, POLL_PRI, "POLL_PRI", "High priority input available." },
++    { SIGPOLL,  POLL_HUP,     "POLL_HUP",     "Device disconnected." },
++#endif
++ { -1, -1, NULL, NULL }
++ };
++
++ // Codes valid in any signal context.
++ const struct {
++ int code; const char* s_code; const char* s_desc;
++ } t2 [] = {
++ { SI_USER, "SI_USER", "Signal sent by kill()." },
++ { SI_QUEUE, "SI_QUEUE", "Signal sent by the sigqueue()." },
++ { SI_TIMER, "SI_TIMER", "Signal generated by expiration of a timer set by timer_settime()." },
++ { SI_ASYNCIO, "SI_ASYNCIO", "Signal generated by completion of an asynchronous I/O request." },
++ { SI_MESGQ, "SI_MESGQ", "Signal generated by arrival of a message on an empty message queue." },
++ // Linux specific
++#ifdef SI_TKILL
++ { SI_TKILL, "SI_TKILL", "Signal sent by tkill (pthread_kill)" },
++#endif
++#ifdef SI_DETHREAD
++ { SI_DETHREAD, "SI_DETHREAD", "Signal sent by execve() killing subsidiary threads" },
++#endif
++#ifdef SI_KERNEL
++ { SI_KERNEL, "SI_KERNEL", "Signal sent by kernel." },
++#endif
++#ifdef SI_SIGIO
++ { SI_SIGIO, "SI_SIGIO", "Signal sent by queued SIGIO" },
++#endif
++
++#ifdef AIX
++ { SI_UNDEFINED, "SI_UNDEFINED","siginfo contains partial information" },
++ { SI_EMPTY, "SI_EMPTY", "siginfo contains no useful information" },
++#endif
++
++#ifdef __sun
++ { SI_NOINFO, "SI_NOINFO", "No signal information" },
++ { SI_RCTL, "SI_RCTL", "kernel generated signal via rctl action" },
++ { SI_LWP, "SI_LWP", "Signal sent via lwp_kill" },
++#endif
++
++ { -1, NULL, NULL }
++ };
++
++ const char* s_code = NULL;
++ const char* s_desc = NULL;
++
++ for (int i = 0; t1[i].sig != -1; i ++) {
++ if (t1[i].sig == si->si_signo && t1[i].code == si->si_code) {
++ s_code = t1[i].s_code;
++ s_desc = t1[i].s_desc;
++ break;
++ }
++ }
++
++ if (s_code == NULL) {
++ for (int i = 0; t2[i].s_code != NULL; i ++) {
++ if (t2[i].code == si->si_code) {
++ s_code = t2[i].s_code;
++ s_desc = t2[i].s_desc;
++ }
++ }
++ }
++
++ if (s_code == NULL) {
++ out->s_name = "unknown";
++ out->s_desc = "unknown";
++ return false;
++ }
++
++ out->s_name = s_code;
++ out->s_desc = s_desc;
++
++ return true;
++}
++
++// A POSIX-conformant, platform-independent siginfo print routine.
++// Short print out on one line.
++void os::Posix::print_siginfo_brief(outputStream* os, const siginfo_t* si) {
++ char buf[20];
++ os->print("siginfo: ");
++
++ if (!si) {
++ os->print("");
++ return;
++ }
++
++ // See print_siginfo_full() for details.
++ const int sig = si->si_signo;
++
++ os->print("si_signo: %d (%s)", sig, os::Posix::get_signal_name(sig, buf, sizeof(buf)));
++
++ enum_sigcode_desc_t ed;
++ if (get_signal_code_description(si, &ed)) {
++ os->print(", si_code: %d (%s)", si->si_code, ed.s_name);
++ } else {
++ os->print(", si_code: %d (unknown)", si->si_code);
++ }
++
++ if (si->si_errno) {
++ os->print(", si_errno: %d", si->si_errno);
++ }
++
++ const int me = (int) ::getpid();
++ const int pid = (int) si->si_pid;
++
++ if (si->si_code == SI_USER || si->si_code == SI_QUEUE) {
++ if (IS_VALID_PID(pid) && pid != me) {
++ os->print(", sent from pid: %d (uid: %d)", pid, (int) si->si_uid);
++ }
++ } else if (sig == SIGSEGV || sig == SIGBUS || sig == SIGILL ||
++ sig == SIGTRAP || sig == SIGFPE) {
++ os->print(", si_addr: " PTR_FORMAT, si->si_addr);
++#ifdef SIGPOLL
++ } else if (sig == SIGPOLL) {
++ os->print(", si_band: " PTR64_FORMAT, (uint64_t)si->si_band);
++#endif
++ } else if (sig == SIGCHLD) {
++ os->print_cr(", si_pid: %d, si_uid: %d, si_status: %d", (int) si->si_pid, si->si_uid, si->si_status);
++ }
++}
++
+ os::WatcherThreadCrashProtection::WatcherThreadCrashProtection() {
+ assert(Thread::current()->is_Watcher_thread(), "Must be WatcherThread");
+ }
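
The describe_signal_set_short() helper added above probes each of the first
32 signal numbers with sigismember(3) and prints '?' where the platform
rejects the number. The same technique works outside HotSpot; a small
self-contained sketch (function and buffer names are illustrative):

  #include <signal.h>
  #include <errno.h>
  #include <stdio.h>

  /* Render signals 1..32 of 'set' as '0'/'1' flags, '?' if invalid. */
  static const char* describe_set(const sigset_t* set, char buf[33]) {
    for (int sig = 1; sig <= 32; sig++) {
      errno = 0;
      int rc = sigismember(set, sig);
      buf[sig - 1] = (rc == -1 && errno == EINVAL) ? '?' : (rc ? '1' : '0');
    }
    buf[32] = '\0';
    return buf;
  }

  int main(void) {
    sigset_t set;
    char buf[33];
    sigemptyset(&set);
    sigaddset(&set, SIGINT);
    printf("%s\n", describe_set(&set, buf));  /* '1' at index SIGINT-1 */
    return 0;
  }
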
+--- ./hotspot/src/os/posix/vm/os_posix.hpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/os/posix/vm/os_posix.hpp Wed Jul 30 03:51:43 2014 -0700
+@@ -34,6 +34,30 @@
+ static void print_libversion_info(outputStream* st);
+ static void print_load_average(outputStream* st);
+
++public:
++
++ // Returns true if signal is valid.
++ static bool is_valid_signal(int sig);
++
++ // Helper function, returns a string (e.g. "SIGILL") for a signal.
++ // Returned string is a constant. For unknown signals "UNKNOWN" is returned.
++ static const char* get_signal_name(int sig, char* out, size_t outlen);
++
++ // Returns one-line short description of a signal set in a user provided buffer.
++ static const char* describe_signal_set_short(const sigset_t* set, char* buffer, size_t size);
++
++ // Prints a short one-line description of a signal set.
++ static void print_signal_set_short(outputStream* st, const sigset_t* set);
++
++ // Writes a one-line description of a combination of sigaction.sa_flags
++ // into a user provided buffer. Returns that buffer.
++ static const char* describe_sa_flags(int flags, char* buffer, size_t size);
++
++ // Prints a one-line description of a combination of sigaction.sa_flags.
++ static void print_sa_flags(outputStream* st, int flags);
++
++  // A POSIX-conformant, platform-independent siginfo print routine.
++ static void print_siginfo_brief(outputStream* os, const siginfo_t* si);
+
+ };
+
+@@ -57,4 +81,4 @@
+ sigjmp_buf _jmpbuf;
+ };
+
+-#endif
++#endif // OS_POSIX_VM_OS_POSIX_HPP
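
Several os_linux.cpp hunks above change warning(msg) to warning("%s", msg),
and the slow_thread_cpu_time() hunk wraps a deliberate non-literal format in
PRAGMA_FORMAT_NONLITERAL_IGNORED. The reason: passing a buffer as the format
string misparses any '%' that ends up in it (an errno text, a path) and trips
GCC's -Wformat-security. A plain illustration (names are illustrative):

  #include <stdio.h>

  static void report(const char* path) {
    char msg[128];
    snprintf(msg, sizeof(msg), "Failed to map %s", path);
    /* fprintf(stderr, msg);        unsafe: a '%' inside 'path' would be
                                    interpreted as a conversion spec     */
    fprintf(stderr, "%s\n", msg);   /* safe: msg is data, not a format */
  }
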
+--- ./hotspot/src/os/solaris/vm/os_solaris.cpp Tue Jun 03 14:19:26 2014 -0700
++++ ./hotspot/src/os/solaris/vm/os_solaris.cpp Wed Jul 30 03:51:43 2014 -0700
+@@ -415,11 +415,7 @@
+
+ static hrtime_t first_hrtime = 0;
+ static const hrtime_t hrtime_hz = 1000*1000*1000;
+-const int LOCK_BUSY = 1;
+-const int LOCK_FREE = 0;
+-const int LOCK_INVALID = -1;
+ static volatile hrtime_t max_hrtime = 0;
+-static volatile int max_hrtime_lock = LOCK_FREE; // Update counter with LSB as lock-in-progress
+
+
+ void os::Solaris::initialize_system_info() {
+@@ -648,9 +644,6 @@
+
+
+ void os::init_system_properties_values() {
+- char arch[12];
+- sysinfo(SI_ARCHITECTURE, arch, sizeof(arch));
+-
+ // The next steps are taken in the product version:
+ //
+ // Obtain the JAVA_HOME value from the location of libjvm.so.
+@@ -677,218 +670,174 @@
+ // Important note: if the location of libjvm.so changes this
+ // code needs to be changed accordingly.
+
+- // The next few definitions allow the code to be verbatim:
+-#define malloc(n) (char*)NEW_C_HEAP_ARRAY(char, (n), mtInternal)
+-#define free(p) FREE_C_HEAP_ARRAY(char, p, mtInternal)
+-#define getenv(n) ::getenv(n)
+-
++// Base path of extensions installed on the system.
++#define SYS_EXT_DIR "/usr/jdk/packages"
+ #define EXTENSIONS_DIR "/lib/ext"
+ #define ENDORSED_DIR "/lib/endorsed"
+-#define COMMON_DIR "/usr/jdk/packages"
+-
++
++ char cpu_arch[12];
++ // Buffer that fits several sprintfs.
++ // Note that the space for the colon and the trailing null are provided
++ // by the nulls included by the sizeof operator.
++ const size_t bufsize =
++ MAX4((size_t)MAXPATHLEN, // For dll_dir & friends.
++ sizeof(SYS_EXT_DIR) + sizeof("/lib/") + strlen(cpu_arch), // invariant ld_library_path
++ (size_t)MAXPATHLEN + sizeof(EXTENSIONS_DIR) + sizeof(SYS_EXT_DIR) + sizeof(EXTENSIONS_DIR), // extensions dir
++ (size_t)MAXPATHLEN + sizeof(ENDORSED_DIR)); // endorsed dir
++ char *buf = (char *)NEW_C_HEAP_ARRAY(char, bufsize, mtInternal);
++
++ // sysclasspath, java_home, dll_dir
+ {
+- /* sysclasspath, java_home, dll_dir */
+- {
+- char *home_path;
+- char *dll_path;
+- char *pslash;
+- char buf[MAXPATHLEN];
+- os::jvm_path(buf, sizeof(buf));
+-
+- // Found the full path to libjvm.so.
+- // Now cut the path to /jre if we can.
+- *(strrchr(buf, '/')) = '\0'; /* get rid of /libjvm.so */
++ char *pslash;
++ os::jvm_path(buf, bufsize);
++
++ // Found the full path to libjvm.so.
++ // Now cut the path to