/* * Copyright (C) 2010 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #define LOG_TAG "Matcher" #include #include #include #include #include #include #include #include "IcuUtilities.h" #include "JniException.h" #include "ScopedJavaUnicodeString.h" #include "unicode/parseerr.h" #include "unicode/regex.h" // ICU documentation: http://icu-project.org/apiref/icu4c/classRegexMatcher.html /** * Encapsulates an instance of ICU4C's RegexMatcher class along with a copy of * the input it's currently operating on in the native heap. * * Rationale: We choose to make a copy here because it turns out to be a lot * cheaper when a moving GC and/or string compression is enabled. This is * because env->GetStringChars() always copies in this scenario. This becomes * especially bad when the String in question is long and/or contains a large * number of matches. * * Drawbacks: The native allocation associated with this class is no longer * fixed size, so we're effectively lying to the NativeAllocationRegistry about * the size of the object(s) we're allocating on the native heap. The peak * memory usage doesn't change though, given that GetStringChars would have * made an allocation of precisely the same size. */ class MatcherState { public: MatcherState(icu::RegexMatcher* matcher) : mMatcher(matcher), mUChars(nullptr), mUText(nullptr), mStatus(U_ZERO_ERROR) { } bool updateInput(JNIEnv* env, jstring input) { // First, close the UText struct, since we're about to allocate a new one. if (mUText != nullptr) { utext_close(mUText); mUText = nullptr; } // Then delete the UChar* associated with the UText struct.. mUChars.reset(nullptr); // TODO: We should investigate whether we can avoid an additional copy // in the native heap when is_copy == JNI_TRUE. The problem with doing // that is that we might call ReleaseStringChars with a different // JNIEnv* on a different downcall. This is currently safe as // implemented in ART, but is unlikely to be portable and the spec stays // silent on the matter. ScopedStringChars inputChars(env, input); if (inputChars.get() == nullptr) { // There will be an exception pending if we get here. return false; } // Make a copy of |input| on the native heap. This copy will be live // until the next call to updateInput or close. mUChars.reset(new (std::nothrow) UChar[inputChars.size()]); if (mUChars.get() == nullptr) { env->ThrowNew(env->FindClass("Ljava/lang/OutOfMemoryError;"), "Out of memory"); return false; } static_assert(sizeof(UChar) == sizeof(jchar), "sizeof(Uchar) != sizeof(jchar)"); memcpy(mUChars.get(), inputChars.get(), inputChars.size() * sizeof(jchar)); // Reset any errors that might have occurred on previous patches. mStatus = U_ZERO_ERROR; mUText = utext_openUChars(nullptr, mUChars.get(), inputChars.size(), &mStatus); if (mUText == nullptr) { CHECK(maybeThrowIcuException(env, "utext_openUChars", mStatus)); return false; } // It is an error for ICU to have returned a non-null mUText but to // still have indicated an error. CHECK(U_SUCCESS(mStatus)); mMatcher->reset(mUText); return true; } ~MatcherState() { if (mUText != nullptr) { utext_close(mUText); } } icu::RegexMatcher* matcher() { return mMatcher.get(); } UErrorCode& status() { return mStatus; } void updateOffsets(JNIEnv* env, jintArray javaOffsets) { ScopedIntArrayRW offsets(env, javaOffsets); if (offsets.get() == NULL) { return; } for (size_t i = 0, groupCount = mMatcher->groupCount(); i <= groupCount; ++i) { offsets[2*i + 0] = mMatcher->start(i, mStatus); offsets[2*i + 1] = mMatcher->end(i, mStatus); } } private: std::unique_ptr mMatcher; std::unique_ptr mUChars; UText* mUText; UErrorCode mStatus; // Disallow copy and assignment. MatcherState(const MatcherState&); void operator=(const MatcherState&); }; static inline MatcherState* toMatcherState(jlong address) { return reinterpret_cast(static_cast(address)); } static void Matcher_free(void* address) { MatcherState* state = reinterpret_cast(address); delete state; } static jlong Matcher_getNativeFinalizer(JNIEnv*, jclass) { return reinterpret_cast(&Matcher_free); } static jboolean Matcher_findImpl(JNIEnv* env, jclass, jlong addr, jint startIndex, jintArray offsets) { MatcherState* state = toMatcherState(addr); UBool result = state->matcher()->find(startIndex, state->status()); if (result) { state->updateOffsets(env, offsets); return JNI_TRUE; } else { return JNI_FALSE; } } static jboolean Matcher_findNextImpl(JNIEnv* env, jclass, jlong addr, jintArray offsets) { MatcherState* state = toMatcherState(addr); UBool result = state->matcher()->find(); if (result) { state->updateOffsets(env, offsets); return JNI_TRUE; } else { return JNI_FALSE; } } static jint Matcher_groupCountImpl(JNIEnv*, jclass, jlong addr) { MatcherState* state = toMatcherState(addr); return state->matcher()->groupCount(); } static jboolean Matcher_hitEndImpl(JNIEnv*, jclass, jlong addr) { MatcherState* state = toMatcherState(addr); if (state->matcher()->hitEnd() != 0) { return JNI_TRUE; } else { return JNI_FALSE; } } static jboolean Matcher_lookingAtImpl(JNIEnv* env, jclass, jlong addr, jintArray offsets) { MatcherState* state = toMatcherState(addr); UBool result = state->matcher()->lookingAt(state->status()); if (result) { state->updateOffsets(env, offsets); return JNI_TRUE; } else { return JNI_FALSE; } } static jboolean Matcher_matchesImpl(JNIEnv* env, jclass, jlong addr, jintArray offsets) { MatcherState* state = toMatcherState(addr); UBool result = state->matcher()->matches(state->status()); if (result) { state->updateOffsets(env, offsets); return JNI_TRUE; } else { return JNI_FALSE; } } static jlong Matcher_openImpl(JNIEnv* env, jclass, jlong patternAddr) { icu::RegexPattern* pattern = reinterpret_cast(static_cast(patternAddr)); UErrorCode status = U_ZERO_ERROR; icu::RegexMatcher* result = pattern->matcher(status); if (maybeThrowIcuException(env, "RegexPattern::matcher", status)) { return 0; } return reinterpret_cast(new MatcherState(result)); } static jboolean Matcher_requireEndImpl(JNIEnv*, jclass, jlong addr) { MatcherState* state = toMatcherState(addr); if (state->matcher()->requireEnd() != 0) { return JNI_TRUE; } else { return JNI_FALSE; } } static void Matcher_setInputImpl(JNIEnv* env, jclass, jlong addr, jstring javaText, jint start, jint end) { MatcherState* state = toMatcherState(addr); if (state->updateInput(env, javaText)) { state->matcher()->region(start, end, state->status()); } } static void Matcher_useAnchoringBoundsImpl(JNIEnv*, jclass, jlong addr, jboolean value) { MatcherState* state = toMatcherState(addr); state->matcher()->useAnchoringBounds(value); } static void Matcher_useTransparentBoundsImpl(JNIEnv*, jclass, jlong addr, jboolean value) { MatcherState* state = toMatcherState(addr); state->matcher()->useTransparentBounds(value); } static jint Matcher_getMatchedGroupIndex0(JNIEnv* env, jclass, jlong patternAddr, jstring javaGroupName) { icu::RegexPattern* pattern = reinterpret_cast(static_cast(patternAddr)); ScopedJavaUnicodeString groupName(env, javaGroupName); UErrorCode status = U_ZERO_ERROR; jint result = pattern->groupNumberFromName(groupName.unicodeString(), status); if (U_SUCCESS(status)) { return result; } if (status == U_REGEX_INVALID_CAPTURE_GROUP_NAME) { return -1; } maybeThrowIcuException(env, "RegexPattern::groupNumberFromName", status); return -1; } static JNINativeMethod gMethods[] = { NATIVE_METHOD(Matcher, getMatchedGroupIndex0, "(JLjava/lang/String;)I"), NATIVE_METHOD(Matcher, findImpl, "(JI[I)Z"), NATIVE_METHOD(Matcher, findNextImpl, "(J[I)Z"), NATIVE_METHOD(Matcher, getNativeFinalizer, "()J"), NATIVE_METHOD(Matcher, groupCountImpl, "(J)I"), NATIVE_METHOD(Matcher, hitEndImpl, "(J)Z"), NATIVE_METHOD(Matcher, lookingAtImpl, "(J[I)Z"), NATIVE_METHOD(Matcher, matchesImpl, "(J[I)Z"), NATIVE_METHOD(Matcher, openImpl, "(J)J"), NATIVE_METHOD(Matcher, requireEndImpl, "(J)Z"), NATIVE_METHOD(Matcher, setInputImpl, "(JLjava/lang/String;II)V"), NATIVE_METHOD(Matcher, useAnchoringBoundsImpl, "(JZ)V"), NATIVE_METHOD(Matcher, useTransparentBoundsImpl, "(JZ)V"), }; void register_java_util_regex_Matcher(JNIEnv* env) { jniRegisterNativeMethods(env, "java/util/regex/Matcher", gMethods, NELEM(gMethods)); }