| compiler.h | | compiler.h | |
|
| // @file mongo/platform/compiler.h | | // Copyright 2012 the V8 project authors. All rights reserved. | |
| | | // Redistribution and use in source and binary forms, with or without | |
| | | // modification, are permitted provided that the following conditions are | |
| | | // met: | |
| | | // | |
| | | // * Redistributions of source code must retain the above copyright | |
| | | // notice, this list of conditions and the following disclaimer. | |
| | | // * Redistributions in binary form must reproduce the above | |
| | | // copyright notice, this list of conditions and the following | |
| | | // disclaimer in the documentation and/or other materials provided | |
| | | // with the distribution. | |
| | | // * Neither the name of Google Inc. nor the names of its | |
| | | // contributors may be used to endorse or promote products derived | |
| | | // from this software without specific prior written permission. | |
| | | // | |
| | | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| | | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| | | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| | | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
| | | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| | | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
| | | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
| | | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
| | | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| | | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| | | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| | | | |
|
| /* | | #ifndef V8_COMPILER_H_ | |
| * Copyright 2012 10gen Inc. | | #define V8_COMPILER_H_ | |
| * | | | |
| * Licensed under the Apache License, Version 2.0 (the "License"); | | #include "allocation.h" | |
| * you may not use this file except in compliance with the License. | | #include "ast.h" | |
| * You may obtain a copy of the License at | | #include "zone.h" | |
| * | | | |
| * http://www.apache.org/licenses/LICENSE-2.0 | | namespace v8 { | |
| * | | namespace internal { | |
| * Unless required by applicable law or agreed to in writing, software | | | |
| * distributed under the License is distributed on an "AS IS" BASIS, | | class ScriptDataImpl; | |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | | | |
| * See the License for the specific language governing permissions and | | // CompilationInfo encapsulates some information known at compile time. It | |
| * limitations under the License. | | // is constructed based on the resources available at compile-time. | |
| */ | | class CompilationInfo { | |
| | | public: | |
| #pragma once | | CompilationInfo(Handle<Script> script, Zone* zone); | |
| | | CompilationInfo(Handle<SharedFunctionInfo> shared_info, Zone* zone); | |
| /** | | CompilationInfo(Handle<JSFunction> closure, Zone* zone); | |
| * Include "mongo/platform/compiler.h" to get compiler-specific macro defin | | | |
| itions and utilities. | | virtual ~CompilationInfo(); | |
| */ | | | |
| | | Isolate* isolate() { | |
| #if defined(_MSC_VER) | | ASSERT(Isolate::Current() == isolate_); | |
| #include "mongo/platform/compiler_msvc.h" | | return isolate_; | |
| #elif defined(__GNUC__) | | } | |
| #include "mongo/platform/compiler_gcc.h" | | Zone* zone() { | |
| #else | | return zone_; | |
| #error "Unsupported compiler family" | | } | |
| | | bool is_lazy() const { return IsLazy::decode(flags_); } | |
| | | bool is_eval() const { return IsEval::decode(flags_); } | |
| | | bool is_global() const { return IsGlobal::decode(flags_); } | |
| | | bool is_classic_mode() const { return language_mode() == CLASSIC_MODE; } | |
| | | bool is_extended_mode() const { return language_mode() == EXTENDED_MODE; | |
| | | } | |
| | | LanguageMode language_mode() const { | |
| | | return LanguageModeField::decode(flags_); | |
| | | } | |
| | | bool is_in_loop() const { return IsInLoop::decode(flags_); } | |
| | | FunctionLiteral* function() const { return function_; } | |
| | | Scope* scope() const { return scope_; } | |
| | | Scope* global_scope() const { return global_scope_; } | |
| | | Handle<Code> code() const { return code_; } | |
| | | Handle<JSFunction> closure() const { return closure_; } | |
| | | Handle<SharedFunctionInfo> shared_info() const { return shared_info_; } | |
| | | Handle<Script> script() const { return script_; } | |
| | | v8::Extension* extension() const { return extension_; } | |
| | | ScriptDataImpl* pre_parse_data() const { return pre_parse_data_; } | |
| | | Handle<Context> calling_context() const { return calling_context_; } | |
| | | int osr_ast_id() const { return osr_ast_id_; } | |
| | | | |
| | | void MarkAsEval() { | |
| | | ASSERT(!is_lazy()); | |
| | | flags_ |= IsEval::encode(true); | |
| | | } | |
| | | void MarkAsGlobal() { | |
| | | ASSERT(!is_lazy()); | |
| | | flags_ |= IsGlobal::encode(true); | |
| | | } | |
| | | void SetLanguageMode(LanguageMode language_mode) { | |
| | | ASSERT(this->language_mode() == CLASSIC_MODE || | |
| | | this->language_mode() == language_mode || | |
| | | language_mode == EXTENDED_MODE); | |
| | | flags_ = LanguageModeField::update(flags_, language_mode); | |
| | | } | |
| | | void MarkAsInLoop() { | |
| | | ASSERT(is_lazy()); | |
| | | flags_ |= IsInLoop::encode(true); | |
| | | } | |
| | | void MarkAsNative() { | |
| | | flags_ |= IsNative::encode(true); | |
| | | } | |
| | | bool is_native() const { | |
| | | return IsNative::decode(flags_); | |
| | | } | |
| | | void SetFunction(FunctionLiteral* literal) { | |
| | | ASSERT(function_ == NULL); | |
| | | function_ = literal; | |
| | | } | |
| | | void SetScope(Scope* scope) { | |
| | | ASSERT(scope_ == NULL); | |
| | | scope_ = scope; | |
| | | } | |
| | | void SetGlobalScope(Scope* global_scope) { | |
| | | ASSERT(global_scope_ == NULL); | |
| | | global_scope_ = global_scope; | |
| | | } | |
| | | void SetCode(Handle<Code> code) { code_ = code; } | |
| | | void SetExtension(v8::Extension* extension) { | |
| | | ASSERT(!is_lazy()); | |
| | | extension_ = extension; | |
| | | } | |
| | | void SetPreParseData(ScriptDataImpl* pre_parse_data) { | |
| | | ASSERT(!is_lazy()); | |
| | | pre_parse_data_ = pre_parse_data; | |
| | | } | |
| | | void SetCallingContext(Handle<Context> context) { | |
| | | ASSERT(is_eval()); | |
| | | calling_context_ = context; | |
| | | } | |
| | | void SetOsrAstId(int osr_ast_id) { | |
| | | ASSERT(IsOptimizing()); | |
| | | osr_ast_id_ = osr_ast_id; | |
| | | } | |
| | | void MarkCompilingForDebugging(Handle<Code> current_code) { | |
| | | ASSERT(mode_ != OPTIMIZE); | |
| | | ASSERT(current_code->kind() == Code::FUNCTION); | |
| | | flags_ |= IsCompilingForDebugging::encode(true); | |
| | | if (current_code->is_compiled_optimizable()) { | |
| | | EnableDeoptimizationSupport(); | |
| | | } else { | |
| | | mode_ = CompilationInfo::NONOPT; | |
| | | } | |
| | | } | |
| | | bool IsCompilingForDebugging() { | |
| | | return IsCompilingForDebugging::decode(flags_); | |
| | | } | |
| | | | |
| | | bool has_global_object() const { | |
| | | return !closure().is_null() && (closure()->context()->global() != NULL) | |
| | | ; | |
| | | } | |
| | | | |
| | | GlobalObject* global_object() const { | |
| | | return has_global_object() ? closure()->context()->global() : NULL; | |
| | | } | |
| | | | |
| | | // Accessors for the different compilation modes. | |
| | | bool IsOptimizing() const { return mode_ == OPTIMIZE; } | |
| | | bool IsOptimizable() const { return mode_ == BASE; } | |
| | | void SetOptimizing(int osr_ast_id) { | |
| | | SetMode(OPTIMIZE); | |
| | | osr_ast_id_ = osr_ast_id; | |
| | | } | |
| | | void DisableOptimization(); | |
| | | | |
| | | // Deoptimization support. | |
| | | bool HasDeoptimizationSupport() const { | |
| | | return SupportsDeoptimization::decode(flags_); | |
| | | } | |
| | | void EnableDeoptimizationSupport() { | |
| | | ASSERT(IsOptimizable()); | |
| | | flags_ |= SupportsDeoptimization::encode(true); | |
| | | } | |
| | | | |
| | | // Determines whether or not to insert a self-optimization header. | |
| | | bool ShouldSelfOptimize(); | |
| | | | |
| | | // Disable all optimization attempts of this info for the rest of the | |
| | | // current compilation pipeline. | |
| | | void AbortOptimization(); | |
| | | | |
| | | void set_deferred_handles(DeferredHandles* deferred_handles) { | |
| | | ASSERT(deferred_handles_ == NULL); | |
| | | deferred_handles_ = deferred_handles; | |
| | | } | |
| | | | |
| | | void SaveHandles() { | |
| | | SaveHandle(&closure_); | |
| | | SaveHandle(&shared_info_); | |
| | | SaveHandle(&calling_context_); | |
| | | SaveHandle(&script_); | |
| | | } | |
| | | | |
| | | private: | |
| | | Isolate* isolate_; | |
| | | | |
| | | // Compilation mode. | |
| | | // BASE is generated by the full codegen, optionally prepared for bailout | |
| | | s. | |
| | | // OPTIMIZE is optimized code generated by the Hydrogen-based backend. | |
| | | // NONOPT is generated by the full codegen and is not prepared for | |
| | | // recompilation/bailouts. These functions are never recompiled. | |
| | | enum Mode { | |
| | | BASE, | |
| | | OPTIMIZE, | |
| | | NONOPT | |
| | | }; | |
| | | | |
| | | void Initialize(Mode mode) { | |
| | | mode_ = V8::UseCrankshaft() ? mode : NONOPT; | |
| | | ASSERT(!script_.is_null()); | |
| | | if (script_->type()->value() == Script::TYPE_NATIVE) { | |
| | | MarkAsNative(); | |
| | | } | |
| | | if (!shared_info_.is_null()) { | |
| | | ASSERT(language_mode() == CLASSIC_MODE); | |
| | | SetLanguageMode(shared_info_->language_mode()); | |
| | | } | |
| | | } | |
| | | | |
| | | void SetMode(Mode mode) { | |
| | | ASSERT(V8::UseCrankshaft()); | |
| | | mode_ = mode; | |
| | | } | |
| | | | |
| | | // Flags using template class BitField<type, start, length>. All are | |
| | | // false by default. | |
| | | // | |
| | | // Compilation is either eager or lazy. | |
| | | class IsLazy: public BitField<bool, 0, 1> {}; | |
| | | // Flags that can be set for eager compilation. | |
| | | class IsEval: public BitField<bool, 1, 1> {}; | |
| | | class IsGlobal: public BitField<bool, 2, 1> {}; | |
| | | // Flags that can be set for lazy compilation. | |
| | | class IsInLoop: public BitField<bool, 3, 1> {}; | |
| | | // Strict mode - used in eager compilation. | |
| | | class LanguageModeField: public BitField<LanguageMode, 4, 2> {}; | |
| | | // Is this a function from our natives. | |
| | | class IsNative: public BitField<bool, 6, 1> {}; | |
| | | // Is this code being compiled with support for deoptimization. | |
| | | class SupportsDeoptimization: public BitField<bool, 7, 1> {}; | |
| | | // If compiling for debugging produce just full code matching the | |
| | | // initial mode setting. | |
| | | class IsCompilingForDebugging: public BitField<bool, 8, 1> {}; | |
| | | | |
| | | unsigned flags_; | |
| | | | |
| | | // Fields filled in by the compilation pipeline. | |
| | | // AST filled in by the parser. | |
| | | FunctionLiteral* function_; | |
| | | // The scope of the function literal as a convenience. Set to indicate | |
| | | // that scopes have been analyzed. | |
| | | Scope* scope_; | |
| | | // The global scope provided as a convenience. | |
| | | Scope* global_scope_; | |
| | | // The compiled code. | |
| | | Handle<Code> code_; | |
| | | | |
| | | // Possible initial inputs to the compilation process. | |
| | | Handle<JSFunction> closure_; | |
| | | Handle<SharedFunctionInfo> shared_info_; | |
| | | Handle<Script> script_; | |
| | | | |
| | | // Fields possibly needed for eager compilation, NULL by default. | |
| | | v8::Extension* extension_; | |
| | | ScriptDataImpl* pre_parse_data_; | |
| | | | |
| | | // The context of the caller is needed for eval code, and will be a null | |
| | | // handle otherwise. | |
| | | Handle<Context> calling_context_; | |
| | | | |
| | | // Compilation mode flag and whether deoptimization is allowed. | |
| | | Mode mode_; | |
| | | int osr_ast_id_; | |
| | | | |
| | | // The zone from which the compilation pipeline working on this | |
| | | // CompilationInfo allocates. | |
| | | Zone* zone_; | |
| | | | |
| | | DeferredHandles* deferred_handles_; | |
| | | | |
| | | template<typename T> | |
| | | void SaveHandle(Handle<T> *object) { | |
| | | if (!object->is_null()) { | |
| | | Handle<T> handle(*(*object)); | |
| | | *object = handle; | |
| | | } | |
| | | } | |
| | | | |
| | | DISALLOW_COPY_AND_ASSIGN(CompilationInfo); | |
| | | }; | |
| | | | |
| | | // Exactly like a CompilationInfo, except also creates and enters a | |
| | | // Zone on construction and deallocates it on exit. | |
| | | class CompilationInfoWithZone: public CompilationInfo { | |
| | | public: | |
| | | explicit CompilationInfoWithZone(Handle<Script> script) | |
| | | : CompilationInfo(script, &zone_), | |
| | | zone_(script->GetIsolate()), | |
| | | zone_scope_(&zone_, DELETE_ON_EXIT) {} | |
| | | explicit CompilationInfoWithZone(Handle<SharedFunctionInfo> shared_info) | |
| | | : CompilationInfo(shared_info, &zone_), | |
| | | zone_(shared_info->GetIsolate()), | |
| | | zone_scope_(&zone_, DELETE_ON_EXIT) {} | |
| | | explicit CompilationInfoWithZone(Handle<JSFunction> closure) | |
| | | : CompilationInfo(closure, &zone_), | |
| | | zone_(closure->GetIsolate()), | |
| | | zone_scope_(&zone_, DELETE_ON_EXIT) {} | |
| | | | |
| | | private: | |
| | | Zone zone_; | |
| | | ZoneScope zone_scope_; | |
| | | }; | |
| | | | |
| | | // A wrapper around a CompilationInfo that detaches the Handles from | |
| | | // the underlying DeferredHandleScope and stores them in info_ on | |
| | | // destruction. | |
| | | class CompilationHandleScope BASE_EMBEDDED { | |
| | | public: | |
| | | explicit CompilationHandleScope(CompilationInfo* info) | |
| | | : deferred_(info->isolate()), info_(info) {} | |
| | | ~CompilationHandleScope() { | |
| | | info_->set_deferred_handles(deferred_.Detach()); | |
| | | } | |
| | | | |
| | | private: | |
| | | DeferredHandleScope deferred_; | |
| | | CompilationInfo* info_; | |
| | | }; | |
| | | | |
| | | class HGraph; | |
| | | class HGraphBuilder; | |
| | | class LChunk; | |
| | | | |
| | | // A helper class that calls the three compilation phases in | |
| | | // Crankshaft and keeps track of its state. The three phases | |
| | | // CreateGraph, OptimizeGraph and GenerateAndInstallCode can either | |
| | | // fail, bail-out to the full code generator or succeed. Apart from | |
| | | // their return value, the status of the phase last run can be checked | |
| | | // using last_status(). | |
| | | class OptimizingCompiler: public ZoneObject { | |
| | | public: | |
| | | explicit OptimizingCompiler(CompilationInfo* info) | |
| | | : info_(info), | |
| | | oracle_(NULL), | |
| | | graph_builder_(NULL), | |
| | | graph_(NULL), | |
| | | chunk_(NULL), | |
| | | time_taken_to_create_graph_(0), | |
| | | time_taken_to_optimize_(0), | |
| | | time_taken_to_codegen_(0), | |
| | | last_status_(FAILED) { } | |
| | | | |
| | | enum Status { | |
| | | FAILED, BAILED_OUT, SUCCEEDED | |
| | | }; | |
| | | | |
| | | MUST_USE_RESULT Status CreateGraph(); | |
| | | MUST_USE_RESULT Status OptimizeGraph(); | |
| | | MUST_USE_RESULT Status GenerateAndInstallCode(); | |
| | | | |
| | | Status last_status() const { return last_status_; } | |
| | | CompilationInfo* info() const { return info_; } | |
| | | | |
| | | MUST_USE_RESULT Status AbortOptimization() { | |
| | | info_->AbortOptimization(); | |
| | | info_->shared_info()->DisableOptimization(); | |
| | | return SetLastStatus(BAILED_OUT); | |
| | | } | |
| | | | |
| | | private: | |
| | | CompilationInfo* info_; | |
| | | TypeFeedbackOracle* oracle_; | |
| | | HGraphBuilder* graph_builder_; | |
| | | HGraph* graph_; | |
| | | LChunk* chunk_; | |
| | | int64_t time_taken_to_create_graph_; | |
| | | int64_t time_taken_to_optimize_; | |
| | | int64_t time_taken_to_codegen_; | |
| | | Status last_status_; | |
| | | | |
| | | MUST_USE_RESULT Status SetLastStatus(Status status) { | |
| | | last_status_ = status; | |
| | | return last_status_; | |
| | | } | |
| | | void RecordOptimizationStats(); | |
| | | | |
| | | struct Timer { | |
| | | Timer(OptimizingCompiler* compiler, int64_t* location) | |
| | | : compiler_(compiler), | |
| | | start_(OS::Ticks()), | |
| | | location_(location) { } | |
| | | | |
| | | ~Timer() { | |
| | | *location_ += (OS::Ticks() - start_); | |
| | | } | |
| | | | |
| | | OptimizingCompiler* compiler_; | |
| | | int64_t start_; | |
| | | int64_t* location_; | |
| | | }; | |
| | | }; | |
| | | | |
| | | // The V8 compiler | |
| | | // | |
| | | // General strategy: Source code is translated into an anonymous function w | |
| | | /o | |
| | | // parameters which then can be executed. If the source code contains other | |
| | | // functions, they will be compiled and allocated as part of the compilatio | |
| | | n | |
| | | // of the source code. | |
| | | | |
| | | // Please note this interface returns shared function infos. This means yo | |
| | | u | |
| | | // need to call Factory::NewFunctionFromSharedFunctionInfo before you have | |
| | | a | |
| | | // real function with a context. | |
| | | | |
| | | class Compiler : public AllStatic { | |
| | | public: | |
| | | // Default maximum number of function optimization attempts before we | |
| | | // give up. | |
| | | static const int kDefaultMaxOptCount = 10; | |
| | | | |
| | | static const int kMaxInliningLevels = 3; | |
| | | | |
| | | // Call count before primitive functions trigger their own optimization. | |
| | | static const int kCallsUntilPrimitiveOpt = 200; | |
| | | | |
| | | // All routines return a SharedFunctionInfo. | |
| | | // If an error occurs an exception is raised and the return handle | |
| | | // contains NULL. | |
| | | | |
| | | // Compile a String source within a context. | |
| | | static Handle<SharedFunctionInfo> Compile(Handle<String> source, | |
| | | Handle<Object> script_name, | |
| | | int line_offset, | |
| | | int column_offset, | |
| | | v8::Extension* extension, | |
| | | ScriptDataImpl* pre_data, | |
| | | Handle<Object> script_data, | |
| | | NativesFlag is_natives_code); | |
| | | | |
| | | // Compile a String source within a context for Eval. | |
| | | static Handle<SharedFunctionInfo> CompileEval(Handle<String> source, | |
| | | Handle<Context> context, | |
| | | bool is_global, | |
| | | LanguageMode language_mode, | |
| | | int scope_position); | |
| | | | |
| | | // Compile from function info (used for lazy compilation). Returns true o | |
| | | n | |
| | | // success and false if the compilation resulted in a stack overflow. | |
| | | static bool CompileLazy(CompilationInfo* info); | |
| | | | |
| | | static void RecompileParallel(Handle<JSFunction> function); | |
| | | | |
| | | // Compile a shared function info object (the function is possibly lazily | |
| | | // compiled). | |
| | | static Handle<SharedFunctionInfo> BuildFunctionInfo(FunctionLiteral* node | |
| | | , | |
| | | Handle<Script> script | |
| | | ); | |
| | | | |
| | | // Set the function info for a newly compiled function. | |
| | | static void SetFunctionInfo(Handle<SharedFunctionInfo> function_info, | |
| | | FunctionLiteral* lit, | |
| | | bool is_toplevel, | |
| | | Handle<Script> script); | |
| | | | |
| | | static void InstallOptimizedCode(OptimizingCompiler* info); | |
| | | | |
| | | #ifdef ENABLE_DEBUGGER_SUPPORT | |
| | | static bool MakeCodeForLiveEdit(CompilationInfo* info); | |
| #endif | | #endif | |
|
| | | | |
| | | static void RecordFunctionCompilation(Logger::LogEventsAndTags tag, | |
| | | CompilationInfo* info, | |
| | | Handle<SharedFunctionInfo> shared); | |
| | | }; | |
| | | | |
| | | } } // namespace v8::internal | |
| | | | |
| | | #endif // V8_COMPILER_H_ | |
| | | | |
End of changes. 3 change blocks. |
| 30 lines changed or deleted | | 471 lines changed or added | |
|
| engine_v8.h | | engine_v8.h | |
| | | | |
| skipping to change at line 26 | | skipping to change at line 26 | |
| */ | | */ | |
| | | | |
| #pragma once | | #pragma once | |
| | | | |
| #include <v8.h> | | #include <v8.h> | |
| #include <vector> | | #include <vector> | |
| | | | |
| #include "mongo/base/disallow_copying.h" | | #include "mongo/base/disallow_copying.h" | |
| #include "mongo/client/dbclientinterface.h" | | #include "mongo/client/dbclientinterface.h" | |
| #include "mongo/client/dbclientcursor.h" | | #include "mongo/client/dbclientcursor.h" | |
|
| | | #include "mongo/platform/unordered_map.h" | |
| #include "mongo/scripting/engine.h" | | #include "mongo/scripting/engine.h" | |
| #include "mongo/scripting/v8_deadline_monitor.h" | | #include "mongo/scripting/v8_deadline_monitor.h" | |
| #include "mongo/scripting/v8_profiler.h" | | #include "mongo/scripting/v8_profiler.h" | |
| | | | |
| /** | | /** | |
| * V8_SIMPLE_HEADER must be placed in any function called from a public API | | * V8_SIMPLE_HEADER must be placed in any function called from a public API | |
| * that works with v8 handles (and/or must be within the V8Scope's isolate | | * that works with v8 handles (and/or must be within the V8Scope's isolate | |
| * and context). Be sure to close the handle_scope if returning a v8::Hand
le! | | * and context). Be sure to close the handle_scope if returning a v8::Hand
le! | |
| */ | | */ | |
| #define V8_SIMPLE_HEADER
\ | | #define V8_SIMPLE_HEADER
\ | |
| | | | |
| skipping to change at line 204 | | skipping to change at line 205 | |
| int timeoutMs = 0, bool ignoreReturn = false, | | int timeoutMs = 0, bool ignoreReturn = false, | |
| bool readOnlyArgs = false, bool readOnlyRecv = f
alse); | | bool readOnlyArgs = false, bool readOnlyRecv = f
alse); | |
| | | | |
| virtual bool exec(const StringData& code, const string& name, bool
printResult, | | virtual bool exec(const StringData& code, const string& name, bool
printResult, | |
| bool reportError, bool assertOnError, int timeout
Ms); | | bool reportError, bool assertOnError, int timeout
Ms); | |
| | | | |
| // functions to create v8 object and function templates | | // functions to create v8 object and function templates | |
| virtual void injectNative(const char* field, NativeFunction func, v
oid* data = 0); | | virtual void injectNative(const char* field, NativeFunction func, v
oid* data = 0); | |
| void injectNative(const char* field, NativeFunction func, v8::Handl
e<v8::Object>& obj, | | void injectNative(const char* field, NativeFunction func, v8::Handl
e<v8::Object>& obj, | |
| void* data = 0); | | void* data = 0); | |
|
| void injectV8Function(const char* field, v8Function func); | | | |
| void injectV8Function(const char* field, v8Function func, v8::Handl | | // These functions inject a function (either an unwrapped function | |
| e<v8::Object>& obj); | | pointer or a pre-wrapped | |
| void injectV8Function(const char* field, v8Function func, v8::Handl | | // FunctionTemplate) into the provided object. If no object is prov | |
| e<v8::Template>& t); | | ided, the function will | |
| | | // be injected at global scope. These functions take care of settin | |
| | | g the function and class | |
| | | // name on the returned FunctionTemplate. | |
| | | v8::Handle<v8::FunctionTemplate> injectV8Function(const char* name, | |
| | | v8Function func); | |
| | | v8::Handle<v8::FunctionTemplate> injectV8Function(const char* name, | |
| | | v8Function func, | |
| | | v8::Handle<v8::Ob | |
| | | ject>& obj); | |
| | | v8::Handle<v8::FunctionTemplate> injectV8Function(const char* name, | |
| | | v8::Handle<v8::Fu | |
| | | nctionTemplate> ft, | |
| | | v8::Handle<v8::Ob | |
| | | ject>& obj); | |
| | | | |
| | | // Injects a method into the provided prototype | |
| | | v8::Handle<v8::FunctionTemplate> injectV8Method(const char* name, | |
| | | v8Function func, | |
| | | v8::Handle<v8::Obje | |
| | | ctTemplate>& proto); | |
| v8::Handle<v8::FunctionTemplate> createV8Function(v8Function func); | | v8::Handle<v8::FunctionTemplate> createV8Function(v8Function func); | |
| virtual ScriptingFunction _createFunction(const char* code, | | virtual ScriptingFunction _createFunction(const char* code, | |
| ScriptingFunction functio
nNumber = 0); | | ScriptingFunction functio
nNumber = 0); | |
| v8::Local<v8::Function> __createFunction(const char* code, | | v8::Local<v8::Function> __createFunction(const char* code, | |
| ScriptingFunction function
Number = 0); | | ScriptingFunction function
Number = 0); | |
| | | | |
| /** | | /** | |
| * Convert BSON types to v8 Javascript types | | * Convert BSON types to v8 Javascript types | |
| */ | | */ | |
|
| v8::Persistent<v8::Object> mongoToLZV8(const mongo::BSONObj& m, boo
l readOnly = false); | | v8::Handle<v8::Object> mongoToLZV8(const mongo::BSONObj& m, bool re
adOnly = false); | |
| v8::Handle<v8::Value> mongoToV8Element(const BSONElement& f, bool r
eadOnly = false); | | v8::Handle<v8::Value> mongoToV8Element(const BSONElement& f, bool r
eadOnly = false); | |
| | | | |
| /** | | /** | |
| * Convert v8 Javascript types to BSON types | | * Convert v8 Javascript types to BSON types | |
| */ | | */ | |
| mongo::BSONObj v8ToMongo(v8::Handle<v8::Object> obj, int depth = 0)
; | | mongo::BSONObj v8ToMongo(v8::Handle<v8::Object> obj, int depth = 0)
; | |
| void v8ToMongoElement(BSONObjBuilder& b, | | void v8ToMongoElement(BSONObjBuilder& b, | |
|
| const string& sname, | | const StringData& sname, | |
| v8::Handle<v8::Value> value, | | v8::Handle<v8::Value> value, | |
| int depth = 0, | | int depth = 0, | |
| BSONObj* originalParent = 0); | | BSONObj* originalParent = 0); | |
| void v8ToMongoObject(BSONObjBuilder& b, | | void v8ToMongoObject(BSONObjBuilder& b, | |
|
| const string& sname, | | const StringData& sname, | |
| v8::Handle<v8::Value> value, | | v8::Handle<v8::Value> value, | |
| int depth, | | int depth, | |
| BSONObj* originalParent); | | BSONObj* originalParent); | |
| void v8ToMongoNumber(BSONObjBuilder& b, | | void v8ToMongoNumber(BSONObjBuilder& b, | |
|
| const string& elementName, | | const StringData& elementName, | |
| v8::Handle<v8::Value> value, | | v8::Handle<v8::Number> value, | |
| BSONObj* originalParent); | | BSONObj* originalParent); | |
|
| void v8ToMongoNumberLong(BSONObjBuilder& b, | | | |
| const string& elementName, | | | |
| v8::Handle<v8::Object> obj); | | | |
| void v8ToMongoInternal(BSONObjBuilder& b, | | | |
| const string& elementName, | | | |
| v8::Handle<v8::Object> obj); | | | |
| void v8ToMongoRegex(BSONObjBuilder& b, | | void v8ToMongoRegex(BSONObjBuilder& b, | |
|
| const string& elementName, | | const StringData& elementName, | |
| v8::Handle<v8::Object> v8Regex); | | v8::Handle<v8::RegExp> v8Regex); | |
| void v8ToMongoDBRef(BSONObjBuilder& b, | | void v8ToMongoDBRef(BSONObjBuilder& b, | |
|
| const string& elementName, | | const StringData& elementName, | |
| v8::Handle<v8::Object> obj); | | v8::Handle<v8::Object> obj); | |
| void v8ToMongoBinData(BSONObjBuilder& b, | | void v8ToMongoBinData(BSONObjBuilder& b, | |
|
| const string& elementName, | | const StringData& elementName, | |
| v8::Handle<v8::Object> obj); | | v8::Handle<v8::Object> obj); | |
|
| void v8ToMongoObjectID(BSONObjBuilder& b, | | OID v8ToMongoObjectID(v8::Handle<v8::Object> obj); | |
| const string& elementName, | | | |
| v8::Handle<v8::Object> obj); | | | |
| | | | |
| v8::Function* getNamedCons(const char* name); | | | |
| | | | |
| v8::Function* getObjectIdCons(); | | | |
| | | | |
| v8::Local<v8::Value> newId(const OID& id); | | v8::Local<v8::Value> newId(const OID& id); | |
| | | | |
| /** | | /** | |
| * Convert a JavaScript exception to an STL string. Requires | | * Convert a JavaScript exception to an STL string. Requires | |
| * access to the V8Scope instance to report source context informat
ion. | | * access to the V8Scope instance to report source context informat
ion. | |
| */ | | */ | |
| std::string v8ExceptionToSTLString(const v8::TryCatch* try_catch); | | std::string v8ExceptionToSTLString(const v8::TryCatch* try_catch); | |
| | | | |
| /** | | /** | |
|
| * GC callback for weak references to BSON objects (via BSONHolder) | | | |
| */ | | | |
| v8::Persistent<v8::Object> wrapBSONObject(v8::Local<v8::Object> obj | | | |
| , BSONHolder* data); | | | |
| | | | |
| /** | | | |
| * Create a V8 string with a local handle | | * Create a V8 string with a local handle | |
| */ | | */ | |
| static inline v8::Handle<v8::String> v8StringData(StringData str) { | | static inline v8::Handle<v8::String> v8StringData(StringData str) { | |
| return v8::String::New(str.rawData()); | | return v8::String::New(str.rawData()); | |
| } | | } | |
| | | | |
| /** | | /** | |
| * Get the isolate this scope belongs to (can be called from any th
read, but v8 requires | | * Get the isolate this scope belongs to (can be called from any th
read, but v8 requires | |
| * the new thread enter the isolate and context. Only one thread
can enter the isolate.) | | * the new thread enter the isolate and context. Only one thread
can enter the isolate.) | |
| */ | | */ | |
| v8::Isolate* getIsolate() { return _isolate; } | | v8::Isolate* getIsolate() { return _isolate; } | |
| | | | |
| /** | | /** | |
| * Get the JS context this scope executes within. | | * Get the JS context this scope executes within. | |
| */ | | */ | |
| v8::Persistent<v8::Context> getContext() { return _context; } | | v8::Persistent<v8::Context> getContext() { return _context; } | |
| | | | |
|
| | | /** | |
| | | * Get the global JS object | |
| | | */ | |
| | | v8::Persistent<v8::Object> getGlobal() { return _global; } | |
| | | | |
| ObjTracker<BSONHolder> bsonHolderTracker; | | ObjTracker<BSONHolder> bsonHolderTracker; | |
| ObjTracker<DBClientWithCommands> dbClientWithCommandsTracker; | | ObjTracker<DBClientWithCommands> dbClientWithCommandsTracker; | |
| ObjTracker<DBClientBase> dbClientBaseTracker; | | ObjTracker<DBClientBase> dbClientBaseTracker; | |
| ObjTracker<DBClientCursor> dbClientCursorTracker; | | ObjTracker<DBClientCursor> dbClientCursorTracker; | |
| | | | |
|
| | | // These are all named after the JS constructor name + FT | |
| | | v8::Handle<v8::FunctionTemplate> ObjectIdFT() const { return | |
| | | _ObjectIdFT; } | |
| | | v8::Handle<v8::FunctionTemplate> DBRefFT() const { return | |
| | | _DBRefFT; } | |
| | | v8::Handle<v8::FunctionTemplate> DBPointerFT() const { return | |
| | | _DBPointerFT; } | |
| | | v8::Handle<v8::FunctionTemplate> BinDataFT() const { return | |
| | | _BinDataFT; } | |
| | | v8::Handle<v8::FunctionTemplate> NumberLongFT() const { return | |
| | | _NumberLongFT; } | |
| | | v8::Handle<v8::FunctionTemplate> NumberIntFT() const { return | |
| | | _NumberIntFT; } | |
| | | v8::Handle<v8::FunctionTemplate> TimestampFT() const { return | |
| | | _TimestampFT; } | |
| | | v8::Handle<v8::FunctionTemplate> MinKeyFT() const { return | |
| | | _MinKeyFT; } | |
| | | v8::Handle<v8::FunctionTemplate> MaxKeyFT() const { return | |
| | | _MaxKeyFT; } | |
| | | v8::Handle<v8::FunctionTemplate> MongoFT() const { return | |
| | | _MongoFT; } | |
| | | v8::Handle<v8::FunctionTemplate> DBFT() const { return | |
| | | _DBFT; } | |
| | | v8::Handle<v8::FunctionTemplate> DBCollectionFT() const { return | |
| | | _DBCollectionFT; } | |
| | | v8::Handle<v8::FunctionTemplate> DBQueryFT() const { return | |
| | | _DBQueryFT; } | |
| | | v8::Handle<v8::FunctionTemplate> InternalCursorFT() const { return | |
| | | _InternalCursorFT; } | |
| | | v8::Handle<v8::FunctionTemplate> LazyBsonFT() const { return | |
| | | _LazyBsonFT; } | |
| | | v8::Handle<v8::FunctionTemplate> ROBsonFT() const { return | |
| | | _ROBsonFT; } | |
| | | | |
| | | template <size_t N> | |
| | | v8::Handle<v8::String> strLitToV8(const char (&str)[N]) { | |
| | | // Note that _strLitMap is keyed on string pointer not string | |
| | | // value. This is OK because each string literal has a constant | |
| | | // pointer for the program's lifetime. This works best if (but | |
| | | does | |
| | | // not require) the linker interns all string literals giving | |
| | | // identical strings used in different places the same pointer. | |
| | | | |
| | | StrLitMap::iterator it = _strLitMap.find(str); | |
| | | if (it != _strLitMap.end()) | |
| | | return it->second; | |
| | | | |
| | | StringData sd (str, StringData::LiteralTag()); | |
| | | v8::Handle<v8::String> v8Str = v8StringData(sd); | |
| | | | |
| | | // We never need to Dispose since this should last as long as V | |
| | | 8Scope exists | |
| | | _strLitMap[str] = v8::Persistent<v8::String>::New(v8Str); | |
| | | | |
| | | return v8Str; | |
| | | } | |
| | | | |
| private: | | private: | |
| | | | |
| /** | | /** | |
|
| | | * Attach data to obj such that the data has the same lifetime as t | |
| | | he Object obj points to. | |
| | | * obj must have been created by either LazyBsonFT or ROBsonFT. | |
| | | */ | |
| | | void wrapBSONObject(v8::Handle<v8::Object> obj, BSONObj data, bool | |
| | | readOnly); | |
| | | | |
| | | /** | |
| * Trampoline to call a c++ function with a specific signature (V8S
cope*, v8::Arguments&). | | * Trampoline to call a c++ function with a specific signature (V8S
cope*, v8::Arguments&). | |
| * Handles interruption, exceptions, etc. | | * Handles interruption, exceptions, etc. | |
| */ | | */ | |
| static v8::Handle<v8::Value> v8Callback(const v8::Arguments& args); | | static v8::Handle<v8::Value> v8Callback(const v8::Arguments& args); | |
| | | | |
| /** | | /** | |
| * Interpreter agnostic 'Native Callback' trampoline. Note this is
only called | | * Interpreter agnostic 'Native Callback' trampoline. Note this is
only called | |
| * from v8Callback(). | | * from v8Callback(). | |
| */ | | */ | |
| static v8::Handle<v8::Value> nativeCallback(V8Scope* scope, const v
8::Arguments& args); | | static v8::Handle<v8::Value> nativeCallback(V8Scope* scope, const v
8::Arguments& args); | |
| | | | |
| skipping to change at line 345 | | skipping to change at line 393 | |
| * be supported. | | * be supported. | |
| */ | | */ | |
| void registerOpId(); | | void registerOpId(); | |
| | | | |
| /** | | /** | |
| * Unregister this scope with the mongo op id. | | * Unregister this scope with the mongo op id. | |
| */ | | */ | |
| void unregisterOpId(); | | void unregisterOpId(); | |
| | | | |
| /** | | /** | |
|
| * Creates a new instance of the MinKey object | | | |
| */ | | | |
| v8::Local<v8::Object> newMinKeyInstance(); | | | |
| | | | |
| /** | | | |
| * Creates a new instance of the MaxKey object | | | |
| */ | | | |
| v8::Local<v8::Object> newMaxKeyInstance(); | | | |
| | | | |
| /** | | | |
| * Create a new function; primarily used for BSON/V8 conversion. | | * Create a new function; primarily used for BSON/V8 conversion. | |
| */ | | */ | |
| v8::Local<v8::Value> newFunction(const char *code); | | v8::Local<v8::Value> newFunction(const char *code); | |
| | | | |
| template <typename _HandleType> | | template <typename _HandleType> | |
| bool checkV8ErrorState(const _HandleType& resultHandle, | | bool checkV8ErrorState(const _HandleType& resultHandle, | |
| const v8::TryCatch& try_catch, | | const v8::TryCatch& try_catch, | |
| bool reportError = true, | | bool reportError = true, | |
| bool assertOnError = true); | | bool assertOnError = true); | |
| | | | |
| V8ScriptEngine* _engine; | | V8ScriptEngine* _engine; | |
| | | | |
| v8::Persistent<v8::Context> _context; | | v8::Persistent<v8::Context> _context; | |
| v8::Persistent<v8::Object> _global; | | v8::Persistent<v8::Object> _global; | |
| string _error; | | string _error; | |
| vector<v8::Persistent<v8::Value> > _funcs; | | vector<v8::Persistent<v8::Value> > _funcs; | |
| | | | |
| enum ConnectState { NOT, LOCAL, EXTERNAL }; | | enum ConnectState { NOT, LOCAL, EXTERNAL }; | |
| ConnectState _connectState; | | ConnectState _connectState; | |
| | | | |
|
| v8::Persistent<v8::FunctionTemplate> lzFunctionTemplate; | | // These are all named after the JS constructor name + FT | |
| v8::Persistent<v8::ObjectTemplate> lzObjectTemplate; | | v8::Persistent<v8::FunctionTemplate> _ObjectIdFT; | |
| v8::Persistent<v8::ObjectTemplate> roObjectTemplate; | | v8::Persistent<v8::FunctionTemplate> _DBRefFT; | |
| v8::Persistent<v8::ObjectTemplate> lzArrayTemplate; | | v8::Persistent<v8::FunctionTemplate> _DBPointerFT; | |
| v8::Persistent<v8::ObjectTemplate> internalFieldObjects; | | v8::Persistent<v8::FunctionTemplate> _BinDataFT; | |
| | | v8::Persistent<v8::FunctionTemplate> _NumberLongFT; | |
| | | v8::Persistent<v8::FunctionTemplate> _NumberIntFT; | |
| | | v8::Persistent<v8::FunctionTemplate> _TimestampFT; | |
| | | v8::Persistent<v8::FunctionTemplate> _MinKeyFT; | |
| | | v8::Persistent<v8::FunctionTemplate> _MaxKeyFT; | |
| | | v8::Persistent<v8::FunctionTemplate> _MongoFT; | |
| | | v8::Persistent<v8::FunctionTemplate> _DBFT; | |
| | | v8::Persistent<v8::FunctionTemplate> _DBCollectionFT; | |
| | | v8::Persistent<v8::FunctionTemplate> _DBQueryFT; | |
| | | v8::Persistent<v8::FunctionTemplate> _InternalCursorFT; | |
| | | v8::Persistent<v8::FunctionTemplate> _LazyBsonFT; | |
| | | v8::Persistent<v8::FunctionTemplate> _ROBsonFT; | |
| | | | |
| | | v8::Persistent<v8::Function> _jsRegExpConstructor; | |
| | | | |
| v8::Isolate* _isolate; | | v8::Isolate* _isolate; | |
| V8CpuProfiler _cpuProfiler; | | V8CpuProfiler _cpuProfiler; | |
| | | | |
|
| | | // See comments in strLitToV8 | |
| | | typedef unordered_map<const char*, v8::Handle<v8::String> > StrLitM | |
| | | ap; | |
| | | StrLitMap _strLitMap; | |
| | | | |
| mongo::mutex _interruptLock; // protects interruption-related flags | | mongo::mutex _interruptLock; // protects interruption-related flags | |
| bool _inNativeExecution; // protected by _interruptLock | | bool _inNativeExecution; // protected by _interruptLock | |
| bool _pendingKill; // protected by _interruptLock | | bool _pendingKill; // protected by _interruptLock | |
| int _opId; // op id for this scope | | int _opId; // op id for this scope | |
| }; | | }; | |
| | | | |
|
| | | /// Helper to extract V8Scope for an Isolate | |
| | | inline V8Scope* getScope(v8::Isolate* isolate) { | |
| | | return static_cast<V8Scope*>(isolate->GetData()); | |
| | | } | |
| | | | |
| class V8ScriptEngine : public ScriptEngine { | | class V8ScriptEngine : public ScriptEngine { | |
| public: | | public: | |
| V8ScriptEngine(); | | V8ScriptEngine(); | |
| virtual ~V8ScriptEngine(); | | virtual ~V8ScriptEngine(); | |
| virtual Scope* createScope() { return new V8Scope(this); } | | virtual Scope* createScope() { return new V8Scope(this); } | |
| virtual void runTest() {} | | virtual void runTest() {} | |
| bool utf8Ok() const { return true; } | | bool utf8Ok() const { return true; } | |
| | | | |
| /** | | /** | |
| * Interrupt a single active v8 execution context | | * Interrupt a single active v8 execution context | |
| | | | |
End of changes. 19 change blocks. |
| 48 lines changed or deleted | | 135 lines changed or added | |
|
| jsregexp.h | | jsregexp.h | |
|
| /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- | | // Copyright 2012 the V8 project authors. All rights reserved. | |
| * | | // Redistribution and use in source and binary forms, with or without | |
| * ***** BEGIN LICENSE BLOCK ***** | | // modification, are permitted provided that the following conditions are | |
| * Version: MPL 1.1/GPL 2.0/LGPL 2.1 | | // met: | |
| * | | // | |
| * The contents of this file are subject to the Mozilla Public License Vers | | // * Redistributions of source code must retain the above copyright | |
| ion | | // notice, this list of conditions and the following disclaimer. | |
| * 1.1 (the "License"); you may not use this file except in compliance with | | // * Redistributions in binary form must reproduce the above | |
| * the License. You may obtain a copy of the License at | | // copyright notice, this list of conditions and the following | |
| * http://www.mozilla.org/MPL/ | | // disclaimer in the documentation and/or other materials provided | |
| * | | // with the distribution. | |
| * Software distributed under the License is distributed on an "AS IS" basi | | // * Neither the name of Google Inc. nor the names of its | |
| s, | | // contributors may be used to endorse or promote products derived | |
| * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License | | // from this software without specific prior written permission. | |
| * for the specific language governing rights and limitations under the | | // | |
| * License. | | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| * | | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| * The Original Code is Mozilla Communicator client code, released | | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| * March 31, 1998. | | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
| * | | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| * The Initial Developer of the Original Code is | | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
| * Netscape Communications Corporation. | | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
| * Portions created by the Initial Developer are Copyright (C) 1998 | | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
| * the Initial Developer. All Rights Reserved. | | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| * | | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| * Contributor(s): | | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| * | | | |
| * Alternatively, the contents of this file may be used under the terms of | | #ifndef V8_JSREGEXP_H_ | |
| * either of the GNU General Public License Version 2 or later (the "GPL"), | | #define V8_JSREGEXP_H_ | |
| * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL | | | |
| "), | | #include "allocation.h" | |
| * in which case the provisions of the GPL or the LGPL are applicable inste | | #include "assembler.h" | |
| ad | | #include "zone-inl.h" | |
| * of those above. If you wish to allow use of your version of this file on | | | |
| ly | | namespace v8 { | |
| * under the terms of either the GPL or the LGPL, and not to allow others t | | namespace internal { | |
| o | | | |
| * use your version of this file under the terms of the MPL, indicate your | | class NodeVisitor; | |
| * decision by deleting the provisions above and replace them with the noti | | class RegExpCompiler; | |
| ce | | class RegExpMacroAssembler; | |
| * and other provisions required by the GPL or the LGPL. If you do not dele | | class RegExpNode; | |
| te | | class RegExpTree; | |
| * the provisions above, a recipient may use your version of this file unde | | class BoyerMooreLookahead; | |
| r | | | |
| * the terms of any one of the MPL, the GPL or the LGPL. | | class RegExpImpl { | |
| * | | public: | |
| * ***** END LICENSE BLOCK ***** */ | | // Whether V8 is compiled with native regexp support or not. | |
| | | static bool UsesNativeRegExp() { | |
| #ifndef jsregexp_h___ | | #ifdef V8_INTERPRETED_REGEXP | |
| #define jsregexp_h___ | | return false; | |
| /* | | #else | |
| * JS regular expression interface. | | return true; | |
| */ | | | |
| #include <stddef.h> | | | |
| #include "jspubtd.h" | | | |
| #include "jsstr.h" | | | |
| | | | |
| #ifdef JS_THREADSAFE | | | |
| #include "jsdhash.h" | | | |
| #endif | | #endif | |
|
| | | } | |
| | | | |
| | | // Creates a regular expression literal in the old space. | |
| | | // This function calls the garbage collector if necessary. | |
| | | static Handle<Object> CreateRegExpLiteral(Handle<JSFunction> constructor, | |
| | | Handle<String> pattern, | |
| | | Handle<String> flags, | |
| | | bool* has_pending_exception); | |
| | | | |
| | | // Returns a string representation of a regular expression. | |
| | | // Implements RegExp.prototype.toString, see ECMA-262 section 15.10.6.4. | |
| | | // This function calls the garbage collector if necessary. | |
| | | static Handle<String> ToString(Handle<Object> value); | |
| | | | |
| | | // Parses the RegExp pattern and prepares the JSRegExp object with | |
| | | // generic data and choice of implementation - as well as what | |
| | | // the implementation wants to store in the data field. | |
| | | // Returns false if compilation fails. | |
| | | static Handle<Object> Compile(Handle<JSRegExp> re, | |
| | | Handle<String> pattern, | |
| | | Handle<String> flags, | |
| | | Zone* zone); | |
| | | | |
| | | // See ECMA-262 section 15.10.6.2. | |
| | | // This function calls the garbage collector if necessary. | |
| | | static Handle<Object> Exec(Handle<JSRegExp> regexp, | |
| | | Handle<String> subject, | |
| | | int index, | |
| | | Handle<JSArray> lastMatchInfo); | |
| | | | |
| | | // Prepares a JSRegExp object with Irregexp-specific data. | |
| | | static void IrregexpInitialize(Handle<JSRegExp> re, | |
| | | Handle<String> pattern, | |
| | | JSRegExp::Flags flags, | |
| | | int capture_register_count); | |
| | | | |
| | | static void AtomCompile(Handle<JSRegExp> re, | |
| | | Handle<String> pattern, | |
| | | JSRegExp::Flags flags, | |
| | | Handle<String> match_pattern); | |
| | | | |
| | | static Handle<Object> AtomExec(Handle<JSRegExp> regexp, | |
| | | Handle<String> subject, | |
| | | int index, | |
| | | Handle<JSArray> lastMatchInfo); | |
| | | | |
| | | enum IrregexpResult { RE_FAILURE = 0, RE_SUCCESS = 1, RE_EXCEPTION = -1 } | |
| | | ; | |
| | | | |
| | | // Prepare a RegExp for being executed one or more times (using | |
| | | // IrregexpExecOnce) on the subject. | |
| | | // This ensures that the regexp is compiled for the subject, and that | |
| | | // the subject is flat. | |
| | | // Returns the number of integer spaces required by IrregexpExecOnce | |
| | | // as its "registers" argument. If the regexp cannot be compiled, | |
| | | // an exception is set as pending, and this function returns negative. | |
| | | static int IrregexpPrepare(Handle<JSRegExp> regexp, | |
| | | Handle<String> subject); | |
| | | | |
| | | // Calculate the size of offsets vector for the case of global regexp | |
| | | // and the number of matches this vector is able to store. | |
| | | static int GlobalOffsetsVectorSize(Handle<JSRegExp> regexp, | |
| | | int registers_per_match, | |
| | | int* max_matches); | |
| | | | |
| | | // Execute a regular expression on the subject, starting from index. | |
| | | // If matching succeeds, return the number of matches. This can be large | |
| | | r | |
| | | // than one in the case of global regular expressions. | |
| | | // The captures and subcaptures are stored into the registers vector. | |
| | | // If matching fails, returns RE_FAILURE. | |
| | | // If execution fails, sets a pending exception and returns RE_EXCEPTION. | |
| | | static int IrregexpExecRaw(Handle<JSRegExp> regexp, | |
| | | Handle<String> subject, | |
| | | int index, | |
| | | Vector<int> registers); | |
| | | | |
| | | // Execute an Irregexp bytecode pattern. | |
| | | // On a successful match, the result is a JSArray containing | |
| | | // captured positions. On a failure, the result is the null value. | |
| | | // Returns an empty handle in case of an exception. | |
| | | static Handle<Object> IrregexpExec(Handle<JSRegExp> regexp, | |
| | | Handle<String> subject, | |
| | | int index, | |
| | | Handle<JSArray> lastMatchInfo); | |
| | | | |
| | | // Array index in the lastMatchInfo array. | |
| | | static const int kLastCaptureCount = 0; | |
| | | static const int kLastSubject = 1; | |
| | | static const int kLastInput = 2; | |
| | | static const int kFirstCapture = 3; | |
| | | static const int kLastMatchOverhead = 3; | |
| | | | |
| | | // Direct offset into the lastMatchInfo array. | |
| | | static const int kLastCaptureCountOffset = | |
| | | FixedArray::kHeaderSize + kLastCaptureCount * kPointerSize; | |
| | | static const int kLastSubjectOffset = | |
| | | FixedArray::kHeaderSize + kLastSubject * kPointerSize; | |
| | | static const int kLastInputOffset = | |
| | | FixedArray::kHeaderSize + kLastInput * kPointerSize; | |
| | | static const int kFirstCaptureOffset = | |
| | | FixedArray::kHeaderSize + kFirstCapture * kPointerSize; | |
| | | | |
| | | // Used to access the lastMatchInfo array. | |
| | | static int GetCapture(FixedArray* array, int index) { | |
| | | return Smi::cast(array->get(index + kFirstCapture))->value(); | |
| | | } | |
| | | | |
| | | static void SetLastCaptureCount(FixedArray* array, int to) { | |
| | | array->set(kLastCaptureCount, Smi::FromInt(to)); | |
| | | } | |
| | | | |
| | | static void SetLastSubject(FixedArray* array, String* to) { | |
| | | array->set(kLastSubject, to); | |
| | | } | |
| | | | |
| | | static void SetLastInput(FixedArray* array, String* to) { | |
| | | array->set(kLastInput, to); | |
| | | } | |
| | | | |
| | | static void SetCapture(FixedArray* array, int index, int to) { | |
| | | array->set(index + kFirstCapture, Smi::FromInt(to)); | |
| | | } | |
| | | | |
| | | static int GetLastCaptureCount(FixedArray* array) { | |
| | | return Smi::cast(array->get(kLastCaptureCount))->value(); | |
| | | } | |
| | | | |
| | | // For acting on the JSRegExp data FixedArray. | |
| | | static int IrregexpMaxRegisterCount(FixedArray* re); | |
| | | static void SetIrregexpMaxRegisterCount(FixedArray* re, int value); | |
| | | static int IrregexpNumberOfCaptures(FixedArray* re); | |
| | | static int IrregexpNumberOfRegisters(FixedArray* re); | |
| | | static ByteArray* IrregexpByteCode(FixedArray* re, bool is_ascii); | |
| | | static Code* IrregexpNativeCode(FixedArray* re, bool is_ascii); | |
| | | | |
| | | // Limit the space regexps take up on the heap. In order to limit this w | |
| | | e | |
| | | // would like to keep track of the amount of regexp code on the heap. Th | |
| | | is | |
| | | // is not tracked, however. As a conservative approximation we track the | |
| | | // total regexp code compiled including code that has subsequently been f | |
| | | reed | |
| | | // and the total executable memory at any point. | |
| | | static const int kRegExpExecutableMemoryLimit = 16 * MB; | |
| | | static const int kRegWxpCompiledLimit = 1 * MB; | |
| | | | |
| | | private: | |
| | | static String* last_ascii_string_; | |
| | | static String* two_byte_cached_string_; | |
| | | | |
| | | static bool CompileIrregexp( | |
| | | Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii); | |
| | | static inline bool EnsureCompiledIrregexp( | |
| | | Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii); | |
| | | | |
| | | // Set the subject cache. The previous string buffer is not deleted, so | |
| | | the | |
| | | // caller should ensure that it doesn't leak. | |
| | | static void SetSubjectCache(String* subject, | |
| | | char* utf8_subject, | |
| | | int uft8_length, | |
| | | int character_position, | |
| | | int utf8_position); | |
| | | | |
| | | // A one element cache of the last utf8_subject string and its length. T | |
| | | he | |
| | | // subject JS String object is cached in the heap. We also cache a | |
| | | // translation between position and utf8 position. | |
| | | static char* utf8_subject_cache_; | |
| | | static int utf8_length_cache_; | |
| | | static int utf8_position_; | |
| | | static int character_position_; | |
| | | }; | |
| | | | |
| | | // Represents the location of one element relative to the intersection of | |
| | | // two sets. Corresponds to the four areas of a Venn diagram. | |
| | | enum ElementInSetsRelation { | |
| | | kInsideNone = 0, | |
| | | kInsideFirst = 1, | |
| | | kInsideSecond = 2, | |
| | | kInsideBoth = 3 | |
| | | }; | |
| | | | |
| | | // Represents code units in the range from from_ to to_, both ends are | |
| | | // inclusive. | |
| | | class CharacterRange { | |
| | | public: | |
| | | CharacterRange() : from_(0), to_(0) { } | |
| | | // For compatibility with the CHECK_OK macro | |
| | | CharacterRange(void* null) { ASSERT_EQ(NULL, null); } //NOLINT | |
| | | CharacterRange(uc16 from, uc16 to) : from_(from), to_(to) { } | |
| | | static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges, | |
| | | Zone* zone); | |
| | | static Vector<const int> GetWordBounds(); | |
| | | static inline CharacterRange Singleton(uc16 value) { | |
| | | return CharacterRange(value, value); | |
| | | } | |
| | | static inline CharacterRange Range(uc16 from, uc16 to) { | |
| | | ASSERT(from <= to); | |
| | | return CharacterRange(from, to); | |
| | | } | |
| | | static inline CharacterRange Everything() { | |
| | | return CharacterRange(0, 0xFFFF); | |
| | | } | |
| | | bool Contains(uc16 i) { return from_ <= i && i <= to_; } | |
| | | uc16 from() const { return from_; } | |
| | | void set_from(uc16 value) { from_ = value; } | |
| | | uc16 to() const { return to_; } | |
| | | void set_to(uc16 value) { to_ = value; } | |
| | | bool is_valid() { return from_ <= to_; } | |
| | | bool IsEverything(uc16 max) { return from_ == 0 && to_ >= max; } | |
| | | bool IsSingleton() { return (from_ == to_); } | |
| | | void AddCaseEquivalents(ZoneList<CharacterRange>* ranges, bool is_ascii, | |
| | | Zone* zone); | |
| | | static void Split(ZoneList<CharacterRange>* base, | |
| | | Vector<const int> overlay, | |
| | | ZoneList<CharacterRange>** included, | |
| | | ZoneList<CharacterRange>** excluded, | |
| | | Zone* zone); | |
| | | // Whether a range list is in canonical form: Ranges ordered by from valu | |
| | | e, | |
| | | // and ranges non-overlapping and non-adjacent. | |
| | | static bool IsCanonical(ZoneList<CharacterRange>* ranges); | |
| | | // Convert range list to canonical form. The characters covered by the ra | |
| | | nges | |
| | | // will still be the same, but no character is in more than one range, an | |
| | | d | |
| | | // adjacent ranges are merged. The resulting list may be shorter than the | |
| | | // original, but cannot be longer. | |
| | | static void Canonicalize(ZoneList<CharacterRange>* ranges); | |
| | | // Negate the contents of a character range in canonical form. | |
| | | static void Negate(ZoneList<CharacterRange>* src, | |
| | | ZoneList<CharacterRange>* dst, | |
| | | Zone* zone); | |
| | | static const int kStartMarker = (1 << 24); | |
| | | static const int kPayloadMask = (1 << 24) - 1; | |
| | | | |
| | | private: | |
| | | uc16 from_; | |
| | | uc16 to_; | |
| | | }; | |
| | | | |
| | | // A set of unsigned integers that behaves especially well on small | |
| | | // integers (< 32). May do zone-allocation. | |
| | | class OutSet: public ZoneObject { | |
| | | public: | |
| | | OutSet() : first_(0), remaining_(NULL), successors_(NULL) { } | |
| | | OutSet* Extend(unsigned value, Zone* zone); | |
| | | bool Get(unsigned value); | |
| | | static const unsigned kFirstLimit = 32; | |
| | | | |
| | | private: | |
| | | // Destructively set a value in this set. In most cases you want | |
| | | // to use Extend instead to ensure that only one instance exists | |
| | | // that contains the same values. | |
| | | void Set(unsigned value, Zone* zone); | |
| | | | |
| | | // The successors are a list of sets that contain the same values | |
| | | // as this set and the one more value that is not present in this | |
| | | // set. | |
| | | ZoneList<OutSet*>* successors(Zone* zone) { return successors_; } | |
| | | | |
| | | OutSet(uint32_t first, ZoneList<unsigned>* remaining) | |
| | | : first_(first), remaining_(remaining), successors_(NULL) { } | |
| | | uint32_t first_; | |
| | | ZoneList<unsigned>* remaining_; | |
| | | ZoneList<OutSet*>* successors_; | |
| | | friend class Trace; | |
| | | }; | |
| | | | |
| | | // A mapping from integers, specified as ranges, to a set of integers. | |
| | | // Used for mapping character ranges to choices. | |
| | | class DispatchTable : public ZoneObject { | |
| | | public: | |
| | | explicit DispatchTable(Zone* zone) : tree_(zone) { } | |
| | | | |
| | | class Entry { | |
| | | public: | |
| | | Entry() : from_(0), to_(0), out_set_(NULL) { } | |
| | | Entry(uc16 from, uc16 to, OutSet* out_set) | |
| | | : from_(from), to_(to), out_set_(out_set) { } | |
| | | uc16 from() { return from_; } | |
| | | uc16 to() { return to_; } | |
| | | void set_to(uc16 value) { to_ = value; } | |
| | | void AddValue(int value, Zone* zone) { | |
| | | out_set_ = out_set_->Extend(value, zone); | |
| | | } | |
| | | OutSet* out_set() { return out_set_; } | |
| | | private: | |
| | | uc16 from_; | |
| | | uc16 to_; | |
| | | OutSet* out_set_; | |
| | | }; | |
| | | | |
| | | class Config { | |
| | | public: | |
| | | typedef uc16 Key; | |
| | | typedef Entry Value; | |
| | | static const uc16 kNoKey; | |
| | | static const Entry NoValue() { return Value(); } | |
| | | static inline int Compare(uc16 a, uc16 b) { | |
| | | if (a == b) | |
| | | return 0; | |
| | | else if (a < b) | |
| | | return -1; | |
| | | else | |
| | | return 1; | |
| | | } | |
| | | }; | |
| | | | |
| | | void AddRange(CharacterRange range, int value, Zone* zone); | |
| | | OutSet* Get(uc16 value); | |
| | | void Dump(); | |
| | | | |
| | | template <typename Callback> | |
| | | void ForEach(Callback* callback) { | |
| | | return tree()->ForEach(callback); | |
| | | } | |
| | | | |
| | | private: | |
| | | // There can't be a static empty set since it allocates its | |
| | | // successors in a zone and caches them. | |
| | | OutSet* empty() { return &empty_; } | |
| | | OutSet empty_; | |
| | | ZoneSplayTree<Config>* tree() { return &tree_; } | |
| | | ZoneSplayTree<Config> tree_; | |
| | | }; | |
| | | | |
| | | #define FOR_EACH_NODE_TYPE(VISIT) \ | |
| | | VISIT(End) \ | |
| | | VISIT(Action) \ | |
| | | VISIT(Choice) \ | |
| | | VISIT(BackReference) \ | |
| | | VISIT(Assertion) \ | |
| | | VISIT(Text) | |
| | | | |
| | | #define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \ | |
| | | VISIT(Disjunction) \ | |
| | | VISIT(Alternative) \ | |
| | | VISIT(Assertion) \ | |
| | | VISIT(CharacterClass) \ | |
| | | VISIT(Atom) \ | |
| | | VISIT(Quantifier) \ | |
| | | VISIT(Capture) \ | |
| | | VISIT(Lookahead) \ | |
| | | VISIT(BackReference) \ | |
| | | VISIT(Empty) \ | |
| | | VISIT(Text) | |
| | | | |
| | | #define FORWARD_DECLARE(Name) class RegExp##Name; | |
| | | FOR_EACH_REG_EXP_TREE_TYPE(FORWARD_DECLARE) | |
| | | #undef FORWARD_DECLARE | |
| | | | |
| | | class TextElement { | |
| | | public: | |
| | | enum Type {UNINITIALIZED, ATOM, CHAR_CLASS}; | |
| | | TextElement() : type(UNINITIALIZED) { } | |
| | | explicit TextElement(Type t) : type(t), cp_offset(-1) { } | |
| | | static TextElement Atom(RegExpAtom* atom); | |
| | | static TextElement CharClass(RegExpCharacterClass* char_class); | |
| | | int length(); | |
| | | Type type; | |
| | | union { | |
| | | RegExpAtom* u_atom; | |
| | | RegExpCharacterClass* u_char_class; | |
| | | } data; | |
| | | int cp_offset; | |
| | | }; | |
| | | | |
| | | class Trace; | |
| | | | |
| | | struct NodeInfo { | |
| | | NodeInfo() | |
| | | : being_analyzed(false), | |
| | | been_analyzed(false), | |
| | | follows_word_interest(false), | |
| | | follows_newline_interest(false), | |
| | | follows_start_interest(false), | |
| | | at_end(false), | |
| | | visited(false), | |
| | | replacement_calculated(false) { } | |
| | | | |
| | | // Returns true if the interests and assumptions of this node | |
| | | // matches the given one. | |
| | | bool Matches(NodeInfo* that) { | |
| | | return (at_end == that->at_end) && | |
| | | (follows_word_interest == that->follows_word_interest) && | |
| | | (follows_newline_interest == that->follows_newline_interest) && | |
| | | (follows_start_interest == that->follows_start_interest); | |
| | | } | |
| | | | |
| | | // Updates the interests of this node given the interests of the | |
| | | // node preceding it. | |
| | | void AddFromPreceding(NodeInfo* that) { | |
| | | at_end |= that->at_end; | |
| | | follows_word_interest |= that->follows_word_interest; | |
| | | follows_newline_interest |= that->follows_newline_interest; | |
| | | follows_start_interest |= that->follows_start_interest; | |
| | | } | |
| | | | |
| | | bool HasLookbehind() { | |
| | | return follows_word_interest || | |
| | | follows_newline_interest || | |
| | | follows_start_interest; | |
| | | } | |
| | | | |
| | | // Sets the interests of this node to include the interests of the | |
| | | // following node. | |
| | | void AddFromFollowing(NodeInfo* that) { | |
| | | follows_word_interest |= that->follows_word_interest; | |
| | | follows_newline_interest |= that->follows_newline_interest; | |
| | | follows_start_interest |= that->follows_start_interest; | |
| | | } | |
| | | | |
| | | void ResetCompilationState() { | |
| | | being_analyzed = false; | |
| | | been_analyzed = false; | |
| | | } | |
| | | | |
| | | bool being_analyzed: 1; | |
| | | bool been_analyzed: 1; | |
| | | | |
| | | // These bits are set of this node has to know what the preceding | |
| | | // character was. | |
| | | bool follows_word_interest: 1; | |
| | | bool follows_newline_interest: 1; | |
| | | bool follows_start_interest: 1; | |
| | | | |
| | | bool at_end: 1; | |
| | | bool visited: 1; | |
| | | bool replacement_calculated: 1; | |
| | | }; | |
| | | | |
| | | // Details of a quick mask-compare check that can look ahead in the | |
| | | // input stream. | |
| | | class QuickCheckDetails { | |
| | | public: | |
| | | QuickCheckDetails() | |
| | | : characters_(0), | |
| | | mask_(0), | |
| | | value_(0), | |
| | | cannot_match_(false) { } | |
| | | explicit QuickCheckDetails(int characters) | |
| | | : characters_(characters), | |
| | | mask_(0), | |
| | | value_(0), | |
| | | cannot_match_(false) { } | |
| | | bool Rationalize(bool ascii); | |
| | | // Merge in the information from another branch of an alternation. | |
| | | void Merge(QuickCheckDetails* other, int from_index); | |
| | | // Advance the current position by some amount. | |
| | | void Advance(int by, bool ascii); | |
| | | void Clear(); | |
| | | bool cannot_match() { return cannot_match_; } | |
| | | void set_cannot_match() { cannot_match_ = true; } | |
| | | struct Position { | |
| | | Position() : mask(0), value(0), determines_perfectly(false) { } | |
| | | uc16 mask; | |
| | | uc16 value; | |
| | | bool determines_perfectly; | |
| | | }; | |
| | | int characters() { return characters_; } | |
| | | void set_characters(int characters) { characters_ = characters; } | |
| | | Position* positions(int index) { | |
| | | ASSERT(index >= 0); | |
| | | ASSERT(index < characters_); | |
| | | return positions_ + index; | |
| | | } | |
| | | uint32_t mask() { return mask_; } | |
| | | uint32_t value() { return value_; } | |
| | | | |
| | | private: | |
| | | // How many characters do we have quick check information from. This is | |
| | | // the same for all branches of a choice node. | |
| | | int characters_; | |
| | | Position positions_[4]; | |
| | | // These values are the condensate of the above array after Rationalize() | |
| | | . | |
| | | uint32_t mask_; | |
| | | uint32_t value_; | |
| | | // If set to true, there is no way this quick check can match at all. | |
| | | // E.g., if it requires to be at the start of the input, and isn't. | |
| | | bool cannot_match_; | |
| | | }; | |
| | | | |
| | | extern int kUninitializedRegExpNodePlaceHolder; | |
| | | | |
|
| struct JSRegExpStatics { | | class RegExpNode: public ZoneObject { | |
| JSString *input; /* input string to match (perl $_, GC root) | | public: | |
| */ | | explicit RegExpNode(Zone* zone) | |
| JSBool multiline; /* whether input contains newlines (perl $* | | : replacement_(NULL), trace_count_(0), zone_(zone) { | |
| ) */ | | bm_info_[0] = bm_info_[1] = NULL; | |
| uint16 parenCount; /* number of valid elements in parens[] */ | | } | |
| uint16 moreLength; /* number of allocated elements in morePare | | virtual ~RegExpNode(); | |
| ns */ | | virtual void Accept(NodeVisitor* visitor) = 0; | |
| JSSubString parens[9]; /* last set of parens matched (perl $1, $2) | | // Generates a goto to this node or actually generates the code at this p | |
| */ | | oint. | |
| JSSubString *moreParens; /* null or realloc'd vector for $10, etc. * | | virtual void Emit(RegExpCompiler* compiler, Trace* trace) = 0; | |
| / | | // How many characters must this node consume at a minimum in order to | |
| JSSubString lastMatch; /* last string matched (perl $&) */ | | // succeed. If we have found at least 'still_to_find' characters that | |
| JSSubString lastParen; /* last paren matched (perl $+) */ | | // must be consumed there is no need to ask any following nodes whether | |
| JSSubString leftContext; /* input to left of last match (perl $`) */ | | // they are sure to eat any more characters. The not_at_start argument i | |
| JSSubString rightContext; /* input to right of last match (perl $') * | | s | |
| / | | // used to indicate that we know we are not at the start of the input. I | |
| }; | | n | |
| | | // this case anchored branches will always fail and can be ignored when | |
| /* | | // determining how many characters are consumed on success. | |
| * This struct holds a bitmap representation of a class from a regexp. | | virtual int EatsAtLeast(int still_to_find, | |
| * There's a list of these referenced by the classList field in the JSRegEx | | int recursion_depth, | |
| p | | bool not_at_start) = 0; | |
| * struct below. The initial state has startIndex set to the offset in the | | // Emits some quick code that checks whether the preloaded characters mat | |
| * original regexp source of the beginning of the class contents. The first | | ch. | |
| * use of the class converts the source representation into a bitmap. | | // Falls through on certain failure, jumps to the label on possible succe | |
| * | | ss. | |
| */ | | // If the node cannot make a quick check it does nothing and returns fals | |
| typedef struct RECharSet { | | e. | |
| JSPackedBool converted; | | bool EmitQuickCheck(RegExpCompiler* compiler, | |
| JSPackedBool sense; | | Trace* trace, | |
| uint16 length; | | bool preload_has_checked_bounds, | |
| union { | | Label* on_possible_success, | |
| uint8 *bits; | | QuickCheckDetails* details_return, | |
| struct { | | bool fall_through_on_failure); | |
| size_t startIndex; | | // For a given number of characters this returns a mask and a value. The | |
| size_t length; | | // next n characters are anded with the mask and compared with the value. | |
| } src; | | // A comparison failure indicates the node cannot match the next n charac | |
| } u; | | ters. | |
| } RECharSet; | | // A comparison success indicates the node may match. | |
| | | virtual void GetQuickCheckDetails(QuickCheckDetails* details, | |
| /* | | RegExpCompiler* compiler, | |
| * This macro is safe because moreParens is guaranteed to be allocated and | | int characters_filled_in, | |
| big | | bool not_at_start) = 0; | |
| * enough to hold parenCount, or else be null when parenCount is 0. | | static const int kNodeIsTooComplexForGreedyLoops = -1; | |
| */ | | virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoo | |
| #define REGEXP_PAREN_SUBSTRING(res, num) | | ps; } | |
| \ | | // Only returns the successor for a text node of length 1 that matches an | |
| (((jsuint)(num) < (jsuint)(res)->parenCount) | | y | |
| \ | | // character and that has no guards on it. | |
| ? ((jsuint)(num) < 9) | | virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( | |
| \ | | RegExpCompiler* compiler) { | |
| ? &(res)->parens[num] | | return NULL; | |
| \ | | } | |
| : &(res)->moreParens[(num) - 9] | | | |
| \ | | // Collects information on the possible code units (mod 128) that can mat | |
| : &js_EmptySubString) | | ch if | |
| | | // we look forward. This is used for a Boyer-Moore-like string searching | |
| typedef struct RENode RENode; | | // implementation. TODO(erikcorry): This should share more code with | |
| | | // EatsAtLeast, GetQuickCheckDetails. The budget argument is used to lim | |
| struct JSRegExp { | | it | |
| jsrefcount nrefs; /* reference count */ | | // the number of nodes we are willing to look at in order to create this | |
| uint16 flags; /* flags, see jsapi.h's JSREG_* defines */ | | data. | |
| uint16 cloneIndex; /* index in fp->vars or funobj->slots of | | static const int kFillInBMBudget = 200; | |
| cloned regexp object */ | | virtual void FillInBMInfo(int offset, | |
| size_t parenCount; /* number of parenthesized submatches */ | | int recursion_depth, | |
| size_t classCount; /* count [...] bitmaps */ | | int budget, | |
| RECharSet *classList; /* list of [...] bitmaps */ | | BoyerMooreLookahead* bm, | |
| JSString *source; /* locked source string, sans // */ | | bool not_at_start) { | |
| jsbytecode program[1]; /* regular expression bytecode */ | | UNREACHABLE(); | |
| }; | | } | |
| | | | |
| extern JSRegExp * | | // If we know that the input is ASCII then there are some nodes that can | |
| js_NewRegExp(JSContext *cx, JSTokenStream *ts, | | // never match. This method returns a node that can be substituted for | |
| JSString *str, uintN flags, JSBool flat); | | // itself, or NULL if the node can never match. | |
| | | virtual RegExpNode* FilterASCII(int depth) { return this; } | |
| extern JSRegExp * | | // Helper for FilterASCII. | |
| js_NewRegExpOpt(JSContext *cx, JSTokenStream *ts, | | RegExpNode* replacement() { | |
| JSString *str, JSString *opt, JSBool flat); | | ASSERT(info()->replacement_calculated); | |
| | | return replacement_; | |
| #define HOLD_REGEXP(cx, re) JS_ATOMIC_INCREMENT(&(re)->nrefs) | | } | |
| #define DROP_REGEXP(cx, re) js_DestroyRegExp(cx, re) | | RegExpNode* set_replacement(RegExpNode* replacement) { | |
| | | info()->replacement_calculated = true; | |
| extern void | | replacement_ = replacement; | |
| js_DestroyRegExp(JSContext *cx, JSRegExp *re); | | return replacement; // For convenience. | |
| | | } | |
| /* | | | |
| * Execute re on input str at *indexp, returning null in *rval on mismatch. | | // We want to avoid recalculating the lookahead info, so we store it on t | |
| * On match, return true if test is true, otherwise return an array object. | | he | |
| * Update *indexp and cx->regExpStatics always on match. | | // node. Only info that is for this node is stored. We can tell that th | |
| */ | | e | |
| extern JSBool | | // info is for this node when offset == 0, so the information is calculat | |
| js_ExecuteRegExp(JSContext *cx, JSRegExp *re, JSString *str, size_t *indexp | | ed | |
| , | | // relative to this node. | |
| JSBool test, jsval *rval); | | void SaveBMInfo(BoyerMooreLookahead* bm, bool not_at_start, int offset) { | |
| | | if (offset == 0) set_bm_info(not_at_start, bm); | |
| /* | | } | |
| * These two add and remove GC roots, respectively, so their calls must be | | | |
| * well-ordered. | | Label* label() { return &label_; } | |
| */ | | // If non-generic code is generated for a node (i.e. the node is not at t | |
| extern JSBool | | he | |
| js_InitRegExpStatics(JSContext *cx, JSRegExpStatics *res); | | // start of the trace) then it cannot be reused. This variable sets a li | |
| | | mit | |
| extern void | | // on how often we allow that to happen before we insist on starting a ne | |
| js_FreeRegExpStatics(JSContext *cx, JSRegExpStatics *res); | | w | |
| | | // trace and generating generic code for a node that can be reused by flu | |
| #define JSVAL_IS_REGEXP(cx, v) | | shing | |
| \ | | // the deferred actions in the current trace and generating a goto. | |
| (JSVAL_IS_OBJECT(v) && JSVAL_TO_OBJECT(v) && | | static const int kMaxCopiesCodeGenerated = 10; | |
| \ | | | |
| OBJ_GET_CLASS(cx, JSVAL_TO_OBJECT(v)) == &js_RegExpClass) | | NodeInfo* info() { return &info_; } | |
| | | | |
| extern JSClass js_RegExpClass; | | BoyerMooreLookahead* bm_info(bool not_at_start) { | |
| | | return bm_info_[not_at_start ? 1 : 0]; | |
| extern JSObject * | | } | |
| js_InitRegExpClass(JSContext *cx, JSObject *obj); | | | |
| | | Zone* zone() const { return zone_; } | |
| /* | | | |
| * Export js_regexp_toString to the decompiler. | | protected: | |
| */ | | enum LimitResult { DONE, CONTINUE }; | |
| extern JSBool | | RegExpNode* replacement_; | |
| js_regexp_toString(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, | | | |
| jsval *rval); | | LimitResult LimitVersions(RegExpCompiler* compiler, Trace* trace); | |
| | | | |
| /* | | void set_bm_info(bool not_at_start, BoyerMooreLookahead* bm) { | |
| * Create, serialize/deserialize, or clone a RegExp object. | | bm_info_[not_at_start ? 1 : 0] = bm; | |
| */ | | } | |
| extern JSObject * | | | |
| js_NewRegExpObject(JSContext *cx, JSTokenStream *ts, | | private: | |
| jschar *chars, size_t length, uintN flags); | | static const int kFirstCharBudget = 10; | |
| | | Label label_; | |
| extern JSBool | | NodeInfo info_; | |
| js_XDRRegExp(JSXDRState *xdr, JSObject **objp); | | // This variable keeps track of how many times code has been generated fo | |
| | | r | |
| extern JSObject * | | // this node (in different traces). We don't keep track of where the | |
| js_CloneRegExpObject(JSContext *cx, JSObject *obj, JSObject *parent); | | // generated code is located unless the code is generated at the start of | |
| | | // a trace, in which case it is generic and can be reused by flushing the | |
| /* | | // deferred operations in the current trace and generating a goto. | |
| * Get and set the per-object (clone or clone-parent) lastIndex slot. | | int trace_count_; | |
| */ | | BoyerMooreLookahead* bm_info_[2]; | |
| extern JSBool | | | |
| js_GetLastIndex(JSContext *cx, JSObject *obj, jsdouble *lastIndex); | | Zone* zone_; | |
| | | }; | |
| | | | |
| | | // A simple closed interval. | |
| | | class Interval { | |
| | | public: | |
| | | Interval() : from_(kNone), to_(kNone) { } | |
| | | Interval(int from, int to) : from_(from), to_(to) { } | |
| | | Interval Union(Interval that) { | |
| | | if (that.from_ == kNone) | |
| | | return *this; | |
| | | else if (from_ == kNone) | |
| | | return that; | |
| | | else | |
| | | return Interval(Min(from_, that.from_), Max(to_, that.to_)); | |
| | | } | |
| | | bool Contains(int value) { | |
| | | return (from_ <= value) && (value <= to_); | |
| | | } | |
| | | bool is_empty() { return from_ == kNone; } | |
| | | int from() const { return from_; } | |
| | | int to() const { return to_; } | |
| | | static Interval Empty() { return Interval(); } | |
| | | static const int kNone = -1; | |
| | | private: | |
| | | int from_; | |
| | | int to_; | |
| | | }; | |
| | | | |
| | | class SeqRegExpNode: public RegExpNode { | |
| | | public: | |
| | | explicit SeqRegExpNode(RegExpNode* on_success) | |
| | | : RegExpNode(on_success->zone()), on_success_(on_success) { } | |
| | | RegExpNode* on_success() { return on_success_; } | |
| | | void set_on_success(RegExpNode* node) { on_success_ = node; } | |
| | | virtual RegExpNode* FilterASCII(int depth); | |
| | | virtual void FillInBMInfo(int offset, | |
| | | int recursion_depth, | |
| | | int budget, | |
| | | BoyerMooreLookahead* bm, | |
| | | bool not_at_start) { | |
| | | on_success_->FillInBMInfo( | |
| | | offset, recursion_depth + 1, budget - 1, bm, not_at_start); | |
| | | if (offset == 0) set_bm_info(not_at_start, bm); | |
| | | } | |
| | | | |
| | | protected: | |
| | | RegExpNode* FilterSuccessor(int depth); | |
| | | | |
| | | private: | |
| | | RegExpNode* on_success_; | |
| | | }; | |
| | | | |
| | | class ActionNode: public SeqRegExpNode { | |
| | | public: | |
| | | enum Type { | |
| | | SET_REGISTER, | |
| | | INCREMENT_REGISTER, | |
| | | STORE_POSITION, | |
| | | BEGIN_SUBMATCH, | |
| | | POSITIVE_SUBMATCH_SUCCESS, | |
| | | EMPTY_MATCH_CHECK, | |
| | | CLEAR_CAPTURES | |
| | | }; | |
| | | static ActionNode* SetRegister(int reg, int val, RegExpNode* on_success); | |
| | | static ActionNode* IncrementRegister(int reg, RegExpNode* on_success); | |
| | | static ActionNode* StorePosition(int reg, | |
| | | bool is_capture, | |
| | | RegExpNode* on_success); | |
| | | static ActionNode* ClearCaptures(Interval range, RegExpNode* on_success); | |
| | | static ActionNode* BeginSubmatch(int stack_pointer_reg, | |
| | | int position_reg, | |
| | | RegExpNode* on_success); | |
| | | static ActionNode* PositiveSubmatchSuccess(int stack_pointer_reg, | |
| | | int restore_reg, | |
| | | int clear_capture_count, | |
| | | int clear_capture_from, | |
| | | RegExpNode* on_success); | |
| | | static ActionNode* EmptyMatchCheck(int start_register, | |
| | | int repetition_register, | |
| | | int repetition_limit, | |
| | | RegExpNode* on_success); | |
| | | virtual void Accept(NodeVisitor* visitor); | |
| | | virtual void Emit(RegExpCompiler* compiler, Trace* trace); | |
| | | virtual int EatsAtLeast(int still_to_find, | |
| | | int recursion_depth, | |
| | | bool not_at_start); | |
| | | virtual void GetQuickCheckDetails(QuickCheckDetails* details, | |
| | | RegExpCompiler* compiler, | |
| | | int filled_in, | |
| | | bool not_at_start) { | |
| | | return on_success()->GetQuickCheckDetails( | |
| | | details, compiler, filled_in, not_at_start); | |
| | | } | |
| | | virtual void FillInBMInfo(int offset, | |
| | | int recursion_depth, | |
| | | int budget, | |
| | | BoyerMooreLookahead* bm, | |
| | | bool not_at_start); | |
| | | Type type() { return type_; } | |
| | | // TODO(erikcorry): We should allow some action nodes in greedy loops. | |
| | | virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoo | |
| | | ps; } | |
| | | | |
| | | private: | |
| | | union { | |
| | | struct { | |
| | | int reg; | |
| | | int value; | |
| | | } u_store_register; | |
| | | struct { | |
| | | int reg; | |
| | | } u_increment_register; | |
| | | struct { | |
| | | int reg; | |
| | | bool is_capture; | |
| | | } u_position_register; | |
| | | struct { | |
| | | int stack_pointer_register; | |
| | | int current_position_register; | |
| | | int clear_register_count; | |
| | | int clear_register_from; | |
| | | } u_submatch; | |
| | | struct { | |
| | | int start_register; | |
| | | int repetition_register; | |
| | | int repetition_limit; | |
| | | } u_empty_match_check; | |
| | | struct { | |
| | | int range_from; | |
| | | int range_to; | |
| | | } u_clear_captures; | |
| | | } data_; | |
| | | ActionNode(Type type, RegExpNode* on_success) | |
| | | : SeqRegExpNode(on_success), | |
| | | type_(type) { } | |
| | | Type type_; | |
| | | friend class DotPrinter; | |
| | | }; | |
| | | | |
| | | class TextNode: public SeqRegExpNode { | |
| | | public: | |
| | | TextNode(ZoneList<TextElement>* elms, | |
| | | RegExpNode* on_success) | |
| | | : SeqRegExpNode(on_success), | |
| | | elms_(elms) { } | |
| | | TextNode(RegExpCharacterClass* that, | |
| | | RegExpNode* on_success) | |
| | | : SeqRegExpNode(on_success), | |
| | | elms_(new(zone()) ZoneList<TextElement>(1, zone())) { | |
| | | elms_->Add(TextElement::CharClass(that), zone()); | |
| | | } | |
| | | virtual void Accept(NodeVisitor* visitor); | |
| | | virtual void Emit(RegExpCompiler* compiler, Trace* trace); | |
| | | virtual int EatsAtLeast(int still_to_find, | |
| | | int recursion_depth, | |
| | | bool not_at_start); | |
| | | virtual void GetQuickCheckDetails(QuickCheckDetails* details, | |
| | | RegExpCompiler* compiler, | |
| | | int characters_filled_in, | |
| | | bool not_at_start); | |
| | | ZoneList<TextElement>* elements() { return elms_; } | |
| | | void MakeCaseIndependent(bool is_ascii); | |
| | | virtual int GreedyLoopTextLength(); | |
| | | virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( | |
| | | RegExpCompiler* compiler); | |
| | | virtual void FillInBMInfo(int offset, | |
| | | int recursion_depth, | |
| | | int budget, | |
| | | BoyerMooreLookahead* bm, | |
| | | bool not_at_start); | |
| | | void CalculateOffsets(); | |
| | | virtual RegExpNode* FilterASCII(int depth); | |
| | | | |
| | | private: | |
| | | enum TextEmitPassType { | |
| | | NON_ASCII_MATCH, // Check for characters that can't match. | |
| | | SIMPLE_CHARACTER_MATCH, // Case-dependent single character check. | |
| | | NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equi | |
| | | vs. | |
| | | CASE_CHARACTER_MATCH, // Case-independent single character check | |
| | | . | |
| | | CHARACTER_CLASS_MATCH // Character class. | |
| | | }; | |
| | | static bool SkipPass(int pass, bool ignore_case); | |
| | | static const int kFirstRealPass = SIMPLE_CHARACTER_MATCH; | |
| | | static const int kLastPass = CHARACTER_CLASS_MATCH; | |
| | | void TextEmitPass(RegExpCompiler* compiler, | |
| | | TextEmitPassType pass, | |
| | | bool preloaded, | |
| | | Trace* trace, | |
| | | bool first_element_checked, | |
| | | int* checked_up_to); | |
| | | int Length(); | |
| | | ZoneList<TextElement>* elms_; | |
| | | }; | |
| | | | |
| | | class AssertionNode: public SeqRegExpNode { | |
| | | public: | |
| | | enum AssertionNodeType { | |
| | | AT_END, | |
| | | AT_START, | |
| | | AT_BOUNDARY, | |
| | | AT_NON_BOUNDARY, | |
| | | AFTER_NEWLINE | |
| | | }; | |
| | | static AssertionNode* AtEnd(RegExpNode* on_success) { | |
| | | return new(on_success->zone()) AssertionNode(AT_END, on_success); | |
| | | } | |
| | | static AssertionNode* AtStart(RegExpNode* on_success) { | |
| | | return new(on_success->zone()) AssertionNode(AT_START, on_success); | |
| | | } | |
| | | static AssertionNode* AtBoundary(RegExpNode* on_success) { | |
| | | return new(on_success->zone()) AssertionNode(AT_BOUNDARY, on_success); | |
| | | } | |
| | | static AssertionNode* AtNonBoundary(RegExpNode* on_success) { | |
| | | return new(on_success->zone()) AssertionNode(AT_NON_BOUNDARY, on_succes | |
| | | s); | |
| | | } | |
| | | static AssertionNode* AfterNewline(RegExpNode* on_success) { | |
| | | return new(on_success->zone()) AssertionNode(AFTER_NEWLINE, on_success) | |
| | | ; | |
| | | } | |
| | | virtual void Accept(NodeVisitor* visitor); | |
| | | virtual void Emit(RegExpCompiler* compiler, Trace* trace); | |
| | | virtual int EatsAtLeast(int still_to_find, | |
| | | int recursion_depth, | |
| | | bool not_at_start); | |
| | | virtual void GetQuickCheckDetails(QuickCheckDetails* details, | |
| | | RegExpCompiler* compiler, | |
| | | int filled_in, | |
| | | bool not_at_start); | |
| | | virtual void FillInBMInfo(int offset, | |
| | | int recursion_depth, | |
| | | int budget, | |
| | | BoyerMooreLookahead* bm, | |
| | | bool not_at_start); | |
| | | AssertionNodeType type() { return type_; } | |
| | | void set_type(AssertionNodeType type) { type_ = type; } | |
| | | | |
| | | private: | |
| | | void EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace); | |
| | | enum IfPrevious { kIsNonWord, kIsWord }; | |
| | | void BacktrackIfPrevious(RegExpCompiler* compiler, | |
| | | Trace* trace, | |
| | | IfPrevious backtrack_if_previous); | |
| | | AssertionNode(AssertionNodeType t, RegExpNode* on_success) | |
| | | : SeqRegExpNode(on_success), type_(t) { } | |
| | | AssertionNodeType type_; | |
| | | }; | |
| | | | |
| | | class BackReferenceNode: public SeqRegExpNode { | |
| | | public: | |
| | | BackReferenceNode(int start_reg, | |
| | | int end_reg, | |
| | | RegExpNode* on_success) | |
| | | : SeqRegExpNode(on_success), | |
| | | start_reg_(start_reg), | |
| | | end_reg_(end_reg) { } | |
| | | virtual void Accept(NodeVisitor* visitor); | |
| | | int start_register() { return start_reg_; } | |
| | | int end_register() { return end_reg_; } | |
| | | virtual void Emit(RegExpCompiler* compiler, Trace* trace); | |
| | | virtual int EatsAtLeast(int still_to_find, | |
| | | int recursion_depth, | |
| | | bool not_at_start); | |
| | | virtual void GetQuickCheckDetails(QuickCheckDetails* details, | |
| | | RegExpCompiler* compiler, | |
| | | int characters_filled_in, | |
| | | bool not_at_start) { | |
| | | return; | |
| | | } | |
| | | virtual void FillInBMInfo(int offset, | |
| | | int recursion_depth, | |
| | | int budget, | |
| | | BoyerMooreLookahead* bm, | |
| | | bool not_at_start); | |
| | | | |
| | | private: | |
| | | int start_reg_; | |
| | | int end_reg_; | |
| | | }; | |
| | | | |
| | | class EndNode: public RegExpNode { | |
| | | public: | |
| | | enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; | |
| | | explicit EndNode(Action action, Zone* zone) | |
| | | : RegExpNode(zone), action_(action) { } | |
| | | virtual void Accept(NodeVisitor* visitor); | |
| | | virtual void Emit(RegExpCompiler* compiler, Trace* trace); | |
| | | virtual int EatsAtLeast(int still_to_find, | |
| | | int recursion_depth, | |
| | | bool not_at_start) { return 0; } | |
| | | virtual void GetQuickCheckDetails(QuickCheckDetails* details, | |
| | | RegExpCompiler* compiler, | |
| | | int characters_filled_in, | |
| | | bool not_at_start) { | |
| | | // Returning 0 from EatsAtLeast should ensure we never get here. | |
| | | UNREACHABLE(); | |
| | | } | |
| | | virtual void FillInBMInfo(int offset, | |
| | | int recursion_depth, | |
| | | int budget, | |
| | | BoyerMooreLookahead* bm, | |
| | | bool not_at_start) { | |
| | | // Returning 0 from EatsAtLeast should ensure we never get here. | |
| | | UNREACHABLE(); | |
| | | } | |
| | | | |
| | | private: | |
| | | Action action_; | |
| | | }; | |
| | | | |
| | | class NegativeSubmatchSuccess: public EndNode { | |
| | | public: | |
| | | NegativeSubmatchSuccess(int stack_pointer_reg, | |
| | | int position_reg, | |
| | | int clear_capture_count, | |
| | | int clear_capture_start, | |
| | | Zone* zone) | |
| | | : EndNode(NEGATIVE_SUBMATCH_SUCCESS, zone), | |
| | | stack_pointer_register_(stack_pointer_reg), | |
| | | current_position_register_(position_reg), | |
| | | clear_capture_count_(clear_capture_count), | |
| | | clear_capture_start_(clear_capture_start) { } | |
| | | virtual void Emit(RegExpCompiler* compiler, Trace* trace); | |
| | | | |
| | | private: | |
| | | int stack_pointer_register_; | |
| | | int current_position_register_; | |
| | | int clear_capture_count_; | |
| | | int clear_capture_start_; | |
| | | }; | |
| | | | |
| | | class Guard: public ZoneObject { | |
| | | public: | |
| | | enum Relation { LT, GEQ }; | |
| | | Guard(int reg, Relation op, int value) | |
| | | : reg_(reg), | |
| | | op_(op), | |
| | | value_(value) { } | |
| | | int reg() { return reg_; } | |
| | | Relation op() { return op_; } | |
| | | int value() { return value_; } | |
| | | | |
| | | private: | |
| | | int reg_; | |
| | | Relation op_; | |
| | | int value_; | |
| | | }; | |
| | | | |
| | | class GuardedAlternative { | |
| | | public: | |
| | | explicit GuardedAlternative(RegExpNode* node) : node_(node), guards_(NULL | |
| | | ) { } | |
| | | void AddGuard(Guard* guard, Zone* zone); | |
| | | RegExpNode* node() { return node_; } | |
| | | void set_node(RegExpNode* node) { node_ = node; } | |
| | | ZoneList<Guard*>* guards() { return guards_; } | |
| | | | |
| | | private: | |
| | | RegExpNode* node_; | |
| | | ZoneList<Guard*>* guards_; | |
| | | }; | |
| | | | |
| | | class AlternativeGeneration; | |
| | | | |
| | | class ChoiceNode: public RegExpNode { | |
| | | public: | |
| | | explicit ChoiceNode(int expected_size, Zone* zone) | |
| | | : RegExpNode(zone), | |
| | | alternatives_(new(zone) | |
| | | ZoneList<GuardedAlternative>(expected_size, zone)), | |
| | | table_(NULL), | |
| | | not_at_start_(false), | |
| | | being_calculated_(false) { } | |
| | | virtual void Accept(NodeVisitor* visitor); | |
| | | void AddAlternative(GuardedAlternative node) { | |
| | | alternatives()->Add(node, zone()); | |
| | | } | |
| | | ZoneList<GuardedAlternative>* alternatives() { return alternatives_; } | |
| | | DispatchTable* GetTable(bool ignore_case); | |
| | | virtual void Emit(RegExpCompiler* compiler, Trace* trace); | |
| | | virtual int EatsAtLeast(int still_to_find, | |
| | | int recursion_depth, | |
| | | bool not_at_start); | |
| | | int EatsAtLeastHelper(int still_to_find, | |
| | | int recursion_depth, | |
| | | RegExpNode* ignore_this_node, | |
| | | bool not_at_start); | |
| | | virtual void GetQuickCheckDetails(QuickCheckDetails* details, | |
| | | RegExpCompiler* compiler, | |
| | | int characters_filled_in, | |
| | | bool not_at_start); | |
| | | virtual void FillInBMInfo(int offset, | |
| | | int recursion_depth, | |
| | | int budget, | |
| | | BoyerMooreLookahead* bm, | |
| | | bool not_at_start); | |
| | | | |
| | | bool being_calculated() { return being_calculated_; } | |
| | | bool not_at_start() { return not_at_start_; } | |
| | | void set_not_at_start() { not_at_start_ = true; } | |
| | | void set_being_calculated(bool b) { being_calculated_ = b; } | |
| | | virtual bool try_to_emit_quick_check_for_alternative(int i) { return true | |
| | | ; } | |
| | | virtual RegExpNode* FilterASCII(int depth); | |
| | | | |
| | | protected: | |
| | | int GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative); | |
| | | ZoneList<GuardedAlternative>* alternatives_; | |
| | | | |
| | | private: | |
| | | friend class DispatchTableConstructor; | |
| | | friend class Analysis; | |
| | | void GenerateGuard(RegExpMacroAssembler* macro_assembler, | |
| | | Guard* guard, | |
| | | Trace* trace); | |
| | | int CalculatePreloadCharacters(RegExpCompiler* compiler, int eats_at_leas | |
| | | t); | |
| | | void EmitOutOfLineContinuation(RegExpCompiler* compiler, | |
| | | Trace* trace, | |
| | | GuardedAlternative alternative, | |
| | | AlternativeGeneration* alt_gen, | |
| | | int preload_characters, | |
| | | bool next_expects_preload); | |
| | | DispatchTable* table_; | |
| | | // If true, this node is never checked at the start of the input. | |
| | | // Allows a new trace to start with at_start() set to false. | |
| | | bool not_at_start_; | |
| | | bool being_calculated_; | |
| | | }; | |
| | | | |
| | | class NegativeLookaheadChoiceNode: public ChoiceNode { | |
| | | public: | |
| | | explicit NegativeLookaheadChoiceNode(GuardedAlternative this_must_fail, | |
| | | GuardedAlternative then_do_this, | |
| | | Zone* zone) | |
| | | : ChoiceNode(2, zone) { | |
| | | AddAlternative(this_must_fail); | |
| | | AddAlternative(then_do_this); | |
| | | } | |
| | | virtual int EatsAtLeast(int still_to_find, | |
| | | int recursion_depth, | |
| | | bool not_at_start); | |
| | | virtual void GetQuickCheckDetails(QuickCheckDetails* details, | |
| | | RegExpCompiler* compiler, | |
| | | int characters_filled_in, | |
| | | bool not_at_start); | |
| | | virtual void FillInBMInfo(int offset, | |
| | | int recursion_depth, | |
| | | int budget, | |
| | | BoyerMooreLookahead* bm, | |
| | | bool not_at_start) { | |
| | | alternatives_->at(1).node()->FillInBMInfo( | |
| | | offset, recursion_depth + 1, budget - 1, bm, not_at_start); | |
| | | if (offset == 0) set_bm_info(not_at_start, bm); | |
| | | } | |
| | | // For a negative lookahead we don't emit the quick check for the | |
| | | // alternative that is expected to fail. This is because quick check cod | |
| | | e | |
| | | // starts by loading enough characters for the alternative that takes few | |
| | | est | |
| | | // characters, but on a negative lookahead the negative branch did not ta | |
| | | ke | |
| | | // part in that calculation (EatsAtLeast) so the assumptions don't hold. | |
| | | virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != | |
| | | 0; } | |
| | | virtual RegExpNode* FilterASCII(int depth); | |
| | | }; | |
| | | | |
| | | class LoopChoiceNode: public ChoiceNode { | |
| | | public: | |
| | | explicit LoopChoiceNode(bool body_can_be_zero_length, Zone* zone) | |
| | | : ChoiceNode(2, zone), | |
| | | loop_node_(NULL), | |
| | | continue_node_(NULL), | |
| | | body_can_be_zero_length_(body_can_be_zero_length) { } | |
| | | void AddLoopAlternative(GuardedAlternative alt); | |
| | | void AddContinueAlternative(GuardedAlternative alt); | |
| | | virtual void Emit(RegExpCompiler* compiler, Trace* trace); | |
| | | virtual int EatsAtLeast(int still_to_find, | |
| | | int recursion_depth, | |
| | | bool not_at_start); | |
| | | virtual void GetQuickCheckDetails(QuickCheckDetails* details, | |
| | | RegExpCompiler* compiler, | |
| | | int characters_filled_in, | |
| | | bool not_at_start); | |
| | | virtual void FillInBMInfo(int offset, | |
| | | int recursion_depth, | |
| | | int budget, | |
| | | BoyerMooreLookahead* bm, | |
| | | bool not_at_start); | |
| | | RegExpNode* loop_node() { return loop_node_; } | |
| | | RegExpNode* continue_node() { return continue_node_; } | |
| | | bool body_can_be_zero_length() { return body_can_be_zero_length_; } | |
| | | virtual void Accept(NodeVisitor* visitor); | |
| | | virtual RegExpNode* FilterASCII(int depth); | |
| | | | |
| | | private: | |
| | | // AddAlternative is made private for loop nodes because alternatives | |
| | | // should not be added freely, we need to keep track of which node | |
| | | // goes back to the node itself. | |
| | | void AddAlternative(GuardedAlternative node) { | |
| | | ChoiceNode::AddAlternative(node); | |
| | | } | |
| | | | |
| | | RegExpNode* loop_node_; | |
| | | RegExpNode* continue_node_; | |
| | | bool body_can_be_zero_length_; | |
| | | }; | |
| | | | |
| | | // Improve the speed that we scan for an initial point where a non-anchored | |
| | | // regexp can match by using a Boyer-Moore-like table. This is done by | |
| | | // identifying non-greedy non-capturing loops in the nodes that eat any | |
| | | // character one at a time. For example in the middle of the regexp | |
| | | // /foo[\s\S]*?bar/ we find such a loop. There is also such a loop implici | |
| | | tly | |
| | | // inserted at the start of any non-anchored regexp. | |
| | | // | |
| | | // When we have found such a loop we look ahead in the nodes to find the se | |
| | | t of | |
| | | // characters that can come at given distances. For example for the regexp | |
| | | // /.?foo/ we know that there are at least 3 characters ahead of us, and th | |
| | | e | |
| | | // sets of characters that can occur are [any, [f, o], [o]]. We find a rang | |
| | | e in | |
| | | // the lookahead info where the set of characters is reasonably constrained | |
| | | . In | |
| | | // our example this is from index 1 to 2 (0 is not constrained). We can now | |
| | | // look 3 characters ahead and if we don't find one of [f, o] (the union of | |
| | | // [f, o] and [o]) then we can skip forwards by the range size (in this cas | |
| | | e 2). | |
| | | // | |
| | | // For Unicode input strings we do the same, but modulo 128. | |
| | | // | |
| | | // We also look at the first string fed to the regexp and use that to get a | |
| | | hint | |
| | | // of the character frequencies in the inputs. This affects the assessment | |
| | | of | |
| | | // whether the set of characters is 'reasonably constrained'. | |
| | | // | |
| | | // We also have another lookahead mechanism (called quick check in the code | |
| | | ), | |
| | | // which uses a wide load of multiple characters followed by a mask and com | |
| | | pare | |
| | | // to determine whether a match is possible at this point. | |
| | | enum ContainedInLattice { | |
| | | kNotYet = 0, | |
| | | kLatticeIn = 1, | |
| | | kLatticeOut = 2, | |
| | | kLatticeUnknown = 3 // Can also mean both in and out. | |
| | | }; | |
| | | | |
| | | inline ContainedInLattice Combine(ContainedInLattice a, ContainedInLattice | |
| | | b) { | |
| | | return static_cast<ContainedInLattice>(a | b); | |
| | | } | |
| | | | |
| | | ContainedInLattice AddRange(ContainedInLattice a, | |
| | | const int* ranges, | |
| | | int ranges_size, | |
| | | Interval new_range); | |
| | | | |
| | | class BoyerMoorePositionInfo : public ZoneObject { | |
| | | public: | |
| | | explicit BoyerMoorePositionInfo(Zone* zone) | |
| | | : map_(new(zone) ZoneList<bool>(kMapSize, zone)), | |
| | | map_count_(0), | |
| | | w_(kNotYet), | |
| | | s_(kNotYet), | |
| | | d_(kNotYet), | |
| | | surrogate_(kNotYet) { | |
| | | for (int i = 0; i < kMapSize; i++) { | |
| | | map_->Add(false, zone); | |
| | | } | |
| | | } | |
| | | | |
| | | bool& at(int i) { return map_->at(i); } | |
| | | | |
| | | static const int kMapSize = 128; | |
| | | static const int kMask = kMapSize - 1; | |
| | | | |
| | | int map_count() const { return map_count_; } | |
| | | | |
| | | void Set(int character); | |
| | | void SetInterval(const Interval& interval); | |
| | | void SetAll(); | |
| | | bool is_non_word() { return w_ == kLatticeOut; } | |
| | | bool is_word() { return w_ == kLatticeIn; } | |
| | | | |
| | | private: | |
| | | ZoneList<bool>* map_; | |
| | | int map_count_; // Number of set bits in the map. | |
| | | ContainedInLattice w_; // The \w character class. | |
| | | ContainedInLattice s_; // The \s character class. | |
| | | ContainedInLattice d_; // The \d character class. | |
| | | ContainedInLattice surrogate_; // Surrogate UTF-16 code units. | |
| | | }; | |
| | | | |
| | | class BoyerMooreLookahead : public ZoneObject { | |
| | | public: | |
| | | BoyerMooreLookahead(int length, RegExpCompiler* compiler, Zone* zone); | |
| | | | |
| | | int length() { return length_; } | |
| | | int max_char() { return max_char_; } | |
| | | RegExpCompiler* compiler() { return compiler_; } | |
| | | | |
| | | int Count(int map_number) { | |
| | | return bitmaps_->at(map_number)->map_count(); | |
| | | } | |
| | | | |
| | | BoyerMoorePositionInfo* at(int i) { return bitmaps_->at(i); } | |
| | | | |
| | | void Set(int map_number, int character) { | |
| | | if (character > max_char_) return; | |
| | | BoyerMoorePositionInfo* info = bitmaps_->at(map_number); | |
| | | info->Set(character); | |
| | | } | |
| | | | |
| | | void SetInterval(int map_number, const Interval& interval) { | |
| | | if (interval.from() > max_char_) return; | |
| | | BoyerMoorePositionInfo* info = bitmaps_->at(map_number); | |
| | | if (interval.to() > max_char_) { | |
| | | info->SetInterval(Interval(interval.from(), max_char_)); | |
| | | } else { | |
| | | info->SetInterval(interval); | |
| | | } | |
| | | } | |
| | | | |
| | | void SetAll(int map_number) { | |
| | | bitmaps_->at(map_number)->SetAll(); | |
| | | } | |
| | | | |
| | | void SetRest(int from_map) { | |
| | | for (int i = from_map; i < length_; i++) SetAll(i); | |
| | | } | |
| | | bool EmitSkipInstructions(RegExpMacroAssembler* masm); | |
| | | | |
| | | private: | |
| | | // This is the value obtained by EatsAtLeast. If we do not have at least | |
| | | this | |
| | | // many characters left in the sample string then the match is bound to f | |
| | | ail. | |
| | | // Therefore it is OK to read a character this far ahead of the current m | |
| | | atch | |
| | | // point. | |
| | | int length_; | |
| | | RegExpCompiler* compiler_; | |
| | | // 0x7f for ASCII, 0xffff for UTF-16. | |
| | | int max_char_; | |
| | | ZoneList<BoyerMoorePositionInfo*>* bitmaps_; | |
| | | | |
| | | int GetSkipTable(int min_lookahead, | |
| | | int max_lookahead, | |
| | | Handle<ByteArray> boolean_skip_table); | |
| | | bool FindWorthwhileInterval(int* from, int* to); | |
| | | int FindBestInterval( | |
| | | int max_number_of_chars, int old_biggest_points, int* from, int* to); | |
| | | }; | |
| | | | |
| | | // There are many ways to generate code for a node. This class encapsulate | |
| | | s | |
| | | // the current way we should be generating. In other words it encapsulates | |
| | | // the current state of the code generator. The effect of this is that we | |
| | | // generate code for paths that the matcher can take through the regular | |
| | | // expression. A given node in the regexp can be code-generated several ti | |
| | | mes | |
| | | // as it can be part of several traces. For example for the regexp: | |
| | | // /foo(bar|ip)baz/ the code to match baz will be generated twice, once as | |
| | | part | |
| | | // of the foo-bar-baz trace and once as part of the foo-ip-baz trace. The | |
| | | code | |
| | | // to match foo is generated only once (the traces have a common prefix). | |
| | | The | |
| | | // code to store the capture is deferred and generated (twice) after the pl | |
| | | aces | |
| | | // where baz has been matched. | |
| | | class Trace { | |
| | | public: | |
| | | // A value for a property that is either known to be true, know to be fal | |
| | | se, | |
| | | // or not known. | |
| | | enum TriBool { | |
| | | UNKNOWN = -1, FALSE = 0, TRUE = 1 | |
| | | }; | |
| | | | |
| | | class DeferredAction { | |
| | | public: | |
| | | DeferredAction(ActionNode::Type type, int reg) | |
| | | : type_(type), reg_(reg), next_(NULL) { } | |
| | | DeferredAction* next() { return next_; } | |
| | | bool Mentions(int reg); | |
| | | int reg() { return reg_; } | |
| | | ActionNode::Type type() { return type_; } | |
| | | private: | |
| | | ActionNode::Type type_; | |
| | | int reg_; | |
| | | DeferredAction* next_; | |
| | | friend class Trace; | |
| | | }; | |
| | | | |
| | | class DeferredCapture : public DeferredAction { | |
| | | public: | |
| | | DeferredCapture(int reg, bool is_capture, Trace* trace) | |
| | | : DeferredAction(ActionNode::STORE_POSITION, reg), | |
| | | cp_offset_(trace->cp_offset()), | |
| | | is_capture_(is_capture) { } | |
| | | int cp_offset() { return cp_offset_; } | |
| | | bool is_capture() { return is_capture_; } | |
| | | private: | |
| | | int cp_offset_; | |
| | | bool is_capture_; | |
| | | void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; } | |
| | | }; | |
| | | | |
| | | class DeferredSetRegister : public DeferredAction { | |
| | | public: | |
| | | DeferredSetRegister(int reg, int value) | |
| | | : DeferredAction(ActionNode::SET_REGISTER, reg), | |
| | | value_(value) { } | |
| | | int value() { return value_; } | |
| | | private: | |
| | | int value_; | |
| | | }; | |
| | | | |
| | | class DeferredClearCaptures : public DeferredAction { | |
| | | public: | |
| | | explicit DeferredClearCaptures(Interval range) | |
| | | : DeferredAction(ActionNode::CLEAR_CAPTURES, -1), | |
| | | range_(range) { } | |
| | | Interval range() { return range_; } | |
| | | private: | |
| | | Interval range_; | |
| | | }; | |
| | | | |
| | | class DeferredIncrementRegister : public DeferredAction { | |
| | | public: | |
| | | explicit DeferredIncrementRegister(int reg) | |
| | | : DeferredAction(ActionNode::INCREMENT_REGISTER, reg) { } | |
| | | }; | |
| | | | |
| | | Trace() | |
| | | : cp_offset_(0), | |
| | | actions_(NULL), | |
| | | backtrack_(NULL), | |
| | | stop_node_(NULL), | |
| | | loop_label_(NULL), | |
| | | characters_preloaded_(0), | |
| | | bound_checked_up_to_(0), | |
| | | flush_budget_(100), | |
| | | at_start_(UNKNOWN) { } | |
| | | | |
| | | // End the trace. This involves flushing the deferred actions in the tra | |
| | | ce | |
| | | // and pushing a backtrack location onto the backtrack stack. Once this | |
| | | is | |
| | | // done we can start a new trace or go to one that has already been | |
| | | // generated. | |
| | | void Flush(RegExpCompiler* compiler, RegExpNode* successor); | |
| | | int cp_offset() { return cp_offset_; } | |
| | | DeferredAction* actions() { return actions_; } | |
| | | // A trivial trace is one that has no deferred actions or other state tha | |
| | | t | |
| | | // affects the assumptions used when generating code. There is no record | |
| | | ed | |
| | | // backtrack location in a trivial trace, so with a trivial trace we will | |
| | | // generate code that, on a failure to match, gets the backtrack location | |
| | | // from the backtrack stack rather than using a direct jump instruction. | |
| | | We | |
| | | // always start code generation with a trivial trace and non-trivial trac | |
| | | es | |
| | | // are created as we emit code for nodes or add to the list of deferred | |
| | | // actions in the trace. The location of the code generated for a node u | |
| | | sing | |
| | | // a trivial trace is recorded in a label in the node so that gotos can b | |
| | | e | |
| | | // generated to that code. | |
| | | bool is_trivial() { | |
| | | return backtrack_ == NULL && | |
| | | actions_ == NULL && | |
| | | cp_offset_ == 0 && | |
| | | characters_preloaded_ == 0 && | |
| | | bound_checked_up_to_ == 0 && | |
| | | quick_check_performed_.characters() == 0 && | |
| | | at_start_ == UNKNOWN; | |
| | | } | |
| | | TriBool at_start() { return at_start_; } | |
| | | void set_at_start(bool at_start) { at_start_ = at_start ? TRUE : FALSE; } | |
| | | Label* backtrack() { return backtrack_; } | |
| | | Label* loop_label() { return loop_label_; } | |
| | | RegExpNode* stop_node() { return stop_node_; } | |
| | | int characters_preloaded() { return characters_preloaded_; } | |
| | | int bound_checked_up_to() { return bound_checked_up_to_; } | |
| | | int flush_budget() { return flush_budget_; } | |
| | | QuickCheckDetails* quick_check_performed() { return &quick_check_performe | |
| | | d_; } | |
| | | bool mentions_reg(int reg); | |
| | | // Returns true if a deferred position store exists to the specified | |
| | | // register and stores the offset in the out-parameter. Otherwise | |
| | | // returns false. | |
| | | bool GetStoredPosition(int reg, int* cp_offset); | |
| | | // These set methods and AdvanceCurrentPositionInTrace should be used onl | |
| | | y on | |
| | | // new traces - the intention is that traces are immutable after creation | |
| | | . | |
| | | void add_action(DeferredAction* new_action) { | |
| | | ASSERT(new_action->next_ == NULL); | |
| | | new_action->next_ = actions_; | |
| | | actions_ = new_action; | |
| | | } | |
| | | void set_backtrack(Label* backtrack) { backtrack_ = backtrack; } | |
| | | void set_stop_node(RegExpNode* node) { stop_node_ = node; } | |
| | | void set_loop_label(Label* label) { loop_label_ = label; } | |
| | | void set_characters_preloaded(int count) { characters_preloaded_ = count; | |
| | | } | |
| | | void set_bound_checked_up_to(int to) { bound_checked_up_to_ = to; } | |
| | | void set_flush_budget(int to) { flush_budget_ = to; } | |
| | | void set_quick_check_performed(QuickCheckDetails* d) { | |
| | | quick_check_performed_ = *d; | |
| | | } | |
| | | void InvalidateCurrentCharacter(); | |
| | | void AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler); | |
| | | | |
| | | private: | |
| | | int FindAffectedRegisters(OutSet* affected_registers, Zone* zone); | |
| | | void PerformDeferredActions(RegExpMacroAssembler* macro, | |
| | | int max_register, | |
| | | OutSet& affected_registers, | |
| | | OutSet* registers_to_pop, | |
| | | OutSet* registers_to_clear, | |
| | | Zone* zone); | |
| | | void RestoreAffectedRegisters(RegExpMacroAssembler* macro, | |
| | | int max_register, | |
| | | OutSet& registers_to_pop, | |
| | | OutSet& registers_to_clear); | |
| | | int cp_offset_; | |
| | | DeferredAction* actions_; | |
| | | Label* backtrack_; | |
| | | RegExpNode* stop_node_; | |
| | | Label* loop_label_; | |
| | | int characters_preloaded_; | |
| | | int bound_checked_up_to_; | |
| | | QuickCheckDetails quick_check_performed_; | |
| | | int flush_budget_; | |
| | | TriBool at_start_; | |
| | | }; | |
| | | | |
| | | class NodeVisitor { | |
| | | public: | |
| | | virtual ~NodeVisitor() { } | |
| | | #define DECLARE_VISIT(Type) \ | |
| | | virtual void Visit##Type(Type##Node* that) = 0; | |
| | | FOR_EACH_NODE_TYPE(DECLARE_VISIT) | |
| | | #undef DECLARE_VISIT | |
| | | virtual void VisitLoopChoice(LoopChoiceNode* that) { VisitChoice(that); } | |
| | | }; | |
| | | | |
| | | // Node visitor used to add the start set of the alternatives to the | |
| | | // dispatch table of a choice node. | |
| | | class DispatchTableConstructor: public NodeVisitor { | |
| | | public: | |
| | | DispatchTableConstructor(DispatchTable* table, bool ignore_case, | |
| | | Zone* zone) | |
| | | : table_(table), | |
| | | choice_index_(-1), | |
| | | ignore_case_(ignore_case), | |
| | | zone_(zone) { } | |
| | | | |
| | | void BuildTable(ChoiceNode* node); | |
| | | | |
| | | void AddRange(CharacterRange range) { | |
| | | table()->AddRange(range, choice_index_, zone_); | |
| | | } | |
| | | | |
| | | void AddInverse(ZoneList<CharacterRange>* ranges); | |
| | | | |
| | | #define DECLARE_VISIT(Type) \ | |
| | | virtual void Visit##Type(Type##Node* that); | |
| | | FOR_EACH_NODE_TYPE(DECLARE_VISIT) | |
| | | #undef DECLARE_VISIT | |
| | | | |
| | | DispatchTable* table() { return table_; } | |
| | | void set_choice_index(int value) { choice_index_ = value; } | |
| | | | |
| | | protected: | |
| | | DispatchTable* table_; | |
| | | int choice_index_; | |
| | | bool ignore_case_; | |
| | | Zone* zone_; | |
| | | }; | |
| | | | |
| | | // Assertion propagation moves information about assertions such as | |
| | | // \b to the affected nodes. For instance, in /.\b./ information must | |
| | | // be propagated to the first '.' that whatever follows needs to know | |
| | | // if it matched a word or a non-word, and to the second '.' that it | |
| | | // has to check if it succeeds a word or non-word. In this case the | |
| | | // result will be something like: | |
| | | // | |
| | | // +-------+ +------------+ | |
| | | // | . | | . | | |
| | | // +-------+ ---> +------------+ | |
| | | // | word? | | check word | | |
| | | // +-------+ +------------+ | |
| | | class Analysis: public NodeVisitor { | |
| | | public: | |
| | | Analysis(bool ignore_case, bool is_ascii) | |
| | | : ignore_case_(ignore_case), | |
| | | is_ascii_(is_ascii), | |
| | | error_message_(NULL) { } | |
| | | void EnsureAnalyzed(RegExpNode* node); | |
| | | | |
| | | #define DECLARE_VISIT(Type) \ | |
| | | virtual void Visit##Type(Type##Node* that); | |
| | | FOR_EACH_NODE_TYPE(DECLARE_VISIT) | |
| | | #undef DECLARE_VISIT | |
| | | virtual void VisitLoopChoice(LoopChoiceNode* that); | |
| | | | |
| | | bool has_failed() { return error_message_ != NULL; } | |
| | | const char* error_message() { | |
| | | ASSERT(error_message_ != NULL); | |
| | | return error_message_; | |
| | | } | |
| | | void fail(const char* error_message) { | |
| | | error_message_ = error_message; | |
| | | } | |
| | | | |
| | | private: | |
| | | bool ignore_case_; | |
| | | bool is_ascii_; | |
| | | const char* error_message_; | |
| | | | |
| | | DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis); | |
| | | }; | |
| | | | |
| | | struct RegExpCompileData { | |
| | | RegExpCompileData() | |
| | | : tree(NULL), | |
| | | node(NULL), | |
| | | simple(true), | |
| | | contains_anchor(false), | |
| | | capture_count(0) { } | |
| | | RegExpTree* tree; | |
| | | RegExpNode* node; | |
| | | bool simple; | |
| | | bool contains_anchor; | |
| | | Handle<String> error; | |
| | | int capture_count; | |
| | | }; | |
| | | | |
| | | class RegExpEngine: public AllStatic { | |
| | | public: | |
| | | struct CompilationResult { | |
| | | explicit CompilationResult(const char* error_message) | |
| | | : error_message(error_message), | |
| | | code(HEAP->the_hole_value()), | |
| | | num_registers(0) {} | |
| | | CompilationResult(Object* code, int registers) | |
| | | : error_message(NULL), | |
| | | code(code), | |
| | | num_registers(registers) {} | |
| | | const char* error_message; | |
| | | Object* code; | |
| | | int num_registers; | |
| | | }; | |
| | | | |
| | | static CompilationResult Compile(RegExpCompileData* input, | |
| | | bool ignore_case, | |
| | | bool global, | |
| | | bool multiline, | |
| | | Handle<String> pattern, | |
| | | Handle<String> sample_subject, | |
| | | bool is_ascii, Zone* zone); | |
| | | | |
| | | static void DotPrint(const char* label, RegExpNode* node, bool ignore_cas | |
| | | e); | |
| | | }; | |
| | | | |
| | | class OffsetsVector { | |
| | | public: | |
| | | inline OffsetsVector(int num_registers, Isolate* isolate) | |
| | | : offsets_vector_length_(num_registers) { | |
| | | if (offsets_vector_length_ > Isolate::kJSRegexpStaticOffsetsVectorSize) | |
| | | { | |
| | | vector_ = NewArray<int>(offsets_vector_length_); | |
| | | } else { | |
| | | vector_ = isolate->jsregexp_static_offsets_vector(); | |
| | | } | |
| | | } | |
| | | inline ~OffsetsVector() { | |
| | | if (offsets_vector_length_ > Isolate::kJSRegexpStaticOffsetsVectorSize) | |
| | | { | |
| | | DeleteArray(vector_); | |
| | | vector_ = NULL; | |
| | | } | |
| | | } | |
| | | inline int* vector() { return vector_; } | |
| | | inline int length() { return offsets_vector_length_; } | |
| | | | |
| | | static const int kStaticOffsetsVectorSize = | |
| | | Isolate::kJSRegexpStaticOffsetsVectorSize; | |
| | | | |
| | | private: | |
| | | static Address static_offsets_vector_address(Isolate* isolate) { | |
| | | return reinterpret_cast<Address>(isolate->jsregexp_static_offsets_vecto | |
| | | r()); | |
| | | } | |
| | | | |
| | | int* vector_; | |
| | | int offsets_vector_length_; | |
| | | | |
| | | friend class ExternalReference; | |
| | | }; | |
| | | | |
|
| extern JSBool | | } } // namespace v8::internal | |
| js_SetLastIndex(JSContext *cx, JSObject *obj, jsdouble lastIndex); | | | |
| | | | |
|
| #endif /* jsregexp_h___ */ | | #endif // V8_JSREGEXP_H_ | |
| | | | |
End of changes. 5 change blocks. |
| 203 lines changed or deleted | | 1694 lines changed or added | |
|
| value.h | | value.h | |
|
| /* @file value.h | | | |
| concurrency helpers DiagStr, Guarded | | | |
| */ | | | |
| | | | |
| /** | | /** | |
|
| * Copyright (C) 2008 10gen Inc. | | * Copyright (c) 2011 10gen Inc. | |
| * | | * | |
| * This program is free software: you can redistribute it and/or modify | | * This program is free software: you can redistribute it and/or modify | |
| * it under the terms of the GNU Affero General Public License, version 3 | | * it under the terms of the GNU Affero General Public License, version 3, | |
| , | | * as published by the Free Software Foundation. | |
| * as published by the Free Software Foundation. | | * | |
| * | | * This program is distributed in the hope that it will be useful, | |
| * This program is distributed in the hope that it will be useful,b | | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | | * GNU Affero General Public License for more details. | |
| * GNU Affero General Public License for more details. | | * | |
| * | | * You should have received a copy of the GNU Affero General Public License | |
| * You should have received a copy of the GNU Affero General Public Licen | | * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
| se | | */ | |
| * along with this program. If not, see <http://www.gnu.org/licenses/>. | | | |
| */ | | | |
| | | | |
| #pragma once | | #pragma once | |
| | | | |
|
| #include "spin_lock.h" | | #include "mongo/db/pipeline/value_internal.h" | |
| | | | |
| namespace mongo { | | namespace mongo { | |
|
| | | class BSONElement; | |
| | | class Builder; | |
| | | | |
|
| /** declare that a variable that is "guarded" by a mutex. | | /** A variant type that can hold any type of data representable in BSON | |
| | | * | |
| The decl documents the rule. For example "counta and countb are gu | | * Small values are stored inline, but some values, such as large stri | |
| arded by xyzMutex": | | ngs, | |
| | | * are heap allocated. It has smart pointer capabilities built-in so i | |
| Guarded<int, xyzMutex> counta; | | t is | |
| Guarded<int, xyzMutex> countb; | | * safe and recommended to pass these around and return them by value. | |
| | | * | |
| Upon use, specify the scoped_lock object. This makes it hard for s | | * Values are immutable, but can be assigned. This means that once you | |
| omeone | | have | |
| later to forget to be in the lock. Check is made that it is the ri | | * a Value, you can be assured that none of the data in that Value wil | |
| ght lock in _DEBUG | | l | |
| builds at runtime. | | * change. However if you have a non-const Value you replace it with | |
| */ | | * operator=. These rules are the same as BSONObj, and similar to | |
| template <typename T, SimpleMutex& BY> | | * shared_ptr<const Object> with stronger guarantees of constness. Thi | |
| class Guarded { | | s is | |
| T _val; | | * also the same as Java's String type. | |
| | | * | |
| | | * Thread-safety: A single Value instance can be safely shared between | |
| | | * threads as long as there are no writers while other threads are | |
| | | * accessing the object. Any number of threads can read from a Value | |
| | | * concurrently. There are no restrictions on how threads access Value | |
| | | * instances exclusively owned by them, even if they reference the sam | |
| | | e | |
| | | * storage as Value in other threads. | |
| | | */ | |
| | | class Value { | |
| public: | | public: | |
|
| T& ref(const SimpleMutex::scoped_lock& lk) { | | /** Construct a Value | |
| dassert( &lk.m() == &BY ); | | * | |
| return _val; | | * All types not listed will be rejected rather than converted (se | |
| | | e private for why) | |
| | | * | |
| | | * Note: Currently these are all explicit conversions. | |
| | | * I'm not sure if we want implicit or not. | |
| | | * //TODO decide | |
| | | */ | |
| | | | |
| | | Value(): _storage() {} // "Missing" value | |
| | | explicit Value(bool value) : _storage(Bool, value) { | |
| | | } | |
| | | explicit Value(int value) : _storage(NumberInt, val | |
| | | ue) {} | |
| | | explicit Value(long long value) : _storage(NumberLong, va | |
| | | lue) {} | |
| | | explicit Value(double value) : _storage(NumberDouble, | |
| | | value) {} | |
| | | explicit Value(const OpTime& value) : _storage(Timestamp, val | |
| | | ue.asDate()) {} | |
| | | explicit Value(const OID& value) : _storage(jstOID, value) | |
| | | {} | |
| | | explicit Value(const StringData& value) : _storage(String, value) | |
| | | {} | |
| | | explicit Value(const string& value) : _storage(String, String | |
| | | Data(value)) {} | |
| | | explicit Value(const char* value) : _storage(String, String | |
| | | Data(value)) {} | |
| | | explicit Value(const Document& doc) : _storage(Object, doc) { | |
| | | } | |
| | | explicit Value(const BSONObj& obj);// : _storage(Object, Docume | |
| | | nt(obj)) {} // in cpp | |
| | | explicit Value(const vector<Value>& vec) : _storage(Array, new RCV | |
| | | ector(vec)) {} | |
| | | explicit Value(const BSONBinData& bd) : _storage(BinData, bd) { | |
| | | } | |
| | | explicit Value(const BSONRegEx& re) : _storage(RegEx, re) {} | |
| | | explicit Value(const BSONCodeWScope& cws) : _storage(CodeWScope, cw | |
| | | s) {} | |
| | | explicit Value(const BSONDBRef& dbref) : _storage(DBRef, dbref) | |
| | | {} | |
| | | explicit Value(const BSONSymbol& sym) : _storage(Symbol, sym.sy | |
| | | mbol) {} | |
| | | explicit Value(const BSONCode& code) : _storage(Code, code.cod | |
| | | e) {} | |
| | | explicit Value(const NullLabeler&) : _storage(jstNULL) {} | |
| | | // BSONNull | |
| | | explicit Value(const UndefinedLabeler&) : _storage(Undefined) {} | |
| | | // BSONUndefined | |
| | | explicit Value(const MinKeyLabeler&) : _storage(MinKey) {} | |
| | | // MINKEY | |
| | | explicit Value(const MaxKeyLabeler&) : _storage(MaxKey) {} | |
| | | // MAXKEY | |
| | | explicit Value(const Date_t& date) | |
| | | : _storage(Date, static_cast<long long>(date.millis)) // millis | |
| | | really signed | |
| | | {} | |
| | | | |
| | | /** Creates an empty or zero value of specified type. | |
| | | * This is currently the only way to create Undefined or Null Valu | |
| | | es. | |
| | | */ | |
| | | explicit Value(BSONType type); | |
| | | | |
| | | // TODO: add an unsafe version that can share storage with the BSON | |
| | | Element | |
| | | /// Deep-convert from BSONElement to Value | |
| | | explicit Value(const BSONElement& elem); | |
| | | | |
| | | /** Construct a long or integer-valued Value. | |
| | | * | |
| | | * Used when preforming arithmetic operations with int where the | |
| | | * result may be too large and need to be stored as long. The Valu | |
| | | e | |
| | | * will be an int if value fits, otherwise it will be a long. | |
| | | */ | |
| | | static Value createIntOrLong(long long value); | |
| | | | |
| | | /** A "missing" value indicates the lack of a Value. | |
| | | * This is similar to undefined/null but should not appear in outp | |
| | | ut to BSON. | |
| | | * Missing Values are returned by Document when accessing non-exis | |
| | | tent fields. | |
| | | */ | |
| | | bool missing() const { return _storage.type == EOO; } | |
| | | | |
| | | /// true if missing() or type is jstNULL or Undefined | |
| | | bool nullish() const { | |
| | | return missing() | |
| | | || _storage.type == jstNULL | |
| | | || _storage.type == Undefined; | |
| } | | } | |
|
| }; | | | |
| | | | |
|
| // todo: rename this to ThreadSafeString or something | | /// true if type represents a number | |
| /** there is now one mutex per DiagStr. If you have hundreds or millio | | bool numeric() const { | |
| ns of | | return _storage.type == NumberDouble | |
| DiagStrs you'll need to do something different. | | || _storage.type == NumberLong | |
| */ | | || _storage.type == NumberInt; | |
| class DiagStr { | | | |
| mutable SpinLock m; | | | |
| string _s; | | | |
| public: | | | |
| DiagStr(const DiagStr& r) : _s(r.get()) { } | | | |
| DiagStr(const string& r) : _s(r) { } | | | |
| DiagStr() { } | | | |
| bool empty() const { | | | |
| scoped_spinlock lk(m); | | | |
| return _s.empty(); | | | |
| } | | | |
| string get() const { | | | |
| scoped_spinlock lk(m); | | | |
| return _s; | | | |
| } | | | |
| void set(const char *s) { | | | |
| scoped_spinlock lk(m); | | | |
| _s = s; | | | |
| } | | } | |
|
| void set(const string& s) { | | | |
| scoped_spinlock lk(m); | | /// Get the BSON type of the field. | |
| _s = s; | | BSONType getType() const { return _storage.bsonType(); } | |
| | | | |
| | | /** Exact type getters. | |
| | | * Asserts if the requested value type is not exactly correct. | |
| | | * See coerceTo methods below for a more type-flexible alternative | |
| | | . | |
| | | */ | |
| | | double getDouble() const; | |
| | | string getString() const; | |
| | | Document getDocument() const; | |
| | | OID getOid() const; | |
| | | bool getBool() const; | |
| | | long long getDate() const; // in milliseconds | |
| | | OpTime getTimestamp() const; | |
| | | const char* getRegex() const; | |
| | | const char* getRegexFlags() const; | |
| | | string getSymbol() const; | |
| | | string getCode() const; | |
| | | int getInt() const; | |
| | | long long getLong() const; | |
| | | const vector<Value>& getArray() const { return _storage.getArray(); | |
| | | } | |
| | | size_t getArrayLength() const; | |
| | | | |
| | | /// Access an element of a subarray. Returns Value() if missing or | |
| | | getType() != Array | |
| | | Value operator[] (size_t index) const; | |
| | | | |
| | | /// Access a field of a subdocument. Returns Value() if missing or | |
| | | getType() != Object | |
| | | Value operator[] (StringData name) const; | |
| | | | |
| | | /// Add this value to the BSON object under construction. | |
| | | void addToBsonObj(BSONObjBuilder* pBuilder, StringData fieldName) c | |
| | | onst; | |
| | | | |
| | | /// Add this field to the BSON array under construction. | |
| | | void addToBsonArray(BSONArrayBuilder* pBuilder) const; | |
| | | | |
| | | // Support BSONObjBuilder and BSONArrayBuilder "stream" API | |
| | | friend BSONObjBuilder& operator << (BSONObjBuilderValueStream& buil | |
| | | der, const Value& val); | |
| | | | |
| | | /** Coerce a value to a bool using BSONElement::trueValue() rules. | |
| | | * Some types unsupported. SERVER-6120 | |
| | | */ | |
| | | bool coerceToBool() const; | |
| | | | |
| | | /** Coercion operators to extract values with fuzzy type logic. | |
| | | * | |
| | | * These currently assert if called on an unconvertible type. | |
| | | * TODO: decided how to handle unsupported types. | |
| | | */ | |
| | | string coerceToString() const; | |
| | | int coerceToInt() const; | |
| | | long long coerceToLong() const; | |
| | | double coerceToDouble() const; | |
| | | OpTime coerceToTimestamp() const; | |
| | | long long coerceToDate() const; | |
| | | time_t coerceToTimeT() const; | |
| | | tm coerceToTm() const; // broken-out time struct (see man gmtime) | |
| | | | |
| | | /** Compare two Values. | |
| | | * @returns an integer less than zero, zero, or an integer greater | |
| | | than | |
| | | * zero, depending on whether lhs < rhs, lhs == rhs, or l | |
| | | hs > rhs | |
| | | * Warning: may return values other than -1, 0, or 1 | |
| | | */ | |
| | | static int compare(const Value& lhs, const Value& rhs); | |
| | | | |
| | | friend | |
| | | bool operator==(const Value& v1, const Value& v2) { | |
| | | if (v1._storage.identical(v2._storage)) { | |
| | | // Simple case | |
| | | return true; | |
| | | } | |
| | | return (Value::compare(v1, v2) == 0); | |
| } | | } | |
|
| operator string() const { return get(); } | | | |
| void operator=(const string& s) { set(s); } | | /// This is for debugging, logging, etc. See getString() for how to | |
| void operator=(const DiagStr& rhs) { | | extract a string. | |
| set( rhs.get() ); | | string toString() const; | |
| | | friend ostream& operator << (ostream& out, const Value& v); | |
| | | | |
| | | void swap(Value& rhs) { | |
| | | _storage.swap(rhs._storage); | |
| } | | } | |
| | | | |
|
| // == is not defined. use get() == ... instead. done this way so | | /** Figure out what the widest of two numeric types is. | |
| one thinks about if composing multiple operations | | * | |
| bool operator==(const string& s) const; | | * Widest can be thought of as "most capable," or "able to hold th | |
| | | e | |
| | | * largest or most precise value." The progression is Int, Long, | |
| | | Double. | |
| | | */ | |
| | | static BSONType getWidestNumeric(BSONType lType, BSONType rType); | |
| | | | |
| | | /// Get the approximate memory size of the value, in bytes. Include | |
| | | s sizeof(Value) | |
| | | size_t getApproximateSize() const; | |
| | | | |
| | | /** Calculate a hash value. | |
| | | * | |
| | | * Meant to be used to create composite hashes suitable for | |
| | | * hashed container classes such as unordered_map<>. | |
| | | */ | |
| | | void hash_combine(size_t& seed) const; | |
| | | | |
| | | /// struct Hash is defined to enable the use of Values as keys in u | |
| | | nordered_map. | |
| | | struct Hash : unary_function<const Value&, size_t> { | |
| | | size_t operator()(const Value& rV) const; | |
| | | }; | |
| | | | |
| | | /// Call this after memcpying to update ref counts if needed | |
| | | void memcpyed() const { _storage.memcpyed(); } | |
| | | | |
| | | // LEGACY creation functions | |
| | | static Value createFromBsonElement(const BSONElement* pBsonElement) | |
| | | ; | |
| | | static Value createInt(int value) { return Value(value); } | |
| | | static Value createLong(long long value) { return Value(value); } | |
| | | static Value createDouble(double value) { return Value(value); } | |
| | | static Value createTimestamp(const OpTime& value) { return Value(va | |
| | | lue); } | |
| | | static Value createString(const string& value) { return Value(value | |
| | | ); } | |
| | | static Value createDocument(const Document& doc) { return Value(doc | |
| | | ); } | |
| | | static Value createArray(const vector<Value>& vec) { return Value(v | |
| | | ec); } | |
| | | static Value createDate(const long long value); | |
| | | | |
| | | private: | |
| | | /** This is a "honeypot" to prevent unexpected implicit conversions | |
| | | to the accepted argument | |
| | | * types. bool is especially bad since without this it will accept | |
| | | any pointer. | |
| | | * | |
| | | * Template argument name was chosen to make produced error easier | |
| | | to read. | |
| | | */ | |
| | | template <typename InvalidArgumentType> | |
| | | explicit Value(const InvalidArgumentType& invalidArgument); | |
| | | | |
| | | // does no type checking | |
| | | StringData getStringData() const; // May contain embedded NUL bytes | |
| | | | |
| | | ValueStorage _storage; | |
| | | friend class MutableValue; // gets and sets _storage.genericRCPtr | |
| }; | | }; | |
|
| | | BOOST_STATIC_ASSERT(sizeof(Value) == 16); | |
| | | } | |
| | | | |
|
| | | namespace std { | |
| | | // Specialization used by std::sort and other algorithms; it hands | |
| | | // the work to Value's own member swap. | |
| | | template <> | |
| | | inline void swap(mongo::Value& lhs, mongo::Value& rhs) { | |
| | | lhs.swap(rhs); | |
| | | } | |
| } | | } | |
|
| | | | |
| | | /* ======================= INLINED IMPLEMENTATIONS ======================== | |
| | | == */ | |
| | | | |
| | | namespace mongo { | |
| | | | |
| | | /// Element count of the held array; the type must be Array. | |
| | | inline size_t Value::getArrayLength() const { | |
| | | verify(getType() == Array); | |
| | | const size_t length = getArray().size(); | |
| | | return length; | |
| | | } | |
| | | | |
| | | /// Hashes v by folding it into a fixed starting seed. | |
| | | inline size_t Value::Hash::operator()(const Value& v) const { | |
| | | size_t hashValue = 0xf0afbeef; | |
| | | v.hash_combine(hashValue); | |
| | | return hashValue; | |
| | | } | |
| | | | |
| | | /// Hands back the stored string data; performs no type check. | |
| | | inline StringData Value::getStringData() const { | |
| | | const StringData data = _storage.getString(); | |
| | | return data; | |
| | | } | |
| | | | |
| | | /// Copies the stored string out; the type must be String. | |
| | | inline string Value::getString() const { | |
| | | verify(getType() == String); | |
| | | string result = _storage.getString().toString(); | |
| | | return result; | |
| | | } | |
| | | | |
| | | /// Builds an OID from the stored oid; the type must be jstOID. | |
| | | inline OID Value::getOid() const { | |
| | | verify(getType() == jstOID); | |
| | | const OID id = OID(_storage.oid); | |
| | | return id; | |
| | | } | |
| | | | |
| | | /// Reads the stored boolean; the type must be Bool. | |
| | | inline bool Value::getBool() const { | |
| | | verify(getType() == Bool); | |
| | | const bool flag = _storage.boolValue; | |
| | | return flag; | |
| | | } | |
| | | | |
| | | /// Reads the stored date value; the type must be Date. | |
| | | inline long long Value::getDate() const { | |
| | | verify(getType() == Date); | |
| | | const long long rawDate = _storage.dateValue; | |
| | | return rawDate; | |
| | | } | |
| | | | |
| | | /// Reads the stored timestamp; the type must be Timestamp. | |
| | | inline OpTime Value::getTimestamp() const { | |
| | | verify(getType() == Timestamp); | |
| | | const OpTime stamp = _storage.timestampValue; | |
| | | return stamp; | |
| | | } | |
| | | | |
| | | /// Pointer to the regex pattern text; the type must be RegEx. | |
| | | inline const char* Value::getRegex() const { | |
| | | verify(getType() == RegEx); | |
| | | // this is known to be NUL terminated | |
| | | const char* pattern = _storage.getString().rawData(); | |
| | | return pattern; | |
| | | } | |
| | | /// Pointer to the regex flags, which are stored right behind the | |
| | | /// pattern's terminating NUL; the type must be RegEx. | |
| | | inline const char* Value::getRegexFlags() const { | |
| | | verify(getType() == RegEx); | |
| | | // this is known to be NUL terminated | |
| | | const char* pattern = _storage.getString().rawData(); | |
| | | const size_t patternLength = strlen(pattern); | |
| | | // first byte after pattern's NUL | |
| | | const char* flags = pattern + patternLength + 1; | |
| | | dassert(flags + strlen(flags) == pattern + _storage.getString().size()); | |
| | | return flags; | |
| | | } | |
| | | | |
| | | /// Copies the stored symbol text out; the type must be Symbol. | |
| | | inline string Value::getSymbol() const { | |
| | | verify(getType() == Symbol); | |
| | | string symbol = _storage.getString().toString(); | |
| | | return symbol; | |
| | | } | |
| | | /// Copies the stored code text out; the type must be Code. | |
| | | inline string Value::getCode() const { | |
| | | verify(getType() == Code); | |
| | | string code = _storage.getString().toString(); | |
| | | return code; | |
| | | } | |
| | | | |
| | | /// Reads the stored int; the type must be NumberInt. | |
| | | inline int Value::getInt() const { | |
| | | verify(getType() == NumberInt); | |
| | | const int number = _storage.intValue; | |
| | | return number; | |
| | | } | |
| | | | |
| | | /// Reads the value as a long long. A NumberInt is accepted and | |
| | | /// widened; any other type must be NumberLong. | |
| | | inline long long Value::getLong() const { | |
| | | const BSONType type = getType(); | |
| | | if (type != NumberInt) { | |
| | | verify(type == NumberLong); | |
| | | return _storage.longValue; | |
| | | } | |
| | | | |
| | | // int payload is widened by the return conversion | |
| | | return _storage.intValue; | |
| | | } | |
| | | }; | |
| | | | |
End of changes. 14 change blocks. |
| 75 lines changed or deleted | | 298 lines changed or added | |
|