// Copyright 2010 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef V8_PARSER_H_ #define V8_PARSER_H_ #include "allocation.h" #include "ast.h" #include "scanner.h" #include "scopes.h" #include "preparse-data.h" namespace v8 { namespace internal { class CompilationInfo; class FuncNameInferrer; class ParserLog; class PositionStack; class Target; class LexicalScope; template class ZoneListWrapper; class ParserMessage : public Malloced { public: ParserMessage(Scanner::Location loc, const char* message, Vector args) : loc_(loc), message_(message), args_(args) { } ~ParserMessage(); Scanner::Location location() { return loc_; } const char* message() { return message_; } Vector args() { return args_; } private: Scanner::Location loc_; const char* message_; Vector args_; }; class FunctionEntry BASE_EMBEDDED { public: explicit FunctionEntry(Vector backing) : backing_(backing) { } FunctionEntry() : backing_(Vector::empty()) { } int start_pos() { return backing_[kStartPosOffset]; } void set_start_pos(int value) { backing_[kStartPosOffset] = value; } int end_pos() { return backing_[kEndPosOffset]; } void set_end_pos(int value) { backing_[kEndPosOffset] = value; } int literal_count() { return backing_[kLiteralCountOffset]; } void set_literal_count(int value) { backing_[kLiteralCountOffset] = value; } int property_count() { return backing_[kPropertyCountOffset]; } void set_property_count(int value) { backing_[kPropertyCountOffset] = value; } bool is_valid() { return backing_.length() > 0; } static const int kSize = 4; private: Vector backing_; static const int kStartPosOffset = 0; static const int kEndPosOffset = 1; static const int kLiteralCountOffset = 2; static const int kPropertyCountOffset = 3; }; class ScriptDataImpl : public ScriptData { public: explicit ScriptDataImpl(Vector store) : store_(store), owns_store_(true) { } // Create an empty ScriptDataImpl that is guaranteed to not satisfy // a SanityCheck. 
ScriptDataImpl() : store_(Vector()), owns_store_(false) { } virtual ~ScriptDataImpl(); virtual int Length(); virtual const char* Data(); virtual bool HasError(); void Initialize(); void ReadNextSymbolPosition(); FunctionEntry GetFunctionEntry(int start); int GetSymbolIdentifier(); bool SanityCheck(); Scanner::Location MessageLocation(); const char* BuildMessage(); Vector BuildArgs(); int symbol_count() { return (store_.length() > PreparseDataConstants::kHeaderSize) ? store_[PreparseDataConstants::kSymbolCountOffset] : 0; } // The following functions should only be called if SanityCheck has // returned true. bool has_error() { return store_[PreparseDataConstants::kHasErrorOffset]; } unsigned magic() { return store_[PreparseDataConstants::kMagicOffset]; } unsigned version() { return store_[PreparseDataConstants::kVersionOffset]; } private: Vector store_; unsigned char* symbol_data_; unsigned char* symbol_data_end_; int function_index_; bool owns_store_; unsigned Read(int position); unsigned* ReadAddress(int position); // Reads a number from the current symbols int ReadNumber(byte** source); ScriptDataImpl(const char* backing_store, int length) : store_(reinterpret_cast(const_cast(backing_store)), length / static_cast(sizeof(unsigned))), owns_store_(false) { ASSERT_EQ(0, static_cast( reinterpret_cast(backing_store) % sizeof(unsigned))); } // Read strings written by ParserRecorder::WriteString. static const char* ReadString(unsigned* start, int* chars); friend class ScriptData; }; class ParserApi { public: // Parses the source code represented by the compilation info and sets its // function literal. Returns false (and deallocates any allocated AST // nodes) if parsing failed. static bool Parse(CompilationInfo* info); // Generic preparser generating full preparse data. static ScriptDataImpl* PreParse(UC16CharacterStream* source, v8::Extension* extension); // Preparser that only does preprocessing that makes sense if only used // immediately after. 
static ScriptDataImpl* PartialPreParse(UC16CharacterStream* source, v8::Extension* extension); }; // ---------------------------------------------------------------------------- // REGEXP PARSING // A BuffferedZoneList is an automatically growing list, just like (and backed // by) a ZoneList, that is optimized for the case of adding and removing // a single element. The last element added is stored outside the backing list, // and if no more than one element is ever added, the ZoneList isn't even // allocated. // Elements must not be NULL pointers. template class BufferedZoneList { public: BufferedZoneList() : list_(NULL), last_(NULL) {} // Adds element at end of list. This element is buffered and can // be read using last() or removed using RemoveLast until a new Add or until // RemoveLast or GetList has been called. void Add(T* value) { if (last_ != NULL) { if (list_ == NULL) { list_ = new ZoneList(initial_size); } list_->Add(last_); } last_ = value; } T* last() { ASSERT(last_ != NULL); return last_; } T* RemoveLast() { ASSERT(last_ != NULL); T* result = last_; if ((list_ != NULL) && (list_->length() > 0)) last_ = list_->RemoveLast(); else last_ = NULL; return result; } T* Get(int i) { ASSERT((0 <= i) && (i < length())); if (list_ == NULL) { ASSERT_EQ(0, i); return last_; } else { if (i == list_->length()) { ASSERT(last_ != NULL); return last_; } else { return list_->at(i); } } } void Clear() { list_ = NULL; last_ = NULL; } int length() { int length = (list_ == NULL) ? 0 : list_->length(); return length + ((last_ == NULL) ? 0 : 1); } ZoneList* GetList() { if (list_ == NULL) { list_ = new ZoneList(initial_size); } if (last_ != NULL) { list_->Add(last_); last_ = NULL; } return list_; } private: ZoneList* list_; T* last_; }; // Accumulates RegExp atoms and assertions into lists of terms and alternatives. class RegExpBuilder: public ZoneObject { public: RegExpBuilder(); void AddCharacter(uc16 character); // "Adds" an empty expression. 
Does nothing except consume a // following quantifier void AddEmpty(); void AddAtom(RegExpTree* tree); void AddAssertion(RegExpTree* tree); void NewAlternative(); // '|' void AddQuantifierToAtom(int min, int max, RegExpQuantifier::Type type); RegExpTree* ToRegExp(); private: void FlushCharacters(); void FlushText(); void FlushTerms(); Zone* zone() { return zone_; } Zone* zone_; bool pending_empty_; ZoneList* characters_; BufferedZoneList terms_; BufferedZoneList text_; BufferedZoneList alternatives_; #ifdef DEBUG enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_; #define LAST(x) last_added_ = x; #else #define LAST(x) #endif }; class RegExpParser { public: RegExpParser(FlatStringReader* in, Handle* error, bool multiline_mode); static bool ParseRegExp(FlatStringReader* input, bool multiline, RegExpCompileData* result); RegExpTree* ParsePattern(); RegExpTree* ParseDisjunction(); RegExpTree* ParseGroup(); RegExpTree* ParseCharacterClass(); // Parses a {...,...} quantifier and stores the range in the given // out parameters. bool ParseIntervalQuantifier(int* min_out, int* max_out); // Parses and returns a single escaped character. The character // must not be 'b' or 'B' since they are usually handle specially. uc32 ParseClassCharacterEscape(); // Checks whether the following is a length-digit hexadecimal number, // and sets the value if it is. bool ParseHexEscape(int length, uc32* value); uc32 ParseOctalLiteral(); // Tries to parse the input as a back reference. If successful it // stores the result in the output parameter and returns true. If // it fails it will push back the characters read so the same characters // can be reparsed. bool ParseBackReferenceIndex(int* index_out); CharacterRange ParseClassAtom(uc16* char_class); RegExpTree* ReportError(Vector message); void Advance(); void Advance(int dist); void Reset(int pos); // Reports whether the pattern might be used as a literal search string. 
// Only use if the result of the parse is a single atom node. bool simple(); bool contains_anchor() { return contains_anchor_; } void set_contains_anchor() { contains_anchor_ = true; } int captures_started() { return captures_ == NULL ? 0 : captures_->length(); } int position() { return next_pos_ - 1; } bool failed() { return failed_; } static const int kMaxCaptures = 1 << 16; static const uc32 kEndMarker = (1 << 21); private: enum SubexpressionType { INITIAL, CAPTURE, // All positive values represent captures. POSITIVE_LOOKAHEAD, NEGATIVE_LOOKAHEAD, GROUPING }; class RegExpParserState : public ZoneObject { public: RegExpParserState(RegExpParserState* previous_state, SubexpressionType group_type, int disjunction_capture_index) : previous_state_(previous_state), builder_(new RegExpBuilder()), group_type_(group_type), disjunction_capture_index_(disjunction_capture_index) {} // Parser state of containing expression, if any. RegExpParserState* previous_state() { return previous_state_; } bool IsSubexpression() { return previous_state_ != NULL; } // RegExpBuilder building this regexp's AST. RegExpBuilder* builder() { return builder_; } // Type of regexp being parsed (parenthesized group or entire regexp). SubexpressionType group_type() { return group_type_; } // Index in captures array of first capture in this sub-expression, if any. // Also the capture index of this sub-expression itself, if group_type // is CAPTURE. int capture_index() { return disjunction_capture_index_; } private: // Linked list implementation of stack of states. RegExpParserState* previous_state_; // Builder for the stored disjunction. RegExpBuilder* builder_; // Stored disjunction type (capture, look-ahead or grouping), if any. SubexpressionType group_type_; // Stored disjunction's capture index (if any). 
int disjunction_capture_index_; }; Isolate* isolate() { return isolate_; } Zone* zone() { return isolate_->zone(); } uc32 current() { return current_; } bool has_more() { return has_more_; } bool has_next() { return next_pos_ < in()->length(); } uc32 Next(); FlatStringReader* in() { return in_; } void ScanForCaptures(); Isolate* isolate_; Handle* error_; ZoneList* captures_; FlatStringReader* in_; uc32 current_; int next_pos_; // The capture count is only valid after we have scanned for captures. int capture_count_; bool has_more_; bool multiline_; bool simple_; bool contains_anchor_; bool is_scanned_for_captures_; bool failed_; }; // ---------------------------------------------------------------------------- // JAVASCRIPT PARSING class Parser { public: Parser(Handle