// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
// Generated by scripts/tokenizer_gen.py.

part of '../parser.dart';

/// Tokenizer state to support look ahead for Less' nested selectors.
///
/// An immutable snapshot of the scanning position and selector flags of a
/// [TokenizerBase]; see [TokenizerBase.mark] and [TokenizerBase.restore].
class TokenizerState {
  final int index;
  final int startIndex;
  final bool inSelectorExpression;
  final bool inSelector;

  /// Captures the current scanning state of [base].
  TokenizerState(TokenizerBase base)
      : index = base._index,
        startIndex = base._startIndex,
        inSelectorExpression = base.inSelectorExpression,
        inSelector = base.inSelector;
}

/// The base class for our tokenizer. The hand coded parts are in this file,
/// with the generated parts in the subclass Tokenizer.
abstract class TokenizerBase {
  final SourceFile _file;
  final String _text;

  // TODO: this seems like a bug – this field *is* used
  // ignore: prefer_final_fields
  bool _inString;

  /// Changes tokenization when in a pseudo function expression. If true then
  /// minus signs are handled as operators instead of identifiers.
  bool inSelectorExpression = false;

  /// Changes tokenization when in selectors. If true, it prevents identifiers
  /// from being treated as units. This would break things like ":lang(fr)" or
  /// the HTML (unknown) tag name "px", which is legal to use in a selector.
  // TODO(jmesserly): is this a problem elsewhere? "fr" for example will be
  // processed as a "fraction" unit token, preventing it from working in
  // places where an identifier is expected. This was breaking selectors like:
  //     :lang(fr)
  // The assumption that "fr" always means fraction (and similar issue with
  // other units) doesn't seem valid. We probably should defer this
  // analysis until we reach places in the parser where units are expected.
  // I'm not sure this is tokenizing as described in the specs:
  //     http://dev.w3.org/csswg/css-syntax/
  //     http://dev.w3.org/csswg/selectors4/
  bool inSelector = false;

  /// Offset in [_text] of the next code unit to be read.
  int _index = 0;

  /// Offset used as the start of the span of the token being built.
  int _startIndex = 0;

  TokenizerBase(this._file, this._text, this._inString, [this._index = 0]);

  /// Scans and returns the next token; implemented by the subclass.
  Token next();

  /// Implemented by the subclass.
  int getIdentifierKind();

  /// Snapshot of Tokenizer scanning state.
  TokenizerState get mark => TokenizerState(this);

  /// Restore Tokenizer scanning state.
  void restore(TokenizerState markedData) {
    _index = markedData.index;
    _startIndex = markedData.startIndex;
    inSelectorExpression = markedData.inSelectorExpression;
    inSelector = markedData.inSelector;
  }

  /// Returns the code unit at the current position and advances past it, or
  /// returns 0 (without advancing) when the end of the text has been reached.
  int _nextChar() {
    if (_index < _text.length) {
      return _text.codeUnitAt(_index++);
    }
    return 0;
  }

  /// Returns the code unit [offset] positions ahead of the current position
  /// without consuming anything, or 0 when that position is past the end.
  int _peekChar([int offset = 0]) {
    final position = _index + offset;
    if (position < _text.length) {
      return _text.codeUnitAt(position);
    }
    return 0;
  }

  /// Consumes the next code unit if, and only if, it equals [ch].
  ///
  /// Returns whether a code unit was consumed.
  bool _maybeEatChar(int ch) {
    if (_index < _text.length && _text.codeUnitAt(_index) == ch) {
      _index++;
      return true;
    }
    return false;
  }

  /// Whether [first], followed by the not yet consumed characters, starts a
  /// number: a digit, a dot followed by a digit, or a sign followed by either
  /// a digit or a dot and a digit.
  bool _nextCharsAreNumber(int first) {
    if (TokenizerHelpers.isDigit(first)) return true;
    var second = _peekChar();
    if (first == TokenChar.DOT) return TokenizerHelpers.isDigit(second);
    if (first == TokenChar.PLUS || first == TokenChar.MINUS) {
      return TokenizerHelpers.isDigit(second) ||
          (second == TokenChar.DOT && TokenizerHelpers.isDigit(_peekChar(1)));
    }
    return false;
  }

  /// Builds a token of [kind] spanning `_startIndex` to `_index`.
  Token _finishToken(int kind) => Token(kind, _file.span(_startIndex, _index));

  /// Builds an error token spanning `_startIndex` to `_index`, carrying the
  /// optional [message].
  Token _errorToken([String? message]) =>
      ErrorToken(TokenKind.ERROR, _file.span(_startIndex, _index), message);

  /// Consumes a run of spaces, tabs and carriage returns.
  ///
  /// Outside of a string the run ends after the first newline or before the
  /// first non-whitespace character and a WHITESPACE token is returned. Inside
  /// a string newlines are skipped as well and, once a non-whitespace
  /// character is found, scanning continues with [next]. Reaching the end of
  /// the text yields an END_OF_FILE token.
  Token finishWhitespace() {
    // Step back one position so the loop re-reads the previous character
    // (presumably the whitespace character the caller already consumed).
    _index--;
    while (_index < _text.length) {
      final ch = _text.codeUnitAt(_index++);
      if (ch == TokenChar.SPACE ||
          ch == TokenChar.TAB ||
          ch == TokenChar.RETURN) {
        // do nothing
      } else if (ch == TokenChar.NEWLINE) {
        if (!_inString) {
          return _finishToken(TokenKind.WHITESPACE); // note the newline?
        }
      } else {
        // Not whitespace: un-read it so it becomes part of the next token.
        _index--;
        if (_inString) {
          return next();
        } else {
          return _finishToken(TokenKind.WHITESPACE);
        }
      }
    }
    return _finishToken(TokenKind.END_OF_FILE);
  }

  /// Consumes the remainder of a `/* ... */` comment, honoring nested
  /// comments.
  ///
  /// Returns an error token if the text ends before the comment is closed.
  /// Otherwise returns a COMMENT token, or the result of [next] when
  /// `_inString` is set.
  Token finishMultiLineComment() {
    // One comment is already open when this method is entered.
    var nesting = 1;
    do {
      var ch = _nextChar();
      if (ch == 0) {
        return _errorToken();
      } else if (ch == TokenChar.ASTERISK) {
        if (_maybeEatChar(TokenChar.SLASH)) {
          nesting--;
        }
      } else if (ch == TokenChar.SLASH) {
        if (_maybeEatChar(TokenChar.ASTERISK)) {
          nesting++;
        }
      }
    } while (nesting > 0);

    if (_inString) {
      return next();
    } else {
      return _finishToken(TokenKind.COMMENT);
    }
  }

  /// Advances past any run of decimal digits at the current position.
  void eatDigits() {
    while (_index < _text.length &&
        TokenizerHelpers.isDigit(_text.codeUnitAt(_index))) {
      _index++;
    }
  }

  /// Returns the value (0-15) of the hexadecimal digit with code unit [c], or
  /// -1 if [c] is not a hexadecimal digit.
  static int _hexDigit(int c) {
    if (c >= 48 /*0*/ && c <= 57 /*9*/) {
      return c - 48;
    } else if (c >= 97 /*a*/ && c <= 102 /*f*/) {
      return c - 87;
    } else if (c >= 65 /*A*/ && c <= 70 /*F*/) {
      return c - 55;
    } else {
      return -1;
    }
  }

  /// Reads a hexadecimal number starting at the current position.
  ///
  /// When [hexLength] is given, exactly that many hexadecimal digits must be
  /// present; -1 is returned if fewer characters remain or a non-hex
  /// character is met. When [hexLength] is omitted, digits are read until the
  /// first non-hex character or the end of the text, and the value read so
  /// far (0 if there were no digits) is returned.
  int readHex([int? hexLength]) {
    int maxIndex;
    if (hexLength == null) {
      // Unbounded: every remaining character, including the last one, may be
      // part of the number.
      maxIndex = _text.length;
    } else {
      // TODO(jimhug): What if this is too long?
      maxIndex = _index + hexLength;
      // Exactly `hexLength` remaining characters is still enough, so only
      // fail when the requested range runs past the end of the text.
      if (maxIndex > _text.length) return -1;
    }
    var result = 0;
    while (_index < maxIndex) {
      final digit = _hexDigit(_text.codeUnitAt(_index));
      if (digit == -1) {
        return hexLength == null ? result : -1;
      }
      // Multiply by 16 rather than shift by 4 since that will result in a
      // correct value for numbers that exceed the 32 bit precision of JS
      // 'integers'.
      // TODO: Figure out a better solution to integer truncation. Issue 638.
      result = (result * 16) + digit;
      _index++;
    }
    return result;
  }

  /// Consumes the rest of a number whose first character has already been
  /// read, returning an INTEGER or DOUBLE token (or an error token, see
  /// [finishNumberExtra]).
  Token finishNumber() {
    eatDigits();

    if (_peekChar() == TokenChar.DOT) {
      // Handle the case of 1.toString().
      _nextChar();
      if (TokenizerHelpers.isDigit(_peekChar())) {
        eatDigits();
        return finishNumberExtra(TokenKind.DOUBLE);
      } else {
        // The dot is not followed by a digit, so it is not part of the
        // number: give it back.
        _index--;
      }
    }

    return finishNumberExtra(TokenKind.INTEGER);
  }

  /// Consumes an optional exponent (`e` or `E`, an optional sign, digits) and
  /// returns the finished number token.
  ///
  /// The token has the given [kind] unless an exponent was consumed, in which
  /// case it is a DOUBLE. If the number is immediately followed by an
  /// identifier-start character, that character is consumed and an error
  /// token is returned instead.
  Token finishNumberExtra(int kind) {
    if (_maybeEatChar(101 /*e*/) || _maybeEatChar(69 /*E*/)) {
      kind = TokenKind.DOUBLE;
      // `_maybeEatChar` compares code units, so the sign must be matched with
      // the `TokenChar` constants (not the `TokenKind` ids). At most one sign
      // is accepted.
      if (!_maybeEatChar(TokenChar.MINUS)) {
        _maybeEatChar(TokenChar.PLUS);
      }
      eatDigits();
    }
    if (_peekChar() != 0 && TokenizerHelpers.isIdentifierStart(_peekChar())) {
      _nextChar();
      return _errorToken('illegal character in number');
    }

    return _finishToken(kind);
  }

  /// Builds a STRING (or STRING_PART when [isPart] is true) literal token
  /// whose value is made of the code units in [buf].
  Token _makeStringToken(List<int> buf, bool isPart) {
    final s = String.fromCharCodes(buf);
    final kind = isPart ? TokenKind.STRING_PART : TokenKind.STRING;
    return LiteralToken(kind, _file.span(_startIndex, _index), s);
  }

  /// Builds a STRING literal token for the text between [start] and [end].
  Token makeIEFilter(int start, int end) {
    var filter = _text.substring(start, end);
    return LiteralToken(TokenKind.STRING, _file.span(start, end), filter);
  }

  /// Builds a STRING literal token for a raw string that has just been fully
  /// consumed, taking the value verbatim from the source text.
  ///
  /// For a multiline string 4 leading and 3 trailing characters are dropped
  /// (plus one initial newline, if present); otherwise 2 leading and 1
  /// trailing characters are dropped.
  Token _makeRawStringToken(bool isMultiline) {
    String s;
    if (isMultiline) {
      // Skip initial newline in multiline strings
      var start = _startIndex + 4;
      if (_text[start] == '\n') start++;
      s = _text.substring(start, _index - 3);
    } else {
      s = _text.substring(_startIndex + 2, _index - 1);
    }
    return LiteralToken(TokenKind.STRING, _file.span(_startIndex, _index), s);
  }

  /// Consumes the body of a triple-quoted string up to and including the
  /// closing three [quote] characters, decoding escape sequences.
  ///
  /// Returns an error token if the text ends first or an escape is invalid.
  Token finishMultilineString(int quote) {
    var buf = <int>[];
    while (true) {
      var ch = _nextChar();
      if (ch == 0) {
        return _errorToken();
      } else if (ch == quote) {
        if (_maybeEatChar(quote)) {
          if (_maybeEatChar(quote)) {
            return _makeStringToken(buf, false);
          }
          // Two quotes that are not followed by a third are content.
          buf.add(quote);
        }
        buf.add(quote);
      } else if (ch == TokenChar.BACKSLASH) {
        var escapeVal = readEscapeSequence();
        if (escapeVal == -1) {
          return _errorToken('invalid hex escape sequence');
        } else {
          buf.add(escapeVal);
        }
      } else {
        buf.add(ch);
      }
    }
  }

  /// Consumes a string whose opening [quote] has already been read, handling
  /// the empty string and triple-quoted (multiline) strings.
  Token finishString(int quote) {
    if (_maybeEatChar(quote)) {
      if (_maybeEatChar(quote)) {
        // skip an initial newline
        _maybeEatChar(TokenChar.NEWLINE);
        return finishMultilineString(quote);
      } else {
        // Opening quote immediately followed by a closing one: empty string.
        return _makeStringToken([], false);
      }
    }
    return finishStringBody(quote);
  }

  /// Consumes a raw string (no escape processing) whose opening [quote] has
  /// already been read, handling the empty string and triple-quoted strings.
  ///
  /// Returns an error token if the text ends before the closing quote.
  Token finishRawString(int quote) {
    if (_maybeEatChar(quote)) {
      if (_maybeEatChar(quote)) {
        return finishMultilineRawString(quote);
      } else {
        return _makeStringToken([], false);
      }
    }
    while (true) {
      var ch = _nextChar();
      if (ch == quote) {
        return _makeRawStringToken(false);
      } else if (ch == 0) {
        return _errorToken();
      }
    }
  }

  /// Consumes the body of a triple-quoted raw string up to and including the
  /// closing three [quote] characters.
  ///
  /// Returns an error token if the text ends first.
  Token finishMultilineRawString(int quote) {
    while (true) {
      var ch = _nextChar();
      if (ch == 0) {
        return _errorToken();
      } else if (ch == quote && _maybeEatChar(quote) && _maybeEatChar(quote)) {
        return _makeRawStringToken(true);
      }
    }
  }

  /// Consumes the body of a single-line string up to and including the
  /// closing [quote], decoding escape sequences.
  ///
  /// Returns an error token if the text ends first or an escape is invalid.
  Token finishStringBody(int quote) {
    var buf = <int>[];
    while (true) {
      var ch = _nextChar();
      if (ch == quote) {
        return _makeStringToken(buf, false);
      } else if (ch == 0) {
        return _errorToken();
      } else if (ch == TokenChar.BACKSLASH) {
        var escapeVal = readEscapeSequence();
        if (escapeVal == -1) {
          return _errorToken('invalid hex escape sequence');
        } else {
          buf.add(escapeVal);
        }
      } else {
        buf.add(ch);
      }
    }
  }

  /// Decodes the escape sequence that follows an already consumed backslash.
  ///
  /// Handles `\n`, `\r`, `\f`, `\b`, `\t`, `\v`, `\xHH`, `\uHHHH` and
  /// `\u{H...}`; any other character stands for itself. Returns the decoded
  /// code unit, or -1 if the sequence is malformed or denotes a value that is
  /// not supported.
  int readEscapeSequence() {
    final ch = _nextChar();
    int hexValue;
    switch (ch) {
      case 110 /*n*/ :
        return TokenChar.NEWLINE;
      case 114 /*r*/ :
        return TokenChar.RETURN;
      case 102 /*f*/ :
        return TokenChar.FF;
      case 98 /*b*/ :
        return TokenChar.BACKSPACE;
      case 116 /*t*/ :
        return TokenChar.TAB;
      case 118 /*v*/ :
        // Vertical tab (U+000B); must differ from the form feed of `\f`.
        return 0x0B;
      case 120 /*x*/ :
        hexValue = readHex(2);
        break;
      case 117 /*u*/ :
        if (_maybeEatChar(TokenChar.LBRACE)) {
          hexValue = readHex();
          if (!_maybeEatChar(TokenChar.RBRACE)) {
            return -1;
          }
        } else {
          hexValue = readHex(4);
        }
        break;
      default:
        return ch;
    }

    if (hexValue == -1) return -1;

    // According to the Unicode standard the high and low surrogate halves
    // used by UTF-16 (U+D800 through U+DFFF) and values above U+10FFFF
    // are not legal Unicode values.
    if (hexValue < 0xD800 || (hexValue > 0xDFFF && hexValue <= 0xFFFF)) {
      return hexValue;
    } else if (hexValue <= 0x10FFFF) {
      messages.error('unicode values greater than 2 bytes not implemented yet',
          _file.span(_startIndex, _startIndex + 1));
      return -1;
    } else {
      return -1;
    }
  }

  /// Finishes a token that started with a dot: a DOUBLE when the dot is
  /// followed by digits, otherwise a DOT token.
  Token finishDot() {
    if (TokenizerHelpers.isDigit(_peekChar())) {
      eatDigits();
      return finishNumberExtra(TokenKind.DOUBLE);
    } else {
      return _finishToken(TokenKind.DOT);
    }
  }
}