// Copyright (c) 2012, the Dart project authors.  Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.

part of '../parser.dart';

// TODO: We should update the tokenization to follow what's described in the
// spec: https://www.w3.org/TR/css-syntax-3/#tokenization.

/// Tokenizer built on [TokenizerBase]; its [next] method reads one character
/// and maps it (plus any look-ahead it needs) to a single [Token].
///
/// NOTE(review): this copy of the file looks damaged. Text is missing inside
/// the `case TokenChar.LESS` branch of [next], and the braces that follow no
/// longer balance; see the notes at that branch and above
/// `finishMultiLineComment`. The code tokens are kept exactly as found.
class Tokenizer extends TokenizerBase {
  /// U+ prefix for unicode characters.
  // ignore: non_constant_identifier_names
  final UNICODE_U = 'U'.codeUnitAt(0);
  // ignore: non_constant_identifier_names
  final UNICODE_LOWER_U = 'u'.codeUnitAt(0);
  // ignore: non_constant_identifier_names
  final UNICODE_PLUS = '+'.codeUnitAt(0);
  // ignore: non_constant_identifier_names
  final QUESTION_MARK = '?'.codeUnitAt(0);

  /// CDATA keyword.
  // NOTE(review): the bare `List` may have lost a type argument; `codeUnits`
  // yields integers. Confirm against the upstream file.
  // ignore: non_constant_identifier_names
  final List CDATA_NAME = 'CDATA'.codeUnits;

  /// Forwards every argument unchanged to the superclass constructor.
  Tokenizer(super.file, super.text, super.skipWhitespace, [super.index]);

  /// Returns the next token, starting at the current `_index`.
  ///
  /// The start position is recorded in `_startIndex`, one character is read
  /// with `_nextChar()`, and the switch below picks the token kind, consuming
  /// further characters where an operator or number needs them.
  ///
  /// When [unicodeRange] is true (or `inSelectorExpression` is set) a '-' is
  /// always returned as a MINUS token instead of starting a number or an
  /// identifier.
  @override
  Token next({bool unicodeRange = false}) {
    // keep track of our starting position
    _startIndex = _index;

    int ch;
    ch = _nextChar();
    switch (ch) {
      case TokenChar.NEWLINE:
      case TokenChar.RETURN:
      case TokenChar.SPACE:
      case TokenChar.TAB:
        return finishWhitespace();
      case TokenChar.END_OF_FILE:
        return _finishToken(TokenKind.END_OF_FILE);
      case TokenChar.AT:
        var peekCh = _peekChar();
        if (TokenizerHelpers.isIdentifierStart(peekCh)) {
          // Remember both positions so the scan can be rewound if the name
          // after '@' turns out not to be a known directive.
          var oldIndex = _index;
          var oldStartIndex = _startIndex;

          _startIndex = _index;
          ch = _nextChar();
          finishIdentifier();

          // Is it a directive?
          var tokId = TokenKind.matchDirectives(
              _text, _startIndex, _index - _startIndex);
          if (tokId == -1) {
            // No, is it a margin directive?
            tokId = TokenKind.matchMarginDirectives(
                _text, _startIndex, _index - _startIndex);
          }

          if (tokId != -1) {
            return _finishToken(tokId);
          } else {
            // Didn't find a CSS directive or margin directive so the @name is
            // probably the Less definition '@name: value_variable_definition'.
            _startIndex = oldStartIndex;
            _index = oldIndex;
          }
        }
        return _finishToken(TokenKind.AT);
      case TokenChar.DOT:
        var start = _startIndex; // Start where the dot started.
        if (maybeEatDigit()) {
          // looks like a number dot followed by digit(s).
          var number = finishNumber();
          if (number.kind == TokenKind.INTEGER) {
            // It's a number but it's preceded by a dot, so make it a double.
            _startIndex = start;
            return _finishToken(TokenKind.DOUBLE);
          } else {
            // Don't allow dot followed by a double (e.g,  '..1').
            return _errorToken();
          }
        }
        // It's really a dot.
        return _finishToken(TokenKind.DOT);
      case TokenChar.LPAREN:
        return _finishToken(TokenKind.LPAREN);
      case TokenChar.RPAREN:
        return _finishToken(TokenKind.RPAREN);
      case TokenChar.LBRACE:
        return _finishToken(TokenKind.LBRACE);
      case TokenChar.RBRACE:
        return _finishToken(TokenKind.RBRACE);
      case TokenChar.LBRACK:
        return _finishToken(TokenKind.LBRACK);
      case TokenChar.RBRACK:
        if (_maybeEatChar(TokenChar.RBRACK) &&
            _maybeEatChar(TokenChar.GREATER)) {
          // ]]> is dropped: scanning continues with the token that follows.
          return next();
        }
        return _finishToken(TokenKind.RBRACK);
      case TokenChar.HASH:
        return _finishToken(TokenKind.HASH);
      case TokenChar.PLUS:
        if (_nextCharsAreNumber(ch)) return finishNumber();
        return _finishToken(TokenKind.PLUS);
      case TokenChar.MINUS:
        if (inSelectorExpression || unicodeRange) {
          // If parsing in pseudo function expression then minus is an operator
          // not part of identifier e.g., interval value range (e.g. U+400-4ff)
          // or minus operator in selector expression.
          return _finishToken(TokenKind.MINUS);
        } else if (_nextCharsAreNumber(ch)) {
          return finishNumber();
        } else if (TokenizerHelpers.isIdentifierStart(ch)) {
          return finishIdentifier();
        }
        return _finishToken(TokenKind.MINUS);
      case TokenChar.GREATER:
        return _finishToken(TokenKind.GREATER);
      case TokenChar.TILDE:
        if (_maybeEatChar(TokenChar.EQUALS)) {
          return _finishToken(TokenKind.INCLUDES); // ~=
        }
        return _finishToken(TokenKind.TILDE);
      case TokenChar.ASTERISK:
        if (_maybeEatChar(TokenChar.EQUALS)) {
          return _finishToken(TokenKind.SUBSTRING_MATCH); // *=
        }
        return _finishToken(TokenKind.ASTERISK);
      case TokenChar.AMPERSAND:
        return _finishToken(TokenKind.AMPERSAND);
      case TokenChar.NAMESPACE:
        if (_maybeEatChar(TokenChar.EQUALS)) {
          return _finishToken(TokenKind.DASH_MATCH); // |=
        }
        return _finishToken(TokenKind.NAMESPACE);
      case TokenChar.COLON:
        return _finishToken(TokenKind.COLON);
      case TokenChar.COMMA:
        return _finishToken(TokenKind.COMMA);
      case TokenChar.SEMICOLON:
        return _finishToken(TokenKind.SEMICOLON);
      case TokenChar.PERCENT:
        return _finishToken(TokenKind.PERCENT);
      case TokenChar.SINGLE_QUOTE:
        return _finishToken(TokenKind.SINGLE_QUOTE);
      case TokenChar.DOUBLE_QUOTE:
        return _finishToken(TokenKind.DOUBLE_QUOTE);
      case TokenChar.SLASH:
        // "/*" opens a multi-line comment; a lone '/' is a SLASH token.
        if (_maybeEatChar(TokenChar.ASTERISK)) return finishMultiLineComment();
        return _finishToken(TokenKind.SLASH);
      case TokenChar.LESS:
        // NOTE(review): text appears to be missing between this case label and
        // the `if` below; the only surviving fragment was the comment tail
        // "(CDC). */". As written, this branch checks for '-' then '>' right
        // after '<' and produces HTML_COMMENT, which presumably belonged to a
        // different routine. Restore the lost text from the upstream file
        // before relying on this branch.
        if (_maybeEatChar(TokenChar.MINUS)) {
          if (_maybeEatChar(TokenChar.GREATER)) {
            if (_inString) {
              return next();
            } else {
              return _finishToken(TokenKind.HTML_COMMENT);
            }
          }
        }
    }
  }
}

// NOTE(review): with the braces exactly as found, the class body has already
// been closed above, so this `@override` method sits outside `Tokenizer` and
// the final `}` after it has no opening partner. It is presumably meant to be
// a member of the class; confirm once the missing text is restored.

/// Consumes the remainder of a multi-line comment (the opening "/*" has
/// already been consumed by the caller).
///
/// Reads characters until '*' immediately followed by '/' is seen. If
/// `_nextChar()` returns 0 first, an INCOMPLETE_COMMENT token is returned.
/// On a closed comment: when `_inString` is set the comment is skipped and
/// the following token is returned instead; otherwise a COMMENT token is
/// returned.
@override
Token finishMultiLineComment() {
  while (true) {
    var ch = _nextChar();
    if (ch == 0) {
      return _finishToken(TokenKind.INCOMPLETE_COMMENT);
    } else if (ch == 42 /*'*'*/) {
      if (_maybeEatChar(47 /*'/'*/)) {
        if (_inString) {
          return next();
        } else {
          return _finishToken(TokenKind.COMMENT);
        }
      }
    }
  }
}
}

/// Static helper methods.
class TokenizerHelpers {
  /// Returns true when [c] can begin an identifier: everything accepted by
  /// [isIdentifierStartExpr], plus '-'.
  static bool isIdentifierStart(int c) =>
      isIdentifierStartExpr(c) || c == 45 /*-*/;

  /// Returns true when [c] is one of the ASCII digits '0' through '9'.
  static bool isDigit(int c) => c >= 48 /*0*/ && c <= 57 /*9*/;

  /// Returns true when [c] is an ASCII hexadecimal digit: '0'-'9', 'a'-'f'
  /// or 'A'-'F'.
  static bool isHexDigit(int c) =>
      isDigit(c) ||
      (c >= 97 /*a*/ && c <= 102 /*f*/) ||
      (c >= 65 /*A*/ && c <= 70 /*F*/);

  /// Returns true when [c] can continue an identifier: everything accepted by
  /// [isIdentifierPartExpr], plus '-'.
  static bool isIdentifierPart(int c) =>
      isIdentifierPartExpr(c) || c == 45 /*-*/;

  /// Pseudo function expressions identifiers can't have a minus sign.
  ///
  /// Accepts ASCII letters, '_', '\' (so an escaped character can start an
  /// identifier) and any value of 0xA0 or above.
  static bool isIdentifierStartExpr(int c) =>
      (c >= 97 /*a*/ && c <= 122 /*z*/) ||
      (c >= 65 /*A*/ && c <= 90 /*Z*/) ||
      // Note: Unicode 10646 chars U+00A0 or higher are allowed, see:
      // http://www.w3.org/TR/CSS21/syndata.html#value-def-identifier
      // http://www.w3.org/TR/CSS21/syndata.html#characters
      // Also, escaped character should be allowed.
      c == 95 /*_*/ ||
      c >= 0xA0 ||
      c == 92 /*\*/;

  /// Pseudo function expressions identifiers can't have a minus sign.
  ///
  /// Accepts everything [isIdentifierStartExpr] accepts, plus ASCII digits.
  static bool isIdentifierPartExpr(int c) =>
      isIdentifierStartExpr(c) || isDigit(c);
}