import 'constants.dart';
import 'html_input_stream.dart';

/// String-like object with an associated position and various extra methods.
///
/// If the position is ever greater than the string length then an exception is
/// raised.
class EncodingBytes {
  /// The text being scanned.
  final String _bytes;

  /// Raw cursor into [_bytes].
  ///
  /// Starts at -1 so that the first [_next] call yields index 0. The
  /// [_position] getter reports any negative value as 0.
  int __position = -1;

  EncodingBytes(this._bytes);

  int get _length => _bytes.length;

  /// Advances the cursor by one and returns the character it lands on.
  ///
  /// Throws [_EncodingRangeException] when the cursor moves past the last
  /// character. The cursor is advanced before the check, so it stays advanced
  /// even when this throws.
  String _next() {
    final p = __position = __position + 1;
    if (p >= _length) {
      throw _EncodingRangeException('No more elements');
    } else if (p < 0) {
      throw RangeError(p);
    }
    return _bytes[p];
  }

  /// Moves the cursor back by one and returns the character it lands on.
  ///
  /// Throws [_EncodingRangeException] if the cursor is already at or past the
  /// end, and [RangeError] if the cursor is negative before moving.
  String _previous() {
    var p = __position;
    if (p >= _length) {
      throw _EncodingRangeException('No more elements');
    } else if (p < 0) {
      throw RangeError(p);
    }
    __position = p = p - 1;
    return _bytes[p];
  }

  /// Sets the cursor to [value].
  ///
  /// The range check is made against the *current* cursor, not [value]: once
  /// the cursor has run off the end, every further assignment throws
  /// [_EncodingRangeException].
  set _position(int value) {
    if (__position >= _length) {
      throw _EncodingRangeException('No more elements');
    }
    __position = value;
  }

  /// The current cursor, with the initial negative value reported as 0.
  ///
  /// Throws [_EncodingRangeException] if the cursor is at or past the end.
  int get _position {
    if (__position >= _length) {
      throw _EncodingRangeException('No more elements');
    }
    if (__position >= 0) {
      return __position;
    } else {
      return 0;
    }
  }

  /// The character at the current position.
  String get _currentByte => _bytes[_position];

  /// Skip past a list of characters. Defaults to skipping [isWhitespace].
  ///
  /// Returns the first character that is not skipped and leaves the cursor on
  /// it. Returns `null`, with the cursor parked at the end of the data, when
  /// every remaining character is skipped.
  String? _skipChars([_CharPredicate? skipChars]) {
    skipChars ??= isWhitespace;
    var p = _position; // use property for the error-checking
    while (p < _length) {
      final c = _bytes[p];
      if (!skipChars(c)) {
        __position = p;
        return c;
      }
      p += 1;
    }
    __position = p;
    return null;
  }

  /// Skips forward until a character satisfying [untilChars] is found.
  ///
  /// Returns that character and leaves the cursor on it. Returns `null`, with
  /// the cursor parked at the end of the data, when no character matches.
  String? _skipUntil(_CharPredicate untilChars) {
    var p = _position;
    while (p < _length) {
      final c = _bytes[p];
      if (untilChars(c)) {
        __position = p;
        return c;
      }
      p += 1;
    }
    // Park the cursor at the end, as [_skipChars] does. Otherwise a caller
    // slicing from the old position up to the current one gets an empty
    // string instead of the remaining data, and the "ran off the end"
    // exception path in [ContentAttrParser.parse] can never trigger.
    __position = p;
    return null;
  }

  /// Look for a sequence of bytes at the start of a string. If the bytes
  /// are found return true and advance the position to the byte after the
  /// match. Otherwise return false and leave the position alone.
  bool _matchBytes(String bytes) {
    final p = _position;
    if (_bytes.length < p + bytes.length) {
      return false;
    }
    final data = _bytes.substring(p, p + bytes.length);
    if (data == bytes) {
      _position += bytes.length;
      return true;
    }
    return false;
  }

  /// Look for the next sequence of bytes matching a given sequence. If
  /// a match is found advance the position to the last byte of the match.
  ///
  /// Never returns `false`: a missing match is reported by throwing
  /// [_EncodingRangeException].
  bool _jumpTo(String bytes) {
    final newPosition = _bytes.indexOf(bytes, _position);
    if (newPosition >= 0) {
      __position = newPosition + bytes.length - 1;
      return true;
    } else {
      throw _EncodingRangeException('No more elements');
    }
  }

  /// Returns the text from [start] up to, but not including, [end].
  ///
  /// [end] defaults to the end of the data; a negative [end] counts back from
  /// the end of the data.
  String _slice(int start, [int? end]) {
    end ??= _length;
    if (end < 0) end += _length;
    return _bytes.substring(start, end);
  }
}

/// A tag handler. Returns `true` to keep scanning, `false` to stop.
typedef _MethodHandler = bool Function();

/// Associates a literal prefix with the handler to run when it matches.
class _DispatchEntry {
  final String pattern;
  final _MethodHandler handler;

  _DispatchEntry(this.pattern, this.handler);
}

/// Mini parser for detecting character encoding from meta elements.
class EncodingParser {
  final EncodingBytes _data;
  String? _encoding;

  /// [bytes] - the data to work on for encoding detection.
  EncodingParser(List<int> bytes)
      // Note: this is intentionally interpreting bytes as codepoints.
      : _data = EncodingBytes(String.fromCharCodes(bytes).toLowerCase());

  /// Scans the data for a meta element that declares an encoding.
  ///
  /// Returns the detected encoding name, or `null` if the data is exhausted
  /// before one is found.
  String? getEncoding() {
    // NOTE(review): this dispatch table and the scan loop below were rebuilt
    // after the original text was lost to markup stripping. Confirm the
    // patterns and their order against the reference implementation.
    // Order matters: longer prefixes must be tried before the bare '<'.
    final methodDispatch = [
      _DispatchEntry('<!--', _handleComment),
      _DispatchEntry('<meta', _handleMeta),
      _DispatchEntry('</', _handlePossibleEndTag),
      _DispatchEntry('<!', _handleOther),
      _DispatchEntry('<?', _handleOther),
      _DispatchEntry('<', _handlePossibleStartTag),
    ];

    try {
      while (true) {
        for (final dispatch in methodDispatch) {
          if (_data._matchBytes(dispatch.pattern)) {
            final keepParsing = dispatch.handler();
            if (keepParsing) break;

            // We found an encoding. Stop.
            return _encoding;
          }
        }
        _data._position += 1;
      }
    } on _EncodingRangeException catch (_) {
      // Running off the end of the data is the normal way for the scan to
      // finish without a result.
    }
    return _encoding;
  }

  /// Skip over comments.
  bool _handleComment() => _data._jumpTo('-->');

  /// Reads the attributes of a meta element looking for an encoding.
  ///
  /// Returns `false` (stop scanning) once [_encoding] has been set, and
  /// `true` otherwise.
  bool _handleMeta() {
    if (!isWhitespace(_data._currentByte)) {
      // "<meta" is not followed by whitespace, so this is not a meta
      // element; just keep going.
      return true;
    }
    // NOTE(review): the attribute loop below was rebuilt after the original
    // text was lost to markup stripping. `codecName` is presumably provided
    // by html_input_stream.dart - confirm.
    while (true) {
      // Try to find the next attribute after the current position.
      final attr = _getAttribute();
      if (attr == null) return true;

      if (attr[0] == 'charset') {
        final codec = codecName(attr[1]);
        if (codec != null) {
          _encoding = codec;
          return false;
        }
      } else if (attr[0] == 'content') {
        final contentParser = ContentAttrParser(EncodingBytes(attr[1]));
        final codec = codecName(contentParser.parse());
        if (codec != null) {
          _encoding = codec;
          return false;
        }
      }
    }
  }

  bool _handlePossibleStartTag() => _handlePossibleTag(false);

  bool _handlePossibleEndTag() {
    _data._next();
    return _handlePossibleTag(true);
  }

  /// Skips over a start or end tag, consuming all of its attributes.
  bool _handlePossibleTag(bool endTag) {
    if (!isLetter(_data._currentByte)) {
      // If the next byte is not an ascii letter either ignore this
      // fragment (possible start tag case) or treat it according to
      // handleOther.
      if (endTag) {
        _data._previous();
        _handleOther();
      }
      return true;
    }

    final c = _data._skipUntil(_isSpaceOrAngleBracket);
    if (c == '<') {
      // Return to the first step in the overall "two step" algorithm,
      // reprocessing the < byte.
      _data._previous();
    } else {
      // Read all attributes.
      var attr = _getAttribute();
      while (attr != null) {
        attr = _getAttribute();
      }
    }
    return true;
  }

  bool _handleOther() => _data._jumpTo('>');

  /// Return a name,value pair for the next attribute in the stream,
  /// if one is found, or null.
  List<String>? _getAttribute() {
    // Step 1 (skip chars)
    var c = _data._skipChars((x) => x == '/' || isWhitespace(x));
    // Step 2
    if (c == '>' || c == null) {
      return null;
    }
    // Step 3
    final attrName = <String>[];
    final attrValue = <String>[];
    // Step 4 attribute name
    while (true) {
      if (c == null) {
        return null;
      } else if (c == '=' && attrName.isNotEmpty) {
        break;
      } else if (isWhitespace(c)) {
        // Step 6: skip the whitespace after the name and stop ON the first
        // non-whitespace character, so that Step 7 can test it for '='.
        // Advancing one further here would step past the '=' in
        // `name = value` and lose the attribute's value.
        c = _data._skipChars();
        break;
      } else if (c == '/' || c == '>') {
        return [attrName.join(), ''];
      } else if (isLetter(c)) {
        attrName.add(c.toLowerCase());
      } else {
        attrName.add(c);
      }
      // Step 5
      c = _data._next();
    }
    // Step 7
    if (c != '=') {
      _data._previous();
      return [attrName.join(), ''];
    }
    // Step 8
    _data._next();
    // Step 9
    c = _data._skipChars();
    // Step 10
    if (c == "'" || c == '"') {
      // 10.1
      final quoteChar = c;
      while (true) {
        // 10.2
        c = _data._next();
        if (c == quoteChar) {
          // 10.3
          _data._next();
          return [attrName.join(), attrValue.join()];
        } else if (isLetter(c)) {
          // 10.4
          attrValue.add(c.toLowerCase());
        } else {
          // 10.5
          attrValue.add(c);
        }
      }
    } else if (c == '>') {
      return [attrName.join(), ''];
    } else if (c == null) {
      return null;
    } else if (isLetter(c)) {
      attrValue.add(c.toLowerCase());
    } else {
      attrValue.add(c);
    }
    // Step 11
    while (true) {
      c = _data._next();
      if (_isSpaceOrAngleBracket(c)) {
        return [attrName.join(), attrValue.join()];
      } else if (isLetter(c)) {
        attrValue.add(c.toLowerCase());
      } else {
        attrValue.add(c);
      }
    }
  }
}

/// Extracts the `charset=...` value from the text of a `content` attribute.
class ContentAttrParser {
  final EncodingBytes data;

  ContentAttrParser(this.data);

  /// Returns the charset value found in [data], or `null` if there is none.
  ///
  /// The value may be wrapped in matching single or double quotes. An
  /// unquoted value ends at the first whitespace character or at the end of
  /// the data.
  String? parse() {
    try {
      // Check if the attr name is charset
      // otherwise return
      data._jumpTo('charset');
      data._position += 1;
      data._skipChars();
      if (data._currentByte != '=') {
        // If there is no = sign keep looking for attrs
        return null;
      }
      data._position += 1;
      data._skipChars();
      // Look for an encoding between matching quote marks
      if (data._currentByte == '"' || data._currentByte == "'") {
        final quoteMark = data._currentByte;
        data._position += 1;
        final oldPosition = data._position;
        if (data._jumpTo(quoteMark)) {
          return data._slice(oldPosition, data._position);
        } else {
          return null;
        }
      } else {
        // Unquoted value
        final oldPosition = data._position;
        try {
          data._skipUntil(isWhitespace);
          // Reading the position throws if the value ran to the end of the
          // data; that case is handled below.
          return data._slice(oldPosition, data._position);
        } on _EncodingRangeException catch (_) {
          // Return the whole remaining value
          return data._slice(oldPosition);
        }
      }
    } on _EncodingRangeException catch (_) {
      return null;
    }
  }
}

/// Whether [char] is `>`, `<`, or whitespace.
bool _isSpaceOrAngleBracket(String char) {
  return char == '>' || char == '<' || isWhitespace(char);
}

/// A test applied to a single-character string.
typedef _CharPredicate = bool Function(String char);

/// Thrown when an [EncodingBytes] cursor runs past the end of its data.
class _EncodingRangeException implements Exception {
  final String message;

  _EncodingRangeException(this.message);
}