// Copyright (c) 2020, the Dart project authors. Please see the AUTHORS file // for details. All rights reserved. Use of this source code is governed by a // BSD-style license that can be found in the LICENSE file. import 'dart:convert'; import 'dart:typed_data'; /// The ISO-8859-2/Latin-2 (Eastern European) code page. final CodePage latin2 = CodePage._bmp('latin-2', '$_ascii$_noControls$_top8859_2'); /// The ISO-8859-3/Latin-3 (South European) code page. final CodePage latin3 = CodePage._bmp('latin-3', '$_ascii$_noControls$_top8859_3'); /// The ISO-8859-4/Latin-4 (North European) code page. final CodePage latin4 = CodePage._bmp('latin-4', '$_ascii$_noControls$_top8859_4'); /// The ISO-8859-5/Latin-Cyrillic code page. final CodePage latinCyrillic = CodePage._bmp('cyrillic', '$_ascii$_noControls$_top8859_5'); /// The ISO-8859-6/Latin-Arabic code page. final CodePage latinArabic = CodePage._bmp('arabic', '$_ascii$_noControls$_top8859_6'); /// The ISO-8859-7/Latin-Greek code page. final CodePage latinGreek = CodePage._bmp('greek', '$_ascii$_noControls$_top8859_7'); /// The ISO-8859-7/Latin-Hebrew code page. final CodePage latinHebrew = CodePage._bmp('hebrew', '$_ascii$_noControls$_top8859_8'); /// The ISO-8859-9/Latin-5 (Turkish) code page. final CodePage latin5 = CodePage._bmp('latin-5', '$_ascii$_noControls$_top8859_9'); /// The ISO-8859-10/Latin-6 (Nordic) code page. final CodePage latin6 = CodePage._bmp('latin-6', '$_ascii$_noControls$_top8859_10'); /// The ISO-8859-11/Latin-Thai code page. final CodePage latinThai = CodePage._bmp('tis620', '$_ascii$_noControls$_top8859_11'); /// The ISO-8859-13/Latin-6 (Baltic Rim) code page. final CodePage latin7 = CodePage._bmp('latin-7', '$_ascii$_noControls$_top8859_13'); /// The ISO-8859-14/Latin-8 (Celtic) code page. final CodePage latin8 = CodePage._bmp('latin-8', '$_ascii$_noControls$_top8859_14'); /// The ISO-8859-15/Latin-9 (Western European revised) code page. final CodePage latin9 = CodePage._bmp('latin-9', '$_ascii$_noControls$_top8859_15'); /// The ISO-8859-16/Latin-10 (South Eastern European) code page. final CodePage latin10 = CodePage._bmp('latin-10', '$_ascii$_noControls$_top8859_16'); /// Characters in ISO-8859-2 above the ASCII and top control characters. const _top8859_2 = '\xa0Ą˘Ł¤ĽŚ§¨ŠŞŤŹ\xadŽŻ°ą˛ł´ľśˇ¸šşťź˝žż' 'ŔÁÂĂÄĹĆÇČÉĘËĚÍÎĎĐŃŇÓÔŐÖ×ŘŮÚŰÜÝŢß' 'ŕáâăäĺćçčéęëěíîďđńňóôőö÷řůúűüýţ˙'; /// Characters in ISO-8859-3 above the ASCII and top control characters. const _top8859_3 = '\xa0Ħ˘£\uFFFD¤Ĥ§¨İŞĞĴ\xad\uFFFDݰħ²³´µĥ·¸ışğĵ½\uFFFDż' 'ÀÁÂ\uFFFDÄĊĈÇÈÉÊËÌÍÎÏ\uFFFDÑÒÓÔĠÖ×ĜÙÚÛÜŬŜß' 'àáâ\uFFFDäċĉçèéêëìíîï\uFFFDñòóôġö÷ĝùúûüŭŝ˙'; /// Characters in ISO-8859-4 above the ASCII and top control characters. const _top8859_4 = '\xa0ĄĸŖ¤Ĩϧ¨ŠĒĢŦ\xadޝ°ą˛ŗ´ĩšēģŧŊžŋ' 'ĀÁÂÃÄÅÆĮČÉĘËĖÍÎĪĐŅŌĶÔÕÖרŲÚÛÜŨŪß' 'āáâãäåæįčéęëėíîīđņōķôõö÷øųúûüũū˙'; /// Characters in ISO-8859-5 above the ASCII and top control characters. const _top8859_5 = '\xa0ЁЂЃЄЅІЇЈЉЊЋЌ\xadЎЏАБВГДЕЖЗИЙКЛМНОП' 'РСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмноп' 'рстуфхцчшщъыьэюя№ёђѓєѕіїјљњћќ§ўџ'; /// Characters in ISO-8859-6 above the ASCII and top control characters. const _top8859_6 = '\xa0\uFFFD\uFFFD\uFFFD¤\uFFFD\uFFFD\uFFFD' '\uFFFD\uFFFD\uFFFD\uFFFD\u060c\xad\uFFFD\uFFFD' '\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD' '\uFFFD\uFFFD\uFFFD\u061b\uFFFD\uFFFD\uFFFD\u061f' '\uFFFD\u0621\u0622\u0623\u0624\u0625\u0626\u0627' '\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f' '\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637' '\u0638\u0639\u063a\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD' '\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647' '\u0648\u0649\u064a\u064b\u064c\u064d\u064e\u064f' '\u0650\u0651\u0652\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD' '\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD'; /// Characters in ISO-8859-7 above the ASCII and top control characters. const _top8859_7 = '\xa0‘’£€₯¦§¨©ͺ«¬\xad\uFFFD―°±²³΄΅Ά·ΈΉΊ»Ό½ΎΏ' 'ΐΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡ\uFFFDΣΤΥΦΧΨΩΪΫάέήί' 'ΰαβγδεζηθικλμνξοπρςστυφχψωϊϋόύώ\uFFFD'; /// Characters in ISO-8859-8 above the ASCII and top control characters. const _top8859_8 = '\xa0\uFFFD¢£¤¥¦§¨©×«¬\xad®¯°±²³´µ¶·¸¹÷»¼½¾\uFFFD' '\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD' '\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD' '\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD' '\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD‗' '\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7' '\u05d8\u05d9\u05da\u05db\u05dc\u05dd\u05de\u05df' '\u05e0\u05e1\u05e2\u05e3\u05e4\u05e5\u05e6\u05e7' '\u05e8\u05e9\u05ea\uFFFD\uFFFD\u200e\u200f\uFFFD'; /// Characters in ISO-8859-9 above the ASCII and top control characters. const _top8859_9 = '\xa0¡¢£¤¥¦§¨©ª«¬\xad®¯°±²³´µ¶·¸¹º»¼½¾¿' 'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏĞÑÒÓÔÕÖרÙÚÛÜİŞß' 'àáâãäåæçèéêëìíîïğñòóôõö÷øùúûüışÿ'; /// Characters in ISO-8859-10 above the ASCII and top control characters. const _top8859_10 = '\xa0ĄĒĢĪĨͧĻĐŠŦŽ\xadŪŊ°ąēģīĩķ·ļđšŧž―ūŋ' 'ĀÁÂÃÄÅÆĮČÉĘËĖÍÎÏÐŅŌÓÔÕÖŨØŲÚÛÜÝÞß' 'āáâãäåæįčéęëėíîïðņōóôõöũøųúûüýþĸ'; /// Characters in ISO-8859-11 above the ASCII and top control characters. const _top8859_11 = '\xa0กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟ' 'ภมยรฤลฦวศษสหฬอฮฯะัาำิีึืฺุู\uFFFD\uFFFD\uFFFD\uFFFD฿' 'เแโใไๅๆ็่้๊๋์ํ๎๏๐๑๒๓๔๕๖๗๘๙๚๛\uFFFD\uFFFD\uFFFD\uFFFD'; /// Characters in ISO-8859-13 above the ASCII and top control characters. const _top8859_13 = '\xa0”¢£¤„¦§Ø©Ŗ«¬\xad®Æ°±²³“µ¶·ø¹ŗ»¼½¾æ' 'ĄĮĀĆÄÅĘĒČÉŹĖĢĶĪĻŠŃŅÓŌÕÖ×ŲŁŚŪÜŻŽß' 'ąįāćäåęēčéźėģķīļšńņóōõö÷ųłśūüżž’'; /// Characters in ISO-8859-14 above the ASCII and top control characters. const _top8859_14 = '\xa0Ḃḃ£ĊċḊ§Ẁ©ẂḋỲ\xad®ŸḞḟĠġṀṁ¶ṖẁṗẃṠỳẄẅṡ' 'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏŴÑÒÓÔÕÖṪØÙÚÛÜÝŶß' 'àáâãäåæçèéêëìíîïŵñòóôõöṫøùúûüýŷÿ'; /// Characters in ISO-8859-15 above the ASCII and top control characters. const _top8859_15 = '\xa0¡¢£€¥Š§š©ª«¬\xad®¯°±²³Žµ¶·ž¹º»ŒœŸ¿' 'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞß' 'àáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'; /// Characters in ISO-8859-16 above the ASCII and top control characters. const _top8859_16 = '\xa0ĄąŁ€„Чš©Ș«Ź\xadźŻ°±ČłŽ”¶·žčș»ŒœŸż' 'ÀÁÂĂÄĆÆÇÈÉÊËÌÍÎÏĐŃÒÓÔŐÖŚŰÙÚÛÜĘȚß' 'àáâăäćæçèéêëìíîïđńòóôőöśűùúûüęțÿ'; const _noControls = '\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD' '\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD' '\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD' '\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD'; /// ASCII characters without control characters. Shared by many code pages. const _ascii = '$_noControls' // ignore: missing_whitespace_between_adjacent_strings r""" !"#$%&'()*+,-./0123456789:;<=>?""" r'@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_' '`abcdefghijklmnopqrstuvwxyz{|}~\uFFFD'; /// A mapping between bytes and characters. /// /// A code page is a way to map bytes to character. /// As such, it can only represent 256 different characters. class CodePage extends Encoding { @override final CodePageDecoder decoder; @override final String name; CodePageEncoder? _encoder; /// Creates a code page with the given name and characters. /// /// The [characters] string must contain 256 code points (runes) /// in the order of the bytes representing them. /// /// Any byte not defined by the code page should have a /// U+FFFD (invalid character) code point at its place in /// [characters]. /// /// The name is used by [Encoding.name]. factory CodePage(String name, String characters) = CodePage._general; /// Creates a code page with the characters of [characters]. /// /// The [characters] must contain precisely 256 characters (code points). /// /// A U+FFFD (invalid character) entry in [characters] means that the /// corresponding byte does not have a definition in this code page. CodePage._general(this.name, String characters) : decoder = _createDecoder(characters); /// Creates a code page with characters from the basic multilingual plane. /// /// The basic multilingual plane (BMP) contains the first 65536 code points. /// As such, each character can be represented by a single UTF-16 code unit, /// which makes some operations more efficient. /// /// The [characters] must contain precisely 256 code points from the BMP /// which means that it should have length 256 and not contain any surrogates. /// /// A U+FFFD (invalid character) entry in [characters] means that the /// corresponding byte does not have a definition in this code page. CodePage._bmp(this.name, String characters) : decoder = _BmpCodePageDecoder(characters); /// The character associated with a particular byte in this code page. /// /// The [byte] must be in the range 0..255. /// The returned value should be a Unicode scalar value /// (a non-surrogate code point). /// /// If a code page does not have a defined character for a particular /// byte, it should return the Unicode invalid character (U+FFFD) /// instad. int operator [](int byte) => decoder._char(byte); /// Encodes [input] using `encoder.convert`. @override Uint8List encode(String input, {int? invalidCharacter}) => encoder.convert(input, invalidCharacter: invalidCharacter); /// Decodes [bytes] using `encoder.convert`. @override String decode(List bytes, {bool allowInvalid = false}) => decoder.convert(bytes, allowInvalid: allowInvalid); @override CodePageEncoder get encoder => _encoder ??= decoder._createEncoder(); } /// A code page decoder, converts from bytes to characters. /// /// A code page assigns characters to a subset of byte values. /// The decoder converts those bytes back to their characters. abstract class CodePageDecoder implements Converter, String> { /// Decodes a sequence of bytes into a string using a code page. /// /// The code page assigns one character to each byte. /// Values in [input] must be bytes (integers in the range 0..255). /// /// If [allowInvalid] is true, non-byte values in [input], /// or byte values not defined as a character in the code page, /// are emitted as U+FFFD (the Unicode invalid character). /// If not true, the bytes must be calid and defined characters. @override String convert(List input, {bool allowInvalid = false}); CodePageEncoder _createEncoder(); int _char(int byte); } /// Creates a decoder from [characters]. /// /// Recognizes if [characters] contains only characters in the BMP, /// and creates a [_BmpCodePageDecoder] in that case. CodePageDecoder _createDecoder(String characters) { var result = Uint32List(256); var i = 0; var allChars = 0; for (var char in characters.runes) { if (i >= 256) { throw ArgumentError.value( characters, 'characters', 'Must contain 256 characters'); } result[i++] = char; allChars |= char; } if (i < 256) { throw ArgumentError.value( characters, 'characters', 'Must contain 256 characters'); } if (allChars <= 0xFFFF) { // It's in the BMP. return _BmpCodePageDecoder(characters); } return _NonBmpCodePageDecoder._(result); } /// An input [ByteConversionSink] for decoders where each input byte can be be /// considered independantly. class _CodePageDecoderSink extends ByteConversionSink { final Sink _output; final Converter, String> _decoder; _CodePageDecoderSink(this._output, this._decoder); @override void add(List chunk) { _output.add(_decoder.convert(chunk)); } @override void close() { _output.close(); } } /// Code page with non-BMP characters. class _NonBmpCodePageDecoder extends Converter, String> implements CodePageDecoder { final Uint32List _characters; _NonBmpCodePageDecoder(String characters) : this._(_buildMapping(characters)); _NonBmpCodePageDecoder._(this._characters); @override int _char(int byte) => _characters[byte]; static Uint32List _buildMapping(String characters) { var result = Uint32List(256); var i = 0; for (var char in characters.runes) { if (i >= 256) { throw ArgumentError.value( characters, 'characters', 'Must contain 256 characters'); } result[i++] = char; } if (i < 256) { throw ArgumentError.value( characters, 'characters', 'Must contain 256 characters'); } return result; } @override CodePageEncoder _createEncoder() { var result = {}; for (var i = 0; i < 256; i++) { var char = _characters[i]; if (char != 0xFFFD) { result[char] = i; } } return CodePageEncoder._(result); } @override String convert(List input, {bool allowInvalid = false}) { var buffer = Uint32List(input.length); for (var i = 0; i < input.length; i++) { var byte = input[i]; if (byte & 0xff != byte) throw FormatException('Not a byte', input, i); buffer[i] = _characters[byte]; } return String.fromCharCodes(buffer); } @override Sink> startChunkedConversion(Sink sink) => _CodePageDecoderSink(sink, this); } class _BmpCodePageDecoder extends Converter, String> implements CodePageDecoder { final String _characters; _BmpCodePageDecoder(String characters) : _characters = characters { if (characters.length != 256) { throw ArgumentError.value(characters, 'characters', 'Must contain 256 characters. Was ${characters.length}'); } } @override int _char(int byte) => _characters.codeUnitAt(byte); @override String convert(List bytes, {bool allowInvalid = false}) { if (allowInvalid) return _convertAllowInvalid(bytes); var count = bytes.length; var codeUnits = Uint16List(count); for (var i = 0; i < count; i++) { var byte = bytes[i]; if (byte != byte & 0xff) { throw FormatException('Not a byte value', bytes, i); } var character = _characters.codeUnitAt(byte); if (character == 0xFFFD) { throw FormatException('Not defined in this code page', bytes, i); } codeUnits[i] = character; } return String.fromCharCodes(codeUnits); } @override Sink> startChunkedConversion(Sink sink) => _CodePageDecoderSink(sink, this); String _convertAllowInvalid(List bytes) { var count = bytes.length; var codeUnits = Uint16List(count); for (var i = 0; i < count; i++) { var byte = bytes[i]; int character; if (byte == byte & 0xff) { character = _characters.codeUnitAt(byte); } else { character = 0xFFFD; } codeUnits[i] = character; } return String.fromCharCodes(codeUnits); } @override CodePageEncoder _createEncoder() => CodePageEncoder._bmp(_characters); } /// Encoder for a code page. /// /// Converts a string into bytes where each byte represents that character /// according to the code page definition. class CodePageEncoder extends Converter> { final Map _encoding; CodePageEncoder._bmp(String characters) : _encoding = _createBmpEncoding(characters); CodePageEncoder._(this._encoding); static Map _createBmpEncoding(String characters) { var encoding = {}; for (var i = 0; i < characters.length; i++) { var char = characters.codeUnitAt(i); if (char != 0xFFFD) encoding[characters.codeUnitAt(i)] = i; } return encoding; } /// Converts input to the byte encoding in this code page. /// /// If [invalidCharacter] is supplied, it must be a byte value /// (in the range 0..255). /// /// If [input] contains characters that are not available /// in this code page, they are replaced by the [invalidCharacter] byte, /// and then [invalidCharacter] must have been supplied. @override Uint8List convert(String input, {int? invalidCharacter}) { if (invalidCharacter != null) { RangeError.checkValueInInterval( invalidCharacter, 0, 255, 'invalidCharacter'); } var count = input.length; var result = Uint8List(count); var j = 0; for (var i = 0; i < count; i++) { var char = input.codeUnitAt(i); var byte = _encoding[char]; nullCheck: if (byte == null) { // Check for surrogate. var offset = i; if (char & 0xFC00 == 0xD800 && i + 1 < count) { var next = input.codeUnitAt(i + 1); if ((next & 0xFC00) == 0xDC00) { i = i + 1; char = 0x10000 + ((char & 0x3ff) << 10) + (next & 0x3ff); byte = _encoding[char]; if (byte != null) break nullCheck; } } byte = invalidCharacter ?? (throw FormatException( 'Not a character in this code page', input, offset)); } result[j++] = byte; } return Uint8List.sublistView(result, 0, j); } }