// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file // for details. All rights reserved. Use of this source code is governed by a // BSD-style license that can be found in the LICENSE file. // Character constants. const int _zero = 0x30; const int _upperCaseA = 0x41; const int _upperCaseZ = 0x5a; const int _lowerCaseA = 0x61; const int _lowerCaseZ = 0x7a; const int _asciiCaseBit = 0x20; /// Checks if strings [a] and [b] differ only on the case of ASCII letters. /// /// Strings are equal if they have the same length, and the characters at /// each index are the same, or they are ASCII letters where one is upper-case /// and the other is the lower-case version of the same letter. /// /// The comparison does not ignore the case of non-ASCII letters, so /// an upper-case ae-ligature (Æ) is different from /// a lower case ae-ligature (æ). /// /// Ignoring non-ASCII letters is not generally a good idea, but it makes sense /// for situations where the strings are known to be ASCII. Examples could /// be Dart identifiers, base-64 or hex encoded strings, GUIDs or similar /// strings with a known structure. bool equalsIgnoreAsciiCase(String a, String b) { if (a.length != b.length) return false; for (var i = 0; i < a.length; i++) { var aChar = a.codeUnitAt(i); var bChar = b.codeUnitAt(i); if (aChar == bChar) continue; // Quick-check for whether this may be different cases of the same letter. if (aChar ^ bChar != _asciiCaseBit) return false; // If it's possible, then check if either character is actually an ASCII // letter. var aCharLowerCase = aChar | _asciiCaseBit; if (_lowerCaseA <= aCharLowerCase && aCharLowerCase <= _lowerCaseZ) { continue; } return false; } return true; } /// Hash code for a string which is compatible with [equalsIgnoreAsciiCase]. /// /// The hash code is unaffected by changing the case of ASCII letters, but /// the case of non-ASCII letters do affect the result. int hashIgnoreAsciiCase(String string) { // Jenkins hash code ( http://en.wikipedia.org/wiki/Jenkins_hash_function). // adapted to smi values. // Same hash used by dart2js for strings, modified to ignore ASCII letter // case. var hash = 0; for (var i = 0; i < string.length; i++) { var char = string.codeUnitAt(i); // Convert lower-case ASCII letters to upper case.upper // This ensures that strings that differ only in case will have the // same hash code. if (_lowerCaseA <= char && char <= _lowerCaseZ) char -= _asciiCaseBit; hash = 0x1fffffff & (hash + char); hash = 0x1fffffff & (hash + ((0x0007ffff & hash) << 10)); hash >>= 6; } hash = 0x1fffffff & (hash + ((0x03ffffff & hash) << 3)); hash >>= 11; return 0x1fffffff & (hash + ((0x00003fff & hash) << 15)); } /// Compares [a] and [b] lexically, converting ASCII letters to upper case. /// /// Comparison treats all lower-case ASCII letters as upper-case letters, /// but does no case conversion for non-ASCII letters. /// /// If two strings differ only on the case of ASCII letters, the one with the /// capital letter at the first difference will compare as less than the other /// string. This tie-breaking ensures that the comparison is a total ordering /// on strings and is compatible with equality. /// /// Ignoring non-ASCII letters is not generally a good idea, but it makes sense /// for situations where the strings are known to be ASCII. Examples could /// be Dart identifiers, base-64 or hex encoded strings, GUIDs or similar /// strings with a known structure. int compareAsciiUpperCase(String a, String b) { var defaultResult = 0; // Returned if no difference found. for (var i = 0; i < a.length; i++) { if (i >= b.length) return 1; var aChar = a.codeUnitAt(i); var bChar = b.codeUnitAt(i); if (aChar == bChar) continue; // Upper-case if letters. var aUpperCase = aChar; var bUpperCase = bChar; if (_lowerCaseA <= aChar && aChar <= _lowerCaseZ) { aUpperCase -= _asciiCaseBit; } if (_lowerCaseA <= bChar && bChar <= _lowerCaseZ) { bUpperCase -= _asciiCaseBit; } if (aUpperCase != bUpperCase) return (aUpperCase - bUpperCase).sign; if (defaultResult == 0) defaultResult = aChar - bChar; } if (b.length > a.length) return -1; return defaultResult.sign; } /// Compares [a] and [b] lexically, converting ASCII letters to lower case. /// /// Comparison treats all upper-case ASCII letters as lower-case letters, /// but does no case conversion for non-ASCII letters. /// /// If two strings differ only on the case of ASCII letters, the one with the /// capital letter at the first difference will compare as less than the other /// string. This tie-breaking ensures that the comparison is a total ordering /// on strings. /// /// Ignoring non-ASCII letters is not generally a good idea, but it makes sense /// for situations where the strings are known to be ASCII. Examples could /// be Dart identifiers, base-64 or hex encoded strings, GUIDs or similar /// strings with a known structure. int compareAsciiLowerCase(String a, String b) { var defaultResult = 0; for (var i = 0; i < a.length; i++) { if (i >= b.length) return 1; var aChar = a.codeUnitAt(i); var bChar = b.codeUnitAt(i); if (aChar == bChar) continue; var aLowerCase = aChar; var bLowerCase = bChar; // Upper case if ASCII letters. if (_upperCaseA <= bChar && bChar <= _upperCaseZ) { bLowerCase += _asciiCaseBit; } if (_upperCaseA <= aChar && aChar <= _upperCaseZ) { aLowerCase += _asciiCaseBit; } if (aLowerCase != bLowerCase) return (aLowerCase - bLowerCase).sign; if (defaultResult == 0) defaultResult = aChar - bChar; } if (b.length > a.length) return -1; return defaultResult.sign; } /// Compares strings [a] and [b] according to [natural sort ordering][]. /// /// A natural sort ordering is a lexical ordering where embedded /// numerals (digit sequences) are treated as a single unit and ordered by /// numerical value. /// This means that `"a10b"` will be ordered after `"a7b"` in natural /// ordering, where lexical ordering would put the `1` before the `7`, ignoring /// that the `1` is part of a larger number. /// /// Example: /// The following strings are in the order they would be sorted by using this /// comparison function: /// /// "a", "a0", "a0b", "a1", "a01", "a9", "a10", "a100", "a100b", "aa" /// /// [natural sort ordering]: https://en.wikipedia.org/wiki/Natural_sort_order int compareNatural(String a, String b) { for (var i = 0; i < a.length; i++) { if (i >= b.length) return 1; var aChar = a.codeUnitAt(i); var bChar = b.codeUnitAt(i); if (aChar != bChar) { return _compareNaturally(a, b, i, aChar, bChar); } } if (b.length > a.length) return -1; return 0; } /// Compares strings [a] and [b] according to lower-case /// [natural sort ordering][]. /// /// ASCII letters are converted to lower case before being compared, like /// for [compareAsciiLowerCase], then the result is compared like for /// [compareNatural]. /// /// If two strings differ only on the case of ASCII letters, the one with the /// capital letter at the first difference will compare as less than the other /// string. This tie-breaking ensures that the comparison is a total ordering /// on strings. /// /// [natural sort ordering]: https://en.wikipedia.org/wiki/Natural_sort_order int compareAsciiLowerCaseNatural(String a, String b) { var defaultResult = 0; // Returned if no difference found. for (var i = 0; i < a.length; i++) { if (i >= b.length) return 1; var aChar = a.codeUnitAt(i); var bChar = b.codeUnitAt(i); if (aChar == bChar) continue; var aLowerCase = aChar; var bLowerCase = bChar; if (_upperCaseA <= aChar && aChar <= _upperCaseZ) { aLowerCase += _asciiCaseBit; } if (_upperCaseA <= bChar && bChar <= _upperCaseZ) { bLowerCase += _asciiCaseBit; } if (aLowerCase != bLowerCase) { return _compareNaturally(a, b, i, aLowerCase, bLowerCase); } if (defaultResult == 0) defaultResult = aChar - bChar; } if (b.length > a.length) return -1; return defaultResult.sign; } /// Compares strings [a] and [b] according to upper-case /// [natural sort ordering][]. /// /// ASCII letters are converted to upper case before being compared, like /// for [compareAsciiUpperCase], then the result is compared like for /// [compareNatural]. /// /// If two strings differ only on the case of ASCII letters, the one with the /// capital letter at the first difference will compare as less than the other /// string. This tie-breaking ensures that the comparison is a total ordering /// on strings /// /// [natural sort ordering]: https://en.wikipedia.org/wiki/Natural_sort_order int compareAsciiUpperCaseNatural(String a, String b) { var defaultResult = 0; for (var i = 0; i < a.length; i++) { if (i >= b.length) return 1; var aChar = a.codeUnitAt(i); var bChar = b.codeUnitAt(i); if (aChar == bChar) continue; var aUpperCase = aChar; var bUpperCase = bChar; if (_lowerCaseA <= aChar && aChar <= _lowerCaseZ) { aUpperCase -= _asciiCaseBit; } if (_lowerCaseA <= bChar && bChar <= _lowerCaseZ) { bUpperCase -= _asciiCaseBit; } if (aUpperCase != bUpperCase) { return _compareNaturally(a, b, i, aUpperCase, bUpperCase); } if (defaultResult == 0) defaultResult = aChar - bChar; } if (b.length > a.length) return -1; return defaultResult.sign; } /// Check for numbers overlapping the current mismatched characters. /// /// If both [aChar] and [bChar] are digits, use numerical comparison. /// Check if the previous characters is a non-zero number, and if not, /// skip - but count - leading zeros before comparing numbers. /// /// If one is a digit and the other isn't, check if the previous character /// is a digit, and if so, the the one with the digit is the greater number. /// /// Otherwise just returns the difference between [aChar] and [bChar]. int _compareNaturally(String a, String b, int index, int aChar, int bChar) { assert(aChar != bChar); var aIsDigit = _isDigit(aChar); var bIsDigit = _isDigit(bChar); if (aIsDigit) { if (bIsDigit) { return _compareNumerically(a, b, aChar, bChar, index); } else if (index > 0 && _isDigit(a.codeUnitAt(index - 1))) { // aChar is the continuation of a longer number. return 1; } } else if (bIsDigit && index > 0 && _isDigit(b.codeUnitAt(index - 1))) { // bChar is the continuation of a longer number. return -1; } // Characters are both non-digits, or not continuation of earlier number. return (aChar - bChar).sign; } /// Compare numbers overlapping [aChar] and [bChar] numerically. /// /// If the numbers have the same numerical value, but one has more leading /// zeros, the longer number is considered greater than the shorter one. /// /// This ensures a total ordering on strings compatible with equality. int _compareNumerically(String a, String b, int aChar, int bChar, int index) { // Both are digits. Find the first significant different digit, then find // the length of the numbers. if (_isNonZeroNumberSuffix(a, index)) { // Part of a longer number, differs at this index, just count the length. var result = _compareDigitCount(a, b, index, index); if (result != 0) return result; // If same length, the current character is the most significant differing // digit. return (aChar - bChar).sign; } // Not part of larger (non-zero) number, so skip leading zeros before // comparing numbers. var aIndex = index; var bIndex = index; if (aChar == _zero) { do { aIndex++; if (aIndex == a.length) return -1; // number in a is zero, b is not. aChar = a.codeUnitAt(aIndex); } while (aChar == _zero); if (!_isDigit(aChar)) return -1; } else if (bChar == _zero) { do { bIndex++; if (bIndex == b.length) return 1; // number in b is zero, a is not. bChar = b.codeUnitAt(bIndex); } while (bChar == _zero); if (!_isDigit(bChar)) return 1; } if (aChar != bChar) { var result = _compareDigitCount(a, b, aIndex, bIndex); if (result != 0) return result; return (aChar - bChar).sign; } // Same leading digit, one had more leading zeros. // Compare digits until reaching a difference. while (true) { var aIsDigit = false; var bIsDigit = false; aChar = 0; bChar = 0; if (++aIndex < a.length) { aChar = a.codeUnitAt(aIndex); aIsDigit = _isDigit(aChar); } if (++bIndex < b.length) { bChar = b.codeUnitAt(bIndex); bIsDigit = _isDigit(bChar); } if (aIsDigit) { if (bIsDigit) { if (aChar == bChar) continue; // First different digit found. break; } // bChar is non-digit, so a has longer number. return 1; } else if (bIsDigit) { return -1; // b has longer number. } else { // Neither is digit, so numbers had same numerical value. // Fall back on number of leading zeros // (reflected by difference in indices). return (aIndex - bIndex).sign; } } // At first differing digits. var result = _compareDigitCount(a, b, aIndex, bIndex); if (result != 0) return result; return (aChar - bChar).sign; } /// Checks which of [a] and [b] has the longest sequence of digits. /// /// Starts counting from `i + 1` and `j + 1` (assumes that `a[i]` and `b[j]` are /// both already known to be digits). int _compareDigitCount(String a, String b, int i, int j) { while (++i < a.length) { var aIsDigit = _isDigit(a.codeUnitAt(i)); if (++j == b.length) return aIsDigit ? 1 : 0; var bIsDigit = _isDigit(b.codeUnitAt(j)); if (aIsDigit) { if (bIsDigit) continue; return 1; } else if (bIsDigit) { return -1; } else { return 0; } } if (++j < b.length && _isDigit(b.codeUnitAt(j))) { return -1; } return 0; } bool _isDigit(int charCode) => (charCode ^ _zero) <= 9; /// Check if the digit at [index] is continuing a non-zero number. /// /// If there is no non-zero digits before, then leading zeros at [index] /// are also ignored when comparing numerically. If there is a non-zero digit /// before, then zeros at [index] are significant. bool _isNonZeroNumberSuffix(String string, int index) { while (--index >= 0) { var char = string.codeUnitAt(index); if (char != _zero) return _isDigit(char); } return false; }