|
| 1 | +# |
| 2 | +# MDEV-39995: JSON_CONTAINS and JSON_EQUALS do not compare strings |
| 3 | +# based on semantics |
| 4 | +# |
| 5 | +# |
| 6 | +# JSON string values with Unicode escape sequences should be treated |
| 7 | +# as semantically equal to their literal equivalents. |
| 8 | +# \u0041 is the Unicode escape for 'A'. |
| 9 | +# |
| 10 | +# JSON_CONTAINS: should return 1 for semantically equal strings |
| 11 | +SELECT JSON_CONTAINS('"A"', '"\\u0041"'); |
| 12 | +JSON_CONTAINS('"A"', '"\\u0041"') |
| 13 | +1 |
| 14 | +SELECT JSON_CONTAINS('"\\u0041"', '"A"'); |
| 15 | +JSON_CONTAINS('"\\u0041"', '"A"') |
| 16 | +1 |
| 17 | +# JSON_OVERLAPS: should return 1 for semantically equal strings |
| 18 | +SELECT JSON_OVERLAPS('"A"', '"\\u0041"'); |
| 19 | +JSON_OVERLAPS('"A"', '"\\u0041"') |
| 20 | +1 |
| 21 | +# JSON_EQUALS: should return 1 for semantically equal strings |
| 22 | +SELECT JSON_EQUALS('"A"', '"\\u0041"'); |
| 23 | +JSON_EQUALS('"A"', '"\\u0041"') |
| 24 | +1 |
| 25 | +# JSON_UNQUOTE correctly resolves the escape (proving they are the same) |
| 26 | +SELECT JSON_UNQUOTE('"A"') = JSON_UNQUOTE('"\\u0041"'); |
| 27 | +JSON_UNQUOTE('"A"') = JSON_UNQUOTE('"\\u0041"') |
| 28 | +1 |
| 29 | +# |
| 30 | +# Additional test from MDEV-39995 comment: |
| 31 | +# Using a hex literal that represents the bytes of '"\u0041"' |
| 32 | +# |
| 33 | +SELECT JSON_UNQUOTE('"A"'); |
| 34 | +JSON_UNQUOTE('"A"') |
| 35 | +A |
| 36 | +SELECT JSON_UNQUOTE(CAST(0x225C753030343122 AS CHAR)); |
| 37 | +JSON_UNQUOTE(CAST(0x225C753030343122 AS CHAR)) |
| 38 | +A |
| 39 | +SELECT JSON_CONTAINS('"A"', CAST(0x225C753030343122 AS CHAR)); |
| 40 | +JSON_CONTAINS('"A"', CAST(0x225C753030343122 AS CHAR)) |
| 41 | +1 |
| 42 | +SELECT JSON_CONTAINS(JSON_QUOTE(JSON_UNQUOTE('"A"')), |
| 43 | +JSON_QUOTE(JSON_UNQUOTE(CAST(0x225C753030343122 AS CHAR)))); |
| 44 | +JSON_CONTAINS(JSON_QUOTE(JSON_UNQUOTE('"A"')), |
| 45 | +JSON_QUOTE(JSON_UNQUOTE(CAST(0x225C753030343122 AS CHAR)))) |
| 46 | +1 |
| 47 | +# |
| 48 | +# More Unicode escape equivalences |
| 49 | +# |
| 50 | +# \u0048\u0065\u006C\u006C\u006F = "Hello" |
| 51 | +SELECT JSON_CONTAINS('"Hello"', '"\\u0048\\u0065\\u006C\\u006C\\u006F"'); |
| 52 | +JSON_CONTAINS('"Hello"', '"\\u0048\\u0065\\u006C\\u006C\\u006F"') |
| 53 | +1 |
| 54 | +SELECT JSON_EQUALS('"Hello"', '"\\u0048\\u0065\\u006C\\u006C\\u006F"'); |
| 55 | +JSON_EQUALS('"Hello"', '"\\u0048\\u0065\\u006C\\u006C\\u006F"') |
| 56 | +1 |
| 57 | +SELECT JSON_OVERLAPS('"Hello"', '"\\u0048\\u0065\\u006C\\u006C\\u006F"'); |
| 58 | +JSON_OVERLAPS('"Hello"', '"\\u0048\\u0065\\u006C\\u006C\\u006F"') |
| 59 | +1 |
| 60 | +# Mixed literal and escape in the same string: "H\u0065llo" = "Hello" |
| 61 | +SELECT JSON_EQUALS('"Hello"', '"H\\u0065llo"'); |
| 62 | +JSON_EQUALS('"Hello"', '"H\\u0065llo"') |
| 63 | +1 |
| 64 | +# |
| 65 | +# Test within arrays and objects |
| 66 | +# |
| 67 | +SELECT JSON_CONTAINS('["A", "B"]', '["\\u0041"]'); |
| 68 | +JSON_CONTAINS('["A", "B"]', '["\\u0041"]') |
| 69 | +1 |
| 70 | +SELECT JSON_CONTAINS('{"key": "A"}', '{"key": "\\u0041"}'); |
| 71 | +JSON_CONTAINS('{"key": "A"}', '{"key": "\\u0041"}') |
| 72 | +1 |
| 73 | +SELECT JSON_EQUALS('["A", "B"]', '["\\u0041", "\\u0042"]'); |
| 74 | +JSON_EQUALS('["A", "B"]', '["\\u0041", "\\u0042"]') |
| 75 | +1 |
| 76 | +SELECT JSON_EQUALS('{"key": "A"}', '{"key": "\\u0041"}'); |
| 77 | +JSON_EQUALS('{"key": "A"}', '{"key": "\\u0041"}') |
| 78 | +1 |
| 79 | +# |
| 80 | +# Surrogate pairs: characters above U+FFFF encoded as two \uXXXX escapes. |
| 81 | +# U+1F600 (😀) = \uD83D\uDE00 |
| 82 | +# U+1F60A (😊) = \uD83D\uDE0A |
| 83 | +# |
| 84 | +SET NAMES utf8mb4; |
| 85 | +SELECT JSON_EQUALS('"😀"', '"\\uD83D\\uDE00"'); |
| 86 | +JSON_EQUALS('"?"', '"\\uD83D\\uDE00"') |
| 87 | +1 |
| 88 | +SELECT JSON_CONTAINS('"😀"', '"\\uD83D\\uDE00"'); |
| 89 | +JSON_CONTAINS('"?"', '"\\uD83D\\uDE00"') |
| 90 | +1 |
| 91 | +SELECT JSON_OVERLAPS('"😀"', '"\\uD83D\\uDE00"'); |
| 92 | +JSON_OVERLAPS('"?"', '"\\uD83D\\uDE00"') |
| 93 | +1 |
| 94 | +SELECT JSON_EQUALS('"😊"', '"\\uD83D\\uDE0A"'); |
| 95 | +JSON_EQUALS('"?"', '"\\uD83D\\uDE0A"') |
| 96 | +1 |
| 97 | +SELECT JSON_CONTAINS('["😀", "hello"]', '["\\uD83D\\uDE00"]'); |
| 98 | +JSON_CONTAINS('["?", "hello"]', '["\\uD83D\\uDE00"]') |
| 99 | +1 |
| 100 | +SELECT JSON_EQUALS('{"emoji": "😀"}', '{"emoji": "\\uD83D\\uDE00"}'); |
| 101 | +JSON_EQUALS('{"emoji": "?"}', '{"emoji": "\\uD83D\\uDE00"}') |
| 102 | +1 |
| 103 | +# |
| 104 | +# Escaped object keys: \u006B\u0065\u0079 = "key" |
| 105 | +# |
| 106 | +SELECT JSON_EQUALS('{"key":"A"}', '{"\\u006B\\u0065\\u0079":"A"}'); |
| 107 | +JSON_EQUALS('{"key":"A"}', '{"\\u006B\\u0065\\u0079":"A"}') |
| 108 | +1 |
| 109 | +SELECT JSON_CONTAINS('{"key":"A"}', '{"\\u006B\\u0065\\u0079":"A"}'); |
| 110 | +JSON_CONTAINS('{"key":"A"}', '{"\\u006B\\u0065\\u0079":"A"}') |
| 111 | +1 |
| 112 | +# |
| 113 | +# BMP non-ASCII: é = U+00E9, literal UTF-8 vs escape |
| 114 | +# |
| 115 | +SELECT JSON_EQUALS('"é"', '"\\u00E9"'); |
| 116 | +JSON_EQUALS('"é"', '"\\u00E9"') |
| 117 | +1 |
| 118 | +SELECT JSON_CONTAINS('"é"', '"\\u00E9"'); |
| 119 | +JSON_CONTAINS('"é"', '"\\u00E9"') |
| 120 | +1 |
| 121 | +SELECT JSON_OVERLAPS('["é"]', '["\\u00E9"]'); |
| 122 | +JSON_OVERLAPS('["é"]', '["\\u00E9"]') |
| 123 | +1 |
| 124 | +# |
| 125 | +# CJK: 中 = U+4E2D |
| 126 | +# |
| 127 | +SELECT JSON_EQUALS('"中"', '"\\u4E2D"'); |
| 128 | +JSON_EQUALS('"中"', '"\\u4E2D"') |
| 129 | +1 |
0 commit comments