Skip to content

Commit f44b8d4

Browse files
committed
use the correct matcher for the surrogate pair code range
1 parent 07aa6ce commit f44b8d4

2 files changed

Lines changed: 2 additions & 1 deletion

File tree

src/SIL.LCModel.Core/Text/TsStringSerializer.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ public static string SerializeTsStringToXml(ITsString tss, ILgWritingSystemFacto
188188
return xml.ToString();
189189
}
190190

191-
private static readonly Regex InvalidXmlRegex = new Regex(@"[^\x09\x0A\x0D\x20-\uD7FF\uE000-\uFFFD\u10000-\u10FFFF]", RegexOptions.Compiled);
191+
private static readonly Regex InvalidXmlRegex = new Regex(@"[^\x09\x0A\x0D\x20-\uD7FF\uE000-\uFFFD\p{Cs}]", RegexOptions.Compiled);
192192
public static string StripInvalidXmlChars(string text)
193193
{
194194
// Remove characters not allowed in XML:

tests/SIL.LCModel.Core.Tests/Text/TsStringSerializerTests.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ public void SerializeTsStringToXml_StripsInvalidControlCharacter()
5555
[TestCase(" 𐰉 (dǒng)")]//Nushu script
5656
[TestCase("𠔤野 (Nishino)")]//Japanese Kanji
5757
[TestCase("𠮷野家 (Yóu yě jiā)")]//Historic Chinese
58+
[TestCase("🦊")]//emoji
5859
public void SerializeTsStringToXml_DoesNotStripValidCharacters(string word)
5960
{
6061
ITsString tss = TsStringUtils.MakeString(word, EnWS);

0 commit comments

Comments
 (0)