diff --git a/src/SIL.LCModel/DomainServices/ITextUtils.cs b/src/SIL.LCModel/DomainServices/ITextUtils.cs
index 342991e0..f0591a49 100644
--- a/src/SIL.LCModel/DomainServices/ITextUtils.cs
+++ b/src/SIL.LCModel/DomainServices/ITextUtils.cs
@@ -275,6 +275,18 @@ public static void ParseText(IStText sttext)
}
}
+ /// <summary>
+ /// Determine whether text ends with an EOS character.
+ /// This is used by the FieldWorks interlinear importer
+ /// to make sure that segments are well-formed.
+ /// </summary>
+ /// <param name="text">The text to examine. Must not be null.</param>
+ /// <param name="cache">The cache whose WritingSystemFactory is handed to the segment breaker. Must not be null.</param>
+ /// <returns>
+ /// False when running the segment breaker over the text flags a leftover final segment
+ /// that was not closed by an EOS character; true otherwise.
+ /// </returns>
+ /// <exception cref="System.ArgumentNullException">text or cache is null.</exception>
+ public static bool EndsWithEOS(ITsString text, LcmCache cache)
+ {
+ // Fail fast with a descriptive exception instead of a NullReferenceException further down.
+ if (text == null)
+ {
+ throw new System.ArgumentNullException("text");
+ }
+ if (cache == null)
+ {
+ throw new System.ArgumentNullException("cache");
+ }
+ // No ParagraphParser is supplied (null): only the breaker's end state is needed here,
+ // not the segments themselves.
+ var collector = new SegmentMaker(text, cache.WritingSystemFactory, null);
+ collector.Run();
+ // ExtraSegment is raised by Run() when trailing text remains that did not end on an EOS character.
+ return !collector.ExtraSegment;
+ }
+
///
/// tokenize the paragraph with segments and analyses (wordforms generally, though we try to preserve other existing ones).
///
@@ -1632,6 +1644,7 @@ internal abstract class SegmentBreaker
private int m_csegs;
private int m_prevCh;
private readonly ILgWritingSystemFactory m_wsf;
+ internal bool ExtraSegment = false; // Set to true by Run() when leftover text at the end becomes a segment without the parse having ended in the FoundEosChar state.
// The idea here is that certain characters more-or-less mark the end of a segment:
// basically, sentence-terminating characters like period, question-mark, and so forth.
@@ -1824,7 +1837,13 @@ public void Run()
}
// We reached the end of the loop. Make a segment out of anything left over.
if (ichStartSeg < m_tssText.Length)
+ {
+ if (state != SegParseState.FoundEosChar)
+ {
+ ExtraSegment = true;
+ }
CreateSegment(ichStartSeg, m_tssText.Length);
+ }
}
@@ -1985,7 +2004,10 @@ internal SegmentMaker(ITsString text, ILgWritingSystemFactory wsf, ParagraphPars
protected override void CreateSegment(int ichMin, int ichLim)
{
base.CreateSegment(ichMin, ichLim);
- m_segments.Add(m_paraParser.CreateSegment(ichMin, ichLim));
+ if (m_paraParser != null) // No parser (presumably the EndsWithEOS case, which constructs SegmentMaker with null): only the base bookkeeping runs and no segment is collected.
+ {
+ m_segments.Add(m_paraParser.CreateSegment(ichMin, ichLim));
+ }
}
///
diff --git a/tests/SIL.LCModel.Tests/DomainServices/ParagraphParserTests.cs b/tests/SIL.LCModel.Tests/DomainServices/ParagraphParserTests.cs
index 64285c49..ecf03591 100644
--- a/tests/SIL.LCModel.Tests/DomainServices/ParagraphParserTests.cs
+++ b/tests/SIL.LCModel.Tests/DomainServices/ParagraphParserTests.cs
@@ -946,6 +946,14 @@ public void CheckValidGuessesAfterInsertNewWord_LT8467()
ValidateGuesses(expectedGuessesAfterEdit, paraGuessed);
}
+ /// <summary>
+ /// Checks ParagraphParser.EndsWithEOS against text with no terminator, text ending in a
+ /// period, and text ending in a period followed by a quotation mark.
+ /// </summary>
+ [Test]
+ public void EndsWithEOS()
+ {
+ // No sentence-final punctuation: expected to report false.
+ var unterminated = TsStringUtils.MakeString("abc", Cache.DefaultVernWs);
+ var unterminatedResult = ParagraphParser.EndsWithEOS(unterminated, Cache);
+ Assert.IsFalse(unterminatedResult);
+
+ // Ends with a period: expected to report true.
+ var terminated = TsStringUtils.MakeString("abc.", Cache.DefaultVernWs);
+ var terminatedResult = ParagraphParser.EndsWithEOS(terminated, Cache);
+ Assert.IsTrue(terminatedResult);
+
+ // A quotation mark after the period is still expected to report true.
+ var terminatedThenQuote = TsStringUtils.MakeString("abc.\"", Cache.DefaultVernWs);
+ var terminatedThenQuoteResult = ParagraphParser.EndsWithEOS(terminatedThenQuote, Cache);
+ Assert.IsTrue(terminatedThenQuoteResult);
+ }
+
private void ValidateGuesses(IList expectedGuesses, IStTxtPara paraWithGuesses)
{
var segsParaGuesses = paraWithGuesses.SegmentsOS;