@@ -13,6 +13,7 @@ import JapaneseResearcher from "../../../src/languageProcessing/languages/ja/Res
1313import getMorphologyData from "../../specHelpers/getMorphologyData" ;
1414import { realWorldULExample1 , realWorldULExample2 } from "../helpers/sanitize/mergeListItemsSpec" ;
1515import buildTree from "../../specHelpers/parse/buildTree" ;
16+ import getSentencesFromTree from "../../../src/languageProcessing/helpers/sentence/getSentencesFromTree" ;
1617import { primeLanguageSpecificData } from "../../../src/languageProcessing/helpers/morphology/buildTopicStems" ;
1718
1819const morphologyData = getMorphologyData ( "en" ) ;
@@ -1460,6 +1461,71 @@ describe( "Test for the research", function() {
14601461 expect ( keyphraseDistributionResearcher ( paperWithList , researcherListCondition ) . keyphraseDistractionPercentage ) . toEqual (
14611462 keyphraseDistributionResearcher ( paperWithWords , researcherWordsCondition ) . keyphraseDistractionPercentage ) ;
14621463 } ) ;
1464+
describe( "leaves no sentenceParentNode back-references on the tree", () => {
	/*
	 * Why these tests exist:
	 * the research calls getSentencesFromTree( tree, true ), which assigns `sentence.sentenceParentNode = parentNode`
	 * to every sentence in the tree. Because the parent paragraph also keeps that sentence in its `sentences` array,
	 * the result is a paragraph -> sentence -> sentenceParentNode -> paragraph cycle. Once the tree is reused across
	 * calls (post tree-build dedup), the cycle leaks into downstream research/analyze results and breaks
	 * Transporter.serialize. The research therefore has to clean up after itself so the tree stays acyclic.
	 */

	/**
	 * Creates an English paper with the keyphrase "keyphrase", builds its tree and runs the research on it.
	 *
	 * @param {string} html The HTML text of the paper.
	 *
	 * @returns {Paper} The paper, after the keyphrase distribution research has run on it.
	 */
	const runResearchOn = ( html ) => {
		// A fresh attributes object is created for every paper, exactly like the inline setup did.
		const analyzedPaper = new Paper( html, { locale: "en_US", keyword: "keyphrase" } );
		const paperResearcher = new Researcher( analyzedPaper );
		buildTree( analyzedPaper, paperResearcher );
		paperResearcher.addResearchData( "morphology", morphologyData );

		// Only the side effects on the tree matter here, so the research result is discarded.
		keyphraseDistributionResearcher( analyzedPaper, paperResearcher );

		return analyzedPaper;
	};

	/**
	 * Asserts that the tree contains sentences and that none of them carries a `sentenceParentNode` property.
	 *
	 * @param {Object} tree The tree to inspect.
	 *
	 * @returns {void}
	 */
	const expectNoParentBackReferences = ( tree ) => {
		const treeSentences = getSentencesFromTree( tree );

		// Guard against a vacuous pass: without sentences the loop below would assert nothing.
		expect( treeSentences.length ).toBeGreaterThan( 0 );
		for ( const treeSentence of treeSentences ) {
			expect( treeSentence.sentenceParentNode ).toBeUndefined();
		}
	};

	it( "strips sentenceParentNode from tree sentences after running on a paragraph-only paper", () => {
		const html = [
			"<p>The keyphrase appears here. The keyphrase appears again.</p>",
			"<p>This is a distinct paragraph without the keyphrase. Another sentence.</p>",
		].join( "" );

		const analyzedPaper = runResearchOn( html );

		expectNoParentBackReferences( analyzedPaper.getTree() );
	} );

	it( "strips sentenceParentNode from tree sentences after running on a paper with list items", () => {
		// In the list-items code path sentences are merged and the merged sentence also receives a
		// sentenceParentNode (as an array). The original sentences in the tree carry the back-reference too,
		// and those are the ones that must be cleaned up.
		const html = [
			"<p>The keyphrase intro paragraph.</p>",
			"<ul><li>First list item with the keyphrase.</li><li>Second list item without it.</li></ul>",
		].join( "" );

		const analyzedPaper = runResearchOn( html );

		expectNoParentBackReferences( analyzedPaper.getTree() );
	} );

	it( "leaves the tree JSON-stringifiable (no cycle) after running", () => {
		// JSON.stringify throws "Converting circular structure to JSON" when it meets a cycle, which is the very
		// failure the bug caused downstream. Stringifying successfully is the strongest end-to-end proof that the
		// cycle is gone, regardless of which property was holding the back-reference.
		const html = [
			"<p>The keyphrase appears here. The keyphrase appears again.</p>",
			"<ul><li>First list item with the keyphrase.</li><li>Second list item without it.</li></ul>",
		].join( "" );

		const analyzedPaper = runResearchOn( html );

		expect( () => JSON.stringify( analyzedPaper.getTree() ) ).not.toThrow();
	} );
} );
14631529} ) ;
14641530
14651531describe ( "a test for exact match of keyphrase in English" , ( ) => {
0 commit comments