@@ -66,23 +66,15 @@ namespace torali {
6666 std::vector<uint8_t > alt;
6767 };
6868
69- template <typename TAlign, typename TQualities>
70- inline uint32_t
71- _getAlignmentQual (TAlign const & align, TQualities const & qual) {
72- typedef typename TAlign::index TAIndex;
73- uint32_t baseQualSum = 0 ;
74- uint32_t seqPtr = 0 ;
75- uint32_t alignedBases = 0 ;
76- for (TAIndex j = 0 ; j < (TAIndex) align.shape ()[1 ]; ++j) {
77- if (align[1 ][j] != ' -' ) {
78- if (align[0 ][j] != ' -' ) {
79- ++alignedBases;
80- baseQualSum += qual[seqPtr];
81- }
82- ++seqPtr;
83- }
84- }
85- return (baseQualSum / alignedBases);
69+
70+ template <typename TConfig>
71+ inline double
72+ _editDistanceHW (TConfig const & c, std::string const & query, std::string const & target) {
73+ double score = 0 ;
74+ EdlibAlignResult align = edlibAlign (query.c_str (), query.size (), target.c_str (), target.size (), edlibNewAlignConfig (2 * c.flankQuality * query.size (), EDLIB_MODE_HW , EDLIB_TASK_DISTANCE , NULL , 0 ));
75+ if (align.editDistance != -1 ) score = ((1.0 - c.flankQuality ) * (double ) query.size ()) / (double ) (align.editDistance + 1 );
76+ edlibFreeAlignResult (align);
77+ return score;
8678 }
8779
8880 template <typename TPos>
@@ -239,7 +231,6 @@ namespace torali {
239231 {
240232 typedef typename TSpanMap::value_type::value_type TSpanPair;
241233 typedef typename TCountMap::value_type::value_type TCountPair;
242- typedef std::vector<uint8_t > TQuality;
243234
244235 // Open file handles
245236 typedef std::vector<samFile*> TSamFile;
@@ -316,14 +307,14 @@ namespace torali {
316307 for (unsigned int file_c = 0 ; file_c < c.files .size (); ++file_c) {
317308 // Pair qualities and features
318309 typedef boost::unordered_map<std::size_t , uint8_t > TQualities;
319- TQualities qualities;
320310 TQualities qualitiestra;
321311 typedef boost::unordered_map<std::size_t , bool > TClip;
322- TClip clip;
323312 TClip cliptra;
324313
325314 // Iterate chromosomes
326315 for (int32_t refIndex=0 ; refIndex < (int32_t ) hdr[file_c]->n_targets ; ++refIndex) {
316+ TQualities qualities;
317+ TClip clip;
327318
328319 // Any SV breakpoints on this chromosome?
329320 if (!svOnChr[refIndex]) continue ;
@@ -430,8 +421,8 @@ namespace torali {
430421 if ((countMap[file_c][itBp->id ].ref .size () + countMap[file_c][itBp->id ].alt .size ()) >= c.maxGenoReadCount ) continue ;
431422 // Read spans breakpoint?
432423 if ((hasSoftClip) || ((!hasClip) && (rec->core .pos + c.minimumFlankSize + itBp->homLeft <= itBp->bppos ) && (rec->core .pos + rec->core .l_qseq >= itBp->bppos + c.minimumFlankSize + itBp->homRight ))) {
433- std::string consProbe = consProbeArr[itBp->bpPoint ][itBp->id ];
434- std::string refProbe = refProbeArr[itBp->bpPoint ][itBp->id ];
424+ auto & consProbe = consProbeArr[itBp->bpPoint ][itBp->id ];
425+ auto & refProbe = refProbeArr[itBp->bpPoint ][itBp->id ];
435426
436427 // Get sequence
437428 std::string sequence;
@@ -441,65 +432,36 @@ namespace torali {
441432 _adjustOrientation (sequence, itBp->bpPoint , itBp->svt );
442433
443434 // Compute alignment to alternative haplotype
444- typedef boost::multi_array<char , 2 > TAlign;
445- TAlign alignAlt;
446- DnaScore<int > simple (5 , -4 , -4 , -4 );
447- AlignConfig<true , false > semiglobal;
448- int32_t scoreA = needle (consProbe, sequence, alignAlt, semiglobal, simple);
449- int32_t scoreAltThreshold = (int32_t ) (c.flankQuality * consProbe.size () * simple.match + (1.0 - c.flankQuality ) * consProbe.size () * simple.mismatch );
450- double scoreAlt = (double ) scoreA / (double ) scoreAltThreshold;
451- // Using edlib and score threshold of >0.7 below
452- // double scoreAlt = ((1.0 - c.flankQuality) * (double) consProbe.size()) / (double) (_editDistanceHW(consProbe, sequence) + 1);
453-
454- // Compute alignment to reference haplotype
455- TAlign alignRef;
456- int32_t scoreR = needle (refProbe, sequence, alignRef, semiglobal, simple);
457- int32_t scoreRefThreshold = (int32_t ) (c.flankQuality * refProbe.size () * simple.match + (1.0 - c.flankQuality ) * refProbe.size () * simple.mismatch );
458- double scoreRef = (double ) scoreR / (double ) scoreRefThreshold;
459- // Using edlib and score threshold of >0.7 below
460- // double scoreRef = ((1.0 - c.flankQuality) * (double) refProbe.size()) / (double) (_editDistanceHW(refProbe, sequence) + 1);
461- // Any confident alignment?
462- if ((scoreRef > 1 ) || (scoreAlt > 1 )) {
463- // Debug alignment to REF and ALT
464- // std::cerr << "Alt:\t" << scoreAlt << "\tRef:\t" << scoreRef << std::endl;
465- // for(TAIndex i = 0; i< (TAIndex) alignAlt.shape()[0]; ++i) {
466- // for(TAIndex j = 0; j< (TAIndex) alignAlt.shape()[1]; ++j) std::cerr << alignAlt[i][j];
467- // std::cerr << std::endl;
468- // }
469- // for(TAIndex i = 0; i< (TAIndex) alignRef.shape()[0]; ++i) {
470- // for(TAIndex j = 0; j< (TAIndex) alignRef.shape()[1]; ++j) std::cerr << alignRef[i][j];
471- // std::cerr << std::endl;
472- // }
473-
474- if (scoreRef > scoreAlt) {
475- // Account for reference bias
476- if (++refAlignedReadCount[file_c][itBp->id ] % 2 ) {
477- TQuality quality;
478- quality.resize (rec->core .l_qseq );
479- const uint8_t * qualptr = bam_get_qual (rec);
480- for (int i = 0 ; i < rec->core .l_qseq ; ++i) quality[i] = qualptr[i];
481- uint32_t rq = _getAlignmentQual (alignRef, quality);
482- // uint32_t rq = scoreRef * 35;
483- if (rq >= c.minGenoQual ) {
484- countMap[file_c][itBp->id ].ref .push_back ((uint8_t ) std::min (rq, (uint32_t ) rec->core .qual ));
435+ if (c.hasDumpFile ) {
436+ double scoreAlt = _editDistanceHW (c, consProbe, sequence);
437+ double scoreRef = _editDistanceHW (c, refProbe, sequence);
438+ if ((scoreRef > 0.7 ) || (scoreAlt > 0.7 )) {
439+ if (scoreRef > scoreAlt) {
440+ // Account for reference bias
441+ if (++refAlignedReadCount[file_c][itBp->id ] % 2 ) {
442+ countMap[file_c][itBp->id ].ref .push_back ((uint8_t ) std::min (255 , std::min ((int ) (scoreRef * 35 ), (int ) rec->core .qual )));
485443 }
444+ } else {
445+ countMap[file_c][itBp->id ].alt .push_back ((uint8_t ) std::min (255 , std::min ((int ) (scoreAlt * 35 ), (int ) rec->core .qual )));
446+
447+ std::string svid (_addID (itBp->svt ));
448+ std::string padNumber = boost::lexical_cast<std::string>(itBp->id );
449+ padNumber.insert (padNumber.begin (), 8 - padNumber.length (), ' 0' );
450+ svid += padNumber;
451+ dumpOut << svid << " \t " << c.files [file_c].string () << " \t " << bam_get_qname (rec) << " \t " << hdr[file_c]->target_name [rec->core .tid ] << " \t " << rec->core .pos << " \t " << hdr[file_c]->target_name [rec->core .mtid ] << " \t " << rec->core .mpos << " \t " << (int32_t ) rec->core .qual << " \t SR" << std::endl;
486452 }
487- } else {
488- TQuality quality;
489- quality.resize (rec->core .l_qseq );
490- const uint8_t * qualptr = bam_get_qual (rec);
491- for (int i = 0 ; i < rec->core .l_qseq ; ++i) quality[i] = qualptr[i];
492- uint32_t aq = _getAlignmentQual (alignAlt, quality);
493- // uint32_t aq = scoreAlt * 35;
494- if (aq >= c.minGenoQual ) {
495- if (c.hasDumpFile ) {
496- std::string svid (_addID (itBp->svt ));
497- std::string padNumber = boost::lexical_cast<std::string>(itBp->id );
498- padNumber.insert (padNumber.begin (), 8 - padNumber.length (), ' 0' );
499- svid += padNumber;
500- dumpOut << svid << " \t " << c.files [file_c].string () << " \t " << bam_get_qname (rec) << " \t " << hdr[file_c]->target_name [rec->core .tid ] << " \t " << rec->core .pos << " \t " << hdr[file_c]->target_name [rec->core .mtid ] << " \t " << rec->core .mpos << " \t " << (int32_t ) rec->core .qual << " \t SR" << std::endl;
453+ }
454+ } else {
455+ double scoreAlt = _editDistanceHW (c, consProbe, sequence);
456+ double scoreRef = _editDistanceHW (c, refProbe, sequence);
457+ if ((scoreRef > 0.7 ) || (scoreAlt > 0.7 )) {
458+ if (scoreRef > scoreAlt) {
459+ // Account for reference bias
460+ if (++refAlignedReadCount[file_c][itBp->id ] % 2 ) {
461+ countMap[file_c][itBp->id ].ref .push_back ((uint8_t ) std::min (255 , std::min ((int ) (scoreRef * 35 ), (int ) rec->core .qual )));
501462 }
502- countMap[file_c][itBp->id ].alt .push_back ((uint8_t ) std::min (aq, (uint32_t ) rec->core .qual ));
463+ } else {
464+ countMap[file_c][itBp->id ].alt .push_back ((uint8_t ) std::min (255 , std::min ((int ) (scoreAlt * 35 ), (int ) rec->core .qual )));
503465 }
504466 }
505467 }
0 commit comments