Skip to content

Commit f0693c3

Browse files
committed
drop system
1 parent 9f5fa2d commit f0693c3

5 files changed

Lines changed: 65 additions & 77 deletions

File tree

Makefile

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,7 @@ bindir ?= $(exec_prefix)/bin
1414
# Flags
1515
CXX ?= g++
1616
CXXFLAGS += -std=c++17 -isystem ${EBROOTHTSLIB} -pedantic -W -Wall -Wno-unknown-pragmas -D__STDC_LIMIT_MACROS -fno-strict-aliasing -fpermissive
17-
LDFLAGS += -L${EBROOTHTSLIB} -lboost_iostreams -lboost_filesystem -lboost_system -lboost_program_options -lboost_date_time
18-
19-
# Flags for parallel computation
20-
ifeq (${PARALLEL}, 1)
21-
CXXFLAGS += -fopenmp -DOPENMP
22-
else
23-
CXXFLAGS += -DNOPENMP
24-
endif
17+
LDFLAGS += -L${EBROOTHTSLIB} -lboost_iostreams -lboost_filesystem -lboost_program_options -lboost_date_time
2518

2619
# Flags for static compile
2720
ifeq (${STATIC}, 1)

src/coverage.h

Lines changed: 16 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,6 @@ namespace torali {
313313
dumpOut << "#svid\tbam\tqname\tchr\tpos\tmatechr\tmatepos\tmapq\ttype" << std::endl;
314314
}
315315

316-
#pragma omp parallel for default(shared)
317316
for(unsigned int file_c = 0; file_c < c.files.size(); ++file_c) {
318317
// Pair qualities and features
319318
typedef boost::unordered_map<std::size_t, uint8_t> TQualities;
@@ -478,10 +477,7 @@ namespace torali {
478477
for (int i = 0; i < rec->core.l_qseq; ++i) quality[i] = qualptr[i];
479478
uint32_t rq = _getAlignmentQual(alignRef, quality);
480479
if (rq >= c.minGenoQual) {
481-
#pragma omp critical
482-
{
483-
countMap[file_c][itBp->id].ref.push_back((uint8_t) std::min(rq, (uint32_t) rec->core.qual));
484-
}
480+
countMap[file_c][itBp->id].ref.push_back((uint8_t) std::min(rq, (uint32_t) rec->core.qual));
485481
}
486482
}
487483
} else {
@@ -491,17 +487,14 @@ namespace torali {
491487
for (int i = 0; i < rec->core.l_qseq; ++i) quality[i] = qualptr[i];
492488
uint32_t aq = _getAlignmentQual(alignAlt, quality);
493489
if (aq >= c.minGenoQual) {
494-
#pragma omp critical
495-
{
496-
if (c.hasDumpFile) {
497-
std::string svid(_addID(itBp->svt));
498-
std::string padNumber = boost::lexical_cast<std::string>(itBp->id);
499-
padNumber.insert(padNumber.begin(), 8 - padNumber.length(), '0');
500-
svid += padNumber;
501-
dumpOut << svid << "\t" << c.files[file_c].string() << "\t" << bam_get_qname(rec) << "\t" << hdr[file_c]->target_name[rec->core.tid] << "\t" << rec->core.pos << "\t" << hdr[file_c]->target_name[rec->core.mtid] << "\t" << rec->core.mpos << "\t" << (int32_t) rec->core.qual << "\tSR" << std::endl;
502-
}
503-
countMap[file_c][itBp->id].alt.push_back((uint8_t) std::min(aq, (uint32_t) rec->core.qual));
490+
if (c.hasDumpFile) {
491+
std::string svid(_addID(itBp->svt));
492+
std::string padNumber = boost::lexical_cast<std::string>(itBp->id);
493+
padNumber.insert(padNumber.begin(), 8 - padNumber.length(), '0');
494+
svid += padNumber;
495+
dumpOut << svid << "\t" << c.files[file_c].string() << "\t" << bam_get_qname(rec) << "\t" << hdr[file_c]->target_name[rec->core.tid] << "\t" << rec->core.pos << "\t" << hdr[file_c]->target_name[rec->core.mtid] << "\t" << rec->core.mpos << "\t" << (int32_t) rec->core.qual << "\tSR" << std::endl;
504496
}
497+
countMap[file_c][itBp->id].alt.push_back((uint8_t) std::min(aq, (uint32_t) rec->core.qual));
505498
}
506499
}
507500
}
@@ -586,10 +579,7 @@ namespace torali {
586579
for(; ((itSpan != spanPoint.end()) && (st + spanlen >= itSpan->bppos)); ++itSpan) {
587580
// Account for reference bias
588581
if (++refAlignedSpanCount[file_c][itSpan->id] % 2) {
589-
#pragma omp critical
590-
{
591-
spanMap[file_c][itSpan->id].ref.push_back(pairQuality);
592-
}
582+
spanMap[file_c][itSpan->id].ref.push_back(pairQuality);
593583
}
594584
}
595585
}
@@ -623,17 +613,14 @@ namespace torali {
623613
// Make sure, mate is correct
624614
if (rec->core.mtid == itSpan->chr2) {
625615
if (std::abs((int32_t) rec->core.mpos - itSpan->otherBppos) < sampleLib[file_c].maxNormalISize) {
626-
#pragma omp critical
627-
{
628-
if (c.hasDumpFile) {
629-
std::string svid(_addID(itSpan->svt));
630-
std::string padNumber = boost::lexical_cast<std::string>(itSpan->id);
631-
padNumber.insert(padNumber.begin(), 8 - padNumber.length(), '0');
632-
svid += padNumber;
633-
dumpOut << svid << "\t" << c.files[file_c].string() << "\t" << bam_get_qname(rec) << "\t" << hdr[file_c]->target_name[rec->core.tid] << "\t" << rec->core.pos << "\t" << hdr[file_c]->target_name[rec->core.mtid] << "\t" << rec->core.mpos << "\t" << (int32_t) rec->core.qual << "\tPE" << std::endl;
634-
}
635-
spanMap[file_c][itSpan->id].alt.push_back(pairQuality);
616+
if (c.hasDumpFile) {
617+
std::string svid(_addID(itSpan->svt));
618+
std::string padNumber = boost::lexical_cast<std::string>(itSpan->id);
619+
padNumber.insert(padNumber.begin(), 8 - padNumber.length(), '0');
620+
svid += padNumber;
621+
dumpOut << svid << "\t" << c.files[file_c].string() << "\t" << bam_get_qname(rec) << "\t" << hdr[file_c]->target_name[rec->core.tid] << "\t" << rec->core.pos << "\t" << hdr[file_c]->target_name[rec->core.mtid] << "\t" << rec->core.mpos << "\t" << (int32_t) rec->core.qual << "\tPE" << std::endl;
636622
}
623+
spanMap[file_c][itSpan->id].alt.push_back(pairQuality);
637624
}
638625
}
639626
}

src/delly.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include <htslib/vcf.h>
2626
#include <htslib/sam.h>
2727

28+
#include "threadpool.h"
2829
#include "version.h"
2930
#include "util.h"
3031
#include "bolog.h"
@@ -61,6 +62,7 @@ namespace torali
6162
uint32_t minClip;
6263
uint32_t maxGenoReadCount;
6364
uint32_t minCliqueSize;
65+
uint32_t maxThreads;
6466
float flankQuality;
6567
bool hasExcludeFile;
6668
bool hasVcfFile;
@@ -204,6 +206,7 @@ namespace torali
204206
("genome,g", boost::program_options::value<boost::filesystem::path>(&c.genome), "genome fasta file")
205207
("exclude,x", boost::program_options::value<boost::filesystem::path>(&c.exclude), "file with regions to exclude")
206208
("outfile,o", boost::program_options::value<boost::filesystem::path>(&c.outfile), "BCF output file")
209+
("threads,h", boost::program_options::value<uint32_t>(&c.maxThreads)->default_value(8), "number of threads")
207210
;
208211

209212
boost::program_options::options_description disc("Discovery options");
@@ -258,9 +261,6 @@ namespace torali
258261
std::cerr << "Please specify a valid SV type, i.e., -t INV or -t DEL,INV without spaces." << std::endl;
259262
return 1;
260263
}
261-
//typedef std::set<int32_t> TSvSetTmp;
262-
//for(typename TSvSetTmp::iterator itst = c.svtset.begin(); itst!=c.svtset.end(); ++itst) std::cerr << *itst << std::endl;
263-
//std::cerr << c.svtset.size() << std::endl;
264264

265265
// Dump PE and SR support?
266266
if (vm.count("dump")) c.hasDumpFile = true;

src/genotype.h

Lines changed: 10 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -156,15 +156,14 @@ namespace torali
156156
if ((itSV->chr == refIndex) && (itSV->alleles.empty())) itSV->alleles = _addAlleles(boost::to_upper_copy(std::string(seq + itSV->svStart - 1, seq + itSV->svStart)), std::string(hdr[0]->target_name[itSV->chr2]), *itSV, itSV->svt);
157157
}
158158

159-
#pragma omp parallel for default(shared)
160159
for(unsigned int file_c = 0; file_c < c.files.size(); ++file_c) {
161-
160+
162161
// Coverage track
163162
typedef uint16_t TMaxCoverage;
164163
uint32_t maxCoverage = std::numeric_limits<TMaxCoverage>::max();
165164
typedef std::vector<TMaxCoverage> TBpCoverage;
166165
TBpCoverage covBases(hdr[file_c]->target_len[refIndex], 0);
167-
166+
168167
// Parse BAM
169168
hts_itr_t* iter = sam_itr_queryi(idx[file_c], refIndex, 0, hdr[file_c]->target_len[refIndex]);
170169
bam1_t* rec = bam_init1();
@@ -288,10 +287,7 @@ namespace torali
288287
//for (int i = 0; i < rec->core.l_qseq; ++i) quality[i] = qualptr[i];
289288
uint32_t rq = scoreRef * 35;
290289
if (rq >= c.minGenoQual) {
291-
#pragma omp critical
292-
{
293-
jctMap[file_c][svid].ref.push_back((uint8_t) std::min(rq, (uint32_t) rec->core.qual));
294-
}
290+
jctMap[file_c][svid].ref.push_back((uint8_t) std::min(rq, (uint32_t) rec->core.qual));
295291
}
296292
}
297293
} else {
@@ -301,17 +297,14 @@ namespace torali
301297
//for (int i = 0; i < rec->core.l_qseq; ++i) quality[i] = qualptr[i];
302298
uint32_t aq = scoreAlt * 35;
303299
if (aq >= c.minGenoQual) {
304-
#pragma omp critical
305-
{
306-
if (c.hasDumpFile) {
307-
std::string svidStr(_addID(svs[svid].svt));
308-
std::string padNumber = boost::lexical_cast<std::string>(svid);
309-
padNumber.insert(padNumber.begin(), 8 - padNumber.length(), '0');
310-
svidStr += padNumber;
311-
dumpOut << svidStr << "\t" << c.files[file_c].string() << "\t" << bam_get_qname(rec) << "\t" << hdr[file_c]->target_name[rec->core.tid] << "\t" << rec->core.pos << "\t" << (int32_t) rec->core.qual << "\tSR" << std::endl;
312-
}
313-
jctMap[file_c][svid].alt.push_back((uint8_t) std::min(aq, (uint32_t) rec->core.qual));
300+
if (c.hasDumpFile) {
301+
std::string svidStr(_addID(svs[svid].svt));
302+
std::string padNumber = boost::lexical_cast<std::string>(svid);
303+
padNumber.insert(padNumber.begin(), 8 - padNumber.length(), '0');
304+
svidStr += padNumber;
305+
dumpOut << svidStr << "\t" << c.files[file_c].string() << "\t" << bam_get_qname(rec) << "\t" << hdr[file_c]->target_name[rec->core.tid] << "\t" << rec->core.pos << "\t" << (int32_t) rec->core.qual << "\tSR" << std::endl;
314306
}
307+
jctMap[file_c][svid].alt.push_back((uint8_t) std::min(aq, (uint32_t) rec->core.qual));
315308
}
316309
}
317310
}

src/shortpe.h

Lines changed: 35 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -271,8 +271,11 @@ namespace torali
271271
// Parse genome, process chromosome by chromosome
272272
boost::posix_time::ptime now = boost::posix_time::second_clock::local_time();
273273
std::cerr << '[' << boost::posix_time::to_simple_string(now) << "] " << "Paired-end and split-read scanning" << std::endl;
274+
275+
// Multi-threading
276+
ThreadPool pool(std::max<std::size_t>(1, c.maxThreads));
277+
274278
// Iterate all samples
275-
#pragma omp parallel for default(shared)
276279
for(unsigned int file_c = 0; file_c < c.files.size(); ++file_c) {
277280
// Inter-chromosomal mate map and alignment length
278281
typedef std::pair<uint8_t, int32_t> TQualLen;
@@ -399,11 +402,7 @@ namespace torali
399402
alenmate = p.second;
400403
mateMap[hv].first = 0;
401404
}
402-
403-
#pragma omp critical
404-
{
405-
bamRecord[svt].push_back(BamAlignRecord(rec, pairQuality, alignmentLength(rec), alenmate, sampleLib[file_c].median, sampleLib[file_c].mad, sampleLib[file_c].maxNormalISize));
406-
}
405+
bamRecord[svt].push_back(BamAlignRecord(rec, pairQuality, alignmentLength(rec), alenmate, sampleLib[file_c].median, sampleLib[file_c].mad, sampleLib[file_c].maxNormalISize));
407406
++sampleLib[file_c].abnormal_pairs;
408407
}
409408
}
@@ -417,14 +416,11 @@ namespace torali
417416
for(typename TReadBp::iterator it = readBp.begin(); it != readBp.end(); ++it) std::sort(it->second.begin(), it->second.end());
418417

419418
// Collect split-read SVs
420-
#pragma omp critical
421-
{
422-
if ((c.svtset.empty()) || (c.svtset.find(2) != c.svtset.end())) selectDeletions(c, readBp, srBR);
423-
if ((c.svtset.empty()) || (c.svtset.find(3) != c.svtset.end())) selectDuplications(c, readBp, srBR);
424-
if ((c.svtset.empty()) || (c.svtset.find(0) != c.svtset.end()) || (c.svtset.find(1) != c.svtset.end())) selectInversions(c, readBp, srBR);
425-
if ((c.svtset.empty()) || (c.svtset.find(4) != c.svtset.end())) selectInsertions(c, readBp, srBR);
426-
if ((c.svtset.empty()) || (c.svtset.find(DELLY_SVT_TRANS) != c.svtset.end()) || (c.svtset.find(DELLY_SVT_TRANS + 1) != c.svtset.end()) || (c.svtset.find(DELLY_SVT_TRANS + 2) != c.svtset.end()) || (c.svtset.find(DELLY_SVT_TRANS + 3) != c.svtset.end())) selectTranslocations(c, readBp, srBR);
427-
}
419+
if ((c.svtset.empty()) || (c.svtset.find(2) != c.svtset.end())) selectDeletions(c, readBp, srBR);
420+
if ((c.svtset.empty()) || (c.svtset.find(3) != c.svtset.end())) selectDuplications(c, readBp, srBR);
421+
if ((c.svtset.empty()) || (c.svtset.find(0) != c.svtset.end()) || (c.svtset.find(1) != c.svtset.end())) selectInversions(c, readBp, srBR);
422+
if ((c.svtset.empty()) || (c.svtset.find(4) != c.svtset.end())) selectInsertions(c, readBp, srBR);
423+
if ((c.svtset.empty()) || (c.svtset.find(DELLY_SVT_TRANS) != c.svtset.end()) || (c.svtset.find(DELLY_SVT_TRANS + 1) != c.svtset.end()) || (c.svtset.find(DELLY_SVT_TRANS + 2) != c.svtset.end()) || (c.svtset.find(DELLY_SVT_TRANS + 3) != c.svtset.end())) selectTranslocations(c, readBp, srBR);
428424
}
429425

430426
// Debug abnormal paired-ends and split-reads
@@ -433,6 +429,8 @@ namespace torali
433429
// Cluster split-read records
434430
now = boost::posix_time::second_clock::local_time();
435431
std::cerr << '[' << boost::posix_time::to_simple_string(now) << "] " << "Split-read clustering" << std::endl;
432+
std::vector<std::vector<StructuralVariantRecord>> srBySvt(srBR.size());
433+
std::vector<std::future<void>> cluster_futures;
436434
for(uint32_t svt = 0; svt < srBR.size(); ++svt) {
437435
if ((!c.svtset.empty()) && (c.svtset.find(svt) == c.svtset.end())) continue;
438436
if (srBR[svt].empty()) continue;
@@ -441,18 +439,26 @@ namespace torali
441439
std::sort(srBR[svt].begin(), srBR[svt].end());
442440

443441
// Cluster
444-
cluster(c, srBR[svt], srSVs, svt);
445-
446-
// Debug SR SVs
447-
//outputStructuralVariants(c, srSVs, srBR, svt, false); // Short reads
442+
cluster_futures.push_back(pool.enqueue([&, svt]() {
443+
cluster(c, srBR[svt], srBySvt[svt], svt);
444+
}));
445+
}
446+
// Wait and merge
447+
for(auto &f : cluster_futures) f.get();
448+
for(auto &vec : srBySvt) {
449+
if (!vec.empty()) {
450+
srSVs.insert(srSVs.end(), std::make_move_iterator(vec.begin()), std::make_move_iterator(vec.end()));
451+
}
448452
}
449453

450454
// Cluster paired-end records
451455
now = boost::posix_time::second_clock::local_time();
452456
std::cerr << '[' << boost::posix_time::to_simple_string(now) << "] " << "Paired-end clustering" << std::endl;
453457

454458
// Maximum variability in insert size
455-
int32_t varisize = getVariability(c, sampleLib);
459+
int32_t varisize = getVariability(c, sampleLib);
460+
std::vector<std::vector<StructuralVariantRecord>> peBySvt(srBR.size());
461+
std::vector<std::future<void>> pe_cluster_futures;
456462
for(int32_t svt = 0; svt < (int32_t) bamRecord.size(); ++svt) {
457463
if ((!c.svtset.empty()) && (c.svtset.find(svt) == c.svtset.end())) continue;
458464
if (bamRecord[svt].empty()) continue;
@@ -461,7 +467,16 @@ namespace torali
461467
std::sort(bamRecord[svt].begin(), bamRecord[svt].end());
462468

463469
// Cluster
464-
cluster(c, bamRecord[svt], svs, varisize, svt);
470+
pe_cluster_futures.push_back(pool.enqueue([&, svt]() {
471+
cluster(c, bamRecord[svt], peBySvt[svt], varisize, svt);
472+
}));
473+
}
474+
// Wait and merge
475+
for(auto &f : pe_cluster_futures) f.get();
476+
for(auto &vec : peBySvt) {
477+
if (!vec.empty()) {
478+
svs.insert(svs.end(), std::make_move_iterator(vec.begin()), std::make_move_iterator(vec.end()));
479+
}
465480
}
466481

467482
// Track split-reads

0 commit comments

Comments
 (0)