11#! /usr/bin/env python
22
33from __future__ import print_function
4- from .readfq import readfq
54import argparse
65import sys
76import cyvcf2
@@ -17,6 +16,39 @@ def argument_parsing():
1716 parser .add_argument ('-o' , '--out' , metavar = 'out.vcf' , required = True , dest = 'out' , help = 'output VCF file (required)' )
1817 return parser .parse_args ()
1918
19+ """Heng Li's FASTA/FASTQ parser
20+ """
21+ def readfq (fp ): # this is a generator function
22+ last = None # this is a buffer keeping the last unprocessed line
23+ while True : # mimic closure; is it a bad idea?
24+ if not last : # the first record or a record following a fastq
25+ for l in fp : # search for the start of the next record
26+ if l [0 ] in '>@' : # fasta/q header line
27+ last = l [:- 1 ] # save this line
28+ break
29+ if not last : break
30+ name , seqs , last = last [1 :].partition (" " )[0 ], [], None
31+ for l in fp : # read the sequence
32+ if l [0 ] in '@+>' :
33+ last = l [:- 1 ]
34+ break
35+ seqs .append (l [:- 1 ])
36+ if not last or last [0 ] != '+' : # this is a fasta record
37+ yield name , '' .join (seqs ), None # yield a fasta record
38+ if not last : break
39+ else : # this is a fastq record
40+ seq , leng , seqs = '' .join (seqs ), 0 , []
41+ for l in fp : # read the quality
42+ seqs .append (l [:- 1 ])
43+ leng += len (l ) - 1
44+ if leng >= len (seq ): # have read enough quality
45+ last = None
46+ yield name , seq , '' .join (seqs ); # yield a fastq record
47+ break
48+ if last : # reach EOF before reading enough quality
49+ yield name , seq , None # yield a fasta record instead
50+ break
51+
2052def delly2bnd (args ):
2153 # Fetch breakpoint positions
2254 bndPos = collections .defaultdict (dict )
0 commit comments