Make the RefSeq annotation file an optional argument; if it is specified, indels will be annotated as GENOMIC/UTR/INTRON/CODING (any unexpected 'UNKNOWN' annotations probably mean that something is wrong with the RefSeq annotations).
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1077 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
9c0dba6979
commit
1339f3f3e3
|
|
@ -46,16 +46,20 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
|
||||||
@Argument(fullName="minConsensusFraction", shortName="minConsensusFraction",
|
@Argument(fullName="minConsensusFraction", shortName="minConsensusFraction",
|
||||||
doc="Minimum fraction of reads with indel at the site that must contain consensus indel in order to make the call", required=false)
|
doc="Minimum fraction of reads with indel at the site that must contain consensus indel in order to make the call", required=false)
|
||||||
public double minConsensusFraction = 0.7;
|
public double minConsensusFraction = 0.7;
|
||||||
|
@Argument(fullName="refseq", shortName="refseq",
|
||||||
|
doc="Name of RefSeq transcript annotation file. If specified, indels will be annotated as GENOMIC/UTR/INTRON/CODING", required=false)
|
||||||
|
public String RefseqFileName = null;
|
||||||
|
|
||||||
private static int WINDOW_SIZE = 200;
|
private static int WINDOW_SIZE = 200;
|
||||||
private RunningCoverage coverage;
|
private RunningCoverage coverage;
|
||||||
private RunningCoverage normal_coverage; // when performing somatic calls, we will be using this one for normal, and 'coverage' for tumor
|
private RunningCoverage normal_coverage; // when performing somatic calls, we will be using this one for normal, and 'coverage' for tumor
|
||||||
private int currentContigIndex = -1;
|
private int currentContigIndex = -1;
|
||||||
|
private int currentPosition = -1; // position of the last read we've seen on the current contig
|
||||||
private String refName = null;
|
private String refName = null;
|
||||||
private java.io.Writer output = null;
|
private java.io.Writer output = null;
|
||||||
private GenomeLoc location = null;
|
private GenomeLoc location = null;
|
||||||
|
|
||||||
private RODIterator<rodRefSeq> refseqIterator;
|
private RODIterator<rodRefSeq> refseqIterator=null;
|
||||||
|
|
||||||
private Set<String> normal_samples = new HashSet<String>();
|
private Set<String> normal_samples = new HashSet<String>();
|
||||||
private Set<String> tumor_samples = new HashSet<String>();
|
private Set<String> tumor_samples = new HashSet<String>();
|
||||||
|
|
@ -71,14 +75,20 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
|
||||||
private static String annCoding = "CODING";
|
private static String annCoding = "CODING";
|
||||||
private static String annUnknown = "UNKNOWN";
|
private static String annUnknown = "UNKNOWN";
|
||||||
|
|
||||||
|
private SAMRecord lastRead;
|
||||||
|
|
||||||
|
// "/humgen/gsa-scr1/GATK_Data/refGene.sorted.txt"
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
coverage = new RunningCoverage(0,WINDOW_SIZE);
|
coverage = new RunningCoverage(0,WINDOW_SIZE);
|
||||||
|
|
||||||
ReferenceOrderedData<rodRefSeq> refseq = new ReferenceOrderedData<rodRefSeq>("refseq",
|
if ( RefseqFileName != null ) {
|
||||||
new java.io.File("/humgen/gsa-scr1/GATK_Data/refGene.sorted.txt"),rodRefSeq.class);
|
ReferenceOrderedData<rodRefSeq> refseq = new ReferenceOrderedData<rodRefSeq>("refseq",
|
||||||
|
new java.io.File(RefseqFileName),rodRefSeq.class);
|
||||||
|
|
||||||
refseqIterator = refseq.iterator();
|
refseqIterator = refseq.iterator();
|
||||||
|
}
|
||||||
|
|
||||||
int nSams = getToolkit().getArguments().samFiles.size();
|
int nSams = getToolkit().getArguments().samFiles.size();
|
||||||
|
|
||||||
|
|
@ -177,6 +187,7 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
|
||||||
if ( call_somatic) emit_somatic(1000000000, true); // print remaining indels from the previous contig (if any);
|
if ( call_somatic) emit_somatic(1000000000, true); // print remaining indels from the previous contig (if any);
|
||||||
else emit(1000000000,true);
|
else emit(1000000000,true);
|
||||||
currentContigIndex = read.getReferenceIndex();
|
currentContigIndex = read.getReferenceIndex();
|
||||||
|
currentPosition = read.getAlignmentStart();
|
||||||
refName = new String(read.getReferenceName());
|
refName = new String(read.getReferenceName());
|
||||||
location.setContig(refName);
|
location.setContig(refName);
|
||||||
|
|
||||||
|
|
@ -184,6 +195,14 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
|
||||||
if ( call_somatic) normal_coverage.clear();
|
if ( call_somatic) normal_coverage.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( read.getAlignmentStart() < currentPosition )
|
||||||
|
throw new StingException("Read "+read.getReadName() +" out of order on the contig\n"+
|
||||||
|
"Read starts at "+refName+":"+read.getAlignmentStart()+"; last read seen started at "+refName+":"+currentPosition
|
||||||
|
+"\nLast read was: "+lastRead.getReadName()+" RG="+lastRead.getAttribute("RG")+" at "+lastRead.getAlignmentStart()+"-"
|
||||||
|
+lastRead.getAlignmentEnd()+" cigar="+lastRead.getCigarString());
|
||||||
|
|
||||||
|
currentPosition = read.getAlignmentStart();
|
||||||
|
|
||||||
if ( read.getAlignmentStart() < coverage.getStart() ) {
|
if ( read.getAlignmentStart() < coverage.getStart() ) {
|
||||||
// should never happen
|
// should never happen
|
||||||
throw new StingException("Read "+read.getReadName()+": out of order on the contig\n"+
|
throw new StingException("Read "+read.getReadName()+": out of order on the contig\n"+
|
||||||
|
|
@ -191,6 +210,8 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
|
||||||
"); window starts at "+coverage.getStart());
|
"); window starts at "+coverage.getStart());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
lastRead = read;
|
||||||
|
|
||||||
// a little trick here: we want to make sure that current read completely fits into the current
|
// a little trick here: we want to make sure that current read completely fits into the current
|
||||||
// window so that we can accumulate the coverage/indel counts over the whole length of the read.
|
// window so that we can accumulate the coverage/indel counts over the whole length of the read.
|
||||||
// The ::getAlignmentEnd() method returns the last position on the reference where bases from the
|
// The ::getAlignmentEnd() method returns the last position on the reference where bases from the
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue