should be a little faster
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@978 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
3a340ca887
commit
8d25f1a105
|
|
@ -2,12 +2,15 @@ package org.broadinstitute.sting.playground.gatk.walkers.indels;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import net.sf.samtools.Cigar;
|
import net.sf.samtools.Cigar;
|
||||||
import net.sf.samtools.CigarElement;
|
import net.sf.samtools.CigarElement;
|
||||||
|
import net.sf.samtools.SAMFileHeader;
|
||||||
import net.sf.samtools.SAMFileReader;
|
import net.sf.samtools.SAMFileReader;
|
||||||
import net.sf.samtools.SAMReadGroupRecord;
|
import net.sf.samtools.SAMReadGroupRecord;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
@ -52,16 +55,16 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
|
||||||
// this is an ugly hack: we want to be able to tell what file (tumor/normal sample) each read came from,
|
// this is an ugly hack: we want to be able to tell what file (tumor/normal sample) each read came from,
|
||||||
// but reads do not carry this information!
|
// but reads do not carry this information!
|
||||||
|
|
||||||
SAMFileReader rn = new SAMFileReader(getToolkit().getArguments().samFiles.get(0));
|
// SAMFileReader rn = new SAMFileReader(getToolkit().getArguments().samFiles.get(0));
|
||||||
for ( SAMReadGroupRecord rec : rn.getFileHeader().getReadGroups() ) {
|
// for ( SAMReadGroupRecord rec : rn.getFileHeader().getReadGroups() ) {
|
||||||
normal_samples.add(rec.getSample());
|
// normal_samples.add(rec.getSample());
|
||||||
}
|
// }
|
||||||
rn.close();
|
// rn.close();
|
||||||
rn = new SAMFileReader(getToolkit().getArguments().samFiles.get(1));
|
// rn = new SAMFileReader(getToolkit().getArguments().samFiles.get(1));
|
||||||
for ( SAMReadGroupRecord rec : rn.getFileHeader().getReadGroups() ) {
|
// for ( SAMReadGroupRecord rec : rn.getFileHeader().getReadGroups() ) {
|
||||||
tumor_samples.add(rec.getSample());
|
// tumor_samples.add(rec.getSample());
|
||||||
}
|
// }
|
||||||
rn.close();
|
// rn.close();
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
if ( nSams != 1 ) System.out.println("WARNING: multiple input files specified. \n"+
|
if ( nSams != 1 ) System.out.println("WARNING: multiple input files specified. \n"+
|
||||||
|
|
@ -74,6 +77,36 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void assignReadGroups(final SAMFileHeader mergedHeader) {
|
||||||
|
|
||||||
|
Set<String> normal = new HashSet<String>(); // list normal samples here
|
||||||
|
Set<String> tumor = new HashSet<String>(); // list tumor samples here
|
||||||
|
|
||||||
|
SAMFileReader rn = new SAMFileReader(getToolkit().getArguments().samFiles.get(0));
|
||||||
|
for ( SAMReadGroupRecord rec : rn.getFileHeader().getReadGroups() ) {
|
||||||
|
normal.add(new String(rec.getSample()));
|
||||||
|
}
|
||||||
|
rn.close();
|
||||||
|
rn = new SAMFileReader(getToolkit().getArguments().samFiles.get(1));
|
||||||
|
for ( SAMReadGroupRecord rec : rn.getFileHeader().getReadGroups() ) {
|
||||||
|
tumor.add(new String(rec.getSample()));
|
||||||
|
}
|
||||||
|
rn.close();
|
||||||
|
|
||||||
|
// now we know what samples are normal, and what are tumor; let's assign dynamic read groups we get in merged header:
|
||||||
|
for ( SAMReadGroupRecord mr : mergedHeader.getReadGroups() ) {
|
||||||
|
if ( normal.contains(mr.getSample() ) ) {
|
||||||
|
normal_samples.add( new String(mr.getReadGroupId()) );
|
||||||
|
System.out.println("Read group "+ mr.getReadGroupId() + "--> Sample "+ mr.getSample() + " (normal)");
|
||||||
|
} else if ( tumor.contains(mr.getSample() ) ) {
|
||||||
|
tumor_samples.add( new String(mr.getReadGroupId()) );
|
||||||
|
System.out.println("Read group "+ mr.getReadGroupId() + "--> Sample "+ mr.getSample() + " (tumor)");
|
||||||
|
} else throw new StingException("Unrecognized sample "+mr.getSample() +" in merged SAM stream");
|
||||||
|
}
|
||||||
|
System.out.println();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Integer map(char[] ref, SAMRecord read) {
|
public Integer map(char[] ref, SAMRecord read) {
|
||||||
|
|
||||||
|
|
@ -108,32 +141,41 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
|
||||||
throw new StingException("Read "+read.getReadName()+": out of order on the contig");
|
throw new StingException("Read "+read.getReadName()+": out of order on the contig");
|
||||||
}
|
}
|
||||||
|
|
||||||
// reads are sorted; we are not going to see any more coverage or new indels prior
|
|
||||||
// to current read's start position!
|
|
||||||
|
|
||||||
if ( call_somatic ) emit_somatic( read.getAlignmentStart() );
|
|
||||||
else emit( read.getAlignmentStart() );
|
|
||||||
|
|
||||||
|
|
||||||
if ( read.getAlignmentEnd() > coverage.getStop()) {
|
if ( read.getAlignmentEnd() > coverage.getStop()) {
|
||||||
// should never happen
|
|
||||||
throw new StingException("Read "+read.getReadName()+": out of coverage window bounds.\n"+
|
// we don't emit anything until we reach a read that does not fit into the current window.
|
||||||
|
// At that point we shift the window to the start of that read and emit everything prior to
|
||||||
|
// that position (reads are sorted, so we are not gonna see any more coverage at those lower positions)
|
||||||
|
|
||||||
|
if ( call_somatic ) emit_somatic( read.getAlignmentStart() );
|
||||||
|
else emit( read.getAlignmentStart() );
|
||||||
|
|
||||||
|
if ( read.getAlignmentEnd() > coverage.getStop()) {
|
||||||
|
// ooops, looks like the read does not fit into the current window!!
|
||||||
|
throw new StingException("Read "+read.getReadName()+": out of coverage window bounds.Probably window is too small.\n"+
|
||||||
"Read length="+read.getReadLength()+"; cigar="+read.getCigarString()+"; start="+
|
"Read length="+read.getReadLength()+"; cigar="+read.getCigarString()+"; start="+
|
||||||
read.getAlignmentStart()+"; end="+read.getAlignmentEnd()+"; window start="+coverage.getStart()+
|
read.getAlignmentStart()+"; end="+read.getAlignmentEnd()+"; window start="+coverage.getStart()+
|
||||||
"; window end="+coverage.getStop());
|
"; window end="+coverage.getStop());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( call_somatic ) {
|
if ( call_somatic ) {
|
||||||
|
|
||||||
|
// this is a hack. currently we can get access to the merged header only through the read,
|
||||||
|
// so below we figure out which of the reassigned read groups in the merged stream are normal
|
||||||
|
// and which are tumor, and we make sure we do it only once:
|
||||||
|
if ( normal_samples.size() == 0 ) assignReadGroups(read.getHeader());
|
||||||
|
|
||||||
String rg = (String)read.getAttribute("RG");
|
String rg = (String)read.getAttribute("RG");
|
||||||
|
if ( rg == null ) throw new StingException("Read "+read.getReadName()+" has no read group in merged stream");
|
||||||
|
|
||||||
String sample = read.getHeader().getReadGroup(rg).getSample();
|
if ( normal_samples.contains(rg) ) {
|
||||||
|
|
||||||
if ( normal_samples.contains(sample) ) {
|
|
||||||
normal_coverage.add(read,ref);
|
normal_coverage.add(read,ref);
|
||||||
} else if ( tumor_samples.contains(sample) ) {
|
} else if ( tumor_samples.contains(rg) ) {
|
||||||
coverage.add(read,ref);
|
coverage.add(read,ref);
|
||||||
} else {
|
} else {
|
||||||
throw new StingException("Unrecognized sample: "+sample);
|
throw new StingException("Unrecognized read group in merged stream: "+rg);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
coverage.add(read, ref);
|
coverage.add(read, ref);
|
||||||
|
|
@ -229,8 +271,8 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
|
||||||
|
|
||||||
if ( (double)total_variant_count_tumor > 0.3*tumor_cov && (double) max_variant_count_tumor > 0.7*total_variant_count_tumor ) {
|
if ( (double)total_variant_count_tumor > 0.3*tumor_cov && (double) max_variant_count_tumor > 0.7*total_variant_count_tumor ) {
|
||||||
|
|
||||||
System.out.println(refName+"\t"+(pos-1)+"\t"+(event_length_tumor > 0 ? pos-1+event_length_tumor : pos-1)+
|
String message = refName+"\t"+(pos-1)+"\t"+(event_length_tumor > 0 ? pos-1+event_length_tumor : pos-1)+
|
||||||
"\t"+(event_length_tumor >0? "-":"+")+indelStringTumor +":"+total_variant_count_tumor+"/"+tumor_cov);
|
"\t"+(event_length_tumor >0? "-":"+")+indelStringTumor +":"+total_variant_count_tumor+"/"+tumor_cov;
|
||||||
if ( normal_variants.size() == 0 ) {
|
if ( normal_variants.size() == 0 ) {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
|
@ -241,10 +283,11 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
throw new StingException("Error encountered while writing into output BED file");
|
throw new StingException("Error encountered while writing into output BED file");
|
||||||
}
|
}
|
||||||
System.out.println("INDICATION FOR SOMATIC\n---------------------------\n");
|
message += "\tSOMATIC";
|
||||||
} else {
|
} else {
|
||||||
System.out.println("INDICATION FOR GERMLINE\n---------------------------\n");
|
message += "\tGERMLINE";
|
||||||
}
|
}
|
||||||
|
logger.info(message);
|
||||||
}
|
}
|
||||||
// for ( IndelVariant var : variants ) {
|
// for ( IndelVariant var : variants ) {
|
||||||
// System.out.print("\t"+var.getType()+"\t"+var.getBases()+"\t"+var.getCount());
|
// System.out.print("\t"+var.getType()+"\t"+var.getBases()+"\t"+var.getCount());
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue