(pseudo-)genotyping functionality added: force-emits calls (including REF) at specified locations. Currently @Hidden for testing.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4643 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
8e36a07bea
commit
68ce55148e
|
|
@ -48,6 +48,7 @@ import org.broadinstitute.sting.utils.*;
|
|||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalFileMergingIterator;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||
import org.broadinstitute.sting.utils.interval.OverlappingIntervalIterator;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
|
|
@ -178,14 +179,23 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
|||
private static String annCoding = "CODING";
|
||||
private static String annUnknown = "UNKNOWN";
|
||||
|
||||
enum CallType {
|
||||
NOCOVERAGE,
|
||||
BADCOVERAGE,
|
||||
NOEVIDENCE,
|
||||
GERMLINE,
|
||||
SOMATIC
|
||||
};
|
||||
|
||||
private SAMRecord lastRead;
|
||||
private byte[] refBases;
|
||||
private ReferenceDataSource refData;
|
||||
private Iterator<GenomeLoc> genotypeIntervals = null;
|
||||
private GenomeLocSortedSet traverseIntervals = null; // these are the traversal intervals passed with -L option (if any)
|
||||
private Iterator<GenomeLoc> genotypeIntervalIterator = null;
|
||||
|
||||
// the current interval in the list of intervals, for which we want to do full genotyping
|
||||
private GenomeLoc currentGenotypeInterval = null;
|
||||
private long lastGenotypedPosition = -1; // last position on the currentGenotypeInterval, for which a call was already printed;
|
||||
// can be 1 base before lastGenotyped start
|
||||
|
||||
// "/humgen/gsa-scr1/GATK_Data/refGene.sorted.txt"
|
||||
|
||||
|
|
@ -295,50 +305,34 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
|||
}
|
||||
if ( genotypeIntervalsFile != null ) {
|
||||
|
||||
traverseIntervals = getToolkit().getIntervals();
|
||||
|
||||
if ( ! GENOTYPE_NOT_SORTED && IntervalUtils.isIntervalFile(genotypeIntervalsFile)) {
|
||||
// prepare to read intervals one-by-one, as needed (assuming they are sorted).
|
||||
genotypeIntervals = new IntervalFileMergingIterator(getToolkit().getGenomeLocParser(),
|
||||
genotypeIntervalIterator = new IntervalFileMergingIterator(getToolkit().getGenomeLocParser(),
|
||||
new java.io.File(genotypeIntervalsFile), IntervalMergingRule.OVERLAPPING_ONLY );
|
||||
} else {
|
||||
// read in the whole list of intervals for cleaning
|
||||
GenomeLocSortedSet locs = IntervalUtils.sortAndMergeIntervals(getToolkit().getGenomeLocParser(),
|
||||
IntervalUtils.parseIntervalArguments(getToolkit().getGenomeLocParser(),Arrays.asList(genotypeIntervalsFile),true), IntervalMergingRule.OVERLAPPING_ONLY);
|
||||
genotypeIntervals = locs.iterator();
|
||||
genotypeIntervalIterator = locs.iterator();
|
||||
}
|
||||
currentGenotypeInterval = genotypeIntervals.hasNext() ? genotypeIntervals.next() : null;
|
||||
|
||||
// wrap intervals requested for genotyping inside overlapping iterator, so that we actually
|
||||
// genotype only on the intersections of the requested intervals with the -L intervals
|
||||
genotypeIntervalIterator = new OverlappingIntervalIterator(genotypeIntervalIterator, getToolkit().getIntervals().iterator() );
|
||||
|
||||
currentGenotypeInterval = genotypeIntervalIterator.hasNext() ? genotypeIntervalIterator.next() : null;
|
||||
|
||||
if ( DEBUG) System.out.println("DEBUG>> first genotyping interval="+currentGenotypeInterval);
|
||||
|
||||
if ( currentGenotypeInterval != null ) lastGenotypedPosition = currentGenotypeInterval.getStart()-1;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
location = getToolkit().getGenomeLocParser().createGenomeLoc(getToolkit().getSAMFileHeader().getSequence(0).getSequenceName(),1);
|
||||
|
||||
// List<Set<String>> readGroupSets = getToolkit().getMergedReadGroupsByReaders();
|
||||
// List<Set<String>> sampleSets = getToolkit().getSamplesByReaders();
|
||||
|
||||
normalSamples = getToolkit().getSamplesByReaders().get(0);
|
||||
|
||||
// if ( call_somatic ) {
|
||||
// if ( nSams != 2 ) {
|
||||
// System.out.println("In --somatic mode two input bam files must be specified (normal/tumor)");
|
||||
// System.exit(1);
|
||||
// }
|
||||
|
||||
// normalReadGroups = readGroupSets.get(0); // first -I option must specify normal.bam
|
||||
// System.out.println(normalReadGroups.size() + " normal read groups");
|
||||
// for ( String rg : normalReadGroups ) System.out.println("Normal RG: "+rg);
|
||||
|
||||
// tumorReadGroups = readGroupSets.get(1); // second -I option must specify tumor.bam
|
||||
// System.out.println(tumorReadGroups.size() + " tumor read groups");
|
||||
// for ( String rg : tumorReadGroups ) System.out.println("Tumor RG: "+rg);
|
||||
|
||||
// tumorSamples = sampleSets.get(1);
|
||||
// } else {
|
||||
// if ( nSams != 1 ) System.out.println("WARNING: multiple input files specified. \n"+
|
||||
// "WARNING: Without --somatic option they will be merged and processed as a single sample");
|
||||
// }
|
||||
|
||||
try {
|
||||
// we already checked that bedOutput and output_file are not set simultaneously
|
||||
if ( bedOutput != null ) bedWriter = new FileWriter(bedOutput);
|
||||
|
|
@ -393,6 +387,8 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
|||
contigLength = getToolkit().getGenomeLocParser().getContigInfo(refName).getSequenceLength();
|
||||
outOfContigUserWarned = false;
|
||||
|
||||
lastGenotypedPosition = -1;
|
||||
|
||||
normal_context.clear(); // reset coverage window; this will also set reference position to 0
|
||||
if ( call_somatic) tumor_context.clear();
|
||||
|
||||
|
|
@ -527,7 +523,45 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
|||
return 1;
|
||||
}
|
||||
|
||||
/** An auxiliary shortcut: returns true if position(location.getContig(), p) is past l */
|
||||
private boolean pastInterval(long p, GenomeLoc l) {
|
||||
return ( location.getContigIndex() > l.getContigIndex() ||
|
||||
location.getContigIndex() == l.getContigIndex() && p > l.getStop() );
|
||||
}
|
||||
|
||||
/** Emit calls of the specified type across genotyping intervals, from position lastGenotypedPosition+1 to
|
||||
* pos-1, inclusive.
|
||||
* @param contigIndex
|
||||
* @param pos
|
||||
* @param call
|
||||
*/
|
||||
/*
|
||||
private void emitNoCallsUpTo(int contigIndex, long pos, CallType call) {
|
||||
|
||||
if ( contigIndex < currentGenotypeInterval.getContigIndex() ||
|
||||
contigIndex == currentGenotypeInterval.getContigIndex() && pos <= currentGenotypeInterval.getStart() ) return;
|
||||
|
||||
if ( contigIndex == currentGenotypeInterval.getContigIndex() && pos >= currentGenotypeInterval.getStart() ) {
|
||||
for ( long p = lastGenotypedPosition+1; p < pos; p++ ) {
|
||||
|
||||
}
|
||||
}
|
||||
while( currentGenotypeInterval != null ) {
|
||||
|
||||
while ( )
|
||||
if ( genotypeIntervalIterator.hasNext() ) {
|
||||
currentGenotypeInterval = genotypeIntervalIterator.next() ;
|
||||
if ( pastInterval(p,currentGenotypeInterval) ) {
|
||||
// if we are about to jump over the whole next interval, we need to emit NO_COVERAGE calls there!
|
||||
emitNoCoverageCalls(currentGenotypeInterval);
|
||||
}
|
||||
} else {
|
||||
currentGenotypeInterval = null;
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
/** Output indel calls up to the specified position and shift the window: after this method is executed, the
|
||||
* first element of the window maps onto 'position', if possible, or at worst a few bases to the left of 'position' if we may need more
|
||||
* reads to get full NQS-style statistics for an indel in the close proximity of 'position'.
|
||||
|
|
@ -547,21 +581,47 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
|||
|
||||
for ( int pos = normal_context.getStart() ; pos < Math.min(adjustedPosition,normal_context.getStop()+1) ; pos++ ) {
|
||||
|
||||
if ( normal_context.indelsAt(pos).size() == 0 ) continue; // no indels
|
||||
boolean genotype = false;
|
||||
// first let's see if we need to genotype current position:
|
||||
|
||||
final long p = pos - 1; // our internally used positions (pos) are +1 compared to external format spec (e.g. vcf)
|
||||
|
||||
if ( pos <= lastGenotypedPosition ) continue;
|
||||
|
||||
while ( currentGenotypeInterval != null ) {
|
||||
|
||||
// if we did not even reach next interval yet, no genotyping at current position:
|
||||
if ( location.getContigIndex() < currentGenotypeInterval.getContigIndex() ||
|
||||
location.getContigIndex() == currentGenotypeInterval.getContigIndex() &&
|
||||
p < currentGenotypeInterval.getStart() ) break;
|
||||
if ( pastInterval(p, currentGenotypeInterval) ) {
|
||||
// we are past current genotyping interval, so we are done with it; let's load next interval:
|
||||
currentGenotypeInterval = genotypeIntervalIterator.hasNext() ? genotypeIntervalIterator.next() : null;
|
||||
continue; // re-enter the loop to check against the interval we just loaded
|
||||
}
|
||||
|
||||
// we reach tjis point only if p is inside current genotyping interval; set the flag and bail out:
|
||||
genotype = true;
|
||||
break;
|
||||
}
|
||||
|
||||
// if ( DEBUG ) System.out.println("DEBUG>> pos="+pos +"; genotyping interval="+currentGenotypeInterval+"; genotype="+genotype);
|
||||
|
||||
if ( normal_context.indelsAt(pos).size() == 0 && ! genotype ) continue;
|
||||
|
||||
IndelPrecall normalCall = new IndelPrecall(normal_context,pos,NQS_WIDTH);
|
||||
|
||||
if ( normalCall.getCoverage() < minCoverage ) {
|
||||
if ( normalCall.getCoverage() < minCoverage && ! genotype ) {
|
||||
if ( DEBUG ) {
|
||||
System.out.println("DEBUG>> Indel at "+pos+"; coverare in normal="+normalCall.getCoverage()+" (SKIPPED)");
|
||||
}
|
||||
continue; // low coverage
|
||||
}
|
||||
|
||||
if ( DEBUG ) System.out.println("DEBUG>> Indel at "+pos);
|
||||
if ( DEBUG ) System.out.println("DEBUG>> "+(normalCall.getAllVariantCount() == 0?"No Indel":"Indel")+" at "+pos);
|
||||
|
||||
long left = Math.max( pos-NQS_WIDTH, normal_context.getStart() );
|
||||
long right = pos+normalCall.getVariant().lengthOnRef()+NQS_WIDTH-1;
|
||||
long right = pos+( normalCall.getVariant() == null ? 0 : normalCall.getVariant().lengthOnRef())+NQS_WIDTH-1;
|
||||
|
||||
if ( right >= adjustedPosition && ! force) {
|
||||
// we are not asked to force-shift, and there is more coverage around the current indel that we still need to collect
|
||||
|
|
@ -584,26 +644,14 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
|||
location = getToolkit().getGenomeLocParser().setStart(location,pos);
|
||||
location = getToolkit().getGenomeLocParser().setStop(location,pos); // retrieve annotation data
|
||||
|
||||
if ( normalCall.isCall() ) {
|
||||
normalCallsMade++;
|
||||
boolean haveCall = normalCall.isCall(); // cache the value
|
||||
|
||||
if ( haveCall || genotype) {
|
||||
if ( haveCall ) normalCallsMade++;
|
||||
printVCFLine(vcf_writer,normalCall);
|
||||
if ( bedWriter != null ) normalCall.printBedLine(bedWriter);
|
||||
|
||||
if ( verboseWriter != null ) {
|
||||
|
||||
RODRecordList annotationList = (refseqIterator == null ? null : refseqIterator.seekForward(location));
|
||||
String annotationString = (refseqIterator == null ? "" : getAnnotationString(annotationList));
|
||||
|
||||
StringBuilder fullRecord = new StringBuilder();
|
||||
fullRecord.append(makeFullRecord(normalCall));
|
||||
fullRecord.append(annotationString);
|
||||
try {
|
||||
verboseWriter.write(fullRecord.toString());
|
||||
verboseWriter.write('\n');
|
||||
} catch (IOException e) {
|
||||
throw new UserException.CouldNotCreateOutputFile(verboseOutput, "Write failed", e);
|
||||
}
|
||||
}
|
||||
if ( verboseWriter != null ) printVerboseLine(verboseWriter, normalCall);
|
||||
lastGenotypedPosition = pos;
|
||||
}
|
||||
|
||||
normal_context.indelsAt(pos).clear();
|
||||
|
|
@ -696,28 +744,58 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
|||
|
||||
for ( int pos = tumor_context.getStart() ; pos < Math.min(adjustedPosition,tumor_context.getStop()+1) ; pos++ ) {
|
||||
|
||||
if ( tumor_context.indelsAt(pos).size() == 0 ) continue; // no indels in tumor
|
||||
boolean genotype = false;
|
||||
// first let's see if we need to genotype current position:
|
||||
|
||||
final long p = pos - 1; // our internally used positions (pos) are +1 compared to external format spec (e.g. vcf)
|
||||
|
||||
if ( pos <= lastGenotypedPosition ) continue;
|
||||
|
||||
while ( currentGenotypeInterval != null ) {
|
||||
|
||||
// if we did not even reach next interval yet, no genotyping at current position:
|
||||
if ( location.getContigIndex() < currentGenotypeInterval.getContigIndex() ||
|
||||
location.getContigIndex() == currentGenotypeInterval.getContigIndex() &&
|
||||
p < currentGenotypeInterval.getStart() ) break;
|
||||
if ( pastInterval(p, currentGenotypeInterval) ) {
|
||||
// we are past current genotyping interval, so we are done with it; let's load next interval:
|
||||
currentGenotypeInterval = genotypeIntervalIterator.hasNext() ? genotypeIntervalIterator.next() : null;
|
||||
continue; // re-enter the loop to check against the interval we just loaded
|
||||
}
|
||||
|
||||
// we reach tjis point only if p is inside current genotyping interval; set the flag and bail out:
|
||||
genotype = true;
|
||||
break;
|
||||
}
|
||||
// if ( DEBUG) System.out.println("DEBUG>> pos="+pos +"; genotyping interval="+currentGenotypeInterval+"; genotype="+genotype);
|
||||
|
||||
if ( tumor_context.indelsAt(pos).size() == 0 && ! genotype ) continue; // no indels in tumor
|
||||
|
||||
if ( DEBUG && genotype ) System.out.println("DEBUG>> Genotyping requested at "+pos);
|
||||
|
||||
IndelPrecall tumorCall = new IndelPrecall(tumor_context,pos,NQS_WIDTH);
|
||||
IndelPrecall normalCall = new IndelPrecall(normal_context,pos,NQS_WIDTH);
|
||||
|
||||
if ( tumorCall.getCoverage() < minCoverage ) {
|
||||
if ( tumorCall.getCoverage() < minCoverage && ! genotype ) {
|
||||
if ( DEBUG ) {
|
||||
System.out.println("DEBUG>> Indel in tumor at "+pos+"; coverare in tumor="+tumorCall.getCoverage()+" (SKIPPED)");
|
||||
}
|
||||
continue; // low coverage
|
||||
}
|
||||
if ( normalCall.getCoverage() < minNormalCoverage ) {
|
||||
if ( normalCall.getCoverage() < minNormalCoverage && ! genotype ) {
|
||||
if ( DEBUG ) {
|
||||
System.out.println("DEBUG>> Indel in tumor at "+pos+"; coverare in normal="+normalCall.getCoverage()+" (SKIPPED)");
|
||||
}
|
||||
continue; // low coverage
|
||||
}
|
||||
|
||||
if ( DEBUG ) System.out.println("DEBUG>> Indel in tumor at "+pos);
|
||||
if ( DEBUG ) {
|
||||
System.out.print("DEBUG>> "+(tumorCall.getAllVariantCount() == 0?"No Indel":"Indel")+" in tumor, ");
|
||||
System.out.print("DEBUG>> "+(normalCall.getAllVariantCount() == 0?"No Indel":"Indel")+" in normal at "+pos);
|
||||
}
|
||||
|
||||
long left = Math.max( pos-NQS_WIDTH, tumor_context.getStart() );
|
||||
long right = pos+tumorCall.getVariant().lengthOnRef()+NQS_WIDTH-1;
|
||||
long right = pos+ ( tumorCall.getVariant() == null ? 0 : tumorCall.getVariant().lengthOnRef() )+NQS_WIDTH-1;
|
||||
|
||||
if ( right >= adjustedPosition && ! force) {
|
||||
// we are not asked to force-shift, and there is more coverage around the current indel that we still need to collect
|
||||
|
|
@ -740,32 +818,17 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
|||
location = getToolkit().getGenomeLocParser().setStart(location,pos);
|
||||
location = getToolkit().getGenomeLocParser().setStop(location,pos); // retrieve annotation data
|
||||
|
||||
if ( tumorCall.isCall() ) {
|
||||
tumorCallsMade++;
|
||||
boolean haveCall = tumorCall.isCall(); // cache the value
|
||||
|
||||
if ( haveCall || genotype ) {
|
||||
if ( haveCall ) tumorCallsMade++;
|
||||
|
||||
printVCFLine(vcf_writer,normalCall,tumorCall);
|
||||
|
||||
if ( bedWriter != null ) tumorCall.printBedLine(bedWriter);
|
||||
|
||||
if ( verboseWriter != null ) {
|
||||
RODRecordList annotationList = (refseqIterator == null ? null : refseqIterator.seekForward(location));
|
||||
String annotationString = (refseqIterator == null ? "" : getAnnotationString(annotationList));
|
||||
|
||||
StringBuilder fullRecord = new StringBuilder();
|
||||
fullRecord.append(makeFullRecord(normalCall,tumorCall));
|
||||
|
||||
if ( normalCall.getVariant() == null ) {
|
||||
fullRecord.append("SOMATIC");
|
||||
} else {
|
||||
fullRecord.append("GERMLINE");
|
||||
}
|
||||
try {
|
||||
verboseWriter.write(fullRecord + "\t"+ annotationString);
|
||||
verboseWriter.write('\n');
|
||||
} catch (IOException e) {
|
||||
throw new UserException.CouldNotCreateOutputFile(verboseOutput, "Write failed", e);
|
||||
}
|
||||
}
|
||||
if ( verboseWriter != null ) printVerboseLine(verboseWriter, normalCall, tumorCall );
|
||||
lastGenotypedPosition = pos;
|
||||
}
|
||||
tumor_context.indelsAt(pos).clear();
|
||||
normal_context.indelsAt(pos).clear();
|
||||
|
|
@ -785,7 +848,11 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
|||
|
||||
private String makeFullRecord(IndelPrecall normalCall, IndelPrecall tumorCall) {
|
||||
StringBuilder fullRecord = new StringBuilder();
|
||||
fullRecord.append(tumorCall.makeEventString());
|
||||
if ( tumorCall.getVariant() != null || normalCall.getVariant() == null) {
|
||||
fullRecord.append(tumorCall.makeEventString());
|
||||
} else {
|
||||
fullRecord.append(normalCall.makeEventString());
|
||||
}
|
||||
fullRecord.append('\t');
|
||||
fullRecord.append(normalCall.makeStatsString("N_"));
|
||||
fullRecord.append('\t');
|
||||
|
|
@ -825,9 +892,83 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
|||
|
||||
}
|
||||
|
||||
public void printVerboseLine(Writer verboseWriter, IndelPrecall normalCall) {
|
||||
RODRecordList annotationList = (refseqIterator == null ? null : refseqIterator.seekForward(location));
|
||||
String annotationString = (refseqIterator == null ? "" : getAnnotationString(annotationList));
|
||||
|
||||
StringBuilder fullRecord = new StringBuilder();
|
||||
fullRecord.append(makeFullRecord(normalCall));
|
||||
fullRecord.append(annotationString);
|
||||
if ( ! normalCall.isCall() && normalCall.getVariant() != null ) fullRecord.append("\tFILTERED_NOCALL");
|
||||
try {
|
||||
verboseWriter.write(fullRecord.toString());
|
||||
verboseWriter.write('\n');
|
||||
} catch (IOException e) {
|
||||
throw new UserException.CouldNotCreateOutputFile(verboseOutput, "Write failed", e);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
public void printVerboseLine(Writer verboseWriter, IndelPrecall normalCall, IndelPrecall tumorCall) {
|
||||
RODRecordList annotationList = (refseqIterator == null ? null : refseqIterator.seekForward(location));
|
||||
String annotationString = (refseqIterator == null ? "" : getAnnotationString(annotationList));
|
||||
|
||||
StringBuilder fullRecord = new StringBuilder();
|
||||
fullRecord.append(makeFullRecord(normalCall,tumorCall));
|
||||
|
||||
if ( normalCall.getVariant() == null && tumorCall.getVariant() == null ) {
|
||||
// did not observe anything
|
||||
if ( normalCall.getCoverage() >= minNormalCoverage && tumorCall.getCoverage() >= minCoverage ) fullRecord.append("REFERENCE");
|
||||
else {
|
||||
if ( tumorCall.getCoverage() >= minCoverage ) fullRecord.append("REFERENCE"); // no coverage in normal but nothing in tumor
|
||||
else {
|
||||
// no coverage in tumor; if we have no coverage in normal, it can be anything; if we do have coverage in normal,
|
||||
// this still could be a somatic event. so either way it is 'unknown'
|
||||
fullRecord.append("UNKNOWN");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if ( normalCall.getVariant() == null && tumorCall.getVariant() != null ) {
|
||||
// looks like somatic call
|
||||
if ( normalCall.getCoverage() >= minNormalCoverage ) fullRecord.append("SOMATIC"); // we confirm there is nothing in normal
|
||||
else {
|
||||
// low coverage in normal
|
||||
fullRecord.append("EVENT_T"); // no coverage in normal, no idea whether it is germline or somatic
|
||||
}
|
||||
}
|
||||
|
||||
if ( normalCall.getVariant() != null && tumorCall.getVariant() == null ) {
|
||||
// it's likely germline (with missing observation in tumor - maybe loh?
|
||||
if ( tumorCall.getCoverage() >= minCoverage ) fullRecord.append("GERMLINE_LOH"); // we confirm there is nothing in tumor
|
||||
else {
|
||||
// low coverage in tumor, maybe we missed the event
|
||||
fullRecord.append("GERMLINE"); // no coverage in tumor but we already saw it in normal...
|
||||
}
|
||||
}
|
||||
|
||||
if ( normalCall.getVariant() != null && tumorCall.getVariant() != null ) {
|
||||
// events in both T/N, got to be germline!
|
||||
fullRecord.append("GERMLINE");
|
||||
}
|
||||
|
||||
|
||||
fullRecord.append('\t');
|
||||
fullRecord.append(annotationString);
|
||||
|
||||
if ( ! tumorCall.isCall() && tumorCall.getVariant() != null ) fullRecord.append("\tFILTERED_NOCALL");
|
||||
|
||||
try {
|
||||
verboseWriter.write(fullRecord.toString());
|
||||
verboseWriter.write('\n');
|
||||
} catch (IOException e) {
|
||||
throw new UserException.CouldNotCreateOutputFile(verboseOutput, "Write failed", e);
|
||||
}
|
||||
}
|
||||
|
||||
public void printVCFLine(VCFWriter vcf, IndelPrecall call) {
|
||||
int event_length = call.getVariant().lengthOnRef();
|
||||
if ( event_length < 0 ) event_length = 0;
|
||||
|
||||
long start = call.getPosition()-1;
|
||||
// If the beginning of the chromosome is deleted (possible, however unlikely), it's unclear how to proceed.
|
||||
|
|
@ -838,84 +979,146 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
|||
|
||||
long stop = start;
|
||||
|
||||
List<Allele> alleles = new ArrayList<Allele>(2);
|
||||
List<Allele> alleles = new ArrayList<Allele>(2); // actual observed (distinct!) alleles at the site
|
||||
List<Allele> homref_alleles = null; // when needed, will contain two identical copies of ref allele - needed to generate hom-ref genotype
|
||||
|
||||
if ( event_length == 0 ) { // insertion
|
||||
alleles.add( Allele.create(Allele.NULL_ALLELE_STRING,true) );
|
||||
alleles.add( Allele.create(call.getVariant().getBases(), false ));
|
||||
} else { //deletion:
|
||||
alleles.add( Allele.create(Allele.NULL_ALLELE_STRING,false) );
|
||||
alleles.add( Allele.create(call.getVariant().getBases(), true ));
|
||||
stop += event_length;
|
||||
|
||||
if ( call.getVariant() == null ) {
|
||||
// we will need to cteate genotype with two (hom) ref alleles (below).
|
||||
// we can not use 'alleles' list here, since that list is supposed to contain
|
||||
// only *distinct* alleles observed at the site or VCFContext will frown upon us...
|
||||
alleles.add( Allele.create(refBases[(int)start-1],true) );
|
||||
homref_alleles = new ArrayList<Allele>(2);
|
||||
homref_alleles.add( alleles.get(0));
|
||||
homref_alleles.add( alleles.get(0));
|
||||
} else {
|
||||
// we always create alt allele when we observe anything but the ref, even if it is not a call!
|
||||
// (Genotype will tell us whether it is an actual call or not!)
|
||||
int event_length = call.getVariant().lengthOnRef();
|
||||
if ( event_length < 0 ) event_length = 0;
|
||||
fillAlleleList(alleles,call);
|
||||
stop += event_length;
|
||||
}
|
||||
|
||||
Map<String,Genotype> genotypes = new HashMap<String,Genotype>();
|
||||
|
||||
for ( String sample : normalSamples ) {
|
||||
genotypes.put(sample,new Genotype(sample, alleles));
|
||||
if ( call.isCall() ) // we made a call - put actual het genotype here:
|
||||
genotypes.put(sample,new Genotype(sample, alleles));
|
||||
else // no call: genotype is ref/ref (but alleles still contain the alt if we observed anything at all)
|
||||
genotypes.put(sample,new Genotype(sample, homref_alleles));
|
||||
|
||||
}
|
||||
Set<String> filters = null;
|
||||
if ( call.getVariant() != null && ! call.isCall() ) {
|
||||
filters = new HashSet<String>();
|
||||
filters.add("NOCALL");
|
||||
}
|
||||
|
||||
VariantContext vc = new VariantContext("IGv2_Indel_call", refName, start, stop, alleles, genotypes,
|
||||
-1.0 /* log error */, null /* filters */, call.makeStatsAttributes("",null));
|
||||
-1.0 /* log error */, filters, call.makeStatsAttributes("",null));
|
||||
vcf.add(vc,refBases[(int)start-1]);
|
||||
}
|
||||
|
||||
/** Fills l with appropriate alleles depending on whether call is insertion or deletion
|
||||
* (l MUST have a variant or this method will crash). It is guaranteed that the *first* allele added
|
||||
* to the list is ref, and the next one is alt.
|
||||
* @param l
|
||||
* @param call
|
||||
*/
|
||||
private void fillAlleleList(List<Allele> l, IndelPrecall call) {
|
||||
int event_length = call.getVariant().lengthOnRef();
|
||||
if ( event_length == 0 ) { // insertion
|
||||
|
||||
l.add( Allele.create(Allele.NULL_ALLELE_STRING,true) );
|
||||
l.add( Allele.create(call.getVariant().getBases(), false ));
|
||||
|
||||
} else { //deletion:
|
||||
l.add( Allele.create(call.getVariant().getBases(), true ));
|
||||
l.add( Allele.create(Allele.NULL_ALLELE_STRING,false) );
|
||||
}
|
||||
}
|
||||
|
||||
public void printVCFLine(VCFWriter vcf, IndelPrecall nCall, IndelPrecall tCall) {
|
||||
int event_length = tCall.getVariant().lengthOnRef();
|
||||
if ( event_length < 0 ) event_length = 0;
|
||||
|
||||
long start = tCall.getPosition()-1;
|
||||
long stop = start;
|
||||
|
||||
// If the beginning of the chromosome is deleted (possible, however unlikely), it's unclear how to proceed.
|
||||
// The suggestion is instead of putting the base before the indel, to put the base after the indel.
|
||||
// For now, just don't print out that site.
|
||||
if ( start == 0 )
|
||||
return;
|
||||
|
||||
Map<String,Object> attrs = nCall.makeStatsAttributes("N_",null);
|
||||
attrs = tCall.makeStatsAttributes("T_",attrs);
|
||||
|
||||
boolean isSomatic = false;
|
||||
if ( nCall.getVariant() == null ) {
|
||||
if ( nCall.getCoverage() >= minNormalCoverage && nCall.getVariant() == null && tCall.getVariant() != null ) {
|
||||
isSomatic = true;
|
||||
attrs.put(VCFConstants.SOMATIC_KEY,true);
|
||||
}
|
||||
List<Allele> alleles = new ArrayList<Allele>(2); // all alleles at the site
|
||||
// List<Allele> normal_alleles = null; // all alleles at the site
|
||||
List<Allele> homRefAlleles = null;
|
||||
|
||||
List<Allele> alleles = new ArrayList<Allele>(2);
|
||||
List<Allele> homRefAlleles = isSomatic ? new ArrayList<Allele>(2) : null ; // we need this only for somatic calls
|
||||
|
||||
if ( event_length == 0 ) { // insertion
|
||||
alleles.add( Allele.create(Allele.NULL_ALLELE_STRING,true) );
|
||||
alleles.add( Allele.create(tCall.getVariant().getBases(), false ));
|
||||
if ( isSomatic ) {
|
||||
// create alleles of hom-ref genotype for normal sample
|
||||
homRefAlleles.add( Allele.create(Allele.NULL_ALLELE_STRING,true) );
|
||||
homRefAlleles.add( Allele.create(Allele.NULL_ALLELE_STRING,true) );
|
||||
}
|
||||
} else { //deletion:
|
||||
alleles.add( Allele.create(Allele.NULL_ALLELE_STRING,false) );
|
||||
alleles.add( Allele.create(tCall.getVariant().getBases(), true ));
|
||||
stop += event_length;
|
||||
if ( isSomatic ) {
|
||||
// create alleles of hom-ref genotype for normal sample
|
||||
homRefAlleles.add( Allele.create(tCall.getVariant().getBases(), true ));
|
||||
homRefAlleles.add( Allele.create(tCall.getVariant().getBases(), true ));
|
||||
// if ( nCall.getVariant() == null || tCall.getVariant() == null ) {
|
||||
homRefAlleles = new ArrayList<Allele>(2) ; // we need this for somatic calls (since normal is ref-ref), and also for no-calls
|
||||
// }
|
||||
boolean homRefT = ( tCall.getVariant() == null );
|
||||
boolean homRefN = ( nCall.getVariant() == null );
|
||||
if ( tCall.getVariant() == null && nCall.getVariant() == null) {
|
||||
// no indel at all ; create base-representation ref/ref alleles for genotype construction
|
||||
alleles.add( Allele.create(refBases[(int)start-1],true) );
|
||||
} else {
|
||||
// we got indel(s)
|
||||
int event_length = 0;
|
||||
if ( tCall.getVariant() != null ) {
|
||||
// indel in tumor
|
||||
event_length = tCall.getVariant().lengthOnRef();
|
||||
fillAlleleList(alleles, tCall);
|
||||
} else {
|
||||
event_length = nCall.getVariant().lengthOnRef();
|
||||
fillAlleleList(alleles, nCall);
|
||||
}
|
||||
if ( event_length > 0 ) stop += event_length;
|
||||
}
|
||||
|
||||
homRefAlleles.add( alleles.get(0));
|
||||
homRefAlleles.add( alleles.get(0));
|
||||
|
||||
Map<String,Genotype> genotypes = new HashMap<String,Genotype>();
|
||||
|
||||
for ( String sample : normalSamples ) {
|
||||
genotypes.put(sample,new Genotype(sample, isSomatic ? homRefAlleles : alleles,0));
|
||||
genotypes.put(sample,new Genotype(sample, homRefN ? homRefAlleles : alleles,0));
|
||||
}
|
||||
|
||||
for ( String sample : tumorSamples ) {
|
||||
genotypes.put(sample,new Genotype(sample, alleles,0) );
|
||||
genotypes.put(sample,new Genotype(sample, homRefT ? homRefAlleles : alleles,0) );
|
||||
}
|
||||
|
||||
Set<String> filters = null;
|
||||
if ( tCall.getVariant() != null && ! tCall.isCall() ) {
|
||||
filters = new HashSet<String>();
|
||||
filters.add("NOCALL");
|
||||
}
|
||||
if ( nCall.getCoverage() < minNormalCoverage ) {
|
||||
if ( filters == null ) filters = new HashSet<String>();
|
||||
filters.add("NCOV");
|
||||
}
|
||||
if ( tCall.getCoverage() < minCoverage ) {
|
||||
if ( filters == null ) filters = new HashSet<String>();
|
||||
filters.add("TCOV");
|
||||
}
|
||||
|
||||
VariantContext vc = new VariantContext("IGv2_Indel_call", refName, start, stop, alleles, genotypes,
|
||||
-1.0 /* log error */, null /* filters */, attrs);
|
||||
-1.0 /* log error */, filters, attrs);
|
||||
vcf.add(vc,refBases[(int)start-1]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onTraversalDone(Integer result) {
|
||||
if ( DEBUG ) {
|
||||
System.out.println("DEBUG>> Emitting last window at "+normal_context.getStart()+"-"+normal_context.getStop());
|
||||
}
|
||||
if ( call_somatic ) emit_somatic(1000000000, true);
|
||||
else emit(1000000000,true); // emit everything we might have left
|
||||
|
||||
|
|
@ -1078,6 +1281,14 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
|||
private PrimitivePair.Int all_indel_read_orientation_cnt = new PrimitivePair.Int();
|
||||
private PrimitivePair.Int all_read_orientation_cnt = new PrimitivePair.Int();
|
||||
|
||||
/** Makes an empty call (no-call) with all stats set to 0
|
||||
*
|
||||
* @param position
|
||||
*/
|
||||
public IndelPrecall(long position) {
|
||||
this.pos = position;
|
||||
}
|
||||
|
||||
public IndelPrecall(WindowContext context, long position, int nqs_width) {
|
||||
this.pos = position;
|
||||
this.nqs = nqs_width;
|
||||
|
|
@ -1281,7 +1492,7 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
|||
public boolean isCall() {
|
||||
boolean ret = ( consensus_indel_count >= minIndelCount &&
|
||||
(double)consensus_indel_count > minFraction * total_coverage &&
|
||||
(double) consensus_indel_count > minConsensusFraction*all_indel_count );
|
||||
(double) consensus_indel_count > minConsensusFraction*all_indel_count && total_coverage >= minCoverage);
|
||||
if ( DEBUG && ! ret ) System.out.println("DEBUG>> NOT a call: count="+consensus_indel_count+
|
||||
" total_count="+all_indel_count+" cov="+total_coverage+
|
||||
" minConsensusF="+((double)consensus_indel_count)/all_indel_count+
|
||||
|
|
@ -1312,23 +1523,36 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
|||
|
||||
|
||||
public void printBedLine(Writer bed) {
|
||||
int event_length = consensus_indel.lengthOnRef();
|
||||
if ( event_length < 0 ) event_length = 0;
|
||||
int event_length;
|
||||
if ( consensus_indel == null ) event_length = 0;
|
||||
else {
|
||||
event_length = consensus_indel.lengthOnRef();
|
||||
if ( event_length < 0 ) event_length = 0;
|
||||
}
|
||||
|
||||
StringBuffer message = new StringBuffer();
|
||||
message.append(refName+"\t"+(pos-1)+"\t");
|
||||
message.append((pos-1+event_length)+"\t"+(event_length>0? "-":"+")+consensus_indel.getBases() +":"+all_indel_count+"/"+total_coverage);
|
||||
|
||||
try {
|
||||
message.append((pos-1+event_length)+"\t");
|
||||
if ( consensus_indel != null ) {
|
||||
message.append((event_length>0? "-":"+")+consensus_indel.getBases());
|
||||
} else {
|
||||
message.append('.');
|
||||
}
|
||||
message.append(":"+all_indel_count+"/"+total_coverage);
|
||||
try {
|
||||
bed.write(message.toString()+"\n");
|
||||
} catch (IOException e) {
|
||||
} catch (IOException e) {
|
||||
throw new UserException.CouldNotCreateOutputFile(bedOutput, "Error encountered while writing into output BED file", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public String makeEventString() {
|
||||
int event_length = consensus_indel.lengthOnRef();
|
||||
if ( event_length < 0 ) event_length = 0;
|
||||
int event_length;
|
||||
if ( consensus_indel == null ) event_length = 0;
|
||||
else {
|
||||
event_length = consensus_indel.lengthOnRef();
|
||||
if ( event_length < 0 ) event_length = 0;
|
||||
}
|
||||
StringBuffer message = new StringBuffer();
|
||||
message.append(refName);
|
||||
message.append('\t');
|
||||
|
|
@ -1336,8 +1560,12 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
|||
message.append('\t');
|
||||
message.append(pos-1+event_length);
|
||||
message.append('\t');
|
||||
message.append((event_length>0?'-':'+'));
|
||||
message.append(consensus_indel.getBases());
|
||||
if ( consensus_indel != null ) {
|
||||
message.append((event_length>0?'-':'+'));
|
||||
message.append(consensus_indel.getBases());
|
||||
} else {
|
||||
message.append('.');
|
||||
}
|
||||
return message.toString();
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue