Changes to Depth of Coverage (added maximum base and mapping quality flags; with new integration tests -- because they use b36, and the other test uses hg18, it's in a different class (integration test system can't change refs on the fly). Initial change to VariantAnnotator to allow it to see extended event pilups; you currently have to throw the -dels flag; and it's specified as "very experimental". Yet,all the integration tests pass.

Homopolymer Run now does the "right" thing (e.g. single bases are represented as HRun = 0 rather than HRun = 1) for indels. AlleleBalance now does something close enough to correct.

Added a convenience method to VariantContext that will return the indel length (or lengths if a site is not biallelic).



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3409 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2010-05-21 13:02:01 +00:00
parent 6faf101c6c
commit 88cb93cc3c
8 changed files with 159 additions and 25 deletions

View File

@ -576,6 +576,24 @@ public class VariantContext {
return Collections.unmodifiableSet(altAlleles);
}
/**
* Gets the sizes of the alternate alleles if they are insertion/deletion events, and returns a list of their sizes
*
* @return a list of indel lengths ( null if not of type indel or mixed )
*/
public List<Integer> getIndelLengths() {
if ( getType() != Type.INDEL || getType() != Type.MIXED ) {
return null;
}
List<Integer> lengths = new ArrayList<Integer>();
for ( Allele a : getAlternateAlleles() ) {
lengths.add(a.length() - getReference().length());
}
return lengths;
}
/**
* @param i -- the ith allele (from 0 to n - 2 for a context with n alleles including a reference allele)
* @return the ith non-reference allele in this context

View File

@ -26,12 +26,14 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
import java.util.Map;
import java.util.HashMap;
@ -41,11 +43,13 @@ public class AlleleBalance implements InfoFieldAnnotation, StandardAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
if ( !vc.isBiallelic() || !vc.isSNP() )
if ( !vc.isBiallelic() )
return null;
final Map<String, Genotype> genotypes = vc.getGenotypes();
if ( genotypes == null || genotypes.size() == 0 )
if ( genotypes == null || genotypes.size() == 0 ) {
System.out.println("No genotypes for vc at "+ref.getLocus().toString());
return null;
}
double ratio = 0.0;
double totalWeights = 0.0;
@ -58,23 +62,39 @@ public class AlleleBalance implements InfoFieldAnnotation, StandardAnnotation {
if ( context == null )
continue;
final String bases = new String(context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup().getBases()).toUpperCase();
if ( bases.length() == 0 )
return null;
if ( vc.isSNP() ) {
final String bases = new String(context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup().getBases()).toUpperCase();
if ( bases.length() == 0 )
return null;
char refChr = vc.getReference().toString().charAt(0);
char altChr = vc.getAlternateAllele(0).toString().charAt(0);
char refChr = vc.getReference().toString().charAt(0);
char altChr = vc.getAlternateAllele(0).toString().charAt(0);
int refCount = MathUtils.countOccurrences(refChr, bases);
int altCount = MathUtils.countOccurrences(altChr, bases);
int refCount = MathUtils.countOccurrences(refChr, bases);
int altCount = MathUtils.countOccurrences(altChr, bases);
// sanity check
if ( refCount + altCount == 0 )
continue;
// sanity check
if ( refCount + altCount == 0 )
continue;
// weight the allele balance by genotype quality so that e.g. mis-called homs don't affect the ratio too much
ratio += genotype.getValue().getNegLog10PError() * ((double)refCount / (double)(refCount + altCount));
totalWeights += genotype.getValue().getNegLog10PError();
} else if ( vc.isIndel() ) {
final ReadBackedExtendedEventPileup indelPileup = context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getExtendedEventPileup();
if ( indelPileup == null ) {
continue;
}
// todo -- actually care about indel length from the pileup (agnostic at the moment)
int refCount = indelPileup.size();
int altCount = vc.isInsertion() ? indelPileup.getNumberOfInsertions() : indelPileup.getNumberOfDeletions();
// weight the allele balance by genotype quality so that e.g. mis-called homs don't affect the ratio too much
ratio += genotype.getValue().getNegLog10PError() * ((double)refCount / (double)(refCount + altCount));
totalWeights += genotype.getValue().getNegLog10PError();
if ( refCount + altCount == 0 ) {
continue;
}
ratio += /* todo -- make not uniform */ 1 * ((double) refCount) / (double) (refCount + altCount);
totalWeights += 1;
}
}
// make sure we had a het genotype

View File

@ -15,7 +15,7 @@ import java.util.HashMap;
public class HomopolymerRun implements InfoFieldAnnotation, StandardAnnotation {
private boolean ANNOTATE_INDELS = false;
private boolean ANNOTATE_INDELS = true;
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
@ -84,7 +84,7 @@ public class HomopolymerRun implements InfoFieldAnnotation, StandardAnnotation {
}
}
return computeHomopolymerRun(dBase,ref);
return computeHomopolymerRun(dBase,ref)-1; // remove the extra match from the base itself
} else {
// check that inserted bases are the same
byte insBase = vc.getAlternateAllele(0).getBases()[0];

View File

@ -73,6 +73,9 @@ public class VariantAnnotator extends LocusWalker<Integer, Integer> {
@Argument(fullName="list", shortName="ls", doc="List the available annotations and exit")
protected Boolean LIST = false;
@Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false)
protected boolean indelsOnly = false;
private VCFWriter vcfWriter;
private HashMap<String, String> nonVCFsampleName = new HashMap<String, String>();
@ -153,6 +156,13 @@ public class VariantAnnotator extends LocusWalker<Integer, Integer> {
*/
public boolean includeReadsWithDeletionAtLoci() { return true; }
/**
* We want to see extended events if annotating indels
*
* @return true
*/
public boolean generateExtendedEvents() { return indelsOnly; }
/**
* For each site of interest, annotate based on the requested annotation types
*
@ -177,8 +187,13 @@ public class VariantAnnotator extends LocusWalker<Integer, Integer> {
// if the reference base is not ambiguous, we can annotate
Collection<VariantContext> annotatedVCs = Arrays.asList(vc);
Map<String, StratifiedAlignmentContext> stratifiedContexts;
if ( BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1 ) {
Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(context.getBasePileup());
if ( ! context.hasExtendedEventPileup() ) {
stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(context.getBasePileup());
} else {
stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(context.getExtendedEventPileup());
}
if ( stratifiedContexts != null ) {
annotatedVCs = engine.annotateContext(tracker, ref, stratifiedContexts, vc);
}

View File

@ -52,14 +52,14 @@ public class CoverageUtils {
}
}
public static Map<CoverageAggregator.AggregationType,Map<String,int[]>>
getBaseCountsByPartition(AlignmentContext context, int minMapQ, int minBaseQ, List<CoverageAggregator.AggregationType> types) {
getBaseCountsByPartition(AlignmentContext context, int minMapQ, int maxMapQ, byte minBaseQ, byte maxBaseQ, List<CoverageAggregator.AggregationType> types) {
Map<CoverageAggregator.AggregationType,Map<String,int[]>> countsByIDByType = new HashMap<CoverageAggregator.AggregationType,Map<String,int[]>>();
for (CoverageAggregator.AggregationType t : types ) {
countsByIDByType.put(t,new HashMap<String,int[]>());
}
for (PileupElement e : context.getBasePileup()) {
if ( e.getMappingQual() >= minMapQ && ( e.getQual() >= minBaseQ || e.isDeletion() ) ) {
if ( e.getMappingQual() >= minMapQ && e.getMappingQual() <= maxMapQ && ( ( e.getQual() >= minBaseQ && e.getQual() <= maxBaseQ ) || e.isDeletion() ) ) {
for (CoverageAggregator.AggregationType t : types ) {
String id = getTypeID(e.getRead(),t);
if ( ! countsByIDByType.get(t).keySet().contains(id) ) {

View File

@ -75,9 +75,13 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<CoverageAggregator.Ag
@Argument(fullName = "nBins", doc = "Number of bins to use for granular binning", required = false)
int nBins = 499;
@Argument(fullName = "minMappingQuality", shortName = "mmq", doc = "Minimum mapping quality of reads to count towards depth. Defaults to -1.", required = false)
byte minMappingQuality = -1;
int minMappingQuality = -1;
@Argument(fullName = "maxMappingQuality", doc = "Maximum mapping quality of reads to count towards depth. Defaults to 2^31-1 (Integer.MAX_VALUE).", required = false)
int maxMappingQuality = Integer.MAX_VALUE;
@Argument(fullName = "minBaseQuality", shortName = "mbq", doc = "Minimum quality of bases to count towards depth. Defaults to -1.", required = false)
byte minBaseQuality = -1;
@Argument(fullName = "maxBaseQuality", doc = "Maximum quality of bases to count towards depth. Defaults to 127 (Byte.MAX_VALUE).", required = false)
byte maxBaseQuality = Byte.MAX_VALUE;
@Argument(fullName = "printBaseCounts", shortName = "baseCounts", doc = "Will add base counts to per-locus output.", required = false)
boolean printBaseCounts = false;
@Argument(fullName = "omitLocusTable", shortName = "omitLocusTable", doc = "Will not calculate the per-sample per-depth counts of loci, which should result in speedup", required = false)
@ -245,7 +249,7 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<CoverageAggregator.Ag
//System.out.printf("\t[log]\t%s",ref.getLocus());
}
return CoverageUtils.getBaseCountsByPartition(context,minMappingQuality,minBaseQuality,aggregationTypes);
return CoverageUtils.getBaseCountsByPartition(context,minMappingQuality,maxMappingQuality,minBaseQuality,maxBaseQuality,aggregationTypes);
}
public CoverageAggregator reduce(Map<CoverageAggregator.AggregationType,Map<String,int[]>> thisMap, CoverageAggregator prevReduce) {
@ -370,7 +374,7 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<CoverageAggregator.Ag
private void printGeneStats(List<Pair<GenomeLoc,CoverageAggregator>> statsByTarget) {
LocationAwareSeekableRODIterator refseqIterator = initializeRefSeq();
List<Pair<String,DepthOfCoverageStats>> statsByGene = new ArrayList<Pair<String,DepthOfCoverageStats>>();// maintains order
Map<String,DepthOfCoverageStats> geneNamesToStats = new HashMap<String,DepthOfCoverageStats>(); // alows indirect updating of objects in list
Map<String,DepthOfCoverageStats> geneNamesToStats = new HashMap<String,DepthOfCoverageStats>(); // allows indirect updating of objects in list
for ( Pair<GenomeLoc,CoverageAggregator> targetStats : statsByTarget ) {
String gene = getGeneName(targetStats.first,refseqIterator);
@ -710,7 +714,7 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<CoverageAggregator.Ag
bin++;
}
return bin;
return bin == -1 ? 0 : bin;
}
private void printDepths(PrintStream stream, Map<CoverageAggregator.AggregationType,Map<String,int[]>> countsBySampleByType, Map<CoverageAggregator.AggregationType,List<String>> identifiersByType) {

View File

@ -0,0 +1,77 @@
package org.broadinstitute.sting.gatk.walkers.coverage;
import org.broadinstitute.sting.WalkerTest;
import org.junit.Test;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
* IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl
*
* @Author chartl
* @Date May 20, 2010
*/
public class DepthOfCoverageB36IntegrationTest extends WalkerTest {
private boolean RUN_TESTS = true;
private String root = "-T DepthOfCoverage ";
private String hg18 = "/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta";
private String b36 = "/broad/1KG/reference/human_b36_both.fasta";
private String buildRootCmd(String ref, List<String> bams, List<String> intervals) {
StringBuilder bamBuilder = new StringBuilder();
do {
bamBuilder.append(" -I ");
bamBuilder.append(bams.remove(0));
} while ( bams.size() > 0 );
StringBuilder intervalBuilder = new StringBuilder();
do {
intervalBuilder.append(" -L ");
intervalBuilder.append(intervals.remove(0));
} while ( intervals.size() > 0 );
return root + "-R "+ref+bamBuilder.toString()+intervalBuilder.toString();
}
private void execute(String name, WalkerTest.WalkerTestSpec spec) {
if ( RUN_TESTS ) {
executeTest(name,spec);
}
}
public File createTempFileFromBase(String name) {
File fl = new File(name);
fl.deleteOnExit();
return fl;
}
@Test
public void testMapQ0Only() {
// our base file
File baseOutputFile = this.createTempFile("depthofcoveragemapq0",".tmp");
this.setOutputFileLocation(baseOutputFile);
String[] intervals = {"1:10,000,000-10,002,000","1:10,003,000-10,004,000"};
String[] bams = {"/humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam"};
String cmd = buildRootCmd(b36,new ArrayList<String>(Arrays.asList(bams)), new ArrayList<String>(Arrays.asList(intervals))) + " --maxMappingQuality 0";
WalkerTestSpec spec = new WalkerTestSpec(cmd,0,new ArrayList<String>());
spec.addAuxFile("f39af6ad99520fd4fb27b409ab0344a0",baseOutputFile);
spec.addAuxFile("6b15f5330414b6d4e2f6caea42139fa1", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_counts"));
spec.addAuxFile("cc6640d82077991dde8a2b523935cdff", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_proportions"));
spec.addAuxFile("0fb627234599c258a3fee1b2703e164a", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_statistics"));
spec.addAuxFile("6f6085293e8cca402ef4fe648cf06ea8", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_summary"));
spec.addAuxFile("347b47ef73fbd4e277704ddbd7834f69", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_statistics"));
spec.addAuxFile("ff0c79d161259402d54d572151582697", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_summary"));
execute("testMapQ0Only",spec);
}
}

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.gatk.walkers;
package org.broadinstitute.sting.gatk.walkers.coverage;
import org.broadinstitute.sting.WalkerTest;
import org.junit.Test;