Changes to Depth of Coverage (added maximum base and mapping quality flags; with new integration tests -- because they use b36, and the other test uses hg18, it's in a different class (integration test system can't change refs on the fly). Initial change to VariantAnnotator to allow it to see extended event pileups; you currently have to throw the -dels flag; and it's specified as "very experimental". Yet, all the integration tests pass.
Homopolymer Run now does the "right" thing (e.g. single bases are represented as HRun = 0 rather than HRun = 1) for indels. AlleleBalance now does something close enough to correct. Added a convenience method to VariantContext that will return the indel length (or lengths if a site is not biallelic). git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3409 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
6faf101c6c
commit
88cb93cc3c
|
|
@ -576,6 +576,24 @@ public class VariantContext {
|
|||
return Collections.unmodifiableSet(altAlleles);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the sizes of the alternate alleles if they are insertion/deletion events, and returns a list of their sizes
|
||||
*
|
||||
* @return a list of indel lengths ( null if not of type indel or mixed )
|
||||
*/
|
||||
public List<Integer> getIndelLengths() {
|
||||
if ( getType() != Type.INDEL || getType() != Type.MIXED ) {
|
||||
return null;
|
||||
}
|
||||
|
||||
List<Integer> lengths = new ArrayList<Integer>();
|
||||
for ( Allele a : getAlternateAlleles() ) {
|
||||
lengths.add(a.length() - getReference().length());
|
||||
}
|
||||
|
||||
return lengths;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param i -- the ith allele (from 0 to n - 2 for a context with n alleles including a reference allele)
|
||||
* @return the ith non-reference allele in this context
|
||||
|
|
|
|||
|
|
@ -26,12 +26,14 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
|
|
@ -41,11 +43,13 @@ public class AlleleBalance implements InfoFieldAnnotation, StandardAnnotation {
|
|||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
|
||||
if ( !vc.isBiallelic() || !vc.isSNP() )
|
||||
if ( !vc.isBiallelic() )
|
||||
return null;
|
||||
final Map<String, Genotype> genotypes = vc.getGenotypes();
|
||||
if ( genotypes == null || genotypes.size() == 0 )
|
||||
if ( genotypes == null || genotypes.size() == 0 ) {
|
||||
System.out.println("No genotypes for vc at "+ref.getLocus().toString());
|
||||
return null;
|
||||
}
|
||||
|
||||
double ratio = 0.0;
|
||||
double totalWeights = 0.0;
|
||||
|
|
@ -58,23 +62,39 @@ public class AlleleBalance implements InfoFieldAnnotation, StandardAnnotation {
|
|||
if ( context == null )
|
||||
continue;
|
||||
|
||||
final String bases = new String(context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup().getBases()).toUpperCase();
|
||||
if ( bases.length() == 0 )
|
||||
return null;
|
||||
if ( vc.isSNP() ) {
|
||||
final String bases = new String(context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup().getBases()).toUpperCase();
|
||||
if ( bases.length() == 0 )
|
||||
return null;
|
||||
char refChr = vc.getReference().toString().charAt(0);
|
||||
char altChr = vc.getAlternateAllele(0).toString().charAt(0);
|
||||
|
||||
char refChr = vc.getReference().toString().charAt(0);
|
||||
char altChr = vc.getAlternateAllele(0).toString().charAt(0);
|
||||
int refCount = MathUtils.countOccurrences(refChr, bases);
|
||||
int altCount = MathUtils.countOccurrences(altChr, bases);
|
||||
|
||||
int refCount = MathUtils.countOccurrences(refChr, bases);
|
||||
int altCount = MathUtils.countOccurrences(altChr, bases);
|
||||
// sanity check
|
||||
if ( refCount + altCount == 0 )
|
||||
continue;
|
||||
|
||||
// sanity check
|
||||
if ( refCount + altCount == 0 )
|
||||
continue;
|
||||
// weight the allele balance by genotype quality so that e.g. mis-called homs don't affect the ratio too much
|
||||
ratio += genotype.getValue().getNegLog10PError() * ((double)refCount / (double)(refCount + altCount));
|
||||
totalWeights += genotype.getValue().getNegLog10PError();
|
||||
} else if ( vc.isIndel() ) {
|
||||
final ReadBackedExtendedEventPileup indelPileup = context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getExtendedEventPileup();
|
||||
if ( indelPileup == null ) {
|
||||
continue;
|
||||
}
|
||||
// todo -- actually care about indel length from the pileup (agnostic at the moment)
|
||||
int refCount = indelPileup.size();
|
||||
int altCount = vc.isInsertion() ? indelPileup.getNumberOfInsertions() : indelPileup.getNumberOfDeletions();
|
||||
|
||||
// weight the allele balance by genotype quality so that e.g. mis-called homs don't affect the ratio too much
|
||||
ratio += genotype.getValue().getNegLog10PError() * ((double)refCount / (double)(refCount + altCount));
|
||||
totalWeights += genotype.getValue().getNegLog10PError();
|
||||
if ( refCount + altCount == 0 ) {
|
||||
continue;
|
||||
}
|
||||
|
||||
ratio += /* todo -- make not uniform */ 1 * ((double) refCount) / (double) (refCount + altCount);
|
||||
totalWeights += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// make sure we had a het genotype
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ import java.util.HashMap;
|
|||
|
||||
public class HomopolymerRun implements InfoFieldAnnotation, StandardAnnotation {
|
||||
|
||||
private boolean ANNOTATE_INDELS = false;
|
||||
private boolean ANNOTATE_INDELS = true;
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
|
||||
|
|
@ -84,7 +84,7 @@ public class HomopolymerRun implements InfoFieldAnnotation, StandardAnnotation {
|
|||
}
|
||||
}
|
||||
|
||||
return computeHomopolymerRun(dBase,ref);
|
||||
return computeHomopolymerRun(dBase,ref)-1; // remove the extra match from the base itself
|
||||
} else {
|
||||
// check that inserted bases are the same
|
||||
byte insBase = vc.getAlternateAllele(0).getBases()[0];
|
||||
|
|
|
|||
|
|
@ -73,6 +73,9 @@ public class VariantAnnotator extends LocusWalker<Integer, Integer> {
|
|||
@Argument(fullName="list", shortName="ls", doc="List the available annotations and exit")
|
||||
protected Boolean LIST = false;
|
||||
|
||||
@Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false)
|
||||
protected boolean indelsOnly = false;
|
||||
|
||||
private VCFWriter vcfWriter;
|
||||
|
||||
private HashMap<String, String> nonVCFsampleName = new HashMap<String, String>();
|
||||
|
|
@ -153,6 +156,13 @@ public class VariantAnnotator extends LocusWalker<Integer, Integer> {
|
|||
*/
|
||||
public boolean includeReadsWithDeletionAtLoci() { return true; }
|
||||
|
||||
/**
|
||||
* We want to see extended events if annotating indels
|
||||
*
|
||||
* @return true
|
||||
*/
|
||||
public boolean generateExtendedEvents() { return indelsOnly; }
|
||||
|
||||
/**
|
||||
* For each site of interest, annotate based on the requested annotation types
|
||||
*
|
||||
|
|
@ -177,8 +187,13 @@ public class VariantAnnotator extends LocusWalker<Integer, Integer> {
|
|||
|
||||
// if the reference base is not ambiguous, we can annotate
|
||||
Collection<VariantContext> annotatedVCs = Arrays.asList(vc);
|
||||
Map<String, StratifiedAlignmentContext> stratifiedContexts;
|
||||
if ( BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1 ) {
|
||||
Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(context.getBasePileup());
|
||||
if ( ! context.hasExtendedEventPileup() ) {
|
||||
stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(context.getBasePileup());
|
||||
} else {
|
||||
stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(context.getExtendedEventPileup());
|
||||
}
|
||||
if ( stratifiedContexts != null ) {
|
||||
annotatedVCs = engine.annotateContext(tracker, ref, stratifiedContexts, vc);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -52,14 +52,14 @@ public class CoverageUtils {
|
|||
}
|
||||
}
|
||||
public static Map<CoverageAggregator.AggregationType,Map<String,int[]>>
|
||||
getBaseCountsByPartition(AlignmentContext context, int minMapQ, int minBaseQ, List<CoverageAggregator.AggregationType> types) {
|
||||
getBaseCountsByPartition(AlignmentContext context, int minMapQ, int maxMapQ, byte minBaseQ, byte maxBaseQ, List<CoverageAggregator.AggregationType> types) {
|
||||
|
||||
Map<CoverageAggregator.AggregationType,Map<String,int[]>> countsByIDByType = new HashMap<CoverageAggregator.AggregationType,Map<String,int[]>>();
|
||||
for (CoverageAggregator.AggregationType t : types ) {
|
||||
countsByIDByType.put(t,new HashMap<String,int[]>());
|
||||
}
|
||||
for (PileupElement e : context.getBasePileup()) {
|
||||
if ( e.getMappingQual() >= minMapQ && ( e.getQual() >= minBaseQ || e.isDeletion() ) ) {
|
||||
if ( e.getMappingQual() >= minMapQ && e.getMappingQual() <= maxMapQ && ( ( e.getQual() >= minBaseQ && e.getQual() <= maxBaseQ ) || e.isDeletion() ) ) {
|
||||
for (CoverageAggregator.AggregationType t : types ) {
|
||||
String id = getTypeID(e.getRead(),t);
|
||||
if ( ! countsByIDByType.get(t).keySet().contains(id) ) {
|
||||
|
|
|
|||
|
|
@ -75,9 +75,13 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<CoverageAggregator.Ag
|
|||
@Argument(fullName = "nBins", doc = "Number of bins to use for granular binning", required = false)
|
||||
int nBins = 499;
|
||||
@Argument(fullName = "minMappingQuality", shortName = "mmq", doc = "Minimum mapping quality of reads to count towards depth. Defaults to -1.", required = false)
|
||||
byte minMappingQuality = -1;
|
||||
int minMappingQuality = -1;
|
||||
@Argument(fullName = "maxMappingQuality", doc = "Maximum mapping quality of reads to count towards depth. Defaults to 2^31-1 (Integer.MAX_VALUE).", required = false)
|
||||
int maxMappingQuality = Integer.MAX_VALUE;
|
||||
@Argument(fullName = "minBaseQuality", shortName = "mbq", doc = "Minimum quality of bases to count towards depth. Defaults to -1.", required = false)
|
||||
byte minBaseQuality = -1;
|
||||
@Argument(fullName = "maxBaseQuality", doc = "Maximum quality of bases to count towards depth. Defaults to 127 (Byte.MAX_VALUE).", required = false)
|
||||
byte maxBaseQuality = Byte.MAX_VALUE;
|
||||
@Argument(fullName = "printBaseCounts", shortName = "baseCounts", doc = "Will add base counts to per-locus output.", required = false)
|
||||
boolean printBaseCounts = false;
|
||||
@Argument(fullName = "omitLocusTable", shortName = "omitLocusTable", doc = "Will not calculate the per-sample per-depth counts of loci, which should result in speedup", required = false)
|
||||
|
|
@ -245,7 +249,7 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<CoverageAggregator.Ag
|
|||
//System.out.printf("\t[log]\t%s",ref.getLocus());
|
||||
}
|
||||
|
||||
return CoverageUtils.getBaseCountsByPartition(context,minMappingQuality,minBaseQuality,aggregationTypes);
|
||||
return CoverageUtils.getBaseCountsByPartition(context,minMappingQuality,maxMappingQuality,minBaseQuality,maxBaseQuality,aggregationTypes);
|
||||
}
|
||||
|
||||
public CoverageAggregator reduce(Map<CoverageAggregator.AggregationType,Map<String,int[]>> thisMap, CoverageAggregator prevReduce) {
|
||||
|
|
@ -370,7 +374,7 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<CoverageAggregator.Ag
|
|||
private void printGeneStats(List<Pair<GenomeLoc,CoverageAggregator>> statsByTarget) {
|
||||
LocationAwareSeekableRODIterator refseqIterator = initializeRefSeq();
|
||||
List<Pair<String,DepthOfCoverageStats>> statsByGene = new ArrayList<Pair<String,DepthOfCoverageStats>>();// maintains order
|
||||
Map<String,DepthOfCoverageStats> geneNamesToStats = new HashMap<String,DepthOfCoverageStats>(); // alows indirect updating of objects in list
|
||||
Map<String,DepthOfCoverageStats> geneNamesToStats = new HashMap<String,DepthOfCoverageStats>(); // allows indirect updating of objects in list
|
||||
|
||||
for ( Pair<GenomeLoc,CoverageAggregator> targetStats : statsByTarget ) {
|
||||
String gene = getGeneName(targetStats.first,refseqIterator);
|
||||
|
|
@ -710,7 +714,7 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<CoverageAggregator.Ag
|
|||
bin++;
|
||||
}
|
||||
|
||||
return bin;
|
||||
return bin == -1 ? 0 : bin;
|
||||
}
|
||||
|
||||
private void printDepths(PrintStream stream, Map<CoverageAggregator.AggregationType,Map<String,int[]>> countsBySampleByType, Map<CoverageAggregator.AggregationType,List<String>> identifiersByType) {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,77 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.coverage;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl
|
||||
*
|
||||
* @Author chartl
|
||||
* @Date May 20, 2010
|
||||
*/
|
||||
public class DepthOfCoverageB36IntegrationTest extends WalkerTest {
|
||||
|
||||
private boolean RUN_TESTS = true;
|
||||
private String root = "-T DepthOfCoverage ";
|
||||
private String hg18 = "/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta";
|
||||
private String b36 = "/broad/1KG/reference/human_b36_both.fasta";
|
||||
|
||||
private String buildRootCmd(String ref, List<String> bams, List<String> intervals) {
|
||||
StringBuilder bamBuilder = new StringBuilder();
|
||||
do {
|
||||
bamBuilder.append(" -I ");
|
||||
bamBuilder.append(bams.remove(0));
|
||||
} while ( bams.size() > 0 );
|
||||
|
||||
StringBuilder intervalBuilder = new StringBuilder();
|
||||
do {
|
||||
intervalBuilder.append(" -L ");
|
||||
intervalBuilder.append(intervals.remove(0));
|
||||
} while ( intervals.size() > 0 );
|
||||
|
||||
|
||||
return root + "-R "+ref+bamBuilder.toString()+intervalBuilder.toString();
|
||||
}
|
||||
|
||||
private void execute(String name, WalkerTest.WalkerTestSpec spec) {
|
||||
if ( RUN_TESTS ) {
|
||||
executeTest(name,spec);
|
||||
}
|
||||
}
|
||||
|
||||
public File createTempFileFromBase(String name) {
|
||||
File fl = new File(name);
|
||||
fl.deleteOnExit();
|
||||
return fl;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMapQ0Only() {
|
||||
// our base file
|
||||
File baseOutputFile = this.createTempFile("depthofcoveragemapq0",".tmp");
|
||||
this.setOutputFileLocation(baseOutputFile);
|
||||
|
||||
String[] intervals = {"1:10,000,000-10,002,000","1:10,003,000-10,004,000"};
|
||||
String[] bams = {"/humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam"};
|
||||
|
||||
String cmd = buildRootCmd(b36,new ArrayList<String>(Arrays.asList(bams)), new ArrayList<String>(Arrays.asList(intervals))) + " --maxMappingQuality 0";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(cmd,0,new ArrayList<String>());
|
||||
|
||||
spec.addAuxFile("f39af6ad99520fd4fb27b409ab0344a0",baseOutputFile);
|
||||
spec.addAuxFile("6b15f5330414b6d4e2f6caea42139fa1", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_counts"));
|
||||
spec.addAuxFile("cc6640d82077991dde8a2b523935cdff", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_proportions"));
|
||||
spec.addAuxFile("0fb627234599c258a3fee1b2703e164a", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_statistics"));
|
||||
spec.addAuxFile("6f6085293e8cca402ef4fe648cf06ea8", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_summary"));
|
||||
spec.addAuxFile("347b47ef73fbd4e277704ddbd7834f69", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_statistics"));
|
||||
spec.addAuxFile("ff0c79d161259402d54d572151582697", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_summary"));
|
||||
|
||||
execute("testMapQ0Only",spec);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package org.broadinstitute.sting.gatk.walkers;
|
||||
package org.broadinstitute.sting.gatk.walkers.coverage;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.junit.Test;
|
||||
Loading…
Reference in New Issue