diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContext.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContext.java index 460da1aa0..2ccf3f5f5 100755 --- a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContext.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContext.java @@ -576,6 +576,24 @@ public class VariantContext { return Collections.unmodifiableSet(altAlleles); } + /** + * Gets the sizes of the alternate alleles if they are insertion/deletion events, and returns a list of their sizes + * + * @return a list of indel lengths ( null if not of type indel or mixed ) + */ + public List getIndelLengths() { + if ( getType() != Type.INDEL || getType() != Type.MIXED ) { + return null; + } + + List lengths = new ArrayList(); + for ( Allele a : getAlternateAlleles() ) { + lengths.add(a.length() - getReference().length()); + } + + return lengths; + } + /** * @param i -- the ith allele (from 0 to n - 2 for a context with n alleles including a reference allele) * @return the ith non-reference allele in this context diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java index 48518cfd6..75bbff21d 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java @@ -26,12 +26,14 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broad.tribble.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.*; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; import java.util.Map; import java.util.HashMap; @@ -41,11 +43,13 @@ public class AlleleBalance implements InfoFieldAnnotation, StandardAnnotation { public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { - if ( !vc.isBiallelic() || !vc.isSNP() ) + if ( !vc.isBiallelic() ) return null; final Map genotypes = vc.getGenotypes(); - if ( genotypes == null || genotypes.size() == 0 ) + if ( genotypes == null || genotypes.size() == 0 ) { + System.out.println("No genotypes for vc at "+ref.getLocus().toString()); return null; + } double ratio = 0.0; double totalWeights = 0.0; @@ -58,23 +62,39 @@ public class AlleleBalance implements InfoFieldAnnotation, StandardAnnotation { if ( context == null ) continue; - final String bases = new String(context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup().getBases()).toUpperCase(); - if ( bases.length() == 0 ) - return null; + if ( vc.isSNP() ) { + final String bases = new String(context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup().getBases()).toUpperCase(); + if ( bases.length() == 0 ) + return null; + char refChr = vc.getReference().toString().charAt(0); + char altChr = vc.getAlternateAllele(0).toString().charAt(0); - char refChr = vc.getReference().toString().charAt(0); - char altChr = vc.getAlternateAllele(0).toString().charAt(0); + int refCount = MathUtils.countOccurrences(refChr, bases); + int altCount = MathUtils.countOccurrences(altChr, bases); - int refCount = MathUtils.countOccurrences(refChr, bases); - int altCount = MathUtils.countOccurrences(altChr, bases); + // sanity check + if ( refCount + altCount == 0 ) + continue; - // sanity check - if ( refCount + altCount == 0 ) - continue; + // weight the allele balance by genotype quality so that e.g. mis-called homs don't affect the ratio too much + ratio += genotype.getValue().getNegLog10PError() * ((double)refCount / (double)(refCount + altCount)); + totalWeights += genotype.getValue().getNegLog10PError(); + } else if ( vc.isIndel() ) { + final ReadBackedExtendedEventPileup indelPileup = context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getExtendedEventPileup(); + if ( indelPileup == null ) { + continue; + } + // todo -- actually care about indel length from the pileup (agnostic at the moment) + int refCount = indelPileup.size(); + int altCount = vc.isInsertion() ? indelPileup.getNumberOfInsertions() : indelPileup.getNumberOfDeletions(); - // weight the allele balance by genotype quality so that e.g. mis-called homs don't affect the ratio too much - ratio += genotype.getValue().getNegLog10PError() * ((double)refCount / (double)(refCount + altCount)); - totalWeights += genotype.getValue().getNegLog10PError(); + if ( refCount + altCount == 0 ) { + continue; + } + + ratio += /* todo -- make not uniform */ 1 * ((double) refCount) / (double) (refCount + altCount); + totalWeights += 1; + } } // make sure we had a het genotype diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java index f16a84900..70f674657 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java @@ -15,7 +15,7 @@ import java.util.HashMap; public class HomopolymerRun implements InfoFieldAnnotation, StandardAnnotation { - private boolean ANNOTATE_INDELS = false; + private boolean ANNOTATE_INDELS = true; public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { @@ -84,7 +84,7 @@ public class HomopolymerRun implements InfoFieldAnnotation, StandardAnnotation { } } - return computeHomopolymerRun(dBase,ref); + return computeHomopolymerRun(dBase,ref)-1; // remove the extra match from the base itself } else { // check that inserted bases are the same byte insBase = vc.getAlternateAllele(0).getBases()[0]; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index 3f068c2a2..e160df268 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -73,6 +73,9 @@ public class VariantAnnotator extends LocusWalker { @Argument(fullName="list", shortName="ls", doc="List the available annotations and exit") protected Boolean LIST = false; + @Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false) + protected boolean indelsOnly = false; + private VCFWriter vcfWriter; private HashMap nonVCFsampleName = new HashMap(); @@ -153,6 +156,13 @@ public class VariantAnnotator extends LocusWalker { */ public boolean includeReadsWithDeletionAtLoci() { return true; } + /** + * We want to see extended events if annotating indels + * + * @return true + */ + public boolean generateExtendedEvents() { return indelsOnly; } + /** * For each site of interest, annotate based on the requested annotation types * @@ -177,8 +187,13 @@ public class VariantAnnotator extends LocusWalker { // if the reference base is not ambiguous, we can annotate Collection annotatedVCs = Arrays.asList(vc); + Map stratifiedContexts; if ( BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1 ) { - Map stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(context.getBasePileup()); + if ( ! context.hasExtendedEventPileup() ) { + stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(context.getBasePileup()); + } else { + stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(context.getExtendedEventPileup()); + } if ( stratifiedContexts != null ) { annotatedVCs = engine.annotateContext(tracker, ref, stratifiedContexts, vc); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java b/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java index 7538b8693..1c8958777 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java @@ -52,14 +52,14 @@ public class CoverageUtils { } } public static Map> - getBaseCountsByPartition(AlignmentContext context, int minMapQ, int minBaseQ, List types) { + getBaseCountsByPartition(AlignmentContext context, int minMapQ, int maxMapQ, byte minBaseQ, byte maxBaseQ, List types) { Map> countsByIDByType = new HashMap>(); for (CoverageAggregator.AggregationType t : types ) { countsByIDByType.put(t,new HashMap()); } for (PileupElement e : context.getBasePileup()) { - if ( e.getMappingQual() >= minMapQ && ( e.getQual() >= minBaseQ || e.isDeletion() ) ) { + if ( e.getMappingQual() >= minMapQ && e.getMappingQual() <= maxMapQ && ( ( e.getQual() >= minBaseQ && e.getQual() <= maxBaseQ ) || e.isDeletion() ) ) { for (CoverageAggregator.AggregationType t : types ) { String id = getTypeID(e.getRead(),t); if ( ! countsByIDByType.get(t).keySet().contains(id) ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java index fa831fd86..a59b0cfe8 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java @@ -75,9 +75,13 @@ public class DepthOfCoverageWalker extends LocusWalker> thisMap, CoverageAggregator prevReduce) { @@ -370,7 +374,7 @@ public class DepthOfCoverageWalker extends LocusWalker> statsByTarget) { LocationAwareSeekableRODIterator refseqIterator = initializeRefSeq(); List> statsByGene = new ArrayList>();// maintains order - Map geneNamesToStats = new HashMap(); // alows indirect updating of objects in list + Map geneNamesToStats = new HashMap(); // allows indirect updating of objects in list for ( Pair targetStats : statsByTarget ) { String gene = getGeneName(targetStats.first,refseqIterator); @@ -710,7 +714,7 @@ public class DepthOfCoverageWalker extends LocusWalker> countsBySampleByType, Map> identifiersByType) { diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageB36IntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageB36IntegrationTest.java new file mode 100644 index 000000000..ef812de05 --- /dev/null +++ b/java/test/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageB36IntegrationTest.java @@ -0,0 +1,77 @@ +package org.broadinstitute.sting.gatk.walkers.coverage; + +import org.broadinstitute.sting.WalkerTest; +import org.junit.Test; + +import java.io.File; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl + * + * @Author chartl + * @Date May 20, 2010 + */ +public class DepthOfCoverageB36IntegrationTest extends WalkerTest { + + private boolean RUN_TESTS = true; + private String root = "-T DepthOfCoverage "; + private String hg18 = "/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"; + private String b36 = "/broad/1KG/reference/human_b36_both.fasta"; + + private String buildRootCmd(String ref, List bams, List intervals) { + StringBuilder bamBuilder = new StringBuilder(); + do { + bamBuilder.append(" -I "); + bamBuilder.append(bams.remove(0)); + } while ( bams.size() > 0 ); + + StringBuilder intervalBuilder = new StringBuilder(); + do { + intervalBuilder.append(" -L "); + intervalBuilder.append(intervals.remove(0)); + } while ( intervals.size() > 0 ); + + + return root + "-R "+ref+bamBuilder.toString()+intervalBuilder.toString(); + } + + private void execute(String name, WalkerTest.WalkerTestSpec spec) { + if ( RUN_TESTS ) { + executeTest(name,spec); + } + } + + public File createTempFileFromBase(String name) { + File fl = new File(name); + fl.deleteOnExit(); + return fl; + } + + @Test + public void testMapQ0Only() { + // our base file + File baseOutputFile = this.createTempFile("depthofcoveragemapq0",".tmp"); + this.setOutputFileLocation(baseOutputFile); + + String[] intervals = {"1:10,000,000-10,002,000","1:10,003,000-10,004,000"}; + String[] bams = {"/humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam"}; + + String cmd = buildRootCmd(b36,new ArrayList(Arrays.asList(bams)), new ArrayList(Arrays.asList(intervals))) + " --maxMappingQuality 0"; + + WalkerTestSpec spec = new WalkerTestSpec(cmd,0,new ArrayList()); + + spec.addAuxFile("f39af6ad99520fd4fb27b409ab0344a0",baseOutputFile); + spec.addAuxFile("6b15f5330414b6d4e2f6caea42139fa1", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_counts")); + spec.addAuxFile("cc6640d82077991dde8a2b523935cdff", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_proportions")); + spec.addAuxFile("0fb627234599c258a3fee1b2703e164a", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_statistics")); + spec.addAuxFile("6f6085293e8cca402ef4fe648cf06ea8", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_summary")); + spec.addAuxFile("347b47ef73fbd4e277704ddbd7834f69", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_statistics")); + spec.addAuxFile("ff0c79d161259402d54d572151582697", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_summary")); + + execute("testMapQ0Only",spec); + } + +} diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/DepthOfCoverageIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageIntegrationTest.java similarity index 98% rename from java/test/org/broadinstitute/sting/gatk/walkers/DepthOfCoverageIntegrationTest.java rename to java/test/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageIntegrationTest.java index d6c43e5a8..7dae9ee8d 100644 --- a/java/test/org/broadinstitute/sting/gatk/walkers/DepthOfCoverageIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageIntegrationTest.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.gatk.walkers; +package org.broadinstitute.sting.gatk.walkers.coverage; import org.broadinstitute.sting.WalkerTest; import org.junit.Test;