diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalker.java new file mode 100755 index 000000000..3fbfa0bbf --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalker.java @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2009 The Broad Institute + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * Þles (the ÓSoftwareÓ), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, sub ject to the following + * conditions: + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED ÓAS ISÓ, WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.coverage; + +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.By; +import org.broadinstitute.sting.gatk.walkers.DataSource; +import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.commandline.Argument; + + +/** + * Emits a data file containing information about callable, uncallable, poorly mapped, and other parts of the genome + * + * @Author depristo + * @Date May 7, 2010 + */ +@By(DataSource.REFERENCE) +public class CallableLociWalker extends LocusWalker { + @Argument(fullName = "maxLowMAPQ", shortName = "mlmq", doc = "Maximum value for MAPQ to be considered a problematic mapped read. The gap between this value and mmq are reads that are not sufficiently well mapped for calling but aren't indicative of mapping problems.", required = false) + byte maxLowMAPQ = 1; + + @Argument(fullName = "minMappingQuality", shortName = "mmq", doc = "Minimum mapping quality of reads to count towards depth. Defaults to 50.", required = false) + byte minMappingQuality = 10; + + @Argument(fullName = "minBaseQuality", shortName = "mbq", doc = "Minimum quality of bases to count towards depth. Defaults to 20.", required = false) + byte minBaseQuality = 20; + + @Argument(fullName = "minDepth", shortName = "minDepth", doc = "Minimum QC+ read depth before a locus is considered callable", required = false) + int minDepth = 4; + + @Argument(fullName = "maxDepth", shortName = "maxDepth", doc = "Maximum read depth before a locus is considered poorly mapped", required = false) + int maxDepth = -1; + + @Argument(fullName = "minDepthForLowMAPQ", shortName = "mdflmq", doc = "Minimum read depth before a locus is considered a potential candidate for poorly mapped", required = false) + int minDepthLowMAPQ = 10; + + @Argument(fullName = "maxFractionOfReadsWithLowMAPQ", shortName = "frlmq", doc = "Maximum read depth before a locus is considered poorly mapped", required = false) + double maxLowMAPQFraction = 0.1; + + @Argument(fullName = "format", shortName = "format", doc = "Output format for the system: either BED or STATE_PER_BASE", required = false) + OutputFormat outputFormat; + + public enum OutputFormat { BED, STATE_PER_BASE } + + //////////////////////////////////////////////////////////////////////////////////// + // STANDARD WALKER METHODS + //////////////////////////////////////////////////////////////////////////////////// + + public boolean includeReadsWithDeletionAtLoci() { return true; } + + public void initialize() { + } + +// public boolean isReduceByInterval() { +// return false; +// } + + public static class CallableBaseState { + public GenomeLoc loc; + public CalledState state; + + public CallableBaseState(GenomeLoc loc, CalledState state) { + this.loc = loc; + this.state = state; + } + + public GenomeLoc getLocation() { + return loc; + } + + public CalledState getState() { + return state; + } + + // update routines + public boolean changingState( CalledState newState ) { + return state != newState; + } + + /** + * Updating the location of this CalledBaseState by the new stop location + * @param newStop + */ + public void update(GenomeLoc newStop) { + loc = GenomeLocParser.createGenomeLoc(loc.getContigIndex(), loc.getStart(), newStop.getStop()); + } + + public String toString() { + return String.format("%s %d %d %s", loc.getContig(), loc.getStart(), loc.getStop(), state); + //return String.format("%s %d %d %d %s", loc.getContig(), loc.getStart(), loc.getStop(), loc.getStop() - loc.getStart() + 1, state); + } + } + + public enum CalledState { REF_N, CALLABLE, NO_COVERAGE, LOW_COVERAGE, EXCESSIVE_COVERAGE, POOR_MAPPING_QUALITY } + + public CallableBaseState reduceInit() { + return null; + } + + public CallableBaseState map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + CalledState state; + + if ( BaseUtils.isNBase(ref.getBase()) ) { + state = CalledState.REF_N; + } else { + // count up the depths of all and QC+ bases + int rawDepth = 0, QCDepth = 0, lowMAPQDepth = 0; + for (PileupElement e : context.getBasePileup()) { + rawDepth++; + + if ( e.getMappingQual() <= maxLowMAPQ ) + lowMAPQDepth++; + + if ( e.getMappingQual() >= minMappingQuality && ( e.getQual() >= minBaseQuality || e.isDeletion() ) ) { + QCDepth++; + } + } + + //System.out.printf("%s rawdepth = %d QCDepth = %d lowMAPQ = %d%n", context.getLocation(), rawDepth, QCDepth, lowMAPQDepth); + if ( rawDepth == 0 ) { + state = CalledState.NO_COVERAGE; + } else if ( rawDepth >= minDepthLowMAPQ && MathUtils.ratio( lowMAPQDepth, rawDepth ) >= maxLowMAPQFraction ) { + state = CalledState.POOR_MAPPING_QUALITY; + } else if ( QCDepth < minDepth ) { + state = CalledState.LOW_COVERAGE; + } else if ( rawDepth >= maxDepth && maxDepth != -1 ) { + state = CalledState.EXCESSIVE_COVERAGE; + } else { + state = CalledState.CALLABLE; + } + } + + return new CallableBaseState(context.getLocation(), state); + } + + public CallableBaseState reduce(CallableBaseState state, CallableBaseState integrator) { + if ( outputFormat == OutputFormat.STATE_PER_BASE ) { + out.println(state.toString()); + } else { + // format is integrating + if ( integrator == null ) { + integrator = state; + } else if ( state.getLocation().getStart() != integrator.getLocation().getStop() + 1 || + integrator.changingState(state.getState()) ) { + out.println(integrator.toString()); + integrator = state; + } else { + integrator.update(state.getLocation()); + } + } + + return integrator; + } + + + //////////////////////////////////////////////////////////////////////////////////// + // INTERVAL ON TRAVERSAL DONE + //////////////////////////////////////////////////////////////////////////////////// + + public void onTraversalDone(CallableBaseState result) { + // print out the last state + if ( result!= null ) + out.println(result.toString()); + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalker.java new file mode 100755 index 000000000..d588ab0eb --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalker.java @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2009 The Broad Institute + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * Þles (the ÓSoftwareÓ), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, sub ject to the following + * conditions: + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED ÓAS ISÓ, WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.coverage; + +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.commandline.Argument; +import org.broad.tribble.bed.FullBEDFeature; + +import java.util.*; + +/** + * Test routine for new VariantContext object + */ +public class CompareCallableLociWalker extends RodWalker, long[][]> { + @Argument(shortName="comp1", doc="First comparison track name", required=false) + protected String COMP1 = "comp1"; + + @Argument(shortName="comp2", doc="First comparison track name", required=false) + protected String COMP2 = "comp2"; + + @Argument(shortName="printState", doc="If provided, prints sites satisfying this state pair", required=false) + protected String printState = null; + + CallableLociWalker.CalledState printState1 = CallableLociWalker.CalledState.REF_N; + CallableLociWalker.CalledState printState2 = CallableLociWalker.CalledState.REF_N; + + // -------------------------------------------------------------------------------------------------------------- + // + // initialize + // + // -------------------------------------------------------------------------------------------------------------- + + public void initialize() { + if ( printState != null ) { + String[] states = printState.split(","); + printState1 = CallableLociWalker.CalledState.valueOf(states[0]); + printState2 = CallableLociWalker.CalledState.valueOf(states[1]); + } + } + + + // -------------------------------------------------------------------------------------------------------------- + // + // map + // + // -------------------------------------------------------------------------------------------------------------- + public List map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if ( tracker != null ) { + CallableLociWalker.CallableBaseState comp1 = getCallableBaseState(tracker, COMP1); + CallableLociWalker.CallableBaseState comp2 = getCallableBaseState(tracker, COMP2); + + if ( printState != null && comp1.getState() == printState1 && comp2.getState() == printState2 ) { + out.printf("%s %s %s %s%n", comp1.getLocation(), comp1.getState(), comp2.getLocation(), comp2.getState()); + } + + return Arrays.asList(comp1, comp2); + } else { + return null; + } + } + + private CallableLociWalker.CallableBaseState getCallableBaseState(RefMetaDataTracker tracker, String track) { + //System.out.printf("tracker %s%n", tracker); + List bindings = tracker.getReferenceMetaData(track); + if ( bindings.size() != 1 || ! (bindings.get(0) instanceof FullBEDFeature)) { + throw new StingException(String.format("%s track isn't a properly formated CallableBases object!", track)); + } + + FullBEDFeature bed = (FullBEDFeature)bindings.get(0); + GenomeLoc loc = GenomeLocParser.createGenomeLoc(bed.getChr(), bed.getStart(), bed.getEnd()); + CallableLociWalker.CalledState state = CallableLociWalker.CalledState.valueOf(bed.getName()); + return new CallableLociWalker.CallableBaseState(loc, state); + } + + // -------------------------------------------------------------------------------------------------------------- + // + // reduce + // + // -------------------------------------------------------------------------------------------------------------- + public long[][] reduceInit() { + int n = CallableLociWalker.CalledState.values().length; + return new long[n][n]; + } + + public long[][] reduce(List comps, long[][] sum) { + if ( comps != null ) { + CallableLociWalker.CallableBaseState comp1 = comps.get(0); + CallableLociWalker.CallableBaseState comp2 = comps.get(1); + + sum[comp1.getState().ordinal()][comp2.getState().ordinal()]++; + } + + return sum; + } + + public void onTraversalDone(long[][] result) { + for ( CallableLociWalker.CalledState state1 : CallableLociWalker.CalledState.values() ) { + for ( CallableLociWalker.CalledState state2 : CallableLociWalker.CalledState.values() ) { + out.printf("%s %s %s %s %d%n", COMP1, COMP2, state1, state2, result[state1.ordinal()][state2.ordinal()]); + } + } + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java b/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java index b3c360222..e35705eee 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java @@ -19,21 +19,40 @@ public class CoverageUtils { public enum PartitionType { BY_READ_GROUP, BY_SAMPLE } + /** + * Returns the counts of bases from reads with MAPQ > minMapQ and base quality > minBaseQ in the context + * as an array of ints, indexed by the index fields of BaseUtils + * + * @param context + * @param minMapQ + * @param minBaseQ + * @return + */ + public static int[] getBaseCounts(AlignmentContext context, int minMapQ, int minBaseQ) { + int[] counts = new int[6]; + + for (PileupElement e : context.getBasePileup()) { + if ( e.getMappingQual() >= minMapQ && ( e.getQual() >= minBaseQ || e.isDeletion() ) ) { + updateCounts(counts,e); + } + } + + return counts; + } + + public static Map getBaseCountsBySample(AlignmentContext context, int minMapQ, int minBaseQ, PartitionType type) { Map samplesToCounts = new HashMap(); - for (PileupElement e : context.getBasePileup()) { - if ( e.getMappingQual() >= minMapQ && ( e.getQual() >= minBaseQ || e.isDeletion() ) ) { - String sample = type == PartitionType.BY_SAMPLE ? e.getRead().getReadGroup().getSample() : - e.getRead().getReadGroup().getSample()+"_rg_"+e.getRead().getReadGroup().getReadGroupId(); - if ( samplesToCounts.keySet().contains(sample) ) { - updateCounts(samplesToCounts.get(sample),e); - } else { - samplesToCounts.put(sample,new int[6]); - updateCounts(samplesToCounts.get(sample),e); - } - } + for (PileupElement e : context.getBasePileup()) { + if ( e.getMappingQual() >= minMapQ && ( e.getQual() >= minBaseQ || e.isDeletion() ) ) { + String sample = type == PartitionType.BY_SAMPLE ? e.getRead().getReadGroup().getSample() : + e.getRead().getReadGroup().getSample()+"_rg_"+e.getRead().getReadGroup().getReadGroupId(); + if ( ! samplesToCounts.keySet().contains(sample) ) + samplesToCounts.put(sample,new int[6]); + updateCounts(samplesToCounts.get(sample),e); } + } return samplesToCounts; } diff --git a/java/test/org/broadinstitute/sting/BaseTest.java b/java/test/org/broadinstitute/sting/BaseTest.java index 5aeadfd10..bf6ceaed3 100755 --- a/java/test/org/broadinstitute/sting/BaseTest.java +++ b/java/test/org/broadinstitute/sting/BaseTest.java @@ -42,6 +42,8 @@ public abstract class BaseTest { protected static String seqLocation = "/seq/"; protected static String oneKGLocation = "/broad/1KG/"; + protected static String hg18Reference = "/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"; + protected static String b36KGReference = "/broad/1KG/reference/human_b36_both.fasta"; protected static String GATKDataLocation = "/humgen/gsa-hpprojects/GATK/data/"; protected static String validationDataLocation = GATKDataLocation + "Validation_Data/"; protected static String evaluationDataLocation = GATKDataLocation + "Evaluation_Data/"; diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalkerIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalkerIntegrationTest.java new file mode 100755 index 000000000..8ef92427f --- /dev/null +++ b/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalkerIntegrationTest.java @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2009 The Broad Institute + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * Þles (the ÓSoftwareÓ), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, sub ject to the following + * conditions: + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED ÓAS ISÓ, WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.coverage; + +import org.broadinstitute.sting.WalkerTest; +import org.junit.Test; + +import java.util.Arrays; + +public class CallableLociWalkerIntegrationTest extends WalkerTest { + final static String commonArgs = "-R " + b36KGReference + " -T CallableLoci -I " + validationDataLocation + "/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s"; + + @Test + public void testCallableLociWalker1() { + String gatk_args = commonArgs + " -format BED -L 1:10,000,000-11,000,000"; + WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 1, Arrays.asList("b3a273984da6744d6de4ca0ab3eb759b")); + executeTest("formatBed", spec); + } + + @Test + public void testCallableLociWalker2() { + String gatk_args = commonArgs + " -format BED -L 1:10,000,000-10,000,100;1:10,000,110-10,000,120"; + WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 1, Arrays.asList("c671f65712d9575b8b3e1f1dbedc146e")); + executeTest("formatBed by interval", spec); + } + + @Test + public void testCallableLociWalker3() { + String gatk_args = commonArgs + " -format BED -L 1:10,000,000-11,000,000 -minDepth 10 -maxDepth 100 --minBaseQuality 10 --minMappingQuality 20"; + WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 1, Arrays.asList("3fee7d7d0e305f439db29b4e641d1c20")); + executeTest("formatBed lots of arguments", spec); + } +} \ No newline at end of file diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalkerIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalkerIntegrationTest.java new file mode 100755 index 000000000..1b73f1c00 --- /dev/null +++ b/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalkerIntegrationTest.java @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2009 The Broad Institute + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * Þles (the ÓSoftwareÓ), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, sub ject to the following + * conditions: + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED ÓAS ISÓ, WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.coverage; + +import org.broadinstitute.sting.WalkerTest; +import org.junit.Test; + +import java.util.Arrays; + +public class CompareCallableLociWalkerIntegrationTest extends WalkerTest { + final static String commonArgs = "-R " + hg18Reference + " -T CompareCallableLoci -B comp1,Bed," + validationDataLocation + "1kg_slx.chr1_10mb.callable -B comp2,Bed," + validationDataLocation + "ga2_slx.chr1_10mb.callable -o %s"; + + @Test + public void testCompareCallableLociWalker1() { + String gatk_args = commonArgs + " -L chr1:1-10,000,000"; + WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 1, Arrays.asList("70efebac55ff210bb022e9e22ed80a95")); + executeTest("CompareCallableLoci Walker", spec); + } +} \ No newline at end of file