diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java index cbbb3d43f..7d1858a63 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.coverage; import net.sf.samtools.SAMReadGroupRecord; +import org.broadinstitute.sting.commandline.Advanced; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; @@ -119,21 +120,6 @@ public class DepthOfCoverageWalker extends LocusWalker out; - /** - * Sets the low-coverage cutoff for granular binning. All loci with depth < START are counted in the first bin. - */ - @Argument(fullName = "start", doc = "Starting (left endpoint) for granular binning", required = false) - int start = 1; - /** - * Sets the high-coverage cutoff for granular binning. All loci with depth > END are counted in the last bin. - */ - @Argument(fullName = "stop", doc = "Ending (right endpoint) for granular binning", required = false) - int stop = 500; - /** - * Sets the number of bins for granular binning - */ - @Argument(fullName = "nBins", doc = "Number of bins to use for granular binning", required = false) - int nBins = 499; @Argument(fullName = "minMappingQuality", shortName = "mmq", doc = "Minimum mapping quality of reads to count towards depth. Defaults to -1.", required = false) int minMappingQuality = -1; @Argument(fullName = "maxMappingQuality", doc = "Maximum mapping quality of reads to count towards depth. Defaults to 2^31-1 (Integer.MAX_VALUE).", required = false) @@ -142,16 +128,19 @@ public class DepthOfCoverageWalker extends LocusWalker END are counted in the last bin. + */ + @Advanced + @Argument(fullName = "stop", doc = "Ending (right endpoint) for granular binning", required = false) + int stop = 500; + /** + * Sets the number of bins for granular binning + */ + @Advanced + @Argument(fullName = "nBins", doc = "Number of bins to use for granular binning", required = false) + int nBins = 499; + /** * Do not tabulate the sample summary statistics (total, mean, median, quartile coverage per sample) */ @@ -174,27 +207,22 @@ public class DepthOfCoverageWalker extends LocusWalker partitionTypes = EnumSet.of(DoCOutputType.Partition.sample); + /** * Consider a spanning deletion as contributing to coverage. Also enables deletion counts in per-base output. */ + @Advanced @Argument(fullName = "includeDeletions", shortName = "dels", doc = "Include information on deletions", required = false) boolean includeDeletions = false; + + @Advanced @Argument(fullName = "ignoreDeletionSites", doc = "Ignore sites consisting only of deletions", required = false) boolean ignoreDeletionSites = false; - /** - * Path to the RefSeq file for use in aggregating coverage statistics over genes - */ - @Argument(fullName = "calculateCoverageOverGenes", shortName = "geneList", doc = "Calculate the coverage statistics over this list of genes. Currently accepts RefSeq.", required = false) - File refSeqGeneList = null; - /** - * The format of the output file - */ - @Argument(fullName = "outputFormat", doc = "the format of the output file (e.g. csv, table, rtable); defaults to r-readable table", required = false) - String outputFormat = "rtable"; /** * A coverage threshold for summarizing (e.g. % bases >= CT for each sample) */ + @Advanced @Argument(fullName = "summaryCoverageThreshold", shortName = "ct", doc = "for summary file outputs, report the % of bases coverd to >= this number. Defaults to 15; can take multiple arguments.", required = false) int[] coverageThresholds = {15}; @@ -334,24 +362,29 @@ public class DepthOfCoverageWalker extends LocusWalker> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if (includeRefNBases || BaseUtils.isRegularBase(ref.getBase())) { + if ( ! omitDepthOutput ) { + getCorrectStream(null, DoCOutputType.Aggregation.locus, DoCOutputType.FileType.summary).printf("%s",ref.getLocus()); // yes: print locus in map, and the rest of the info in reduce (for eventual cumulatives) + //System.out.printf("\t[log]\t%s",ref.getLocus()); + } - if ( ! omitDepthOutput ) { - getCorrectStream(null, DoCOutputType.Aggregation.locus, DoCOutputType.FileType.summary).printf("%s",ref.getLocus()); // yes: print locus in map, and the rest of the info in reduce (for eventual cumulatives) - //System.out.printf("\t[log]\t%s",ref.getLocus()); + return CoverageUtils.getBaseCountsByPartition(context,minMappingQuality,maxMappingQuality,minBaseQuality,maxBaseQuality,partitionTypes); + } else { + return null; } - - return CoverageUtils.getBaseCountsByPartition(context,minMappingQuality,maxMappingQuality,minBaseQuality,maxBaseQuality,partitionTypes); } public CoveragePartitioner reduce(Map> thisMap, CoveragePartitioner prevReduce) { - if ( ! omitDepthOutput ) { - //checkOrder(prevReduce); // tests prevReduce.getIdentifiersByType().get(t) against the initialized header order - printDepths(getCorrectStream(null, DoCOutputType.Aggregation.locus, DoCOutputType.FileType.summary),thisMap,prevReduce.getIdentifiersByType()); - // this is an additional iteration through thisMap, plus dealing with IO, so should be much slower without - // turning on omit - } + if ( thisMap != null ) { // skip sites we didn't want to include in the calculation (ref Ns) + if ( ! omitDepthOutput ) { + //checkOrder(prevReduce); // tests prevReduce.getIdentifiersByType().get(t) against the initialized header order + printDepths(getCorrectStream(null, DoCOutputType.Aggregation.locus, DoCOutputType.FileType.summary),thisMap,prevReduce.getIdentifiersByType()); + // this is an additional iteration through thisMap, plus dealing with IO, so should be much slower without + // turning on omit + } - prevReduce.update(thisMap); // note that in "useBoth" cases, this method alters the thisMap object + prevReduce.update(thisMap); // note that in "useBoth" cases, this method alters the thisMap object + } return prevReduce; } diff --git a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java index d48deab1b..def2fc349 100755 --- a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java @@ -28,9 +28,7 @@ import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.variantcontext.Allele; import java.util.Arrays; @@ -133,7 +131,7 @@ public class Haplotype { } } else if( refAllele.length() < altAllele.length() ) { // insertion final int altAlleleLength = altAllele.length(); - for( int iii = newHaplotype.length -1; iii > haplotypeInsertLocation + altAlleleLength; iii-- ) { + for( int iii = newHaplotype.length - 1; iii > haplotypeInsertLocation + altAlleleLength - 1; iii-- ) { newHaplotype[iii] = newHaplotype[iii-altAlleleLength]; } for( int iii = 0; iii < altAlleleLength; iii++ ) { diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index 1bdee802b..3c2ed18e4 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -544,12 +544,15 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { } /** - * return true if this is a symbolic allele (e.g. ) otherwise false + * return true if this is a symbolic allele (e.g. ) or + * structural variation breakend (with [ or ]), otherwise false * @param allele the allele to check * @return true if the allele is a symbolic allele, otherwise false */ private static boolean isSymbolicAllele(String allele) { - return (allele != null && allele.startsWith("<") && allele.endsWith(">") && allele.length() > 2); + return (allele != null && allele.length() > 2 && + ((allele.startsWith("<") && allele.endsWith(">")) || + (allele.contains("[") || allele.contains("]")))); } /** diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentStartWithNoTiesComparator.java b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentStartWithNoTiesComparator.java index 02512c8dc..682c76617 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentStartWithNoTiesComparator.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentStartWithNoTiesComparator.java @@ -36,8 +36,10 @@ public class AlignmentStartWithNoTiesComparator implements Comparator result = cmpContig; else { - if (r1.getAlignmentStart() < r2.getAlignmentStart()) result = -1; - else result = 1; + if (r1.getAlignmentStart() < r2.getAlignmentStart()) + result = -1; + else + result = 1; } } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java index c3f437f11..52b4109fe 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java @@ -212,7 +212,13 @@ public class Allele implements Comparable { * @return true if the bases represent a symbolic allele */ public static boolean wouldBeSymbolicAllele(byte[] bases) { - return bases.length > 2 && bases[0] == '<' && bases[bases.length-1] == '>'; + if ( bases.length <= 2 ) + return false; + else { + final String strBases = new String(bases); + return (bases[0] == '<' && bases[bases.length-1] == '>') || + (strBases.contains("[") || strBases.contains("]")); + } } /** diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageIntegrationTest.java index 1c58346b4..6f1370008 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageIntegrationTest.java @@ -94,4 +94,14 @@ public class DepthOfCoverageIntegrationTest extends WalkerTest { execute("testNoCoverageDueToFiltering",spec); } + + public void testRefNHandling(boolean includeNs, final String md5) { + String command = "-R " + b37KGReference + " -L 20:26,319,565-26,319,575 -I " + validationDataLocation + "NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam -T DepthOfCoverage -baseCounts --omitIntervalStatistics --omitLocusTable --omitPerSampleStats -o %s"; + if ( includeNs ) command += " --includeRefNSites"; + WalkerTestSpec spec = new WalkerTestSpec(command, 1, Arrays.asList(md5)); + executeTest("Testing DoC " + (includeNs ? "with" : "without") + " reference Ns", spec); + } + + @Test public void testRefNWithNs() { testRefNHandling(true, "24cd2da2e4323ce6fd76217ba6dc2834"); } + @Test public void testRefNWithoutNs() { testRefNHandling(false, "4fc0f1a2e968f777d693abcefd4fb7af"); } } diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java index c8a0c0ed6..ca5fcf419 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java @@ -9,7 +9,7 @@ import java.util.List; public class VCFIntegrationTest extends WalkerTest { - @Test + @Test(enabled = true) public void testReadingAndWritingWitHNoChanges() { String md5ofInputVCF = "a990ba187a69ca44cb9bc2bb44d00447"; @@ -25,4 +25,18 @@ public class VCFIntegrationTest extends WalkerTest { WalkerTestSpec spec2 = new WalkerTestSpec(test2, 1, Arrays.asList(md5ofInputVCF)); executeTest("Test Variants To VCF from new output", spec2); } + + @Test + // See https://getsatisfaction.com/gsa/topics/support_vcf_4_1_structural_variation_breakend_alleles?utm_content=topic_link&utm_medium=email&utm_source=new_topic + public void testReadingAndWritingBreakpointAlleles() { + String testVCF = testDir + "breakpoint-example.vcf"; + //String testVCF = validationDataLocation + "multiallelic.vcf"; + + String baseCommand = "-R " + b37KGReference + " -NO_HEADER -o %s "; + + String test1 = baseCommand + "-T SelectVariants -V " + testVCF; + WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("76075307afd26b4db6234795d9fb3c2f")); + executeTest("Test reading and writing breakpoint VCF", spec1); + } + } diff --git a/public/testdata/breakpoint-example.vcf b/public/testdata/breakpoint-example.vcf new file mode 100644 index 000000000..f015e1721 --- /dev/null +++ b/public/testdata/breakpoint-example.vcf @@ -0,0 +1,6 @@ +##fileformat=VCFv4.1 +#CHROM POS ID REF ALT QUAL FILTER INFO +22 50 bnd_W G G]22:6000] 6 PASS SVTYPE=BND;MATEID=bnd_Y +22 51 bnd_V T ]22:55]T 6 PASS SVTYPE=BND;MATEID=bnd_U +22 55 bnd_U C C[22:51[ 6 PASS SVTYPE=BND;MATEID=bnd_V +22 6000 bnd_Y A A]22:50] 6 PASS SVTYPE=BND;MATEID=bnd_W \ No newline at end of file