diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/SelfScopingFeatureCodec.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/SelfScopingFeatureCodec.java deleted file mode 100644 index de781b839..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/SelfScopingFeatureCodec.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2011, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.refdata; - -import java.io.File; - -/** - * An interface marking that a given Tribble codec can look at the file and determine whether the - * codec specifically parsing the contents of the file. - */ -public interface SelfScopingFeatureCodec { - /** - * This function returns true iff the File potentialInput can be parsed by this - * codec. - * - * The GATK assumes that there's never a situation where two SelfScopingFeaetureCodecs - * return true for the same file. If this occurs the GATK splits out an error. - * - * Note this function must never throw an error. All errors should be trapped - * and false returned. - * - * @param potentialInput the file to test for parsiability with this codec - * @return true if potentialInput can be parsed, false otherwise - */ - public boolean canDecode(final File potentialInput); -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java index c99aea254..c41444ef3 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java @@ -30,16 +30,12 @@ import org.broad.tribble.Feature; import org.broad.tribble.FeatureCodec; import org.broad.tribble.NameAwareCodec; import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; -import org.broadinstitute.sting.gatk.refdata.SelfScopingFeatureCodec; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.help.GATKDocUtils; -import org.broadinstitute.sting.utils.help.HelpUtils; -import javax.mail.Header; import java.io.File; import java.util.*; @@ -159,10 +155,8 @@ public class FeatureManager { public FeatureDescriptor getByFiletype(File file) { List canParse = new ArrayList(); for ( FeatureDescriptor descriptor : featureDescriptors ) - if ( descriptor.getCodec() instanceof SelfScopingFeatureCodec ) { - if ( ((SelfScopingFeatureCodec) descriptor.getCodec()).canDecode(file) ) { - canParse.add(descriptor); - } + if ( descriptor.getCodec().canDecode(file) ) { + canParse.add(descriptor); } if ( canParse.size() == 0 ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java index e5f75f06d..3a21e97a4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java @@ -69,7 +69,7 @@ public class AlleleBalance extends InfoFieldAnnotation { if ( context == null ) continue; - if ( vc.isSNP() ) { + if ( vc.isSNP() && context.hasBasePileup() ) { final String bases = new String(context.getBasePileup().getBases()); if ( bases.length() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java index 820fd248a..06e91bf26 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java @@ -51,6 +51,9 @@ public class AlleleBalanceBySample extends GenotypeAnnotation implements Experim if ( altAlleles.size() == 0 ) return null; + if ( !stratifiedContext.hasBasePileup() ) + return null; + final String bases = new String(stratifiedContext.getBasePileup().getBases()); if ( bases.length() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java index 46aa6d0f3..761251259 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java @@ -59,6 +59,8 @@ public class BaseCounts extends InfoFieldAnnotation { int[] counts = new int[4]; for ( Map.Entry sample : stratifiedContexts.entrySet() ) { + if ( !sample.getValue().hasBasePileup() ) + continue; for (byte base : sample.getValue().getBasePileup().getBases() ) { int index = BaseUtils.simpleBaseToBaseIndex(base); if ( index != -1 ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java index e0a2329f8..bd0d4e3fb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java @@ -33,18 +33,18 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( mendelianViolation == null ) { - if ( walker instanceof VariantAnnotator ) { + if ( walker instanceof VariantAnnotator && ((VariantAnnotator) walker).familyStr != null) { mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).familyStr, ((VariantAnnotator)walker).minGenotypeQualityP ); } else { - throw new UserException("Mendelian violation annotation can only be used from the Variant Annotator"); + throw new UserException("Mendelian violation annotation can only be used from the Variant Annotator, and must be provided a valid Family String file (-family) on the command line."); } } Map toRet = new HashMap(1); - boolean hasAppropriateGenotypes = vc.hasGenotype(mendelianViolation.getSampleChild()) && - vc.hasGenotype(mendelianViolation.getSampleDad()) && - vc.hasGenotype(mendelianViolation.getSampleMom()); + boolean hasAppropriateGenotypes = vc.hasGenotype(mendelianViolation.getSampleChild()) && vc.getGenotype(mendelianViolation.getSampleChild()).hasLikelihoods() && + vc.hasGenotype(mendelianViolation.getSampleDad()) && vc.getGenotype(mendelianViolation.getSampleDad()).hasLikelihoods() && + vc.hasGenotype(mendelianViolation.getSampleMom()) && vc.getGenotype(mendelianViolation.getSampleMom()).hasLikelihoods(); if ( hasAppropriateGenotypes ) toRet.put("MVLR",mendelianViolation.violationLikelihoodRatio(vc)); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index 8f4bc0abd..087218cfb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -33,10 +33,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotationType; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.classloader.PluginManager; @@ -180,15 +177,16 @@ public class VariantAnnotator extends RodWalker implements Ann private void listAnnotationsAndExit() { + System.out.println("\nStandard annotations in the list below are marked with a '*'."); List> infoAnnotationClasses = new PluginManager(InfoFieldAnnotation.class).getPlugins(); System.out.println("\nAvailable annotations for the VCF INFO field:"); for (int i = 0; i < infoAnnotationClasses.size(); i++) - System.out.println("\t" + infoAnnotationClasses.get(i).getSimpleName()); + System.out.println("\t" + (StandardAnnotation.class.isAssignableFrom(infoAnnotationClasses.get(i)) ? "*" : "") + infoAnnotationClasses.get(i).getSimpleName()); System.out.println(); List> genotypeAnnotationClasses = new PluginManager(GenotypeAnnotation.class).getPlugins(); System.out.println("\nAvailable annotations for the VCF FORMAT field:"); for (int i = 0; i < genotypeAnnotationClasses.size(); i++) - System.out.println("\t" + genotypeAnnotationClasses.get(i).getSimpleName()); + System.out.println("\t" + (StandardAnnotation.class.isAssignableFrom(genotypeAnnotationClasses.get(i)) ? "*" : "") + genotypeAnnotationClasses.get(i).getSimpleName()); System.out.println(); System.out.println("\nAvailable classes/groups of annotations:"); for ( Class c : new PluginManager(AnnotationType.class).getInterfaces() ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index b28980d9c..f531cadd4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -528,10 +528,10 @@ public class PairHMMIndelErrorModel { } else { - byte[] readBases = Arrays.copyOfRange(unclippedReadBases,numStartClippedBases, + final byte[] readBases = Arrays.copyOfRange(unclippedReadBases,numStartClippedBases, unclippedReadBases.length-numEndClippedBases); - byte[] readQuals = Arrays.copyOfRange(unclippedReadQuals,numStartClippedBases, + final byte[] readQuals = Arrays.copyOfRange(unclippedReadQuals,numStartClippedBases, unclippedReadBases.length-numEndClippedBases); int j=0; @@ -540,6 +540,7 @@ public class PairHMMIndelErrorModel { double[][] matchMetricArray = null, XMetricArray = null, YMetricArray = null; byte[] previousHaplotypeSeen = null; double[] previousGOP = null; + double[] previousGCP = null; int startIdx; for (Allele a: haplotypeMap.keySet()) { @@ -555,7 +556,7 @@ public class PairHMMIndelErrorModel { long indStart = start - haplotype.getStartPosition(); long indStop = stop - haplotype.getStartPosition(); - byte[] haplotypeBases = Arrays.copyOfRange(haplotype.getBases(), + final byte[] haplotypeBases = Arrays.copyOfRange(haplotype.getBases(), (int)indStart, (int)indStop); double readLikelihood; @@ -572,13 +573,14 @@ public class PairHMMIndelErrorModel { if (previousHaplotypeSeen == null) startIdx = 0; else { - int s1 = computeFirstDifferingPosition(haplotypeBases, previousHaplotypeSeen); - int s2 = computeFirstDifferingPosition(currentContextGOP, previousGOP); - startIdx = Math.min(s1,s2); + final int s1 = computeFirstDifferingPosition(haplotypeBases, previousHaplotypeSeen); + final int s2 = computeFirstDifferingPosition(currentContextGOP, previousGOP); + final int s3 = computeFirstDifferingPosition(currentContextGCP, previousGCP); + startIdx = Math.min(Math.min(s1, s2), s3); } previousHaplotypeSeen = haplotypeBases.clone(); previousGOP = currentContextGOP.clone(); - + previousGCP = currentContextGCP.clone(); readLikelihood = computeReadLikelihoodGivenHaplotypeAffineGaps(haplotypeBases, readBases, readQuals, @@ -617,10 +619,10 @@ public class PairHMMIndelErrorModel { return 0; // sanity check for (int i=0; i < b1.length; i++ ){ - if ( b1[i]!= b2[i]) + if ( b1[i]!= b2[i] ) return i; } - return 0; // sanity check + return b1.length; } private int computeFirstDifferingPosition(double[] b1, double[] b2) { @@ -628,10 +630,10 @@ public class PairHMMIndelErrorModel { return 0; // sanity check for (int i=0; i < b1.length; i++ ){ - if ( b1[i]!= b2[i]) + if ( MathUtils.compareDoubles(b1[i], b2[i]) != 0 ) return i; } - return 0; // sanity check + return b1.length; } private final static double[] getHaplotypeLikelihoods(final int numHaplotypes, final int readCounts[], final double readLikelihoods[][]) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidateWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidateWalker.java index fd55d78a0..e64d00bf5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidateWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidateWalker.java @@ -365,7 +365,7 @@ public class GenotypeAndValidateWalker extends RodWalker 0 && context.getBasePileup().getBases().length < minDepth)) { + if (!context.hasReads() || !context.hasBasePileup() || (minDepth > 0 && context.getBasePileup().getBases().length < minDepth)) { counter.nUncovered = 1L; if (vcComp.getAttribute("GV").equals("T")) counter.nAltNotCalled = 1L; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index 59e0bc530..f60a94a22 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -66,6 +66,10 @@ import java.util.*; * the log odds ratio of being a true variant versus being false under the trained Gaussian mixture model. * *

+ * NOTE: In order to create the model reporting plots Rscript needs to be in your environment PATH (this is the scripting version of R, not the interactive version). + * See http://www.r-project.org for more info on how to download and install R. + * + *

* See the GATK wiki for a tutorial and example recalibration accuracy plots. * http://www.broadinstitute.org/gsa/wiki/index.php/Variant_quality_score_recalibration * diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 2e17d68d8..609593acc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -556,9 +556,9 @@ public class SelectVariants extends RodWalker { if (vc == null) return false; - // if we're not looking at specific samples then the absense of a compVC means discordance - if (NO_SAMPLES_SPECIFIED && (compVCs == null || compVCs.isEmpty())) - return true; + // if we're not looking at specific samples then the absence of a compVC means discordance + if (NO_SAMPLES_SPECIFIED) + return (compVCs == null || compVCs.isEmpty()); // check if we find it in the variant rod Map genotypes = vc.getGenotypes(samples); diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java index a6df986ba..6e4ddddc4 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java @@ -58,15 +58,6 @@ public class ReadClipper { return hardClipByReferenceCoordinates(refStart, -1); } - private int numDeletions(GATKSAMRecord read) { - int result = 0; - for (CigarElement e: read.getCigar().getCigarElements()) { - if ( e.getOperator() == CigarOperator.DELETION || e.getOperator() == CigarOperator.D ) - result =+ e.getLength(); - } - return result; - } - protected GATKSAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) { int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL); int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL); @@ -90,7 +81,7 @@ public class ReadClipper { @Requires("left <= right") public GATKSAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) { - if (left == right) + if (read.isEmpty() || left == right) return new GATKSAMRecord(read.getHeader()); GATKSAMRecord leftTailRead = hardClipByReferenceCoordinates(right, -1); @@ -104,6 +95,9 @@ public class ReadClipper { } public GATKSAMRecord hardClipLowQualEnds(byte lowQual) { + if (read.isEmpty()) + return read; + byte [] quals = read.getBaseQualities(); int leftClipIndex = 0; int rightClipIndex = read.getReadLength() - 1; @@ -126,6 +120,9 @@ public class ReadClipper { } public GATKSAMRecord hardClipSoftClippedBases () { + if (read.isEmpty()) + return read; + int readIndex = 0; int cutLeft = -1; // first position to hard clip (inclusive) int cutRight = -1; // first position to hard clip (inclusive) @@ -182,6 +179,9 @@ public class ReadClipper { } public GATKSAMRecord hardClipLeadingInsertions() { + if (read.isEmpty()) + return read; + for(CigarElement cigarElement : read.getCigar().getCigarElements()) { if (cigarElement.getOperator() != CigarOperator.HARD_CLIP && cigarElement.getOperator() != CigarOperator.SOFT_CLIP && cigarElement.getOperator() != CigarOperator.INSERTION && cigarElement.getOperator() != CigarOperator.DELETION) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/beagle/BeagleCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/beagle/BeagleCodec.java index 413848543..90d305d73 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/beagle/BeagleCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/beagle/BeagleCodec.java @@ -249,4 +249,6 @@ public class BeagleCodec implements ReferenceDependentFeatureCodec getFeatureType() { return RefSeqFeature.class; } + + public boolean canDecode(final File potentialInput) { return false; } + } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/sampileup/SAMPileupCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/sampileup/SAMPileupCodec.java index f4633b2ce..d9f16c353 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/sampileup/SAMPileupCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/sampileup/SAMPileupCodec.java @@ -25,8 +25,8 @@ package org.broadinstitute.sting.utils.codecs.sampileup; +import org.broad.tribble.AbstractFeatureCodec; import org.broad.tribble.Feature; -import org.broad.tribble.FeatureCodec; import org.broad.tribble.exception.CodecLineParsingException; import org.broad.tribble.readers.LineReader; import org.broad.tribble.util.ParsingUtils; @@ -76,7 +76,7 @@ import static org.broadinstitute.sting.utils.codecs.sampileup.SAMPileupFeature.V * @author Matt Hanna * @since 2009 */ -public class SAMPileupCodec implements FeatureCodec { +public class SAMPileupCodec extends AbstractFeatureCodec { // the number of tokens we expect to parse from a pileup line private static final int expectedTokenCount = 10; private static final char fldDelim = '\t'; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/samread/SAMReadCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/samread/SAMReadCodec.java index d4bdb5aa9..0f2b94e63 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/samread/SAMReadCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/samread/SAMReadCodec.java @@ -27,8 +27,8 @@ package org.broadinstitute.sting.utils.codecs.samread; import net.sf.samtools.Cigar; import net.sf.samtools.TextCigarCodec; import net.sf.samtools.util.StringUtil; +import org.broad.tribble.AbstractFeatureCodec; import org.broad.tribble.Feature; -import org.broad.tribble.FeatureCodec; import org.broad.tribble.exception.CodecLineParsingException; import org.broad.tribble.readers.LineReader; import org.broad.tribble.util.ParsingUtils; @@ -52,7 +52,7 @@ import org.broad.tribble.util.ParsingUtils; * @author Matt Hanna * @since 2009 */ -public class SAMReadCodec implements FeatureCodec { +public class SAMReadCodec extends AbstractFeatureCodec { /* SL-XBC:1:10:628:923#0 16 Escherichia_coli_K12 1 37 76M = 1 0 AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGCTTCTGA B@>87<;A@?@957:>>@AA@B>@A9AB@B>@A@@@@@A;=AAB@BBBBBCBBBB@>A>:ABB@BAABCB=CA@CB */ // the number of tokens we expect to parse from a read line diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/table/TableCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/table/TableCodec.java index 1919ccbf0..bab7e73c0 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/table/TableCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/table/TableCodec.java @@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.UserException; +import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; @@ -86,7 +87,13 @@ public class TableCodec implements ReferenceDependentFeatureCodec { public Object readHeader(LineReader reader) { String line = ""; try { + boolean isFirst = true; while ((line = reader.readLine()) != null) { + System.out.println(line); + if ( isFirst && ! line.startsWith(headerDelimiter) && ! line.startsWith(commentDelimiter)) { + throw new UserException.MalformedFile("TableCodec file does not have a header"); + } + isFirst &= line.startsWith(commentDelimiter); if (line.startsWith(headerDelimiter)) { if (header.size() > 0) throw new IllegalStateException("Input table file seems to have two header lines. The second is = " + line); String spl[] = line.split(delimiterRegex); @@ -101,4 +108,7 @@ public class TableCodec implements ReferenceDependentFeatureCodec { } return header; } + + public boolean canDecode(final File potentialInput) { return false; } + } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/table/TableFeature.java b/public/java/src/org/broadinstitute/sting/utils/codecs/table/TableFeature.java index a85849f0b..4b5c51bd4 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/table/TableFeature.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/table/TableFeature.java @@ -1,7 +1,9 @@ package org.broadinstitute.sting.utils.codecs.table; + import org.broad.tribble.Feature; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.Utils; import java.util.List; @@ -44,6 +46,10 @@ public class TableFeature implements Feature { return values.get(columnPosition); } + public String toString() { + return String.format("%s\t%s",position.toString(), Utils.join("\t",values)); + } + public String get(String columnName) { int position = keys.indexOf(columnName); if (position < 0) throw new IllegalArgumentException("We don't have a column named " + columnName); diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index 0e0cb14bf..3377172dd 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -8,7 +8,6 @@ import org.broad.tribble.TribbleException; import org.broad.tribble.readers.LineReader; import org.broad.tribble.util.BlockCompressedInputStream; import org.broad.tribble.util.ParsingUtils; -import org.broadinstitute.sting.gatk.refdata.SelfScopingFeatureCodec; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.Allele; @@ -20,7 +19,7 @@ import java.util.*; import java.util.zip.GZIPInputStream; -public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, VCFParser, SelfScopingFeatureCodec { +public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, VCFParser { protected final static Logger log = Logger.getLogger(VCFCodec.class); protected final static int NUM_STANDARD_FIELDS = 8; // INFO is the 8th column diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index ede75817a..3fe1060dd 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -24,7 +24,10 @@ package org.broadinstitute.sting.utils.sam; -import net.sf.samtools.*; +import net.sf.samtools.BAMRecord; +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMReadGroupRecord; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.NGSPlatform; import java.util.HashMap; @@ -43,7 +46,9 @@ import java.util.Map; * */ public class GATKSAMRecord extends BAMRecord { - public static final String REDUCED_READ_QUALITY_TAG = "RR"; + public static final String REDUCED_READ_CONSENSUS_TAG = "RR"; + public static final String REDUCED_READ_FILTERED_TAG = "RF"; + // the SAMRecord data we're caching private String mReadString = null; private GATKSAMReadGroupRecord mReadGroup = null; @@ -83,8 +88,13 @@ public class GATKSAMRecord extends BAMRecord { read.getMateReferenceIndex(), read.getMateAlignmentStart(), read.getInferredInsertSize(), - new byte[]{}); - super.clearAttributes(); + null); + SAMReadGroupRecord samRG = read.getReadGroup(); + clearAttributes(); + if (samRG != null) { + GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(samRG); + setReadGroup(rg); + } } public GATKSAMRecord(final SAMFileHeader header, @@ -131,6 +141,21 @@ public class GATKSAMRecord extends BAMRecord { return mReadGroup; } + @Override + public int hashCode() { + return super.hashCode(); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + + if (!(o instanceof GATKSAMRecord)) return false; + + // note that we do not consider the GATKSAMRecord internal state at all + return super.equals(o); + } + /** * Efficient caching accessor that returns the GATK NGSPlatform of this read * @return @@ -142,17 +167,16 @@ public class GATKSAMRecord extends BAMRecord { public void setReadGroup( final GATKSAMReadGroupRecord readGroup ) { mReadGroup = readGroup; retrievedReadGroup = true; + setAttribute("RG", mReadGroup.getId()); // todo -- this should be standardized, but we don't have access to SAMTagUtils! } - // - // - // Reduced read functions - // - // + /////////////////////////////////////////////////////////////////////////////// + // *** ReduceReads functions ***// + /////////////////////////////////////////////////////////////////////////////// public byte[] getReducedReadCounts() { if ( ! retrievedReduceReadCounts ) { - reducedReadCounts = getByteArrayAttribute(REDUCED_READ_QUALITY_TAG); + reducedReadCounts = getByteArrayAttribute(REDUCED_READ_CONSENSUS_TAG); retrievedReduceReadCounts = true; } @@ -167,6 +191,12 @@ public class GATKSAMRecord extends BAMRecord { return getReducedReadCounts()[i]; } + + /////////////////////////////////////////////////////////////////////////////// + // *** GATKSAMRecord specific methods ***// + /////////////////////////////////////////////////////////////////////////////// + + /** * Checks whether an attribute has been set for the given key. * @@ -220,18 +250,26 @@ public class GATKSAMRecord extends BAMRecord { return null; } - @Override - public int hashCode() { - return super.hashCode(); + /** + * Checks whether if the read has any bases. + * + * Empty reads can be dangerous as it may have no cigar strings, no read names and + * other missing attributes. + * + * @return true if the read has no bases + */ + public boolean isEmpty() { + return this.getReadLength() == 0; } - @Override - public boolean equals(Object o) { - if (this == o) return true; - - if (!(o instanceof GATKSAMRecord)) return false; - - // note that we do not consider the GATKSAMRecord internal state at all - return super.equals(o); + /** + * Clears all attributes except ReadGroup of the read. + */ + public void simplify () { + GATKSAMReadGroupRecord rg = getReadGroup(); + this.clearAttributes(); + setReadGroup(rg); } + + } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java index e4ded491b..6e994be3a 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -64,6 +64,19 @@ public class SelectVariantsIntegrationTest extends WalkerTest { executeTest("testDiscordance--" + testFile, spec); } + @Test + public void testDiscordanceNoSampleSpecified() { + String testFile = validationDataLocation + "NA12878.hg19.example1.vcf"; + + WalkerTestSpec spec = new WalkerTestSpec( + "-T SelectVariants -R " + hg19Reference + " -L 20:1012700-1020000 --variant " + b37hapmapGenotypes + " -disc " + testFile + " -o %s -NO_HEADER", + 1, + Arrays.asList("5d7d899c0c4954ec59104aebfe4addd5") + ); + + executeTest("testDiscordanceNoSampleSpecified--" + testFile, spec); + } + @Test public void testConcordance() { String testFile = validationDataLocation + "NA12878.hg19.example1.vcf"; diff --git a/public/java/test/org/broadinstitute/sting/utils/ReadUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/ReadUtilsUnitTest.java index bc39d714e..46134cd24 100755 --- a/public/java/test/org/broadinstitute/sting/utils/ReadUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/ReadUtilsUnitTest.java @@ -29,7 +29,7 @@ public class ReadUtilsUnitTest extends BaseTest { reducedRead = ArtificialSAMUtils.createArtificialRead(header, "reducedRead", 0, 1, BASES.length()); reducedRead.setReadBases(BASES.getBytes()); reducedRead.setBaseQualityString(QUALS); - reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_QUALITY_TAG, REDUCED_READ_COUNTS); + reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, REDUCED_READ_COUNTS); } private void testReadBasesAndQuals(GATKSAMRecord read, int expectedStart, int expectedStop) { diff --git a/public/java/test/org/broadinstitute/sting/utils/interval/IntervalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/interval/IntervalIntegrationTest.java index 75bdc3142..3fb330853 100644 --- a/public/java/test/org/broadinstitute/sting/utils/interval/IntervalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/interval/IntervalIntegrationTest.java @@ -162,6 +162,20 @@ public class IntervalIntegrationTest extends WalkerTest { executeTest("testMixedIntervalMerging", spec); } + @Test(enabled = true) + public void testBed() { + String md5 = "cf4278314ef8e4b996e1b798d8eb92cf"; + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-T CountLoci" + + " -I " + validationDataLocation + "OV-0930.normal.chunk.bam" + + " -R " + hg18Reference + + " -o %s" + + " -L " + validationDataLocation + "intervalTest.bed", + 1, // just one output file + Arrays.asList(md5)); + executeTest("testBed", spec); + } + @Test(enabled = true) public void testComplexVCF() { String md5 = "166d77ac1b46a1ec38aa35ab7e628ab5"; diff --git a/settings/repository/org.broad/tribble-25.jar b/settings/repository/org.broad/tribble-41.jar similarity index 80% rename from settings/repository/org.broad/tribble-25.jar rename to settings/repository/org.broad/tribble-41.jar index e260764a5..76903f007 100644 Binary files a/settings/repository/org.broad/tribble-25.jar and b/settings/repository/org.broad/tribble-41.jar differ diff --git a/settings/repository/org.broad/tribble-25.xml b/settings/repository/org.broad/tribble-41.xml similarity index 51% rename from settings/repository/org.broad/tribble-25.xml rename to settings/repository/org.broad/tribble-41.xml index ed7a1fd69..6ee8bfb78 100644 --- a/settings/repository/org.broad/tribble-25.xml +++ b/settings/repository/org.broad/tribble-41.xml @@ -1,3 +1,3 @@ - +