Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Mark DePristo 2011-11-10 11:11:33 -05:00
commit 35fe9c8a06
26 changed files with 165 additions and 124 deletions

View File

@ -1,48 +0,0 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata;
import java.io.File;
/**
* An interface marking that a given Tribble codec can look at the file and determine whether the
* codec specifically parsing the contents of the file.
*/
public interface SelfScopingFeatureCodec {
/**
* This function returns true iff the File potentialInput can be parsed by this
* codec.
*
* The GATK assumes that there's never a situation where two SelfScopingFeaetureCodecs
* return true for the same file. If this occurs the GATK splits out an error.
*
* Note this function must never throw an error. All errors should be trapped
* and false returned.
*
* @param potentialInput the file to test for parsiability with this codec
* @return true if potentialInput can be parsed, false otherwise
*/
public boolean canDecode(final File potentialInput);
}

View File

@ -30,16 +30,12 @@ import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec; import org.broad.tribble.FeatureCodec;
import org.broad.tribble.NameAwareCodec; import org.broad.tribble.NameAwareCodec;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.gatk.refdata.SelfScopingFeatureCodec;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.help.GATKDocUtils; import org.broadinstitute.sting.utils.help.GATKDocUtils;
import org.broadinstitute.sting.utils.help.HelpUtils;
import javax.mail.Header;
import java.io.File; import java.io.File;
import java.util.*; import java.util.*;
@ -159,10 +155,8 @@ public class FeatureManager {
public FeatureDescriptor getByFiletype(File file) { public FeatureDescriptor getByFiletype(File file) {
List<FeatureDescriptor> canParse = new ArrayList<FeatureDescriptor>(); List<FeatureDescriptor> canParse = new ArrayList<FeatureDescriptor>();
for ( FeatureDescriptor descriptor : featureDescriptors ) for ( FeatureDescriptor descriptor : featureDescriptors )
if ( descriptor.getCodec() instanceof SelfScopingFeatureCodec ) { if ( descriptor.getCodec().canDecode(file) ) {
if ( ((SelfScopingFeatureCodec) descriptor.getCodec()).canDecode(file) ) { canParse.add(descriptor);
canParse.add(descriptor);
}
} }
if ( canParse.size() == 0 ) if ( canParse.size() == 0 )

View File

@ -69,7 +69,7 @@ public class AlleleBalance extends InfoFieldAnnotation {
if ( context == null ) if ( context == null )
continue; continue;
if ( vc.isSNP() ) { if ( vc.isSNP() && context.hasBasePileup() ) {
final String bases = new String(context.getBasePileup().getBases()); final String bases = new String(context.getBasePileup().getBases());
if ( bases.length() == 0 ) if ( bases.length() == 0 )
return null; return null;

View File

@ -51,6 +51,9 @@ public class AlleleBalanceBySample extends GenotypeAnnotation implements Experim
if ( altAlleles.size() == 0 ) if ( altAlleles.size() == 0 )
return null; return null;
if ( !stratifiedContext.hasBasePileup() )
return null;
final String bases = new String(stratifiedContext.getBasePileup().getBases()); final String bases = new String(stratifiedContext.getBasePileup().getBases());
if ( bases.length() == 0 ) if ( bases.length() == 0 )
return null; return null;

View File

@ -59,6 +59,8 @@ public class BaseCounts extends InfoFieldAnnotation {
int[] counts = new int[4]; int[] counts = new int[4];
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) { for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
if ( !sample.getValue().hasBasePileup() )
continue;
for (byte base : sample.getValue().getBasePileup().getBases() ) { for (byte base : sample.getValue().getBasePileup().getBases() ) {
int index = BaseUtils.simpleBaseToBaseIndex(base); int index = BaseUtils.simpleBaseToBaseIndex(base);
if ( index != -1 ) if ( index != -1 )

View File

@ -33,18 +33,18 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( mendelianViolation == null ) { if ( mendelianViolation == null ) {
if ( walker instanceof VariantAnnotator ) { if ( walker instanceof VariantAnnotator && ((VariantAnnotator) walker).familyStr != null) {
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).familyStr, ((VariantAnnotator)walker).minGenotypeQualityP ); mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).familyStr, ((VariantAnnotator)walker).minGenotypeQualityP );
} }
else { else {
throw new UserException("Mendelian violation annotation can only be used from the Variant Annotator"); throw new UserException("Mendelian violation annotation can only be used from the Variant Annotator, and must be provided a valid Family String file (-family) on the command line.");
} }
} }
Map<String,Object> toRet = new HashMap<String,Object>(1); Map<String,Object> toRet = new HashMap<String,Object>(1);
boolean hasAppropriateGenotypes = vc.hasGenotype(mendelianViolation.getSampleChild()) && boolean hasAppropriateGenotypes = vc.hasGenotype(mendelianViolation.getSampleChild()) && vc.getGenotype(mendelianViolation.getSampleChild()).hasLikelihoods() &&
vc.hasGenotype(mendelianViolation.getSampleDad()) && vc.hasGenotype(mendelianViolation.getSampleDad()) && vc.getGenotype(mendelianViolation.getSampleDad()).hasLikelihoods() &&
vc.hasGenotype(mendelianViolation.getSampleMom()); vc.hasGenotype(mendelianViolation.getSampleMom()) && vc.getGenotype(mendelianViolation.getSampleMom()).hasLikelihoods();
if ( hasAppropriateGenotypes ) if ( hasAppropriateGenotypes )
toRet.put("MVLR",mendelianViolation.violationLikelihoodRatio(vc)); toRet.put("MVLR",mendelianViolation.violationLikelihoodRatio(vc));

View File

@ -33,10 +33,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotationType; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.classloader.PluginManager;
@ -180,15 +177,16 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
private void listAnnotationsAndExit() { private void listAnnotationsAndExit() {
System.out.println("\nStandard annotations in the list below are marked with a '*'.");
List<Class<? extends InfoFieldAnnotation>> infoAnnotationClasses = new PluginManager<InfoFieldAnnotation>(InfoFieldAnnotation.class).getPlugins(); List<Class<? extends InfoFieldAnnotation>> infoAnnotationClasses = new PluginManager<InfoFieldAnnotation>(InfoFieldAnnotation.class).getPlugins();
System.out.println("\nAvailable annotations for the VCF INFO field:"); System.out.println("\nAvailable annotations for the VCF INFO field:");
for (int i = 0; i < infoAnnotationClasses.size(); i++) for (int i = 0; i < infoAnnotationClasses.size(); i++)
System.out.println("\t" + infoAnnotationClasses.get(i).getSimpleName()); System.out.println("\t" + (StandardAnnotation.class.isAssignableFrom(infoAnnotationClasses.get(i)) ? "*" : "") + infoAnnotationClasses.get(i).getSimpleName());
System.out.println(); System.out.println();
List<Class<? extends GenotypeAnnotation>> genotypeAnnotationClasses = new PluginManager<GenotypeAnnotation>(GenotypeAnnotation.class).getPlugins(); List<Class<? extends GenotypeAnnotation>> genotypeAnnotationClasses = new PluginManager<GenotypeAnnotation>(GenotypeAnnotation.class).getPlugins();
System.out.println("\nAvailable annotations for the VCF FORMAT field:"); System.out.println("\nAvailable annotations for the VCF FORMAT field:");
for (int i = 0; i < genotypeAnnotationClasses.size(); i++) for (int i = 0; i < genotypeAnnotationClasses.size(); i++)
System.out.println("\t" + genotypeAnnotationClasses.get(i).getSimpleName()); System.out.println("\t" + (StandardAnnotation.class.isAssignableFrom(genotypeAnnotationClasses.get(i)) ? "*" : "") + genotypeAnnotationClasses.get(i).getSimpleName());
System.out.println(); System.out.println();
System.out.println("\nAvailable classes/groups of annotations:"); System.out.println("\nAvailable classes/groups of annotations:");
for ( Class c : new PluginManager<AnnotationType>(AnnotationType.class).getInterfaces() ) for ( Class c : new PluginManager<AnnotationType>(AnnotationType.class).getInterfaces() )

View File

@ -528,10 +528,10 @@ public class PairHMMIndelErrorModel {
} }
else { else {
byte[] readBases = Arrays.copyOfRange(unclippedReadBases,numStartClippedBases, final byte[] readBases = Arrays.copyOfRange(unclippedReadBases,numStartClippedBases,
unclippedReadBases.length-numEndClippedBases); unclippedReadBases.length-numEndClippedBases);
byte[] readQuals = Arrays.copyOfRange(unclippedReadQuals,numStartClippedBases, final byte[] readQuals = Arrays.copyOfRange(unclippedReadQuals,numStartClippedBases,
unclippedReadBases.length-numEndClippedBases); unclippedReadBases.length-numEndClippedBases);
int j=0; int j=0;
@ -540,6 +540,7 @@ public class PairHMMIndelErrorModel {
double[][] matchMetricArray = null, XMetricArray = null, YMetricArray = null; double[][] matchMetricArray = null, XMetricArray = null, YMetricArray = null;
byte[] previousHaplotypeSeen = null; byte[] previousHaplotypeSeen = null;
double[] previousGOP = null; double[] previousGOP = null;
double[] previousGCP = null;
int startIdx; int startIdx;
for (Allele a: haplotypeMap.keySet()) { for (Allele a: haplotypeMap.keySet()) {
@ -555,7 +556,7 @@ public class PairHMMIndelErrorModel {
long indStart = start - haplotype.getStartPosition(); long indStart = start - haplotype.getStartPosition();
long indStop = stop - haplotype.getStartPosition(); long indStop = stop - haplotype.getStartPosition();
byte[] haplotypeBases = Arrays.copyOfRange(haplotype.getBases(), final byte[] haplotypeBases = Arrays.copyOfRange(haplotype.getBases(),
(int)indStart, (int)indStop); (int)indStart, (int)indStop);
double readLikelihood; double readLikelihood;
@ -572,13 +573,14 @@ public class PairHMMIndelErrorModel {
if (previousHaplotypeSeen == null) if (previousHaplotypeSeen == null)
startIdx = 0; startIdx = 0;
else { else {
int s1 = computeFirstDifferingPosition(haplotypeBases, previousHaplotypeSeen); final int s1 = computeFirstDifferingPosition(haplotypeBases, previousHaplotypeSeen);
int s2 = computeFirstDifferingPosition(currentContextGOP, previousGOP); final int s2 = computeFirstDifferingPosition(currentContextGOP, previousGOP);
startIdx = Math.min(s1,s2); final int s3 = computeFirstDifferingPosition(currentContextGCP, previousGCP);
startIdx = Math.min(Math.min(s1, s2), s3);
} }
previousHaplotypeSeen = haplotypeBases.clone(); previousHaplotypeSeen = haplotypeBases.clone();
previousGOP = currentContextGOP.clone(); previousGOP = currentContextGOP.clone();
previousGCP = currentContextGCP.clone();
readLikelihood = computeReadLikelihoodGivenHaplotypeAffineGaps(haplotypeBases, readBases, readQuals, readLikelihood = computeReadLikelihoodGivenHaplotypeAffineGaps(haplotypeBases, readBases, readQuals,
@ -617,10 +619,10 @@ public class PairHMMIndelErrorModel {
return 0; // sanity check return 0; // sanity check
for (int i=0; i < b1.length; i++ ){ for (int i=0; i < b1.length; i++ ){
if ( b1[i]!= b2[i]) if ( b1[i]!= b2[i] )
return i; return i;
} }
return 0; // sanity check return b1.length;
} }
private int computeFirstDifferingPosition(double[] b1, double[] b2) { private int computeFirstDifferingPosition(double[] b1, double[] b2) {
@ -628,10 +630,10 @@ public class PairHMMIndelErrorModel {
return 0; // sanity check return 0; // sanity check
for (int i=0; i < b1.length; i++ ){ for (int i=0; i < b1.length; i++ ){
if ( b1[i]!= b2[i]) if ( MathUtils.compareDoubles(b1[i], b2[i]) != 0 )
return i; return i;
} }
return 0; // sanity check return b1.length;
} }
private final static double[] getHaplotypeLikelihoods(final int numHaplotypes, final int readCounts[], final double readLikelihoods[][]) { private final static double[] getHaplotypeLikelihoods(final int numHaplotypes, final int readCounts[], final double readLikelihoods[][]) {

View File

@ -365,7 +365,7 @@ public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalk
return counter; return counter;
// Do not operate on variants that are not covered to the optional minimum depth // Do not operate on variants that are not covered to the optional minimum depth
if (!context.hasReads() || (minDepth > 0 && context.getBasePileup().getBases().length < minDepth)) { if (!context.hasReads() || !context.hasBasePileup() || (minDepth > 0 && context.getBasePileup().getBases().length < minDepth)) {
counter.nUncovered = 1L; counter.nUncovered = 1L;
if (vcComp.getAttribute("GV").equals("T")) if (vcComp.getAttribute("GV").equals("T"))
counter.nAltNotCalled = 1L; counter.nAltNotCalled = 1L;

View File

@ -66,6 +66,10 @@ import java.util.*;
* the log odds ratio of being a true variant versus being false under the trained Gaussian mixture model. * the log odds ratio of being a true variant versus being false under the trained Gaussian mixture model.
* *
* <p> * <p>
* NOTE: In order to create the model reporting plots Rscript needs to be in your environment PATH (this is the scripting version of R, not the interactive version).
* See <a target="r-project" href="http://www.r-project.org">http://www.r-project.org</a> for more info on how to download and install R.
*
* <p>
* See the GATK wiki for a tutorial and example recalibration accuracy plots. * See the GATK wiki for a tutorial and example recalibration accuracy plots.
* http://www.broadinstitute.org/gsa/wiki/index.php/Variant_quality_score_recalibration * http://www.broadinstitute.org/gsa/wiki/index.php/Variant_quality_score_recalibration
* *

View File

@ -556,9 +556,9 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
if (vc == null) if (vc == null)
return false; return false;
// if we're not looking at specific samples then the absense of a compVC means discordance // if we're not looking at specific samples then the absence of a compVC means discordance
if (NO_SAMPLES_SPECIFIED && (compVCs == null || compVCs.isEmpty())) if (NO_SAMPLES_SPECIFIED)
return true; return (compVCs == null || compVCs.isEmpty());
// check if we find it in the variant rod // check if we find it in the variant rod
Map<String, Genotype> genotypes = vc.getGenotypes(samples); Map<String, Genotype> genotypes = vc.getGenotypes(samples);

View File

@ -58,15 +58,6 @@ public class ReadClipper {
return hardClipByReferenceCoordinates(refStart, -1); return hardClipByReferenceCoordinates(refStart, -1);
} }
private int numDeletions(GATKSAMRecord read) {
int result = 0;
for (CigarElement e: read.getCigar().getCigarElements()) {
if ( e.getOperator() == CigarOperator.DELETION || e.getOperator() == CigarOperator.D )
result =+ e.getLength();
}
return result;
}
protected GATKSAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) { protected GATKSAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL); int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL);
int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL); int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL);
@ -90,7 +81,7 @@ public class ReadClipper {
@Requires("left <= right") @Requires("left <= right")
public GATKSAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) { public GATKSAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) {
if (left == right) if (read.isEmpty() || left == right)
return new GATKSAMRecord(read.getHeader()); return new GATKSAMRecord(read.getHeader());
GATKSAMRecord leftTailRead = hardClipByReferenceCoordinates(right, -1); GATKSAMRecord leftTailRead = hardClipByReferenceCoordinates(right, -1);
@ -104,6 +95,9 @@ public class ReadClipper {
} }
public GATKSAMRecord hardClipLowQualEnds(byte lowQual) { public GATKSAMRecord hardClipLowQualEnds(byte lowQual) {
if (read.isEmpty())
return read;
byte [] quals = read.getBaseQualities(); byte [] quals = read.getBaseQualities();
int leftClipIndex = 0; int leftClipIndex = 0;
int rightClipIndex = read.getReadLength() - 1; int rightClipIndex = read.getReadLength() - 1;
@ -126,6 +120,9 @@ public class ReadClipper {
} }
public GATKSAMRecord hardClipSoftClippedBases () { public GATKSAMRecord hardClipSoftClippedBases () {
if (read.isEmpty())
return read;
int readIndex = 0; int readIndex = 0;
int cutLeft = -1; // first position to hard clip (inclusive) int cutLeft = -1; // first position to hard clip (inclusive)
int cutRight = -1; // first position to hard clip (inclusive) int cutRight = -1; // first position to hard clip (inclusive)
@ -182,6 +179,9 @@ public class ReadClipper {
} }
public GATKSAMRecord hardClipLeadingInsertions() { public GATKSAMRecord hardClipLeadingInsertions() {
if (read.isEmpty())
return read;
for(CigarElement cigarElement : read.getCigar().getCigarElements()) { for(CigarElement cigarElement : read.getCigar().getCigarElements()) {
if (cigarElement.getOperator() != CigarOperator.HARD_CLIP && cigarElement.getOperator() != CigarOperator.SOFT_CLIP && if (cigarElement.getOperator() != CigarOperator.HARD_CLIP && cigarElement.getOperator() != CigarOperator.SOFT_CLIP &&
cigarElement.getOperator() != CigarOperator.INSERTION && cigarElement.getOperator() != CigarOperator.DELETION) cigarElement.getOperator() != CigarOperator.INSERTION && cigarElement.getOperator() != CigarOperator.DELETION)

View File

@ -249,4 +249,6 @@ public class BeagleCodec implements ReferenceDependentFeatureCodec<BeagleFeature
return bglFeature; return bglFeature;
} }
public boolean canDecode(final File potentialInput) { return false; }
} }

View File

@ -24,8 +24,8 @@
package org.broadinstitute.sting.utils.codecs.hapmap; package org.broadinstitute.sting.utils.codecs.hapmap;
import org.broad.tribble.AbstractFeatureCodec;
import org.broad.tribble.Feature; import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.annotation.Strand; import org.broad.tribble.annotation.Strand;
import org.broad.tribble.readers.LineReader; import org.broad.tribble.readers.LineReader;
@ -71,7 +71,7 @@ import java.util.Arrays;
* @author Mark DePristo * @author Mark DePristo
* @since 2010 * @since 2010
*/ */
public class RawHapMapCodec implements FeatureCodec { public class RawHapMapCodec extends AbstractFeatureCodec {
// the minimum number of features in the HapMap file line // the minimum number of features in the HapMap file line
private static final int minimumFeatureCount = 11; private static final int minimumFeatureCount = 11;

View File

@ -9,6 +9,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import java.io.File;
import java.util.ArrayList; import java.util.ArrayList;
/** /**
@ -138,4 +139,7 @@ public class RefSeqCodec implements ReferenceDependentFeatureCodec<RefSeqFeature
public Class<RefSeqFeature> getFeatureType() { public Class<RefSeqFeature> getFeatureType() {
return RefSeqFeature.class; return RefSeqFeature.class;
} }
public boolean canDecode(final File potentialInput) { return false; }
} }

View File

@ -25,8 +25,8 @@
package org.broadinstitute.sting.utils.codecs.sampileup; package org.broadinstitute.sting.utils.codecs.sampileup;
import org.broad.tribble.AbstractFeatureCodec;
import org.broad.tribble.Feature; import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.exception.CodecLineParsingException; import org.broad.tribble.exception.CodecLineParsingException;
import org.broad.tribble.readers.LineReader; import org.broad.tribble.readers.LineReader;
import org.broad.tribble.util.ParsingUtils; import org.broad.tribble.util.ParsingUtils;
@ -76,7 +76,7 @@ import static org.broadinstitute.sting.utils.codecs.sampileup.SAMPileupFeature.V
* @author Matt Hanna * @author Matt Hanna
* @since 2009 * @since 2009
*/ */
public class SAMPileupCodec implements FeatureCodec<SAMPileupFeature> { public class SAMPileupCodec extends AbstractFeatureCodec<SAMPileupFeature> {
// the number of tokens we expect to parse from a pileup line // the number of tokens we expect to parse from a pileup line
private static final int expectedTokenCount = 10; private static final int expectedTokenCount = 10;
private static final char fldDelim = '\t'; private static final char fldDelim = '\t';

View File

@ -27,8 +27,8 @@ package org.broadinstitute.sting.utils.codecs.samread;
import net.sf.samtools.Cigar; import net.sf.samtools.Cigar;
import net.sf.samtools.TextCigarCodec; import net.sf.samtools.TextCigarCodec;
import net.sf.samtools.util.StringUtil; import net.sf.samtools.util.StringUtil;
import org.broad.tribble.AbstractFeatureCodec;
import org.broad.tribble.Feature; import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.exception.CodecLineParsingException; import org.broad.tribble.exception.CodecLineParsingException;
import org.broad.tribble.readers.LineReader; import org.broad.tribble.readers.LineReader;
import org.broad.tribble.util.ParsingUtils; import org.broad.tribble.util.ParsingUtils;
@ -52,7 +52,7 @@ import org.broad.tribble.util.ParsingUtils;
* @author Matt Hanna * @author Matt Hanna
* @since 2009 * @since 2009
*/ */
public class SAMReadCodec implements FeatureCodec<SAMReadFeature> { public class SAMReadCodec extends AbstractFeatureCodec<SAMReadFeature> {
/* SL-XBC:1:10:628:923#0 16 Escherichia_coli_K12 1 37 76M = 1 0 AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGCTTCTGA B@>87<;A@?@957:>>@AA@B>@A9AB@B>@A@@@@@A;=AAB@BBBBBCBBBB@>A>:ABB@BAABCB=CA@CB */ /* SL-XBC:1:10:628:923#0 16 Escherichia_coli_K12 1 37 76M = 1 0 AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGCTTCTGA B@>87<;A@?@957:>>@AA@B>@A9AB@B>@A@@@@@A;=AAB@BBBBBCBBBB@>A>:ABB@BAABCB=CA@CB */
// the number of tokens we expect to parse from a read line // the number of tokens we expect to parse from a read line

View File

@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
@ -86,7 +87,13 @@ public class TableCodec implements ReferenceDependentFeatureCodec {
public Object readHeader(LineReader reader) { public Object readHeader(LineReader reader) {
String line = ""; String line = "";
try { try {
boolean isFirst = true;
while ((line = reader.readLine()) != null) { while ((line = reader.readLine()) != null) {
System.out.println(line);
if ( isFirst && ! line.startsWith(headerDelimiter) && ! line.startsWith(commentDelimiter)) {
throw new UserException.MalformedFile("TableCodec file does not have a header");
}
isFirst &= line.startsWith(commentDelimiter);
if (line.startsWith(headerDelimiter)) { if (line.startsWith(headerDelimiter)) {
if (header.size() > 0) throw new IllegalStateException("Input table file seems to have two header lines. The second is = " + line); if (header.size() > 0) throw new IllegalStateException("Input table file seems to have two header lines. The second is = " + line);
String spl[] = line.split(delimiterRegex); String spl[] = line.split(delimiterRegex);
@ -101,4 +108,7 @@ public class TableCodec implements ReferenceDependentFeatureCodec {
} }
return header; return header;
} }
public boolean canDecode(final File potentialInput) { return false; }
} }

View File

@ -1,7 +1,9 @@
package org.broadinstitute.sting.utils.codecs.table; package org.broadinstitute.sting.utils.codecs.table;
import org.broad.tribble.Feature; import org.broad.tribble.Feature;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
import java.util.List; import java.util.List;
@ -44,6 +46,10 @@ public class TableFeature implements Feature {
return values.get(columnPosition); return values.get(columnPosition);
} }
public String toString() {
return String.format("%s\t%s",position.toString(), Utils.join("\t",values));
}
public String get(String columnName) { public String get(String columnName) {
int position = keys.indexOf(columnName); int position = keys.indexOf(columnName);
if (position < 0) throw new IllegalArgumentException("We don't have a column named " + columnName); if (position < 0) throw new IllegalArgumentException("We don't have a column named " + columnName);

View File

@ -8,7 +8,6 @@ import org.broad.tribble.TribbleException;
import org.broad.tribble.readers.LineReader; import org.broad.tribble.readers.LineReader;
import org.broad.tribble.util.BlockCompressedInputStream; import org.broad.tribble.util.BlockCompressedInputStream;
import org.broad.tribble.util.ParsingUtils; import org.broad.tribble.util.ParsingUtils;
import org.broadinstitute.sting.gatk.refdata.SelfScopingFeatureCodec;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Allele;
@ -20,7 +19,7 @@ import java.util.*;
import java.util.zip.GZIPInputStream; import java.util.zip.GZIPInputStream;
public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, VCFParser, SelfScopingFeatureCodec { public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, VCFParser {
protected final static Logger log = Logger.getLogger(VCFCodec.class); protected final static Logger log = Logger.getLogger(VCFCodec.class);
protected final static int NUM_STANDARD_FIELDS = 8; // INFO is the 8th column protected final static int NUM_STANDARD_FIELDS = 8; // INFO is the 8th column

View File

@ -24,7 +24,10 @@
package org.broadinstitute.sting.utils.sam; package org.broadinstitute.sting.utils.sam;
import net.sf.samtools.*; import net.sf.samtools.BAMRecord;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.NGSPlatform; import org.broadinstitute.sting.utils.NGSPlatform;
import java.util.HashMap; import java.util.HashMap;
@ -43,7 +46,9 @@ import java.util.Map;
* *
*/ */
public class GATKSAMRecord extends BAMRecord { public class GATKSAMRecord extends BAMRecord {
public static final String REDUCED_READ_QUALITY_TAG = "RR"; public static final String REDUCED_READ_CONSENSUS_TAG = "RR";
public static final String REDUCED_READ_FILTERED_TAG = "RF";
// the SAMRecord data we're caching // the SAMRecord data we're caching
private String mReadString = null; private String mReadString = null;
private GATKSAMReadGroupRecord mReadGroup = null; private GATKSAMReadGroupRecord mReadGroup = null;
@ -83,8 +88,13 @@ public class GATKSAMRecord extends BAMRecord {
read.getMateReferenceIndex(), read.getMateReferenceIndex(),
read.getMateAlignmentStart(), read.getMateAlignmentStart(),
read.getInferredInsertSize(), read.getInferredInsertSize(),
new byte[]{}); null);
super.clearAttributes(); SAMReadGroupRecord samRG = read.getReadGroup();
clearAttributes();
if (samRG != null) {
GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(samRG);
setReadGroup(rg);
}
} }
public GATKSAMRecord(final SAMFileHeader header, public GATKSAMRecord(final SAMFileHeader header,
@ -131,6 +141,21 @@ public class GATKSAMRecord extends BAMRecord {
return mReadGroup; return mReadGroup;
} }
@Override
public int hashCode() {
return super.hashCode();
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof GATKSAMRecord)) return false;
// note that we do not consider the GATKSAMRecord internal state at all
return super.equals(o);
}
/** /**
* Efficient caching accessor that returns the GATK NGSPlatform of this read * Efficient caching accessor that returns the GATK NGSPlatform of this read
* @return * @return
@ -142,17 +167,16 @@ public class GATKSAMRecord extends BAMRecord {
public void setReadGroup( final GATKSAMReadGroupRecord readGroup ) { public void setReadGroup( final GATKSAMReadGroupRecord readGroup ) {
mReadGroup = readGroup; mReadGroup = readGroup;
retrievedReadGroup = true; retrievedReadGroup = true;
setAttribute("RG", mReadGroup.getId()); // todo -- this should be standardized, but we don't have access to SAMTagUtils!
} }
// ///////////////////////////////////////////////////////////////////////////////
// // *** ReduceReads functions ***//
// Reduced read functions ///////////////////////////////////////////////////////////////////////////////
//
//
public byte[] getReducedReadCounts() { public byte[] getReducedReadCounts() {
if ( ! retrievedReduceReadCounts ) { if ( ! retrievedReduceReadCounts ) {
reducedReadCounts = getByteArrayAttribute(REDUCED_READ_QUALITY_TAG); reducedReadCounts = getByteArrayAttribute(REDUCED_READ_CONSENSUS_TAG);
retrievedReduceReadCounts = true; retrievedReduceReadCounts = true;
} }
@ -167,6 +191,12 @@ public class GATKSAMRecord extends BAMRecord {
return getReducedReadCounts()[i]; return getReducedReadCounts()[i];
} }
///////////////////////////////////////////////////////////////////////////////
// *** GATKSAMRecord specific methods ***//
///////////////////////////////////////////////////////////////////////////////
/** /**
* Checks whether an attribute has been set for the given key. * Checks whether an attribute has been set for the given key.
* *
@ -220,18 +250,26 @@ public class GATKSAMRecord extends BAMRecord {
return null; return null;
} }
@Override /**
public int hashCode() { * Checks whether if the read has any bases.
return super.hashCode(); *
* Empty reads can be dangerous as it may have no cigar strings, no read names and
* other missing attributes.
*
* @return true if the read has no bases
*/
public boolean isEmpty() {
return this.getReadLength() == 0;
} }
@Override /**
public boolean equals(Object o) { * Clears all attributes except ReadGroup of the read.
if (this == o) return true; */
public void simplify () {
if (!(o instanceof GATKSAMRecord)) return false; GATKSAMReadGroupRecord rg = getReadGroup();
this.clearAttributes();
// note that we do not consider the GATKSAMRecord internal state at all setReadGroup(rg);
return super.equals(o);
} }
} }

View File

@ -64,6 +64,19 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
executeTest("testDiscordance--" + testFile, spec); executeTest("testDiscordance--" + testFile, spec);
} }
@Test
public void testDiscordanceNoSampleSpecified() {
String testFile = validationDataLocation + "NA12878.hg19.example1.vcf";
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + hg19Reference + " -L 20:1012700-1020000 --variant " + b37hapmapGenotypes + " -disc " + testFile + " -o %s -NO_HEADER",
1,
Arrays.asList("5d7d899c0c4954ec59104aebfe4addd5")
);
executeTest("testDiscordanceNoSampleSpecified--" + testFile, spec);
}
@Test @Test
public void testConcordance() { public void testConcordance() {
String testFile = validationDataLocation + "NA12878.hg19.example1.vcf"; String testFile = validationDataLocation + "NA12878.hg19.example1.vcf";

View File

@ -29,7 +29,7 @@ public class ReadUtilsUnitTest extends BaseTest {
reducedRead = ArtificialSAMUtils.createArtificialRead(header, "reducedRead", 0, 1, BASES.length()); reducedRead = ArtificialSAMUtils.createArtificialRead(header, "reducedRead", 0, 1, BASES.length());
reducedRead.setReadBases(BASES.getBytes()); reducedRead.setReadBases(BASES.getBytes());
reducedRead.setBaseQualityString(QUALS); reducedRead.setBaseQualityString(QUALS);
reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_QUALITY_TAG, REDUCED_READ_COUNTS); reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, REDUCED_READ_COUNTS);
} }
private void testReadBasesAndQuals(GATKSAMRecord read, int expectedStart, int expectedStop) { private void testReadBasesAndQuals(GATKSAMRecord read, int expectedStart, int expectedStop) {

View File

@ -162,6 +162,20 @@ public class IntervalIntegrationTest extends WalkerTest {
executeTest("testMixedIntervalMerging", spec); executeTest("testMixedIntervalMerging", spec);
} }
@Test(enabled = true)
public void testBed() {
String md5 = "cf4278314ef8e4b996e1b798d8eb92cf";
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T CountLoci" +
" -I " + validationDataLocation + "OV-0930.normal.chunk.bam" +
" -R " + hg18Reference +
" -o %s" +
" -L " + validationDataLocation + "intervalTest.bed",
1, // just one output file
Arrays.asList(md5));
executeTest("testBed", spec);
}
@Test(enabled = true) @Test(enabled = true)
public void testComplexVCF() { public void testComplexVCF() {
String md5 = "166d77ac1b46a1ec38aa35ab7e628ab5"; String md5 = "166d77ac1b46a1ec38aa35ab7e628ab5";

View File

@ -1,3 +1,3 @@
<ivy-module version="1.0"> <ivy-module version="1.0">
<info organisation="org.broad" module="tribble" revision="25" status="integration" /> <info organisation="org.broad" module="tribble" revision="41" status="integration" />
</ivy-module> </ivy-module>