Merging Roger's Unit tests for Reduce Reads from RR repository

This commit is contained in:
Mauricio Carneiro 2011-11-16 17:26:49 -05:00
commit 72f00e2883
13 changed files with 1079 additions and 471 deletions

View File

@ -480,7 +480,7 @@ public class GenomeAnalysisEngine {
}
} else if (walker instanceof ReadPairWalker) {
if(readsDataSource != null && readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.queryname)
throw new UserException.MissortedBAM(SAMFileHeader.SortOrder.queryname, "Read pair walkers can only walk over query name-sorted data. Please resort your input BAM file.");
throw new UserException.MissortedBAM(SAMFileHeader.SortOrder.queryname, "Read pair walkers are exceptions in that they cannot be run on coordinate-sorted BAMs but instead require query name-sorted files. You will need to resort your input BAM file in query name order to use this walker.");
if(intervals != null && !intervals.isEmpty())
throw new UserException.CommandLineException("Pairs traversal cannot be used in conjunction with intervals.");

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.samples;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
@ -110,6 +111,17 @@ public class Sample implements Comparable<Sample> { // implements java.io.Serial
return infoDB.getSample(paternalID);
}
public ArrayList<Sample> getParents(){
ArrayList<Sample> parents = new ArrayList<Sample>(2);
Sample parent = getMother();
if(parent != null)
parents.add(parent);
parent = getFather();
if(parent != null)
parents.add(parent);
return parents;
}
/**
* Get gender of the sample
* @return property of key "gender" - must be of type Gender

View File

@ -56,7 +56,7 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
// We refuse to parse SnpEff output files generated by unsupported versions, or
// lacking a SnpEff version number in the VCF header:
public static final String[] SUPPORTED_SNPEFF_VERSIONS = { "2.0.2" };
public static final String[] SUPPORTED_SNPEFF_VERSIONS = { "2.0.4" };
public static final String SNPEFF_VCF_HEADER_VERSION_LINE_KEY = "SnpEffVersion";
public static final String SNPEFF_VCF_HEADER_COMMAND_LINE_KEY = "SnpEffCmd";
@ -77,13 +77,13 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
public enum InfoFieldKey {
EFFECT_KEY ("SNPEFF_EFFECT", -1),
IMPACT_KEY ("SNPEFF_IMPACT", 0),
CODON_CHANGE_KEY ("SNPEFF_CODON_CHANGE", 1),
AMINO_ACID_CHANGE_KEY ("SNPEFF_AMINO_ACID_CHANGE", 2),
GENE_NAME_KEY ("SNPEFF_GENE_NAME", 3),
GENE_BIOTYPE_KEY ("SNPEFF_GENE_BIOTYPE", 4),
TRANSCRIPT_ID_KEY ("SNPEFF_TRANSCRIPT_ID", 6),
EXON_ID_KEY ("SNPEFF_EXON_ID", 7),
FUNCTIONAL_CLASS_KEY ("SNPEFF_FUNCTIONAL_CLASS", -1);
FUNCTIONAL_CLASS_KEY ("SNPEFF_FUNCTIONAL_CLASS", 1),
CODON_CHANGE_KEY ("SNPEFF_CODON_CHANGE", 2),
AMINO_ACID_CHANGE_KEY ("SNPEFF_AMINO_ACID_CHANGE", 3),
GENE_NAME_KEY ("SNPEFF_GENE_NAME", 4),
GENE_BIOTYPE_KEY ("SNPEFF_GENE_BIOTYPE", 5),
TRANSCRIPT_ID_KEY ("SNPEFF_TRANSCRIPT_ID", 7),
EXON_ID_KEY ("SNPEFF_EXON_ID", 8);
// Actual text of the key
private final String keyName;
@ -110,70 +110,53 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
// are validated against this list.
public enum EffectType {
// High-impact effects:
FRAME_SHIFT (EffectFunctionalClass.NONE, false),
STOP_GAINED (EffectFunctionalClass.NONSENSE, false),
START_LOST (EffectFunctionalClass.NONE, false),
SPLICE_SITE_ACCEPTOR (EffectFunctionalClass.NONE, false),
SPLICE_SITE_DONOR (EffectFunctionalClass.NONE, false),
EXON_DELETED (EffectFunctionalClass.NONE, false),
STOP_LOST (EffectFunctionalClass.NONE, false),
SPLICE_SITE_ACCEPTOR,
SPLICE_SITE_DONOR,
START_LOST,
EXON_DELETED,
FRAME_SHIFT,
STOP_GAINED,
STOP_LOST,
// Moderate-impact effects:
NON_SYNONYMOUS_CODING (EffectFunctionalClass.MISSENSE, false),
CODON_CHANGE (EffectFunctionalClass.NONE, false),
CODON_INSERTION (EffectFunctionalClass.NONE, false),
CODON_CHANGE_PLUS_CODON_INSERTION (EffectFunctionalClass.NONE, false),
CODON_DELETION (EffectFunctionalClass.NONE, false),
CODON_CHANGE_PLUS_CODON_DELETION (EffectFunctionalClass.NONE, false),
UTR_5_DELETED (EffectFunctionalClass.NONE, false),
UTR_3_DELETED (EffectFunctionalClass.NONE, false),
NON_SYNONYMOUS_CODING,
CODON_CHANGE,
CODON_INSERTION,
CODON_CHANGE_PLUS_CODON_INSERTION,
CODON_DELETION,
CODON_CHANGE_PLUS_CODON_DELETION,
UTR_5_DELETED,
UTR_3_DELETED,
// Low-impact effects:
SYNONYMOUS_CODING (EffectFunctionalClass.SILENT, false),
SYNONYMOUS_START (EffectFunctionalClass.SILENT, false),
NON_SYNONYMOUS_START (EffectFunctionalClass.SILENT, false),
SYNONYMOUS_STOP (EffectFunctionalClass.SILENT, false),
NON_SYNONYMOUS_STOP (EffectFunctionalClass.SILENT, false),
START_GAINED (EffectFunctionalClass.NONE, false),
SYNONYMOUS_START,
NON_SYNONYMOUS_START,
START_GAINED,
SYNONYMOUS_CODING,
SYNONYMOUS_STOP,
NON_SYNONYMOUS_STOP,
// Modifiers:
NONE (EffectFunctionalClass.NONE, true),
CHROMOSOME (EffectFunctionalClass.NONE, true),
INTERGENIC (EffectFunctionalClass.NONE, true),
UPSTREAM (EffectFunctionalClass.NONE, true),
UTR_5_PRIME (EffectFunctionalClass.NONE, true),
CDS (EffectFunctionalClass.NONE, true),
GENE (EffectFunctionalClass.NONE, true),
TRANSCRIPT (EffectFunctionalClass.NONE, true),
EXON (EffectFunctionalClass.NONE, true),
INTRON (EffectFunctionalClass.NONE, true),
UTR_3_PRIME (EffectFunctionalClass.NONE, true),
DOWNSTREAM (EffectFunctionalClass.NONE, true),
INTRON_CONSERVED (EffectFunctionalClass.NONE, true),
INTERGENIC_CONSERVED (EffectFunctionalClass.NONE, true),
REGULATION (EffectFunctionalClass.NONE, true),
CUSTOM (EffectFunctionalClass.NONE, true),
WITHIN_NON_CODING_GENE (EffectFunctionalClass.NONE, true);
private final EffectFunctionalClass functionalClass;
private final boolean isModifier;
EffectType ( EffectFunctionalClass functionalClass, boolean isModifier ) {
this.functionalClass = functionalClass;
this.isModifier = isModifier;
}
public EffectFunctionalClass getFunctionalClass() {
return functionalClass;
}
public boolean isModifier() {
return isModifier;
}
NONE,
CHROMOSOME,
CUSTOM,
CDS,
GENE,
TRANSCRIPT,
EXON,
INTRON_CONSERVED,
UTR_5_PRIME,
UTR_3_PRIME,
DOWNSTREAM,
INTRAGENIC,
INTERGENIC,
INTERGENIC_CONSERVED,
UPSTREAM,
REGULATION,
INTRON
}
// SnpEff labels each effect as either LOW, MODERATE, or HIGH impact. We take the additional step of
// classifying some of the LOW impact effects as MODIFIERs.
// SnpEff labels each effect as either LOW, MODERATE, or HIGH impact, or as a MODIFIER.
public enum EffectImpact {
MODIFIER (0),
LOW (1),
@ -202,7 +185,7 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
UNKNOWN
}
// We assign a functional class to each SnpEff effect.
// SnpEff assigns a functional class to each effect.
public enum EffectFunctionalClass {
NONE (0),
SILENT (1),
@ -379,13 +362,13 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
public List<String> getKeyNames() {
return Arrays.asList( InfoFieldKey.EFFECT_KEY.getKeyName(),
InfoFieldKey.IMPACT_KEY.getKeyName(),
InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(),
InfoFieldKey.CODON_CHANGE_KEY.getKeyName(),
InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(),
InfoFieldKey.GENE_NAME_KEY.getKeyName(),
InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(),
InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(),
InfoFieldKey.EXON_ID_KEY.getKeyName(),
InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName()
InfoFieldKey.EXON_ID_KEY.getKeyName()
);
}
@ -393,13 +376,13 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
return Arrays.asList(
new VCFInfoHeaderLine(InfoFieldKey.EFFECT_KEY.getKeyName(), 1, VCFHeaderLineType.String, "The highest-impact effect resulting from the current variant (or one of the highest-impact effects, if there is a tie)"),
new VCFInfoHeaderLine(InfoFieldKey.IMPACT_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Impact of the highest-impact effect resulting from the current variant " + Arrays.toString(EffectImpact.values())),
new VCFInfoHeaderLine(InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Functional class of the highest-impact effect resulting from the current variant: " + Arrays.toString(EffectFunctionalClass.values())),
new VCFInfoHeaderLine(InfoFieldKey.CODON_CHANGE_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Old/New codon for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Old/New amino acid for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Old/New amino acid for the highest-impact effect resulting from the current variant (in HGVS style)"),
new VCFInfoHeaderLine(InfoFieldKey.GENE_NAME_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Gene name for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Gene biotype for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Transcript ID for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(InfoFieldKey.EXON_ID_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Exon ID for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Functional class of the highest-impact effect resulting from the current variant: " + Arrays.toString(EffectFunctionalClass.values()))
new VCFInfoHeaderLine(InfoFieldKey.EXON_ID_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Exon ID for the highest-impact effect resulting from the current variant")
);
}
@ -409,6 +392,7 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
protected static class SnpEffEffect {
private EffectType effect;
private EffectImpact impact;
private EffectFunctionalClass functionalClass;
private String codonChange;
private String aminoAcidChange;
private String geneName;
@ -420,16 +404,21 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
private String parseError = null;
private boolean isWellFormed = true;
private static final int EXPECTED_NUMBER_OF_METADATA_FIELDS = 8;
private static final int NUMBER_OF_METADATA_FIELDS_UPON_WARNING = 9;
private static final int NUMBER_OF_METADATA_FIELDS_UPON_ERROR = 10;
private static final int EXPECTED_NUMBER_OF_METADATA_FIELDS = 9;
private static final int NUMBER_OF_METADATA_FIELDS_UPON_EITHER_WARNING_OR_ERROR = 10;
private static final int NUMBER_OF_METADATA_FIELDS_UPON_BOTH_WARNING_AND_ERROR = 11;
// Note that contrary to the description for the EFF field layout that SnpEff adds to the VCF header,
// errors come after warnings, not vice versa:
private static final int SNPEFF_WARNING_FIELD_INDEX = NUMBER_OF_METADATA_FIELDS_UPON_WARNING - 1;
private static final int SNPEFF_ERROR_FIELD_INDEX = NUMBER_OF_METADATA_FIELDS_UPON_ERROR - 1;
// If there is either a warning OR an error, it will be in the last field. If there is both
// a warning AND an error, the warning will be in the second-to-last field, and the error will
// be in the last field.
private static final int SNPEFF_WARNING_OR_ERROR_FIELD_UPON_SINGLE_ERROR = NUMBER_OF_METADATA_FIELDS_UPON_EITHER_WARNING_OR_ERROR - 1;
private static final int SNPEFF_WARNING_FIELD_UPON_BOTH_WARNING_AND_ERROR = NUMBER_OF_METADATA_FIELDS_UPON_BOTH_WARNING_AND_ERROR - 2;
private static final int SNPEFF_ERROR_FIELD_UPON_BOTH_WARNING_AND_ERROR = NUMBER_OF_METADATA_FIELDS_UPON_BOTH_WARNING_AND_ERROR - 1;
private static final int SNPEFF_CODING_FIELD_INDEX = 5;
// Position of the field indicating whether the effect is coding or non-coding. This field is used
// in selecting the most significant effect, but is not included in the annotations we return
// since it can be deduced from the SNPEFF_GENE_BIOTYPE field.
private static final int SNPEFF_CODING_FIELD_INDEX = 6;
public SnpEffEffect ( String effectName, String[] effectMetadata ) {
parseEffectName(effectName);
@ -447,11 +436,14 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
private void parseEffectMetadata ( String[] effectMetadata ) {
if ( effectMetadata.length != EXPECTED_NUMBER_OF_METADATA_FIELDS ) {
if ( effectMetadata.length == NUMBER_OF_METADATA_FIELDS_UPON_WARNING ) {
parseError(String.format("SnpEff issued the following warning: %s", effectMetadata[SNPEFF_WARNING_FIELD_INDEX]));
if ( effectMetadata.length == NUMBER_OF_METADATA_FIELDS_UPON_EITHER_WARNING_OR_ERROR ) {
parseError(String.format("SnpEff issued the following warning or error: \"%s\"",
effectMetadata[SNPEFF_WARNING_OR_ERROR_FIELD_UPON_SINGLE_ERROR]));
}
else if ( effectMetadata.length == NUMBER_OF_METADATA_FIELDS_UPON_ERROR ) {
parseError(String.format("SnpEff issued the following error: %s", effectMetadata[SNPEFF_ERROR_FIELD_INDEX]));
else if ( effectMetadata.length == NUMBER_OF_METADATA_FIELDS_UPON_BOTH_WARNING_AND_ERROR ) {
parseError(String.format("SnpEff issued the following warning: \"%s\", and the following error: \"%s\"",
effectMetadata[SNPEFF_WARNING_FIELD_UPON_BOTH_WARNING_AND_ERROR],
effectMetadata[SNPEFF_ERROR_FIELD_UPON_BOTH_WARNING_AND_ERROR]));
}
else {
parseError(String.format("Wrong number of effect metadata fields. Expected %d but found %d",
@ -461,23 +453,33 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
return;
}
if ( effect != null && effect.isModifier() ) {
impact = EffectImpact.MODIFIER;
// The impact field will never be empty, and should always contain one of the enumerated values:
try {
impact = EffectImpact.valueOf(effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]);
}
else {
catch ( IllegalArgumentException e ) {
parseError(String.format("Unrecognized value for effect impact: %s", effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]));
}
// The functional class field will be empty when the effect has no functional class associated with it:
if ( effectMetadata[InfoFieldKey.FUNCTIONAL_CLASS_KEY.getFieldIndex()].trim().length() > 0 ) {
try {
impact = EffectImpact.valueOf(effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]);
functionalClass = EffectFunctionalClass.valueOf(effectMetadata[InfoFieldKey.FUNCTIONAL_CLASS_KEY.getFieldIndex()]);
}
catch ( IllegalArgumentException e ) {
parseError(String.format("Unrecognized value for effect impact: %s", effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]));
parseError(String.format("Unrecognized value for effect functional class: %s", effectMetadata[InfoFieldKey.FUNCTIONAL_CLASS_KEY.getFieldIndex()]));
}
}
else {
functionalClass = EffectFunctionalClass.NONE;
}
codonChange = effectMetadata[InfoFieldKey.CODON_CHANGE_KEY.getFieldIndex()];
aminoAcidChange = effectMetadata[InfoFieldKey.AMINO_ACID_CHANGE_KEY.getFieldIndex()];
geneName = effectMetadata[InfoFieldKey.GENE_NAME_KEY.getFieldIndex()];
geneBiotype = effectMetadata[InfoFieldKey.GENE_BIOTYPE_KEY.getFieldIndex()];
// The coding field will be empty when SnpEff has no coding info for the effect:
if ( effectMetadata[SNPEFF_CODING_FIELD_INDEX].trim().length() > 0 ) {
try {
coding = EffectCoding.valueOf(effectMetadata[SNPEFF_CODING_FIELD_INDEX]);
@ -534,7 +536,7 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
return true;
}
else if ( impact.isSameImpactAs(other.impact) ) {
return effect.getFunctionalClass().isHigherPriorityThan(other.effect.getFunctionalClass());
return functionalClass.isHigherPriorityThan(other.functionalClass);
}
return false;
@ -545,13 +547,13 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
addAnnotation(annotations, InfoFieldKey.EFFECT_KEY.getKeyName(), effect.toString());
addAnnotation(annotations, InfoFieldKey.IMPACT_KEY.getKeyName(), impact.toString());
addAnnotation(annotations, InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(), functionalClass.toString());
addAnnotation(annotations, InfoFieldKey.CODON_CHANGE_KEY.getKeyName(), codonChange);
addAnnotation(annotations, InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(), aminoAcidChange);
addAnnotation(annotations, InfoFieldKey.GENE_NAME_KEY.getKeyName(), geneName);
addAnnotation(annotations, InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(), geneBiotype);
addAnnotation(annotations, InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(), transcriptID);
addAnnotation(annotations, InfoFieldKey.EXON_ID_KEY.getKeyName(), exonID);
addAnnotation(annotations, InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(), effect.getFunctionalClass().toString());
return annotations;
}

View File

@ -26,7 +26,6 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
@ -36,7 +35,6 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Map;
@ -83,8 +81,7 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
* @param priors priors to use for GLs
* @param GLs hash of sample->GL to fill in
* @param alternateAlleleToUse the alternate allele to use, null if not set
*
* @param useBAQedPileup
* @param useBAQedPileup should we use the BAQed pileup or the raw one?
* @return genotype likelihoods per sample for AA, AB, BB
*/
public abstract Allele getLikelihoods(RefMetaDataTracker tracker,
@ -93,13 +90,14 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
AlignmentContextUtils.ReadOrientation contextType,
GenotypePriors priors,
Map<String, MultiallelicGenotypeLikelihoods> GLs,
Allele alternateAlleleToUse, boolean useBAQedPileup);
Allele alternateAlleleToUse,
boolean useBAQedPileup);
protected int getFilteredDepth(ReadBackedPileup pileup) {
int count = 0;
for ( PileupElement p : pileup ) {
if ( BaseUtils.isRegularBase( p.getBase() ) )
count++;
count += p.getRepresentativeCount();
}
return count;

View File

@ -1,140 +0,0 @@
/*
* Copyright (c) 2010.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.qc;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.walkers.ReadPairWalker;
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
import java.io.PrintStream;
import java.util.Collection;
import java.util.List;
/**
* Counts the number of read pairs encountered in a file sorted in
* query name order. Breaks counts down by total pairs and number
* of paired reads.
*
*
* <h2>Input</h2>
* <p>
* One or more bam files.
* </p>
*
* <h2>Output</h2>
* <p>
* Number of pairs seen.
* </p>
*
* <h2>Examples</h2>
* <pre>
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T CountPairs \
* -o output.txt \
* -I input.bam
* </pre>
*
* @author mhanna
*/
public class CountPairsWalker extends ReadPairWalker<Integer,Long> {
@Output
private PrintStream out;
/**
* How many reads are the first in a pair, based on flag 0x0040 from the SAM spec.
*/
private long firstOfPair = 0;
/**
* How many reads are the second in a pair, based on flag 0x0080 from the SAM spec.
*/
private long secondOfPair = 0;
/**
* A breakdown of the total number of reads seen with exactly the same read name.
*/
private List<Long> pairCountsByType = new ExpandingArrayList<Long>();
/**
* Maps a read pair to a given reduce of type MapType. Semantics determined by subclasser.
* @param reads Collection of reads having the same name.
* @return Semantics defined by implementer.
*/
@Override
public Integer map(Collection<SAMRecord> reads) {
if(pairCountsByType.get(reads.size()) != null)
pairCountsByType.set(reads.size(),pairCountsByType.get(reads.size())+1);
else
pairCountsByType.set(reads.size(),1L);
for(SAMRecord read: reads) {
if(read.getFirstOfPairFlag()) firstOfPair++;
if(read.getSecondOfPairFlag()) secondOfPair++;
}
return 1;
}
/**
* No pairs at the beginning of a traversal.
* @return 0 always.
*/
@Override
public Long reduceInit() {
return 0L;
}
/**
* Combine number of pairs seen in this iteration (always 1) with total number of pairs
* seen in previous iterations.
* @param value Pairs in this iteration (1), from the map function.
* @param sum Count of all pairs in prior iterations.
* @return All pairs encountered in previous iterations + all pairs encountered in this iteration (sum + 1).
*/
@Override
public Long reduce(Integer value, Long sum) {
return value + sum;
}
/**
* Print summary statistics over the entire traversal.
* @param sum A count of all read pairs viewed.
*/
@Override
public void onTraversalDone(Long sum) {
out.printf("Total number of pairs : %d%n",sum);
out.printf("Total number of first reads in pair : %d%n",firstOfPair);
out.printf("Total number of second reads in pair: %d%n",secondOfPair);
for(int i = 1; i < pairCountsByType.size(); i++) {
if(pairCountsByType.get(i) == null)
continue;
out.printf("Pairs of size %d: %d%n",i,pairCountsByType.get(i));
}
}
}

View File

@ -25,7 +25,13 @@
package org.broadinstitute.sting.utils.variantcontext;
import org.broad.tribble.TribbleException;
import org.broadinstitute.sting.gatk.io.DirectOutputTracker;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.jgrapht.util.MathUtil;
import java.util.EnumMap;
import java.util.Map;
public class GenotypeLikelihoods {
public static final boolean CAP_PLS = false;
@ -94,6 +100,48 @@ public class GenotypeLikelihoods {
return likelihoodsAsString_PLs;
}
//Return genotype likelihoods as an EnumMap with Genotypes as keys and likelihoods as values
//Returns null in case of missing likelihoods
public EnumMap<Genotype.Type,Double> getAsMap(boolean normalizeFromLog10){
//Make sure that the log10likelihoods are set
double[] likelihoods = normalizeFromLog10 ? MathUtils.normalizeFromLog10(getAsVector()) : getAsVector();
if(likelihoods == null)
return null;
EnumMap<Genotype.Type,Double> likelihoodsMap = new EnumMap<Genotype.Type, Double>(Genotype.Type.class);
likelihoodsMap.put(Genotype.Type.HOM_REF,likelihoods[Genotype.Type.HOM_REF.ordinal()-1]);
likelihoodsMap.put(Genotype.Type.HET,likelihoods[Genotype.Type.HET.ordinal()-1]);
likelihoodsMap.put(Genotype.Type.HOM_VAR, likelihoods[Genotype.Type.HOM_VAR.ordinal() - 1]);
return likelihoodsMap;
}
//Return the neg log10 Genotype Quality (GQ) for the given genotype
//Returns Double.NEGATIVE_INFINITY in case of missing genotype
public double getNegLog10GQ(Genotype.Type genotype){
double qual = Double.NEGATIVE_INFINITY;
EnumMap<Genotype.Type,Double> likelihoods = getAsMap(false);
if(likelihoods == null)
return qual;
for(Map.Entry<Genotype.Type,Double> likelihood : likelihoods.entrySet()){
if(likelihood.getKey() == genotype)
continue;
if(likelihood.getValue() > qual)
qual = likelihood.getValue();
}
//Quality of the most likely genotype = likelihood(most likely) - likelihood (2nd best)
qual = likelihoods.get(genotype) - qual;
//Quality of other genotypes 1-P(G)
if (qual < 0) {
double[] normalized = MathUtils.normalizeFromLog10(getAsVector());
double chosenGenotype = normalized[genotype.ordinal()-1];
qual = -1.0 * Math.log10(1.0 - chosenGenotype);
}
return qual;
}
private final static double[] parsePLsIntoLikelihoods(String likelihoodsAsString_PLs) {
if ( !likelihoodsAsString_PLs.equals(VCFConstants.MISSING_VALUE_v4) ) {
String[] strings = likelihoodsAsString_PLs.split(",");

View File

@ -33,7 +33,7 @@ public class SnpEffUnitTest {
@Test
public void testParseWellFormedEffect() {
String effectName = "NON_SYNONYMOUS_CODING";
String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
String[] effectMetadata = { "MODERATE", "MISSENSE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
Assert.assertTrue( effect.isWellFormed() && effect.isCoding() );
@ -42,7 +42,7 @@ public class SnpEffUnitTest {
@Test
public void testParseInvalidEffectNameEffect() {
String effectName = "MADE_UP_EFFECT";
String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
String[] effectMetadata = { "MODERATE", "MISSENSE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
Assert.assertFalse(effect.isWellFormed());
@ -51,7 +51,7 @@ public class SnpEffUnitTest {
@Test
public void testParseInvalidEffectImpactEffect() {
String effectName = "NON_SYNONYMOUS_CODING";
String[] effectMetadata = { "MEDIUM", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
String[] effectMetadata = { "MEDIUM", "MISSENSE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
Assert.assertFalse(effect.isWellFormed());
@ -60,27 +60,27 @@ public class SnpEffUnitTest {
@Test
public void testParseWrongNumberOfMetadataFieldsEffect() {
String effectName = "NON_SYNONYMOUS_CODING";
String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990" };
String[] effectMetadata = { "MODERATE", "MISSENSE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990" };
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
Assert.assertFalse(effect.isWellFormed());
}
@Test
public void testParseSnpEffWarningEffect() {
public void testParseSnpEffOneWarningOrErrorEffect() {
String effectName = "NON_SYNONYMOUS_CODING";
String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829", "SNPEFF_WARNING" };
String[] effectMetadata = { "MODERATE", "MISSENSE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829", "SNPEFF_WARNING_OR_ERROR_TEXT" };
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
Assert.assertTrue( ! effect.isWellFormed() && effect.getParseError().equals("SnpEff issued the following warning: SNPEFF_WARNING") );
Assert.assertTrue( ! effect.isWellFormed() && effect.getParseError().equals("SnpEff issued the following warning or error: \"SNPEFF_WARNING_OR_ERROR_TEXT\"") );
}
@Test
public void testParseSnpEffErrorEffect() {
public void testParseSnpEffBothWarningAndErrorEffect() {
String effectName = "NON_SYNONYMOUS_CODING";
String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829", "", "SNPEFF_ERROR" };
String[] effectMetadata = { "MODERATE", "MISSENSE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829", "SNPEFF_WARNING_TEXT", "SNPEFF_ERROR_TEXT" };
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
Assert.assertTrue( ! effect.isWellFormed() && effect.getParseError().equals("SnpEff issued the following error: SNPEFF_ERROR") );
Assert.assertTrue( ! effect.isWellFormed() && effect.getParseError().equals("SnpEff issued the following warning: \"SNPEFF_WARNING_TEXT\", and the following error: \"SNPEFF_ERROR_TEXT\"") );
}
}

View File

@ -148,9 +148,9 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T VariantAnnotator -R " + hg19Reference + " -NO_HEADER -o %s -A SnpEff --variant " +
validationDataLocation + "1kg_exomes_unfiltered.AFR.unfiltered.vcf --snpEffFile " + validationDataLocation +
"snpEff.AFR.unfiltered.vcf -L 1:1-1,500,000 -L 2:232,325,429",
"snpEff2.0.4.AFR.unfiltered.vcf -L 1:1-1,500,000 -L 2:232,325,429",
1,
Arrays.asList("122321a85e448f21679f6ca15c5e22ad")
Arrays.asList("51258f5c880bd1ca3eb45a1711335c66")
);
executeTest("Testing SnpEff annotations", spec);
}

View File

@ -6,23 +6,131 @@ import org.testng.annotations.Test;
import java.util.Arrays;
public class PhaseByTransmissionIntegrationTest extends WalkerTest {
private static String phaseByTransmissionTestDataRoot = validationDataLocation + "/PhaseByTransmission";
private static String fundamentalTestVCF = phaseByTransmissionTestDataRoot + "/" + "FundamentalsTest.unfiltered.vcf";
private static String phaseByTransmissionTestDataRoot = validationDataLocation + "PhaseByTransmission/";
private static String goodFamilyFile = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.goodFamilies.ped";
private static String TNTest = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.TN.vcf";
private static String TPTest = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.TP.vcf";
private static String FPTest = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.FP.vcf";
private static String SpecialTest = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.Special.vcf";
//Tests using PbT on all genotypes with default parameters
//And all reporting options
@Test
public void testBasicFunctionality() {
public void testTrueNegativeMV() {
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T PhaseByTransmission",
"-NO_HEADER",
"-R " + b37KGReference,
"--variant " + fundamentalTestVCF,
"-f NA12892+NA12891=NA12878",
"--variant " + TNTest,
"-ped "+ goodFamilyFile,
"-L 1:10109-10315",
"-mvf %s",
"-o %s"
),
2,
Arrays.asList("16fefda693156eadf1481fd9de23facb","9418a7a6405b78179ca13a67b8bfcc14")
);
executeTest("testTrueNegativeMV", spec);
}
@Test
public void testTruePositiveMV() {
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T PhaseByTransmission",
"-NO_HEADER",
"-R " + b37KGReference,
"--variant " + TPTest,
"-ped "+ goodFamilyFile,
"-L 1:10109-10315",
"-mvf %s",
"-o %s"
),
2,
Arrays.asList("14cf1d21a54d8b9fb506df178b634c56","efc66ae3d036715b721f9bd35b65d556")
);
executeTest("testTruePositiveMV", spec);
}
@Test
public void testFalsePositiveMV() {
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T PhaseByTransmission",
"-NO_HEADER",
"-R " + b37KGReference,
"--variant " + FPTest,
"-ped "+ goodFamilyFile,
"-L 1:10109-10315",
"-mvf %s",
"-o %s"
),
2,
Arrays.asList("f9b0fae9fe1e0f09b883a292b0e70a12","398724bc1e65314cc5ee92706e05a3ee")
);
executeTest("testFalsePositiveMV", spec);
}
@Test
public void testSpecialCases() {
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T PhaseByTransmission",
"-NO_HEADER",
"-R " + b37KGReference,
"--variant " + SpecialTest,
"-ped "+ goodFamilyFile,
"-L 1:10109-10315",
"-mvf %s",
"-o %s"
),
2,
Arrays.asList("b8d1aa3789ce77b45430c62d13ee3006","a1a333e08fafb288cda0e7711909e1c3")
);
executeTest("testSpecialCases", spec);
}
//Test using a different prior
//Here the FP file is used but as the prior is lowered, 3 turn to TP
@Test
public void testPriorOption() {
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T PhaseByTransmission",
"-NO_HEADER",
"-R " + b37KGReference,
"--variant " + FPTest,
"-ped "+ goodFamilyFile,
"-L 1:10109-10315",
"-prior 1e-4",
"-mvf %s",
"-o %s"
),
2,
Arrays.asList("7201ce7cc47db5840ac6b647709f7c33","c11b5e7cd7459d90d0160f917eff3b1e")
);
executeTest("testPriorOption", spec);
}
//Test when running without MV reporting option
//This is the exact same test file as FP but should not generate a .mvf file
@Test
public void testMVFileOption() {
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T PhaseByTransmission",
"-NO_HEADER",
"-R " + b37KGReference,
"--variant " + FPTest,
"-ped "+ goodFamilyFile,
"-L 1:10109-10315",
"-o %s"
),
1,
Arrays.asList("")
Arrays.asList("398724bc1e65314cc5ee92706e05a3ee")
);
executeTest("testBasicFunctionality", spec);
executeTest("testMVFileOption", spec);
}
}

View File

@ -21,16 +21,16 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-T VariantEval",
"-R " + b37KGReference,
"--dbsnp " + b37dbSNP132,
"--eval " + validationDataLocation + "snpEff.AFR.unfiltered.VariantAnnotator.output.vcf",
"--eval " + validationDataLocation + "snpEff2.0.4.AFR.unfiltered.VariantAnnotator.output.vcf",
"-noEV",
"-EV TiTvVariantEvaluator",
"-noST",
"-ST FunctionalClass",
"-L " + validationDataLocation + "snpEff.AFR.unfiltered.VariantAnnotator.output.vcf",
"-L " + validationDataLocation + "snpEff2.0.4.AFR.unfiltered.VariantAnnotator.output.vcf",
"-o %s"
),
1,
Arrays.asList("d9dcb352c53106f54fcc981f15d35a90")
Arrays.asList("a36414421621b377d6146d58d2fcecd0")
);
executeTest("testFunctionClassWithSnpeff", spec);
}

View File

@ -41,11 +41,6 @@ public class SimpleTimerUnitTest extends BaseTest {
double t6 = t.getElapsedTime();
Assert.assertTrue(t5 >= t4, "Restarted timer elapsed time should be after elapsed time preceding the restart");
Assert.assertTrue(t6 >= t5, "Second elapsed time not after the first in restarted timer");
t.stop().start();
Assert.assertTrue(t.isRunning(), "second started timer isn't running");
Assert.assertTrue(t.getElapsedTime() >= 0.0, "elapsed time should have been reset");
Assert.assertTrue(t.getElapsedTime() < t6, "elapsed time isn't less than time before start call"); // we should have effective no elapsed time
}
private final static void idleLoop() {

View File

@ -29,10 +29,13 @@ package org.broadinstitute.sting.utils.variantcontext;
// the imports for unit testing.
import org.broadinstitute.sting.utils.MathUtils;
import org.testng.Assert;
import org.testng.annotations.Test;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import java.util.EnumMap;
/**
* Basic unit test for Genotype likelihoods objects
@ -69,6 +72,50 @@ public class GenotypeLikelihoodsUnitTest {
gl.getAsVector();
}
@Test
public void testGetAsMap(){
GenotypeLikelihoods gl = new GenotypeLikelihoods(v);
//Log scale
EnumMap<Genotype.Type,Double> glMap = gl.getAsMap(false);
Assert.assertEquals(v[Genotype.Type.HOM_REF.ordinal()-1],glMap.get(Genotype.Type.HOM_REF));
Assert.assertEquals(v[Genotype.Type.HET.ordinal()-1],glMap.get(Genotype.Type.HET));
Assert.assertEquals(v[Genotype.Type.HOM_VAR.ordinal()-1],glMap.get(Genotype.Type.HOM_VAR));
//Linear scale
glMap = gl.getAsMap(true);
double [] vl = MathUtils.normalizeFromLog10(v);
Assert.assertEquals(vl[Genotype.Type.HOM_REF.ordinal()-1],glMap.get(Genotype.Type.HOM_REF));
Assert.assertEquals(vl[Genotype.Type.HET.ordinal()-1],glMap.get(Genotype.Type.HET));
Assert.assertEquals(vl[Genotype.Type.HOM_VAR.ordinal()-1],glMap.get(Genotype.Type.HOM_VAR));
//Test missing likelihoods
gl = new GenotypeLikelihoods(".");
glMap = gl.getAsMap(false);
Assert.assertNull(glMap);
}
@Test
public void testGetNegLog10GQ(){
GenotypeLikelihoods gl = new GenotypeLikelihoods(vPLString);
//GQ for the best guess genotype
Assert.assertEquals(gl.getNegLog10GQ(Genotype.Type.HET),3.9);
double[] test = MathUtils.normalizeFromLog10(gl.getAsVector());
//GQ for the other genotypes
Assert.assertEquals(gl.getNegLog10GQ(Genotype.Type.HOM_REF), -1.0 * Math.log10(1.0 - test[Genotype.Type.HOM_REF.ordinal()-1]));
Assert.assertEquals(gl.getNegLog10GQ(Genotype.Type.HOM_VAR), -1.0 * Math.log10(1.0 - test[Genotype.Type.HOM_VAR.ordinal()-1]));
//Test missing likelihoods
gl = new GenotypeLikelihoods(".");
Assert.assertEquals(gl.getNegLog10GQ(Genotype.Type.HOM_REF),Double.NEGATIVE_INFINITY);
Assert.assertEquals(gl.getNegLog10GQ(Genotype.Type.HET),Double.NEGATIVE_INFINITY);
Assert.assertEquals(gl.getNegLog10GQ(Genotype.Type.HOM_VAR),Double.NEGATIVE_INFINITY);
}
private void assertDoubleArraysAreEqual(double[] v1, double[] v2) {
Assert.assertEquals(v1.length, v2.length);
for ( int i = 0; i < v1.length; i++ ) {