Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Guillermo del Angel 2011-11-18 07:53:13 -05:00
commit 77ef2be9b8
31 changed files with 1821 additions and 869 deletions

View File

@ -76,7 +76,7 @@
<dependency org="org.apache.poi" name="poi-ooxml" rev="3.8-beta3" />
<!-- snpEff annotator for pipelines -->
<dependency org="net.sf.snpeff" name="snpeff" rev="2.0.2" />
<dependency org="net.sf.snpeff" name="snpeff" rev="2.0.4rc3" />
<!-- Exclude dependencies on sun libraries where the downloads aren't available but included in the jvm. -->
<exclude org="javax.servlet" />

View File

@ -480,7 +480,7 @@ public class GenomeAnalysisEngine {
}
} else if (walker instanceof ReadPairWalker) {
if(readsDataSource != null && readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.queryname)
throw new UserException.MissortedBAM(SAMFileHeader.SortOrder.queryname, "Read pair walkers can only walk over query name-sorted data. Please resort your input BAM file.");
throw new UserException.MissortedBAM(SAMFileHeader.SortOrder.queryname, "Read pair walkers are exceptions in that they cannot be run on coordinate-sorted BAMs but instead require query name-sorted files. You will need to resort your input BAM file in query name order to use this walker.");
if(intervals != null && !intervals.isEmpty())
throw new UserException.CommandLineException("Pairs traversal cannot be used in conjunction with intervals.");

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.samples;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
@ -110,6 +111,17 @@ public class Sample implements Comparable<Sample> { // implements java.io.Serial
return infoDB.getSample(paternalID);
}
/**
 * Returns this sample's known parents, mother first then father,
 * omitting any parent that cannot be resolved (i.e. is null).
 * @return list of 0-2 parent samples; never null.
 */
public ArrayList<Sample> getParents(){
    final ArrayList<Sample> result = new ArrayList<Sample>(2);
    final Sample mother = getMother();
    if (mother != null)
        result.add(mother);
    final Sample father = getFather();
    if (father != null)
        result.add(father);
    return result;
}
/**
* Get gender of the sample
* @return property of key "gender" - must be of type Gender

View File

@ -49,5 +49,5 @@ public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnno
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.DEPTH_KEY); }
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Integer, "Filtered Depth")); }
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Integer, "Approximate read depth; some reads may have been filtered")); }
}

View File

@ -56,7 +56,7 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
// We refuse to parse SnpEff output files generated by unsupported versions, or
// lacking a SnpEff version number in the VCF header:
public static final String[] SUPPORTED_SNPEFF_VERSIONS = { "2.0.2" };
public static final String[] SUPPORTED_SNPEFF_VERSIONS = { "2.0.4" };
public static final String SNPEFF_VCF_HEADER_VERSION_LINE_KEY = "SnpEffVersion";
public static final String SNPEFF_VCF_HEADER_COMMAND_LINE_KEY = "SnpEffCmd";
@ -77,13 +77,13 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
public enum InfoFieldKey {
EFFECT_KEY ("SNPEFF_EFFECT", -1),
IMPACT_KEY ("SNPEFF_IMPACT", 0),
CODON_CHANGE_KEY ("SNPEFF_CODON_CHANGE", 1),
AMINO_ACID_CHANGE_KEY ("SNPEFF_AMINO_ACID_CHANGE", 2),
GENE_NAME_KEY ("SNPEFF_GENE_NAME", 3),
GENE_BIOTYPE_KEY ("SNPEFF_GENE_BIOTYPE", 4),
TRANSCRIPT_ID_KEY ("SNPEFF_TRANSCRIPT_ID", 6),
EXON_ID_KEY ("SNPEFF_EXON_ID", 7),
FUNCTIONAL_CLASS_KEY ("SNPEFF_FUNCTIONAL_CLASS", -1);
FUNCTIONAL_CLASS_KEY ("SNPEFF_FUNCTIONAL_CLASS", 1),
CODON_CHANGE_KEY ("SNPEFF_CODON_CHANGE", 2),
AMINO_ACID_CHANGE_KEY ("SNPEFF_AMINO_ACID_CHANGE", 3),
GENE_NAME_KEY ("SNPEFF_GENE_NAME", 4),
GENE_BIOTYPE_KEY ("SNPEFF_GENE_BIOTYPE", 5),
TRANSCRIPT_ID_KEY ("SNPEFF_TRANSCRIPT_ID", 7),
EXON_ID_KEY ("SNPEFF_EXON_ID", 8);
// Actual text of the key
private final String keyName;
@ -110,70 +110,53 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
// are validated against this list.
public enum EffectType {
// High-impact effects:
FRAME_SHIFT (EffectFunctionalClass.NONE, false),
STOP_GAINED (EffectFunctionalClass.NONSENSE, false),
START_LOST (EffectFunctionalClass.NONE, false),
SPLICE_SITE_ACCEPTOR (EffectFunctionalClass.NONE, false),
SPLICE_SITE_DONOR (EffectFunctionalClass.NONE, false),
EXON_DELETED (EffectFunctionalClass.NONE, false),
STOP_LOST (EffectFunctionalClass.NONE, false),
SPLICE_SITE_ACCEPTOR,
SPLICE_SITE_DONOR,
START_LOST,
EXON_DELETED,
FRAME_SHIFT,
STOP_GAINED,
STOP_LOST,
// Moderate-impact effects:
NON_SYNONYMOUS_CODING (EffectFunctionalClass.MISSENSE, false),
CODON_CHANGE (EffectFunctionalClass.NONE, false),
CODON_INSERTION (EffectFunctionalClass.NONE, false),
CODON_CHANGE_PLUS_CODON_INSERTION (EffectFunctionalClass.NONE, false),
CODON_DELETION (EffectFunctionalClass.NONE, false),
CODON_CHANGE_PLUS_CODON_DELETION (EffectFunctionalClass.NONE, false),
UTR_5_DELETED (EffectFunctionalClass.NONE, false),
UTR_3_DELETED (EffectFunctionalClass.NONE, false),
NON_SYNONYMOUS_CODING,
CODON_CHANGE,
CODON_INSERTION,
CODON_CHANGE_PLUS_CODON_INSERTION,
CODON_DELETION,
CODON_CHANGE_PLUS_CODON_DELETION,
UTR_5_DELETED,
UTR_3_DELETED,
// Low-impact effects:
SYNONYMOUS_CODING (EffectFunctionalClass.SILENT, false),
SYNONYMOUS_START (EffectFunctionalClass.SILENT, false),
NON_SYNONYMOUS_START (EffectFunctionalClass.SILENT, false),
SYNONYMOUS_STOP (EffectFunctionalClass.SILENT, false),
NON_SYNONYMOUS_STOP (EffectFunctionalClass.SILENT, false),
START_GAINED (EffectFunctionalClass.NONE, false),
SYNONYMOUS_START,
NON_SYNONYMOUS_START,
START_GAINED,
SYNONYMOUS_CODING,
SYNONYMOUS_STOP,
NON_SYNONYMOUS_STOP,
// Modifiers:
NONE (EffectFunctionalClass.NONE, true),
CHROMOSOME (EffectFunctionalClass.NONE, true),
INTERGENIC (EffectFunctionalClass.NONE, true),
UPSTREAM (EffectFunctionalClass.NONE, true),
UTR_5_PRIME (EffectFunctionalClass.NONE, true),
CDS (EffectFunctionalClass.NONE, true),
GENE (EffectFunctionalClass.NONE, true),
TRANSCRIPT (EffectFunctionalClass.NONE, true),
EXON (EffectFunctionalClass.NONE, true),
INTRON (EffectFunctionalClass.NONE, true),
UTR_3_PRIME (EffectFunctionalClass.NONE, true),
DOWNSTREAM (EffectFunctionalClass.NONE, true),
INTRON_CONSERVED (EffectFunctionalClass.NONE, true),
INTERGENIC_CONSERVED (EffectFunctionalClass.NONE, true),
REGULATION (EffectFunctionalClass.NONE, true),
CUSTOM (EffectFunctionalClass.NONE, true),
WITHIN_NON_CODING_GENE (EffectFunctionalClass.NONE, true);
private final EffectFunctionalClass functionalClass;
private final boolean isModifier;
EffectType ( EffectFunctionalClass functionalClass, boolean isModifier ) {
this.functionalClass = functionalClass;
this.isModifier = isModifier;
}
public EffectFunctionalClass getFunctionalClass() {
return functionalClass;
}
public boolean isModifier() {
return isModifier;
}
NONE,
CHROMOSOME,
CUSTOM,
CDS,
GENE,
TRANSCRIPT,
EXON,
INTRON_CONSERVED,
UTR_5_PRIME,
UTR_3_PRIME,
DOWNSTREAM,
INTRAGENIC,
INTERGENIC,
INTERGENIC_CONSERVED,
UPSTREAM,
REGULATION,
INTRON
}
// SnpEff labels each effect as either LOW, MODERATE, or HIGH impact. We take the additional step of
// classifying some of the LOW impact effects as MODIFIERs.
// SnpEff labels each effect as either LOW, MODERATE, or HIGH impact, or as a MODIFIER.
public enum EffectImpact {
MODIFIER (0),
LOW (1),
@ -202,7 +185,7 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
UNKNOWN
}
// We assign a functional class to each SnpEff effect.
// SnpEff assigns a functional class to each effect.
public enum EffectFunctionalClass {
NONE (0),
SILENT (1),
@ -379,13 +362,13 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
public List<String> getKeyNames() {
return Arrays.asList( InfoFieldKey.EFFECT_KEY.getKeyName(),
InfoFieldKey.IMPACT_KEY.getKeyName(),
InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(),
InfoFieldKey.CODON_CHANGE_KEY.getKeyName(),
InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(),
InfoFieldKey.GENE_NAME_KEY.getKeyName(),
InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(),
InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(),
InfoFieldKey.EXON_ID_KEY.getKeyName(),
InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName()
InfoFieldKey.EXON_ID_KEY.getKeyName()
);
}
@ -393,13 +376,13 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
return Arrays.asList(
new VCFInfoHeaderLine(InfoFieldKey.EFFECT_KEY.getKeyName(), 1, VCFHeaderLineType.String, "The highest-impact effect resulting from the current variant (or one of the highest-impact effects, if there is a tie)"),
new VCFInfoHeaderLine(InfoFieldKey.IMPACT_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Impact of the highest-impact effect resulting from the current variant " + Arrays.toString(EffectImpact.values())),
new VCFInfoHeaderLine(InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Functional class of the highest-impact effect resulting from the current variant: " + Arrays.toString(EffectFunctionalClass.values())),
new VCFInfoHeaderLine(InfoFieldKey.CODON_CHANGE_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Old/New codon for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Old/New amino acid for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Old/New amino acid for the highest-impact effect resulting from the current variant (in HGVS style)"),
new VCFInfoHeaderLine(InfoFieldKey.GENE_NAME_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Gene name for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Gene biotype for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Transcript ID for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(InfoFieldKey.EXON_ID_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Exon ID for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Functional class of the highest-impact effect resulting from the current variant: " + Arrays.toString(EffectFunctionalClass.values()))
new VCFInfoHeaderLine(InfoFieldKey.EXON_ID_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Exon ID for the highest-impact effect resulting from the current variant")
);
}
@ -409,6 +392,7 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
protected static class SnpEffEffect {
private EffectType effect;
private EffectImpact impact;
private EffectFunctionalClass functionalClass;
private String codonChange;
private String aminoAcidChange;
private String geneName;
@ -420,16 +404,21 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
private String parseError = null;
private boolean isWellFormed = true;
private static final int EXPECTED_NUMBER_OF_METADATA_FIELDS = 8;
private static final int NUMBER_OF_METADATA_FIELDS_UPON_WARNING = 9;
private static final int NUMBER_OF_METADATA_FIELDS_UPON_ERROR = 10;
private static final int EXPECTED_NUMBER_OF_METADATA_FIELDS = 9;
private static final int NUMBER_OF_METADATA_FIELDS_UPON_EITHER_WARNING_OR_ERROR = 10;
private static final int NUMBER_OF_METADATA_FIELDS_UPON_BOTH_WARNING_AND_ERROR = 11;
// Note that contrary to the description for the EFF field layout that SnpEff adds to the VCF header,
// errors come after warnings, not vice versa:
private static final int SNPEFF_WARNING_FIELD_INDEX = NUMBER_OF_METADATA_FIELDS_UPON_WARNING - 1;
private static final int SNPEFF_ERROR_FIELD_INDEX = NUMBER_OF_METADATA_FIELDS_UPON_ERROR - 1;
// If there is either a warning OR an error, it will be in the last field. If there is both
// a warning AND an error, the warning will be in the second-to-last field, and the error will
// be in the last field.
private static final int SNPEFF_WARNING_OR_ERROR_FIELD_UPON_SINGLE_ERROR = NUMBER_OF_METADATA_FIELDS_UPON_EITHER_WARNING_OR_ERROR - 1;
private static final int SNPEFF_WARNING_FIELD_UPON_BOTH_WARNING_AND_ERROR = NUMBER_OF_METADATA_FIELDS_UPON_BOTH_WARNING_AND_ERROR - 2;
private static final int SNPEFF_ERROR_FIELD_UPON_BOTH_WARNING_AND_ERROR = NUMBER_OF_METADATA_FIELDS_UPON_BOTH_WARNING_AND_ERROR - 1;
private static final int SNPEFF_CODING_FIELD_INDEX = 5;
// Position of the field indicating whether the effect is coding or non-coding. This field is used
// in selecting the most significant effect, but is not included in the annotations we return
// since it can be deduced from the SNPEFF_GENE_BIOTYPE field.
private static final int SNPEFF_CODING_FIELD_INDEX = 6;
public SnpEffEffect ( String effectName, String[] effectMetadata ) {
parseEffectName(effectName);
@ -447,11 +436,14 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
private void parseEffectMetadata ( String[] effectMetadata ) {
if ( effectMetadata.length != EXPECTED_NUMBER_OF_METADATA_FIELDS ) {
if ( effectMetadata.length == NUMBER_OF_METADATA_FIELDS_UPON_WARNING ) {
parseError(String.format("SnpEff issued the following warning: %s", effectMetadata[SNPEFF_WARNING_FIELD_INDEX]));
if ( effectMetadata.length == NUMBER_OF_METADATA_FIELDS_UPON_EITHER_WARNING_OR_ERROR ) {
parseError(String.format("SnpEff issued the following warning or error: \"%s\"",
effectMetadata[SNPEFF_WARNING_OR_ERROR_FIELD_UPON_SINGLE_ERROR]));
}
else if ( effectMetadata.length == NUMBER_OF_METADATA_FIELDS_UPON_ERROR ) {
parseError(String.format("SnpEff issued the following error: %s", effectMetadata[SNPEFF_ERROR_FIELD_INDEX]));
else if ( effectMetadata.length == NUMBER_OF_METADATA_FIELDS_UPON_BOTH_WARNING_AND_ERROR ) {
parseError(String.format("SnpEff issued the following warning: \"%s\", and the following error: \"%s\"",
effectMetadata[SNPEFF_WARNING_FIELD_UPON_BOTH_WARNING_AND_ERROR],
effectMetadata[SNPEFF_ERROR_FIELD_UPON_BOTH_WARNING_AND_ERROR]));
}
else {
parseError(String.format("Wrong number of effect metadata fields. Expected %d but found %d",
@ -461,23 +453,33 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
return;
}
if ( effect != null && effect.isModifier() ) {
impact = EffectImpact.MODIFIER;
// The impact field will never be empty, and should always contain one of the enumerated values:
try {
impact = EffectImpact.valueOf(effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]);
}
else {
catch ( IllegalArgumentException e ) {
parseError(String.format("Unrecognized value for effect impact: %s", effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]));
}
// The functional class field will be empty when the effect has no functional class associated with it:
if ( effectMetadata[InfoFieldKey.FUNCTIONAL_CLASS_KEY.getFieldIndex()].trim().length() > 0 ) {
try {
impact = EffectImpact.valueOf(effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]);
functionalClass = EffectFunctionalClass.valueOf(effectMetadata[InfoFieldKey.FUNCTIONAL_CLASS_KEY.getFieldIndex()]);
}
catch ( IllegalArgumentException e ) {
parseError(String.format("Unrecognized value for effect impact: %s", effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]));
parseError(String.format("Unrecognized value for effect functional class: %s", effectMetadata[InfoFieldKey.FUNCTIONAL_CLASS_KEY.getFieldIndex()]));
}
}
else {
functionalClass = EffectFunctionalClass.NONE;
}
codonChange = effectMetadata[InfoFieldKey.CODON_CHANGE_KEY.getFieldIndex()];
aminoAcidChange = effectMetadata[InfoFieldKey.AMINO_ACID_CHANGE_KEY.getFieldIndex()];
geneName = effectMetadata[InfoFieldKey.GENE_NAME_KEY.getFieldIndex()];
geneBiotype = effectMetadata[InfoFieldKey.GENE_BIOTYPE_KEY.getFieldIndex()];
// The coding field will be empty when SnpEff has no coding info for the effect:
if ( effectMetadata[SNPEFF_CODING_FIELD_INDEX].trim().length() > 0 ) {
try {
coding = EffectCoding.valueOf(effectMetadata[SNPEFF_CODING_FIELD_INDEX]);
@ -534,7 +536,7 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
return true;
}
else if ( impact.isSameImpactAs(other.impact) ) {
return effect.getFunctionalClass().isHigherPriorityThan(other.effect.getFunctionalClass());
return functionalClass.isHigherPriorityThan(other.functionalClass);
}
return false;
@ -545,13 +547,13 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
addAnnotation(annotations, InfoFieldKey.EFFECT_KEY.getKeyName(), effect.toString());
addAnnotation(annotations, InfoFieldKey.IMPACT_KEY.getKeyName(), impact.toString());
addAnnotation(annotations, InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(), functionalClass.toString());
addAnnotation(annotations, InfoFieldKey.CODON_CHANGE_KEY.getKeyName(), codonChange);
addAnnotation(annotations, InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(), aminoAcidChange);
addAnnotation(annotations, InfoFieldKey.GENE_NAME_KEY.getKeyName(), geneName);
addAnnotation(annotations, InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(), geneBiotype);
addAnnotation(annotations, InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(), transcriptID);
addAnnotation(annotations, InfoFieldKey.EXON_ID_KEY.getKeyName(), exonID);
addAnnotation(annotations, InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(), effect.getFunctionalClass().toString());
return annotations;
}

View File

@ -26,7 +26,6 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
@ -36,7 +35,6 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Map;
@ -83,8 +81,7 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
* @param priors priors to use for GLs
* @param GLs hash of sample->GL to fill in
* @param alternateAlleleToUse the alternate allele to use, null if not set
*
* @param useBAQedPileup
* @param useBAQedPileup should we use the BAQed pileup or the raw one?
* @return genotype likelihoods per sample for AA, AB, BB
*/
public abstract Allele getLikelihoods(RefMetaDataTracker tracker,
@ -93,13 +90,14 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
AlignmentContextUtils.ReadOrientation contextType,
GenotypePriors priors,
Map<String, MultiallelicGenotypeLikelihoods> GLs,
Allele alternateAlleleToUse, boolean useBAQedPileup);
Allele alternateAlleleToUse,
boolean useBAQedPileup);
protected int getFilteredDepth(ReadBackedPileup pileup) {
int count = 0;
for ( PileupElement p : pileup ) {
if ( BaseUtils.isRegularBase( p.getBase() ) )
count++;
count += p.getRepresentativeCount();
}
return count;

View File

@ -258,7 +258,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
Set<VCFFormatHeaderLine> result = new HashSet<VCFFormatHeaderLine>();
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Genotype Quality"));
result.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)"));
result.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Approximate read depth (reads with MQ=255 or with bad mates are filtered)"));
result.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"));
return result;

View File

@ -1,140 +0,0 @@
/*
* Copyright (c) 2010.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.qc;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.walkers.ReadPairWalker;
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
import java.io.PrintStream;
import java.util.Collection;
import java.util.List;
/**
 * Counts the number of read pairs encountered in a file sorted in
 * query name order. Breaks counts down by total pairs and number
 * of paired reads.
 *
 * <h2>Input</h2>
 * <p>
 * One or more bam files.
 * </p>
 *
 * <h2>Output</h2>
 * <p>
 * Number of pairs seen.
 * </p>
 *
 * <h2>Examples</h2>
 * <pre>
 * java -Xmx2g -jar GenomeAnalysisTK.jar \
 *   -R ref.fasta \
 *   -T CountPairs \
 *   -o output.txt \
 *   -I input.bam
 * </pre>
 *
 * @author mhanna
 */
public class CountPairsWalker extends ReadPairWalker<Integer,Long> {
// Destination stream for the summary printed in onTraversalDone().
@Output
private PrintStream out;
/**
 * How many reads are the first in a pair, based on flag 0x0040 from the SAM spec.
 */
private long firstOfPair = 0;
/**
 * How many reads are the second in a pair, based on flag 0x0080 from the SAM spec.
 */
private long secondOfPair = 0;
/**
 * A breakdown of the total number of reads seen with exactly the same read name.
 * Index = group size (number of reads sharing one name), value = number of such groups.
 */
private List<Long> pairCountsByType = new ExpandingArrayList<Long>();
/**
 * Tallies one group of same-named reads: bumps the histogram bucket for the
 * group's size, and counts first-of-pair / second-of-pair flags across the group.
 * @param reads Collection of reads having the same name.
 * @return 1, the number of groups processed by this map call.
 */
@Override
public Integer map(Collection<SAMRecord> reads) {
// ExpandingArrayList appears to grow on set() and return null for unseen
// indices — NOTE(review): confirm against its implementation.
if(pairCountsByType.get(reads.size()) != null)
pairCountsByType.set(reads.size(),pairCountsByType.get(reads.size())+1);
else
pairCountsByType.set(reads.size(),1L);
for(SAMRecord read: reads) {
if(read.getFirstOfPairFlag()) firstOfPair++;
if(read.getSecondOfPairFlag()) secondOfPair++;
}
return 1;
}
/**
 * No pairs at the beginning of a traversal.
 * @return 0 always.
 */
@Override
public Long reduceInit() {
return 0L;
}
/**
 * Combine number of pairs seen in this iteration (always 1) with total number of pairs
 * seen in previous iterations.
 * @param value Pairs in this iteration (1), from the map function.
 * @param sum Count of all pairs in prior iterations.
 * @return All pairs encountered in previous iterations + all pairs encountered in this iteration (sum + 1).
 */
@Override
public Long reduce(Integer value, Long sum) {
return value + sum;
}
/**
 * Print summary statistics over the entire traversal: total pairs, per-flag
 * counts, and the histogram of group sizes (starting at size 1; empty buckets skipped).
 * @param sum A count of all read pairs viewed.
 */
@Override
public void onTraversalDone(Long sum) {
out.printf("Total number of pairs : %d%n",sum);
out.printf("Total number of first reads in pair : %d%n",firstOfPair);
out.printf("Total number of second reads in pair: %d%n",secondOfPair);
for(int i = 1; i < pairCountsByType.size(); i++) {
if(pairCountsByType.get(i) == null)
continue;
out.printf("Pairs of size %d: %d%n",i,pairCountsByType.get(i));
}
}
}

View File

@ -24,10 +24,8 @@
package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.apache.poi.hpsf.Variant;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.text.XReadLines;
@ -275,8 +273,8 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
private double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 0;
/**
* Variants are kept in memory to guarantee that exactly n variants will be chosen randomly, so use it only for a reasonable
* number of variants. Use --select_random_fraction for larger numbers of variants.
* Variants are kept in memory to guarantee that exactly n variants will be chosen randomly, so make sure you supply the program with enough memory
* given your input set. This option will NOT work well for large callsets; use --select_random_fraction for sets with a large number of variants.
*/
@Argument(fullName="select_random_number", shortName="number", doc="Selects a number of variants at random from the variant track", required=false)
private int numRandom = 0;
@ -532,7 +530,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
}
}
if (SELECT_RANDOM_NUMBER) {
randomlyAddVariant(++variantNumber, sub, ref.getBase());
randomlyAddVariant(++variantNumber, sub);
}
else if (!SELECT_RANDOM_FRACTION || ( GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) {
vcfWriter.add(sub);
@ -705,7 +703,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
return sub;
}
private void randomlyAddVariant(int rank, VariantContext vc, byte refBase) {
private void randomlyAddVariant(int rank, VariantContext vc) {
if (nVariantsAdded < numRandom)
variantArray[nVariantsAdded++] = new RandomVariantStructure(vc);

View File

@ -554,4 +554,54 @@ public class GenomeLocParser {
return createGenomeLoc(contigName,contig.getSequenceIndex(),1,contig.getSequenceLength(), true);
}
/**
 * Creates a loc immediately to the left of the given loc (ending at loc start - 1),
 * of up to maxBasePairs size.
 * @param loc The original loc
 * @param maxBasePairs The maximum number of basePairs
 * @return The contiguous loc of up to maxBasePairs length, or null if the loc is
 *         unmapped or already begins at the start of the contig.
 */
@Requires({"loc != null", "maxBasePairs > 0"})
public GenomeLoc createGenomeLocAtStart(GenomeLoc loc, int maxBasePairs) {
if (GenomeLoc.isUnmapped(loc))
return null;
String contigName = loc.getContig();
SAMSequenceRecord contig = contigInfo.getSequence(contigName);
int contigIndex = contig.getSequenceIndex();
int start = loc.getStart() - maxBasePairs;
int stop = loc.getStart() - 1;
// Clamp the flank to the first position of the contig (coordinates are 1-based).
if (start < 1)
start = 1;
// The flank would end before position 1: loc already begins at the contig start.
if (stop < 1)
return null;
return createGenomeLoc(contigName, contigIndex, start, stop, true);
}
/**
 * Creates a loc to the right (starting at the loc stop + 1) of maxBasePairs size.
 * @param loc The original loc
 * @param maxBasePairs The maximum number of basePairs
 * @return The contiguous loc of up to maxBasePairs length or null if the loc is already at the end of the contig.
 */
@Requires({"loc != null", "maxBasePairs > 0"})
public GenomeLoc createGenomeLocAtStop(GenomeLoc loc, int maxBasePairs) {
    if (GenomeLoc.isUnmapped(loc))
        return null;
    final String contigName = loc.getContig();
    final SAMSequenceRecord sequence = contigInfo.getSequence(contigName);
    final int sequenceLength = sequence.getSequenceLength();
    final int flankStart = loc.getStop() + 1;
    // The flank would begin past the last base: loc already ends at the contig end.
    if (flankStart > sequenceLength)
        return null;
    // Clamp the flank to the last position of the contig.
    final int flankStop = Math.min(loc.getStop() + maxBasePairs, sequenceLength);
    return createGenomeLoc(contigName, sequence.getSequenceIndex(), flankStart, flankStop, true);
}
}

View File

@ -233,8 +233,12 @@ public class IntervalUtils {
*
* Returns a null string if there are no differences, otherwise returns a string describing the difference
* (useful for UnitTests). Assumes both lists are sorted
*
* @param masterArg sorted master genome locs
* @param testArg sorted test genome locs
* @return null string if there are no difference, otherwise a string describing the difference
*/
public static final String equateIntervals(List<GenomeLoc> masterArg, List<GenomeLoc> testArg) {
public static String equateIntervals(List<GenomeLoc> masterArg, List<GenomeLoc> testArg) {
LinkedList<GenomeLoc> master = new LinkedList<GenomeLoc>(masterArg);
LinkedList<GenomeLoc> test = new LinkedList<GenomeLoc>(testArg);
@ -317,23 +321,6 @@ public class IntervalUtils {
return lengths;
}
/**
 * Counts the number of interval files an interval list can be split into using
 * scatterIntervalArguments: each maximal run of consecutive locs on the same
 * contig contributes one part.
 * @param locs The genome locs.
 * @return The maximum number of parts the intervals can be split into.
 */
public static int countContigIntervals(List<GenomeLoc> locs) {
    int parts = 0;
    String previousContig = null;
    for (GenomeLoc loc : locs) {
        final String currentContig = loc.getContig();
        // A new part starts whenever the contig changes (or on the very first loc).
        if (previousContig == null || !previousContig.equals(currentContig)) {
            parts++;
            previousContig = currentContig;
        }
    }
    return parts;
}
/**
* Splits an interval list into multiple files.
* @param fileHeader The sam file header.
@ -373,7 +360,6 @@ public class IntervalUtils {
* @return A list of lists of genome locs, split according to splits
*/
public static List<List<GenomeLoc>> splitIntervalsToSubLists(List<GenomeLoc> locs, List<Integer> splits) {
int locIndex = 1;
int start = 0;
List<List<GenomeLoc>> sublists = new ArrayList<List<GenomeLoc>>(splits.size());
for (Integer stop: splits) {
@ -465,7 +451,7 @@ public class IntervalUtils {
@Requires({"remaining != null", "!remaining.isEmpty()", "idealSplitSize > 0"})
@Ensures({"result != null"})
final static SplitLocusRecursive splitLocusIntervals1(LinkedList<GenomeLoc> remaining, long idealSplitSize) {
static SplitLocusRecursive splitLocusIntervals1(LinkedList<GenomeLoc> remaining, long idealSplitSize) {
final List<GenomeLoc> split = new ArrayList<GenomeLoc>();
long size = 0;
@ -579,10 +565,101 @@ public class IntervalUtils {
}
}
public static final long intervalSize(final List<GenomeLoc> locs) {
/**
 * Sums the sizes of all the given locs.
 * @param locs The genome locs to measure.
 * @return Total size in base pairs across all locs.
 */
public static long intervalSize(final List<GenomeLoc> locs) {
    long total = 0L;
    for (final GenomeLoc interval : locs) {
        total += interval.size();
    }
    return total;
}
/**
 * Reads intervals from inputIntervals, computes flanking intervals of up to
 * basePairs size on each side (via getFlankingIntervals), and writes them to
 * flankingIntervals as an interval list built against the reference's sequence
 * dictionary.
 * @param reference Reference file used to build the loc parser and sequence dictionary.
 * @param inputIntervals File containing the original intervals.
 * @param flankingIntervals Destination file for the computed flanks.
 * @param basePairs Maximum number of base pairs for each flank.
 * @throws UserException.MalformedFile if the input contains no intervals, or no flanks can be produced.
 */
public static void writeFlankingIntervals(File reference, File inputIntervals, File flankingIntervals, int basePairs) {
ReferenceDataSource referenceDataSource = new ReferenceDataSource(reference);
GenomeLocParser parser = new GenomeLocParser(referenceDataSource.getReference());
List<GenomeLoc> originalList = intervalFileToList(parser, inputIntervals.getAbsolutePath());
if (originalList.isEmpty())
throw new UserException.MalformedFile(inputIntervals, "File contains no intervals");
List<GenomeLoc> flankingList = getFlankingIntervals(parser, originalList, basePairs);
if (flankingList.isEmpty())
throw new UserException.MalformedFile(inputIntervals, "Unable to produce any flanks for the intervals");
// The output header carries only the reference's sequence dictionary.
SAMFileHeader samFileHeader = new SAMFileHeader();
samFileHeader.setSequenceDictionary(referenceDataSource.getReference().getSequenceDictionary());
IntervalList intervalList = new IntervalList(samFileHeader);
int i = 0;
// Each flank is converted with a sequential 1-based index — NOTE(review):
// confirm how toInterval uses this index (naming vs. ordering).
for (GenomeLoc loc: flankingList)
intervalList.add(toInterval(loc, ++i));
intervalList.write(flankingIntervals);
}
/**
 * Returns a list of flanking intervals around the passed-in locs: up to basePairs
 * on each side of every loc, with adjacent/overlapping flanks between neighboring
 * locs merged into one. Does not extend UNMAPPED locs.
 * @param parser A genome loc parser for creating the new intervals
 * @param locs Original genome locs
 * @param basePairs Number of base pairs on each side of loc
 * @return The list of intervals flanking the locs
 */
public static List<GenomeLoc> getFlankingIntervals(final GenomeLocParser parser, final List<GenomeLoc> locs, final int basePairs) {
// Sort and merge first so that per-contig neighbors are adjacent and non-overlapping.
List<GenomeLoc> sorted = sortAndMergeIntervals(parser, locs, IntervalMergingRule.ALL).toList();
if (sorted.size() == 0)
return Collections.emptyList();
LinkedHashMap<String, List<GenomeLoc>> locsByContig = splitByContig(sorted);
List<GenomeLoc> expanded = new ArrayList<GenomeLoc>();
for (String contig: locsByContig.keySet()) {
List<GenomeLoc> contigLocs = locsByContig.get(contig);
int contigLocsSize = contigLocs.size();
GenomeLoc startLoc, stopLoc;
// Create loc at start of the list (null if the first loc already begins the contig)
startLoc = parser.createGenomeLocAtStart(contigLocs.get(0), basePairs);
if (startLoc != null)
expanded.add(startLoc);
// Create locs between each loc[i] and loc[i+1]
for (int i = 0; i < contigLocsSize - 1; i++) {
stopLoc = parser.createGenomeLocAtStop(contigLocs.get(i), basePairs);
startLoc = parser.createGenomeLocAtStart(contigLocs.get(i + 1), basePairs);
// Flanks that touch or overlap between two neighboring locs collapse into one interval.
if (stopLoc.getStop() + 1 >= startLoc.getStart()) {
// NOTE: This is different than GenomeLoc.merge()
// merge() returns a loc which covers the entire range of stop and start,
// possibly returning positions inside loc(i) or loc(i+1)
// We want to make sure that the start of the stopLoc is used, and the stop of the startLoc
GenomeLoc merged = parser.createGenomeLoc(
stopLoc.getContig(), stopLoc.getStart(), startLoc.getStop());
expanded.add(merged);
} else {
expanded.add(stopLoc);
expanded.add(startLoc);
}
}
// Create loc at the end of the list (null if the last loc already ends the contig)
stopLoc = parser.createGenomeLocAtStop(contigLocs.get(contigLocsSize - 1), basePairs);
if (stopLoc != null)
expanded.add(stopLoc);
}
return expanded;
}
/**
 * Groups a contig-sorted list of locs by contig name, preserving encounter order
 * (LinkedHashMap), and dropping UNMAPPED locs. A new group is opened whenever the
 * current loc is on a different contig than the previous one, so the input is
 * expected to be sorted with each contig's locs contiguous.
 *
 * @param sorted contig-sorted genome locs
 * @return contig name to the locs on that contig, in input order
 */
private static LinkedHashMap<String, List<GenomeLoc>> splitByContig(List<GenomeLoc> sorted) {
    final LinkedHashMap<String, List<GenomeLoc>> byContig = new LinkedHashMap<String, List<GenomeLoc>>();
    GenomeLoc previous = null;
    List<GenomeLoc> currentGroup = null;
    for (final GenomeLoc loc : sorted) {
        // Unmapped locs have no contig and are intentionally excluded.
        if (GenomeLoc.isUnmapped(loc))
            continue;
        final boolean startsNewContig = (previous == null) || !previous.onSameContig(loc);
        if (startsNewContig) {
            currentGroup = new ArrayList<GenomeLoc>();
            byContig.put(loc.getContig(), currentGroup);
        }
        currentGroup.add(loc);
        previous = loc;
    }
    return byContig;
}
}

View File

@ -188,7 +188,9 @@ public class GATKSAMRecord extends BAMRecord {
}
/**
 * Returns the reduced read count at offset i within this reduced read.
 *
 * The counts array stores the count directly at offset 0; every other entry
 * appears to hold an offset relative to that first count (the value returned
 * is firstCount + offsetCount), capped at Byte.MAX_VALUE to avoid byte
 * overflow. (Note: this view of the file contained the pre-diff body
 * {@code return getReducedReadCounts()[i];} left above the new one, leaving
 * unreachable code; the stale line has been removed.)
 *
 * @param i offset into the reduced read counts array
 * @return the reconstructed count at offset i, capped at Byte.MAX_VALUE
 */
public final byte getReducedCount(final int i) {
    final byte firstCount = getReducedReadCounts()[0];
    if ( i == 0 )
        return firstCount;
    final byte offsetCount = getReducedReadCounts()[i];
    // int arithmetic, then cap, so firstCount + offsetCount cannot wrap in byte space
    return (byte) Math.min(firstCount + offsetCount, Byte.MAX_VALUE);
}

View File

@ -25,7 +25,13 @@
package org.broadinstitute.sting.utils.variantcontext;
import org.broad.tribble.TribbleException;
import org.broadinstitute.sting.gatk.io.DirectOutputTracker;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.jgrapht.util.MathUtil;
import java.util.EnumMap;
import java.util.Map;
public class GenotypeLikelihoods {
public static final boolean CAP_PLS = false;
@ -94,6 +100,48 @@ public class GenotypeLikelihoods {
return likelihoodsAsString_PLs;
}
/**
 * Returns the genotype likelihoods as an EnumMap keyed by Genotype.Type
 * (HOM_REF, HET, HOM_VAR).
 *
 * Fix: the original passed getAsVector() to MathUtils.normalizeFromLog10()
 * BEFORE performing the null check, so with normalizeFromLog10 == true and
 * missing likelihoods this method would blow up instead of returning null as
 * documented. The null check now happens first.
 *
 * @param normalizeFromLog10 if true, return normalized (linear-space) likelihoods
 *                           instead of the raw log10 vector
 * @return map of genotype type to likelihood, or null if likelihoods are missing
 */
public EnumMap<Genotype.Type,Double> getAsMap(boolean normalizeFromLog10){
    // Make sure that the log10 likelihoods are set before touching them.
    double[] likelihoods = getAsVector();
    if ( likelihoods == null )
        return null;
    if ( normalizeFromLog10 )
        likelihoods = MathUtils.normalizeFromLog10(likelihoods);
    EnumMap<Genotype.Type,Double> likelihoodsMap = new EnumMap<Genotype.Type, Double>(Genotype.Type.class);
    // ordinal() - 1 maps HOM_REF/HET/HOM_VAR onto vector indices 0/1/2
    // (ordinal 0 is presumably NO_CALL -- TODO confirm against Genotype.Type).
    likelihoodsMap.put(Genotype.Type.HOM_REF, likelihoods[Genotype.Type.HOM_REF.ordinal()-1]);
    likelihoodsMap.put(Genotype.Type.HET, likelihoods[Genotype.Type.HET.ordinal()-1]);
    likelihoodsMap.put(Genotype.Type.HOM_VAR, likelihoods[Genotype.Type.HOM_VAR.ordinal() - 1]);
    return likelihoodsMap;
}
/**
 * Returns the neg log10 Genotype Quality (GQ) for the given genotype.
 *
 * If the requested genotype is the most likely one, GQ is the difference
 * between its log10 likelihood and that of the second-best genotype.
 * Otherwise (the subtraction below comes out negative), GQ falls back to
 * -log10(1 - P(G)) computed from the normalized likelihoods.
 *
 * @param genotype the genotype type to score
 * @return the GQ, or Double.NEGATIVE_INFINITY if likelihoods are missing
 */
public double getNegLog10GQ(Genotype.Type genotype){
    double qual = Double.NEGATIVE_INFINITY;
    // Raw (unnormalized) log10 likelihoods; null means no PLs are available.
    EnumMap<Genotype.Type,Double> likelihoods = getAsMap(false);
    if(likelihoods == null)
        return qual;
    // Find the largest likelihood among all genotypes OTHER than the requested one.
    for(Map.Entry<Genotype.Type,Double> likelihood : likelihoods.entrySet()){
        if(likelihood.getKey() == genotype)
            continue;
        if(likelihood.getValue() > qual)
            qual = likelihood.getValue();
    }
    //Quality of the most likely genotype = likelihood(most likely) - likelihood (2nd best)
    qual = likelihoods.get(genotype) - qual;
    //Quality of other genotypes 1-P(G)
    // qual < 0 means the requested genotype is NOT the most likely one.
    if (qual < 0) {
        double[] normalized = MathUtils.normalizeFromLog10(getAsVector());
        // ordinal() - 1 maps HOM_REF/HET/HOM_VAR onto vector indices 0/1/2.
        double chosenGenotype = normalized[genotype.ordinal()-1];
        // NOTE(review): if P(G) rounds to exactly 1.0 here, log10(0) yields an
        // infinite result -- presumably unreachable since qual < 0 implies some
        // other genotype is more likely; confirm.
        qual = -1.0 * Math.log10(1.0 - chosenGenotype);
    }
    return qual;
}
private final static double[] parsePLsIntoLikelihoods(String likelihoodsAsString_PLs) {
if ( !likelihoodsAsString_PLs.equals(VCFConstants.MISSING_VALUE_v4) ) {
String[] strings = likelihoodsAsString_PLs.split(",");

View File

@ -223,7 +223,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @param referenceBaseForIndel padded reference base
*/
public VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles, Map<String, Genotype> genotypes, double negLog10PError, Set<String> filters, Map<String, ?> attributes, Byte referenceBaseForIndel) {
this(source, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, referenceBaseForIndel, false);
this(source, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, referenceBaseForIndel, false, true);
}
/**
@ -240,7 +240,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @param attributes attributes
*/
public VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles, Map<String, Genotype> genotypes, double negLog10PError, Set<String> filters, Map<String, ?> attributes) {
this(source, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, null, false);
this(source, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, null, false, true);
}
/**
@ -261,7 +261,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @param referenceBaseForIndel padded reference base
*/
public VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles, double negLog10PError, Set<String> filters, Map<String, ?> attributes, Byte referenceBaseForIndel) {
this(source, contig, start, stop, alleles, NO_GENOTYPES, negLog10PError, filters, attributes, referenceBaseForIndel, true);
this(source, contig, start, stop, alleles, NO_GENOTYPES, negLog10PError, filters, attributes, referenceBaseForIndel, true, true);
}
/**
@ -278,7 +278,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @param attributes attributes
*/
public VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles, Collection<Genotype> genotypes, double negLog10PError, Set<String> filters, Map<String, ?> attributes) {
this(source, contig, start, stop, alleles, genotypes != null ? genotypeCollectionToMap(new TreeMap<String, Genotype>(), genotypes) : null, negLog10PError, filters, attributes, null, false);
this(source, contig, start, stop, alleles, genotypes != null ? genotypeCollectionToMap(new TreeMap<String, Genotype>(), genotypes) : null, negLog10PError, filters, attributes, null, false, true);
}
/**
@ -291,7 +291,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @param alleles alleles
*/
public VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles) {
this(source, contig, start, stop, alleles, NO_GENOTYPES, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, null, false);
this(source, contig, start, stop, alleles, NO_GENOTYPES, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, null, false, true);
}
/**
@ -314,7 +314,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @param other the VariantContext to copy
*/
public VariantContext(VariantContext other) {
this(other.getSource(), other.getChr(), other.getStart(), other.getEnd() , other.getAlleles(), other.getGenotypes(), other.getNegLog10PError(), other.filtersWereApplied() ? other.getFilters() : null, other.getAttributes(), other.REFERENCE_BASE_FOR_INDEL, false);
this(other.getSource(), other.getChr(), other.getStart(), other.getEnd() , other.getAlleles(), other.getGenotypes(), other.getNegLog10PError(), other.filtersWereApplied() ? other.getFilters() : null, other.getAttributes(), other.REFERENCE_BASE_FOR_INDEL, false, true);
}
/**
@ -331,11 +331,13 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @param attributes attributes
* @param referenceBaseForIndel padded reference base
* @param genotypesAreUnparsed true if the genotypes have not yet been parsed
* @param performValidation if true, call validate() as the final step in construction
*/
private VariantContext(String source, String contig, long start, long stop,
Collection<Allele> alleles, Map<String, Genotype> genotypes,
double negLog10PError, Set<String> filters, Map<String, ?> attributes,
Byte referenceBaseForIndel, boolean genotypesAreUnparsed) {
Byte referenceBaseForIndel, boolean genotypesAreUnparsed,
boolean performValidation ) {
if ( contig == null ) { throw new IllegalArgumentException("Contig cannot be null"); }
this.contig = contig;
this.start = start;
@ -371,39 +373,57 @@ public class VariantContext implements Feature { // to enable tribble intergrati
}
}
validate();
if ( performValidation ) {
validate();
}
}
// ---------------------------------------------------------------------------------------------------------
//
// Partial-cloning routines (because Variant Context is immutable).
//
// IMPORTANT: These routines assume that the VariantContext on which they're called is already valid.
// Due to this assumption, they explicitly tell the constructor NOT to perform validation by
// calling validate(), and instead perform validation only on the data that's changed.
//
// Note that we don't call vc.getGenotypes() because that triggers the lazy loading.
// Also note that we need to create a new attributes map because it's unmodifiable and the constructor may try to modify it.
//
// ---------------------------------------------------------------------------------------------------------
public static VariantContext modifyGenotypes(VariantContext vc, Map<String, Genotype> genotypes) {
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap<String, Object>(vc.getAttributes()), vc.getReferenceBaseForIndel(), false);
VariantContext modifiedVC = new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap<String, Object>(vc.getAttributes()), vc.getReferenceBaseForIndel(), false, false);
modifiedVC.validateGenotypes();
return modifiedVC;
}
public static VariantContext modifyLocation(VariantContext vc, String chr, int start, int end) {
return new VariantContext(vc.getSource(), chr, start, end, vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap<String, Object>(vc.getAttributes()), vc.getReferenceBaseForIndel(), true);
VariantContext modifiedVC = new VariantContext(vc.getSource(), chr, start, end, vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap<String, Object>(vc.getAttributes()), vc.getReferenceBaseForIndel(), true, false);
// Since start and end have changed, we need to call both validateAlleles() and validateReferencePadding(),
// since those validation routines rely on the values of start and end:
modifiedVC.validateAlleles();
modifiedVC.validateReferencePadding();
return modifiedVC;
}
public static VariantContext modifyFilters(VariantContext vc, Set<String> filters) {
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd() , vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), filters, new HashMap<String, Object>(vc.getAttributes()), vc.getReferenceBaseForIndel(), true);
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd() , vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), filters, new HashMap<String, Object>(vc.getAttributes()), vc.getReferenceBaseForIndel(), true, false);
}
public static VariantContext modifyAttributes(VariantContext vc, Map<String, Object> attributes) {
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, attributes, vc.getReferenceBaseForIndel(), true);
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, attributes, vc.getReferenceBaseForIndel(), true, false);
}
public static VariantContext modifyReferencePadding(VariantContext vc, Byte b) {
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes(), b, true);
VariantContext modifiedVC = new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes(), b, true, false);
modifiedVC.validateReferencePadding();
return modifiedVC;
}
public static VariantContext modifyPErrorFiltersAndAttributes(VariantContext vc, double negLog10PError, Set<String> filters, Map<String, Object> attributes) {
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, negLog10PError, filters, attributes, vc.getReferenceBaseForIndel(), true);
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, negLog10PError, filters, attributes, vc.getReferenceBaseForIndel(), true, false);
}
// ---------------------------------------------------------------------------------------------------------

View File

@ -33,7 +33,7 @@ public class SnpEffUnitTest {
@Test
public void testParseWellFormedEffect() {
String effectName = "NON_SYNONYMOUS_CODING";
String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
String[] effectMetadata = { "MODERATE", "MISSENSE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
Assert.assertTrue( effect.isWellFormed() && effect.isCoding() );
@ -42,7 +42,7 @@ public class SnpEffUnitTest {
@Test
public void testParseInvalidEffectNameEffect() {
String effectName = "MADE_UP_EFFECT";
String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
String[] effectMetadata = { "MODERATE", "MISSENSE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
Assert.assertFalse(effect.isWellFormed());
@ -51,7 +51,7 @@ public class SnpEffUnitTest {
@Test
public void testParseInvalidEffectImpactEffect() {
String effectName = "NON_SYNONYMOUS_CODING";
String[] effectMetadata = { "MEDIUM", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
String[] effectMetadata = { "MEDIUM", "MISSENSE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
Assert.assertFalse(effect.isWellFormed());
@ -60,27 +60,27 @@ public class SnpEffUnitTest {
@Test
public void testParseWrongNumberOfMetadataFieldsEffect() {
String effectName = "NON_SYNONYMOUS_CODING";
String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990" };
String[] effectMetadata = { "MODERATE", "MISSENSE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990" };
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
Assert.assertFalse(effect.isWellFormed());
}
@Test
public void testParseSnpEffWarningEffect() {
public void testParseSnpEffOneWarningOrErrorEffect() {
String effectName = "NON_SYNONYMOUS_CODING";
String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829", "SNPEFF_WARNING" };
String[] effectMetadata = { "MODERATE", "MISSENSE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829", "SNPEFF_WARNING_OR_ERROR_TEXT" };
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
Assert.assertTrue( ! effect.isWellFormed() && effect.getParseError().equals("SnpEff issued the following warning: SNPEFF_WARNING") );
Assert.assertTrue( ! effect.isWellFormed() && effect.getParseError().equals("SnpEff issued the following warning or error: \"SNPEFF_WARNING_OR_ERROR_TEXT\"") );
}
@Test
public void testParseSnpEffErrorEffect() {
public void testParseSnpEffBothWarningAndErrorEffect() {
String effectName = "NON_SYNONYMOUS_CODING";
String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829", "", "SNPEFF_ERROR" };
String[] effectMetadata = { "MODERATE", "MISSENSE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829", "SNPEFF_WARNING_TEXT", "SNPEFF_ERROR_TEXT" };
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
Assert.assertTrue( ! effect.isWellFormed() && effect.getParseError().equals("SnpEff issued the following error: SNPEFF_ERROR") );
Assert.assertTrue( ! effect.isWellFormed() && effect.getParseError().equals("SnpEff issued the following warning: \"SNPEFF_WARNING_TEXT\", and the following error: \"SNPEFF_ERROR_TEXT\"") );
}
}

View File

@ -32,7 +32,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("8e7de435105499cd71ffc099e268a83e"));
Arrays.asList("9beb795536e95954f810835c6058f2ad"));
executeTest("test file has annotations, asking for annotations, #1", spec);
}
@ -40,7 +40,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("64b6804cb1e27826e3a47089349be581"));
Arrays.asList("2977bb30c8b84a5f4094fe6090658561"));
executeTest("test file has annotations, asking for annotations, #2", spec);
}
@ -64,7 +64,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("fd1ffb669800c2e07df1e2719aa38e49"));
Arrays.asList("49d989f467b8d6d8f98f7c1b67cd4a05"));
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
}
@ -72,7 +72,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("09f8e840770a9411ff77508e0ed0837f"));
Arrays.asList("0948cd1dba7d61f283cc4cf2a7757d92"));
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
}
@ -80,7 +80,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testExcludeAnnotations() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard -XA FisherStrand -XA ReadPosRankSumTest --variant:VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("b49fe03aa4b675db80a9db38a3552c95"));
Arrays.asList("33062eccd6eb73bc49440365430454c4"));
executeTest("test exclude annotations", spec);
}
@ -88,7 +88,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testOverwritingHeader() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + validationDataLocation + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1,
Arrays.asList("78d2c19f8107d865970dbaf3e12edd92"));
Arrays.asList("062155edec46a8c52243475fbf3a2943"));
executeTest("test overwriting header", spec);
}
@ -96,7 +96,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoReads() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + validationDataLocation + "vcfexample3empty.vcf -L " + validationDataLocation + "vcfexample3empty.vcf", 1,
Arrays.asList("16e3a1403fc376320d7c69492cad9345"));
Arrays.asList("06635f2dd91b539bfbce9bf7914d8e43"));
executeTest("not passing it any reads", spec);
}
@ -104,7 +104,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testDBTagWithDbsnp() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --dbsnp " + b36dbSNP129 + " -G Standard --variant " + validationDataLocation + "vcfexample3empty.vcf -L " + validationDataLocation + "vcfexample3empty.vcf", 1,
Arrays.asList("3da8ca2b6bdaf6e92d94a8c77a71313d"));
Arrays.asList("820eeba1f6e3a0758a69d937c524a38e"));
executeTest("getting DB tag with dbSNP", spec);
}
@ -112,7 +112,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testDBTagWithHapMap() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --comp:H3 " + validationDataLocation + "fakeHM3.vcf -G Standard --variant " + validationDataLocation + "vcfexample3empty.vcf -L " + validationDataLocation + "vcfexample3empty.vcf", 1,
Arrays.asList("1bc01c5b3bd0b7aef75230310c3ce688"));
Arrays.asList("31cc2ce157dd20771418c08d6b3be1fa"));
executeTest("getting DB tag with HM3", spec);
}
@ -120,7 +120,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testUsingExpression() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --resource:foo " + validationDataLocation + "targetAnnotations.vcf -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -E foo.AF -L " + validationDataLocation + "vcfexample3empty.vcf", 1,
Arrays.asList("ae30a1ac7bfbc3d22a327f8b689cad31"));
Arrays.asList("074865f8f8c0ca7bfd58681f396c49e9"));
executeTest("using expression", spec);
}
@ -128,7 +128,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testUsingExpressionWithID() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --resource:foo " + validationDataLocation + "targetAnnotations.vcf -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -E foo.ID -L " + validationDataLocation + "vcfexample3empty.vcf", 1,
Arrays.asList("1b4921085b26cbfe07d53b7c947de1e5"));
Arrays.asList("97b26db8135d083566fb585a677fbe8a"));
executeTest("using expression with ID", spec);
}
@ -148,9 +148,9 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T VariantAnnotator -R " + hg19Reference + " -NO_HEADER -o %s -A SnpEff --variant " +
validationDataLocation + "1kg_exomes_unfiltered.AFR.unfiltered.vcf --snpEffFile " + validationDataLocation +
"snpEff.AFR.unfiltered.vcf -L 1:1-1,500,000 -L 2:232,325,429",
"snpEff2.0.4.AFR.unfiltered.vcf -L 1:1-1,500,000 -L 2:232,325,429",
1,
Arrays.asList("122321a85e448f21679f6ca15c5e22ad")
Arrays.asList("51258f5c880bd1ca3eb45a1711335c66")
);
executeTest("Testing SnpEff annotations", spec);
}

View File

@ -5,10 +5,8 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.annotations.Test;
import java.io.File;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
// ********************************************************************************** //
@ -30,7 +28,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
Arrays.asList("b27939251539439a382538e507e03507"));
Arrays.asList("364a6112e6034571a896d4c68b6ff02a"));
executeTest("test MultiSample Pilot1", spec);
}
@ -38,12 +36,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testWithAllelesPassedIn() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
Arrays.asList("8de2602679ffc92388da0b6cb4325ef6"));
Arrays.asList("6cce55734ecd1a56c33f0ecba67d1258"));
executeTest("test MultiSample Pilot2 with alleles passed in", spec1);
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
Arrays.asList("6458f3b8fe4954e2ffc2af972aaab19e"));
Arrays.asList("b2a00130a4b6f9a3c31288a236f5d159"));
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
}
@ -51,7 +49,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSingleSamplePilot2() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
Arrays.asList("6762b72ae60155ad71738d7c76b80e4b"));
Arrays.asList("3ccce5d909f8f128e496f6841836e5f7"));
executeTest("test SingleSample Pilot2", spec);
}
@ -61,7 +59,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
//
// --------------------------------------------------------------------------------------------------------------
private final static String COMPRESSED_OUTPUT_MD5 = "bc71dba7bbdb23e7d5cc60461fdd897b";
private final static String COMPRESSED_OUTPUT_MD5 = "890143b366050e78d6c6ba6b2c6b6864";
@Test
public void testCompressedOutput() {
@ -82,7 +80,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
// Note that we need to turn off any randomization for this to work, so no downsampling and no annotations
String md5 = "b9504e446b9313559c3ed97add7e8dc1";
String md5 = "95614280c565ad90f8c000376fef822c";
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
@ -113,8 +111,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testCallingParameters() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "--min_base_quality_score 26", "bb3f294eab3e2cf52c70e63b23aac5ee" );
e.put( "--computeSLOD", "eb34979efaadba1e34bd82bcacf5c722" );
e.put( "--min_base_quality_score 26", "7acb1a5aee5fdadb0cc0ea07a212efc6" );
e.put( "--computeSLOD", "e9d23a08472e4e27b4f25e844f5bad57" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -127,9 +125,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testOutputParameter() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "-sites_only", "d40114aa201aa33ff5f174f15b6b73af" );
e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "3c681b053fd2280f3c42041d24243752" );
e.put( "--output_mode EMIT_ALL_SITES", "eafa6d71c5ecd64dfee5d7a3f60e392e" );
e.put( "-sites_only", "44f3b5b40e6ad44486cddfdb7e0bfcd8" );
e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "94e53320f14c5ff29d62f68d36b46fcd" );
e.put( "--output_mode EMIT_ALL_SITES", "73ad1cc41786b12c5f0e6f3e9ec2b728" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -143,12 +141,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testConfidence() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
Arrays.asList("c71ca370947739cb7d87b59452be7a07"));
Arrays.asList("902327e8a45fe585c8dfd1a7c4fcf60f"));
executeTest("test confidence 1", spec1);
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
Arrays.asList("1c0a599d475cc7d5e745df6e9b6c0d29"));
Arrays.asList("9ef66764a0ecfe89c3e4e1a93a69e349"));
executeTest("test confidence 2", spec2);
}
@ -160,8 +158,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testHeterozyosity() {
HashMap<Double, String> e = new HashMap<Double, String>();
e.put( 0.01, "f84da90c310367bd51f2ab6e346fa3d8" );
e.put( 1.0 / 1850, "5791e7fef40d4412b6d8f84e0a809c6c" );
e.put( 0.01, "46243ecc2b9dc716f48ea280c9bb7e72" );
e.put( 1.0 / 1850, "6b2a59dbc76984db6d4d6d6b5ee5d62c" );
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -185,7 +183,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("9cc9538ac83770e12bd0830d285bfbd0"));
Arrays.asList("f0fbe472f155baf594b1eeb58166edef"));
executeTest(String.format("test multiple technologies"), spec);
}
@ -204,7 +202,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -L 1:10,000,000-10,100,000" +
" -baq CALCULATE_AS_NECESSARY",
1,
Arrays.asList("eaf8043edb46dfbe9f97ae03baa797ed"));
Arrays.asList("8c87c749a7bb5a76ed8504d4ec254272"));
executeTest(String.format("test calling with BAQ"), spec);
}
@ -223,7 +221,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("eeba568272f9b42d5450da75c7cc6d2d"));
Arrays.asList("a64d2e65b5927260e4ce0d948760cc5c"));
executeTest(String.format("test indel caller in SLX"), spec);
}
@ -238,7 +236,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -minIndelCnt 1" +
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("5fe98ee853586dc9db58f0bc97daea63"));
Arrays.asList("2ad52c2e75b3ffbfd8f03237c444e8e6"));
executeTest(String.format("test indel caller in SLX with low min allele count"), spec);
}
@ -251,7 +249,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("19ff9bd3139480bdf79dcbf117cf2b24"));
Arrays.asList("69107157632714150fc068d412e31939"));
executeTest(String.format("test indel calling, multiple technologies"), spec);
}
@ -261,7 +259,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("118918f2e9e56a3cfc5ccb2856d529c8"));
Arrays.asList("84f065e115004e4c9d9d62f1bf6b4f44"));
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1);
}
@ -271,7 +269,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
+ validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("a20799237accd52c1b8c2ac096309c8f"));
Arrays.asList("7ea8372e4074f90ec942f964e1863351"));
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2);
}
@ -281,7 +279,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1,
Arrays.asList("18ef8181157b4ac3eb8492f538467f92"));
Arrays.asList("94afa8c011b47af7323c821bf19106b4"));
executeTest("test MultiSample Pilot2 indels with complicated records", spec3);
}
@ -290,7 +288,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec(
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation +
"phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1,
Arrays.asList("ad884e511a751b05e64db5314314365a"));
Arrays.asList("1e02f57fafaa41db71c531eb25e148e1"));
executeTest("test MultiSample 1000G Phase1 indels with complicated records emitting all sites", spec4);
}

View File

@ -6,23 +6,131 @@ import org.testng.annotations.Test;
import java.util.Arrays;
public class PhaseByTransmissionIntegrationTest extends WalkerTest {
private static String phaseByTransmissionTestDataRoot = validationDataLocation + "/PhaseByTransmission";
private static String fundamentalTestVCF = phaseByTransmissionTestDataRoot + "/" + "FundamentalsTest.unfiltered.vcf";
private static String phaseByTransmissionTestDataRoot = validationDataLocation + "PhaseByTransmission/";
private static String goodFamilyFile = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.goodFamilies.ped";
private static String TNTest = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.TN.vcf";
private static String TPTest = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.TP.vcf";
private static String FPTest = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.FP.vcf";
private static String SpecialTest = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.Special.vcf";
//Tests using PbT on all genotypes with default parameters
//And all reporting options
@Test
public void testBasicFunctionality() {
public void testTrueNegativeMV() {
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T PhaseByTransmission",
"-NO_HEADER",
"-R " + b37KGReference,
"--variant " + fundamentalTestVCF,
"-f NA12892+NA12891=NA12878",
"--variant " + TNTest,
"-ped "+ goodFamilyFile,
"-L 1:10109-10315",
"-mvf %s",
"-o %s"
),
2,
Arrays.asList("16fefda693156eadf1481fd9de23facb","9418a7a6405b78179ca13a67b8bfcc14")
);
executeTest("testTrueNegativeMV", spec);
}
@Test
public void testTruePositiveMV() {
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T PhaseByTransmission",
"-NO_HEADER",
"-R " + b37KGReference,
"--variant " + TPTest,
"-ped "+ goodFamilyFile,
"-L 1:10109-10315",
"-mvf %s",
"-o %s"
),
2,
Arrays.asList("14cf1d21a54d8b9fb506df178b634c56","efc66ae3d036715b721f9bd35b65d556")
);
executeTest("testTruePositiveMV", spec);
}
@Test
public void testFalsePositiveMV() {
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T PhaseByTransmission",
"-NO_HEADER",
"-R " + b37KGReference,
"--variant " + FPTest,
"-ped "+ goodFamilyFile,
"-L 1:10109-10315",
"-mvf %s",
"-o %s"
),
2,
Arrays.asList("f9b0fae9fe1e0f09b883a292b0e70a12","398724bc1e65314cc5ee92706e05a3ee")
);
executeTest("testFalsePositiveMV", spec);
}
@Test
public void testSpecialCases() {
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T PhaseByTransmission",
"-NO_HEADER",
"-R " + b37KGReference,
"--variant " + SpecialTest,
"-ped "+ goodFamilyFile,
"-L 1:10109-10315",
"-mvf %s",
"-o %s"
),
2,
Arrays.asList("b8d1aa3789ce77b45430c62d13ee3006","a1a333e08fafb288cda0e7711909e1c3")
);
executeTest("testSpecialCases", spec);
}
//Test using a different prior
//Here the FP file is used but as the prior is lowered, 3 turn to TP
@Test
public void testPriorOption() {
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T PhaseByTransmission",
"-NO_HEADER",
"-R " + b37KGReference,
"--variant " + FPTest,
"-ped "+ goodFamilyFile,
"-L 1:10109-10315",
"-prior 1e-4",
"-mvf %s",
"-o %s"
),
2,
Arrays.asList("7201ce7cc47db5840ac6b647709f7c33","c11b5e7cd7459d90d0160f917eff3b1e")
);
executeTest("testPriorOption", spec);
}
//Test when running without MV reporting option
//This is the exact same test file as FP but should not generate a .mvf file
@Test
public void testMVFileOption() {
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T PhaseByTransmission",
"-NO_HEADER",
"-R " + b37KGReference,
"--variant " + FPTest,
"-ped "+ goodFamilyFile,
"-L 1:10109-10315",
"-o %s"
),
1,
Arrays.asList("")
Arrays.asList("398724bc1e65314cc5ee92706e05a3ee")
);
executeTest("testBasicFunctionality", spec);
executeTest("testMVFileOption", spec);
}
}

View File

@ -21,16 +21,16 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-T VariantEval",
"-R " + b37KGReference,
"--dbsnp " + b37dbSNP132,
"--eval " + validationDataLocation + "snpEff.AFR.unfiltered.VariantAnnotator.output.vcf",
"--eval " + validationDataLocation + "snpEff2.0.4.AFR.unfiltered.VariantAnnotator.output.vcf",
"-noEV",
"-EV TiTvVariantEvaluator",
"-noST",
"-ST FunctionalClass",
"-L " + validationDataLocation + "snpEff.AFR.unfiltered.VariantAnnotator.output.vcf",
"-L " + validationDataLocation + "snpEff2.0.4.AFR.unfiltered.VariantAnnotator.output.vcf",
"-o %s"
),
1,
Arrays.asList("d9dcb352c53106f54fcc981f15d35a90")
Arrays.asList("a36414421621b377d6146d58d2fcecd0")
);
executeTest("testFunctionClassWithSnpeff", spec);
}

View File

@ -2,7 +2,6 @@ package org.broadinstitute.sting.utils;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMSequenceDictionary;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -11,6 +10,7 @@ import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
/**
@ -36,7 +36,6 @@ public class GenomeLocParserUnitTest extends BaseTest {
@Test
public void testGetContigIndexValid() {
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10);
assertEquals(genomeLocParser.getContigIndex("chr1"), 0); // should be in the reference
}
@ -67,7 +66,6 @@ public class GenomeLocParserUnitTest extends BaseTest {
@Test
public void testGetContigInfoKnownContig() {
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10);
assertEquals(0, "chr1".compareTo(genomeLocParser.getContigInfo("chr1").getSequenceName())); // should be in the reference
}
@ -191,4 +189,104 @@ public class GenomeLocParserUnitTest extends BaseTest {
assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",1,-2)); // bad stop
assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",10,11)); // bad start, past end
}
private static class FlankingGenomeLocTestData extends TestDataProvider {
final GenomeLocParser parser;
final int basePairs;
final GenomeLoc original, flankStart, flankStop;
private FlankingGenomeLocTestData(String name, GenomeLocParser parser, int basePairs, String original, String flankStart, String flankStop) {
super(FlankingGenomeLocTestData.class, name);
this.parser = parser;
this.basePairs = basePairs;
this.original = parse(parser, original);
this.flankStart = flankStart == null ? null : parse(parser, flankStart);
this.flankStop = flankStop == null ? null : parse(parser, flankStop);
}
private static GenomeLoc parse(GenomeLocParser parser, String str) {
return "unmapped".equals(str) ? GenomeLoc.UNMAPPED : parser.parseGenomeLoc(str);
}
}
@DataProvider(name = "flankingGenomeLocs")
public Object[][] getFlankingGenomeLocs() {
int contigLength = 10000;
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, contigLength);
GenomeLocParser parser = new GenomeLocParser(header.getSequenceDictionary());
new FlankingGenomeLocTestData("atStartBase1", parser, 1,
"chr1:1", null, "chr1:2");
new FlankingGenomeLocTestData("atStartBase50", parser, 50,
"chr1:1", null, "chr1:2-51");
new FlankingGenomeLocTestData("atStartRange50", parser, 50,
"chr1:1-10", null, "chr1:11-60");
new FlankingGenomeLocTestData("atEndBase1", parser, 1,
"chr1:" + contigLength, "chr1:" + (contigLength - 1), null);
new FlankingGenomeLocTestData("atEndBase50", parser, 50,
"chr1:" + contigLength, String.format("chr1:%d-%d", contigLength - 50, contigLength - 1), null);
new FlankingGenomeLocTestData("atEndRange50", parser, 50,
String.format("chr1:%d-%d", contigLength - 10, contigLength),
String.format("chr1:%d-%d", contigLength - 60, contigLength - 11),
null);
new FlankingGenomeLocTestData("nearStartBase1", parser, 1,
"chr1:2", "chr1:1", "chr1:3");
new FlankingGenomeLocTestData("nearStartRange50", parser, 50,
"chr1:21-30", "chr1:1-20", "chr1:31-80");
new FlankingGenomeLocTestData("nearEndBase1", parser, 1,
"chr1:" + (contigLength - 1), "chr1:" + (contigLength - 2), "chr1:" + contigLength);
new FlankingGenomeLocTestData("nearEndRange50", parser, 50,
String.format("chr1:%d-%d", contigLength - 30, contigLength - 21),
String.format("chr1:%d-%d", contigLength - 80, contigLength - 31),
String.format("chr1:%d-%d", contigLength - 20, contigLength));
new FlankingGenomeLocTestData("beyondStartBase1", parser, 1,
"chr1:3", "chr1:2", "chr1:4");
new FlankingGenomeLocTestData("beyondStartRange50", parser, 50,
"chr1:101-200", "chr1:51-100", "chr1:201-250");
new FlankingGenomeLocTestData("beyondEndBase1", parser, 1,
"chr1:" + (contigLength - 3),
"chr1:" + (contigLength - 4),
"chr1:" + (contigLength - 2));
new FlankingGenomeLocTestData("beyondEndRange50", parser, 50,
String.format("chr1:%d-%d", contigLength - 200, contigLength - 101),
String.format("chr1:%d-%d", contigLength - 250, contigLength - 201),
String.format("chr1:%d-%d", contigLength - 100, contigLength - 51));
new FlankingGenomeLocTestData("unmapped", parser, 50,
"unmapped", null, null);
new FlankingGenomeLocTestData("fullContig", parser, 50,
"chr1", null, null);
return FlankingGenomeLocTestData.getTests(FlankingGenomeLocTestData.class);
}
@Test(dataProvider = "flankingGenomeLocs")
public void testCreateGenomeLocAtStart(FlankingGenomeLocTestData data) {
GenomeLoc actual = data.parser.createGenomeLocAtStart(data.original, data.basePairs);
String description = String.format("%n name: %s%n original: %s%n actual: %s%n expected: %s%n",
data.toString(), data.original, actual, data.flankStart);
assertEquals(actual, data.flankStart, description);
}
@Test(dataProvider = "flankingGenomeLocs")
public void testCreateGenomeLocAtStop(FlankingGenomeLocTestData data) {
GenomeLoc actual = data.parser.createGenomeLocAtStop(data.original, data.basePairs);
String description = String.format("%n name: %s%n original: %s%n actual: %s%n expected: %s%n",
data.toString(), data.original, actual, data.flankStop);
assertEquals(actual, data.flankStop, description);
}
}

View File

@ -16,7 +16,8 @@ public class ReadUtilsUnitTest extends BaseTest {
GATKSAMRecord read, reducedRead;
final static String BASES = "ACTG";
final static String QUALS = "!+5?";
final private static byte[] REDUCED_READ_COUNTS = new byte[]{10, 20, 30, 40};
final private static byte[] REDUCED_READ_COUNTS = new byte[]{10, 20, 30, 40};
final private static byte[] REDUCED_READ_COUNTS_TAG = new byte[]{10, 10, 20, 30}; // just the offsets
@BeforeTest
public void init() {
@ -29,7 +30,7 @@ public class ReadUtilsUnitTest extends BaseTest {
reducedRead = ArtificialSAMUtils.createArtificialRead(header, "reducedRead", 0, 1, BASES.length());
reducedRead.setReadBases(BASES.getBytes());
reducedRead.setBaseQualityString(QUALS);
reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, REDUCED_READ_COUNTS);
reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, REDUCED_READ_COUNTS_TAG);
}
private void testReadBasesAndQuals(GATKSAMRecord read, int expectedStart, int expectedStop) {

View File

@ -41,11 +41,6 @@ public class SimpleTimerUnitTest extends BaseTest {
double t6 = t.getElapsedTime();
Assert.assertTrue(t5 >= t4, "Restarted timer elapsed time should be after elapsed time preceding the restart");
Assert.assertTrue(t6 >= t5, "Second elapsed time not after the first in restarted timer");
t.stop().start();
Assert.assertTrue(t.isRunning(), "second started timer isn't running");
Assert.assertTrue(t.getElapsedTime() >= 0.0, "elapsed time should have been reset");
Assert.assertTrue(t.getElapsedTime() < t6, "elapsed time isn't less than time before start call"); // we should have effective no elapsed time
}
private final static void idleLoop() {

View File

@ -30,8 +30,10 @@ import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import org.testng.annotations.*;
import java.util.LinkedList;
import java.util.List;
/**
* Created by IntelliJ IDEA.
@ -44,180 +46,214 @@ public class ReadClipperUnitTest extends BaseTest {
// TODO: Add error messages on failed tests
//int debug = 0;
GATKSAMRecord read, expected;
ReadClipper readClipper;
final static String BASES = "ACTG";
final static String QUALS = "!+5?"; //ASCII values = 33,43,53,63
@BeforeClass
public void testIfEqual( GATKSAMRecord read, byte[] readBases, String baseQuals, String cigar) {
Assert.assertEquals(read.getReadBases(), readBases);
Assert.assertEquals(read.getBaseQualityString(), baseQuals);
Assert.assertEquals(read.getCigarString(), cigar);
}
public class testParameter {
int inputStart;
int inputStop;
int substringStart;
int substringStop;
String cigar;
public testParameter(int InputStart, int InputStop, int SubstringStart, int SubstringStop, String Cigar) {
inputStart = InputStart;
inputStop = InputStop;
substringStart = SubstringStart;
substringStop = SubstringStop;
cigar = Cigar;
}
}
// What the test read looks like
// Ref: 1 2 3 4 5 6 7 8
// Read: 0 1 2 3 - - - -
// -----------------------------
// Bases: A C T G - - - -
// Quals: ! + 5 ? - - - -
@BeforeMethod
public void init() {
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
read = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, BASES.length());
read.setReadUnmappedFlag(true);
read.setReadBases(new String(BASES).getBytes());
read.setBaseQualityString(new String(QUALS));
readClipper = new ReadClipper(read);
//logger.warn(read.getCigarString());
}
@Test ( enabled = false )
@Test ( enabled = true )
public void testHardClipBothEndsByReferenceCoordinates() {
logger.warn("Executing testHardClipBothEndsByReferenceCoordinates");
logger.warn("Executing testHardClipBothEndsByReferenceCoordinates");
//int debug = 1;
//Clip whole read
Assert.assertEquals(readClipper.hardClipBothEndsByReferenceCoordinates(0,0), new GATKSAMRecord(read.getHeader()));
Assert.assertEquals(readClipper.hardClipBothEndsByReferenceCoordinates(1,1), new GATKSAMRecord(read.getHeader()));
//clip 1 base
expected = readClipper.hardClipBothEndsByReferenceCoordinates(0,3);
expected = readClipper.hardClipBothEndsByReferenceCoordinates(1,4);
Assert.assertEquals(expected.getReadBases(), BASES.substring(1,3).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,3));
Assert.assertEquals(expected.getCigarString(), "1H2M1H");
}
@Test ( enabled = false )
@Test ( enabled = true )
public void testHardClipByReadCoordinates() {
logger.warn("Executing testHardClipByReadCoordinates");
//Clip whole read
Assert.assertEquals(readClipper.hardClipByReadCoordinates(0,3), new GATKSAMRecord(read.getHeader()));
//clip 1 base at start
expected = readClipper.hardClipByReadCoordinates(0,0);
Assert.assertEquals(expected.getReadBases(), BASES.substring(1,4).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,4));
Assert.assertEquals(expected.getCigarString(), "1H3M");
List<testParameter> testList = new LinkedList<testParameter>();
testList.add(new testParameter(0,0,1,4,"1H3M"));//clip 1 base at start
testList.add(new testParameter(3,3,0,3,"3M1H"));//clip 1 base at end
testList.add(new testParameter(0,1,2,4,"2H2M"));//clip 2 bases at start
testList.add(new testParameter(2,3,0,2,"2M2H"));//clip 2 bases at end
//clip 1 base at end
expected = readClipper.hardClipByReadCoordinates(3,3);
Assert.assertEquals(expected.getReadBases(), BASES.substring(0,3).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,3));
Assert.assertEquals(expected.getCigarString(), "3M1H");
//clip 2 bases at start
expected = readClipper.hardClipByReadCoordinates(0,1);
Assert.assertEquals(expected.getReadBases(), BASES.substring(2,4).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(2,4));
Assert.assertEquals(expected.getCigarString(), "2H2M");
//clip 2 bases at end
expected = readClipper.hardClipByReadCoordinates(2,3);
Assert.assertEquals(expected.getReadBases(), BASES.substring(0,2).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,2));
Assert.assertEquals(expected.getCigarString(), "2M2H");
for ( testParameter p : testList ) {
init();
//logger.warn("Testing Parameters: " + p.inputStart+","+p.inputStop+","+p.substringStart+","+p.substringStop+","+p.cigar);
testIfEqual( readClipper.hardClipByReadCoordinates(p.inputStart, p.inputStop),
BASES.substring(p.substringStart,p.substringStop).getBytes(),
QUALS.substring(p.substringStart,p.substringStop),
p.cigar );
}
}
@Test ( enabled = false )
@Test ( enabled = true )
public void testHardClipByReferenceCoordinates() {
logger.warn("Executing testHardClipByReferenceCoordinates");
//logger.warn(debug);
//Clip whole read
Assert.assertEquals(readClipper.hardClipByReferenceCoordinates(1,4), new GATKSAMRecord(read.getHeader()));
//clip 1 base at start
expected = readClipper.hardClipByReferenceCoordinates(-1,1);
Assert.assertEquals(expected.getReadBases(), BASES.substring(1,4).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,4));
Assert.assertEquals(expected.getCigarString(), "1H3M");
List<testParameter> testList = new LinkedList<testParameter>();
testList.add(new testParameter(-1,1,1,4,"1H3M"));//clip 1 base at start
testList.add(new testParameter(4,-1,0,3,"3M1H"));//clip 1 base at end
testList.add(new testParameter(-1,2,2,4,"2H2M"));//clip 2 bases at start
testList.add(new testParameter(3,-1,0,2,"2M2H"));//clip 2 bases at end
//clip 1 base at end
expected = readClipper.hardClipByReferenceCoordinates(3,-1);
Assert.assertEquals(expected.getReadBases(), BASES.substring(0,3).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,3));
Assert.assertEquals(expected.getCigarString(), "3M1H");
//clip 2 bases at start
expected = readClipper.hardClipByReferenceCoordinates(-1,2);
Assert.assertEquals(expected.getReadBases(), BASES.substring(2,4).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(2,4));
Assert.assertEquals(expected.getCigarString(), "2H2M");
//clip 2 bases at end
expected = readClipper.hardClipByReferenceCoordinates(2,-1);
Assert.assertEquals(expected.getReadBases(), BASES.substring(0,2).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,2));
Assert.assertEquals(expected.getCigarString(), "2M2H");
for ( testParameter p : testList ) {
init();
//logger.warn("Testing Parameters: " + p.inputStart+","+p.inputStop+","+p.substringStart+","+p.substringStop+","+p.cigar);
testIfEqual( readClipper.hardClipByReferenceCoordinates(p.inputStart,p.inputStop),
BASES.substring(p.substringStart,p.substringStop).getBytes(),
QUALS.substring(p.substringStart,p.substringStop),
p.cigar );
}
}
@Test ( enabled = false )
@Test ( enabled = true )
public void testHardClipByReferenceCoordinatesLeftTail() {
init();
logger.warn("Executing testHardClipByReferenceCoordinatesLeftTail");
//Clip whole read
Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesLeftTail(4), new GATKSAMRecord(read.getHeader()));
//clip 1 base at start
expected = readClipper.hardClipByReferenceCoordinatesLeftTail(1);
Assert.assertEquals(expected.getReadBases(), BASES.substring(1,4).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,4));
Assert.assertEquals(expected.getCigarString(), "1H3M");
List<testParameter> testList = new LinkedList<testParameter>();
testList.add(new testParameter(1, -1, 1, 4, "1H3M"));//clip 1 base at start
testList.add(new testParameter(2, -1, 2, 4, "2H2M"));//clip 2 bases at start
//clip 2 bases at start
expected = readClipper.hardClipByReferenceCoordinatesLeftTail(2);
Assert.assertEquals(expected.getReadBases(), BASES.substring(2,4).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(2,4));
Assert.assertEquals(expected.getCigarString(), "2H2M");
for ( testParameter p : testList ) {
init();
//logger.warn("Testing Parameters: " + p.inputStart+","+p.substringStart+","+p.substringStop+","+p.cigar);
testIfEqual( readClipper.hardClipByReferenceCoordinatesLeftTail(p.inputStart),
BASES.substring(p.substringStart,p.substringStop).getBytes(),
QUALS.substring(p.substringStart,p.substringStop),
p.cigar );
}
}
@Test ( enabled = false )
@Test ( enabled = true )
public void testHardClipByReferenceCoordinatesRightTail() {
init();
logger.warn("Executing testHardClipByReferenceCoordinatesRightTail");
//Clip whole read
Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesRightTail(1), new GATKSAMRecord(read.getHeader()));
//clip 1 base at end
expected = readClipper.hardClipByReferenceCoordinatesRightTail(3);
Assert.assertEquals(expected.getReadBases(), BASES.substring(0,3).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,3));
Assert.assertEquals(expected.getCigarString(), "3M1H");
List<testParameter> testList = new LinkedList<testParameter>();
testList.add(new testParameter(-1, 4, 0, 3, "3M1H"));//clip 1 base at end
testList.add(new testParameter(-1, 3, 0, 2, "2M2H"));//clip 2 bases at end
//clip 2 bases at end
expected = readClipper.hardClipByReferenceCoordinatesRightTail(2);
Assert.assertEquals(expected.getReadBases(), BASES.substring(0,2).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,2));
Assert.assertEquals(expected.getCigarString(), "2M2H");
for ( testParameter p : testList ) {
init();
//logger.warn("Testing Parameters: " + p.inputStop+","+p.substringStart+","+p.substringStop+","+p.cigar);
testIfEqual( readClipper.hardClipByReferenceCoordinatesRightTail(p.inputStop),
BASES.substring(p.substringStart,p.substringStop).getBytes(),
QUALS.substring(p.substringStart,p.substringStop),
p.cigar );
}
}
@Test ( enabled = false )
@Test ( enabled = true ) // TODO This function is returning null reads
public void testHardClipLowQualEnds() {
logger.warn("Executing testHardClipByReferenceCoordinates");
logger.warn("Executing testHardClipByReferenceCoordinates");
//Clip whole read
Assert.assertEquals(readClipper.hardClipLowQualEnds((byte)64), new GATKSAMRecord(read.getHeader()));
//clip 1 base at start
expected = readClipper.hardClipLowQualEnds((byte)34);
Assert.assertEquals(expected.getReadBases(), BASES.substring(1,4).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,4));
Assert.assertEquals(expected.getCigarString(), "1H3M");
//clip 2 bases at start
expected = readClipper.hardClipLowQualEnds((byte)44);
Assert.assertEquals(expected.getReadBases(), BASES.substring(2,4).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(2,4));
Assert.assertEquals(expected.getCigarString(), "2H2M");
List<testParameter> testList = new LinkedList<testParameter>();
testList.add(new testParameter(1,-1,1,4,"1H3M"));//clip 1 base at start
testList.add(new testParameter(11,-1,2,4,"2H2M"));//clip 2 bases at start
for ( testParameter p : testList ) {
init();
//logger.warn("Testing Parameters: " + p.inputStart+","+p.substringStart+","+p.substringStop+","+p.cigar);
testIfEqual( readClipper.hardClipLowQualEnds( (byte)p.inputStart ),
BASES.substring(p.substringStart,p.substringStop).getBytes(),
QUALS.substring(p.substringStart,p.substringStop),
p.cigar );
}
/* todo find a better way to test lowqual tail clipping on both sides
// Reverse Quals sequence
readClipper.getRead().setBaseQualityString("?5+!"); // 63,53,43,33
//clip 1 base at end
expected = readClipper.hardClipLowQualEnds((byte)34);
Assert.assertEquals(expected.getReadBases(), BASES.substring(0,3).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,3));
Assert.assertEquals(expected.getCigarString(), "3M1H");
testList = new LinkedList<testParameter>();
testList.add(new testParameter(1,-1,0,3,"3M1H"));//clip 1 base at end
testList.add(new testParameter(11,-1,0,2,"2M2H"));//clip 2 bases at end
//clip 2 bases at end
expected = readClipper.hardClipLowQualEnds((byte)44);
Assert.assertEquals(expected.getReadBases(), BASES.substring(0,2).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,2));
Assert.assertEquals(expected.getCigarString(), "2M2H");
// revert Qual sequence
readClipper.getRead().setBaseQualityString(QUALS);
for ( testParameter p : testList ) {
init();
readClipper.getRead().setBaseQualityString("?5+!"); // 63,53,43,33
//logger.warn("Testing Parameters: " + p.inputStart+","+p.substringStart+","+p.substringStop+","+p.cigar);
testIfEqual( readClipper.hardClipLowQualEnds( (byte)p.inputStart ),
BASES.substring(p.substringStart,p.substringStop).getBytes(),
QUALS.substring(p.substringStart,p.substringStop),
p.cigar );
}
*/
}
}
public class CigarReadMaker {
}
@Test ( enabled = false )
public void testHardClipSoftClippedBases() {
// Generate a list of cigars to test
// We will use testParameter in the following way
// Right tail, left tail,
}
}

View File

@ -1,8 +1,8 @@
package org.broadinstitute.sting.utils.interval;
import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.picard.util.IntervalUtil;
import net.sf.samtools.SAMFileHeader;
import org.apache.commons.io.FileUtils;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
@ -762,4 +762,225 @@ public class IntervalUtilsUnitTest extends BaseTest {
List<GenomeLoc> merged = IntervalUtils.mergeIntervalLocations(locs, IntervalMergingRule.ALL);
Assert.assertEquals(merged.size(), 1);
}
/*
Split into tests that can be written to files and tested by writeFlankingIntervals,
and lists that cannot but are still handled by getFlankingIntervals.
*/
private static abstract class FlankingIntervalsTestData extends TestDataProvider {
final public File referenceFile;
final public GenomeLocParser parser;
final int basePairs;
final List<GenomeLoc> original;
final List<GenomeLoc> expected;
protected FlankingIntervalsTestData(Class<?> clazz, String name, File referenceFile, GenomeLocParser parser,
int basePairs, List<String> original, List<String> expected) {
super(clazz, name);
this.referenceFile = referenceFile;
this.parser = parser;
this.basePairs = basePairs;
this.original = parse(parser, original);
this.expected = parse(parser, expected);
}
private static List<GenomeLoc> parse(GenomeLocParser parser, List<String> locs) {
List<GenomeLoc> parsed = new ArrayList<GenomeLoc>();
for (String loc: locs)
parsed.add("unmapped".equals(loc) ? GenomeLoc.UNMAPPED : parser.parseGenomeLoc(loc));
return parsed;
}
}
private static class FlankingIntervalsFile extends FlankingIntervalsTestData {
public FlankingIntervalsFile(String name, File referenceFile, GenomeLocParser parser,
int basePairs, List<String> original, List<String> expected) {
super(FlankingIntervalsFile.class, name, referenceFile, parser, basePairs, original, expected);
}
}
private static class FlankingIntervalsList extends FlankingIntervalsTestData {
public FlankingIntervalsList(String name, File referenceFile, GenomeLocParser parser,
int basePairs, List<String> original, List<String> expected) {
super(FlankingIntervalsList.class, name, referenceFile, parser, basePairs, original, expected);
}
}
/* Intervals where the original and the flanks can be written to files. */
/**
 * Registers flanking-interval test cases that can be round-tripped through interval files.
 * Each case names an input interval list and the flanks expected when extending by the
 * given number of base pairs; flanks are clipped at contig boundaries and at neighboring
 * intervals. Registration happens via the FlankingIntervalsFile constructor side effect.
 *
 * @return the TestDataProvider-collected cases for the "flankingIntervalsFiles" provider
 */
@DataProvider(name = "flankingIntervalsFiles")
public Object[][] getFlankingIntervalsFiles() {
File hg19ReferenceFile = new File(BaseTest.hg19Reference);
// Length of contig 1, used to build cases at the far end of the contig.
int hg19Length1 = hg19GenomeLocParser.getContigInfo("1").getSequenceLength();
// Intervals touching the start of the contig: the left flank is clipped away entirely
// (atStart*) or truncated at base 1 (nearStart*).
new FlankingIntervalsFile("atStartBase1", hg19ReferenceFile, hg19GenomeLocParser, 1,
Arrays.asList("1:1"),
Arrays.asList("1:2"));
new FlankingIntervalsFile("atStartBase50", hg19ReferenceFile, hg19GenomeLocParser, 50,
Arrays.asList("1:1"),
Arrays.asList("1:2-51"));
new FlankingIntervalsFile("atStartRange50", hg19ReferenceFile, hg19GenomeLocParser, 50,
Arrays.asList("1:1-10"),
Arrays.asList("1:11-60"));
// Intervals touching the end of the contig: the right flank is clipped away.
new FlankingIntervalsFile("atEndBase1", hg19ReferenceFile, hg19GenomeLocParser, 1,
Arrays.asList("1:" + hg19Length1),
Arrays.asList("1:" + (hg19Length1 - 1)));
new FlankingIntervalsFile("atEndBase50", hg19ReferenceFile, hg19GenomeLocParser, 50,
Arrays.asList("1:" + hg19Length1),
Arrays.asList(String.format("1:%d-%d", hg19Length1 - 50, hg19Length1 - 1)));
new FlankingIntervalsFile("atEndRange50", hg19ReferenceFile, hg19GenomeLocParser, 50,
Arrays.asList(String.format("1:%d-%d", hg19Length1 - 10, hg19Length1)),
Arrays.asList(String.format("1:%d-%d", hg19Length1 - 60, hg19Length1 - 11)));
new FlankingIntervalsFile("nearStartBase1", hg19ReferenceFile, hg19GenomeLocParser, 1,
Arrays.asList("1:2"),
Arrays.asList("1:1", "1:3"));
new FlankingIntervalsFile("nearStartRange50", hg19ReferenceFile, hg19GenomeLocParser, 50,
Arrays.asList("1:21-30"),
Arrays.asList("1:1-20", "1:31-80"));
new FlankingIntervalsFile("nearEndBase1", hg19ReferenceFile, hg19GenomeLocParser, 1,
Arrays.asList("1:" + (hg19Length1 - 1)),
Arrays.asList("1:" + (hg19Length1 - 2), "1:" + hg19Length1));
new FlankingIntervalsFile("nearEndRange50", hg19ReferenceFile, hg19GenomeLocParser, 50,
Arrays.asList(String.format("1:%d-%d", hg19Length1 - 30, hg19Length1 - 21)),
Arrays.asList(
String.format("1:%d-%d", hg19Length1 - 80, hg19Length1 - 31),
String.format("1:%d-%d", hg19Length1 - 20, hg19Length1)));
// Intervals fully inside the contig: both flanks are produced untrimmed.
new FlankingIntervalsFile("beyondStartBase1", hg19ReferenceFile, hg19GenomeLocParser, 1,
Arrays.asList("1:3"),
Arrays.asList("1:2", "1:4"));
new FlankingIntervalsFile("beyondStartRange50", hg19ReferenceFile, hg19GenomeLocParser, 50,
Arrays.asList("1:101-200"),
Arrays.asList("1:51-100", "1:201-250"));
new FlankingIntervalsFile("beyondEndBase1", hg19ReferenceFile, hg19GenomeLocParser, 1,
Arrays.asList("1:" + (hg19Length1 - 3)),
Arrays.asList("1:" + (hg19Length1 - 4), "1:" + (hg19Length1 - 2)));
new FlankingIntervalsFile("beyondEndRange50", hg19ReferenceFile, hg19GenomeLocParser, 50,
Arrays.asList(String.format("1:%d-%d", hg19Length1 - 200, hg19Length1 - 101)),
Arrays.asList(
String.format("1:%d-%d", hg19Length1 - 250, hg19Length1 - 201),
String.format("1:%d-%d", hg19Length1 - 100, hg19Length1 - 51)));
// Pairs of intervals: flanks between neighbors merge, truncate, or disappear
// depending on how close the neighbors are.
new FlankingIntervalsFile("betweenFar50", hg19ReferenceFile, hg19GenomeLocParser, 50,
Arrays.asList("1:101-200", "1:401-500"),
Arrays.asList("1:51-100", "1:201-250", "1:351-400", "1:501-550"));
new FlankingIntervalsFile("betweenSpan50", hg19ReferenceFile, hg19GenomeLocParser, 50,
Arrays.asList("1:101-200", "1:301-400"),
Arrays.asList("1:51-100", "1:201-300", "1:401-450"));
new FlankingIntervalsFile("betweenOverlap50", hg19ReferenceFile, hg19GenomeLocParser, 50,
Arrays.asList("1:101-200", "1:271-400"),
Arrays.asList("1:51-100", "1:201-270", "1:401-450"));
new FlankingIntervalsFile("betweenShort50", hg19ReferenceFile, hg19GenomeLocParser, 50,
Arrays.asList("1:101-200", "1:221-400"),
Arrays.asList("1:51-100", "1:201-220", "1:401-450"));
new FlankingIntervalsFile("betweenNone50", hg19ReferenceFile, hg19GenomeLocParser, 50,
Arrays.asList("1:101-200", "1:121-400"),
Arrays.asList("1:51-100", "1:401-450"));
// Neighboring contigs each get their own independent flanks.
new FlankingIntervalsFile("twoContigs", hg19ReferenceFile, hg19GenomeLocParser, 50,
Arrays.asList("1:101-200", "2:301-400"),
Arrays.asList("1:51-100", "1:201-250", "2:251-300", "2:401-450"));
// Explicit testing a problematic agilent target pair
new FlankingIntervalsFile("badAgilent", hg19ReferenceFile, hg19GenomeLocParser, 50,
Arrays.asList("2:74756257-74756411", "2:74756487-74756628"),
// wrong! ("2:74756206-74756256", "2:74756412-74756462", "2:74756436-74756486", "2:74756629-74756679")
Arrays.asList("2:74756207-74756256", "2:74756412-74756486", "2:74756629-74756678"));
return TestDataProvider.getTests(FlankingIntervalsFile.class);
}
/* Intervals where either the original and/or the flanks cannot be written to a file. */
/**
 * Registers flanking-interval test cases that cannot all be written to interval files:
 * empty lists, unmapped intervals, and whole-contig intervals. These are handled by
 * the in-memory getFlankingIntervals API but cause writeFlankingIntervals to throw.
 *
 * @return the TestDataProvider-collected cases for the "flankingIntervalsLists" provider
 */
@DataProvider(name = "flankingIntervalsLists")
public Object[][] getFlankingIntervalsLists() {
File hg19ReferenceFile = new File(BaseTest.hg19Reference);
List<String> empty = Collections.emptyList();
// No input intervals -> no flanks.
new FlankingIntervalsList("empty", hg19ReferenceFile, hg19GenomeLocParser, 50,
empty,
empty);
// Unmapped intervals have no coordinates to flank.
new FlankingIntervalsList("unmapped", hg19ReferenceFile, hg19GenomeLocParser, 50,
Arrays.asList("unmapped"),
empty);
// Whole-contig intervals leave no room for flanks.
new FlankingIntervalsList("fullContig", hg19ReferenceFile, hg19GenomeLocParser, 50,
Arrays.asList("1"),
empty);
new FlankingIntervalsList("fullContigs", hg19ReferenceFile, hg19GenomeLocParser, 50,
Arrays.asList("1", "2", "3"),
empty);
// Unmapped entries mixed with mapped ones are ignored by the in-memory API.
new FlankingIntervalsList("betweenWithUnmapped", hg19ReferenceFile, hg19GenomeLocParser, 50,
Arrays.asList("1:101-200", "1:301-400", "unmapped"),
Arrays.asList("1:51-100", "1:201-300", "1:401-450"));
return TestDataProvider.getTests(FlankingIntervalsList.class);
}
/**
 * Round-trips each file-compatible test case: writes the original intervals to a temp
 * file, flanks them through the file-based API, reads the result back, and compares
 * against the expected flanking intervals.
 */
@Test(dataProvider = "flankingIntervalsFiles")
public void testWriteFlankingIntervals(FlankingIntervalsTestData data) throws Exception {
    final File inputFile = createTempFile("original.", ".intervals");
    final File outputFile = createTempFile("flanking.", ".intervals");
    try {
        // Serialize the input intervals, one per line.
        final List<String> intervalLines = new ArrayList<String>();
        for (final GenomeLoc loc : data.original) {
            intervalLines.add(loc.toString());
        }
        FileUtils.writeLines(inputFile, intervalLines);

        IntervalUtils.writeFlankingIntervals(data.referenceFile, inputFile, outputFile, data.basePairs);

        final List<GenomeLoc> actual = IntervalUtils.intervalFileToList(data.parser, outputFile.getAbsolutePath());
        final String description = String.format("%n name: %s%n original: %s%n actual: %s%n expected: %s%n",
                data.toString(), data.original, actual, data.expected);
        Assert.assertEquals(actual, data.expected, description);
    } finally {
        // Best-effort cleanup of the temp files.
        FileUtils.deleteQuietly(inputFile);
        FileUtils.deleteQuietly(outputFile);
    }
}
@Test(dataProvider = "flankingIntervalsLists", expectedExceptions = UserException.class)
public void testWritingBadFlankingIntervals(FlankingIntervalsTestData data) throws Exception {
File originalFile = createTempFile("original.", ".intervals");
File flankingFile = createTempFile("flanking.", ".intervals");
try {
List<String> lines = new ArrayList<String>();
for (GenomeLoc loc: data.original)
lines.add(loc.toString());
FileUtils.writeLines(originalFile, lines);
// Should throw a user exception on bad input if either the original
// intervals are empty or if the flanking intervals are empty
IntervalUtils.writeFlankingIntervals(data.referenceFile, originalFile, flankingFile, data.basePairs);
} finally {
FileUtils.deleteQuietly(originalFile);
FileUtils.deleteQuietly(flankingFile);
}
}
/**
 * Exercises the in-memory flanking API on interval lists that cannot be round-tripped
 * through files (empty, unmapped, whole contigs) and checks the computed flanks.
 */
@Test(dataProvider = "flankingIntervalsLists")
public void testGetFlankingIntervals(FlankingIntervalsTestData data) {
    final List<GenomeLoc> actual = IntervalUtils.getFlankingIntervals(data.parser, data.original, data.basePairs);
    final String description = String.format("%n name: %s%n original: %s%n actual: %s%n expected: %s%n",
            data.toString(), data.original, actual, data.expected);
    Assert.assertEquals(actual, data.expected, description);
}
}

View File

@ -29,10 +29,13 @@ package org.broadinstitute.sting.utils.variantcontext;
// the imports for unit testing.
import org.broadinstitute.sting.utils.MathUtils;
import org.testng.Assert;
import org.testng.annotations.Test;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import java.util.EnumMap;
/**
* Basic unit test for Genotype likelihoods objects
@ -69,6 +72,50 @@ public class GenotypeLikelihoodsUnitTest {
gl.getAsVector();
}
/**
 * Checks GenotypeLikelihoods.getAsMap in both log10 and linear scale against the raw
 * likelihood vector v, and that missing likelihoods (".") produce a null map.
 */
@Test
public void testGetAsMap(){
    GenotypeLikelihoods likelihoods = new GenotypeLikelihoods(v);
    final Genotype.Type[] diploidTypes =
            { Genotype.Type.HOM_REF, Genotype.Type.HET, Genotype.Type.HOM_VAR };

    // Log10 scale: map values should be exactly the raw vector entries.
    // Type ordinals are offset by one relative to the vector indices.
    EnumMap<Genotype.Type,Double> glMap = likelihoods.getAsMap(false);
    for (final Genotype.Type type : diploidTypes) {
        Assert.assertEquals(v[type.ordinal()-1], glMap.get(type));
    }

    // Linear scale: map values should match the normalized vector.
    glMap = likelihoods.getAsMap(true);
    final double[] normalized = MathUtils.normalizeFromLog10(v);
    for (final Genotype.Type type : diploidTypes) {
        Assert.assertEquals(normalized[type.ordinal()-1], glMap.get(type));
    }

    // Missing likelihoods ("." in the VCF) produce a null map.
    likelihoods = new GenotypeLikelihoods(".");
    glMap = likelihoods.getAsMap(false);
    Assert.assertNull(glMap);
}
/**
 * Checks GenotypeLikelihoods.getNegLog10GQ: for the best-guess genotype the GQ comes
 * from the PL string; for the others it is -log10(1 - normalized likelihood); and
 * missing likelihoods (".") report negative infinity for every genotype.
 */
@Test
public void testGetNegLog10GQ(){
    GenotypeLikelihoods likelihoods = new GenotypeLikelihoods(vPLString);

    // GQ for the best guess genotype.
    Assert.assertEquals(likelihoods.getNegLog10GQ(Genotype.Type.HET),3.9);

    // GQ for the remaining genotypes, derived from the normalized likelihoods.
    final double[] normalized = MathUtils.normalizeFromLog10(likelihoods.getAsVector());
    Assert.assertEquals(likelihoods.getNegLog10GQ(Genotype.Type.HOM_REF), -1.0 * Math.log10(1.0 - normalized[Genotype.Type.HOM_REF.ordinal()-1]));
    Assert.assertEquals(likelihoods.getNegLog10GQ(Genotype.Type.HOM_VAR), -1.0 * Math.log10(1.0 - normalized[Genotype.Type.HOM_VAR.ordinal()-1]));

    // Missing likelihoods: GQ is undefined and reported as -Infinity.
    likelihoods = new GenotypeLikelihoods(".");
    Assert.assertEquals(likelihoods.getNegLog10GQ(Genotype.Type.HOM_REF),Double.NEGATIVE_INFINITY);
    Assert.assertEquals(likelihoods.getNegLog10GQ(Genotype.Type.HET),Double.NEGATIVE_INFINITY);
    Assert.assertEquals(likelihoods.getNegLog10GQ(Genotype.Type.HOM_VAR),Double.NEGATIVE_INFINITY);
}
private void assertDoubleArraysAreEqual(double[] v1, double[] v2) {
Assert.assertEquals(v1.length, v2.length);
for ( int i = 0; i < v1.length; i++ ) {

View File

@ -0,0 +1,48 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.queue.extensions.gatk
import org.broadinstitute.sting.queue.function.InProcessFunction
import org.broadinstitute.sting.commandline.{Output, Argument, Input}
import java.io.File
import org.broadinstitute.sting.utils.interval.IntervalUtils
/**
 * In-process Queue function that reads an interval list and writes the intervals
 * flanking it, delegating entirely to IntervalUtils.writeFlankingIntervals.
 */
class WriteFlankingIntervalsFunction extends InProcessFunction {
@Input(doc="The reference sequence")
var reference : File = _

@Input(doc="The interval list to flank")
var inputIntervals : File = _

@Output(doc="The output intervals file to write to")
var outputIntervals: File = _

@Argument(doc="Number of base pair to flank the input intervals")
var flankSize : Int = _

// Runs in-process: no external command line is spawned.
def run() {
IntervalUtils.writeFlankingIntervals(reference, inputIntervals, outputIntervals, flankSize)
}
}

View File

@ -1,135 +0,0 @@
package org.broadinstitute.sting.queue.library.ipf.intervals
import org.broadinstitute.sting.queue.function.InProcessFunction
import org.broadinstitute.sting.commandline._
import java.io.{PrintStream, File}
import collection.JavaConversions._
import org.broadinstitute.sting.utils.text.XReadLines
import net.sf.picard.reference.FastaSequenceFile
import org.broadinstitute.sting.utils.{GenomeLoc, GenomeLocParser}
import collection.immutable.TreeSet
// todo -- this is unsafe. Need to use a reference dictionary to ensure no off-contig targets are created
/**
 * In-process Queue function that, for each input interval, emits a left window
 * [start-startInt-sizeInt, start-startInt] and a right window
 * [stop+startInt, stop+startInt+sizeInt], trimming each window so it does not run
 * into the neighboring input interval. Emitted windows are buffered in a sorted set
 * and flushed to the output file periodically.
 *
 * NOTE(review): input intervals are assumed sorted by position — refine() only looks
 * at the immediately previous/next line. TODO confirm callers guarantee this.
 */
class ExpandIntervals(in : File, start: Int, size: Int, out: File, ref: File, ipType: String, opType: String) extends InProcessFunction {
@Input(doc="The interval list to expand") val inList : File = in
@Input(doc="The reference sequence") val refDict : File = ref
@Argument(doc="Number of basepair to start the expanded interval") val startInt : Int = start
// NOTE(review): "baispair" is a typo in the runtime doc string ("basepair");
// left untouched here because it is user-visible argument help text.
@Argument(doc="Number of baispair to stop the expanded interval") val sizeInt : Int = size
@Output(doc="The output intervals file to write to") val outList : File = out
@Argument(doc="The output format for the intervals") val outTypeStr = opType
@Argument(doc="The input format for the intervals") val inTypeStr = ipType

// Mutable run state, initialized in run().
var output : PrintStream = _
var parser : GenomeLocParser = _
var xrl : XReadLines = _
val outType = IntervalFormatType.convert(outTypeStr)
val inType = IntervalFormatType.convert(inTypeStr)
// BED coordinates are 0-based half-open; interval format is 1-based closed.
var offsetIn : Int = 0
var offsetOut : Int = 0
// NOTE(review): 'first' and 'lastTwo' are never read or written in this class.
var first : Boolean = true
var lastTwo : (GenomeLoc,GenomeLoc) = _
// Buffer of emitted-but-unwritten windows, kept in genomic order.
var intervalCache : TreeSet[GenomeLoc] = _
// Flush the cache to disk every this many input lines.
val LINES_TO_CACHE : Int = 1000

// Streams the input one line at a time, keeping a (prev, cur, next) window so each
// interval's flanks can be trimmed against its immediate neighbors.
def run = {
output = new PrintStream(outList)
intervalCache = new TreeSet[GenomeLoc]()(new Ordering[GenomeLoc]{
def compare(o1: GenomeLoc, o2: GenomeLoc) : Int = { o1.compareTo(o2) }
})
parser = new GenomeLocParser(new FastaSequenceFile(ref,true))
xrl = new XReadLines(inList)
offsetIn = if (isBed(inType)) 1 else 0
offsetOut = if( isBed(outType)) 1 else 0
// Skip SAM-style header lines ("@...") at the top of the interval list.
var line : String = xrl.next
while ( line.startsWith("@") ) {
line = xrl.next
}
var prevLoc: GenomeLoc = null
var curLoc: GenomeLoc = null
var nextLoc : GenomeLoc = parseGenomeInterval(line)
var linesProcessed : Int = 1
while ( prevLoc != null || curLoc != null || nextLoc != null ) {
// Slide the three-interval window forward by one line.
prevLoc = curLoc
curLoc = nextLoc
nextLoc = if ( xrl.hasNext ) parseGenomeInterval(xrl.next) else null
if ( curLoc != null ) {
// Trim the left flank against the previous interval and the right flank
// against the next one.
val left: GenomeLoc = refine(expandLeft(curLoc),prevLoc)
val right: GenomeLoc = refine(expandRight(curLoc),nextLoc)
if ( left != null ) {
intervalCache += left
}
if ( right != null ) {
intervalCache += right
}
}
linesProcessed += 1
if ( linesProcessed % LINES_TO_CACHE == 0 ) {
// Flush windows that are far enough behind the cursor that no future
// interval can still affect them.
val toPrint = intervalCache.filter( u => (u.isBefore(prevLoc) && u.distance(prevLoc) > startInt+sizeInt))
intervalCache = intervalCache -- toPrint
toPrint.foreach(u => output.print("%s%n".format(repr(u))))
}
//System.out.printf("%s".format(if ( curLoc == null ) "null" else repr(curLoc)))
}
// Flush whatever remains after the last input line.
intervalCache.foreach(u => output.print("%s%n".format(repr(u))))
output.close()
}

// Window of sizeInt bases ending startInt bases before g's start.
// NOTE(review): no clamping at position 1 — can create off-contig coordinates
// (see the todo above the class).
def expandLeft(g: GenomeLoc) : GenomeLoc = {
parser.createGenomeLoc(g.getContig,g.getStart-startInt-sizeInt,g.getStart-startInt)
}

// Window of sizeInt bases starting startInt bases after g's stop.
// NOTE(review): no clamping at the contig end — same off-contig caveat as expandLeft.
def expandRight(g: GenomeLoc) : GenomeLoc = {
parser.createGenomeLoc(g.getContig,g.getStop+startInt,g.getStop+startInt+sizeInt)
}

// Trims newG so it keeps at least startInt distance from the neighboring interval
// borderG; returns null when no room remains after trimming.
def refine(newG: GenomeLoc, borderG: GenomeLoc) : GenomeLoc = {
if ( borderG == null || ! newG.overlapsP(borderG) ) {
return newG
} else {
if ( newG.getStart < borderG.getStart ) {
// newG is the left flank: cut its right edge back before borderG.
if ( borderG.getStart - startInt > newG.getStart ) {
return parser.createGenomeLoc(newG.getContig,newG.getStart,borderG.getStart-startInt)
}
} else {
// newG is the right flank: push its left edge past borderG.
if ( borderG.getStop + startInt < newG.getStop ){
return parser.createGenomeLoc(newG.getContig,borderG.getStop+startInt,newG.getStop)
}
}
}
null
}

// Formats a location for output: "chr:start-stop" for INTERVALS, tab-separated
// (with BED coordinate offsets applied) otherwise.
def repr(loc : GenomeLoc) : String = {
if ( loc == null ) return "null"
if ( outType == IntervalFormatType.INTERVALS ) {
return "%s:%d-%d".format(loc.getContig,loc.getStart,loc.getStop)
} else {
return "%s\t%d\t%d".format(loc.getContig,loc.getStart-offsetOut,loc.getStop+offsetOut)
}
}

def isBed(t: IntervalFormatType.IntervalFormatType) : Boolean = {
t == IntervalFormatType.BED
}

// Parses either "chr:start-stop" or whitespace-separated "chr start stop",
// applying the BED input offset to the latter.
def parseGenomeInterval( s : String ) : GenomeLoc = {
val sp = s.split("\\s+")
// todo -- maybe specify whether the bed format [0,6) --> (1,2,3,4,5) is what's wanted
if ( s.contains(":") ) parser.parseGenomeLoc(s) else parser.createGenomeLoc(sp(0),sp(1).toInt+offsetIn,sp(2).toInt-offsetIn)
}

// Supported interval formats.
// NOTE(review): the Enumeration(names...) constructor is deprecated in later
// Scala versions; a sealed trait / case objects would be the modern equivalent.
object IntervalFormatType extends Enumeration("INTERVALS","BED","TDF") {
type IntervalFormatType = Value
val INTERVALS,BED,TDF = Value

def convert(s : String) : IntervalFormatType = {
if ( s.equals("INTERVALS") ) INTERVALS else { if (s.equals("BED") ) BED else TDF}
}
}
}

View File

@ -1,70 +0,0 @@
package org.broadinstitute.sting.queue.library.ipf.intervals
import org.broadinstitute.sting.queue.function.InProcessFunction
import collection.JavaConversions._
import org.broadinstitute.sting.commandline._
import java.io.{PrintStream, File}
import net.sf.samtools.{SAMSequenceRecord, SAMFileHeader, SAMSequenceDictionary}
import org.broadinstitute.sting.utils.text.XReadLines
import org.broadinstitute.sting.utils.{GenomeLoc, GenomeLocParser}
/**
 * In-process Queue function that computes the intersection of several interval lists:
 * intervals are pooled and sorted, clustered by mutual overlap, and each cluster that
 * received a contribution from every input list is reduced to its common intersection.
 *
 * Fixes relative to the previous version:
 *  - overlapFold crashed with NoSuchElementException on the first element because it
 *    called .last on the initial empty accumulator; the empty case is now handled.
 *  - mapIntersect's "all sources accounted for" guard compared the sum of 0-based
 *    zipWithIndex indices against the 1-based range(1, size).sum (which can never be
 *    equal), and its `null` was a discarded expression value rather than a result, so
 *    incomplete clusters were never filtered. The guard now counts distinct source
 *    indices and actually returns null for incomplete clusters.
 */
class IntersectIntervals(iVals: List[File], outFile: File, bed: Boolean) extends InProcessFunction {
  @Input(doc="List of interval files to find the intersection of") val intervals : List[File] = iVals
  @Output(doc="Output interval file to which to write") val output : File = outFile
  @Argument(doc="Assume the input interval lists are sorted in the proper order") var assumeSorted = false
  @Argument(doc="Is the tdf in bed file (0-based clopen: 0 5 for {1,2,3,4}?") var isBed = bed

  // Mutable run state, initialized in run().
  var outStream : PrintStream = _
  // NOTE(review): 'contigs' and 'assumeSorted' are never read in this class.
  var contigs : List[String] = Nil
  var dict : SAMSequenceDictionary = _
  var parser : GenomeLocParser = _

  def run = {
    outStream = new PrintStream(output)
    dict = new SAMSequenceDictionary
    // note: memory hog — every interval of every input file is held in memory,
    // tagged with the index of the file it came from.
    val sources : List[(List[(String,Int,Int)],Int)] = intervals.map(g => asScalaIterator(new XReadLines(g)).map(u => parse(u)).toList).zipWithIndex
    // Build a sequence dictionary from the contig names seen in the inputs so the
    // parser accepts them (lengths are unknown, so MAX_VALUE is used).
    sources.map(u => u._1).flatten.map(u => u._1).distinct.foreach(u => dict.addSequence(new SAMSequenceRecord(u,Integer.MAX_VALUE)))
    parser = new GenomeLocParser(dict)
    // Sort all tagged intervals, cluster them by overlap, intersect each complete
    // cluster, and write the non-empty results.
    sources.map( (u: (List[(String,Int,Int)],Int)) => u._1.map(g => (newGenomeLoc(g),u._2))).flatten.sortWith( (a,b) => (a._1 compareTo b._1) < 0 ).foldLeft[List[List[(GenomeLoc,Int)]]](Nil)( (a,b) => overlapFold(a,b)).map(u => mapIntersect(u)).filter(h => h != null && h.size > 0).foreach(h => writeOut(h))
    outStream.close()
  }

  // Writes one intersected interval per line.
  def writeOut(g : GenomeLoc) : Unit = {
    outStream.print("%s%n".format(g.toString))
  }

  // Parses "chr:start-stop" or whitespace-separated "chr start stop" (with the
  // BED half-open offsets applied) into a (contig, start, stop) triple.
  def parse(s : String) : (String,Int,Int) = {
    if ( s.contains(":") ) {
      val split1 = s.split(":")
      val split2 = split1(1).split("-")
      return (split1(0),split2(0).toInt,split2(1).toInt)
    } else {
      val split = s.split("\\s+")
      return (split(0),split(1).toInt + (if(isBed) 1 else 0) ,split(2).toInt - (if(isBed) 1 else 0) )
    }
  }

  def newGenomeLoc(coords : (String,Int,Int) ) : GenomeLoc = {
    parser.createGenomeLoc(coords._1,coords._2,coords._3)
  }

  // Folds a position-sorted tagged interval into a list of overlap clusters:
  // extends the last cluster when b overlaps all of it, otherwise starts a new
  // cluster seeded with the tail of the last one that still overlaps b.
  def overlapFold( a: List[List[(GenomeLoc,Int)]], b: (GenomeLoc,Int) ) : List[List[(GenomeLoc,Int)]] = {
    if ( a.isEmpty ) {
      // First element: start the first cluster (previously crashed on Nil.last).
      List(List(b))
    } else if ( a.last.forall(u => u._1.overlapsP(b._1)) ) {
      a.init :+ (a.last :+ b)
    } else {
      a :+ ( a.last.dropWhile(u => ! u._1.overlapsP(b._1)) :+ b)
    }
  }

  // Reduces a cluster to the intersection of its intervals, or null when some
  // input list did not contribute to the cluster (no common intersection exists).
  def mapIntersect( u: List[(GenomeLoc,Int)]) : GenomeLoc = {
    if ( u.map(h => h._2).distinct.size != intervals.size ) {
      null
    } else {
      u.map(h => h._1).reduceLeft[GenomeLoc]( (a,b) => a.intersect(b) )
    }
  }

  // Inclusive [a, b] integer range; retained for interface compatibility.
  def range(a: Int, b: Int) : Range = new Range(a,b+1,1)
}

View File

@ -1,3 +1,3 @@
<ivy-module version="1.0">
<info organisation="net.sf.snpeff" module="snpeff" revision="2.0.2" status="release" />
<info organisation="net.sf.snpeff" module="snpeff" revision="2.0.4rc3" status="release" />
</ivy-module>