Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
77ef2be9b8
2
ivy.xml
2
ivy.xml
|
|
@ -76,7 +76,7 @@
|
|||
<dependency org="org.apache.poi" name="poi-ooxml" rev="3.8-beta3" />
|
||||
|
||||
<!-- snpEff annotator for pipelines -->
|
||||
<dependency org="net.sf.snpeff" name="snpeff" rev="2.0.2" />
|
||||
<dependency org="net.sf.snpeff" name="snpeff" rev="2.0.4rc3" />
|
||||
|
||||
<!-- Exclude dependencies on sun libraries where the downloads aren't available but included in the jvm. -->
|
||||
<exclude org="javax.servlet" />
|
||||
|
|
|
|||
|
|
@ -480,7 +480,7 @@ public class GenomeAnalysisEngine {
|
|||
}
|
||||
} else if (walker instanceof ReadPairWalker) {
|
||||
if(readsDataSource != null && readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.queryname)
|
||||
throw new UserException.MissortedBAM(SAMFileHeader.SortOrder.queryname, "Read pair walkers can only walk over query name-sorted data. Please resort your input BAM file.");
|
||||
throw new UserException.MissortedBAM(SAMFileHeader.SortOrder.queryname, "Read pair walkers are exceptions in that they cannot be run on coordinate-sorted BAMs but instead require query name-sorted files. You will need to resort your input BAM file in query name order to use this walker.");
|
||||
if(intervals != null && !intervals.isEmpty())
|
||||
throw new UserException.CommandLineException("Pairs traversal cannot be used in conjunction with intervals.");
|
||||
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.samples;
|
|||
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
|
|
@ -110,6 +111,17 @@ public class Sample implements Comparable<Sample> { // implements java.io.Serial
|
|||
return infoDB.getSample(paternalID);
|
||||
}
|
||||
|
||||
public ArrayList<Sample> getParents(){
|
||||
ArrayList<Sample> parents = new ArrayList<Sample>(2);
|
||||
Sample parent = getMother();
|
||||
if(parent != null)
|
||||
parents.add(parent);
|
||||
parent = getFather();
|
||||
if(parent != null)
|
||||
parents.add(parent);
|
||||
return parents;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get gender of the sample
|
||||
* @return property of key "gender" - must be of type Gender
|
||||
|
|
|
|||
|
|
@ -49,5 +49,5 @@ public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnno
|
|||
|
||||
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.DEPTH_KEY); }
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Integer, "Filtered Depth")); }
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Integer, "Approximate read depth; some reads may have been filtered")); }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
|
|||
|
||||
// We refuse to parse SnpEff output files generated by unsupported versions, or
|
||||
// lacking a SnpEff version number in the VCF header:
|
||||
public static final String[] SUPPORTED_SNPEFF_VERSIONS = { "2.0.2" };
|
||||
public static final String[] SUPPORTED_SNPEFF_VERSIONS = { "2.0.4" };
|
||||
public static final String SNPEFF_VCF_HEADER_VERSION_LINE_KEY = "SnpEffVersion";
|
||||
public static final String SNPEFF_VCF_HEADER_COMMAND_LINE_KEY = "SnpEffCmd";
|
||||
|
||||
|
|
@ -77,13 +77,13 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
|
|||
public enum InfoFieldKey {
|
||||
EFFECT_KEY ("SNPEFF_EFFECT", -1),
|
||||
IMPACT_KEY ("SNPEFF_IMPACT", 0),
|
||||
CODON_CHANGE_KEY ("SNPEFF_CODON_CHANGE", 1),
|
||||
AMINO_ACID_CHANGE_KEY ("SNPEFF_AMINO_ACID_CHANGE", 2),
|
||||
GENE_NAME_KEY ("SNPEFF_GENE_NAME", 3),
|
||||
GENE_BIOTYPE_KEY ("SNPEFF_GENE_BIOTYPE", 4),
|
||||
TRANSCRIPT_ID_KEY ("SNPEFF_TRANSCRIPT_ID", 6),
|
||||
EXON_ID_KEY ("SNPEFF_EXON_ID", 7),
|
||||
FUNCTIONAL_CLASS_KEY ("SNPEFF_FUNCTIONAL_CLASS", -1);
|
||||
FUNCTIONAL_CLASS_KEY ("SNPEFF_FUNCTIONAL_CLASS", 1),
|
||||
CODON_CHANGE_KEY ("SNPEFF_CODON_CHANGE", 2),
|
||||
AMINO_ACID_CHANGE_KEY ("SNPEFF_AMINO_ACID_CHANGE", 3),
|
||||
GENE_NAME_KEY ("SNPEFF_GENE_NAME", 4),
|
||||
GENE_BIOTYPE_KEY ("SNPEFF_GENE_BIOTYPE", 5),
|
||||
TRANSCRIPT_ID_KEY ("SNPEFF_TRANSCRIPT_ID", 7),
|
||||
EXON_ID_KEY ("SNPEFF_EXON_ID", 8);
|
||||
|
||||
// Actual text of the key
|
||||
private final String keyName;
|
||||
|
|
@ -110,70 +110,53 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
|
|||
// are validated against this list.
|
||||
public enum EffectType {
|
||||
// High-impact effects:
|
||||
FRAME_SHIFT (EffectFunctionalClass.NONE, false),
|
||||
STOP_GAINED (EffectFunctionalClass.NONSENSE, false),
|
||||
START_LOST (EffectFunctionalClass.NONE, false),
|
||||
SPLICE_SITE_ACCEPTOR (EffectFunctionalClass.NONE, false),
|
||||
SPLICE_SITE_DONOR (EffectFunctionalClass.NONE, false),
|
||||
EXON_DELETED (EffectFunctionalClass.NONE, false),
|
||||
STOP_LOST (EffectFunctionalClass.NONE, false),
|
||||
SPLICE_SITE_ACCEPTOR,
|
||||
SPLICE_SITE_DONOR,
|
||||
START_LOST,
|
||||
EXON_DELETED,
|
||||
FRAME_SHIFT,
|
||||
STOP_GAINED,
|
||||
STOP_LOST,
|
||||
|
||||
// Moderate-impact effects:
|
||||
NON_SYNONYMOUS_CODING (EffectFunctionalClass.MISSENSE, false),
|
||||
CODON_CHANGE (EffectFunctionalClass.NONE, false),
|
||||
CODON_INSERTION (EffectFunctionalClass.NONE, false),
|
||||
CODON_CHANGE_PLUS_CODON_INSERTION (EffectFunctionalClass.NONE, false),
|
||||
CODON_DELETION (EffectFunctionalClass.NONE, false),
|
||||
CODON_CHANGE_PLUS_CODON_DELETION (EffectFunctionalClass.NONE, false),
|
||||
UTR_5_DELETED (EffectFunctionalClass.NONE, false),
|
||||
UTR_3_DELETED (EffectFunctionalClass.NONE, false),
|
||||
NON_SYNONYMOUS_CODING,
|
||||
CODON_CHANGE,
|
||||
CODON_INSERTION,
|
||||
CODON_CHANGE_PLUS_CODON_INSERTION,
|
||||
CODON_DELETION,
|
||||
CODON_CHANGE_PLUS_CODON_DELETION,
|
||||
UTR_5_DELETED,
|
||||
UTR_3_DELETED,
|
||||
|
||||
// Low-impact effects:
|
||||
SYNONYMOUS_CODING (EffectFunctionalClass.SILENT, false),
|
||||
SYNONYMOUS_START (EffectFunctionalClass.SILENT, false),
|
||||
NON_SYNONYMOUS_START (EffectFunctionalClass.SILENT, false),
|
||||
SYNONYMOUS_STOP (EffectFunctionalClass.SILENT, false),
|
||||
NON_SYNONYMOUS_STOP (EffectFunctionalClass.SILENT, false),
|
||||
START_GAINED (EffectFunctionalClass.NONE, false),
|
||||
SYNONYMOUS_START,
|
||||
NON_SYNONYMOUS_START,
|
||||
START_GAINED,
|
||||
SYNONYMOUS_CODING,
|
||||
SYNONYMOUS_STOP,
|
||||
NON_SYNONYMOUS_STOP,
|
||||
|
||||
// Modifiers:
|
||||
NONE (EffectFunctionalClass.NONE, true),
|
||||
CHROMOSOME (EffectFunctionalClass.NONE, true),
|
||||
INTERGENIC (EffectFunctionalClass.NONE, true),
|
||||
UPSTREAM (EffectFunctionalClass.NONE, true),
|
||||
UTR_5_PRIME (EffectFunctionalClass.NONE, true),
|
||||
CDS (EffectFunctionalClass.NONE, true),
|
||||
GENE (EffectFunctionalClass.NONE, true),
|
||||
TRANSCRIPT (EffectFunctionalClass.NONE, true),
|
||||
EXON (EffectFunctionalClass.NONE, true),
|
||||
INTRON (EffectFunctionalClass.NONE, true),
|
||||
UTR_3_PRIME (EffectFunctionalClass.NONE, true),
|
||||
DOWNSTREAM (EffectFunctionalClass.NONE, true),
|
||||
INTRON_CONSERVED (EffectFunctionalClass.NONE, true),
|
||||
INTERGENIC_CONSERVED (EffectFunctionalClass.NONE, true),
|
||||
REGULATION (EffectFunctionalClass.NONE, true),
|
||||
CUSTOM (EffectFunctionalClass.NONE, true),
|
||||
WITHIN_NON_CODING_GENE (EffectFunctionalClass.NONE, true);
|
||||
|
||||
private final EffectFunctionalClass functionalClass;
|
||||
private final boolean isModifier;
|
||||
|
||||
EffectType ( EffectFunctionalClass functionalClass, boolean isModifier ) {
|
||||
this.functionalClass = functionalClass;
|
||||
this.isModifier = isModifier;
|
||||
}
|
||||
|
||||
public EffectFunctionalClass getFunctionalClass() {
|
||||
return functionalClass;
|
||||
}
|
||||
|
||||
public boolean isModifier() {
|
||||
return isModifier;
|
||||
}
|
||||
NONE,
|
||||
CHROMOSOME,
|
||||
CUSTOM,
|
||||
CDS,
|
||||
GENE,
|
||||
TRANSCRIPT,
|
||||
EXON,
|
||||
INTRON_CONSERVED,
|
||||
UTR_5_PRIME,
|
||||
UTR_3_PRIME,
|
||||
DOWNSTREAM,
|
||||
INTRAGENIC,
|
||||
INTERGENIC,
|
||||
INTERGENIC_CONSERVED,
|
||||
UPSTREAM,
|
||||
REGULATION,
|
||||
INTRON
|
||||
}
|
||||
|
||||
// SnpEff labels each effect as either LOW, MODERATE, or HIGH impact. We take the additional step of
|
||||
// classifying some of the LOW impact effects as MODIFIERs.
|
||||
// SnpEff labels each effect as either LOW, MODERATE, or HIGH impact, or as a MODIFIER.
|
||||
public enum EffectImpact {
|
||||
MODIFIER (0),
|
||||
LOW (1),
|
||||
|
|
@ -202,7 +185,7 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
|
|||
UNKNOWN
|
||||
}
|
||||
|
||||
// We assign a functional class to each SnpEff effect.
|
||||
// SnpEff assigns a functional class to each effect.
|
||||
public enum EffectFunctionalClass {
|
||||
NONE (0),
|
||||
SILENT (1),
|
||||
|
|
@ -379,13 +362,13 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
|
|||
public List<String> getKeyNames() {
|
||||
return Arrays.asList( InfoFieldKey.EFFECT_KEY.getKeyName(),
|
||||
InfoFieldKey.IMPACT_KEY.getKeyName(),
|
||||
InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(),
|
||||
InfoFieldKey.CODON_CHANGE_KEY.getKeyName(),
|
||||
InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(),
|
||||
InfoFieldKey.GENE_NAME_KEY.getKeyName(),
|
||||
InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(),
|
||||
InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(),
|
||||
InfoFieldKey.EXON_ID_KEY.getKeyName(),
|
||||
InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName()
|
||||
InfoFieldKey.EXON_ID_KEY.getKeyName()
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -393,13 +376,13 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
|
|||
return Arrays.asList(
|
||||
new VCFInfoHeaderLine(InfoFieldKey.EFFECT_KEY.getKeyName(), 1, VCFHeaderLineType.String, "The highest-impact effect resulting from the current variant (or one of the highest-impact effects, if there is a tie)"),
|
||||
new VCFInfoHeaderLine(InfoFieldKey.IMPACT_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Impact of the highest-impact effect resulting from the current variant " + Arrays.toString(EffectImpact.values())),
|
||||
new VCFInfoHeaderLine(InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Functional class of the highest-impact effect resulting from the current variant: " + Arrays.toString(EffectFunctionalClass.values())),
|
||||
new VCFInfoHeaderLine(InfoFieldKey.CODON_CHANGE_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Old/New codon for the highest-impact effect resulting from the current variant"),
|
||||
new VCFInfoHeaderLine(InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Old/New amino acid for the highest-impact effect resulting from the current variant"),
|
||||
new VCFInfoHeaderLine(InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Old/New amino acid for the highest-impact effect resulting from the current variant (in HGVS style)"),
|
||||
new VCFInfoHeaderLine(InfoFieldKey.GENE_NAME_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Gene name for the highest-impact effect resulting from the current variant"),
|
||||
new VCFInfoHeaderLine(InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Gene biotype for the highest-impact effect resulting from the current variant"),
|
||||
new VCFInfoHeaderLine(InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Transcript ID for the highest-impact effect resulting from the current variant"),
|
||||
new VCFInfoHeaderLine(InfoFieldKey.EXON_ID_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Exon ID for the highest-impact effect resulting from the current variant"),
|
||||
new VCFInfoHeaderLine(InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Functional class of the highest-impact effect resulting from the current variant: " + Arrays.toString(EffectFunctionalClass.values()))
|
||||
new VCFInfoHeaderLine(InfoFieldKey.EXON_ID_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Exon ID for the highest-impact effect resulting from the current variant")
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -409,6 +392,7 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
|
|||
protected static class SnpEffEffect {
|
||||
private EffectType effect;
|
||||
private EffectImpact impact;
|
||||
private EffectFunctionalClass functionalClass;
|
||||
private String codonChange;
|
||||
private String aminoAcidChange;
|
||||
private String geneName;
|
||||
|
|
@ -420,16 +404,21 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
|
|||
private String parseError = null;
|
||||
private boolean isWellFormed = true;
|
||||
|
||||
private static final int EXPECTED_NUMBER_OF_METADATA_FIELDS = 8;
|
||||
private static final int NUMBER_OF_METADATA_FIELDS_UPON_WARNING = 9;
|
||||
private static final int NUMBER_OF_METADATA_FIELDS_UPON_ERROR = 10;
|
||||
private static final int EXPECTED_NUMBER_OF_METADATA_FIELDS = 9;
|
||||
private static final int NUMBER_OF_METADATA_FIELDS_UPON_EITHER_WARNING_OR_ERROR = 10;
|
||||
private static final int NUMBER_OF_METADATA_FIELDS_UPON_BOTH_WARNING_AND_ERROR = 11;
|
||||
|
||||
// Note that contrary to the description for the EFF field layout that SnpEff adds to the VCF header,
|
||||
// errors come after warnings, not vice versa:
|
||||
private static final int SNPEFF_WARNING_FIELD_INDEX = NUMBER_OF_METADATA_FIELDS_UPON_WARNING - 1;
|
||||
private static final int SNPEFF_ERROR_FIELD_INDEX = NUMBER_OF_METADATA_FIELDS_UPON_ERROR - 1;
|
||||
// If there is either a warning OR an error, it will be in the last field. If there is both
|
||||
// a warning AND an error, the warning will be in the second-to-last field, and the error will
|
||||
// be in the last field.
|
||||
private static final int SNPEFF_WARNING_OR_ERROR_FIELD_UPON_SINGLE_ERROR = NUMBER_OF_METADATA_FIELDS_UPON_EITHER_WARNING_OR_ERROR - 1;
|
||||
private static final int SNPEFF_WARNING_FIELD_UPON_BOTH_WARNING_AND_ERROR = NUMBER_OF_METADATA_FIELDS_UPON_BOTH_WARNING_AND_ERROR - 2;
|
||||
private static final int SNPEFF_ERROR_FIELD_UPON_BOTH_WARNING_AND_ERROR = NUMBER_OF_METADATA_FIELDS_UPON_BOTH_WARNING_AND_ERROR - 1;
|
||||
|
||||
private static final int SNPEFF_CODING_FIELD_INDEX = 5;
|
||||
// Position of the field indicating whether the effect is coding or non-coding. This field is used
|
||||
// in selecting the most significant effect, but is not included in the annotations we return
|
||||
// since it can be deduced from the SNPEFF_GENE_BIOTYPE field.
|
||||
private static final int SNPEFF_CODING_FIELD_INDEX = 6;
|
||||
|
||||
public SnpEffEffect ( String effectName, String[] effectMetadata ) {
|
||||
parseEffectName(effectName);
|
||||
|
|
@ -447,11 +436,14 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
|
|||
|
||||
private void parseEffectMetadata ( String[] effectMetadata ) {
|
||||
if ( effectMetadata.length != EXPECTED_NUMBER_OF_METADATA_FIELDS ) {
|
||||
if ( effectMetadata.length == NUMBER_OF_METADATA_FIELDS_UPON_WARNING ) {
|
||||
parseError(String.format("SnpEff issued the following warning: %s", effectMetadata[SNPEFF_WARNING_FIELD_INDEX]));
|
||||
if ( effectMetadata.length == NUMBER_OF_METADATA_FIELDS_UPON_EITHER_WARNING_OR_ERROR ) {
|
||||
parseError(String.format("SnpEff issued the following warning or error: \"%s\"",
|
||||
effectMetadata[SNPEFF_WARNING_OR_ERROR_FIELD_UPON_SINGLE_ERROR]));
|
||||
}
|
||||
else if ( effectMetadata.length == NUMBER_OF_METADATA_FIELDS_UPON_ERROR ) {
|
||||
parseError(String.format("SnpEff issued the following error: %s", effectMetadata[SNPEFF_ERROR_FIELD_INDEX]));
|
||||
else if ( effectMetadata.length == NUMBER_OF_METADATA_FIELDS_UPON_BOTH_WARNING_AND_ERROR ) {
|
||||
parseError(String.format("SnpEff issued the following warning: \"%s\", and the following error: \"%s\"",
|
||||
effectMetadata[SNPEFF_WARNING_FIELD_UPON_BOTH_WARNING_AND_ERROR],
|
||||
effectMetadata[SNPEFF_ERROR_FIELD_UPON_BOTH_WARNING_AND_ERROR]));
|
||||
}
|
||||
else {
|
||||
parseError(String.format("Wrong number of effect metadata fields. Expected %d but found %d",
|
||||
|
|
@ -461,23 +453,33 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
|
|||
return;
|
||||
}
|
||||
|
||||
if ( effect != null && effect.isModifier() ) {
|
||||
impact = EffectImpact.MODIFIER;
|
||||
// The impact field will never be empty, and should always contain one of the enumerated values:
|
||||
try {
|
||||
impact = EffectImpact.valueOf(effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]);
|
||||
}
|
||||
else {
|
||||
catch ( IllegalArgumentException e ) {
|
||||
parseError(String.format("Unrecognized value for effect impact: %s", effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]));
|
||||
}
|
||||
|
||||
// The functional class field will be empty when the effect has no functional class associated with it:
|
||||
if ( effectMetadata[InfoFieldKey.FUNCTIONAL_CLASS_KEY.getFieldIndex()].trim().length() > 0 ) {
|
||||
try {
|
||||
impact = EffectImpact.valueOf(effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]);
|
||||
functionalClass = EffectFunctionalClass.valueOf(effectMetadata[InfoFieldKey.FUNCTIONAL_CLASS_KEY.getFieldIndex()]);
|
||||
}
|
||||
catch ( IllegalArgumentException e ) {
|
||||
parseError(String.format("Unrecognized value for effect impact: %s", effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]));
|
||||
parseError(String.format("Unrecognized value for effect functional class: %s", effectMetadata[InfoFieldKey.FUNCTIONAL_CLASS_KEY.getFieldIndex()]));
|
||||
}
|
||||
}
|
||||
else {
|
||||
functionalClass = EffectFunctionalClass.NONE;
|
||||
}
|
||||
|
||||
codonChange = effectMetadata[InfoFieldKey.CODON_CHANGE_KEY.getFieldIndex()];
|
||||
aminoAcidChange = effectMetadata[InfoFieldKey.AMINO_ACID_CHANGE_KEY.getFieldIndex()];
|
||||
geneName = effectMetadata[InfoFieldKey.GENE_NAME_KEY.getFieldIndex()];
|
||||
geneBiotype = effectMetadata[InfoFieldKey.GENE_BIOTYPE_KEY.getFieldIndex()];
|
||||
|
||||
// The coding field will be empty when SnpEff has no coding info for the effect:
|
||||
if ( effectMetadata[SNPEFF_CODING_FIELD_INDEX].trim().length() > 0 ) {
|
||||
try {
|
||||
coding = EffectCoding.valueOf(effectMetadata[SNPEFF_CODING_FIELD_INDEX]);
|
||||
|
|
@ -534,7 +536,7 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
|
|||
return true;
|
||||
}
|
||||
else if ( impact.isSameImpactAs(other.impact) ) {
|
||||
return effect.getFunctionalClass().isHigherPriorityThan(other.effect.getFunctionalClass());
|
||||
return functionalClass.isHigherPriorityThan(other.functionalClass);
|
||||
}
|
||||
|
||||
return false;
|
||||
|
|
@ -545,13 +547,13 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
|
|||
|
||||
addAnnotation(annotations, InfoFieldKey.EFFECT_KEY.getKeyName(), effect.toString());
|
||||
addAnnotation(annotations, InfoFieldKey.IMPACT_KEY.getKeyName(), impact.toString());
|
||||
addAnnotation(annotations, InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(), functionalClass.toString());
|
||||
addAnnotation(annotations, InfoFieldKey.CODON_CHANGE_KEY.getKeyName(), codonChange);
|
||||
addAnnotation(annotations, InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(), aminoAcidChange);
|
||||
addAnnotation(annotations, InfoFieldKey.GENE_NAME_KEY.getKeyName(), geneName);
|
||||
addAnnotation(annotations, InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(), geneBiotype);
|
||||
addAnnotation(annotations, InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(), transcriptID);
|
||||
addAnnotation(annotations, InfoFieldKey.EXON_ID_KEY.getKeyName(), exonID);
|
||||
addAnnotation(annotations, InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(), effect.getFunctionalClass().toString());
|
||||
|
||||
return annotations;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,7 +26,6 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
|
|
@ -36,7 +35,6 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
|
|
@ -83,8 +81,7 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
|
|||
* @param priors priors to use for GLs
|
||||
* @param GLs hash of sample->GL to fill in
|
||||
* @param alternateAlleleToUse the alternate allele to use, null if not set
|
||||
*
|
||||
* @param useBAQedPileup
|
||||
* @param useBAQedPileup should we use the BAQed pileup or the raw one?
|
||||
* @return genotype likelihoods per sample for AA, AB, BB
|
||||
*/
|
||||
public abstract Allele getLikelihoods(RefMetaDataTracker tracker,
|
||||
|
|
@ -93,13 +90,14 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
|
|||
AlignmentContextUtils.ReadOrientation contextType,
|
||||
GenotypePriors priors,
|
||||
Map<String, MultiallelicGenotypeLikelihoods> GLs,
|
||||
Allele alternateAlleleToUse, boolean useBAQedPileup);
|
||||
Allele alternateAlleleToUse,
|
||||
boolean useBAQedPileup);
|
||||
|
||||
protected int getFilteredDepth(ReadBackedPileup pileup) {
|
||||
int count = 0;
|
||||
for ( PileupElement p : pileup ) {
|
||||
if ( BaseUtils.isRegularBase( p.getBase() ) )
|
||||
count++;
|
||||
count += p.getRepresentativeCount();
|
||||
}
|
||||
|
||||
return count;
|
||||
|
|
|
|||
|
|
@ -258,7 +258,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
|||
Set<VCFFormatHeaderLine> result = new HashSet<VCFFormatHeaderLine>();
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Genotype Quality"));
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)"));
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Approximate read depth (reads with MQ=255 or with bad mates are filtered)"));
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"));
|
||||
|
||||
return result;
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
|
|
@ -1,140 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.qc;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadPairWalker;
|
||||
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Counts the number of read pairs encountered in a file sorted in
|
||||
* query name order. Breaks counts down by total pairs and number
|
||||
* of paired reads.
|
||||
*
|
||||
*
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* One or more bam files.
|
||||
* </p>
|
||||
*
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* Number of pairs seen.
|
||||
* </p>
|
||||
*
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||
* -R ref.fasta \
|
||||
* -T CountPairs \
|
||||
* -o output.txt \
|
||||
* -I input.bam
|
||||
* </pre>
|
||||
*
|
||||
* @author mhanna
|
||||
*/
|
||||
public class CountPairsWalker extends ReadPairWalker<Integer,Long> {
|
||||
@Output
|
||||
private PrintStream out;
|
||||
|
||||
/**
|
||||
* How many reads are the first in a pair, based on flag 0x0040 from the SAM spec.
|
||||
*/
|
||||
private long firstOfPair = 0;
|
||||
|
||||
/**
|
||||
* How many reads are the second in a pair, based on flag 0x0080 from the SAM spec.
|
||||
*/
|
||||
private long secondOfPair = 0;
|
||||
|
||||
/**
|
||||
* A breakdown of the total number of reads seen with exactly the same read name.
|
||||
*/
|
||||
private List<Long> pairCountsByType = new ExpandingArrayList<Long>();
|
||||
|
||||
/**
|
||||
* Maps a read pair to a given reduce of type MapType. Semantics determined by subclasser.
|
||||
* @param reads Collection of reads having the same name.
|
||||
* @return Semantics defined by implementer.
|
||||
*/
|
||||
@Override
|
||||
public Integer map(Collection<SAMRecord> reads) {
|
||||
if(pairCountsByType.get(reads.size()) != null)
|
||||
pairCountsByType.set(reads.size(),pairCountsByType.get(reads.size())+1);
|
||||
else
|
||||
pairCountsByType.set(reads.size(),1L);
|
||||
|
||||
for(SAMRecord read: reads) {
|
||||
if(read.getFirstOfPairFlag()) firstOfPair++;
|
||||
if(read.getSecondOfPairFlag()) secondOfPair++;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* No pairs at the beginning of a traversal.
|
||||
* @return 0 always.
|
||||
*/
|
||||
@Override
|
||||
public Long reduceInit() {
|
||||
return 0L;
|
||||
}
|
||||
|
||||
/**
|
||||
* Combine number of pairs seen in this iteration (always 1) with total number of pairs
|
||||
* seen in previous iterations.
|
||||
* @param value Pairs in this iteration (1), from the map function.
|
||||
* @param sum Count of all pairs in prior iterations.
|
||||
* @return All pairs encountered in previous iterations + all pairs encountered in this iteration (sum + 1).
|
||||
*/
|
||||
@Override
|
||||
public Long reduce(Integer value, Long sum) {
|
||||
return value + sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* Print summary statistics over the entire traversal.
|
||||
* @param sum A count of all read pairs viewed.
|
||||
*/
|
||||
@Override
|
||||
public void onTraversalDone(Long sum) {
|
||||
out.printf("Total number of pairs : %d%n",sum);
|
||||
out.printf("Total number of first reads in pair : %d%n",firstOfPair);
|
||||
out.printf("Total number of second reads in pair: %d%n",secondOfPair);
|
||||
for(int i = 1; i < pairCountsByType.size(); i++) {
|
||||
if(pairCountsByType.get(i) == null)
|
||||
continue;
|
||||
out.printf("Pairs of size %d: %d%n",i,pairCountsByType.get(i));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -24,10 +24,8 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||
|
||||
import org.apache.poi.hpsf.Variant;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
|
|
@ -275,8 +273,8 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
private double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 0;
|
||||
|
||||
/**
|
||||
* Variants are kept in memory to guarantee that exactly n variants will be chosen randomly, so use it only for a reasonable
|
||||
* number of variants. Use --select_random_fraction for larger numbers of variants.
|
||||
* Variants are kept in memory to guarantee that exactly n variants will be chosen randomly, so make sure you supply the program with enough memory
|
||||
* given your input set. This option will NOT work well for large callsets; use --select_random_fraction for sets with a large numbers of variants.
|
||||
*/
|
||||
@Argument(fullName="select_random_number", shortName="number", doc="Selects a number of variants at random from the variant track", required=false)
|
||||
private int numRandom = 0;
|
||||
|
|
@ -532,7 +530,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
}
|
||||
if (SELECT_RANDOM_NUMBER) {
|
||||
randomlyAddVariant(++variantNumber, sub, ref.getBase());
|
||||
randomlyAddVariant(++variantNumber, sub);
|
||||
}
|
||||
else if (!SELECT_RANDOM_FRACTION || ( GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) {
|
||||
vcfWriter.add(sub);
|
||||
|
|
@ -705,7 +703,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
return sub;
|
||||
}
|
||||
|
||||
private void randomlyAddVariant(int rank, VariantContext vc, byte refBase) {
|
||||
private void randomlyAddVariant(int rank, VariantContext vc) {
|
||||
if (nVariantsAdded < numRandom)
|
||||
variantArray[nVariantsAdded++] = new RandomVariantStructure(vc);
|
||||
|
||||
|
|
|
|||
|
|
@ -554,4 +554,54 @@ public class GenomeLocParser {
|
|||
return createGenomeLoc(contigName,contig.getSequenceIndex(),1,contig.getSequenceLength(), true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a loc to the left (starting at the loc start + 1) of maxBasePairs size.
|
||||
* @param loc The original loc
|
||||
* @param maxBasePairs The maximum number of basePairs
|
||||
* @return The contiguous loc of up to maxBasePairs length or null if the loc is already at the start of the contig.
|
||||
*/
|
||||
@Requires({"loc != null", "maxBasePairs > 0"})
|
||||
public GenomeLoc createGenomeLocAtStart(GenomeLoc loc, int maxBasePairs) {
|
||||
if (GenomeLoc.isUnmapped(loc))
|
||||
return null;
|
||||
String contigName = loc.getContig();
|
||||
SAMSequenceRecord contig = contigInfo.getSequence(contigName);
|
||||
int contigIndex = contig.getSequenceIndex();
|
||||
|
||||
int start = loc.getStart() - maxBasePairs;
|
||||
int stop = loc.getStart() - 1;
|
||||
|
||||
if (start < 1)
|
||||
start = 1;
|
||||
if (stop < 1)
|
||||
return null;
|
||||
|
||||
return createGenomeLoc(contigName, contigIndex, start, stop, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a loc to the right (starting at the loc stop + 1) of maxBasePairs size.
|
||||
* @param loc The original loc
|
||||
* @param maxBasePairs The maximum number of basePairs
|
||||
* @return The contiguous loc of up to maxBasePairs length or null if the loc is already at the end of the contig.
|
||||
*/
|
||||
@Requires({"loc != null", "maxBasePairs > 0"})
|
||||
public GenomeLoc createGenomeLocAtStop(GenomeLoc loc, int maxBasePairs) {
|
||||
if (GenomeLoc.isUnmapped(loc))
|
||||
return null;
|
||||
String contigName = loc.getContig();
|
||||
SAMSequenceRecord contig = contigInfo.getSequence(contigName);
|
||||
int contigIndex = contig.getSequenceIndex();
|
||||
int contigLength = contig.getSequenceLength();
|
||||
|
||||
int start = loc.getStop() + 1;
|
||||
int stop = loc.getStop() + maxBasePairs;
|
||||
|
||||
if (start > contigLength)
|
||||
return null;
|
||||
if (stop > contigLength)
|
||||
stop = contigLength;
|
||||
|
||||
return createGenomeLoc(contigName, contigIndex, start, stop, true);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -233,8 +233,12 @@ public class IntervalUtils {
|
|||
*
|
||||
* Returns a null string if there are no differences, otherwise returns a string describing the difference
|
||||
* (useful for UnitTests). Assumes both lists are sorted
|
||||
*
|
||||
* @param masterArg sorted master genome locs
|
||||
* @param testArg sorted test genome locs
|
||||
* @return null string if there are no difference, otherwise a string describing the difference
|
||||
*/
|
||||
public static final String equateIntervals(List<GenomeLoc> masterArg, List<GenomeLoc> testArg) {
|
||||
public static String equateIntervals(List<GenomeLoc> masterArg, List<GenomeLoc> testArg) {
|
||||
LinkedList<GenomeLoc> master = new LinkedList<GenomeLoc>(masterArg);
|
||||
LinkedList<GenomeLoc> test = new LinkedList<GenomeLoc>(testArg);
|
||||
|
||||
|
|
@ -317,23 +321,6 @@ public class IntervalUtils {
|
|||
return lengths;
|
||||
}
|
||||
|
||||
/**
|
||||
* Counts the number of interval files an interval list can be split into using scatterIntervalArguments.
|
||||
* @param locs The genome locs.
|
||||
* @return The maximum number of parts the intervals can be split into.
|
||||
*/
|
||||
public static int countContigIntervals(List<GenomeLoc> locs) {
|
||||
int maxFiles = 0;
|
||||
String contig = null;
|
||||
for (GenomeLoc loc: locs) {
|
||||
if (contig == null || !contig.equals(loc.getContig())) {
|
||||
maxFiles++;
|
||||
contig = loc.getContig();
|
||||
}
|
||||
}
|
||||
return maxFiles;
|
||||
}
|
||||
|
||||
/**
|
||||
* Splits an interval list into multiple files.
|
||||
* @param fileHeader The sam file header.
|
||||
|
|
@ -373,7 +360,6 @@ public class IntervalUtils {
|
|||
* @return A list of lists of genome locs, split according to splits
|
||||
*/
|
||||
public static List<List<GenomeLoc>> splitIntervalsToSubLists(List<GenomeLoc> locs, List<Integer> splits) {
|
||||
int locIndex = 1;
|
||||
int start = 0;
|
||||
List<List<GenomeLoc>> sublists = new ArrayList<List<GenomeLoc>>(splits.size());
|
||||
for (Integer stop: splits) {
|
||||
|
|
@ -465,7 +451,7 @@ public class IntervalUtils {
|
|||
|
||||
@Requires({"remaining != null", "!remaining.isEmpty()", "idealSplitSize > 0"})
|
||||
@Ensures({"result != null"})
|
||||
final static SplitLocusRecursive splitLocusIntervals1(LinkedList<GenomeLoc> remaining, long idealSplitSize) {
|
||||
static SplitLocusRecursive splitLocusIntervals1(LinkedList<GenomeLoc> remaining, long idealSplitSize) {
|
||||
final List<GenomeLoc> split = new ArrayList<GenomeLoc>();
|
||||
long size = 0;
|
||||
|
||||
|
|
@ -579,10 +565,101 @@ public class IntervalUtils {
|
|||
}
|
||||
}
|
||||
|
||||
public static final long intervalSize(final List<GenomeLoc> locs) {
|
||||
public static long intervalSize(final List<GenomeLoc> locs) {
|
||||
long size = 0;
|
||||
for ( final GenomeLoc loc : locs )
|
||||
size += loc.size();
|
||||
return size;
|
||||
}
|
||||
|
||||
public static void writeFlankingIntervals(File reference, File inputIntervals, File flankingIntervals, int basePairs) {
|
||||
ReferenceDataSource referenceDataSource = new ReferenceDataSource(reference);
|
||||
GenomeLocParser parser = new GenomeLocParser(referenceDataSource.getReference());
|
||||
List<GenomeLoc> originalList = intervalFileToList(parser, inputIntervals.getAbsolutePath());
|
||||
|
||||
if (originalList.isEmpty())
|
||||
throw new UserException.MalformedFile(inputIntervals, "File contains no intervals");
|
||||
|
||||
List<GenomeLoc> flankingList = getFlankingIntervals(parser, originalList, basePairs);
|
||||
|
||||
if (flankingList.isEmpty())
|
||||
throw new UserException.MalformedFile(inputIntervals, "Unable to produce any flanks for the intervals");
|
||||
|
||||
SAMFileHeader samFileHeader = new SAMFileHeader();
|
||||
samFileHeader.setSequenceDictionary(referenceDataSource.getReference().getSequenceDictionary());
|
||||
IntervalList intervalList = new IntervalList(samFileHeader);
|
||||
int i = 0;
|
||||
for (GenomeLoc loc: flankingList)
|
||||
intervalList.add(toInterval(loc, ++i));
|
||||
intervalList.write(flankingIntervals);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a list of intervals between the passed int locs. Does not extend UNMAPPED locs.
|
||||
* @param parser A genome loc parser for creating the new intervals
|
||||
* @param locs Original genome locs
|
||||
* @param basePairs Number of base pairs on each side of loc
|
||||
* @return The list of intervals between the locs
|
||||
*/
|
||||
public static List<GenomeLoc> getFlankingIntervals(final GenomeLocParser parser, final List<GenomeLoc> locs, final int basePairs) {
|
||||
List<GenomeLoc> sorted = sortAndMergeIntervals(parser, locs, IntervalMergingRule.ALL).toList();
|
||||
|
||||
if (sorted.size() == 0)
|
||||
return Collections.emptyList();
|
||||
|
||||
LinkedHashMap<String, List<GenomeLoc>> locsByContig = splitByContig(sorted);
|
||||
List<GenomeLoc> expanded = new ArrayList<GenomeLoc>();
|
||||
for (String contig: locsByContig.keySet()) {
|
||||
List<GenomeLoc> contigLocs = locsByContig.get(contig);
|
||||
int contigLocsSize = contigLocs.size();
|
||||
|
||||
GenomeLoc startLoc, stopLoc;
|
||||
|
||||
// Create loc at start of the list
|
||||
startLoc = parser.createGenomeLocAtStart(contigLocs.get(0), basePairs);
|
||||
if (startLoc != null)
|
||||
expanded.add(startLoc);
|
||||
|
||||
// Create locs between each loc[i] and loc[i+1]
|
||||
for (int i = 0; i < contigLocsSize - 1; i++) {
|
||||
stopLoc = parser.createGenomeLocAtStop(contigLocs.get(i), basePairs);
|
||||
startLoc = parser.createGenomeLocAtStart(contigLocs.get(i + 1), basePairs);
|
||||
if (stopLoc.getStop() + 1 >= startLoc.getStart()) {
|
||||
// NOTE: This is different than GenomeLoc.merge()
|
||||
// merge() returns a loc which covers the entire range of stop and start,
|
||||
// possibly returning positions inside loc(i) or loc(i+1)
|
||||
// We want to make sure that the start of the stopLoc is used, and the stop of the startLoc
|
||||
GenomeLoc merged = parser.createGenomeLoc(
|
||||
stopLoc.getContig(), stopLoc.getStart(), startLoc.getStop());
|
||||
expanded.add(merged);
|
||||
} else {
|
||||
expanded.add(stopLoc);
|
||||
expanded.add(startLoc);
|
||||
}
|
||||
}
|
||||
|
||||
// Create loc at the end of the list
|
||||
stopLoc = parser.createGenomeLocAtStop(contigLocs.get(contigLocsSize - 1), basePairs);
|
||||
if (stopLoc != null)
|
||||
expanded.add(stopLoc);
|
||||
}
|
||||
return expanded;
|
||||
}
|
||||
|
||||
private static LinkedHashMap<String, List<GenomeLoc>> splitByContig(List<GenomeLoc> sorted) {
|
||||
LinkedHashMap<String, List<GenomeLoc>> splits = new LinkedHashMap<String, List<GenomeLoc>>();
|
||||
GenomeLoc last = null;
|
||||
List<GenomeLoc> contigLocs = null;
|
||||
for (GenomeLoc loc: sorted) {
|
||||
if (GenomeLoc.isUnmapped(loc))
|
||||
continue;
|
||||
if (last == null || !last.onSameContig(loc)) {
|
||||
contigLocs = new ArrayList<GenomeLoc>();
|
||||
splits.put(loc.getContig(), contigLocs);
|
||||
}
|
||||
contigLocs.add(loc);
|
||||
last = loc;
|
||||
}
|
||||
return splits;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -188,7 +188,9 @@ public class GATKSAMRecord extends BAMRecord {
|
|||
}
|
||||
|
||||
public final byte getReducedCount(final int i) {
|
||||
return getReducedReadCounts()[i];
|
||||
byte firstCount = getReducedReadCounts()[0];
|
||||
byte offsetCount = getReducedReadCounts()[i];
|
||||
return (i==0) ? firstCount : (byte) Math.min(firstCount + offsetCount, Byte.MAX_VALUE);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -25,7 +25,13 @@
|
|||
package org.broadinstitute.sting.utils.variantcontext;
|
||||
|
||||
import org.broad.tribble.TribbleException;
|
||||
import org.broadinstitute.sting.gatk.io.DirectOutputTracker;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.jgrapht.util.MathUtil;
|
||||
|
||||
import java.util.EnumMap;
|
||||
import java.util.Map;
|
||||
|
||||
public class GenotypeLikelihoods {
|
||||
public static final boolean CAP_PLS = false;
|
||||
|
|
@ -94,6 +100,48 @@ public class GenotypeLikelihoods {
|
|||
return likelihoodsAsString_PLs;
|
||||
}
|
||||
|
||||
//Return genotype likelihoods as an EnumMap with Genotypes as keys and likelihoods as values
|
||||
//Returns null in case of missing likelihoods
|
||||
public EnumMap<Genotype.Type,Double> getAsMap(boolean normalizeFromLog10){
|
||||
//Make sure that the log10likelihoods are set
|
||||
double[] likelihoods = normalizeFromLog10 ? MathUtils.normalizeFromLog10(getAsVector()) : getAsVector();
|
||||
if(likelihoods == null)
|
||||
return null;
|
||||
EnumMap<Genotype.Type,Double> likelihoodsMap = new EnumMap<Genotype.Type, Double>(Genotype.Type.class);
|
||||
likelihoodsMap.put(Genotype.Type.HOM_REF,likelihoods[Genotype.Type.HOM_REF.ordinal()-1]);
|
||||
likelihoodsMap.put(Genotype.Type.HET,likelihoods[Genotype.Type.HET.ordinal()-1]);
|
||||
likelihoodsMap.put(Genotype.Type.HOM_VAR, likelihoods[Genotype.Type.HOM_VAR.ordinal() - 1]);
|
||||
return likelihoodsMap;
|
||||
}
|
||||
|
||||
//Return the neg log10 Genotype Quality (GQ) for the given genotype
|
||||
//Returns Double.NEGATIVE_INFINITY in case of missing genotype
|
||||
public double getNegLog10GQ(Genotype.Type genotype){
|
||||
|
||||
double qual = Double.NEGATIVE_INFINITY;
|
||||
EnumMap<Genotype.Type,Double> likelihoods = getAsMap(false);
|
||||
if(likelihoods == null)
|
||||
return qual;
|
||||
for(Map.Entry<Genotype.Type,Double> likelihood : likelihoods.entrySet()){
|
||||
if(likelihood.getKey() == genotype)
|
||||
continue;
|
||||
if(likelihood.getValue() > qual)
|
||||
qual = likelihood.getValue();
|
||||
|
||||
}
|
||||
|
||||
//Quality of the most likely genotype = likelihood(most likely) - likelihood (2nd best)
|
||||
qual = likelihoods.get(genotype) - qual;
|
||||
|
||||
//Quality of other genotypes 1-P(G)
|
||||
if (qual < 0) {
|
||||
double[] normalized = MathUtils.normalizeFromLog10(getAsVector());
|
||||
double chosenGenotype = normalized[genotype.ordinal()-1];
|
||||
qual = -1.0 * Math.log10(1.0 - chosenGenotype);
|
||||
}
|
||||
return qual;
|
||||
}
|
||||
|
||||
private final static double[] parsePLsIntoLikelihoods(String likelihoodsAsString_PLs) {
|
||||
if ( !likelihoodsAsString_PLs.equals(VCFConstants.MISSING_VALUE_v4) ) {
|
||||
String[] strings = likelihoodsAsString_PLs.split(",");
|
||||
|
|
|
|||
|
|
@ -223,7 +223,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
* @param referenceBaseForIndel padded reference base
|
||||
*/
|
||||
public VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles, Map<String, Genotype> genotypes, double negLog10PError, Set<String> filters, Map<String, ?> attributes, Byte referenceBaseForIndel) {
|
||||
this(source, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, referenceBaseForIndel, false);
|
||||
this(source, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, referenceBaseForIndel, false, true);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -240,7 +240,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
* @param attributes attributes
|
||||
*/
|
||||
public VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles, Map<String, Genotype> genotypes, double negLog10PError, Set<String> filters, Map<String, ?> attributes) {
|
||||
this(source, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, null, false);
|
||||
this(source, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, null, false, true);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -261,7 +261,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
* @param referenceBaseForIndel padded reference base
|
||||
*/
|
||||
public VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles, double negLog10PError, Set<String> filters, Map<String, ?> attributes, Byte referenceBaseForIndel) {
|
||||
this(source, contig, start, stop, alleles, NO_GENOTYPES, negLog10PError, filters, attributes, referenceBaseForIndel, true);
|
||||
this(source, contig, start, stop, alleles, NO_GENOTYPES, negLog10PError, filters, attributes, referenceBaseForIndel, true, true);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -278,7 +278,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
* @param attributes attributes
|
||||
*/
|
||||
public VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles, Collection<Genotype> genotypes, double negLog10PError, Set<String> filters, Map<String, ?> attributes) {
|
||||
this(source, contig, start, stop, alleles, genotypes != null ? genotypeCollectionToMap(new TreeMap<String, Genotype>(), genotypes) : null, negLog10PError, filters, attributes, null, false);
|
||||
this(source, contig, start, stop, alleles, genotypes != null ? genotypeCollectionToMap(new TreeMap<String, Genotype>(), genotypes) : null, negLog10PError, filters, attributes, null, false, true);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -291,7 +291,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
* @param alleles alleles
|
||||
*/
|
||||
public VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles) {
|
||||
this(source, contig, start, stop, alleles, NO_GENOTYPES, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, null, false);
|
||||
this(source, contig, start, stop, alleles, NO_GENOTYPES, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, null, false, true);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -314,7 +314,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
* @param other the VariantContext to copy
|
||||
*/
|
||||
public VariantContext(VariantContext other) {
|
||||
this(other.getSource(), other.getChr(), other.getStart(), other.getEnd() , other.getAlleles(), other.getGenotypes(), other.getNegLog10PError(), other.filtersWereApplied() ? other.getFilters() : null, other.getAttributes(), other.REFERENCE_BASE_FOR_INDEL, false);
|
||||
this(other.getSource(), other.getChr(), other.getStart(), other.getEnd() , other.getAlleles(), other.getGenotypes(), other.getNegLog10PError(), other.filtersWereApplied() ? other.getFilters() : null, other.getAttributes(), other.REFERENCE_BASE_FOR_INDEL, false, true);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -331,11 +331,13 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
* @param attributes attributes
|
||||
* @param referenceBaseForIndel padded reference base
|
||||
* @param genotypesAreUnparsed true if the genotypes have not yet been parsed
|
||||
* @param performValidation if true, call validate() as the final step in construction
|
||||
*/
|
||||
private VariantContext(String source, String contig, long start, long stop,
|
||||
Collection<Allele> alleles, Map<String, Genotype> genotypes,
|
||||
double negLog10PError, Set<String> filters, Map<String, ?> attributes,
|
||||
Byte referenceBaseForIndel, boolean genotypesAreUnparsed) {
|
||||
Byte referenceBaseForIndel, boolean genotypesAreUnparsed,
|
||||
boolean performValidation ) {
|
||||
if ( contig == null ) { throw new IllegalArgumentException("Contig cannot be null"); }
|
||||
this.contig = contig;
|
||||
this.start = start;
|
||||
|
|
@ -371,39 +373,57 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
}
|
||||
}
|
||||
|
||||
validate();
|
||||
if ( performValidation ) {
|
||||
validate();
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Partial-cloning routines (because Variant Context is immutable).
|
||||
//
|
||||
// IMPORTANT: These routines assume that the VariantContext on which they're called is already valid.
|
||||
// Due to this assumption, they explicitly tell the constructor NOT to perform validation by
|
||||
// calling validate(), and instead perform validation only on the data that's changed.
|
||||
//
|
||||
// Note that we don't call vc.getGenotypes() because that triggers the lazy loading.
|
||||
// Also note that we need to create a new attributes map because it's unmodifiable and the constructor may try to modify it.
|
||||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
public static VariantContext modifyGenotypes(VariantContext vc, Map<String, Genotype> genotypes) {
|
||||
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap<String, Object>(vc.getAttributes()), vc.getReferenceBaseForIndel(), false);
|
||||
VariantContext modifiedVC = new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap<String, Object>(vc.getAttributes()), vc.getReferenceBaseForIndel(), false, false);
|
||||
modifiedVC.validateGenotypes();
|
||||
return modifiedVC;
|
||||
}
|
||||
|
||||
public static VariantContext modifyLocation(VariantContext vc, String chr, int start, int end) {
|
||||
return new VariantContext(vc.getSource(), chr, start, end, vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap<String, Object>(vc.getAttributes()), vc.getReferenceBaseForIndel(), true);
|
||||
VariantContext modifiedVC = new VariantContext(vc.getSource(), chr, start, end, vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap<String, Object>(vc.getAttributes()), vc.getReferenceBaseForIndel(), true, false);
|
||||
|
||||
// Since start and end have changed, we need to call both validateAlleles() and validateReferencePadding(),
|
||||
// since those validation routines rely on the values of start and end:
|
||||
modifiedVC.validateAlleles();
|
||||
modifiedVC.validateReferencePadding();
|
||||
|
||||
return modifiedVC;
|
||||
}
|
||||
|
||||
public static VariantContext modifyFilters(VariantContext vc, Set<String> filters) {
|
||||
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd() , vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), filters, new HashMap<String, Object>(vc.getAttributes()), vc.getReferenceBaseForIndel(), true);
|
||||
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd() , vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), filters, new HashMap<String, Object>(vc.getAttributes()), vc.getReferenceBaseForIndel(), true, false);
|
||||
}
|
||||
|
||||
public static VariantContext modifyAttributes(VariantContext vc, Map<String, Object> attributes) {
|
||||
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, attributes, vc.getReferenceBaseForIndel(), true);
|
||||
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, attributes, vc.getReferenceBaseForIndel(), true, false);
|
||||
}
|
||||
|
||||
public static VariantContext modifyReferencePadding(VariantContext vc, Byte b) {
|
||||
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes(), b, true);
|
||||
VariantContext modifiedVC = new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes(), b, true, false);
|
||||
modifiedVC.validateReferencePadding();
|
||||
return modifiedVC;
|
||||
}
|
||||
|
||||
public static VariantContext modifyPErrorFiltersAndAttributes(VariantContext vc, double negLog10PError, Set<String> filters, Map<String, Object> attributes) {
|
||||
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, negLog10PError, filters, attributes, vc.getReferenceBaseForIndel(), true);
|
||||
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, negLog10PError, filters, attributes, vc.getReferenceBaseForIndel(), true, false);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ public class SnpEffUnitTest {
|
|||
@Test
|
||||
public void testParseWellFormedEffect() {
|
||||
String effectName = "NON_SYNONYMOUS_CODING";
|
||||
String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
|
||||
String[] effectMetadata = { "MODERATE", "MISSENSE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
|
||||
|
||||
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
|
||||
Assert.assertTrue( effect.isWellFormed() && effect.isCoding() );
|
||||
|
|
@ -42,7 +42,7 @@ public class SnpEffUnitTest {
|
|||
@Test
|
||||
public void testParseInvalidEffectNameEffect() {
|
||||
String effectName = "MADE_UP_EFFECT";
|
||||
String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
|
||||
String[] effectMetadata = { "MODERATE", "MISSENSE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
|
||||
|
||||
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
|
||||
Assert.assertFalse(effect.isWellFormed());
|
||||
|
|
@ -51,7 +51,7 @@ public class SnpEffUnitTest {
|
|||
@Test
|
||||
public void testParseInvalidEffectImpactEffect() {
|
||||
String effectName = "NON_SYNONYMOUS_CODING";
|
||||
String[] effectMetadata = { "MEDIUM", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
|
||||
String[] effectMetadata = { "MEDIUM", "MISSENSE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
|
||||
|
||||
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
|
||||
Assert.assertFalse(effect.isWellFormed());
|
||||
|
|
@ -60,27 +60,27 @@ public class SnpEffUnitTest {
|
|||
@Test
|
||||
public void testParseWrongNumberOfMetadataFieldsEffect() {
|
||||
String effectName = "NON_SYNONYMOUS_CODING";
|
||||
String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990" };
|
||||
String[] effectMetadata = { "MODERATE", "MISSENSE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990" };
|
||||
|
||||
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
|
||||
Assert.assertFalse(effect.isWellFormed());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseSnpEffWarningEffect() {
|
||||
public void testParseSnpEffOneWarningOrErrorEffect() {
|
||||
String effectName = "NON_SYNONYMOUS_CODING";
|
||||
String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829", "SNPEFF_WARNING" };
|
||||
String[] effectMetadata = { "MODERATE", "MISSENSE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829", "SNPEFF_WARNING_OR_ERROR_TEXT" };
|
||||
|
||||
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
|
||||
Assert.assertTrue( ! effect.isWellFormed() && effect.getParseError().equals("SnpEff issued the following warning: SNPEFF_WARNING") );
|
||||
Assert.assertTrue( ! effect.isWellFormed() && effect.getParseError().equals("SnpEff issued the following warning or error: \"SNPEFF_WARNING_OR_ERROR_TEXT\"") );
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseSnpEffErrorEffect() {
|
||||
public void testParseSnpEffBothWarningAndErrorEffect() {
|
||||
String effectName = "NON_SYNONYMOUS_CODING";
|
||||
String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829", "", "SNPEFF_ERROR" };
|
||||
String[] effectMetadata = { "MODERATE", "MISSENSE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829", "SNPEFF_WARNING_TEXT", "SNPEFF_ERROR_TEXT" };
|
||||
|
||||
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
|
||||
Assert.assertTrue( ! effect.isWellFormed() && effect.getParseError().equals("SnpEff issued the following error: SNPEFF_ERROR") );
|
||||
Assert.assertTrue( ! effect.isWellFormed() && effect.getParseError().equals("SnpEff issued the following warning: \"SNPEFF_WARNING_TEXT\", and the following error: \"SNPEFF_ERROR_TEXT\"") );
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testHasAnnotsAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("8e7de435105499cd71ffc099e268a83e"));
|
||||
Arrays.asList("9beb795536e95954f810835c6058f2ad"));
|
||||
executeTest("test file has annotations, asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -40,7 +40,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testHasAnnotsAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("64b6804cb1e27826e3a47089349be581"));
|
||||
Arrays.asList("2977bb30c8b84a5f4094fe6090658561"));
|
||||
executeTest("test file has annotations, asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -64,7 +64,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoAnnotsAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("fd1ffb669800c2e07df1e2719aa38e49"));
|
||||
Arrays.asList("49d989f467b8d6d8f98f7c1b67cd4a05"));
|
||||
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -72,7 +72,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoAnnotsAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("09f8e840770a9411ff77508e0ed0837f"));
|
||||
Arrays.asList("0948cd1dba7d61f283cc4cf2a7757d92"));
|
||||
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -80,7 +80,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testExcludeAnnotations() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard -XA FisherStrand -XA ReadPosRankSumTest --variant:VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("b49fe03aa4b675db80a9db38a3552c95"));
|
||||
Arrays.asList("33062eccd6eb73bc49440365430454c4"));
|
||||
executeTest("test exclude annotations", spec);
|
||||
}
|
||||
|
||||
|
|
@ -88,7 +88,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testOverwritingHeader() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant " + validationDataLocation + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1,
|
||||
Arrays.asList("78d2c19f8107d865970dbaf3e12edd92"));
|
||||
Arrays.asList("062155edec46a8c52243475fbf3a2943"));
|
||||
executeTest("test overwriting header", spec);
|
||||
}
|
||||
|
||||
|
|
@ -96,7 +96,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoReads() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant " + validationDataLocation + "vcfexample3empty.vcf -L " + validationDataLocation + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("16e3a1403fc376320d7c69492cad9345"));
|
||||
Arrays.asList("06635f2dd91b539bfbce9bf7914d8e43"));
|
||||
executeTest("not passing it any reads", spec);
|
||||
}
|
||||
|
||||
|
|
@ -104,7 +104,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testDBTagWithDbsnp() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --dbsnp " + b36dbSNP129 + " -G Standard --variant " + validationDataLocation + "vcfexample3empty.vcf -L " + validationDataLocation + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("3da8ca2b6bdaf6e92d94a8c77a71313d"));
|
||||
Arrays.asList("820eeba1f6e3a0758a69d937c524a38e"));
|
||||
executeTest("getting DB tag with dbSNP", spec);
|
||||
}
|
||||
|
||||
|
|
@ -112,7 +112,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testDBTagWithHapMap() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --comp:H3 " + validationDataLocation + "fakeHM3.vcf -G Standard --variant " + validationDataLocation + "vcfexample3empty.vcf -L " + validationDataLocation + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("1bc01c5b3bd0b7aef75230310c3ce688"));
|
||||
Arrays.asList("31cc2ce157dd20771418c08d6b3be1fa"));
|
||||
executeTest("getting DB tag with HM3", spec);
|
||||
}
|
||||
|
||||
|
|
@ -120,7 +120,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testUsingExpression() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --resource:foo " + validationDataLocation + "targetAnnotations.vcf -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -E foo.AF -L " + validationDataLocation + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("ae30a1ac7bfbc3d22a327f8b689cad31"));
|
||||
Arrays.asList("074865f8f8c0ca7bfd58681f396c49e9"));
|
||||
executeTest("using expression", spec);
|
||||
}
|
||||
|
||||
|
|
@ -128,7 +128,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testUsingExpressionWithID() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --resource:foo " + validationDataLocation + "targetAnnotations.vcf -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -E foo.ID -L " + validationDataLocation + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("1b4921085b26cbfe07d53b7c947de1e5"));
|
||||
Arrays.asList("97b26db8135d083566fb585a677fbe8a"));
|
||||
executeTest("using expression with ID", spec);
|
||||
}
|
||||
|
||||
|
|
@ -148,9 +148,9 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T VariantAnnotator -R " + hg19Reference + " -NO_HEADER -o %s -A SnpEff --variant " +
|
||||
validationDataLocation + "1kg_exomes_unfiltered.AFR.unfiltered.vcf --snpEffFile " + validationDataLocation +
|
||||
"snpEff.AFR.unfiltered.vcf -L 1:1-1,500,000 -L 2:232,325,429",
|
||||
"snpEff2.0.4.AFR.unfiltered.vcf -L 1:1-1,500,000 -L 2:232,325,429",
|
||||
1,
|
||||
Arrays.asList("122321a85e448f21679f6ca15c5e22ad")
|
||||
Arrays.asList("51258f5c880bd1ca3eb45a1711335c66")
|
||||
);
|
||||
executeTest("Testing SnpEff annotations", spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,10 +5,8 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
|||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
// ********************************************************************************** //
|
||||
|
|
@ -30,7 +28,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot1() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
|
||||
Arrays.asList("b27939251539439a382538e507e03507"));
|
||||
Arrays.asList("364a6112e6034571a896d4c68b6ff02a"));
|
||||
executeTest("test MultiSample Pilot1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -38,12 +36,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testWithAllelesPassedIn() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("8de2602679ffc92388da0b6cb4325ef6"));
|
||||
Arrays.asList("6cce55734ecd1a56c33f0ecba67d1258"));
|
||||
executeTest("test MultiSample Pilot2 with alleles passed in", spec1);
|
||||
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("6458f3b8fe4954e2ffc2af972aaab19e"));
|
||||
Arrays.asList("b2a00130a4b6f9a3c31288a236f5d159"));
|
||||
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -51,7 +49,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testSingleSamplePilot2() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("6762b72ae60155ad71738d7c76b80e4b"));
|
||||
Arrays.asList("3ccce5d909f8f128e496f6841836e5f7"));
|
||||
executeTest("test SingleSample Pilot2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -61,7 +59,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
private final static String COMPRESSED_OUTPUT_MD5 = "bc71dba7bbdb23e7d5cc60461fdd897b";
|
||||
private final static String COMPRESSED_OUTPUT_MD5 = "890143b366050e78d6c6ba6b2c6b6864";
|
||||
|
||||
@Test
|
||||
public void testCompressedOutput() {
|
||||
|
|
@ -82,7 +80,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
|
||||
// Note that we need to turn off any randomization for this to work, so no downsampling and no annotations
|
||||
|
||||
String md5 = "b9504e446b9313559c3ed97add7e8dc1";
|
||||
String md5 = "95614280c565ad90f8c000376fef822c";
|
||||
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
|
||||
|
|
@ -113,8 +111,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testCallingParameters() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( "--min_base_quality_score 26", "bb3f294eab3e2cf52c70e63b23aac5ee" );
|
||||
e.put( "--computeSLOD", "eb34979efaadba1e34bd82bcacf5c722" );
|
||||
e.put( "--min_base_quality_score 26", "7acb1a5aee5fdadb0cc0ea07a212efc6" );
|
||||
e.put( "--computeSLOD", "e9d23a08472e4e27b4f25e844f5bad57" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
@ -127,9 +125,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testOutputParameter() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( "-sites_only", "d40114aa201aa33ff5f174f15b6b73af" );
|
||||
e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "3c681b053fd2280f3c42041d24243752" );
|
||||
e.put( "--output_mode EMIT_ALL_SITES", "eafa6d71c5ecd64dfee5d7a3f60e392e" );
|
||||
e.put( "-sites_only", "44f3b5b40e6ad44486cddfdb7e0bfcd8" );
|
||||
e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "94e53320f14c5ff29d62f68d36b46fcd" );
|
||||
e.put( "--output_mode EMIT_ALL_SITES", "73ad1cc41786b12c5f0e6f3e9ec2b728" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
@ -143,12 +141,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testConfidence() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
|
||||
Arrays.asList("c71ca370947739cb7d87b59452be7a07"));
|
||||
Arrays.asList("902327e8a45fe585c8dfd1a7c4fcf60f"));
|
||||
executeTest("test confidence 1", spec1);
|
||||
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
|
||||
Arrays.asList("1c0a599d475cc7d5e745df6e9b6c0d29"));
|
||||
Arrays.asList("9ef66764a0ecfe89c3e4e1a93a69e349"));
|
||||
executeTest("test confidence 2", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -160,8 +158,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testHeterozyosity() {
|
||||
HashMap<Double, String> e = new HashMap<Double, String>();
|
||||
e.put( 0.01, "f84da90c310367bd51f2ab6e346fa3d8" );
|
||||
e.put( 1.0 / 1850, "5791e7fef40d4412b6d8f84e0a809c6c" );
|
||||
e.put( 0.01, "46243ecc2b9dc716f48ea280c9bb7e72" );
|
||||
e.put( 1.0 / 1850, "6b2a59dbc76984db6d4d6d6b5ee5d62c" );
|
||||
|
||||
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
@ -185,7 +183,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("9cc9538ac83770e12bd0830d285bfbd0"));
|
||||
Arrays.asList("f0fbe472f155baf594b1eeb58166edef"));
|
||||
|
||||
executeTest(String.format("test multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -204,7 +202,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -L 1:10,000,000-10,100,000" +
|
||||
" -baq CALCULATE_AS_NECESSARY",
|
||||
1,
|
||||
Arrays.asList("eaf8043edb46dfbe9f97ae03baa797ed"));
|
||||
Arrays.asList("8c87c749a7bb5a76ed8504d4ec254272"));
|
||||
|
||||
executeTest(String.format("test calling with BAQ"), spec);
|
||||
}
|
||||
|
|
@ -223,7 +221,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("eeba568272f9b42d5450da75c7cc6d2d"));
|
||||
Arrays.asList("a64d2e65b5927260e4ce0d948760cc5c"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX"), spec);
|
||||
}
|
||||
|
|
@ -238,7 +236,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -minIndelCnt 1" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("5fe98ee853586dc9db58f0bc97daea63"));
|
||||
Arrays.asList("2ad52c2e75b3ffbfd8f03237c444e8e6"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX with low min allele count"), spec);
|
||||
}
|
||||
|
|
@ -251,7 +249,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("19ff9bd3139480bdf79dcbf117cf2b24"));
|
||||
Arrays.asList("69107157632714150fc068d412e31939"));
|
||||
|
||||
executeTest(String.format("test indel calling, multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -261,7 +259,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("118918f2e9e56a3cfc5ccb2856d529c8"));
|
||||
Arrays.asList("84f065e115004e4c9d9d62f1bf6b4f44"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1);
|
||||
}
|
||||
|
||||
|
|
@ -271,7 +269,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
|
||||
+ validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("a20799237accd52c1b8c2ac096309c8f"));
|
||||
Arrays.asList("7ea8372e4074f90ec942f964e1863351"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -281,7 +279,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1,
|
||||
Arrays.asList("18ef8181157b4ac3eb8492f538467f92"));
|
||||
Arrays.asList("94afa8c011b47af7323c821bf19106b4"));
|
||||
executeTest("test MultiSample Pilot2 indels with complicated records", spec3);
|
||||
}
|
||||
|
||||
|
|
@ -290,7 +288,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation +
|
||||
"phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1,
|
||||
Arrays.asList("ad884e511a751b05e64db5314314365a"));
|
||||
Arrays.asList("1e02f57fafaa41db71c531eb25e148e1"));
|
||||
executeTest("test MultiSample 1000G Phase1 indels with complicated records emitting all sites", spec4);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -6,23 +6,131 @@ import org.testng.annotations.Test;
|
|||
import java.util.Arrays;
|
||||
|
||||
public class PhaseByTransmissionIntegrationTest extends WalkerTest {
|
||||
private static String phaseByTransmissionTestDataRoot = validationDataLocation + "/PhaseByTransmission";
|
||||
private static String fundamentalTestVCF = phaseByTransmissionTestDataRoot + "/" + "FundamentalsTest.unfiltered.vcf";
|
||||
private static String phaseByTransmissionTestDataRoot = validationDataLocation + "PhaseByTransmission/";
|
||||
private static String goodFamilyFile = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.goodFamilies.ped";
|
||||
private static String TNTest = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.TN.vcf";
|
||||
private static String TPTest = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.TP.vcf";
|
||||
private static String FPTest = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.FP.vcf";
|
||||
private static String SpecialTest = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.Special.vcf";
|
||||
|
||||
//Tests using PbT on all genotypes with default parameters
|
||||
//And all reporting options
|
||||
@Test
|
||||
public void testBasicFunctionality() {
|
||||
public void testTrueNegativeMV() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
buildCommandLine(
|
||||
"-T PhaseByTransmission",
|
||||
"-NO_HEADER",
|
||||
"-R " + b37KGReference,
|
||||
"--variant " + fundamentalTestVCF,
|
||||
"-f NA12892+NA12891=NA12878",
|
||||
"--variant " + TNTest,
|
||||
"-ped "+ goodFamilyFile,
|
||||
"-L 1:10109-10315",
|
||||
"-mvf %s",
|
||||
"-o %s"
|
||||
),
|
||||
2,
|
||||
Arrays.asList("16fefda693156eadf1481fd9de23facb","9418a7a6405b78179ca13a67b8bfcc14")
|
||||
);
|
||||
executeTest("testTrueNegativeMV", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTruePositiveMV() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
buildCommandLine(
|
||||
"-T PhaseByTransmission",
|
||||
"-NO_HEADER",
|
||||
"-R " + b37KGReference,
|
||||
"--variant " + TPTest,
|
||||
"-ped "+ goodFamilyFile,
|
||||
"-L 1:10109-10315",
|
||||
"-mvf %s",
|
||||
"-o %s"
|
||||
),
|
||||
2,
|
||||
Arrays.asList("14cf1d21a54d8b9fb506df178b634c56","efc66ae3d036715b721f9bd35b65d556")
|
||||
);
|
||||
executeTest("testTruePositiveMV", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFalsePositiveMV() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
buildCommandLine(
|
||||
"-T PhaseByTransmission",
|
||||
"-NO_HEADER",
|
||||
"-R " + b37KGReference,
|
||||
"--variant " + FPTest,
|
||||
"-ped "+ goodFamilyFile,
|
||||
"-L 1:10109-10315",
|
||||
"-mvf %s",
|
||||
"-o %s"
|
||||
),
|
||||
2,
|
||||
Arrays.asList("f9b0fae9fe1e0f09b883a292b0e70a12","398724bc1e65314cc5ee92706e05a3ee")
|
||||
);
|
||||
executeTest("testFalsePositiveMV", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSpecialCases() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
buildCommandLine(
|
||||
"-T PhaseByTransmission",
|
||||
"-NO_HEADER",
|
||||
"-R " + b37KGReference,
|
||||
"--variant " + SpecialTest,
|
||||
"-ped "+ goodFamilyFile,
|
||||
"-L 1:10109-10315",
|
||||
"-mvf %s",
|
||||
"-o %s"
|
||||
),
|
||||
2,
|
||||
Arrays.asList("b8d1aa3789ce77b45430c62d13ee3006","a1a333e08fafb288cda0e7711909e1c3")
|
||||
);
|
||||
executeTest("testSpecialCases", spec);
|
||||
}
|
||||
|
||||
//Test using a different prior
|
||||
//Here the FP file is used but as the prior is lowered, 3 turn to TP
|
||||
@Test
|
||||
public void testPriorOption() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
buildCommandLine(
|
||||
"-T PhaseByTransmission",
|
||||
"-NO_HEADER",
|
||||
"-R " + b37KGReference,
|
||||
"--variant " + FPTest,
|
||||
"-ped "+ goodFamilyFile,
|
||||
"-L 1:10109-10315",
|
||||
"-prior 1e-4",
|
||||
"-mvf %s",
|
||||
"-o %s"
|
||||
),
|
||||
2,
|
||||
Arrays.asList("7201ce7cc47db5840ac6b647709f7c33","c11b5e7cd7459d90d0160f917eff3b1e")
|
||||
);
|
||||
executeTest("testPriorOption", spec);
|
||||
}
|
||||
|
||||
//Test when running without MV reporting option
|
||||
//This is the exact same test file as FP but should not generate a .mvf file
|
||||
@Test
|
||||
public void testMVFileOption() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
buildCommandLine(
|
||||
"-T PhaseByTransmission",
|
||||
"-NO_HEADER",
|
||||
"-R " + b37KGReference,
|
||||
"--variant " + FPTest,
|
||||
"-ped "+ goodFamilyFile,
|
||||
"-L 1:10109-10315",
|
||||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("")
|
||||
Arrays.asList("398724bc1e65314cc5ee92706e05a3ee")
|
||||
);
|
||||
executeTest("testBasicFunctionality", spec);
|
||||
executeTest("testMVFileOption", spec);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -21,16 +21,16 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
"-T VariantEval",
|
||||
"-R " + b37KGReference,
|
||||
"--dbsnp " + b37dbSNP132,
|
||||
"--eval " + validationDataLocation + "snpEff.AFR.unfiltered.VariantAnnotator.output.vcf",
|
||||
"--eval " + validationDataLocation + "snpEff2.0.4.AFR.unfiltered.VariantAnnotator.output.vcf",
|
||||
"-noEV",
|
||||
"-EV TiTvVariantEvaluator",
|
||||
"-noST",
|
||||
"-ST FunctionalClass",
|
||||
"-L " + validationDataLocation + "snpEff.AFR.unfiltered.VariantAnnotator.output.vcf",
|
||||
"-L " + validationDataLocation + "snpEff2.0.4.AFR.unfiltered.VariantAnnotator.output.vcf",
|
||||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("d9dcb352c53106f54fcc981f15d35a90")
|
||||
Arrays.asList("a36414421621b377d6146d58d2fcecd0")
|
||||
);
|
||||
executeTest("testFunctionClassWithSnpeff", spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@ package org.broadinstitute.sting.utils;
|
|||
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -11,6 +10,7 @@ import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
|||
import static org.testng.Assert.assertEquals;
|
||||
import static org.testng.Assert.assertTrue;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
/**
|
||||
|
|
@ -36,7 +36,6 @@ public class GenomeLocParserUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testGetContigIndexValid() {
|
||||
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10);
|
||||
assertEquals(genomeLocParser.getContigIndex("chr1"), 0); // should be in the reference
|
||||
}
|
||||
|
||||
|
|
@ -67,7 +66,6 @@ public class GenomeLocParserUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testGetContigInfoKnownContig() {
|
||||
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10);
|
||||
assertEquals(0, "chr1".compareTo(genomeLocParser.getContigInfo("chr1").getSequenceName())); // should be in the reference
|
||||
}
|
||||
|
||||
|
|
@ -191,4 +189,104 @@ public class GenomeLocParserUnitTest extends BaseTest {
|
|||
assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",1,-2)); // bad stop
|
||||
assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",10,11)); // bad start, past end
|
||||
}
|
||||
|
||||
private static class FlankingGenomeLocTestData extends TestDataProvider {
|
||||
final GenomeLocParser parser;
|
||||
final int basePairs;
|
||||
final GenomeLoc original, flankStart, flankStop;
|
||||
|
||||
private FlankingGenomeLocTestData(String name, GenomeLocParser parser, int basePairs, String original, String flankStart, String flankStop) {
|
||||
super(FlankingGenomeLocTestData.class, name);
|
||||
this.parser = parser;
|
||||
this.basePairs = basePairs;
|
||||
this.original = parse(parser, original);
|
||||
this.flankStart = flankStart == null ? null : parse(parser, flankStart);
|
||||
this.flankStop = flankStop == null ? null : parse(parser, flankStop);
|
||||
}
|
||||
|
||||
private static GenomeLoc parse(GenomeLocParser parser, String str) {
|
||||
return "unmapped".equals(str) ? GenomeLoc.UNMAPPED : parser.parseGenomeLoc(str);
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "flankingGenomeLocs")
|
||||
public Object[][] getFlankingGenomeLocs() {
|
||||
int contigLength = 10000;
|
||||
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, contigLength);
|
||||
GenomeLocParser parser = new GenomeLocParser(header.getSequenceDictionary());
|
||||
|
||||
new FlankingGenomeLocTestData("atStartBase1", parser, 1,
|
||||
"chr1:1", null, "chr1:2");
|
||||
|
||||
new FlankingGenomeLocTestData("atStartBase50", parser, 50,
|
||||
"chr1:1", null, "chr1:2-51");
|
||||
|
||||
new FlankingGenomeLocTestData("atStartRange50", parser, 50,
|
||||
"chr1:1-10", null, "chr1:11-60");
|
||||
|
||||
new FlankingGenomeLocTestData("atEndBase1", parser, 1,
|
||||
"chr1:" + contigLength, "chr1:" + (contigLength - 1), null);
|
||||
|
||||
new FlankingGenomeLocTestData("atEndBase50", parser, 50,
|
||||
"chr1:" + contigLength, String.format("chr1:%d-%d", contigLength - 50, contigLength - 1), null);
|
||||
|
||||
new FlankingGenomeLocTestData("atEndRange50", parser, 50,
|
||||
String.format("chr1:%d-%d", contigLength - 10, contigLength),
|
||||
String.format("chr1:%d-%d", contigLength - 60, contigLength - 11),
|
||||
null);
|
||||
|
||||
new FlankingGenomeLocTestData("nearStartBase1", parser, 1,
|
||||
"chr1:2", "chr1:1", "chr1:3");
|
||||
|
||||
new FlankingGenomeLocTestData("nearStartRange50", parser, 50,
|
||||
"chr1:21-30", "chr1:1-20", "chr1:31-80");
|
||||
|
||||
new FlankingGenomeLocTestData("nearEndBase1", parser, 1,
|
||||
"chr1:" + (contigLength - 1), "chr1:" + (contigLength - 2), "chr1:" + contigLength);
|
||||
|
||||
new FlankingGenomeLocTestData("nearEndRange50", parser, 50,
|
||||
String.format("chr1:%d-%d", contigLength - 30, contigLength - 21),
|
||||
String.format("chr1:%d-%d", contigLength - 80, contigLength - 31),
|
||||
String.format("chr1:%d-%d", contigLength - 20, contigLength));
|
||||
|
||||
new FlankingGenomeLocTestData("beyondStartBase1", parser, 1,
|
||||
"chr1:3", "chr1:2", "chr1:4");
|
||||
|
||||
new FlankingGenomeLocTestData("beyondStartRange50", parser, 50,
|
||||
"chr1:101-200", "chr1:51-100", "chr1:201-250");
|
||||
|
||||
new FlankingGenomeLocTestData("beyondEndBase1", parser, 1,
|
||||
"chr1:" + (contigLength - 3),
|
||||
"chr1:" + (contigLength - 4),
|
||||
"chr1:" + (contigLength - 2));
|
||||
|
||||
new FlankingGenomeLocTestData("beyondEndRange50", parser, 50,
|
||||
String.format("chr1:%d-%d", contigLength - 200, contigLength - 101),
|
||||
String.format("chr1:%d-%d", contigLength - 250, contigLength - 201),
|
||||
String.format("chr1:%d-%d", contigLength - 100, contigLength - 51));
|
||||
|
||||
new FlankingGenomeLocTestData("unmapped", parser, 50,
|
||||
"unmapped", null, null);
|
||||
|
||||
new FlankingGenomeLocTestData("fullContig", parser, 50,
|
||||
"chr1", null, null);
|
||||
|
||||
return FlankingGenomeLocTestData.getTests(FlankingGenomeLocTestData.class);
|
||||
}
|
||||
|
||||
@Test(dataProvider = "flankingGenomeLocs")
|
||||
public void testCreateGenomeLocAtStart(FlankingGenomeLocTestData data) {
|
||||
GenomeLoc actual = data.parser.createGenomeLocAtStart(data.original, data.basePairs);
|
||||
String description = String.format("%n name: %s%n original: %s%n actual: %s%n expected: %s%n",
|
||||
data.toString(), data.original, actual, data.flankStart);
|
||||
assertEquals(actual, data.flankStart, description);
|
||||
}
|
||||
|
||||
@Test(dataProvider = "flankingGenomeLocs")
|
||||
public void testCreateGenomeLocAtStop(FlankingGenomeLocTestData data) {
|
||||
GenomeLoc actual = data.parser.createGenomeLocAtStop(data.original, data.basePairs);
|
||||
String description = String.format("%n name: %s%n original: %s%n actual: %s%n expected: %s%n",
|
||||
data.toString(), data.original, actual, data.flankStop);
|
||||
assertEquals(actual, data.flankStop, description);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,7 +16,8 @@ public class ReadUtilsUnitTest extends BaseTest {
|
|||
GATKSAMRecord read, reducedRead;
|
||||
final static String BASES = "ACTG";
|
||||
final static String QUALS = "!+5?";
|
||||
final private static byte[] REDUCED_READ_COUNTS = new byte[]{10, 20, 30, 40};
|
||||
final private static byte[] REDUCED_READ_COUNTS = new byte[]{10, 20, 30, 40};
|
||||
final private static byte[] REDUCED_READ_COUNTS_TAG = new byte[]{10, 10, 20, 30}; // just the offsets
|
||||
|
||||
@BeforeTest
|
||||
public void init() {
|
||||
|
|
@ -29,7 +30,7 @@ public class ReadUtilsUnitTest extends BaseTest {
|
|||
reducedRead = ArtificialSAMUtils.createArtificialRead(header, "reducedRead", 0, 1, BASES.length());
|
||||
reducedRead.setReadBases(BASES.getBytes());
|
||||
reducedRead.setBaseQualityString(QUALS);
|
||||
reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, REDUCED_READ_COUNTS);
|
||||
reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, REDUCED_READ_COUNTS_TAG);
|
||||
}
|
||||
|
||||
private void testReadBasesAndQuals(GATKSAMRecord read, int expectedStart, int expectedStop) {
|
||||
|
|
|
|||
|
|
@ -41,11 +41,6 @@ public class SimpleTimerUnitTest extends BaseTest {
|
|||
double t6 = t.getElapsedTime();
|
||||
Assert.assertTrue(t5 >= t4, "Restarted timer elapsed time should be after elapsed time preceding the restart");
|
||||
Assert.assertTrue(t6 >= t5, "Second elapsed time not after the first in restarted timer");
|
||||
|
||||
t.stop().start();
|
||||
Assert.assertTrue(t.isRunning(), "second started timer isn't running");
|
||||
Assert.assertTrue(t.getElapsedTime() >= 0.0, "elapsed time should have been reset");
|
||||
Assert.assertTrue(t.getElapsedTime() < t6, "elapsed time isn't less than time before start call"); // we should have effective no elapsed time
|
||||
}
|
||||
|
||||
private final static void idleLoop() {
|
||||
|
|
|
|||
|
|
@ -30,8 +30,10 @@ import org.broadinstitute.sting.BaseTest;
|
|||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.Test;
|
||||
import org.testng.annotations.*;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -44,180 +46,214 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
|
||||
// TODO: Add error messages on failed tests
|
||||
|
||||
//int debug = 0;
|
||||
|
||||
GATKSAMRecord read, expected;
|
||||
ReadClipper readClipper;
|
||||
final static String BASES = "ACTG";
|
||||
final static String QUALS = "!+5?"; //ASCII values = 33,43,53,63
|
||||
|
||||
@BeforeClass
|
||||
|
||||
public void testIfEqual( GATKSAMRecord read, byte[] readBases, String baseQuals, String cigar) {
|
||||
Assert.assertEquals(read.getReadBases(), readBases);
|
||||
Assert.assertEquals(read.getBaseQualityString(), baseQuals);
|
||||
Assert.assertEquals(read.getCigarString(), cigar);
|
||||
}
|
||||
|
||||
public class testParameter {
|
||||
int inputStart;
|
||||
int inputStop;
|
||||
int substringStart;
|
||||
int substringStop;
|
||||
String cigar;
|
||||
|
||||
public testParameter(int InputStart, int InputStop, int SubstringStart, int SubstringStop, String Cigar) {
|
||||
inputStart = InputStart;
|
||||
inputStop = InputStop;
|
||||
substringStart = SubstringStart;
|
||||
substringStop = SubstringStop;
|
||||
cigar = Cigar;
|
||||
}
|
||||
}
|
||||
|
||||
// What the test read looks like
|
||||
// Ref: 1 2 3 4 5 6 7 8
|
||||
// Read: 0 1 2 3 - - - -
|
||||
// -----------------------------
|
||||
// Bases: A C T G - - - -
|
||||
// Quals: ! + 5 ? - - - -
|
||||
|
||||
@BeforeMethod
|
||||
public void init() {
|
||||
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
|
||||
read = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, BASES.length());
|
||||
read.setReadUnmappedFlag(true);
|
||||
read.setReadBases(new String(BASES).getBytes());
|
||||
read.setBaseQualityString(new String(QUALS));
|
||||
|
||||
readClipper = new ReadClipper(read);
|
||||
//logger.warn(read.getCigarString());
|
||||
}
|
||||
|
||||
@Test ( enabled = false )
|
||||
@Test ( enabled = true )
|
||||
public void testHardClipBothEndsByReferenceCoordinates() {
|
||||
logger.warn("Executing testHardClipBothEndsByReferenceCoordinates");
|
||||
|
||||
logger.warn("Executing testHardClipBothEndsByReferenceCoordinates");
|
||||
//int debug = 1;
|
||||
//Clip whole read
|
||||
Assert.assertEquals(readClipper.hardClipBothEndsByReferenceCoordinates(0,0), new GATKSAMRecord(read.getHeader()));
|
||||
Assert.assertEquals(readClipper.hardClipBothEndsByReferenceCoordinates(1,1), new GATKSAMRecord(read.getHeader()));
|
||||
|
||||
//clip 1 base
|
||||
expected = readClipper.hardClipBothEndsByReferenceCoordinates(0,3);
|
||||
expected = readClipper.hardClipBothEndsByReferenceCoordinates(1,4);
|
||||
Assert.assertEquals(expected.getReadBases(), BASES.substring(1,3).getBytes());
|
||||
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,3));
|
||||
Assert.assertEquals(expected.getCigarString(), "1H2M1H");
|
||||
|
||||
}
|
||||
|
||||
@Test ( enabled = false )
|
||||
@Test ( enabled = true )
|
||||
public void testHardClipByReadCoordinates() {
|
||||
|
||||
logger.warn("Executing testHardClipByReadCoordinates");
|
||||
|
||||
//Clip whole read
|
||||
Assert.assertEquals(readClipper.hardClipByReadCoordinates(0,3), new GATKSAMRecord(read.getHeader()));
|
||||
|
||||
//clip 1 base at start
|
||||
expected = readClipper.hardClipByReadCoordinates(0,0);
|
||||
Assert.assertEquals(expected.getReadBases(), BASES.substring(1,4).getBytes());
|
||||
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,4));
|
||||
Assert.assertEquals(expected.getCigarString(), "1H3M");
|
||||
List<testParameter> testList = new LinkedList<testParameter>();
|
||||
testList.add(new testParameter(0,0,1,4,"1H3M"));//clip 1 base at start
|
||||
testList.add(new testParameter(3,3,0,3,"3M1H"));//clip 1 base at end
|
||||
testList.add(new testParameter(0,1,2,4,"2H2M"));//clip 2 bases at start
|
||||
testList.add(new testParameter(2,3,0,2,"2M2H"));//clip 2 bases at end
|
||||
|
||||
//clip 1 base at end
|
||||
expected = readClipper.hardClipByReadCoordinates(3,3);
|
||||
Assert.assertEquals(expected.getReadBases(), BASES.substring(0,3).getBytes());
|
||||
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,3));
|
||||
Assert.assertEquals(expected.getCigarString(), "3M1H");
|
||||
|
||||
//clip 2 bases at start
|
||||
expected = readClipper.hardClipByReadCoordinates(0,1);
|
||||
Assert.assertEquals(expected.getReadBases(), BASES.substring(2,4).getBytes());
|
||||
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(2,4));
|
||||
Assert.assertEquals(expected.getCigarString(), "2H2M");
|
||||
|
||||
//clip 2 bases at end
|
||||
expected = readClipper.hardClipByReadCoordinates(2,3);
|
||||
Assert.assertEquals(expected.getReadBases(), BASES.substring(0,2).getBytes());
|
||||
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,2));
|
||||
Assert.assertEquals(expected.getCigarString(), "2M2H");
|
||||
for ( testParameter p : testList ) {
|
||||
init();
|
||||
//logger.warn("Testing Parameters: " + p.inputStart+","+p.inputStop+","+p.substringStart+","+p.substringStop+","+p.cigar);
|
||||
testIfEqual( readClipper.hardClipByReadCoordinates(p.inputStart, p.inputStop),
|
||||
BASES.substring(p.substringStart,p.substringStop).getBytes(),
|
||||
QUALS.substring(p.substringStart,p.substringStop),
|
||||
p.cigar );
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Test ( enabled = false )
|
||||
@Test ( enabled = true )
|
||||
public void testHardClipByReferenceCoordinates() {
|
||||
logger.warn("Executing testHardClipByReferenceCoordinates");
|
||||
|
||||
//logger.warn(debug);
|
||||
//Clip whole read
|
||||
Assert.assertEquals(readClipper.hardClipByReferenceCoordinates(1,4), new GATKSAMRecord(read.getHeader()));
|
||||
|
||||
//clip 1 base at start
|
||||
expected = readClipper.hardClipByReferenceCoordinates(-1,1);
|
||||
Assert.assertEquals(expected.getReadBases(), BASES.substring(1,4).getBytes());
|
||||
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,4));
|
||||
Assert.assertEquals(expected.getCigarString(), "1H3M");
|
||||
List<testParameter> testList = new LinkedList<testParameter>();
|
||||
testList.add(new testParameter(-1,1,1,4,"1H3M"));//clip 1 base at start
|
||||
testList.add(new testParameter(4,-1,0,3,"3M1H"));//clip 1 base at end
|
||||
testList.add(new testParameter(-1,2,2,4,"2H2M"));//clip 2 bases at start
|
||||
testList.add(new testParameter(3,-1,0,2,"2M2H"));//clip 2 bases at end
|
||||
|
||||
//clip 1 base at end
|
||||
expected = readClipper.hardClipByReferenceCoordinates(3,-1);
|
||||
Assert.assertEquals(expected.getReadBases(), BASES.substring(0,3).getBytes());
|
||||
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,3));
|
||||
Assert.assertEquals(expected.getCigarString(), "3M1H");
|
||||
|
||||
//clip 2 bases at start
|
||||
expected = readClipper.hardClipByReferenceCoordinates(-1,2);
|
||||
Assert.assertEquals(expected.getReadBases(), BASES.substring(2,4).getBytes());
|
||||
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(2,4));
|
||||
Assert.assertEquals(expected.getCigarString(), "2H2M");
|
||||
|
||||
//clip 2 bases at end
|
||||
expected = readClipper.hardClipByReferenceCoordinates(2,-1);
|
||||
Assert.assertEquals(expected.getReadBases(), BASES.substring(0,2).getBytes());
|
||||
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,2));
|
||||
Assert.assertEquals(expected.getCigarString(), "2M2H");
|
||||
for ( testParameter p : testList ) {
|
||||
init();
|
||||
//logger.warn("Testing Parameters: " + p.inputStart+","+p.inputStop+","+p.substringStart+","+p.substringStop+","+p.cigar);
|
||||
testIfEqual( readClipper.hardClipByReferenceCoordinates(p.inputStart,p.inputStop),
|
||||
BASES.substring(p.substringStart,p.substringStop).getBytes(),
|
||||
QUALS.substring(p.substringStart,p.substringStop),
|
||||
p.cigar );
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Test ( enabled = false )
|
||||
@Test ( enabled = true )
|
||||
public void testHardClipByReferenceCoordinatesLeftTail() {
|
||||
init();
|
||||
logger.warn("Executing testHardClipByReferenceCoordinatesLeftTail");
|
||||
|
||||
//Clip whole read
|
||||
Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesLeftTail(4), new GATKSAMRecord(read.getHeader()));
|
||||
|
||||
//clip 1 base at start
|
||||
expected = readClipper.hardClipByReferenceCoordinatesLeftTail(1);
|
||||
Assert.assertEquals(expected.getReadBases(), BASES.substring(1,4).getBytes());
|
||||
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,4));
|
||||
Assert.assertEquals(expected.getCigarString(), "1H3M");
|
||||
List<testParameter> testList = new LinkedList<testParameter>();
|
||||
testList.add(new testParameter(1, -1, 1, 4, "1H3M"));//clip 1 base at start
|
||||
testList.add(new testParameter(2, -1, 2, 4, "2H2M"));//clip 2 bases at start
|
||||
|
||||
//clip 2 bases at start
|
||||
expected = readClipper.hardClipByReferenceCoordinatesLeftTail(2);
|
||||
Assert.assertEquals(expected.getReadBases(), BASES.substring(2,4).getBytes());
|
||||
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(2,4));
|
||||
Assert.assertEquals(expected.getCigarString(), "2H2M");
|
||||
for ( testParameter p : testList ) {
|
||||
init();
|
||||
//logger.warn("Testing Parameters: " + p.inputStart+","+p.substringStart+","+p.substringStop+","+p.cigar);
|
||||
testIfEqual( readClipper.hardClipByReferenceCoordinatesLeftTail(p.inputStart),
|
||||
BASES.substring(p.substringStart,p.substringStop).getBytes(),
|
||||
QUALS.substring(p.substringStart,p.substringStop),
|
||||
p.cigar );
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Test ( enabled = false )
|
||||
@Test ( enabled = true )
|
||||
public void testHardClipByReferenceCoordinatesRightTail() {
|
||||
init();
|
||||
logger.warn("Executing testHardClipByReferenceCoordinatesRightTail");
|
||||
|
||||
//Clip whole read
|
||||
Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesRightTail(1), new GATKSAMRecord(read.getHeader()));
|
||||
|
||||
//clip 1 base at end
|
||||
expected = readClipper.hardClipByReferenceCoordinatesRightTail(3);
|
||||
Assert.assertEquals(expected.getReadBases(), BASES.substring(0,3).getBytes());
|
||||
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,3));
|
||||
Assert.assertEquals(expected.getCigarString(), "3M1H");
|
||||
List<testParameter> testList = new LinkedList<testParameter>();
|
||||
testList.add(new testParameter(-1, 4, 0, 3, "3M1H"));//clip 1 base at end
|
||||
testList.add(new testParameter(-1, 3, 0, 2, "2M2H"));//clip 2 bases at end
|
||||
|
||||
//clip 2 bases at end
|
||||
expected = readClipper.hardClipByReferenceCoordinatesRightTail(2);
|
||||
Assert.assertEquals(expected.getReadBases(), BASES.substring(0,2).getBytes());
|
||||
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,2));
|
||||
Assert.assertEquals(expected.getCigarString(), "2M2H");
|
||||
for ( testParameter p : testList ) {
|
||||
init();
|
||||
//logger.warn("Testing Parameters: " + p.inputStop+","+p.substringStart+","+p.substringStop+","+p.cigar);
|
||||
testIfEqual( readClipper.hardClipByReferenceCoordinatesRightTail(p.inputStop),
|
||||
BASES.substring(p.substringStart,p.substringStop).getBytes(),
|
||||
QUALS.substring(p.substringStart,p.substringStop),
|
||||
p.cigar );
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Test ( enabled = false )
|
||||
@Test ( enabled = true ) // TODO This function is returning null reads
|
||||
public void testHardClipLowQualEnds() {
|
||||
logger.warn("Executing testHardClipByReferenceCoordinates");
|
||||
|
||||
logger.warn("Executing testHardClipByReferenceCoordinates");
|
||||
|
||||
//Clip whole read
|
||||
Assert.assertEquals(readClipper.hardClipLowQualEnds((byte)64), new GATKSAMRecord(read.getHeader()));
|
||||
|
||||
//clip 1 base at start
|
||||
expected = readClipper.hardClipLowQualEnds((byte)34);
|
||||
Assert.assertEquals(expected.getReadBases(), BASES.substring(1,4).getBytes());
|
||||
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,4));
|
||||
Assert.assertEquals(expected.getCigarString(), "1H3M");
|
||||
|
||||
//clip 2 bases at start
|
||||
expected = readClipper.hardClipLowQualEnds((byte)44);
|
||||
Assert.assertEquals(expected.getReadBases(), BASES.substring(2,4).getBytes());
|
||||
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(2,4));
|
||||
Assert.assertEquals(expected.getCigarString(), "2H2M");
|
||||
List<testParameter> testList = new LinkedList<testParameter>();
|
||||
testList.add(new testParameter(1,-1,1,4,"1H3M"));//clip 1 base at start
|
||||
testList.add(new testParameter(11,-1,2,4,"2H2M"));//clip 2 bases at start
|
||||
|
||||
for ( testParameter p : testList ) {
|
||||
init();
|
||||
//logger.warn("Testing Parameters: " + p.inputStart+","+p.substringStart+","+p.substringStop+","+p.cigar);
|
||||
testIfEqual( readClipper.hardClipLowQualEnds( (byte)p.inputStart ),
|
||||
BASES.substring(p.substringStart,p.substringStop).getBytes(),
|
||||
QUALS.substring(p.substringStart,p.substringStop),
|
||||
p.cigar );
|
||||
}
|
||||
/* todo find a better way to test lowqual tail clipping on both sides
|
||||
// Reverse Quals sequence
|
||||
readClipper.getRead().setBaseQualityString("?5+!"); // 63,53,43,33
|
||||
|
||||
//clip 1 base at end
|
||||
expected = readClipper.hardClipLowQualEnds((byte)34);
|
||||
Assert.assertEquals(expected.getReadBases(), BASES.substring(0,3).getBytes());
|
||||
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,3));
|
||||
Assert.assertEquals(expected.getCigarString(), "3M1H");
|
||||
testList = new LinkedList<testParameter>();
|
||||
testList.add(new testParameter(1,-1,0,3,"3M1H"));//clip 1 base at end
|
||||
testList.add(new testParameter(11,-1,0,2,"2M2H"));//clip 2 bases at end
|
||||
|
||||
//clip 2 bases at end
|
||||
expected = readClipper.hardClipLowQualEnds((byte)44);
|
||||
Assert.assertEquals(expected.getReadBases(), BASES.substring(0,2).getBytes());
|
||||
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,2));
|
||||
Assert.assertEquals(expected.getCigarString(), "2M2H");
|
||||
|
||||
// revert Qual sequence
|
||||
readClipper.getRead().setBaseQualityString(QUALS);
|
||||
for ( testParameter p : testList ) {
|
||||
init();
|
||||
readClipper.getRead().setBaseQualityString("?5+!"); // 63,53,43,33
|
||||
//logger.warn("Testing Parameters: " + p.inputStart+","+p.substringStart+","+p.substringStop+","+p.cigar);
|
||||
testIfEqual( readClipper.hardClipLowQualEnds( (byte)p.inputStart ),
|
||||
BASES.substring(p.substringStart,p.substringStop).getBytes(),
|
||||
QUALS.substring(p.substringStart,p.substringStop),
|
||||
p.cigar );
|
||||
}
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
||||
public class CigarReadMaker {
|
||||
|
||||
}
|
||||
|
||||
@Test ( enabled = false )
|
||||
public void testHardClipSoftClippedBases() {
|
||||
|
||||
// Generate a list of cigars to test
|
||||
// We will use testParameter in the following way
|
||||
// Right tail, left tail,
|
||||
}
|
||||
}
|
||||
|
|
@ -1,8 +1,8 @@
|
|||
package org.broadinstitute.sting.utils.interval;
|
||||
|
||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||
import net.sf.picard.util.IntervalUtil;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
|
|
@ -762,4 +762,225 @@ public class IntervalUtilsUnitTest extends BaseTest {
|
|||
List<GenomeLoc> merged = IntervalUtils.mergeIntervalLocations(locs, IntervalMergingRule.ALL);
|
||||
Assert.assertEquals(merged.size(), 1);
|
||||
}
|
||||
|
||||
/*
|
||||
Split into tests that can be written to files and tested by writeFlankingIntervals,
|
||||
and lists that cannot but are still handled by getFlankingIntervals.
|
||||
*/
|
||||
private static abstract class FlankingIntervalsTestData extends TestDataProvider {
|
||||
final public File referenceFile;
|
||||
final public GenomeLocParser parser;
|
||||
final int basePairs;
|
||||
final List<GenomeLoc> original;
|
||||
final List<GenomeLoc> expected;
|
||||
|
||||
protected FlankingIntervalsTestData(Class<?> clazz, String name, File referenceFile, GenomeLocParser parser,
|
||||
int basePairs, List<String> original, List<String> expected) {
|
||||
super(clazz, name);
|
||||
this.referenceFile = referenceFile;
|
||||
this.parser = parser;
|
||||
this.basePairs = basePairs;
|
||||
this.original = parse(parser, original);
|
||||
this.expected = parse(parser, expected);
|
||||
}
|
||||
|
||||
private static List<GenomeLoc> parse(GenomeLocParser parser, List<String> locs) {
|
||||
List<GenomeLoc> parsed = new ArrayList<GenomeLoc>();
|
||||
for (String loc: locs)
|
||||
parsed.add("unmapped".equals(loc) ? GenomeLoc.UNMAPPED : parser.parseGenomeLoc(loc));
|
||||
return parsed;
|
||||
}
|
||||
}
|
||||
|
||||
private static class FlankingIntervalsFile extends FlankingIntervalsTestData {
|
||||
public FlankingIntervalsFile(String name, File referenceFile, GenomeLocParser parser,
|
||||
int basePairs, List<String> original, List<String> expected) {
|
||||
super(FlankingIntervalsFile.class, name, referenceFile, parser, basePairs, original, expected);
|
||||
}
|
||||
}
|
||||
|
||||
private static class FlankingIntervalsList extends FlankingIntervalsTestData {
|
||||
public FlankingIntervalsList(String name, File referenceFile, GenomeLocParser parser,
|
||||
int basePairs, List<String> original, List<String> expected) {
|
||||
super(FlankingIntervalsList.class, name, referenceFile, parser, basePairs, original, expected);
|
||||
}
|
||||
}
|
||||
|
||||
/* Intervals where the original and the flanks can be written to files. */
|
||||
@DataProvider(name = "flankingIntervalsFiles")
|
||||
public Object[][] getFlankingIntervalsFiles() {
|
||||
File hg19ReferenceFile = new File(BaseTest.hg19Reference);
|
||||
int hg19Length1 = hg19GenomeLocParser.getContigInfo("1").getSequenceLength();
|
||||
|
||||
new FlankingIntervalsFile("atStartBase1", hg19ReferenceFile, hg19GenomeLocParser, 1,
|
||||
Arrays.asList("1:1"),
|
||||
Arrays.asList("1:2"));
|
||||
|
||||
new FlankingIntervalsFile("atStartBase50", hg19ReferenceFile, hg19GenomeLocParser, 50,
|
||||
Arrays.asList("1:1"),
|
||||
Arrays.asList("1:2-51"));
|
||||
|
||||
new FlankingIntervalsFile("atStartRange50", hg19ReferenceFile, hg19GenomeLocParser, 50,
|
||||
Arrays.asList("1:1-10"),
|
||||
Arrays.asList("1:11-60"));
|
||||
|
||||
new FlankingIntervalsFile("atEndBase1", hg19ReferenceFile, hg19GenomeLocParser, 1,
|
||||
Arrays.asList("1:" + hg19Length1),
|
||||
Arrays.asList("1:" + (hg19Length1 - 1)));
|
||||
|
||||
new FlankingIntervalsFile("atEndBase50", hg19ReferenceFile, hg19GenomeLocParser, 50,
|
||||
Arrays.asList("1:" + hg19Length1),
|
||||
Arrays.asList(String.format("1:%d-%d", hg19Length1 - 50, hg19Length1 - 1)));
|
||||
|
||||
new FlankingIntervalsFile("atEndRange50", hg19ReferenceFile, hg19GenomeLocParser, 50,
|
||||
Arrays.asList(String.format("1:%d-%d", hg19Length1 - 10, hg19Length1)),
|
||||
Arrays.asList(String.format("1:%d-%d", hg19Length1 - 60, hg19Length1 - 11)));
|
||||
|
||||
new FlankingIntervalsFile("nearStartBase1", hg19ReferenceFile, hg19GenomeLocParser, 1,
|
||||
Arrays.asList("1:2"),
|
||||
Arrays.asList("1:1", "1:3"));
|
||||
|
||||
new FlankingIntervalsFile("nearStartRange50", hg19ReferenceFile, hg19GenomeLocParser, 50,
|
||||
Arrays.asList("1:21-30"),
|
||||
Arrays.asList("1:1-20", "1:31-80"));
|
||||
|
||||
new FlankingIntervalsFile("nearEndBase1", hg19ReferenceFile, hg19GenomeLocParser, 1,
|
||||
Arrays.asList("1:" + (hg19Length1 - 1)),
|
||||
Arrays.asList("1:" + (hg19Length1 - 2), "1:" + hg19Length1));
|
||||
|
||||
new FlankingIntervalsFile("nearEndRange50", hg19ReferenceFile, hg19GenomeLocParser, 50,
|
||||
Arrays.asList(String.format("1:%d-%d", hg19Length1 - 30, hg19Length1 - 21)),
|
||||
Arrays.asList(
|
||||
String.format("1:%d-%d", hg19Length1 - 80, hg19Length1 - 31),
|
||||
String.format("1:%d-%d", hg19Length1 - 20, hg19Length1)));
|
||||
|
||||
new FlankingIntervalsFile("beyondStartBase1", hg19ReferenceFile, hg19GenomeLocParser, 1,
|
||||
Arrays.asList("1:3"),
|
||||
Arrays.asList("1:2", "1:4"));
|
||||
|
||||
new FlankingIntervalsFile("beyondStartRange50", hg19ReferenceFile, hg19GenomeLocParser, 50,
|
||||
Arrays.asList("1:101-200"),
|
||||
Arrays.asList("1:51-100", "1:201-250"));
|
||||
|
||||
new FlankingIntervalsFile("beyondEndBase1", hg19ReferenceFile, hg19GenomeLocParser, 1,
|
||||
Arrays.asList("1:" + (hg19Length1 - 3)),
|
||||
Arrays.asList("1:" + (hg19Length1 - 4), "1:" + (hg19Length1 - 2)));
|
||||
|
||||
new FlankingIntervalsFile("beyondEndRange50", hg19ReferenceFile, hg19GenomeLocParser, 50,
|
||||
Arrays.asList(String.format("1:%d-%d", hg19Length1 - 200, hg19Length1 - 101)),
|
||||
Arrays.asList(
|
||||
String.format("1:%d-%d", hg19Length1 - 250, hg19Length1 - 201),
|
||||
String.format("1:%d-%d", hg19Length1 - 100, hg19Length1 - 51)));
|
||||
|
||||
new FlankingIntervalsFile("betweenFar50", hg19ReferenceFile, hg19GenomeLocParser, 50,
|
||||
Arrays.asList("1:101-200", "1:401-500"),
|
||||
Arrays.asList("1:51-100", "1:201-250", "1:351-400", "1:501-550"));
|
||||
|
||||
new FlankingIntervalsFile("betweenSpan50", hg19ReferenceFile, hg19GenomeLocParser, 50,
|
||||
Arrays.asList("1:101-200", "1:301-400"),
|
||||
Arrays.asList("1:51-100", "1:201-300", "1:401-450"));
|
||||
|
||||
new FlankingIntervalsFile("betweenOverlap50", hg19ReferenceFile, hg19GenomeLocParser, 50,
|
||||
Arrays.asList("1:101-200", "1:271-400"),
|
||||
Arrays.asList("1:51-100", "1:201-270", "1:401-450"));
|
||||
|
||||
new FlankingIntervalsFile("betweenShort50", hg19ReferenceFile, hg19GenomeLocParser, 50,
|
||||
Arrays.asList("1:101-200", "1:221-400"),
|
||||
Arrays.asList("1:51-100", "1:201-220", "1:401-450"));
|
||||
|
||||
new FlankingIntervalsFile("betweenNone50", hg19ReferenceFile, hg19GenomeLocParser, 50,
|
||||
Arrays.asList("1:101-200", "1:121-400"),
|
||||
Arrays.asList("1:51-100", "1:401-450"));
|
||||
|
||||
new FlankingIntervalsFile("twoContigs", hg19ReferenceFile, hg19GenomeLocParser, 50,
|
||||
Arrays.asList("1:101-200", "2:301-400"),
|
||||
Arrays.asList("1:51-100", "1:201-250", "2:251-300", "2:401-450"));
|
||||
|
||||
// Explicit testing a problematic agilent target pair
|
||||
new FlankingIntervalsFile("badAgilent", hg19ReferenceFile, hg19GenomeLocParser, 50,
|
||||
Arrays.asList("2:74756257-74756411", "2:74756487-74756628"),
|
||||
// wrong! ("2:74756206-74756256", "2:74756412-74756462", "2:74756436-74756486", "2:74756629-74756679")
|
||||
Arrays.asList("2:74756207-74756256", "2:74756412-74756486", "2:74756629-74756678"));
|
||||
|
||||
return TestDataProvider.getTests(FlankingIntervalsFile.class);
|
||||
}
|
||||
|
||||
/* Intervals where either the original and/or the flanks cannot be written to a file. */
|
||||
@DataProvider(name = "flankingIntervalsLists")
|
||||
public Object[][] getFlankingIntervalsLists() {
|
||||
File hg19ReferenceFile = new File(BaseTest.hg19Reference);
|
||||
List<String> empty = Collections.emptyList();
|
||||
|
||||
new FlankingIntervalsList("empty", hg19ReferenceFile, hg19GenomeLocParser, 50,
|
||||
empty,
|
||||
empty);
|
||||
|
||||
new FlankingIntervalsList("unmapped", hg19ReferenceFile, hg19GenomeLocParser, 50,
|
||||
Arrays.asList("unmapped"),
|
||||
empty);
|
||||
|
||||
new FlankingIntervalsList("fullContig", hg19ReferenceFile, hg19GenomeLocParser, 50,
|
||||
Arrays.asList("1"),
|
||||
empty);
|
||||
|
||||
new FlankingIntervalsList("fullContigs", hg19ReferenceFile, hg19GenomeLocParser, 50,
|
||||
Arrays.asList("1", "2", "3"),
|
||||
empty);
|
||||
|
||||
new FlankingIntervalsList("betweenWithUnmapped", hg19ReferenceFile, hg19GenomeLocParser, 50,
|
||||
Arrays.asList("1:101-200", "1:301-400", "unmapped"),
|
||||
Arrays.asList("1:51-100", "1:201-300", "1:401-450"));
|
||||
|
||||
return TestDataProvider.getTests(FlankingIntervalsList.class);
|
||||
}
|
||||
|
||||
@Test(dataProvider = "flankingIntervalsFiles")
|
||||
public void testWriteFlankingIntervals(FlankingIntervalsTestData data) throws Exception {
|
||||
File originalFile = createTempFile("original.", ".intervals");
|
||||
File flankingFile = createTempFile("flanking.", ".intervals");
|
||||
try {
|
||||
List<String> lines = new ArrayList<String>();
|
||||
for (GenomeLoc loc: data.original)
|
||||
lines.add(loc.toString());
|
||||
FileUtils.writeLines(originalFile, lines);
|
||||
|
||||
IntervalUtils.writeFlankingIntervals(data.referenceFile, originalFile, flankingFile, data.basePairs);
|
||||
|
||||
List<GenomeLoc> actual = IntervalUtils.intervalFileToList(data.parser, flankingFile.getAbsolutePath());
|
||||
|
||||
String description = String.format("%n name: %s%n original: %s%n actual: %s%n expected: %s%n",
|
||||
data.toString(), data.original, actual, data.expected);
|
||||
Assert.assertEquals(actual, data.expected, description);
|
||||
} finally {
|
||||
FileUtils.deleteQuietly(originalFile);
|
||||
FileUtils.deleteQuietly(flankingFile);
|
||||
}
|
||||
}
|
||||
|
||||
@Test(dataProvider = "flankingIntervalsLists", expectedExceptions = UserException.class)
|
||||
public void testWritingBadFlankingIntervals(FlankingIntervalsTestData data) throws Exception {
|
||||
File originalFile = createTempFile("original.", ".intervals");
|
||||
File flankingFile = createTempFile("flanking.", ".intervals");
|
||||
try {
|
||||
List<String> lines = new ArrayList<String>();
|
||||
for (GenomeLoc loc: data.original)
|
||||
lines.add(loc.toString());
|
||||
FileUtils.writeLines(originalFile, lines);
|
||||
|
||||
// Should throw a user exception on bad input if either the original
|
||||
// intervals are empty or if the flanking intervals are empty
|
||||
IntervalUtils.writeFlankingIntervals(data.referenceFile, originalFile, flankingFile, data.basePairs);
|
||||
} finally {
|
||||
FileUtils.deleteQuietly(originalFile);
|
||||
FileUtils.deleteQuietly(flankingFile);
|
||||
}
|
||||
}
|
||||
|
||||
@Test(dataProvider = "flankingIntervalsLists")
|
||||
public void testGetFlankingIntervals(FlankingIntervalsTestData data) {
|
||||
List<GenomeLoc> actual = IntervalUtils.getFlankingIntervals(data.parser, data.original, data.basePairs);
|
||||
String description = String.format("%n name: %s%n original: %s%n actual: %s%n expected: %s%n",
|
||||
data.toString(), data.original, actual, data.expected);
|
||||
Assert.assertEquals(actual, data.expected, description);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,10 +29,13 @@ package org.broadinstitute.sting.utils.variantcontext;
|
|||
// the imports for unit testing.
|
||||
|
||||
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
|
||||
import java.util.EnumMap;
|
||||
|
||||
|
||||
/**
|
||||
* Basic unit test for Genotype likelihoods objects
|
||||
|
|
@ -69,6 +72,50 @@ public class GenotypeLikelihoodsUnitTest {
|
|||
gl.getAsVector();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetAsMap(){
|
||||
GenotypeLikelihoods gl = new GenotypeLikelihoods(v);
|
||||
//Log scale
|
||||
EnumMap<Genotype.Type,Double> glMap = gl.getAsMap(false);
|
||||
Assert.assertEquals(v[Genotype.Type.HOM_REF.ordinal()-1],glMap.get(Genotype.Type.HOM_REF));
|
||||
Assert.assertEquals(v[Genotype.Type.HET.ordinal()-1],glMap.get(Genotype.Type.HET));
|
||||
Assert.assertEquals(v[Genotype.Type.HOM_VAR.ordinal()-1],glMap.get(Genotype.Type.HOM_VAR));
|
||||
|
||||
//Linear scale
|
||||
glMap = gl.getAsMap(true);
|
||||
double [] vl = MathUtils.normalizeFromLog10(v);
|
||||
Assert.assertEquals(vl[Genotype.Type.HOM_REF.ordinal()-1],glMap.get(Genotype.Type.HOM_REF));
|
||||
Assert.assertEquals(vl[Genotype.Type.HET.ordinal()-1],glMap.get(Genotype.Type.HET));
|
||||
Assert.assertEquals(vl[Genotype.Type.HOM_VAR.ordinal()-1],glMap.get(Genotype.Type.HOM_VAR));
|
||||
|
||||
//Test missing likelihoods
|
||||
gl = new GenotypeLikelihoods(".");
|
||||
glMap = gl.getAsMap(false);
|
||||
Assert.assertNull(glMap);
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetNegLog10GQ(){
|
||||
GenotypeLikelihoods gl = new GenotypeLikelihoods(vPLString);
|
||||
|
||||
//GQ for the best guess genotype
|
||||
Assert.assertEquals(gl.getNegLog10GQ(Genotype.Type.HET),3.9);
|
||||
|
||||
double[] test = MathUtils.normalizeFromLog10(gl.getAsVector());
|
||||
|
||||
//GQ for the other genotypes
|
||||
Assert.assertEquals(gl.getNegLog10GQ(Genotype.Type.HOM_REF), -1.0 * Math.log10(1.0 - test[Genotype.Type.HOM_REF.ordinal()-1]));
|
||||
Assert.assertEquals(gl.getNegLog10GQ(Genotype.Type.HOM_VAR), -1.0 * Math.log10(1.0 - test[Genotype.Type.HOM_VAR.ordinal()-1]));
|
||||
|
||||
//Test missing likelihoods
|
||||
gl = new GenotypeLikelihoods(".");
|
||||
Assert.assertEquals(gl.getNegLog10GQ(Genotype.Type.HOM_REF),Double.NEGATIVE_INFINITY);
|
||||
Assert.assertEquals(gl.getNegLog10GQ(Genotype.Type.HET),Double.NEGATIVE_INFINITY);
|
||||
Assert.assertEquals(gl.getNegLog10GQ(Genotype.Type.HOM_VAR),Double.NEGATIVE_INFINITY);
|
||||
|
||||
}
|
||||
|
||||
private void assertDoubleArraysAreEqual(double[] v1, double[] v2) {
|
||||
Assert.assertEquals(v1.length, v2.length);
|
||||
for ( int i = 0; i < v1.length; i++ ) {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.queue.extensions.gatk
|
||||
|
||||
import org.broadinstitute.sting.queue.function.InProcessFunction
|
||||
import org.broadinstitute.sting.commandline.{Output, Argument, Input}
|
||||
import java.io.File
|
||||
import org.broadinstitute.sting.utils.interval.IntervalUtils
|
||||
|
||||
class WriteFlankingIntervalsFunction extends InProcessFunction {
|
||||
@Input(doc="The reference sequence")
|
||||
var reference : File = _
|
||||
|
||||
@Input(doc="The interval list to flank")
|
||||
var inputIntervals : File = _
|
||||
|
||||
@Output(doc="The output intervals file to write to")
|
||||
var outputIntervals: File = _
|
||||
|
||||
@Argument(doc="Number of base pair to flank the input intervals")
|
||||
var flankSize : Int = _
|
||||
|
||||
def run() {
|
||||
IntervalUtils.writeFlankingIntervals(reference, inputIntervals, outputIntervals, flankSize)
|
||||
}
|
||||
}
|
||||
|
|
@ -1,135 +0,0 @@
|
|||
package org.broadinstitute.sting.queue.library.ipf.intervals
|
||||
|
||||
import org.broadinstitute.sting.queue.function.InProcessFunction
|
||||
import org.broadinstitute.sting.commandline._
|
||||
import java.io.{PrintStream, File}
|
||||
import collection.JavaConversions._
|
||||
import org.broadinstitute.sting.utils.text.XReadLines
|
||||
import net.sf.picard.reference.FastaSequenceFile
|
||||
import org.broadinstitute.sting.utils.{GenomeLoc, GenomeLocParser}
|
||||
import collection.immutable.TreeSet
|
||||
|
||||
// todo -- this is unsafe. Need to use a reference dictionary to ensure no off-contig targets are created
|
||||
class ExpandIntervals(in : File, start: Int, size: Int, out: File, ref: File, ipType: String, opType: String) extends InProcessFunction {
|
||||
@Input(doc="The interval list to expand") val inList : File = in
|
||||
@Input(doc="The reference sequence") val refDict : File = ref
|
||||
@Argument(doc="Number of basepair to start the expanded interval") val startInt : Int = start
|
||||
@Argument(doc="Number of baispair to stop the expanded interval") val sizeInt : Int = size
|
||||
@Output(doc="The output intervals file to write to") val outList : File = out
|
||||
@Argument(doc="The output format for the intervals") val outTypeStr = opType
|
||||
@Argument(doc="The input format for the intervals") val inTypeStr = ipType
|
||||
|
||||
var output : PrintStream = _
|
||||
var parser : GenomeLocParser = _
|
||||
var xrl : XReadLines = _
|
||||
val outType = IntervalFormatType.convert(outTypeStr)
|
||||
val inType = IntervalFormatType.convert(inTypeStr)
|
||||
|
||||
var offsetIn : Int = 0
|
||||
var offsetOut : Int = 0
|
||||
|
||||
var first : Boolean = true
|
||||
var lastTwo : (GenomeLoc,GenomeLoc) = _
|
||||
|
||||
var intervalCache : TreeSet[GenomeLoc] = _
|
||||
val LINES_TO_CACHE : Int = 1000
|
||||
|
||||
def run = {
|
||||
output = new PrintStream(outList)
|
||||
intervalCache = new TreeSet[GenomeLoc]()(new Ordering[GenomeLoc]{
|
||||
def compare(o1: GenomeLoc, o2: GenomeLoc) : Int = { o1.compareTo(o2) }
|
||||
})
|
||||
parser = new GenomeLocParser(new FastaSequenceFile(ref,true))
|
||||
xrl = new XReadLines(inList)
|
||||
offsetIn = if (isBed(inType)) 1 else 0
|
||||
offsetOut = if( isBed(outType)) 1 else 0
|
||||
var line : String = xrl.next
|
||||
while ( line.startsWith("@") ) {
|
||||
line = xrl.next
|
||||
}
|
||||
var prevLoc: GenomeLoc = null
|
||||
var curLoc: GenomeLoc = null
|
||||
var nextLoc : GenomeLoc = parseGenomeInterval(line)
|
||||
var linesProcessed : Int = 1
|
||||
while ( prevLoc != null || curLoc != null || nextLoc != null ) {
|
||||
prevLoc = curLoc
|
||||
curLoc = nextLoc
|
||||
nextLoc = if ( xrl.hasNext ) parseGenomeInterval(xrl.next) else null
|
||||
if ( curLoc != null ) {
|
||||
val left: GenomeLoc = refine(expandLeft(curLoc),prevLoc)
|
||||
val right: GenomeLoc = refine(expandRight(curLoc),nextLoc)
|
||||
if ( left != null ) {
|
||||
intervalCache += left
|
||||
}
|
||||
if ( right != null ) {
|
||||
intervalCache += right
|
||||
}
|
||||
}
|
||||
linesProcessed += 1
|
||||
if ( linesProcessed % LINES_TO_CACHE == 0 ) {
|
||||
val toPrint = intervalCache.filter( u => (u.isBefore(prevLoc) && u.distance(prevLoc) > startInt+sizeInt))
|
||||
intervalCache = intervalCache -- toPrint
|
||||
toPrint.foreach(u => output.print("%s%n".format(repr(u))))
|
||||
}
|
||||
//System.out.printf("%s".format(if ( curLoc == null ) "null" else repr(curLoc)))
|
||||
}
|
||||
|
||||
intervalCache.foreach(u => output.print("%s%n".format(repr(u))))
|
||||
|
||||
output.close()
|
||||
}
|
||||
|
||||
def expandLeft(g: GenomeLoc) : GenomeLoc = {
|
||||
parser.createGenomeLoc(g.getContig,g.getStart-startInt-sizeInt,g.getStart-startInt)
|
||||
}
|
||||
|
||||
def expandRight(g: GenomeLoc) : GenomeLoc = {
|
||||
parser.createGenomeLoc(g.getContig,g.getStop+startInt,g.getStop+startInt+sizeInt)
|
||||
}
|
||||
|
||||
def refine(newG: GenomeLoc, borderG: GenomeLoc) : GenomeLoc = {
|
||||
if ( borderG == null || ! newG.overlapsP(borderG) ) {
|
||||
return newG
|
||||
} else {
|
||||
if ( newG.getStart < borderG.getStart ) {
|
||||
if ( borderG.getStart - startInt > newG.getStart ) {
|
||||
return parser.createGenomeLoc(newG.getContig,newG.getStart,borderG.getStart-startInt)
|
||||
}
|
||||
} else {
|
||||
if ( borderG.getStop + startInt < newG.getStop ){
|
||||
return parser.createGenomeLoc(newG.getContig,borderG.getStop+startInt,newG.getStop)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
null
|
||||
}
|
||||
|
||||
def repr(loc : GenomeLoc) : String = {
|
||||
if ( loc == null ) return "null"
|
||||
if ( outType == IntervalFormatType.INTERVALS ) {
|
||||
return "%s:%d-%d".format(loc.getContig,loc.getStart,loc.getStop)
|
||||
} else {
|
||||
return "%s\t%d\t%d".format(loc.getContig,loc.getStart-offsetOut,loc.getStop+offsetOut)
|
||||
}
|
||||
}
|
||||
|
||||
def isBed(t: IntervalFormatType.IntervalFormatType) : Boolean = {
|
||||
t == IntervalFormatType.BED
|
||||
}
|
||||
|
||||
def parseGenomeInterval( s : String ) : GenomeLoc = {
|
||||
val sp = s.split("\\s+")
|
||||
// todo -- maybe specify whether the bed format [0,6) --> (1,2,3,4,5) is what's wanted
|
||||
if ( s.contains(":") ) parser.parseGenomeLoc(s) else parser.createGenomeLoc(sp(0),sp(1).toInt+offsetIn,sp(2).toInt-offsetIn)
|
||||
}
|
||||
|
||||
object IntervalFormatType extends Enumeration("INTERVALS","BED","TDF") {
|
||||
type IntervalFormatType = Value
|
||||
val INTERVALS,BED,TDF = Value
|
||||
|
||||
def convert(s : String) : IntervalFormatType = {
|
||||
if ( s.equals("INTERVALS") ) INTERVALS else { if (s.equals("BED") ) BED else TDF}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,70 +0,0 @@
|
|||
package org.broadinstitute.sting.queue.library.ipf.intervals
|
||||
|
||||
import org.broadinstitute.sting.queue.function.InProcessFunction
|
||||
import collection.JavaConversions._
|
||||
import org.broadinstitute.sting.commandline._
|
||||
import java.io.{PrintStream, File}
|
||||
import net.sf.samtools.{SAMSequenceRecord, SAMFileHeader, SAMSequenceDictionary}
|
||||
import org.broadinstitute.sting.utils.text.XReadLines
|
||||
import org.broadinstitute.sting.utils.{GenomeLoc, GenomeLocParser}
|
||||
|
||||
class IntersectIntervals(iVals: List[File], outFile: File, bed: Boolean) extends InProcessFunction {
|
||||
@Input(doc="List of interval files to find the intersection of") val intervals : List[File] = iVals
|
||||
@Output(doc="Output interval file to which to write") val output : File = outFile
|
||||
@Argument(doc="Assume the input interval lists are sorted in the proper order") var assumeSorted = false
|
||||
@Argument(doc="Is the tdf in bed file (0-based clopen: 0 5 for {1,2,3,4}?") var isBed = bed
|
||||
|
||||
|
||||
var outStream : PrintStream = _
|
||||
var contigs : List[String] = Nil
|
||||
var dict : SAMSequenceDictionary = _
|
||||
var parser : GenomeLocParser = _
|
||||
|
||||
def run = {
|
||||
outStream = new PrintStream(output)
|
||||
dict = new SAMSequenceDictionary
|
||||
// note: memory hog
|
||||
val sources : List[(List[(String,Int,Int)],Int)] = intervals.map(g => asScalaIterator(new XReadLines(g)).map(u => parse(u)).toList).zipWithIndex
|
||||
sources.map(u => u._1).flatten.map(u => u._1).distinct.foreach(u => dict.addSequence(new SAMSequenceRecord(u,Integer.MAX_VALUE)))
|
||||
parser = new GenomeLocParser(dict)
|
||||
sources.map( (u: (List[(String,Int,Int)],Int)) => u._1.map(g => (newGenomeLoc(g),u._2))).flatten.sortWith( (a,b) => (a._1 compareTo b._1) < 0 ).foldLeft[List[List[(GenomeLoc,Int)]]](Nil)( (a,b) => overlapFold(a,b)).map(u => mapIntersect(u)).filter(h => h != null && h.size > 0).foreach(h => writeOut(h))
|
||||
outStream.close()
|
||||
}
|
||||
|
||||
def writeOut(g : GenomeLoc) : Unit = {
|
||||
outStream.print("%s%n".format(g.toString))
|
||||
}
|
||||
|
||||
def parse(s : String) : (String,Int,Int) = {
|
||||
if ( s.contains(":") ) {
|
||||
val split1 = s.split(":")
|
||||
val split2 = split1(1).split("-")
|
||||
return (split1(0),split2(0).toInt,split2(1).toInt)
|
||||
} else {
|
||||
val split = s.split("\\s+")
|
||||
return (split(0),split(1).toInt + (if(isBed) 1 else 0) ,split(2).toInt - (if(isBed) 1 else 0) )
|
||||
}
|
||||
}
|
||||
|
||||
def newGenomeLoc(coords : (String,Int,Int) ) : GenomeLoc = {
|
||||
parser.createGenomeLoc(coords._1,coords._2,coords._3)
|
||||
}
|
||||
|
||||
def overlapFold( a: List[List[(GenomeLoc,Int)]], b: (GenomeLoc,Int) ) : List[List[(GenomeLoc,Int)]] = {
|
||||
if ( a.last.forall(u => u._1.overlapsP(b._1)) ) {
|
||||
a.init :+ (a.last :+ b)
|
||||
} else {
|
||||
a :+ ( a.last.dropWhile(u => ! u._1.overlapsP(b._1)) :+ b)
|
||||
}
|
||||
}
|
||||
|
||||
def mapIntersect( u: List[(GenomeLoc,Int)]) : GenomeLoc = {
|
||||
if ( u.map(h => h._2).distinct.sum != range(1,intervals.size).sum ) { // if all sources not accounted for
|
||||
null
|
||||
}
|
||||
u.map(h => h._1).reduceLeft[GenomeLoc]( (a,b) => a.intersect(b) )
|
||||
}
|
||||
|
||||
def range(a: Int, b: Int) : Range = new Range(a,b+1,1)
|
||||
|
||||
}
|
||||
Binary file not shown.
|
|
@ -1,3 +1,3 @@
|
|||
<ivy-module version="1.0">
|
||||
<info organisation="net.sf.snpeff" module="snpeff" revision="2.0.2" status="release" />
|
||||
<info organisation="net.sf.snpeff" module="snpeff" revision="2.0.4rc3" status="release" />
|
||||
</ivy-module>
|
||||
Loading…
Reference in New Issue